Add `f16` and `f128` inline ASM support for `x86` and `x86-64` · rust-lang/rust@dfc5514 (original) (raw)
`@@ -959,6 +959,43 @@ fn llvm_fixup_input<'ll, 'tcx>(
`
959
959
`InlineAsmRegClass::X86(X86InlineAsmRegClass::xmm_reg | X86InlineAsmRegClass::zmm_reg),
`
960
960
`Abi::Vector { .. },
`
961
961
`) if layout.size.bytes() == 64 => bx.bitcast(value, bx.cx.type_vector(bx.cx.type_f64(), 8)),
`
``
962
`+
(
`
``
963
`+
InlineAsmRegClass::X86(
`
``
964
`+
X86InlineAsmRegClass::xmm_reg
`
``
965
`+
| X86InlineAsmRegClass::ymm_reg
`
``
966
`+
| X86InlineAsmRegClass::zmm_reg,
`
``
967
`+
),
`
``
968
`+
Abi::Scalar(s),
`
``
969
`+
) if bx.sess().asm_arch == Some(InlineAsmArch::X86)
`
``
970
`+
&& s.primitive() == Primitive::Float(Float::F128) =>
`
``
971
`+
{
`
``
972
`+
bx.bitcast(value, bx.type_vector(bx.type_i32(), 4))
`
``
973
`+
}
`
``
974
`+
(
`
``
975
`+
InlineAsmRegClass::X86(
`
``
976
`+
X86InlineAsmRegClass::xmm_reg
`
``
977
`+
| X86InlineAsmRegClass::ymm_reg
`
``
978
`+
| X86InlineAsmRegClass::zmm_reg,
`
``
979
`+
),
`
``
980
`+
Abi::Scalar(s),
`
``
981
`+
) if s.primitive() == Primitive::Float(Float::F16) => {
`
``
982
`+
let value = bx.insert_element(
`
``
983
`+
bx.const_undef(bx.type_vector(bx.type_f16(), 8)),
`
``
984
`+
value,
`
``
985
`+
bx.const_usize(0),
`
``
986
`+
);
`
``
987
`+
bx.bitcast(value, bx.type_vector(bx.type_i16(), 8))
`
``
988
`+
}
`
``
989
`+
(
`
``
990
`+
InlineAsmRegClass::X86(
`
``
991
`+
X86InlineAsmRegClass::xmm_reg
`
``
992
`+
| X86InlineAsmRegClass::ymm_reg
`
``
993
`+
| X86InlineAsmRegClass::zmm_reg,
`
``
994
`+
),
`
``
995
`+
Abi::Vector { element, count: count @ (8 | 16) },
`
``
996
`+
) if element.primitive() == Primitive::Float(Float::F16) => {
`
``
997
`+
bx.bitcast(value, bx.type_vector(bx.type_i16(), count))
`
``
998
`+
}
`
962
999
`(
`
963
1000
`InlineAsmRegClass::Arm(ArmInlineAsmRegClass::sreg | ArmInlineAsmRegClass::sreg_low16),
`
964
1001
`Abi::Scalar(s),
`
`@@ -1036,6 +1073,39 @@ fn llvm_fixup_output<'ll, 'tcx>(
`
1036
1073
`InlineAsmRegClass::X86(X86InlineAsmRegClass::xmm_reg | X86InlineAsmRegClass::zmm_reg),
`
1037
1074
`Abi::Vector { .. },
`
1038
1075
`) if layout.size.bytes() == 64 => bx.bitcast(value, layout.llvm_type(bx.cx)),
`
``
1076
`+
(
`
``
1077
`+
InlineAsmRegClass::X86(
`
``
1078
`+
X86InlineAsmRegClass::xmm_reg
`
``
1079
`+
| X86InlineAsmRegClass::ymm_reg
`
``
1080
`+
| X86InlineAsmRegClass::zmm_reg,
`
``
1081
`+
),
`
``
1082
`+
Abi::Scalar(s),
`
``
1083
`+
) if bx.sess().asm_arch == Some(InlineAsmArch::X86)
`
``
1084
`+
&& s.primitive() == Primitive::Float(Float::F128) =>
`
``
1085
`+
{
`
``
1086
`+
bx.bitcast(value, bx.type_f128())
`
``
1087
`+
}
`
``
1088
`+
(
`
``
1089
`+
InlineAsmRegClass::X86(
`
``
1090
`+
X86InlineAsmRegClass::xmm_reg
`
``
1091
`+
| X86InlineAsmRegClass::ymm_reg
`
``
1092
`+
| X86InlineAsmRegClass::zmm_reg,
`
``
1093
`+
),
`
``
1094
`+
Abi::Scalar(s),
`
``
1095
`+
) if s.primitive() == Primitive::Float(Float::F16) => {
`
``
1096
`+
let value = bx.bitcast(value, bx.type_vector(bx.type_f16(), 8));
`
``
1097
`+
bx.extract_element(value, bx.const_usize(0))
`
``
1098
`+
}
`
``
1099
`+
(
`
``
1100
`+
InlineAsmRegClass::X86(
`
``
1101
`+
X86InlineAsmRegClass::xmm_reg
`
``
1102
`+
| X86InlineAsmRegClass::ymm_reg
`
``
1103
`+
| X86InlineAsmRegClass::zmm_reg,
`
``
1104
`+
),
`
``
1105
`+
Abi::Vector { element, count: count @ (8 | 16) },
`
``
1106
`+
) if element.primitive() == Primitive::Float(Float::F16) => {
`
``
1107
`+
bx.bitcast(value, bx.type_vector(bx.type_f16(), count))
`
``
1108
`+
}
`
1039
1109
`(
`
1040
1110
`InlineAsmRegClass::Arm(ArmInlineAsmRegClass::sreg | ArmInlineAsmRegClass::sreg_low16),
`
1041
1111
`Abi::Scalar(s),
`
`@@ -1109,6 +1179,36 @@ fn llvm_fixup_output_type<'ll, 'tcx>(
`
1109
1179
`InlineAsmRegClass::X86(X86InlineAsmRegClass::xmm_reg | X86InlineAsmRegClass::zmm_reg),
`
1110
1180
`Abi::Vector { .. },
`
1111
1181
`) if layout.size.bytes() == 64 => cx.type_vector(cx.type_f64(), 8),
`
``
1182
`+
(
`
``
1183
`+
InlineAsmRegClass::X86(
`
``
1184
`+
X86InlineAsmRegClass::xmm_reg
`
``
1185
`+
| X86InlineAsmRegClass::ymm_reg
`
``
1186
`+
| X86InlineAsmRegClass::zmm_reg,
`
``
1187
`+
),
`
``
1188
`+
Abi::Scalar(s),
`
``
1189
`+
) if cx.sess().asm_arch == Some(InlineAsmArch::X86)
`
``
1190
`+
&& s.primitive() == Primitive::Float(Float::F128) =>
`
``
1191
`+
{
`
``
1192
`+
cx.type_vector(cx.type_i32(), 4)
`
``
1193
`+
}
`
``
1194
`+
(
`
``
1195
`+
InlineAsmRegClass::X86(
`
``
1196
`+
X86InlineAsmRegClass::xmm_reg
`
``
1197
`+
| X86InlineAsmRegClass::ymm_reg
`
``
1198
`+
| X86InlineAsmRegClass::zmm_reg,
`
``
1199
`+
),
`
``
1200
`+
Abi::Scalar(s),
`
``
1201
`+
) if s.primitive() == Primitive::Float(Float::F16) => cx.type_vector(cx.type_i16(), 8),
`
``
1202
`+
(
`
``
1203
`+
InlineAsmRegClass::X86(
`
``
1204
`+
X86InlineAsmRegClass::xmm_reg
`
``
1205
`+
| X86InlineAsmRegClass::ymm_reg
`
``
1206
`+
| X86InlineAsmRegClass::zmm_reg,
`
``
1207
`+
),
`
``
1208
`+
Abi::Vector { element, count: count @ (8 | 16) },
`
``
1209
`+
) if element.primitive() == Primitive::Float(Float::F16) => {
`
``
1210
`+
cx.type_vector(cx.type_i16(), count)
`
``
1211
`+
}
`
1112
1212
`(
`
1113
1213
`InlineAsmRegClass::Arm(ArmInlineAsmRegClass::sreg | ArmInlineAsmRegClass::sreg_low16),
`
1114
1214
`Abi::Scalar(s),
`