Add f16 and f128 inline ASM support for x86 and x86-64 · rust-lang/rust@dfc5514 (original) (raw)

`@@ -959,6 +959,43 @@ fn llvm_fixup_input<'ll, 'tcx>(

`

959

959

`InlineAsmRegClass::X86(X86InlineAsmRegClass::xmm_reg | X86InlineAsmRegClass::zmm_reg),

`

960

960

`Abi::Vector { .. },

`

961

961

`) if layout.size.bytes() == 64 => bx.bitcast(value, bx.cx.type_vector(bx.cx.type_f64(), 8)),

`

``

962

`+

(

`

``

963

`+

InlineAsmRegClass::X86(

`

``

964

`+

X86InlineAsmRegClass::xmm_reg

`

``

965

`+

| X86InlineAsmRegClass::ymm_reg

`

``

966

`+

| X86InlineAsmRegClass::zmm_reg,

`

``

967

`+

),

`

``

968

`+

Abi::Scalar(s),

`

``

969

`+

) if bx.sess().asm_arch == Some(InlineAsmArch::X86)

`

``

970

`+

&& s.primitive() == Primitive::Float(Float::F128) =>

`

``

971

`+

{

`

``

972

`+

bx.bitcast(value, bx.type_vector(bx.type_i32(), 4))

`

``

973

`+

}

`

``

974

`+

(

`

``

975

`+

InlineAsmRegClass::X86(

`

``

976

`+

X86InlineAsmRegClass::xmm_reg

`

``

977

`+

| X86InlineAsmRegClass::ymm_reg

`

``

978

`+

| X86InlineAsmRegClass::zmm_reg,

`

``

979

`+

),

`

``

980

`+

Abi::Scalar(s),

`

``

981

`+

) if s.primitive() == Primitive::Float(Float::F16) => {

`

``

982

`+

let value = bx.insert_element(

`

``

983

`+

bx.const_undef(bx.type_vector(bx.type_f16(), 8)),

`

``

984

`+

value,

`

``

985

`+

bx.const_usize(0),

`

``

986

`+

);

`

``

987

`+

bx.bitcast(value, bx.type_vector(bx.type_i16(), 8))

`

``

988

`+

}

`

``

989

`+

(

`

``

990

`+

InlineAsmRegClass::X86(

`

``

991

`+

X86InlineAsmRegClass::xmm_reg

`

``

992

`+

| X86InlineAsmRegClass::ymm_reg

`

``

993

`+

| X86InlineAsmRegClass::zmm_reg,

`

``

994

`+

),

`

``

995

`+

Abi::Vector { element, count: count @ (8 | 16) },

`

``

996

`+

) if element.primitive() == Primitive::Float(Float::F16) => {

`

``

997

`+

bx.bitcast(value, bx.type_vector(bx.type_i16(), count))

`

``

998

`+

}

`

962

999

`(

`

963

1000

`InlineAsmRegClass::Arm(ArmInlineAsmRegClass::sreg | ArmInlineAsmRegClass::sreg_low16),

`

964

1001

`Abi::Scalar(s),

`

`@@ -1036,6 +1073,39 @@ fn llvm_fixup_output<'ll, 'tcx>(

`

1036

1073

`InlineAsmRegClass::X86(X86InlineAsmRegClass::xmm_reg | X86InlineAsmRegClass::zmm_reg),

`

1037

1074

`Abi::Vector { .. },

`

1038

1075

`) if layout.size.bytes() == 64 => bx.bitcast(value, layout.llvm_type(bx.cx)),

`

``

1076

`+

(

`

``

1077

`+

InlineAsmRegClass::X86(

`

``

1078

`+

X86InlineAsmRegClass::xmm_reg

`

``

1079

`+

| X86InlineAsmRegClass::ymm_reg

`

``

1080

`+

| X86InlineAsmRegClass::zmm_reg,

`

``

1081

`+

),

`

``

1082

`+

Abi::Scalar(s),

`

``

1083

`+

) if bx.sess().asm_arch == Some(InlineAsmArch::X86)

`

``

1084

`+

&& s.primitive() == Primitive::Float(Float::F128) =>

`

``

1085

`+

{

`

``

1086

`+

bx.bitcast(value, bx.type_f128())

`

``

1087

`+

}

`

``

1088

`+

(

`

``

1089

`+

InlineAsmRegClass::X86(

`

``

1090

`+

X86InlineAsmRegClass::xmm_reg

`

``

1091

`+

| X86InlineAsmRegClass::ymm_reg

`

``

1092

`+

| X86InlineAsmRegClass::zmm_reg,

`

``

1093

`+

),

`

``

1094

`+

Abi::Scalar(s),

`

``

1095

`+

) if s.primitive() == Primitive::Float(Float::F16) => {

`

``

1096

`+

let value = bx.bitcast(value, bx.type_vector(bx.type_f16(), 8));

`

``

1097

`+

bx.extract_element(value, bx.const_usize(0))

`

``

1098

`+

}

`

``

1099

`+

(

`

``

1100

`+

InlineAsmRegClass::X86(

`

``

1101

`+

X86InlineAsmRegClass::xmm_reg

`

``

1102

`+

| X86InlineAsmRegClass::ymm_reg

`

``

1103

`+

| X86InlineAsmRegClass::zmm_reg,

`

``

1104

`+

),

`

``

1105

`+

Abi::Vector { element, count: count @ (8 | 16) },

`

``

1106

`+

) if element.primitive() == Primitive::Float(Float::F16) => {

`

``

1107

`+

bx.bitcast(value, bx.type_vector(bx.type_f16(), count))

`

``

1108

`+

}

`

1039

1109

`(

`

1040

1110

`InlineAsmRegClass::Arm(ArmInlineAsmRegClass::sreg | ArmInlineAsmRegClass::sreg_low16),

`

1041

1111

`Abi::Scalar(s),

`

`@@ -1109,6 +1179,36 @@ fn llvm_fixup_output_type<'ll, 'tcx>(

`

1109

1179

`InlineAsmRegClass::X86(X86InlineAsmRegClass::xmm_reg | X86InlineAsmRegClass::zmm_reg),

`

1110

1180

`Abi::Vector { .. },

`

1111

1181

`) if layout.size.bytes() == 64 => cx.type_vector(cx.type_f64(), 8),

`

``

1182

`+

(

`

``

1183

`+

InlineAsmRegClass::X86(

`

``

1184

`+

X86InlineAsmRegClass::xmm_reg

`

``

1185

`+

| X86InlineAsmRegClass::ymm_reg

`

``

1186

`+

| X86InlineAsmRegClass::zmm_reg,

`

``

1187

`+

),

`

``

1188

`+

Abi::Scalar(s),

`

``

1189

`+

) if cx.sess().asm_arch == Some(InlineAsmArch::X86)

`

``

1190

`+

&& s.primitive() == Primitive::Float(Float::F128) =>

`

``

1191

`+

{

`

``

1192

`+

cx.type_vector(cx.type_i32(), 4)

`

``

1193

`+

}

`

``

1194

`+

(

`

``

1195

`+

InlineAsmRegClass::X86(

`

``

1196

`+

X86InlineAsmRegClass::xmm_reg

`

``

1197

`+

| X86InlineAsmRegClass::ymm_reg

`

``

1198

`+

| X86InlineAsmRegClass::zmm_reg,

`

``

1199

`+

),

`

``

1200

`+

Abi::Scalar(s),

`

``

1201

`+

) if s.primitive() == Primitive::Float(Float::F16) => cx.type_vector(cx.type_i16(), 8),

`

``

1202

`+

(

`

``

1203

`+

InlineAsmRegClass::X86(

`

``

1204

`+

X86InlineAsmRegClass::xmm_reg

`

``

1205

`+

| X86InlineAsmRegClass::ymm_reg

`

``

1206

`+

| X86InlineAsmRegClass::zmm_reg,

`

``

1207

`+

),

`

``

1208

`+

Abi::Vector { element, count: count @ (8 | 16) },

`

``

1209

`+

) if element.primitive() == Primitive::Float(Float::F16) => {

`

``

1210

`+

cx.type_vector(cx.type_i16(), count)

`

``

1211

`+

}

`

1112

1212

`(

`

1113

1213

`InlineAsmRegClass::Arm(ArmInlineAsmRegClass::sreg | ArmInlineAsmRegClass::sreg_low16),

`

1114

1214

`Abi::Scalar(s),

`