Auto merge of #135408 - RalfJung:x86-sse2, r= · rust-lang/rust@0937552
@@ -7,7 +7,7 @@ use rustc_abi::{
 };
 use rustc_macros::HashStable_Generic;
 
-use crate::spec::{HasTargetSpec, HasWasmCAbiOpt, HasX86AbiOpt, WasmCAbi};
+use crate::spec::{HasTargetSpec, HasWasmCAbiOpt, HasX86AbiOpt, RustcAbi, WasmCAbi};
 
 mod aarch64;
 mod amdgpu;
@@ -386,6 +386,7 @@ impl<'a, Ty> ArgAbi<'a, Ty> {
     /// Pass this argument directly instead. Should NOT be used!
     /// Only exists because of past ABI mistakes that will take time to fix
     /// (see https://github.com/rust-lang/rust/issues/115666).
+    #[track_caller]
     pub fn make_direct_deprecated(&mut self) {
         match self.mode {
             PassMode::Indirect { .. } => {
@@ -398,6 +399,7 @@ impl<'a, Ty> ArgAbi<'a, Ty> {
 
     /// Pass this argument indirectly, by passing a (thin or wide) pointer to the argument instead.
     /// This is valid for both sized and unsized arguments.
+    #[track_caller]
     pub fn make_indirect(&mut self) {
         match self.mode {
             PassMode::Direct(_) | PassMode::Pair(_, _) => {
@@ -412,6 +414,7 @@ impl<'a, Ty> ArgAbi<'a, Ty> {
 
     /// Same as `make_indirect`, but for arguments that are ignored. Only needed for ABIs that pass
     /// ZSTs indirectly.
+    #[track_caller]
    pub fn make_indirect_from_ignore(&mut self) {
        match self.mode {
            PassMode::Ignore => {
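The three `#[track_caller]` attributes added above make any panic raised inside these helpers report the location of the ABI code that called them, rather than a line inside the helper itself. A minimal standalone sketch of the attribute's effect (`where_was_i_called` is a hypothetical name, not part of this PR):

```rust
use std::panic::Location;

// With `#[track_caller]`, `Location::caller()` (and the location attached to
// any panic raised in this function) refers to the call site, not this body.
#[track_caller]
fn where_was_i_called() -> &'static Location<'static> {
    Location::caller()
}

fn main() {
    // Prints the file, line, and column of *this* call expression.
    println!("called from {}", where_was_i_called());
}
```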
@@ -716,27 +719,46 @@ impl<'a, Ty> FnAbi<'a, Ty> {
         C: HasDataLayout + HasTargetSpec,
     {
         let spec = cx.target_spec();
-        match &spec.arch[..] {
+        match &*spec.arch {
             "x86" => x86::compute_rust_abi_info(cx, self, abi),
             "riscv32" | "riscv64" => riscv::compute_rust_abi_info(cx, self, abi),
             "loongarch64" => loongarch::compute_rust_abi_info(cx, self, abi),
             "aarch64" => aarch64::compute_rust_abi_info(cx, self),
             _ => {}
         };
 
+        // Decides whether we can pass the given SIMD argument via `PassMode::Direct`.
+        // May only return `true` if the target will always pass those arguments the same way,
+        // no matter what the user does with `-Ctarget-feature`! In other words, whatever
+        // target features are required to pass a SIMD value in registers must be listed in
+        // the `abi_required_features` for the current target and ABI.
+        let can_pass_simd_directly = |arg: &ArgAbi<'_, Ty>| match &*spec.arch {
+            // On x86, if we have SSE2 (which we have by default for x86_64), we can always pass up
+            // to 128-bit-sized vectors.
+            "x86" if spec.rustc_abi == Some(RustcAbi::X86Sse2) => arg.layout.size.bits() <= 128,
+            "x86_64" if spec.rustc_abi != Some(RustcAbi::X86Softfloat) => {
+                arg.layout.size.bits() <= 128
+            }
+            // So far, we haven't implemented this logic for any other target.
+            _ => false,
+        };
+
         for (arg_idx, arg) in self
             .args
             .iter_mut()
             .enumerate()
             .map(|(idx, arg)| (Some(idx), arg))
             .chain(iter::once((None, &mut self.ret)))
         {
-            if arg.is_ignore() {
+            // If the logic above already picked a specific type to cast the argument to, leave that
+            // in place.
+            if matches!(arg.mode, PassMode::Ignore | PassMode::Cast { .. }) {
                 continue;
             }
 
             if arg_idx.is_none()
                 && arg.layout.size > Primitive::Pointer(AddressSpace::DATA).size(cx) * 2
+                && !matches!(arg.layout.backend_repr, BackendRepr::Vector { .. })
             {
                 // Return values larger than 2 registers using a return area
                 // pointer. LLVM and Cranelift disagree about how to return
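For illustration, the new `can_pass_simd_directly` check restated as a self-contained function, with the rustc types replaced by plain stand-ins (`arch`, `rustc_abi`, and `size_bits` model `spec.arch`, `spec.rustc_abi`, and `arg.layout.size.bits()`); a sketch of the logic, not the compiler's actual API:

```rust
#[derive(PartialEq)]
enum RustcAbi {
    X86Sse2,
    X86Softfloat,
}

// Mirrors the closure above: only answer "direct" when the target features
// needed to pass the value in registers are guaranteed by the target and ABI,
// independent of whatever the user passes via -Ctarget-feature.
fn can_pass_simd_directly(arch: &str, rustc_abi: Option<RustcAbi>, size_bits: u64) -> bool {
    match arch {
        // SSE2 guarantees XMM registers, so vectors up to 128 bits are fine.
        "x86" if rustc_abi == Some(RustcAbi::X86Sse2) => size_bits <= 128,
        // x86_64 always has SSE2 unless the softfloat ABI is in use.
        "x86_64" if rustc_abi != Some(RustcAbi::X86Softfloat) => size_bits <= 128,
        // Not implemented for any other target yet.
        _ => false,
    }
}

fn main() {
    assert!(can_pass_simd_directly("x86_64", None, 128));
    assert!(!can_pass_simd_directly("x86_64", None, 256)); // AVX is not guaranteed
    assert!(!can_pass_simd_directly("aarch64", None, 128));
}
```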
@@ -746,7 +768,8 @@ impl<'a, Ty> FnAbi<'a, Ty> {
                 // return value independently and decide to pass it in a
                 // register or not, which would result in the return value
                 // being passed partially in registers and partially through a
-                // return area pointer.
+                // return area pointer. For large IR-level values such as `i128`,
+                // cranelift will even split up the value into smaller chunks.
                 //
                 // While Cranelift may need to be fixed as the LLVM behavior is
                 // generally more correct with respect to the surface language,
@@ -776,53 +799,60 @@ impl<'a, Ty> FnAbi<'a, Ty> {
                 // rustc_target already ensure any return value which doesn't
                 // fit in the available amount of return registers is passed in
                 // the right way for the current target.
+                //
+                // The adjustment is not necessary nor desired for types with a vector
+                // representation; those are handled below.
                 arg.make_indirect();
                 continue;
             }
 
             match arg.layout.backend_repr {
-                BackendRepr::Memory { .. } => {}
-
-                // This is a fun case! The gist of what this is doing is
-                // that we want callers and callees to always agree on the
-                // ABI of how they pass SIMD arguments. If we were to not
-                // make these arguments indirect then they'd be immediates
-                // in LLVM, which means that they'd used whatever the
-                // appropriate ABI is for the callee and the caller. That
-                // means, for example, if the caller doesn't have AVX
-                // enabled but the callee does, then passing an AVX argument
-                // across this boundary would cause corrupt data to show up.
-                //
-                // This problem is fixed by unconditionally passing SIMD
-                // arguments through memory between callers and callees
-                // which should get them all to agree on ABI regardless of
-                // target feature sets. Some more information about this
-                // issue can be found in #44367.
-                //
-                // Note that the intrinsic ABI is exempt here as
-                // that's how we connect up to LLVM and it's unstable
-                // anyway, we control all calls to it in libstd.
-                BackendRepr::Vector { .. }
-                    if abi != ExternAbi::RustIntrinsic && spec.simd_types_indirect =>
-                {
-                    arg.make_indirect();
-                    continue;
+                BackendRepr::Memory { .. } => {
+                    // Compute `Aggregate` ABI.
+
+                    let is_indirect_not_on_stack =
+                        matches!(arg.mode, PassMode::Indirect { on_stack: false, .. });
+                    assert!(is_indirect_not_on_stack);
+
+                    let size = arg.layout.size;
+                    if arg.layout.is_sized()
+                        && size <= Primitive::Pointer(AddressSpace::DATA).size(cx)
+                    {
+                        // We want to pass small aggregates as immediates, but using
+                        // an LLVM aggregate type for this leads to bad optimizations,
+                        // so we pick an appropriately sized integer type instead.
+                        arg.cast_to(Reg { kind: RegKind::Integer, size });
+                    }
                 }
 
-                _ => continue,
-            }
-            // Compute `Aggregate` ABI.
-
-            let is_indirect_not_on_stack =
-                matches!(arg.mode, PassMode::Indirect { on_stack: false, .. });
-            assert!(is_indirect_not_on_stack);
-
-            let size = arg.layout.size;
-            if !arg.layout.is_unsized() && size <= Primitive::Pointer(AddressSpace::DATA).size(cx) {
-                // We want to pass small aggregates as immediates, but using
-                // an LLVM aggregate type for this leads to bad optimizations,
-                // so we pick an appropriately sized integer type instead.
-                arg.cast_to(Reg { kind: RegKind::Integer, size });
+                BackendRepr::Vector { .. } => {
+                    // This is a fun case! The gist of what this is doing is
+                    // that we want callers and callees to always agree on the
+                    // ABI of how they pass SIMD arguments. If we were to not
+                    // make these arguments indirect then they'd be immediates
+                    // in LLVM, which means that they'd used whatever the
+                    // appropriate ABI is for the callee and the caller. That
+                    // means, for example, if the caller doesn't have AVX
+                    // enabled but the callee does, then passing an AVX argument
+                    // across this boundary would cause corrupt data to show up.
+                    //
+                    // This problem is fixed by unconditionally passing SIMD
+                    // arguments through memory between callers and callees
+                    // which should get them all to agree on ABI regardless of
+                    // target feature sets. Some more information about this
+                    // issue can be found in #44367.
+                    //
+                    // Note that the intrinsic ABI is exempt here as those are not
+                    // real functions anyway, and the backend expects very specific types.
+                    if abi != ExternAbi::RustIntrinsic
+                        && spec.simd_types_indirect
+                        && !can_pass_simd_directly(arg)
+                    {
+                        arg.make_indirect();
+                    }
+                }
+
+                _ => {}
             }
         }
     }
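As a worked illustration of the `Aggregate` branch above: any sized aggregate no larger than a pointer is cast to an integer of the same size, so under an assumed 64-bit data layout the hypothetical struct below would travel as a single pointer-sized integer immediate rather than an LLVM aggregate type. A sketch, not compiler code:

```rust
use std::mem::size_of;

// Hypothetical example type: 6 bytes of fields, padded to 8 for alignment.
#[allow(dead_code)]
#[repr(C)]
struct Small {
    a: u32,
    b: u16,
}

fn main() {
    // On a 64-bit target this prints 8 and 8: the struct fits in one pointer,
    // so per the logic above it is cast to an integer of its own size (i64).
    println!("size_of::<Small>() = {}", size_of::<Small>());
    println!("pointer size       = {}", size_of::<usize>());
}
```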