Auto merge of #135408 - RalfJung:x86-sse2, r= · rust-lang/rust@0937552 (original) (raw)

`@@ -7,7 +7,7 @@ use rustc_abi::{

`

7

7

`};

`

8

8

`use rustc_macros::HashStable_Generic;

`

9

9

``

10

``

`-

use crate::spec::{HasTargetSpec, HasWasmCAbiOpt, HasX86AbiOpt, WasmCAbi};

`

``

10

`+

use crate::spec::{HasTargetSpec, HasWasmCAbiOpt, HasX86AbiOpt, RustcAbi, WasmCAbi};

`

11

11

``

12

12

`mod aarch64;

`

13

13

`mod amdgpu;

`

`@@ -386,6 +386,7 @@ impl<'a, Ty> ArgAbi<'a, Ty> {

`

386

386

`/// Pass this argument directly instead. Should NOT be used!

`

387

387

`/// Only exists because of past ABI mistakes that will take time to fix

`

388

388

`/// (see https://github.com/rust-lang/rust/issues/115666).

`

``

389

`+

#[track_caller]

`

389

390

`pub fn make_direct_deprecated(&mut self) {

`

390

391

`match self.mode {

`

391

392

`PassMode::Indirect { .. } => {

`

`@@ -398,6 +399,7 @@ impl<'a, Ty> ArgAbi<'a, Ty> {

`

398

399

``

399

400

`/// Pass this argument indirectly, by passing a (thin or wide) pointer to the argument instead.

`

400

401

`/// This is valid for both sized and unsized arguments.

`

``

402

`+

#[track_caller]

`

401

403

`pub fn make_indirect(&mut self) {

`

402

404

`match self.mode {

`

403

405

`PassMode::Direct(_) | PassMode::Pair(_, _) => {

`

`@@ -412,6 +414,7 @@ impl<'a, Ty> ArgAbi<'a, Ty> {

`

412

414

``

413

415

`` /// Same as `make_indirect`, but for arguments that are ignored. Only needed for ABIs that pass

``

414

416

`/// ZSTs indirectly.

`

``

417

`+

#[track_caller]

`

415

418

`pub fn make_indirect_from_ignore(&mut self) {

`

416

419

`match self.mode {

`

417

420

`PassMode::Ignore => {

`

`@@ -716,27 +719,46 @@ impl<'a, Ty> FnAbi<'a, Ty> {

`

716

719

`C: HasDataLayout + HasTargetSpec,

`

717

720

`{

`

718

721

`let spec = cx.target_spec();

`

719

``

`-

match &spec.arch[..] {

`

``

722

`+

match &*spec.arch {

`

720

723

`"x86" => x86::compute_rust_abi_info(cx, self, abi),

`

721

724

`"riscv32" | "riscv64" => riscv::compute_rust_abi_info(cx, self, abi),

`

722

725

`"loongarch64" => loongarch::compute_rust_abi_info(cx, self, abi),

`

723

726

`"aarch64" => aarch64::compute_rust_abi_info(cx, self),

`

724

727

` _ => {}

`

725

728

`};

`

726

729

``

``

730

`` +

// Decides whether we can pass the given SIMD argument via `PassMode::Direct`.

``

``

731

`` +

// May only return true if the target will always pass those arguments the same way,

``

``

732

`` +

// no matter what the user does with `-Ctarget-feature`! In other words, whatever

``

``

733

`+

// target features are required to pass a SIMD value in registers must be listed in

`

``

734

`` +

// the `abi_required_features` for the current target and ABI.

``

``

735

`+

let can_pass_simd_directly = |arg: &ArgAbi<'_, Ty>| match &*spec.arch {

`

``

736

`+

// On x86, if we have SSE2 (which we have by default for x86_64), we can always pass up

`

``

737

`+

// to 128-bit-sized vectors.

`

``

738

`+

"x86" if spec.rustc_abi == Some(RustcAbi::X86Sse2) => arg.layout.size.bits() <= 128,

`

``

739

`+

"x86_64" if spec.rustc_abi != Some(RustcAbi::X86Softfloat) => {

`

``

740

`+

arg.layout.size.bits() <= 128

`

``

741

`+

}

`

``

742

`+

// So far, we haven't implemented this logic for any other target.

`

``

743

`+

_ => false,

`

``

744

`+

};

`

``

745

+

727

746

`for (arg_idx, arg) in self

`

728

747

`.args

`

729

748

`.iter_mut()

`

730

749

`.enumerate()

`

731

750

`.map(|(idx, arg)| (Some(idx), arg))

`

732

751

`.chain(iter::once((None, &mut self.ret)))

`

733

752

`{

`

734

``

`-

if arg.is_ignore() {

`

``

753

`+

// If the logic above already picked a specific type to cast the argument to, leave that

`

``

754

`+

// in place.

`

``

755

`+

if matches!(arg.mode, PassMode::Ignore | PassMode::Cast { .. }) {

`

735

756

`continue;

`

736

757

`}

`

737

758

``

738

759

`if arg_idx.is_none()

`

739

760

` && arg.layout.size > Primitive::Pointer(AddressSpace::DATA).size(cx) * 2

`

``

761

`+

&& !matches!(arg.layout.backend_repr, BackendRepr::Vector { .. })

`

740

762

`{

`

741

763

`// Return values larger than 2 registers using a return area

`

742

764

`// pointer. LLVM and Cranelift disagree about how to return

`

`@@ -746,7 +768,8 @@ impl<'a, Ty> FnAbi<'a, Ty> {

`

746

768

`// return value independently and decide to pass it in a

`

747

769

`// register or not, which would result in the return value

`

748

770

`// being passed partially in registers and partially through a

`

749

``

`-

// return area pointer.

`

``

771

`` +

// return area pointer. For large IR-level values such as `i128`,

``

``

772

`+

// cranelift will even split up the value into smaller chunks.

`

750

773

`//

`

751

774

`// While Cranelift may need to be fixed as the LLVM behavior is

`

752

775

`// generally more correct with respect to the surface language,

`

`@@ -776,53 +799,60 @@ impl<'a, Ty> FnAbi<'a, Ty> {

`

776

799

`// rustc_target already ensures any return value which doesn't

`

777

800

`// fit in the available amount of return registers is passed in

`

778

801

`// the right way for the current target.

`

``

802

`+

//

`

``

803

`+

// The adjustment is neither necessary nor desired for types with a vector

`

``

804

`+

// representation; those are handled below.

`

779

805

` arg.make_indirect();

`

780

806

`continue;

`

781

807

`}

`

782

808

``

783

809

`match arg.layout.backend_repr {

`

784

``

`-

BackendRepr::Memory { .. } => {}

`

785

``

-

786

``

`-

// This is a fun case! The gist of what this is doing is

`

787

``

`-

// that we want callers and callees to always agree on the

`

788

``

`-

// ABI of how they pass SIMD arguments. If we were to not

`

789

``

`-

// make these arguments indirect then they'd be immediates

`

790

``

`-

// in LLVM, which means that they'd used whatever the

`

791

``

`-

// appropriate ABI is for the callee and the caller. That

`

792

``

`-

// means, for example, if the caller doesn't have AVX

`

793

``

`-

// enabled but the callee does, then passing an AVX argument

`

794

``

`-

// across this boundary would cause corrupt data to show up.

`

795

``

`-

//

`

796

``

`-

// This problem is fixed by unconditionally passing SIMD

`

797

``

`-

// arguments through memory between callers and callees

`

798

``

`-

// which should get them all to agree on ABI regardless of

`

799

``

`-

// target feature sets. Some more information about this

`

800

``

`-

// issue can be found in #44367.

`

801

``

`-

//

`

802

``

`-

// Note that the intrinsic ABI is exempt here as

`

803

``

`-

// that's how we connect up to LLVM and it's unstable

`

804

``

`-

// anyway, we control all calls to it in libstd.

`

805

``

`-

BackendRepr::Vector { .. }

`

806

``

`-

if abi != ExternAbi::RustIntrinsic && spec.simd_types_indirect =>

`

807

``

`-

{

`

808

``

`-

arg.make_indirect();

`

809

``

`-

continue;

`

``

810

`+

BackendRepr::Memory { .. } => {

`

``

811

`` +

// Compute `Aggregate` ABI.

``

``

812

+

``

813

`+

let is_indirect_not_on_stack =

`

``

814

`+

matches!(arg.mode, PassMode::Indirect { on_stack: false, .. });

`

``

815

`+

assert!(is_indirect_not_on_stack);

`

``

816

+

``

817

`+

let size = arg.layout.size;

`

``

818

`+

if arg.layout.is_sized()

`

``

819

`+

&& size <= Primitive::Pointer(AddressSpace::DATA).size(cx)

`

``

820

`+

{

`

``

821

`+

// We want to pass small aggregates as immediates, but using

`

``

822

`+

// an LLVM aggregate type for this leads to bad optimizations,

`

``

823

`+

// so we pick an appropriately sized integer type instead.

`

``

824

`+

arg.cast_to(Reg { kind: RegKind::Integer, size });

`

``

825

`+

}

`

810

826

`}

`

811

827

``

812

``

`-

_ => continue,

`

813

``

`-

}

`

814

``

`` -

// Compute `Aggregate` ABI.

``

815

``

-

816

``

`-

let is_indirect_not_on_stack =

`

817

``

`-

matches!(arg.mode, PassMode::Indirect { on_stack: false, .. });

`

818

``

`-

assert!(is_indirect_not_on_stack);

`

819

``

-

820

``

`-

let size = arg.layout.size;

`

821

``

`-

if !arg.layout.is_unsized() && size <= Primitive::Pointer(AddressSpace::DATA).size(cx) {

`

822

``

`-

// We want to pass small aggregates as immediates, but using

`

823

``

`-

// an LLVM aggregate type for this leads to bad optimizations,

`

824

``

`-

// so we pick an appropriately sized integer type instead.

`

825

``

`-

arg.cast_to(Reg { kind: RegKind::Integer, size });

`

``

828

`+

BackendRepr::Vector { .. } => {

`

``

829

`+

// This is a fun case! The gist of what this is doing is

`

``

830

`+

// that we want callers and callees to always agree on the

`

``

831

`+

// ABI of how they pass SIMD arguments. If we were to not

`

``

832

`+

// make these arguments indirect then they'd be immediates

`

``

833

`+

// in LLVM, which means that they'd use whatever the

`

``

834

`+

// appropriate ABI is for the callee and the caller. That

`

``

835

`+

// means, for example, if the caller doesn't have AVX

`

``

836

`+

// enabled but the callee does, then passing an AVX argument

`

``

837

`+

// across this boundary would cause corrupt data to show up.

`

``

838

`+

//

`

``

839

`+

// This problem is fixed by unconditionally passing SIMD

`

``

840

`+

// arguments through memory between callers and callees

`

``

841

`+

// which should get them all to agree on ABI regardless of

`

``

842

`+

// target feature sets. Some more information about this

`

``

843

`+

// issue can be found in #44367.

`

``

844

`+

//

`

``

845

`+

// Note that the intrinsic ABI is exempt here as those are not

`

``

846

`+

// real functions anyway, and the backend expects very specific types.

`

``

847

`+

if abi != ExternAbi::RustIntrinsic

`

``

848

`+

&& spec.simd_types_indirect

`

``

849

`+

&& !can_pass_simd_directly(arg)

`

``

850

`+

{

`

``

851

`+

arg.make_indirect();

`

``

852

`+

}

`

``

853

`+

}

`

``

854

+

``

855

`+

_ => {}

`

826

856

`}

`

827

857

`}

`

828

858

`}

`