x86_win64 ABI: do not use xmm0 with softfloat ABI by RalfJung · Pull Request #137094 · rust-lang/rust
I built this code for several different targets with this PR:
#[no_mangle] pub unsafe extern "C" fn ret_u128(x: &u128) -> u128 { *x }
#[repr(simd)] pub struct U64x2([u64; 2]);
impl Copy for U64x2 {}
#[no_mangle] pub unsafe extern "C" fn ret_simd(x: &U64x2) -> U64x2 { *x }
#[no_mangle] pub unsafe extern "C" fn do_div(x: &u128, y: &u128, z: &mut u128) { *z = *x / *y; }
Here's what I got:
- UEFI:
ret_u128:
movq (%rcx), %rax
movq 8(%rcx), %rdx
retq
.def ret_simd;
.scl 2;
.type 32;
.endef
.section .text,"xr",one_only,ret_simd
.globl ret_simd
.p2align 4, 0x90
ret_simd:
movq (%rcx), %rax
movq 8(%rcx), %rdx
retq
.def do_div;
.scl 2;
.type 32;
.endef
.section .text,"xr",one_only,do_div
.globl do_div
.p2align 4, 0x90
do_div:
pushq %rsi
subq $64, %rsp
movq %r8, %rsi
movq (%rdx), %r8
movq 8(%rdx), %rax
movq %r8, %rdx
orq %rax, %rdx
je .LBB2_2
movq (%rcx), %rdx
movq 8(%rcx), %rcx
movq %r8, 32(%rsp)
movq %rcx, 56(%rsp)
movq %rdx, 48(%rsp)
movq %rax, 40(%rsp)
leaq 48(%rsp), %rcx
leaq 32(%rsp), %rdx
callq __udivti3
movq %rax, (%rsi)
movq %rdx, 8(%rsi)
addq $64, %rsp
popq %rsi
retq
.p2align 4, 0x90
.LBB2_2:
jmp .LBB2_2
- MSVC:
ret_u128:
movq (%rcx), %rax
movq 8(%rcx), %rdx
retq
.def ret_simd;
.scl 2;
.type 32;
.endef
.section .text,"xr",one_only,ret_simd
.globl ret_simd
.p2align 4, 0x90
ret_simd:
movaps (%rcx), %xmm0
retq
.def do_div;
.scl 2;
.type 32;
.endef
.section .text,"xr",one_only,do_div
.globl do_div
.p2align 4, 0x90
do_div:
.seh_proc do_div
pushq %rsi
.seh_pushreg %rsi
subq $64, %rsp
.seh_stackalloc 64
.seh_endprologue
movq %r8, %rsi
movq (%rdx), %r8
movq 8(%rdx), %rax
movq %r8, %rdx
orq %rax, %rdx
je .LBB2_2
movaps (%rcx), %xmm0
movq %r8, 32(%rsp)
movaps %xmm0, 48(%rsp)
movq %rax, 40(%rsp)
leaq 48(%rsp), %rcx
leaq 32(%rsp), %rdx
callq __udivti3
movaps %xmm0, (%rsi)
addq $64, %rsp
popq %rsi
retq
.p2align 4, 0x90
.LBB2_2:
jmp .LBB2_2
.seh_endproc
- GNU:
ret_u128:
movq (%rcx), %rax
movq 8(%rcx), %rdx
retq
.def ret_simd;
.scl 2;
.type 32;
.endef
.globl ret_simd
.p2align 4, 0x90
ret_simd:
movaps (%rcx), %xmm0
retq
.def do_div;
.scl 2;
.type 32;
.endef
.globl do_div
.p2align 4, 0x90
do_div:
.seh_proc do_div
pushq %rsi
.seh_pushreg %rsi
subq $64, %rsp
.seh_stackalloc 64
.seh_endprologue
movq %r8, %rsi
movq (%rdx), %r8
movq 8(%rdx), %rax
movq %r8, %rdx
orq %rax, %rdx
je .LBB2_2
movaps (%rcx), %xmm0
movq %r8, 32(%rsp)
movaps %xmm0, 48(%rsp)
movq %rax, 40(%rsp)
leaq 48(%rsp), %rcx
leaq 32(%rsp), %rdx
callq __udivti3
movaps %xmm0, (%rsi)
addq $64, %rsp
popq %rsi
retq
.p2align 4, 0x90
.LBB2_2:
jmp .LBB2_2
.seh_endproc
Does that look like it's using the right registers or not?
I don't see `xmm0` in `ret_u128`, so I guess the answer is "no". We should probably have an asm test for that...