x86_win64 ABI: do not use xmm0 with softfloat ABI by RalfJung · Pull Request #137094 · rust-lang/rust

I built this code for several different targets with this PR (a sketch of the build invocations follows the snippet):

#![feature(repr_simd)] // repr(simd) is unstable, so this needs a nightly (or stage1) compiler

#[no_mangle]
pub unsafe extern "C" fn ret_u128(x: &u128) -> u128 { *x }

#[repr(simd)]
pub struct U64x2([u64; 2]);

// Copy requires Clone as a supertrait, so a Clone impl is needed for this to compile.
impl Copy for U64x2 {}
impl Clone for U64x2 { fn clone(&self) -> Self { *self } }

#[no_mangle]
pub unsafe extern "C" fn ret_simd(x: &U64x2) -> U64x2 { *x }

#[no_mangle]
pub unsafe extern "C" fn do_div(x: &u128, y: &u128, z: &mut u128) { *z = *x / *y; }
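
For anyone reproducing this: invocations along the following lines should work. The exact targets used aren't named above, so the two below are just illustrative examples of a hardfloat and a softfloat user of the win64 calling convention (x86_64-unknown-uefi disables SSE and uses soft floats):

    rustc -O --emit asm --crate-type lib --target x86_64-pc-windows-msvc test.rs
    rustc -O --emit asm --crate-type lib --target x86_64-unknown-uefi test.rs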

Here's what I got (three outputs, one per target, concatenated back-to-back):

ret_u128:
    movq	(%rcx), %rax
    movq	8(%rcx), %rdx
    retq

    .def	ret_simd;
    .scl	2;
    .type	32;
    .endef
    .section	.text,"xr",one_only,ret_simd
    .globl	ret_simd
    .p2align	4, 0x90
ret_simd:
    movq	(%rcx), %rax
    movq	8(%rcx), %rdx
    retq

    .def	do_div;
    .scl	2;
    .type	32;
    .endef
    .section	.text,"xr",one_only,do_div
    .globl	do_div
    .p2align	4, 0x90
do_div:
    pushq	%rsi
    subq	$64, %rsp
    movq	%r8, %rsi
    movq	(%rdx), %r8
    movq	8(%rdx), %rax
    movq	%r8, %rdx
    orq	%rax, %rdx
    je	.LBB2_2
    movq	(%rcx), %rdx
    movq	8(%rcx), %rcx
    movq	%r8, 32(%rsp)
    movq	%rcx, 56(%rsp)
    movq	%rdx, 48(%rsp)
    movq	%rax, 40(%rsp)
    leaq	48(%rsp), %rcx
    leaq	32(%rsp), %rdx
    callq	__udivti3
    movq	%rax, (%rsi)
    movq	%rdx, 8(%rsi)
    addq	$64, %rsp
    popq	%rsi
    retq
    .p2align	4, 0x90
.LBB2_2:
    jmp	.LBB2_2

ret_u128:
    movq	(%rcx), %rax
    movq	8(%rcx), %rdx
    retq

    .def	ret_simd;
    .scl	2;
    .type	32;
    .endef
    .section	.text,"xr",one_only,ret_simd
    .globl	ret_simd
    .p2align	4, 0x90
ret_simd:
    movaps	(%rcx), %xmm0
    retq

    .def	do_div;
    .scl	2;
    .type	32;
    .endef
    .section	.text,"xr",one_only,do_div
    .globl	do_div
    .p2align	4, 0x90
do_div:
.seh_proc do_div
    pushq	%rsi
    .seh_pushreg %rsi
    subq	$64, %rsp
    .seh_stackalloc 64
    .seh_endprologue
    movq	%r8, %rsi
    movq	(%rdx), %r8
    movq	8(%rdx), %rax
    movq	%r8, %rdx
    orq	%rax, %rdx
    je	.LBB2_2
    movaps	(%rcx), %xmm0
    movq	%r8, 32(%rsp)
    movaps	%xmm0, 48(%rsp)
    movq	%rax, 40(%rsp)
    leaq	48(%rsp), %rcx
    leaq	32(%rsp), %rdx
    callq	__udivti3
    movaps	%xmm0, (%rsi)
    addq	$64, %rsp
    popq	%rsi
    retq
    .p2align	4, 0x90
.LBB2_2:
    jmp	.LBB2_2
    .seh_endproc

ret_u128:
    movq	(%rcx), %rax
    movq	8(%rcx), %rdx
    retq

    .def	ret_simd;
    .scl	2;
    .type	32;
    .endef
    .globl	ret_simd
    .p2align	4, 0x90
ret_simd:
    movaps	(%rcx), %xmm0
    retq

    .def	do_div;
    .scl	2;
    .type	32;
    .endef
    .globl	do_div
    .p2align	4, 0x90
do_div:
.seh_proc do_div
    pushq	%rsi
    .seh_pushreg %rsi
    subq	$64, %rsp
    .seh_stackalloc 64
    .seh_endprologue
    movq	%r8, %rsi
    movq	(%rdx), %r8
    movq	8(%rdx), %rax
    movq	%r8, %rdx
    orq	%rax, %rdx
    je	.LBB2_2
    movaps	(%rcx), %xmm0
    movq	%r8, 32(%rsp)
    movaps	%xmm0, 48(%rsp)
    movq	%rax, 40(%rsp)
    leaq	48(%rsp), %rcx
    leaq	32(%rsp), %rdx
    callq	__udivti3
    movaps	%xmm0, (%rsi)
    addq	$64, %rsp
    popq	%rsi
    retq
    .p2align	4, 0x90
.LBB2_2:
    jmp	.LBB2_2
    .seh_endproc

Does that look like it's using the right registers or not?

I don't see xmm0 in ret_u128, so I guess the answer is "no". We should probably have an asm test for that...
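
For concreteness, here's a minimal sketch of what such a test could look like under tests/assembly. The header directives and target selection are my illustrative assumptions, not an actual test from this PR; a real test would want revisions covering both the hardfloat and softfloat flavors of the win64 ABI:

//@ assembly-output: emit-asm
//@ compile-flags: -Copt-level=3
//@ only-x86_64
//@ only-windows
// (Illustrative headers: a real test would pick the softfloat target/flags
// explicitly, e.g. via revisions, rather than relying on the host target.)

#![crate_type = "lib"]

// Expectation for the softfloat win64 ABI: the u128 is returned in rax:rdx,
// and no xmm register appears between the label and the ret.
// CHECK-LABEL: ret_u128:
// CHECK-NOT: xmm
// CHECK: retq
#[no_mangle]
pub unsafe extern "C" fn ret_u128(x: &u128) -> u128 { *x }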