Consistently use the highest bit of vector masks when converting to i1 vectors by jhorstmann · Pull Request #104693 · rust-lang/rust (original) (raw)
Does this affect codegen for non-x86 platforms?
Seems to have a very similar effect for target `aarch64-unknown-linux-gnu`. Example with a simple `select` (using 128-bit registers now):
pub fn select(m: mask32x4, a: i32x4, b: i32x4) -> i32x4 { m.select(a, b) }
Before
portable_simd_test::select:
ldr q1, [x0]
ldr q0, [x1]
ldr q2, [x2]
shl v1.4s, v1.4s, #31
cmlt v1.4s, v1.4s, #0
bif v0.16b, v2.16b, v1.16b
str q0, [x8]
ret
After
portable_simd_test::select:
ldr q0, [x0]
ldr q1, [x1]
ldr q2, [x2]
cmlt v0.4s, v0.4s, #0
bsl v0.16b, v1.16b, v2.16b
str q0, [x8]
ret
Interestingly, there is a big effect on the `all` reduction in the following example:
pub unsafe fn mask_all(m: mask8x16) -> bool { m.all() }
Before
portable_simd_test::mask_all:
.cfi_startproc
sub sp, sp, #16
.cfi_def_cfa_offset 16
ldr q0, [x0]
mov w8, #65535
umov w9, v0.b[1]
umov w11, v0.b[2]
umov w10, v0.b[0]
umov w12, v0.b[3]
umov w13, v0.b[4]
umov w14, v0.b[5]
and w9, w9, #0x1
and w11, w11, #0x1
and w10, w10, #0x1
and w12, w12, #0x1
and w13, w13, #0x1
and w14, w14, #0x1
bfi w10, w9, #1, #1
umov w9, v0.b[6]
bfi w10, w11, #2, #1
umov w11, v0.b[7]
bfi w10, w12, #3, #1
umov w12, v0.b[8]
bfi w10, w13, #4, #1
umov w13, v0.b[9]
and w9, w9, #0x1
bfi w10, w14, #5, #1
umov w14, v0.b[10]
and w11, w11, #0x1
orr w9, w10, w9, lsl #6
umov w10, v0.b[11]
and w12, w12, #0x1
orr w9, w9, w11, lsl #7
umov w11, v0.b[12]
and w13, w13, #0x1
orr w9, w9, w12, lsl #8
umov w12, v0.b[13]
and w14, w14, #0x1
orr w9, w9, w13, lsl #9
umov w13, v0.b[14]
and w10, w10, #0x1
orr w9, w9, w14, lsl #10
and w11, w11, #0x1
orr w9, w9, w10, lsl #11
and w10, w12, #0x1
umov w12, v0.b[15]
orr w9, w9, w11, lsl #12
and w11, w13, #0x1
orr w9, w9, w10, lsl #13
orr w9, w9, w11, lsl #14
orr w9, w9, w12, lsl #15
bics wzr, w8, w9
cset w0, eq
add sp, sp, #16
.cfi_def_cfa_offset 0
ret
After
portable_simd_test::mask_all:
movi v0.2d, #0xffffffffffffffff
ldr q1, [x0]
cmgt v0.16b, v1.16b, v0.16b
umaxv b0, v0.16b
fmov w8, s0
mvn w8, w8
and w0, w8, #0x1
ret
But this does not seem to lead to an improvement for the `is_hex` function from rust-lang/portable-simd#303.
I haven't checked any other target platforms yet. Thanks for the pointer to the tests, I'll have a look at those.