Bad codegen for bitwise OR/AND masks · Issue #303 · rust-lang/portable-simd (original) (raw)

https://rust.godbolt.org/z/5obPq9W3G

#![feature(portable_simd)]

use std::simd::{u8x16, LaneCount, Simd, SimdElement, SimdInt, SimdPartialOrd, SupportedLaneCount};

fn splat<T, const N: usize>(x: T) -> Simd<T, N> where T: SimdElement, LaneCount: SupportedLaneCount, { Simd::splat(x) }

/// Returns `true` when every byte of `chunk` lies in `b'0'..=b'9'` or
/// `b'a'..=b'f'`.
///
/// Each range is tested with a pair of exclusive lane-wise comparisons; the
/// two range masks are OR-ed together and the combined mask is reduced with
/// `all()`.
pub fn is_hex(chunk: &[u8; 16]) -> bool {
    // The cast target is not spelled out here; it is left to inference from
    // the `splat` operands of the comparisons below.
    let bytes = u8x16::from_array(*chunk).cast();

    // Lanes within '0'..='9'.
    let above_digit_floor = bytes.simd_gt(splat(b'0' - 1));
    let below_digit_ceil = bytes.simd_lt(splat(b'9' + 1));

    // Lanes within 'a'..='f'.
    let above_alpha_floor = bytes.simd_gt(splat(b'a' - 1));
    let below_alpha_ceil = bytes.simd_lt(splat(b'f' + 1));

    let in_either_range =
        (above_digit_floor & below_digit_ceil) | (above_alpha_floor & below_alpha_ceil);
    in_either_range.all()
}

pub fn is_ascii(chunk: &[u8; 16]) -> bool { let x = u8x16::from_array(*chunk); let m = x.cast::().simd_lt(splat(0)); m.all() }

The `m.all()` expression in `is_hex` should compile to a single horizontal-reduction instruction (`umaxv`/`uminv`/`smaxv`/`sminv`), as it does in `is_ascii`. Instead, it generates a large number of scalar instructions, leading to poor performance.