" = type { i16, [3 x i16] } %"core::result::Result::Ok" = type { [1 x i16], i16 } %"core::result::Result

Missed optimization · Issue #67342 · llvm/llvm-project

From: rust-lang/rust#116150

With LLVM 17.0.1, the following IR:

```llvm
%"core::result::Result<u16, i32>" = type { i16, [3 x i16] }
%"core::result::Result<u16, i32>::Ok" = type { [1 x i16], i16 }
%"core::result::Result<u16, i32>::Err" = type { [1 x i32], i32 }

define i64 @f(i32 %err) unnamed_addr #0 {
  %r = alloca %"core::result::Result<u16, i32>", align 4
  %_3 = icmp slt i32 %err, 0
  br i1 %_3, label %bb1, label %bb2

bb2:                                              ; preds = %start
  %ok = trunc i32 %err to i16
  %1 = getelementptr inbounds %"core::result::Result<u16, i32>::Ok", ptr %r, i32 0, i32 1
  store i16 %ok, ptr %1, align 2
  store i16 0, ptr %r, align 4
  br label %bb3

bb1:                                              ; preds = %start
  %2 = getelementptr inbounds %"core::result::Result<u16, i32>::Err", ptr %r, i32 0, i32 1
  store i32 %err, ptr %2, align 4
  store i16 1, ptr %r, align 4
  br label %bb3

bb3:                                              ; preds = %bb1, %bb2
  %3 = load i64, ptr %r, align 4
  ret i64 %3
}
```

optimizes to:

```llvm
define i64 @f(i32 %err) unnamed_addr #0 {
  %_3 = icmp slt i32 %err, 0
  %err.lobit = lshr i32 %err, 31
  %r.sroa.4.0.insert.ext = zext i32 %err to i64
  %r.sroa.4.0.insert.shift = shl nuw i64 %r.sroa.4.0.insert.ext, 32
  %0 = shl i32 %err, 16
  %1 = select i1 %_3, i32 0, i32 %0
  %r.sroa.3.0.insert.shift = zext i32 %1 to i64
  %r.sroa.3.0.insert.insert = or i64 %r.sroa.4.0.insert.shift, %r.sroa.3.0.insert.shift
  %r.sroa.0.0.insert.ext = zext i32 %err.lobit to i64
  %r.sroa.0.0.insert.insert = or i64 %r.sroa.3.0.insert.insert, %r.sroa.0.0.insert.ext
  ret i64 %r.sroa.0.0.insert.insert
}
```

```asm
f:                                      # @f
        mov     eax, edi
        shr     eax, 31
        mov     rcx, rdi
        shl     rcx, 32
        mov     edx, edi
        shl     edx, 16
        xor     esi, esi
        test    edi, edi
        cmovns  esi, edx
        or      rsi, rcx
        or      rax, rsi
        ret
```

But if the `trunc` is computed in the entry block instead of in `bb2`, i.e. away from the store that uses it:

```llvm
%"core::result::Result<u16, i32>" = type { i16, [3 x i16] }
%"core::result::Result<u16, i32>::Ok" = type { [1 x i16], i16 }
%"core::result::Result<u16, i32>::Err" = type { [1 x i32], i32 }

define i64 @f(i32 %err) unnamed_addr #0 {
  %r = alloca %"core::result::Result<u16, i32>", align 4
  %ok = trunc i32 %err to i16
  %_3 = icmp slt i32 %err, 0
  br i1 %_3, label %bb1, label %bb2

bb2:                                              ; preds = %start
  %1 = getelementptr inbounds %"core::result::Result<u16, i32>::Ok", ptr %r, i32 0, i32 1
  store i16 %ok, ptr %1, align 2
  store i16 0, ptr %r, align 4
  br label %bb3

bb1:                                              ; preds = %start
  %2 = getelementptr inbounds %"core::result::Result<u16, i32>::Err", ptr %r, i32 0, i32 1
  store i32 %err, ptr %2, align 4
  store i16 1, ptr %r, align 4
  br label %bb3

bb3:                                              ; preds = %bb1, %bb2
  %3 = load i64, ptr %r, align 4
  ret i64 %3
}
```

the optimized result no longer contains the `icmp`/`select` pair, and the generated assembly is shorter:

```llvm
define i64 @f(i32 %err) unnamed_addr #0 {
  %err.lobit = lshr i32 %err, 31
  %r.sroa.4.0.insert.ext = zext i32 %err to i64
  %r.sroa.4.0.insert.shift = shl nuw i64 %r.sroa.4.0.insert.ext, 32
  %0 = shl i32 %err, 16
  %r.sroa.3.0.insert.shift = zext i32 %0 to i64
  %r.sroa.3.0.insert.insert = or i64 %r.sroa.4.0.insert.shift, %r.sroa.3.0.insert.shift
  %r.sroa.0.0.insert.ext = zext i32 %err.lobit to i64
  %r.sroa.0.0.insert.insert = or i64 %r.sroa.3.0.insert.insert, %r.sroa.0.0.insert.ext
  ret i64 %r.sroa.0.0.insert.insert
}
```

```asm
f:                                      # @f
        mov     eax, edi
        shr     eax, 31
        mov     rcx, rdi
        shl     rcx, 32
        shl     edi, 16
        or      rcx, rdi
        or      rax, rcx
        ret
```