(original) (raw)

diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index 6f62545241e9e..d2d50e5359878 100644 --- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -41,6 +41,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/Support/Casting.h" @@ -1122,6 +1123,17 @@ void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred, for (unsigned i = 0; i != NumEdges; ++i) APN->addIncoming(IncomingVals[AllocaNo], Pred); + // For the sequence `return X > 0.0 ? X : -X`, it is expected that this + // results in fabs intrinsic. However, without no-signed-zeros(nsz) flag + // on the phi node generated at this stage, fabs folding does not + // happen. So, we try to infer nsz flag from the function attributes to + // enable this fabs folding. + if (APN->isComplete() && isa<FPMathOperator>(APN) && + BB->getParent() + ->getFnAttribute("no-signed-zeros-fp-math") + .getValueAsBool()) + APN->setHasNoSignedZeros(true); + // The currently active variable for this block is now the PHI. IncomingVals[AllocaNo] = APN; AllocaATInfo[AllocaNo].updateForNewPhi(APN, DIB); diff --git a/llvm/test/Transforms/PhaseOrdering/generate-fabs.ll b/llvm/test/Transforms/PhaseOrdering/generate-fabs.ll new file mode 100644 index 0000000000000..25ac51033af9b --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/generate-fabs.ll @@ -0,0 +1,29 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes='default' -S < %s | FileCheck %s + +; Following test must generate fabs intrinsic. It goes through following stages +; 1. SROA propagates the nsz function attribute on the phi node. +; 2. SimplifyCFG pass converts phi node to select. +; 3. 
InstCombine converts select with nsz flag into fabs intrinsic. + +define double @fabs_fcmp_olt_nsz_func_attr(double %0, double %1) "no-signed-zeros-fp-math"="true" { +; CHECK-LABEL: define double @fabs_fcmp_olt_nsz_func_attr( +; CHECK-SAME: double [[TMP0:%.*]], double [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[X_0:%.*]] = tail call nnan nsz double @llvm.fabs.f64(double [[TMP0]]) +; CHECK-NEXT: ret double [[X_0]] +entry: + %x = alloca double + store double %0, ptr %x + %cmp = fcmp nnan nsz olt double %0, 0.000000e+00 + br i1 %cmp, label %if.then, label %return + +if.then: ; preds = %entry + %fneg = fneg nnan nsz double %0 + store double %fneg, ptr %x + br label %return + +return: ; preds = %entry, %if.then + %ret = load double, ptr %x + ret double %ret +} diff --git a/llvm/test/Transforms/SROA/propagate-fast-math-flags-on-phi.ll b/llvm/test/Transforms/SROA/propagate-fast-math-flags-on-phi.ll new file mode 100644 index 0000000000000..2cc26363daf9c --- /dev/null +++ b/llvm/test/Transforms/SROA/propagate-fast-math-flags-on-phi.ll @@ -0,0 +1,79 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt < %s -passes='sroa' -S | FileCheck %s +define double @phi_with_nsz(double %x) "no-signed-zeros-fp-math"="true" { +; CHECK-LABEL: define double @phi_with_nsz( +; CHECK-SAME: double [[X:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = fcmp olt double [[X]], 0.000000e+00 +; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[FNEG:%.*]] = fneg double [[X]] +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[X_ADDR_0:%.*]] = phi nsz double [ [[FNEG]], [[IF_THEN]] ], [ undef, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret double [[X_ADDR_0]] +entry: + %x.addr = alloca double + %cmp = fcmp olt double %x, 0.0 + br i1 %cmp, label %if.then, label %return + +if.then: ; preds = %entry + 
%fneg = fneg double %x + store double %fneg, ptr %x.addr + br label %return + +return: ; preds = %entry,%if.then + %retval = load double, ptr %x.addr + ret double %retval +} + +define <2 x double> @vector_phi_with_nsz(<2 x double> %x, i1 %cmp, <2 x double> %a, <2 x double> %b) "no-signed-zeros-fp-math"="true" { +; CHECK-LABEL: define <2 x double> @vector_phi_with_nsz( +; CHECK-SAME: <2 x double> [[X:%.*]], i1 [[CMP:%.*]], <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[X_ADDR_0:%.*]] = phi nsz <2 x double> [ [[B]], [[IF_THEN]] ], [ [[A]], [[ENTRY:%.*]] ] +; CHECK-NEXT: ret <2 x double> [[X_ADDR_0]] +entry: + %x.addr = alloca <2 x double> + store <2 x double> %a, ptr %x.addr + br i1 %cmp, label %if.then, label %return + +if.then: ; preds = %entry + store <2 x double> %b, ptr %x.addr + br label %return + +return: ; preds = %entry,%if.then + %retval = load <2 x double>, ptr %x.addr + ret <2 x double> %retval +} + +define double @phi_without_nsz(double %x) "no-signed-zeros-fp-math"="false" { +; CHECK-LABEL: define double @phi_without_nsz( +; CHECK-SAME: double [[X:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = fcmp olt double [[X]], 0.000000e+00 +; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[FNEG:%.*]] = fneg double [[X]] +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[X_ADDR_0:%.*]] = phi double [ [[FNEG]], [[IF_THEN]] ], [ undef, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret double [[X_ADDR_0]] +entry: + %x.addr = alloca double + %cmp = fcmp olt double %x, 0.0 + br i1 %cmp, label %if.then, label %return + +if.then: ; preds = %entry + %fneg = fneg double %x + store double %fneg, ptr %x.addr + br label %return + +return: ; preds = %entry,%if.then + %retval = load double, 
ptr %x.addr + ret double %retval +}