[IR] Add CallBr intrinsics support by ro-i · Pull Request #133907 · llvm/llvm-project
@llvm/pr-subscribers-llvm-ir
Author: Robert Imschweiler (ro-i)
Changes
This commit adds support for using intrinsics with callbr. Most uses will look like this example:
callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [label %kill]
kill:
  unreachable
cont:
  ...
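For comparison, a minimal sketch of the semantically equivalent IR without callbr (this mirrors the without-callbr.ll variant of the test added by this PR; the function name and arguments are just the ones used there):

define void @test_kill(ptr %src, ptr %dst, i1 %c) {
  %a = load i32, ptr %src, align 4
  ; plain call form: the kill control flow stays implicit instead of being
  ; modeled as an indirect callbr destination
  call void @llvm.amdgcn.kill(i1 %c)
  store i32 %a, ptr %dst, align 4
  ret void
}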
@arsenm
Full diff: https://github.com/llvm/llvm-project/pull/133907.diff
8 Files Affected:
- (modified) llvm/include/llvm/Analysis/RegionInfoImpl.h (+12)
- (modified) llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h (+3-3)
- (modified) llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp (+58-2)
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+35-11)
- (modified) llvm/lib/IR/Verifier.cpp (+24-5)
- (modified) llvm/lib/Transforms/Scalar/StructurizeCFG.cpp (+5-6)
- (modified) llvm/lib/Transforms/Utils/BasicBlockUtils.cpp (+2-2)
- (added) llvm/test/CodeGen/AMDGPU/callbr.ll (+70)
diff --git a/llvm/include/llvm/Analysis/RegionInfoImpl.h b/llvm/include/llvm/Analysis/RegionInfoImpl.h
index eb99d8bc6fb23..759e9c47bebb8 100644
--- a/llvm/include/llvm/Analysis/RegionInfoImpl.h
+++ b/llvm/include/llvm/Analysis/RegionInfoImpl.h
@@ -553,6 +553,18 @@ bool RegionInfoBase&lt;Tr&gt;::isRegion(BlockT *entry, BlockT *exit) const {
using DST = typename DomFrontierT::DomSetType;
+ // TODO? post domination frontier?
+ if constexpr (std::is_same_v<BlockT, BasicBlock>) {
+ if (DomTreeNodeT *PDTNode = PDT->getNode(exit); PDTNode) {
+ for (DomTreeNodeT *PredNode : *PDTNode) {
+ for (BasicBlock *Pred : predecessors(PredNode->getBlock())) {
+ if (isa&lt;CallBrInst&gt;(Pred->getTerminator()))
+ return false;
+ }
+ }
+ }
+ }
+
DST *entrySuccs = &DF->find(entry)->second;
// Exit is the header of a loop that contains the entry. In this case,
diff --git a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
index 6faff3d1fd8e3..59143d235eb93 100644
--- a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -606,9 +606,9 @@ bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI,
// successors
void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder);
-// Check whether the function only has simple terminator:
-// br/brcond/unreachable/ret
-bool hasOnlySimpleTerminator(const Function &F);
+// Check whether the function only has blocks with simple terminators:
+// br/brcond/unreachable/ret (or callbr if AllowCallBr)
+bool hasOnlySimpleTerminator(const Function &F, bool AllowCallBr = true);
// Returns true if these basic blocks belong to a presplit coroutine and the
// edge corresponds to the 'default' case in the switch statement in the
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index f8afb42bf5535..0f698375ad6cf 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -3009,8 +3009,64 @@ bool IRTranslator::translateInvoke(const User &U,
bool IRTranslator::translateCallBr(const User &U,
MachineIRBuilder &MIRBuilder) {
- // FIXME: Implement this.
- return false;
+ const CallBrInst &I = cast&lt;CallBrInst&gt;(U);
+ MachineBasicBlock *CallBrMBB = &MIRBuilder.getMBB();
+
+ // TODO: operand bundles (see SelDAG implementation of callbr)?
+ assert(!I.hasOperandBundles() &&
+ "Cannot lower callbrs with operand bundles yet");
+
+ if (I.isInlineAsm()) {
+ // FIXME: inline asm not yet supported
+ if (!translateInlineAsm(I, MIRBuilder))
+ return false;
+ } else if (I.getIntrinsicID() != Intrinsic::not_intrinsic) {
+ switch (I.getIntrinsicID()) {
+ default:
+ report_fatal_error("Unsupported intrinsic for callbr");
+ case Intrinsic::amdgcn_kill:
+ if (I.getNumIndirectDests() != 1)
+ report_fatal_error(
+ "amdgcn.kill supportes exactly one indirect destination");
+ CallInst *CI =
+ CallInst::Create(I.getFunctionType(), I.getCalledFunction(),
+ SmallVector<Value *, 1>(I.args()));
+ bool Success = translateCall(*CI, MIRBuilder);
+ CI->deleteValue();
+ if (!Success)
+ return false;
+ break;
+ }
+ } else {
+ report_fatal_error("Only know how to handle inlineasm/intrinsic callbr");
+ }
+
+ // Retrieve successors.
+ SmallPtrSet<BasicBlock *, 8> Dests;
+ Dests.insert(I.getDefaultDest());
+ MachineBasicBlock *Return = &getMBB(*I.getDefaultDest());
+
+ // Update successor info.
+ addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne());
+ // TODO: In most cases where there is an intrinsic callbr, we have exactly
+ // one indirect target, which will be unreachable. As soon as
+ // this changes, we might need to enhance
+ // Target->setIsInlineAsmBrIndirectTarget or add something similar for
+ // intrinsic indirect branches.
+ if (I.isInlineAsm()) {
+ for (BasicBlock *Dest : I.getIndirectDests()) {
+ MachineBasicBlock *Target = &getMBB(*Dest);
+ Target->setIsInlineAsmBrIndirectTarget();
+ Target->setMachineBlockAddressTaken();
+ Target->setLabelMustBeEmitted();
+ // Don't add duplicate machine successors.
+ if (Dests.insert(Dest).second)
+ addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero());
+ }
+ }
+ CallBrMBB->normalizeSuccProbs();
+
+ return true;
}
bool IRTranslator::translateLandingPad(const User &U,
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 6db2a5ffbfb84..c9501128cd593 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3385,8 +3385,26 @@ void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
{LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
"Cannot lower callbrs with arbitrary operand bundles yet!");
- assert(I.isInlineAsm() && "Only know how to handle inlineasm callbr");
- visitInlineAsm(I);
+ if (I.isInlineAsm()) {
+ visitInlineAsm(I);
+ } else if (I.getIntrinsicID() != Intrinsic::not_intrinsic) {
+ switch (I.getIntrinsicID()) {
+ default:
+ report_fatal_error("Unsupported intrinsic for callbr");
+ case Intrinsic::amdgcn_kill:
+ if (I.getNumIndirectDests() != 1)
+ report_fatal_error(
+ "amdgcn.kill supportes exactly one indirect destination");
+ CallInst *CI =
+ CallInst::Create(I.getFunctionType(), I.getCalledFunction(),
+ SmallVector<Value *, 1>(I.args()));
+ visitCall(*CI);
+ CI->deleteValue();
+ break;
+ }
+ } else {
+ report_fatal_error("Only know how to handle inlineasm/intrinsic callbr");
+ }
CopyToExportRegsIfNeeded(&I);
// Retrieve successors.
@@ -3396,15 +3414,21 @@ void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
// Update successor info.
addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne());
- for (unsigned i = 0, e = I.getNumIndirectDests(); i < e; ++i) {
- BasicBlock *Dest = I.getIndirectDest(i);
- MachineBasicBlock *Target = FuncInfo.getMBB(Dest);
- Target->setIsInlineAsmBrIndirectTarget();
- Target->setMachineBlockAddressTaken();
- Target->setLabelMustBeEmitted();
- // Don't add duplicate machine successors.
- if (Dests.insert(Dest).second)
- addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero());
+ // TODO: In most cases where there is an intrinsic callbr, we have exactly
+ // one indirect target, which will be unreachable. As soon as
+ // this changes, we might need to enhance
+ // Target->setIsInlineAsmBrIndirectTarget or add something similar for
+ // intrinsic indirect branches.
+ if (I.isInlineAsm()) {
+ for (BasicBlock *Dest : I.getIndirectDests()) {
+ MachineBasicBlock *Target = FuncInfo.getMBB(Dest);
+ Target->setIsInlineAsmBrIndirectTarget();
+ Target->setMachineBlockAddressTaken();
+ Target->setLabelMustBeEmitted();
+ // Don't add duplicate machine successors.
+ if (Dests.insert(Dest).second)
+ addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero());
+ }
}
CallBrMBB->normalizeSuccProbs();
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index ed86a10c3a25f..fbf6e087177c6 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -3249,11 +3249,30 @@ void Verifier::visitIndirectBrInst(IndirectBrInst &BI) {
}
void Verifier::visitCallBrInst(CallBrInst &CBI) {
- Check(CBI.isInlineAsm(), "Callbr is currently only used for asm-goto!", &CBI);
- const InlineAsm *IA = cast&lt;InlineAsm&gt;(CBI.getCalledOperand());
- Check(!IA->canThrow(), "Unwinding from Callbr is not allowed");
+ if (!CBI.isInlineAsm()) {
+ switch (CBI.getIntrinsicID()) {
+ case Intrinsic::amdgcn_kill: {
+ Check(CBI.getNumIndirectDests() == 1,
+ "Callbr amdgcn_kill only supports one indirect dest");
+ bool Unreachable = isa&lt;UnreachableInst&gt;(CBI.getIndirectDest(0)->begin());
+ CallInst *Call = dyn_cast&lt;CallInst&gt;(CBI.getIndirectDest(0)->begin());
+ Check(Unreachable || (Call && Call->getIntrinsicID() ==
+ Intrinsic::amdgcn_unreachable),
+ "Callbr amdgcn_kill indirect dest needs to be unreachable");
+ visitIntrinsicCall(Intrinsic::amdgcn_kill, CBI);
+ break;
+ }
+ default:
+ CheckFailed(
+ "Callbr currently only supports asm-goto and selected intrinsics");
+ }
+ visitIntrinsicCall(CBI.getIntrinsicID(), CBI);
+ } else {
+ const InlineAsm *IA = cast&lt;InlineAsm&gt;(CBI.getCalledOperand());
+ Check(!IA->canThrow(), "Unwinding from Callbr is not allowed");
- verifyInlineAsmCall(CBI);
+ verifyInlineAsmCall(CBI);
+ }
visitTerminator(CBI);
}
@@ -5211,7 +5230,7 @@ void Verifier::visitInstruction(Instruction &I) {
(CBI && &CBI->getCalledOperandUse() == &I.getOperandUse(i)) ||
IsAttachedCallOperand(F, CBI, i)),
"Cannot take the address of an intrinsic!", &I);
- Check(!F->isIntrinsic() || isa&lt;CallInst&gt;(I) ||
+ Check(!F->isIntrinsic() || isa&lt;CallInst&gt;(I) || isa&lt;CallBrInst&gt;(I) ||
F->getIntrinsicID() == Intrinsic::donothing ||
F->getIntrinsicID() == Intrinsic::seh_try_begin ||
F->getIntrinsicID() == Intrinsic::seh_try_end ||
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index d1054b9b045ca..bdd8b5fbb3212 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -486,11 +486,10 @@ void StructurizeCFG::analyzeLoops(RegionNode *N) {
} else {
// Test for successors as back edge
BasicBlock *BB = N->getNodeAs&lt;BasicBlock&gt;();
- BranchInst *Term = cast&lt;BranchInst&gt;(BB->getTerminator());
-
- for (BasicBlock *Succ : Term->successors())
- if (Visited.count(Succ))
- Loops[Succ] = BB;
+ if (BranchInst *Term = dyn_cast&lt;BranchInst&gt;(BB->getTerminator()); Term)
+ for (BasicBlock *Succ : Term->successors())
+ if (Visited.count(Succ))
+ Loops[Succ] = BB;
}
}
@@ -522,7 +521,7 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {
for (BasicBlock *P : predecessors(BB)) {
// Ignore it if it's a branch from outside into our region entry
- if (!ParentRegion->contains(P))
+ if (!ParentRegion->contains(P) || !dyn_cast&lt;BranchInst&gt;(P->getTerminator()))
continue;
Region *R = RI->getRegionFor(P);
diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index ce5bf0c7207c7..3090f65fac627 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -1907,11 +1907,11 @@ void llvm::InvertBranch(BranchInst *PBI, IRBuilderBase &Builder) {
PBI->swapSuccessors();
}
-bool llvm::hasOnlySimpleTerminator(const Function &F) {
+bool llvm::hasOnlySimpleTerminator(const Function &F, bool AllowCallBr) {
for (auto &BB : F) {
auto *Term = BB.getTerminator();
if (!(isa&lt;ReturnInst&gt;(Term) || isa&lt;UnreachableInst&gt;(Term) ||
- isa&lt;BranchInst&gt;(Term)))
+ isa&lt;BranchInst&gt;(Term) || (AllowCallBr && isa&lt;CallBrInst&gt;(Term))))
return false;
}
return true;
diff --git a/llvm/test/CodeGen/AMDGPU/callbr.ll b/llvm/test/CodeGen/AMDGPU/callbr.ll
new file mode 100644
index 0000000000000..e2e84dca96cbf
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/callbr.ll
@@ -0,0 +1,70 @@
+; RUN: rm -rf %t && split-file %s %t
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -o %t/with-callbr-seldag.s < %t/with-callbr.ll
+; RUN: FileCheck --check-prefix=SELDAG %s < %t/with-callbr-seldag.s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -o %t/with-callbr-gisel.s -global-isel < %t/with-callbr.ll
+; RUN: FileCheck --check-prefix=GISEL %s < %t/with-callbr-gisel.s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -o %t/without-callbr-seldag.s < %t/without-callbr.ll
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -o %t/without-callbr-gisel.s -global-isel < %t/without-callbr.ll
+; RUN: diff %t/with-callbr-seldag.s %t/without-callbr-seldag.s
+; RUN: diff %t/with-callbr-gisel.s %t/without-callbr-gisel.s
+
+;--- with-callbr.ll
+
+; SELDAG-LABEL: test_kill:
+; SELDAG-NEXT: ; %bb.0:
+; SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SELDAG-NEXT: flat_load_dword v0, v[0:1]
+; SELDAG-NEXT: v_and_b32_e32 v1, 1, v4
+; SELDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
+; SELDAG-NEXT: s_mov_b64 s[4:5], exec
+; SELDAG-NEXT: s_andn2_b64 s[6:7], exec, vcc
+; SELDAG-NEXT: s_andn2_b64 s[4:5], s[4:5], s[6:7]
+; SELDAG-NEXT: s_cbranch_scc0 .LBB0_2
+; SELDAG-NEXT: ; %bb.1:
+; SELDAG-NEXT: s_and_b64 exec, exec, s[4:5]
+; SELDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; SELDAG-NEXT: flat_store_dword v[2:3], v0
+; SELDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; SELDAG-NEXT: s_setpc_b64 s[30:31]
+; SELDAG-NEXT: .LBB0_2:
+; SELDAG-NEXT: s_mov_b64 exec, 0
+; SELDAG-NEXT: s_endpgm
+
+; GISEL-LABEL: test_kill:
+; GISEL-NEXT: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: flat_load_dword v0, v[0:1]
+; GISEL-NEXT: v_and_b32_e32 v1, 1, v4
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
+; GISEL-NEXT: s_mov_b64 s[4:5], exec
+; GISEL-NEXT: s_andn2_b64 s[6:7], exec, vcc
+; GISEL-NEXT: s_andn2_b64 s[4:5], s[4:5], s[6:7]
+; GISEL-NEXT: s_cbranch_scc0 .LBB0_2
+; GISEL-NEXT: ; %bb.1:
+; GISEL-NEXT: s_and_b64 exec, exec, s[4:5]
+; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GISEL-NEXT: flat_store_dword v[2:3], v0
+; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GISEL-NEXT: s_setpc_b64 s[30:31]
+; GISEL-NEXT: .LBB0_2:
+; GISEL-NEXT: s_mov_b64 exec, 0
+; GISEL-NEXT: s_endpgm
+
+define void @test_kill(ptr %src, ptr %dst, i1 %c) {
+ %a = load i32, ptr %src, align 4
+ callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [label %kill]
+kill:
+ unreachable
+cont:
+ store i32 %a, ptr %dst, align 4
+ ret void
+}
+
+;--- without-callbr.ll
+
+define void @test_kill(ptr %src, ptr %dst, i1 %c) {
+ %a = load i32, ptr %src, align 4
+ call void @llvm.amdgcn.kill(i1 %c)
+ store i32 %a, ptr %dst, align 4
+ ret void
+}