diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 7296bb84b7d95..c3aac8014e616 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -9547,8 +9547,12 @@
 The '``callbr``' instruction causes control to transfer to a specified
 function, with the possibility of control flow transfer to either the
 '``fallthrough``' label or one of the '``indirect``' labels.
 
-This instruction should only be used to implement the "goto" feature of gcc
-style inline assembly. Any other usage is an error in the IR verifier.
+This instruction can currently only be used
+
+#. to implement the "goto" feature of gcc style inline assembly or
+#. to call selected intrinsics.
+
+Any other usage is an error in the IR verifier.
 
 Note that in order to support outputs along indirect edges, LLVM may need to
 split critical edges, which may require synthesizing a replacement block for
@@ -9589,7 +9593,8 @@ This instruction requires several arguments:
    indicates the function accepts a variable number of arguments, the extra
    arguments can be specified.
 #. '``fallthrough label``': the label reached when the inline assembly's
-   execution exits the bottom.
+   execution exits the bottom. For an intrinsic call, the semantics depend
+   on the intrinsic being called.
 #. '``indirect labels``': the labels reached when a callee transfers control
    to a location other than the '``fallthrough label``'. Label constraints
    refer to these destinations.
@@ -9607,9 +9612,12 @@
 flow goes after the call.
 
 The output values of a '``callbr``' instruction are available both in the
 '``fallthrough``' block, and any '``indirect``' block(s).
 
-The only use of this today is to implement the "goto" feature of gcc inline
-assembly where additional labels can be provided as locations for the inline
-assembly to jump to.
+The only uses of this today are:
+
+#. to implement the "goto" feature of gcc inline assembly where additional
+   labels can be provided as locations for the inline assembly to jump to.
+#. to support selected intrinsics which manipulate control flow and should
+   be chained to specific terminators, such as '``unreachable``'.
 
 Example:
 """"""""
@@ -9624,6 +9632,14 @@ Example:
 
       <result> = callbr i32 asm "", "=r,r,!i"(i32 %x)
                   to label %fallthrough [label %indirect]
 
+      ; intrinsic which should be followed by unreachable (the order of the
+      ; blocks after the callbr instruction doesn't matter)
+      callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [label %kill]
+      cont:
+        ...
+      kill:
+        unreachable
+
 .. _i_resume:
 
 '``resume``' Instruction
diff --git a/llvm/include/llvm/Analysis/RegionInfoImpl.h b/llvm/include/llvm/Analysis/RegionInfoImpl.h
index eb99d8bc6fb23..3d507f5467037 100644
--- a/llvm/include/llvm/Analysis/RegionInfoImpl.h
+++ b/llvm/include/llvm/Analysis/RegionInfoImpl.h
@@ -553,6 +553,21 @@ bool RegionInfoBase<Tr>::isRegion(BlockT *entry, BlockT *exit) const {
   using DST = typename DomFrontierT::DomSetType;
 
+  // Make sure that a region involving a callbr contains all successor
+  // blocks up to the ones that postdominate the callbr block. Otherwise,
+  // StructurizeCFG will tear the callbr apart.
+  // TODO? post domination frontier?
+  if constexpr (std::is_same_v<BlockT, BasicBlock>) {
+    if (DomTreeNodeT *PDTNode = PDT->getNode(exit); PDTNode) {
+      for (DomTreeNodeT *PredNode : *PDTNode) {
+        for (BasicBlock *Pred : predecessors(PredNode->getBlock())) {
+          if (isa<CallBrInst>(Pred->getTerminator()))
+            return false;
+        }
+      }
+    }
+  }
+
   DST *entrySuccs = &DF->find(entry)->second;
 
   // Exit is the header of a loop that contains the entry. In this case,
   // the dominance frontier must only contain the exit.
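A minimal sketch of the CFG shape this new isRegion check guards against, assuming the amdgcn.kill form introduced in the LangRef hunk above (function and block names here are illustrative, not from the patch):

    declare void @llvm.amdgcn.kill(i1)

    define void @sketch(i1 %c) {
    entry:
      callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [label %kill]
    kill:
      unreachable
    cont:
      br label %exit
    exit:
      ret void
    }

Here %cont is an immediate child of %exit in the post-dominator tree, and its predecessor %entry terminates in a callbr, so a candidate region (%cont, %exit) is now rejected; the callbr and the blocks up to its post-dominator stay in one region, which keeps StructurizeCFG from inserting flow blocks between the callbr and its successors.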
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
index 6fd05c8fddd5f..941750510f1e1 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -297,6 +297,10 @@ class IRTranslator : public MachineFunctionPass {
   /// \pre \p U is a call instruction.
   bool translateCall(const User &U, MachineIRBuilder &MIRBuilder);
 
+  bool translateTargetIntrinsic(
+      const CallBase &CB, Intrinsic::ID ID, MachineIRBuilder &MIRBuilder,
+      TargetLowering::IntrinsicInfo *TgtMemIntrinsicInfo = nullptr);
+
   /// When an invoke or a cleanupret unwinds to the next EH pad, there are
   /// many places it could ultimately go. In the IR, we have a single unwind
   /// destination, but in the machine CFG, we enumerate all the possible blocks.
@@ -313,6 +317,8 @@ class IRTranslator : public MachineFunctionPass {
   bool translateInvoke(const User &U, MachineIRBuilder &MIRBuilder);
 
   bool translateCallBr(const User &U, MachineIRBuilder &MIRBuilder);
+  bool translateCallBrIntrinsic(const CallBrInst &I,
+                                MachineIRBuilder &MIRBuilder);
 
   bool translateLandingPad(const User &U, MachineIRBuilder &MIRBuilder);
diff --git a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
index adc1851c2ec2f..0e89604c7e59c 100644
--- a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -606,9 +606,9 @@ bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI,
 // successors
 void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder);
 
-// Check whether the function only has simple terminator:
-// br/brcond/unreachable/ret
-bool hasOnlySimpleTerminator(const Function &F);
+// Check whether the function only has blocks with simple terminators:
+// br/brcond/unreachable/ret (or callbr if AllowCallBr)
+bool hasOnlySimpleTerminator(const Function &F, bool AllowCallBr = true);
 
 } // end namespace llvm
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 8ab2533afc15f..7edead5775420 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2750,59 +2750,27 @@ bool IRTranslator::translateCallBase(const CallBase &CB,
   return Success;
 }
 
-bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
-  if (containsBF16Type(U))
-    return false;
-
-  const CallInst &CI = cast<CallInst>(U);
-  const Function *F = CI.getCalledFunction();
-
-  // FIXME: support Windows dllimport function calls and calls through
-  // weak symbols.
-  if (F && (F->hasDLLImportStorageClass() ||
-            (MF->getTarget().getTargetTriple().isOSWindows() &&
-             F->hasExternalWeakLinkage())))
-    return false;
-
-  // FIXME: support control flow guard targets.
-  if (CI.countOperandBundlesOfType(LLVMContext::OB_cfguardtarget))
-    return false;
-
-  // FIXME: support statepoints and related.
-  if (isa<GCStatepointInst, GCRelocateInst, GCResultInst>(U))
-    return false;
-
-  if (CI.isInlineAsm())
-    return translateInlineAsm(CI, MIRBuilder);
-
-  diagnoseDontCall(CI);
-
-  Intrinsic::ID ID = Intrinsic::not_intrinsic;
-  if (F && F->isIntrinsic())
-    ID = F->getIntrinsicID();
-
-  if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic)
-    return translateCallBase(CI, MIRBuilder);
-
-  assert(ID != Intrinsic::not_intrinsic && "unknown intrinsic");
-
-  if (translateKnownIntrinsic(CI, ID, MIRBuilder))
-    return true;
-
+/// Translate a call or callbr to a target intrinsic.
+/// If TLI->getTgtMemIntrinsic() succeeded for the call, TgtMemIntrinsicInfo
+/// points to the populated IntrinsicInfo object. Otherwise, it is null.
+bool IRTranslator::translateTargetIntrinsic(
+    const CallBase &CB, Intrinsic::ID ID, MachineIRBuilder &MIRBuilder,
+    TargetLowering::IntrinsicInfo *TgtMemIntrinsicInfo) {
   ArrayRef<Register> ResultRegs;
-  if (!CI.getType()->isVoidTy())
-    ResultRegs = getOrCreateVRegs(CI);
+  if (!CB.getType()->isVoidTy())
+    ResultRegs = getOrCreateVRegs(CB);
 
   // Ignore the callsite attributes. Backend code is most likely not expecting
   // an intrinsic to sometimes have side effects and sometimes not.
   MachineInstrBuilder MIB = MIRBuilder.buildIntrinsic(ID, ResultRegs);
-  if (isa<FPMathOperator>(CI))
-    MIB->copyIRFlags(CI);
+  if (isa<FPMathOperator>(CB))
+    MIB->copyIRFlags(CB);
 
-  for (const auto &Arg : enumerate(CI.args())) {
+  for (const auto &Arg : enumerate(CB.args())) {
     // If this is required to be an immediate, don't materialize it in a
     // register.
-    if (CI.paramHasAttr(Arg.index(), Attribute::ImmArg)) {
+    if (CB.paramHasAttr(Arg.index(), Attribute::ImmArg)) {
       if (ConstantInt *CI = dyn_cast<ConstantInt>(Arg.value())) {
         // imm arguments are more convenient than cimm (and realistically
         // probably sufficient), so use them.
@@ -2831,28 +2799,30 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
   }
 
   // Add a MachineMemOperand if it is a target mem intrinsic.
-  TargetLowering::IntrinsicInfo Info;
-  // TODO: Add a GlobalISel version of getTgtMemIntrinsic.
-  if (TLI->getTgtMemIntrinsic(Info, CI, *MF, ID)) {
-    Align Alignment = Info.align.value_or(
-        DL->getABITypeAlign(Info.memVT.getTypeForEVT(F->getContext())));
-    LLT MemTy = Info.memVT.isSimple()
-                    ? getLLTForMVT(Info.memVT.getSimpleVT())
-                    : LLT::scalar(Info.memVT.getStoreSizeInBits());
+  if (TgtMemIntrinsicInfo) {
+    const Function *F = CB.getCalledFunction();
+
+    Align Alignment = TgtMemIntrinsicInfo->align.value_or(DL->getABITypeAlign(
+        TgtMemIntrinsicInfo->memVT.getTypeForEVT(F->getContext())));
+    LLT MemTy =
+        TgtMemIntrinsicInfo->memVT.isSimple()
+            ? getLLTForMVT(TgtMemIntrinsicInfo->memVT.getSimpleVT())
+            : LLT::scalar(TgtMemIntrinsicInfo->memVT.getStoreSizeInBits());
 
     // TODO: We currently just fallback to address space 0 if getTgtMemIntrinsic
     //       didn't yield anything useful.
     MachinePointerInfo MPI;
-    if (Info.ptrVal)
-      MPI = MachinePointerInfo(Info.ptrVal, Info.offset);
-    else if (Info.fallbackAddressSpace)
-      MPI = MachinePointerInfo(*Info.fallbackAddressSpace);
-    MIB.addMemOperand(
-        MF->getMachineMemOperand(MPI, Info.flags, MemTy, Alignment, CI.getAAMetadata()));
+    if (TgtMemIntrinsicInfo->ptrVal)
+      MPI = MachinePointerInfo(TgtMemIntrinsicInfo->ptrVal,
+                               TgtMemIntrinsicInfo->offset);
+    else if (TgtMemIntrinsicInfo->fallbackAddressSpace)
+      MPI = MachinePointerInfo(*TgtMemIntrinsicInfo->fallbackAddressSpace);
+    MIB.addMemOperand(MF->getMachineMemOperand(
+        MPI, TgtMemIntrinsicInfo->flags, MemTy, Alignment, CB.getAAMetadata()));
   }
 
-  if (CI.isConvergent()) {
-    if (auto Bundle = CI.getOperandBundle(LLVMContext::OB_convergencectrl)) {
+  if (CB.isConvergent()) {
+    if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_convergencectrl)) {
       auto *Token = Bundle->Inputs[0].get();
       Register TokenReg = getOrCreateVReg(*Token);
       MIB.addUse(TokenReg, RegState::Implicit);
@@ -2862,6 +2832,53 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
   return true;
 }
 
+bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
+  if (containsBF16Type(U))
+    return false;
+
+  const CallInst &CI = cast<CallInst>(U);
+  const Function *F = CI.getCalledFunction();
+
+  // FIXME: support Windows dllimport function calls and calls through
+  // weak symbols.
+  if (F && (F->hasDLLImportStorageClass() ||
+            (MF->getTarget().getTargetTriple().isOSWindows() &&
+             F->hasExternalWeakLinkage())))
+    return false;
+
+  // FIXME: support control flow guard targets.
+  if (CI.countOperandBundlesOfType(LLVMContext::OB_cfguardtarget))
+    return false;
+
+  // FIXME: support statepoints and related.
+  if (isa<GCStatepointInst, GCRelocateInst, GCResultInst>(U))
+    return false;
+
+  if (CI.isInlineAsm())
+    return translateInlineAsm(CI, MIRBuilder);
+
+  diagnoseDontCall(CI);
+
+  Intrinsic::ID ID = Intrinsic::not_intrinsic;
+  if (F && F->isIntrinsic())
+    ID = F->getIntrinsicID();
+
+  if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic)
+    return translateCallBase(CI, MIRBuilder);
+
+  assert(ID != Intrinsic::not_intrinsic && "unknown intrinsic");
+
+  if (translateKnownIntrinsic(CI, ID, MIRBuilder))
+    return true;
+
+  TargetLowering::IntrinsicInfo Info;
+  // TODO: Add a GlobalISel version of getTgtMemIntrinsic.
+  bool IsTgtMemIntrinsic = TLI->getTgtMemIntrinsic(Info, CI, *MF, ID);
+
+  return translateTargetIntrinsic(CI, ID, MIRBuilder,
+                                  IsTgtMemIntrinsic ? &Info : nullptr);
+}
+
 bool IRTranslator::findUnwindDestinations(
     const BasicBlock *EHPadBB,
     BranchProbability Prob,
@@ -3005,10 +3022,50 @@ bool IRTranslator::translateInvoke(const User &U,
   return true;
 }
 
+/// The intrinsics currently supported by callbr are implicit control flow
+/// intrinsics such as amdgcn.kill.
 bool IRTranslator::translateCallBr(const User &U,
                                    MachineIRBuilder &MIRBuilder) {
-  // FIXME: Implement this.
-  return false;
+  if (containsBF16Type(U))
+    return false; // see translateCall
+
+  const CallBrInst &I = cast<CallBrInst>(U);
+  MachineBasicBlock *CallBrMBB = &MIRBuilder.getMBB();
+
+  // FIXME: inline asm not yet supported
+  if (I.isInlineAsm())
+    return false;
+  if (I.getIntrinsicID() == Intrinsic::not_intrinsic)
+    return false;
+  if (!translateTargetIntrinsic(I, I.getIntrinsicID(), MIRBuilder))
+    return false;
+
+  // Retrieve successors.
+  SmallPtrSet<BasicBlock *, 8> Dests;
+  Dests.insert(I.getDefaultDest());
+  MachineBasicBlock *Return = &getMBB(*I.getDefaultDest());
+
+  // Update successor info.
+  addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne());
+  // TODO: For most of the cases where there is an intrinsic callbr, we have
+  // exactly one indirect target, which will be unreachable. As soon as this
+  // changes, we might need to enhance
+  // Target->setIsInlineAsmBrIndirectTarget or add something similar for
+  // intrinsic indirect branches.
+  if (I.isInlineAsm()) {
+    for (BasicBlock *Dest : I.getIndirectDests()) {
+      MachineBasicBlock *Target = &getMBB(*Dest);
+      Target->setIsInlineAsmBrIndirectTarget();
+      Target->setMachineBlockAddressTaken();
+      Target->setLabelMustBeEmitted();
+      // Don't add duplicate machine successors.
+      if (Dests.insert(Dest).second)
+        addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero());
+    }
+  }
+  CallBrMBB->normalizeSuccProbs();
+
+  return true;
 }
 
 bool IRTranslator::translateLandingPad(const User &U,
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 8e74a076cc013..5b27ab6d5b71b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3375,17 +3375,36 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
                           DAG.getBasicBlock(Return)));
 }
 
+/// The intrinsics currently supported by callbr are implicit control flow
+/// intrinsics such as amdgcn.kill.
+/// - they should be called (no "dontcall-" attributes)
+/// - they do not touch memory on the target (= !TLI.getTgtMemIntrinsic())
+/// - they do not need custom argument handling (no
+///   TLI.CollectTargetIntrinsicOperands())
+void SelectionDAGBuilder::visitCallBrIntrinsic(const CallBrInst &I) {
+  auto [HasChain, OnlyLoad] = getTargetIntrinsicCallProperties(I);
+
+  SmallVector<SDValue, 8> Ops =
+      getTargetIntrinsicOperands(I, HasChain, OnlyLoad);
+  SDVTList VTs = getTargetIntrinsicVTList(I, HasChain);
+
+  // Create the node.
+  SDValue Result = getTargetNonMemIntrinsicNode(I, HasChain, Ops, VTs);
+  Result = handleTargetIntrinsicRet(I, HasChain, OnlyLoad, Result);
+
+  setValue(&I, Result);
+}
+
 void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
   MachineBasicBlock *CallBrMBB = FuncInfo.MBB;
 
-  // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
-  // have to do anything here to lower funclet bundles.
-  assert(!I.hasOperandBundlesOtherThan(
-             {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
-         "Cannot lower callbrs with arbitrary operand bundles yet!");
-
-  assert(I.isInlineAsm() && "Only know how to handle inlineasm callbr");
-  visitInlineAsm(I);
+  if (I.isInlineAsm()) {
+    visitInlineAsm(I);
+  } else if (I.getIntrinsicID() != Intrinsic::not_intrinsic) {
+    visitCallBrIntrinsic(I);
+  } else {
+    report_fatal_error("only know how to handle inlineasm/intrinsic callbr");
+  }
 
   CopyToExportRegsIfNeeded(&I);
 
   // Retrieve successors.
@@ -3395,15 +3414,21 @@ void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
 
   // Update successor info.
   addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne());
-  for (unsigned i = 0, e = I.getNumIndirectDests(); i < e; ++i) {
-    BasicBlock *Dest = I.getIndirectDest(i);
-    MachineBasicBlock *Target = FuncInfo.getMBB(Dest);
-    Target->setIsInlineAsmBrIndirectTarget();
-    Target->setMachineBlockAddressTaken();
-    Target->setLabelMustBeEmitted();
-    // Don't add duplicate machine successors.
-    if (Dests.insert(Dest).second)
-      addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero());
+  // TODO: For most of the cases where there is an intrinsic callbr, we have
+  // exactly one indirect target, which will be unreachable. As soon as this
+  // changes, we might need to enhance
+  // Target->setIsInlineAsmBrIndirectTarget or add something similar for
+  // intrinsic indirect branches.
+  if (I.isInlineAsm()) {
+    for (BasicBlock *Dest : I.getIndirectDests()) {
+      MachineBasicBlock *Target = FuncInfo.getMBB(Dest);
+      Target->setIsInlineAsmBrIndirectTarget();
+      Target->setMachineBlockAddressTaken();
+      Target->setLabelMustBeEmitted();
+      // Don't add duplicate machine successors.
+      if (Dests.insert(Dest).second)
+        addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero());
+    }
   }
 
   CallBrMBB->normalizeSuccProbs();
@@ -5210,18 +5235,26 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
   DAG.setRoot(OutChain);
 }
 
-/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
-/// node.
-void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
-                                               unsigned Intrinsic) {
-  // Ignore the callsite's attributes. A specific call site may be marked with
-  // readnone, but the lowering code will expect the chain based on the
-  // definition.
+/// Check if this intrinsic call depends on the chain (1st return value)
+/// and if it only *loads* memory.
+/// Ignore the callsite's attributes. A specific call site may be marked with
+/// readnone, but the lowering code will expect the chain based on the
+/// definition.
+std::pair<bool, bool>
+SelectionDAGBuilder::getTargetIntrinsicCallProperties(const CallBase &I) {
   const Function *F = I.getCalledFunction();
   bool HasChain = !F->doesNotAccessMemory();
   bool OnlyLoad =
       HasChain && F->onlyReadsMemory() && F->willReturn() && F->doesNotThrow();
 
+  return {HasChain, OnlyLoad};
+}
+
+SmallVector<SDValue, 8> SelectionDAGBuilder::getTargetIntrinsicOperands(
+    const CallBase &I, bool HasChain, bool OnlyLoad,
+    TargetLowering::IntrinsicInfo *TgtMemIntrinsicInfo) {
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
   // Build the operand list.
   SmallVector<SDValue, 8> Ops;
 
   if (HasChain) { // If this intrinsic has side-effects, chainify it.
@@ -5233,17 +5266,10 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
     }
   }
 
-  // Info is set by getTgtMemIntrinsic
-  TargetLowering::IntrinsicInfo Info;
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I,
-                                               DAG.getMachineFunction(),
-                                               Intrinsic);
-
   // Add the intrinsic ID as an integer operand if it's not a target intrinsic.
-  if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
-      Info.opc == ISD::INTRINSIC_W_CHAIN)
-    Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(),
+  if (!TgtMemIntrinsicInfo || TgtMemIntrinsicInfo->opc == ISD::INTRINSIC_VOID ||
+      TgtMemIntrinsicInfo->opc == ISD::INTRINSIC_W_CHAIN)
+    Ops.push_back(DAG.getTargetConstant(I.getIntrinsicID(), getCurSDLoc(),
                                         TLI.getPointerTy(DAG.getDataLayout())));
 
   // Add all operands of the call to the operand list.
@@ -5266,13 +5292,94 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
     }
   }
 
+  if (auto Bundle = I.getOperandBundle(LLVMContext::OB_convergencectrl)) {
+    auto *Token = Bundle->Inputs[0].get();
+    SDValue ConvControlToken = getValue(Token);
+    assert(Ops.back().getValueType() != MVT::Glue &&
+           "Did not expect another glue node here.");
+    ConvControlToken =
+        DAG.getNode(ISD::CONVERGENCECTRL_GLUE, {}, MVT::Glue, ConvControlToken);
+    Ops.push_back(ConvControlToken);
+  }
+
+  return Ops;
+}
+
+SDVTList SelectionDAGBuilder::getTargetIntrinsicVTList(const CallBase &I,
+                                                       bool HasChain) {
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
   SmallVector<EVT, 4> ValueVTs;
   ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs);
 
   if (HasChain)
     ValueVTs.push_back(MVT::Other);
 
-  SDVTList VTs = DAG.getVTList(ValueVTs);
+  return DAG.getVTList(ValueVTs);
+}
+
+/// Get an INTRINSIC node for a target intrinsic which does not touch memory.
+SDValue SelectionDAGBuilder::getTargetNonMemIntrinsicNode(
+    const CallBase &I, bool HasChain, SmallVector<SDValue, 8> &Ops,
+    SDVTList &VTs) {
+  SDValue Result;
+
+  if (!HasChain) {
+    Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
+  } else if (!I.getType()->isVoidTy()) {
+    Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops);
+  } else {
+    Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops);
+  }
+
+  return Result;
+}
+
+/// Set root, convert return type if necessary, and check alignment.
+SDValue SelectionDAGBuilder::handleTargetIntrinsicRet(const CallBase &I,
+                                                      bool HasChain,
+                                                      bool OnlyLoad,
+                                                      SDValue Result) {
+  if (HasChain) {
+    SDValue Chain = Result.getValue(Result.getNode()->getNumValues() - 1);
+    if (OnlyLoad)
+      PendingLoads.push_back(Chain);
+    else
+      DAG.setRoot(Chain);
+  }
+
+  if (I.getType()->isVoidTy())
+    return Result;
+
+  if (!isa<VectorType>(I.getType()))
+    Result = lowerRangeToAssertZExt(DAG, I, Result);
+
+  MaybeAlign Alignment = I.getRetAlign();
+
+  // Insert `assertalign` node if there's an alignment.
+  if (InsertAssertAlign && Alignment) {
+    Result = DAG.getAssertAlign(getCurSDLoc(), Result, Alignment.valueOrOne());
+  }
+
+  return Result;
+}
+
+/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
+/// node.
+void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
+                                               unsigned Intrinsic) {
+  auto [HasChain, OnlyLoad] = getTargetIntrinsicCallProperties(I);
+
+  // Info is set by getTgtMemIntrinsic
+  TargetLowering::IntrinsicInfo Info;
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  bool IsTgtMemIntrinsic =
+      TLI.getTgtMemIntrinsic(Info, I, DAG.getMachineFunction(), Intrinsic);
+
+  SmallVector<SDValue, 8> Ops = getTargetIntrinsicOperands(
+      I, HasChain, OnlyLoad, IsTgtMemIntrinsic ? &Info : nullptr);
+  SDVTList VTs = getTargetIntrinsicVTList(I, HasChain);
 
   // Propagate fast-math-flags from IR to node(s).
   SDNodeFlags Flags;
@@ -5283,19 +5390,9 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
 
   // Create the node.
   SDValue Result;
 
-  if (auto Bundle = I.getOperandBundle(LLVMContext::OB_convergencectrl)) {
-    auto *Token = Bundle->Inputs[0].get();
-    SDValue ConvControlToken = getValue(Token);
-    assert(Ops.back().getValueType() != MVT::Glue &&
-           "Did not expected another glue node here.");
-    ConvControlToken =
-        DAG.getNode(ISD::CONVERGENCECTRL_GLUE, {}, MVT::Glue, ConvControlToken);
-    Ops.push_back(ConvControlToken);
-  }
-
   // In some cases, custom collection of operands from CallInst I may be needed.
   TLI.CollectTargetIntrinsicOperands(I, Ops, DAG);
-  if (IsTgtIntrinsic) {
+  if (IsTgtMemIntrinsic) {
     // This is target intrinsic that touches memory
     //
     // TODO: We currently just fallback to address space 0 if getTgtMemIntrinsic
@@ -5308,34 +5405,11 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
     Result = DAG.getMemIntrinsicNode(
         Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT, MPI, Info.align,
         Info.flags, LocationSize::precise(Info.size), I.getAAMetadata());
-  } else if (!HasChain) {
-    Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
-  } else if (!I.getType()->isVoidTy()) {
-    Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops);
   } else {
-    Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops);
+    Result = getTargetNonMemIntrinsicNode(I, HasChain, Ops, VTs);
   }
 
-  if (HasChain) {
-    SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
-    if (OnlyLoad)
-      PendingLoads.push_back(Chain);
-    else
-      DAG.setRoot(Chain);
-  }
-
-  if (!I.getType()->isVoidTy()) {
-    if (!isa<VectorType>(I.getType()))
-      Result = lowerRangeToAssertZExt(DAG, I, Result);
-
-    MaybeAlign Alignment = I.getRetAlign();
-
-    // Insert `assertalign` node if there's an alignment.
-    if (InsertAssertAlign && Alignment) {
-      Result =
-          DAG.getAssertAlign(getCurSDLoc(), Result, Alignment.valueOrOne());
-    }
-  }
+  Result = handleTargetIntrinsicRet(I, HasChain, OnlyLoad, Result);
 
   setValue(&I, Result);
 }
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 35c15bc269d4b..c1cf2c4035103 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -536,10 +536,12 @@ class SelectionDAGBuilder {
 private:
   // These all get lowered before this pass.
   void visitInvoke(const InvokeInst &I);
-  void visitCallBr(const CallBrInst &I);
   void visitCallBrLandingPad(const CallInst &I);
   void visitResume(const ResumeInst &I);
 
+  void visitCallBr(const CallBrInst &I);
+  void visitCallBrIntrinsic(const CallBrInst &I);
+
   void visitUnary(const User &I, unsigned Opcode);
   void visitFNeg(const User &I) { visitUnary(I, ISD::FNEG); }
 
@@ -709,6 +711,17 @@ class SelectionDAGBuilder {
                           MCSymbol *&BeginLabel);
   SDValue lowerEndEH(SDValue Chain, const InvokeInst *II,
                      const BasicBlock *EHPadBB, MCSymbol *BeginLabel);
+
+  std::pair<bool, bool> getTargetIntrinsicCallProperties(const CallBase &I);
+  SmallVector<SDValue, 8> getTargetIntrinsicOperands(
+      const CallBase &I, bool HasChain, bool OnlyLoad,
+      TargetLowering::IntrinsicInfo *TgtMemIntrinsicInfo = nullptr);
+  SDVTList getTargetIntrinsicVTList(const CallBase &I, bool HasChain);
+  SDValue getTargetNonMemIntrinsicNode(const CallBase &I, bool HasChain,
+                                       SmallVector<SDValue, 8> &Ops,
+                                       SDVTList &VTs);
+  SDValue handleTargetIntrinsicRet(const CallBase &I, bool HasChain,
+                                   bool OnlyLoad, SDValue Result);
 };
 
 /// This struct represents the registers (physical or virtual)
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 83c1264aef12b..f1479f03f1029 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -3273,11 +3273,34 @@ void Verifier::visitIndirectBrInst(IndirectBrInst &BI) {
 }
 
 void Verifier::visitCallBrInst(CallBrInst &CBI) {
-  Check(CBI.isInlineAsm(), "Callbr is currently only used for asm-goto!", &CBI);
-  const InlineAsm *IA = cast<InlineAsm>(CBI.getCalledOperand());
-  Check(!IA->canThrow(), "Unwinding from Callbr is not allowed");
+  if (!CBI.isInlineAsm()) {
+    Check(CBI.getCalledFunction(),
+          "Callbr: indirect function / invalid signature");
+    Check(!CBI.hasOperandBundles(),
+          "Callbr currently doesn't support operand bundles");
+
+    switch (CBI.getIntrinsicID()) {
+    case Intrinsic::amdgcn_kill: {
+      Check(CBI.getNumIndirectDests() == 1,
+            "Callbr amdgcn_kill only supports one indirect dest");
+      bool Unreachable = isa<UnreachableInst>(CBI.getIndirectDest(0)->begin());
+      CallInst *Call = dyn_cast<CallInst>(CBI.getIndirectDest(0)->begin());
+      Check(Unreachable || (Call && Call->getIntrinsicID() ==
+                                        Intrinsic::amdgcn_unreachable),
+            "Callbr amdgcn_kill indirect dest needs to be unreachable");
+      break;
+    }
+    default:
+      CheckFailed(
+          "Callbr currently only supports asm-goto and selected intrinsics");
+    }
+    visitIntrinsicCall(CBI.getIntrinsicID(), CBI);
+  } else {
+    const InlineAsm *IA = cast<InlineAsm>(CBI.getCalledOperand());
+    Check(!IA->canThrow(), "Unwinding from Callbr is not allowed");
 
-  verifyInlineAsmCall(CBI);
+    verifyInlineAsmCall(CBI);
+  }
   visitTerminator(CBI);
 }
 
@@ -5235,7 +5258,7 @@ void Verifier::visitInstruction(Instruction &I) {
             (CBI && &CBI->getCalledOperandUse() == &I.getOperandUse(i)) ||
             IsAttachedCallOperand(F, CBI, i)),
           "Cannot take the address of an intrinsic!", &I);
-    Check(!F->isIntrinsic() || isa<CallInst>(I) ||
+    Check(!F->isIntrinsic() || isa<CallInst>(I) || isa<CallBrInst>(I) ||
               F->getIntrinsicID() == Intrinsic::donothing ||
               F->getIntrinsicID() == Intrinsic::seh_try_begin ||
              F->getIntrinsicID() == Intrinsic::seh_try_end ||
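The amdgcn_kill rule above accepts two shapes for the indirect destination: a block that starts with unreachable, or one that starts with a call to @llvm.amdgcn.unreachable. A sketch of the second form, which the tests below don't exercise (function name is illustrative, not from the patch):

    declare void @llvm.amdgcn.kill(i1)
    declare void @llvm.amdgcn.unreachable()

    define void @kill_then_amdgcn_unreachable(i1 %c) {
      callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [label %kill]
    kill:
      ; the first instruction is an amdgcn.unreachable call, so the
      ; verifier check passes even before the bare unreachable
      call void @llvm.amdgcn.unreachable()
      unreachable
    cont:
      ret void
    }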
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index a69d64956d6d9..18d3381b420bd 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -480,11 +480,10 @@ void StructurizeCFG::analyzeLoops(RegionNode *N) {
   } else {
     // Test for successors as back edge
     BasicBlock *BB = N->getNodeAs<BasicBlock>();
-    BranchInst *Term = cast<BranchInst>(BB->getTerminator());
-
-    for (BasicBlock *Succ : Term->successors())
-      if (Visited.count(Succ))
-        Loops[Succ] = BB;
+    if (BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator()); Term)
+      for (BasicBlock *Succ : Term->successors())
+        if (Visited.count(Succ))
+          Loops[Succ] = BB;
   }
 }
 
@@ -516,7 +515,7 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {
 
   for (BasicBlock *P : predecessors(BB)) {
     // Ignore it if it's a branch from outside into our region entry
-    if (!ParentRegion->contains(P))
+    if (!ParentRegion->contains(P) || !isa<BranchInst>(P->getTerminator()))
       continue;
 
     Region *R = RI->getRegionFor(P);
diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index 6608515e1cbbc..41036b5ec9194 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -1900,11 +1900,11 @@ void llvm::InvertBranch(BranchInst *PBI, IRBuilderBase &Builder) {
   PBI->swapSuccessors();
 }
 
-bool llvm::hasOnlySimpleTerminator(const Function &F) {
+bool llvm::hasOnlySimpleTerminator(const Function &F, bool AllowCallBr) {
   for (auto &BB : F) {
     auto *Term = BB.getTerminator();
     if (!(isa<BranchInst>(Term) || isa<ReturnInst>(Term) ||
-          isa<UnreachableInst>(Term)))
+          isa<UnreachableInst>(Term) || (AllowCallBr && isa<CallBrInst>(Term))))
      return false;
   }
   return true;
diff --git a/llvm/test/Assembler/callbr.ll b/llvm/test/Assembler/callbr.ll
new file mode 100644
index 0000000000000..0084e9763c62c
--- /dev/null
+++ b/llvm/test/Assembler/callbr.ll
@@ -0,0 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S < %s | FileCheck %s
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+declare void @llvm.amdgcn.kill(i1)
+
+define void @test_kill(i1 %c) {
+; CHECK-LABEL: define void @test_kill(
+; CHECK-SAME: i1 [[C:%.*]]) {
+; CHECK-NEXT:    callbr void @llvm.amdgcn.kill(i1 [[C]])
+; CHECK-NEXT:    to label %[[CONT:.*]] [label %kill]
+; CHECK:       [[KILL:.*:]]
+; CHECK-NEXT:    unreachable
+; CHECK:       [[CONT]]:
+; CHECK-NEXT:    ret void
+;
+  callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [label %kill]
+kill:
+  unreachable
+cont:
+  ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/callbr.ll b/llvm/test/CodeGen/AMDGPU/callbr.ll
new file mode 100644
index 0000000000000..52856b872ee2a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/callbr.ll
@@ -0,0 +1,102 @@
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -o - < %s | FileCheck --check-prefix=SELDAG %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -o - -global-isel < %s | FileCheck --check-prefix=GISEL %s
+
+; SELDAG-LABEL: test_kill:
+; SELDAG-NEXT: ; %bb.0:
+; SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SELDAG-NEXT: flat_load_dword v0, v[0:1]
+; SELDAG-NEXT: v_and_b32_e32 v1, 1, v4
+; SELDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
+; SELDAG-NEXT: s_mov_b64 s[4:5], exec
+; SELDAG-NEXT: s_andn2_b64 s[6:7], exec, vcc
+; SELDAG-NEXT: s_andn2_b64 s[4:5], s[4:5], s[6:7]
+; SELDAG-NEXT: s_cbranch_scc0 .LBB0_2
+; SELDAG-NEXT: ; %bb.1:
+; SELDAG-NEXT: s_and_b64 exec, exec, s[4:5]
+; SELDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; SELDAG-NEXT: flat_store_dword v[2:3], v0
+; SELDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; SELDAG-NEXT: s_setpc_b64 s[30:31]
+; SELDAG-NEXT: .LBB0_2:
+; SELDAG-NEXT: s_mov_b64 exec, 0
+; SELDAG-NEXT: s_endpgm
+
+; GISEL-LABEL: test_kill:
+; GISEL-NEXT: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: flat_load_dword v0, v[0:1]
+; GISEL-NEXT: v_and_b32_e32 v1, 1, v4
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
+; GISEL-NEXT: s_mov_b64 s[4:5], exec
+; GISEL-NEXT: s_andn2_b64 s[6:7], exec, vcc
+; GISEL-NEXT: s_andn2_b64 s[4:5], s[4:5], s[6:7]
+; GISEL-NEXT: s_cbranch_scc0 .LBB0_2
+; GISEL-NEXT: ; %bb.1:
+; GISEL-NEXT: s_and_b64 exec, exec, s[4:5]
+; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GISEL-NEXT: flat_store_dword v[2:3], v0
+; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GISEL-NEXT: s_setpc_b64 s[30:31]
+; GISEL-NEXT: .LBB0_2:
+; GISEL-NEXT: s_mov_b64 exec, 0
+; GISEL-NEXT: s_endpgm
+
+define void @test_kill(ptr %src, ptr %dst, i1 %c) {
+  %a = load i32, ptr %src, align 4
+  callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [label %kill]
+kill:
+  unreachable
+cont:
+  store i32 %a, ptr %dst, align 4
+  ret void
+}
+
+; SELDAG-LABEL: test_kill_block_order:
+; SELDAG-NEXT: ; %bb.0:
+; SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SELDAG-NEXT: flat_load_dword v0, v[0:1]
+; SELDAG-NEXT: v_and_b32_e32 v1, 1, v4
+; SELDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
+; SELDAG-NEXT: s_mov_b64 s[4:5], exec
+; SELDAG-NEXT: s_andn2_b64 s[6:7], exec, vcc
+; SELDAG-NEXT: s_andn2_b64 s[4:5], s[4:5], s[6:7]
+; SELDAG-NEXT: s_cbranch_scc0 .LBB1_2
+; SELDAG-NEXT: ; %bb.1:
+; SELDAG-NEXT: s_and_b64 exec, exec, s[4:5]
+; SELDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; SELDAG-NEXT: flat_store_dword v[2:3], v0
+; SELDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; SELDAG-NEXT: s_setpc_b64 s[30:31]
+; SELDAG-NEXT: .LBB1_2:
+; SELDAG-NEXT: s_mov_b64 exec, 0
+; SELDAG-NEXT: s_endpgm
+
+; GISEL-LABEL: test_kill_block_order:
+; GISEL-NEXT: ; %bb.0:
+; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT: flat_load_dword v0, v[0:1]
+; GISEL-NEXT: v_and_b32_e32 v1, 1, v4
+; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
+; GISEL-NEXT: s_mov_b64 s[4:5], exec
+; GISEL-NEXT: s_andn2_b64 s[6:7], exec, vcc
+; GISEL-NEXT: s_andn2_b64 s[4:5], s[4:5], s[6:7]
+; GISEL-NEXT: s_cbranch_scc0 .LBB1_2
+; GISEL-NEXT: ; %bb.1:
+; GISEL-NEXT: s_and_b64 exec, exec, s[4:5]
+; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GISEL-NEXT: flat_store_dword v[2:3], v0
+; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GISEL-NEXT: s_setpc_b64 s[30:31]
+; GISEL-NEXT: .LBB1_2:
+; GISEL-NEXT: s_mov_b64 exec, 0
+; GISEL-NEXT: s_endpgm
+
+define void @test_kill_block_order(ptr %src, ptr %dst, i1 %c) {
+  %a = load i32, ptr %src, align 4
+  callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [label %kill]
+cont:
+  store i32 %a, ptr %dst, align 4
+  ret void
+kill:
+  unreachable
+}
diff --git a/llvm/test/Verifier/callbr.ll b/llvm/test/Verifier/callbr.ll
index 9b819c5fed48b..29bd3397b8980 100644
--- a/llvm/test/Verifier/callbr.ll
+++ b/llvm/test/Verifier/callbr.ll
@@ -120,3 +120,50 @@ landingpad:
   %out = call i32 @llvm.callbr.landingpad.i32(i32 %0)
   ret i32 %out
 }
+
+declare void @llvm.amdgcn.kill(i1)
+
+; CHECK-NEXT: Callbr amdgcn_kill only supports one indirect dest
+define void @test_callbr_intrinsic_indirect0(i1 %c) {
+  callbr void @llvm.amdgcn.kill(i1 %c) to label %cont []
+kill:
+  unreachable
+cont:
+  ret void
+}
+
+; CHECK-NEXT: Callbr amdgcn_kill only supports one indirect dest
+define void @test_callbr_intrinsic_indirect2(i1 %c) {
+  callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [label %kill1, label %kill2]
+kill1:
+  unreachable
+kill2:
+  unreachable
+cont:
+  ret void
+}
+
+; CHECK-NEXT: Callbr amdgcn_kill indirect dest needs to be unreachable
+define void @test_callbr_intrinsic_no_unreachable(i1 %c) {
+  callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [label %kill]
+kill:
+  ret void
+cont:
+  ret void
+}
+
+; CHECK-NEXT: Callbr currently only supports asm-goto and selected intrinsics
+declare i32 @llvm.amdgcn.workitem.id.x()
+define void @test_callbr_intrinsic_unsupported() {
+  callbr i32 @llvm.amdgcn.workitem.id.x() to label %cont []
+cont:
+  ret void
+}
+
+; CHECK-NEXT: Callbr: indirect function / invalid signature
+define void @test_callbr_intrinsic_wrong_signature(ptr %ptr) {
+  %func = load ptr, ptr %ptr, align 8
+  callbr void %func() to label %cont []
+cont:
+  ret void
+}
diff --git a/polly/test/ScopDetect/callbr.ll b/polly/test/ScopDetect/callbr.ll
index 4182974693678..75f676afd79c4 100644
--- a/polly/test/ScopDetect/callbr.ll
+++ b/polly/test/ScopDetect/callbr.ll
@@ -1,10 +1,7 @@
-; RUN: opt %loadNPMPolly '-passes=print<polly-detect>' -polly-detect-track-failures -disable-output -pass-remarks-missed=polly-detect < %s 2>&1 | FileCheck %s --check-prefix=REMARK
-; RUN: opt %loadNPMPolly '-passes=print<polly-detect>' -polly-detect-track-failures -disable-output -stats < %s 2>&1 | FileCheck %s --check-prefix=STAT
-; REQUIRES: asserts
+; RUN: opt %loadNPMPolly '-passes=print<polly-detect>' -disable-output < %s 2>&1 | FileCheck %s
 
-; REMARK: Branch from indirect terminator.
-
-; STAT: 1 polly-detect - Number of rejected regions: Branch from indirect terminator
+; CHECK-LABEL: func
+; CHECK-NOT: Valid
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
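For context on the Polly test change: with the RegionInfoImpl.h check above, a region whose blocks sit between a callbr and its post-dominator is no longer formed at all, so ScopDetection never reports a valid SCoP and the old "Branch from indirect terminator" remark never fires. A hedged sketch of the kind of kernel the test covers — a loop whose body contains an asm-goto callbr — not the actual test body, which the hunk truncates at the datalayout line:

    define void @func(ptr %A, i32 %n) {
    entry:
      br label %loop
    loop:
      %i = phi i32 [ 0, %entry ], [ %i.next, %latch ]
      ; asm goto with one indirect destination
      callbr void asm "", "!i"() to label %latch [label %other]
    other:
      br label %latch
    latch:
      %i.next = add i32 %i, 1
      %cmp = icmp slt i32 %i.next, %n
      br i1 %cmp, label %loop, label %exit
    exit:
      ret void
    }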