LLVM: lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
19#include "llvm/IR/IntrinsicsAMDGPU.h"
22
23#define DEBUG_TYPE "amdgpu-lower-intrinsics"
24
25using namespace llvm;
26
27namespace {
28
// Implementation object used by the pass entry points in this listing: each
// of them constructs an instance as Impl(M, TM) and then calls run().
// NOTE(review): listing lines 31-32, 34 and 40 are absent from this extract,
// so the data-member, constructor-signature and private-member declarations
// are not visible here.
29class AMDGPULowerIntrinsicsImpl {
30public:
33
// Constructor initializer list: stores the constructor arguments in the
// members M and TM.
35 : M(M), TM(TM) {}
36
// Defined further down; iterates over the functions of M. Its bool result is
// what runOnModule() returns to the caller.
37 bool run();
38
39private:
41};
42
// ModulePass wrapper around AMDGPULowerIntrinsicsImpl: runOnModule() builds an
// Impl object and returns the result of Impl.run().
43class AMDGPULowerIntrinsicsLegacy : public ModulePass {
44public:
// Pass identifier handed to the ModulePass base constructor; defined near the
// end of the file with the value 0.
45 static char ID;
46
47 AMDGPULowerIntrinsicsLegacy() : ModulePass(ID) {}
48
49 bool runOnModule(Module &M) override;
50
// NOTE(review): the body (listing line 52) is absent from this extract, so
// the analyses requested through AU are not visible here.
51 void getAnalysisUsage(AnalysisUsage &AU) const override {
53 }
54};
55
59 Callback(CI);
60 }
61}
62
63}
64
// Walks every Function in the module M and dispatches on its intrinsic ID.
// NOTE(review): listing lines 66, 77 and 82 are absent from this extract, so
// the local state, the shared case body and the return statement are not
// visible. Presumably the case body visits the calls of F through
// visitBarrier() and the result reports whether anything changed - confirm
// against the full source.
65bool AMDGPULowerIntrinsicsImpl::run() {
67
68 for (Function &F : M) {
69 switch (F.getIntrinsicID()) {
// Any function that is not one of the listed intrinsics is skipped.
70 default:
71 continue;
// The five barrier intrinsics share a single case body.
72 case Intrinsic::amdgcn_s_barrier:
73 case Intrinsic::amdgcn_s_barrier_signal:
74 case Intrinsic::amdgcn_s_barrier_signal_isfirst:
75 case Intrinsic::amdgcn_s_barrier_wait:
76 case Intrinsic::amdgcn_s_cluster_barrier:
78 break;
79 }
80 }
81
83}
84
85
86
// Lowers one call to a barrier intrinsic. Returns true when the call was
// rewritten and erased, false when it is left untouched.
// NOTE(review): listing lines 102, 112-113, 120-121, 127-128, 132, 146-149 and
// 173-174 are absent from this extract; the declarations of B, BarrierID_32,
// BarrierID_16 and ThenTerm used below are among them.
87bool AMDGPULowerIntrinsicsImpl::visitBarrier(IntrinsicInst &I) {
// Only the five barrier intrinsics dispatched by run() are expected here.
88 assert(I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier ||
89 I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier_signal ||
90 I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier_signal_isfirst ||
91 I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier_wait ||
92 I.getIntrinsicID() == Intrinsic::amdgcn_s_cluster_barrier);
93
94 const GCNSubtarget &ST = TM.getSubtarget(*I.getFunction());
95 bool IsSingleWaveWG = false;
96
// The single-wave check is only made when optimizing: the flag is set when the
// maximum flat workgroup size of the function does not exceed the wavefront
// size. At CodeGenOptLevel::None it stays false.
97 if (TM.getOptLevel() > CodeGenOptLevel::None) {
98 unsigned WGMaxSize = ST.getFlatWorkGroupSizes(*I.getFunction()).second;
99 IsSingleWaveWG = WGMaxSize <= ST.getWavefrontSize();
100 }
101
103
104
105
// Cluster barrier: always rewritten; this branch ends by erasing I and
// returning true.
106 if (I.getIntrinsicID() == Intrinsic::amdgcn_s_cluster_barrier) {
107
108
109 if (IsSingleWaveWG) {
// Single-wave workgroup: emit a wave barrier.
110 B.CreateIntrinsic(B.getVoidTy(), Intrinsic::amdgcn_wave_barrier, {});
111 } else {
// Otherwise emit signal_isfirst followed by a wait, then continue inserting at
// ThenTerm.
// NOTE(review): ThenTerm is defined on the missing lines 120-121; presumably
// it is the terminator of a block guarded by IsFirst - confirm.
114 Value *IsFirst = B.CreateIntrinsic(
115 B.getInt1Ty(), Intrinsic::amdgcn_s_barrier_signal_isfirst,
116 {BarrierID_32});
117 B.CreateIntrinsic(B.getVoidTy(), Intrinsic::amdgcn_s_barrier_wait,
118 {BarrierID_16});
119
122 B.SetInsertPoint(ThenTerm);
123 }
124
125
126
// Emitted after either branch above: a signal followed by a wait.
// NOTE(review): the BarrierID_32 / BarrierID_16 values used here are defined
// on the missing lines 127-128 and 132 and may differ from those used above.
129 B.CreateIntrinsic(B.getVoidTy(), Intrinsic::amdgcn_s_barrier_signal,
130 {BarrierID_32});
131
133 B.CreateIntrinsic(B.getVoidTy(), Intrinsic::amdgcn_s_barrier_wait,
134 {BarrierID_16});
135
136 I.eraseFromParent();
137 return true;
138 }
139
140 bool IsWorkgroupScope = false;
141
142 if (I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier_wait ||
143 I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier_signal ||
144 I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier_signal_isfirst) {
// The barrier ID is read from operand 0 as a signed constant.
// NOTE(review): the template argument of cast appears to have been stripped by
// the extraction, and lines 146-149 are missing; presumably they hold a
// condition on BarrierID that guards the assignment below - confirm.
145 int BarrierID = cast(I.getArgOperand(0))->getSExtValue();
150 IsWorkgroupScope = true;
151 } else {
// The only remaining intrinsic is amdgcn_s_barrier, which is always treated as
// workgroup scope.
152 assert(I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier);
153 IsWorkgroupScope = true;
154 }
155
// Workgroup-scope barrier in a single-wave workgroup: the call is erased in
// every case below.
156 if (IsWorkgroupScope && IsSingleWaveWG) {
157
// s_barrier and s_barrier_wait are replaced by a wave barrier.
158 if (I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier ||
159 I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier_wait) {
160 B.CreateIntrinsic(B.getVoidTy(), Intrinsic::amdgcn_wave_barrier, {});
161 } else if (I.getIntrinsicID() ==
162 Intrinsic::amdgcn_s_barrier_signal_isfirst) {
163
// Every use of the signal_isfirst result is replaced by the constant true.
164 I.replaceAllUsesWith(B.getInt1(true));
165 }
// s_barrier_signal reaches this point with no replacement emitted.
166 I.eraseFromParent();
167 return true;
168 }
169
// s_barrier on a subtarget that reports hasSplitBarriers(): replace the call
// by a signal followed by a wait.
170 if (I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier &&
171 ST.hasSplitBarriers()) {
172
175 B.CreateIntrinsic(B.getVoidTy(), Intrinsic::amdgcn_s_barrier_signal,
176 {BarrierID_32});
177 B.CreateIntrinsic(B.getVoidTy(), Intrinsic::amdgcn_s_barrier_wait,
178 {BarrierID_16});
179 I.eraseFromParent();
180 return true;
181 }
182
// Nothing was rewritten.
183 return false;
184}
185
188 AMDGPULowerIntrinsicsImpl Impl(M, TM);
189 if (!Impl.run())
192}
193
// ModulePass entry point: builds an AMDGPULowerIntrinsicsImpl for M and
// returns the result of its run().
194bool AMDGPULowerIntrinsicsLegacy::runOnModule(Module &M) {
// NOTE(review): the template argument of getAnalysis appears to have been
// stripped by the extraction, and listing line 196 (where TM is defined,
// presumably from TPC) is absent - confirm against the full source.
195 auto &TPC = getAnalysis();
197
198 AMDGPULowerIntrinsicsImpl Impl(M, TM);
199 return Impl.run();
200}
201
202#define PASS_DESC "AMDGPU lower intrinsics"
204 false)
208
209char AMDGPULowerIntrinsicsLegacy::ID = 0;
210
212 return new AMDGPULowerIntrinsicsLegacy;
213}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
The AMDGPU TargetMachine interface definition for hw codegen targets.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
uint64_t IntrinsicInst * II
ModuleAnalysisManager MAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
static bool forEachCall(Function &Intrin, T Callback)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
A wrapper class for inspecting calls to intrinsic functions.
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
A Module instance is used to store all the information related to an LLVM module.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
Target-Independent Code Generator Pass Configuration Options.
iterator_range< user_iterator > users()
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
ModulePass * createAMDGPULowerIntrinsicsLegacyPass()
Definition AMDGPULowerIntrinsics.cpp:211
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
decltype(auto) cast(const From &Val)
cast - Return the argument parameter cast to the specified type.
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
Definition AMDGPULowerIntrinsics.cpp:186