LLVM: lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

19#include "llvm/IR/IntrinsicsAMDGPU.h"

22

23#define DEBUG_TYPE "amdgpu-lower-intrinsics"

24

25using namespace llvm;

26

27namespace {

28

29class AMDGPULowerIntrinsicsImpl {

30public:

33

35 : M(M), TM(TM) {}

36

37 bool run();

38

39private:

41};

42

43class AMDGPULowerIntrinsicsLegacy : public ModulePass {

44public:

45 static char ID;

46

47 AMDGPULowerIntrinsicsLegacy() : ModulePass(ID) {}

48

49 bool runOnModule(Module &M) override;

50

51 void getAnalysisUsage(AnalysisUsage &AU) const override {

53 }

54};

55

59 Callback(CI);

60 }

61}

62

63}

64

65bool AMDGPULowerIntrinsicsImpl::run() {

67

68 for (Function &F : M) {

69 switch (F.getIntrinsicID()) {

70 default:

71 continue;

72 case Intrinsic::amdgcn_s_barrier:

73 case Intrinsic::amdgcn_s_barrier_signal:

74 case Intrinsic::amdgcn_s_barrier_signal_isfirst:

75 case Intrinsic::amdgcn_s_barrier_wait:

76 case Intrinsic::amdgcn_s_cluster_barrier:

78 break;

79 }

80 }

81

83}

84

85

86

87bool AMDGPULowerIntrinsicsImpl::visitBarrier(IntrinsicInst &I) {

88 assert(I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier ||

89 I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier_signal ||

90 I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier_signal_isfirst ||

91 I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier_wait ||

92 I.getIntrinsicID() == Intrinsic::amdgcn_s_cluster_barrier);

93

94 const GCNSubtarget &ST = TM.getSubtarget(*I.getFunction());

95 bool IsSingleWaveWG = false;

96

97 if (TM.getOptLevel() > CodeGenOptLevel::None) {

98 unsigned WGMaxSize = ST.getFlatWorkGroupSizes(*I.getFunction()).second;

99 IsSingleWaveWG = WGMaxSize <= ST.getWavefrontSize();

100 }

101

103

104

105

106 if (I.getIntrinsicID() == Intrinsic::amdgcn_s_cluster_barrier) {

107

108

109 if (IsSingleWaveWG) {

110 B.CreateIntrinsic(B.getVoidTy(), Intrinsic::amdgcn_wave_barrier, {});

111 } else {

114 Value *IsFirst = B.CreateIntrinsic(

115 B.getInt1Ty(), Intrinsic::amdgcn_s_barrier_signal_isfirst,

116 {BarrierID_32});

117 B.CreateIntrinsic(B.getVoidTy(), Intrinsic::amdgcn_s_barrier_wait,

118 {BarrierID_16});

119

122 B.SetInsertPoint(ThenTerm);

123 }

124

125

126

129 B.CreateIntrinsic(B.getVoidTy(), Intrinsic::amdgcn_s_barrier_signal,

130 {BarrierID_32});

131

132 B.SetInsertPoint(&I);

133 B.CreateIntrinsic(B.getVoidTy(), Intrinsic::amdgcn_s_barrier_wait,

134 {BarrierID_16});

135

136 I.eraseFromParent();

137 return true;

138 }

139

140 bool IsWorkgroupScope = false;

141

142 if (I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier_wait ||

143 I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier_signal ||

144 I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier_signal_isfirst) {

145 int BarrierID = cast(I.getArgOperand(0))->getSExtValue();

150 IsWorkgroupScope = true;

151 } else {

152 assert(I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier);

153 IsWorkgroupScope = true;

154 }

155

156 if (IsWorkgroupScope && IsSingleWaveWG) {

157

158 if (I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier ||

159 I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier_wait) {

160 B.CreateIntrinsic(B.getVoidTy(), Intrinsic::amdgcn_wave_barrier, {});

161 } else if (I.getIntrinsicID() ==

162 Intrinsic::amdgcn_s_barrier_signal_isfirst) {

163

164 I.replaceAllUsesWith(B.getInt1(true));

165 }

166 I.eraseFromParent();

167 return true;

168 }

169

170 if (I.getIntrinsicID() == Intrinsic::amdgcn_s_barrier &&

171 ST.hasSplitBarriers()) {

172

175 B.CreateIntrinsic(B.getVoidTy(), Intrinsic::amdgcn_s_barrier_signal,

176 {BarrierID_32});

177 B.CreateIntrinsic(B.getVoidTy(), Intrinsic::amdgcn_s_barrier_wait,

178 {BarrierID_16});

179 I.eraseFromParent();

180 return true;

181 }

182

183 return false;

184}

185

188 AMDGPULowerIntrinsicsImpl Impl(M, TM);

189 if (!Impl.run())

192}

193

194bool AMDGPULowerIntrinsicsLegacy::runOnModule(Module &M) {

195 auto &TPC = getAnalysis();

197

198 AMDGPULowerIntrinsicsImpl Impl(M, TM);

199 return Impl.run();

200}

201

202#define PASS_DESC "AMDGPU lower intrinsics"

204 false)

208

209char AMDGPULowerIntrinsicsLegacy::ID = 0;

210

212 return new AMDGPULowerIntrinsicsLegacy;

213}

assert(UImm && (UImm != ~static_cast<T>(0)) && "Invalid immediate!")

The AMDGPU TargetMachine interface definition for hw codegen targets.

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

AMD GCN specific subclass of TargetSubtarget.

uint64_t IntrinsicInst * II

ModuleAnalysisManager MAM

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

static bool forEachCall(Function &Intrin, T Callback)

Represent the analysis usage information of a pass.

AnalysisUsage & addRequired()

A wrapper class for inspecting calls to intrinsic functions.

ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...

A Module instance is used to store all the information related to an LLVM module.

A set of analyses that are preserved following a run of a transformation pass.

static PreservedAnalyses none()

Convenience factory function for the empty preserved set.

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

CodeGenOptLevel getOptLevel() const

Returns the optimization level: None, Less, Default, or Aggressive.

const STC & getSubtarget(const Function &F) const

This method returns a pointer to the specified type of TargetSubtargetInfo.

Target-Independent Code Generator Pass Configuration Options.

iterator_range< user_iterator > users()

unsigned ID

LLVM IR allows to use arbitrary numbers as calling convention identifiers.

friend class Instruction

Iterator for Instructions in a `BasicBlock`.

This is an optimization pass for GlobalISel generic memory operations.

FunctionAddr VTableAddr Value

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)

Make a range that does early increment to allow mutation of the underlying range without disrupting i...

ModulePass * createAMDGPULowerIntrinsicsLegacyPass()

Definition AMDGPULowerIntrinsics.cpp:211

IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >

decltype(auto) cast(const From &Val)

cast - Return the argument parameter cast to the specified type.

LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)

Split the containing block at the specified instruction - everything before SplitBefore stays in the ...

AnalysisManager< Module > ModuleAnalysisManager

Convenience typedef for the Module analysis manager.

PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)

Definition AMDGPULowerIntrinsics.cpp:186