LLVM: lib/Target/AMDGPU/AMDGPULowerExecSync.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

29

30#include

31

32#define DEBUG_TYPE "amdgpu-lower-exec-sync"

33

34using namespace llvm;

35using namespace AMDGPU;

36

37namespace {

38

39

40

43 bool NeedsReplacement = false;

44 for (Use &U : GV->uses()) {

48 NeedsReplacement = true;

49 break;

50 }

51 }

52 }

53 if (!NeedsReplacement)

54 return GV;

55

65 U.getUser()->replaceUsesOfWith(GV, NewGV);

66 }

67 }

68 }

69 return NewGV;

70}

71

72

73

80 GV->setMetadata(LLVMContext::MD_absolute_symbol,

82}

83

85 sort(V, [](const auto *L, const auto *R) {

86 return L->getName() < R->getName();

87 });

88 return {std::move(V)};

89}

90

91

92static bool lowerExecSyncGlobalVariables(

97

98 int NumAbsolutes = 0;

100 for (auto &K : LDSToKernelsThatNeedToAccessItIndirectly) {

103 continue;

104

105

106

107 if (LDSToKernelsThatNeedToAccessItIndirectly[GV].size() > 1) {

109 } else {

110

111

112 LDSUsesInfo.direct_access[*K.second.begin()].insert(GV);

113 }

114 LDSToKernelsThatNeedToAccessItIndirectly.erase(GV);

115 }

116 OrderedGVs = sortByName(std::move(OrderedGVs));

119 unsigned BarId = NumAbsolutes + 1;

120 unsigned BarCnt = DL.getTypeAllocSize(GV->getValueType()) / 16;

121 NumAbsolutes += BarCnt;

122

123

124

125 unsigned Offset = 0x802000u | BarrierScope << 9 | BarId << 4;

126 recordLDSAbsoluteAddress(&M, GV, Offset);

127 }

128 OrderedGVs.clear();

129

130

131

132

138 }

139 OrderedKernels = sortByName(std::move(OrderedKernels));

140

142 for (Function *F : OrderedKernels) {

145 continue;

146

149

150 continue;

151 }

153 }

154 OrderedGVs = sortByName(std::move(OrderedGVs));

156

157

158 auto NewGV = uniquifyGVPerKernel(M, GV, F);

159 Changed |= (NewGV != GV);

161 unsigned BarId = Kernel2BarId[F];

162 BarId += NumAbsolutes + 1;

163 unsigned BarCnt = DL.getTypeAllocSize(GV->getValueType()) / 16;

164 Kernel2BarId[F] += BarCnt;

165 unsigned Offset = 0x802000u | BarrierScope << 9 | BarId << 4;

166 recordLDSAbsoluteAddress(&M, NewGV, Offset);

167 }

168 OrderedGVs.clear();

169 }

170

175 K.second.erase(GV);

176 }

177 }

179}

180

181static bool runLowerExecSyncGlobals(Module &M) {

185

186

187

189

190

196 LDSToKernelsThatNeedToAccessItIndirectly[GV].insert(F);

197 }

198 }

199

201

202 Changed |= lowerExecSyncGlobalVariables(

203 M, LDSUsesInfo, LDSToKernelsThatNeedToAccessItIndirectly);

204 }

206}

207

// ModulePass subclass that exposes this file's lowering as a pass object.
// The class holds no state of its own; runOnModule is only declared here
// and is defined out of line later in this file.
208class AMDGPULowerExecSyncLegacy : public ModulePass {

209public:

 // Static pass identifier; it is handed to the ModulePass base constructor
 // below and defined (initialized to 0) outside the class.
210 static char ID;

 // Default constructor: does nothing beyond forwarding ID to ModulePass.
211 AMDGPULowerExecSyncLegacy() : ModulePass(ID) {}

 // Module-level entry point overriding the ModulePass hook; the bool result
 // comes from the out-of-line definition.
212 bool runOnModule(Module &M) override;

213};

214

215}

216

217char AMDGPULowerExecSyncLegacy::ID = 0;

219

221 "AMDGPU lowering of execution synchronization", false,

222 false)

225 "AMDGPU lowering of execution synchronization", false,

227

// Runs the pass on module M by delegating to runLowerExecSyncGlobals(M) and
// returning that function's bool result unchanged.
228bool AMDGPULowerExecSyncLegacy::runOnModule(Module &M) {

229 return runLowerExecSyncGlobals(M);

230}

231

233 return new AMDGPULowerExecSyncLegacy();

234}

235

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

The AMDGPU TargetMachine interface definition for hw codegen targets.

MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL

This file provides interfaces used to build and manipulate a call graph, which is a very useful tool for interprocedural optimization.

This file contains the declarations for the subclasses of Constant, which represent the different fla...

This file defines the DenseMap class.

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

Target-Independent Code Generator Pass Configuration Options pass.

The basic data container for the call graph of a Module of IR.

A parsed version of the target data layout string, and methods for querying it.

LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)

Set a particular kind of metadata attachment.

LinkageTypes getLinkage() const

LLVM_ABI bool isAbsoluteSymbolRef() const

Returns whether this is a reference to an absolute symbol.

ThreadLocalMode getThreadLocalMode() const

PointerType * getType() const

Global values are always pointers.

Type * getValueType() const

const Constant * getInitializer() const

getInitializer - Return the initializer for this global variable.

LLVM_ABI void copyAttributesFrom(const GlobalVariable *Src)

copyAttributesFrom - copy all additional attributes (those not needed to create a GlobalVariable) fro...

bool isConstant() const

If the value is a global constant, its value is immutable throughout the runtime execution of the program.

This is an important class for using LLVM in a threaded context.

static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)

ModulePass class - This class is used to implement unstructured interprocedural optimizations and analyses.

A Module instance is used to store all the information related to an LLVM module.

unsigned getAddressSpace() const

Return the address space of the Pointer type.

A set of analyses that are preserved following a run of a transformation pass.

static PreservedAnalyses none()

Convenience factory function for the empty preserved set.

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

Target-Independent Code Generator Pass Configuration Options.

A Use represents the edge between a Value definition and its users.

iterator_range< use_iterator > uses()

LLVM_ABI StringRef getName() const

Return a constant reference to the value's name.

@ LOCAL_ADDRESS

Address space for local memory.

@ BARRIER_SCOPE_WORKGROUP

LLVM_READNONE constexpr bool isKernel(CallingConv::ID CC)

LDSUsesInfoTy getTransitiveUsesOfLDS(const CallGraph &CG, Module &M)

TargetExtType * isNamedBarrier(const GlobalVariable &GV)

bool eliminateConstantExprUsesOfLDSFromAllInstructions(Module &M)

DenseMap< GlobalVariable *, DenseSet< Function * > > VariableFunctionMap

unsigned ID

LLVM IR allows arbitrary numbers to be used as calling convention identifiers.

This is an optimization pass for GlobalISel generic memory operations.

auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)

Get the size of a range.

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

char & AMDGPULowerExecSyncLegacyPassID

Definition AMDGPULowerExecSync.cpp:218

iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)

Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.

void sort(IteratorTy Start, IteratorTy End)

ModulePass * createAMDGPULowerExecSyncLegacyPass()

Definition AMDGPULowerExecSync.cpp:232

AnalysisManager< Module > ModuleAnalysisManager

Convenience typedef for the Module analysis manager.

PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)

Definition AMDGPULowerExecSync.cpp:236

FunctionVariableMap direct_access

FunctionVariableMap indirect_access