LLVM: lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

20#include "llvm/IR/IntrinsicsAMDGPU.h"

23

24#define DEBUG_TYPE "amdgpu-lower-kernel-arguments"

25

26using namespace llvm;

27

28namespace {

29

// Function pass declared for the legacy pass manager interface (it derives
// from FunctionPass and exposes the usual static pass ID).
// NOTE(review): listing lines 34, 36, 39 and 40 are absent from this
// extraction, so the constructor and the runOnFunction declaration are not
// visible here — confirm against the upstream file.
30class AMDGPULowerKernelArguments : public FunctionPass {

31public:

// Pass identification member; defined out of line as
// `char AMDGPULowerKernelArguments::ID = 0;` later in the file.
32 static char ID;

33

35

37

// Declares which analyses this pass needs and preserves.
// NOTE(review): the body (listing lines 39-40) is missing from this view;
// presumably it requires TargetPassConfig, since runOnFunction calls
// getTM() on the result of getAnalysis — TODO confirm.
38 void getAnalysisUsage(AnalysisUsage &AU) const override {

41 }

42};

43

44}

45

46

51

52

53

55 break;

56 }

57

58 return InsPt;

59}

60

64 return false;

65

71

72 const Align KernArgBaseAlign(16);

73 const uint64_t BaseOffset = ST.getExplicitKernelArgOffset();

74

76

77 const uint64_t TotalKernArgSize = ST.getKernArgSegmentSize(F, MaxAlign);

78 if (TotalKernArgSize == 0)

79 return false;

80

82 Builder.CreateIntrinsic(Intrinsic::amdgcn_kernarg_segment_ptr, {},

83 nullptr, F.getName() + ".kernarg.segment");

84 KernArgSegment->addRetAttr(Attribute::NonNull);

87

88 uint64_t ExplicitArgOffset = 0;

90 const bool IsByRef = Arg.hasByRefAttr();

91 Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType();

92 MaybeAlign ParamAlign = IsByRef ? Arg.getParamAlign() : std::nullopt;

93 Align ABITypeAlign = DL.getValueOrABITypeAlignment(ParamAlign, ArgTy);

94

96 uint64_t AllocSize = DL.getTypeAllocSize(ArgTy);

97

98 uint64_t EltOffset = alignTo(ExplicitArgOffset, ABITypeAlign) + BaseOffset;

99 ExplicitArgOffset = alignTo(ExplicitArgOffset, ABITypeAlign) + AllocSize;

100

101

102 if (Arg.use_empty() || Arg.hasInRegAttr())

103 continue;

104

105

106

107 if (IsByRef) {

108 Value *ArgOffsetPtr = Builder.CreateConstInBoundsGEP1_64(

109 Builder.getInt8Ty(), KernArgSegment, EltOffset,

110 Arg.getName() + ".byval.kernarg.offset");

111

112 Value *CastOffsetPtr =

113 Builder.CreateAddrSpaceCast(ArgOffsetPtr, Arg.getType());

115 continue;

116 }

117

119

120

121

122

125 !ST.hasUsableDSOffset())

126 continue;

127

128

129

130 if (Arg.hasNoAliasAttr())

131 continue;

132 }

133

135 bool IsV3 = VT && VT->getNumElements() == 3;

137

139

140 int64_t AlignDownOffset = alignDown(EltOffset, 4);

141 int64_t OffsetDiff = EltOffset - AlignDownOffset;

143 KernArgBaseAlign, DoShiftOpt ? AlignDownOffset : EltOffset);

144

146 Type *AdjustedArgTy;

147 if (DoShiftOpt) {

148

149

150

151

152

153

154

155 ArgPtr = Builder.CreateConstInBoundsGEP1_64(

156 Builder.getInt8Ty(), KernArgSegment, AlignDownOffset,

157 Arg.getName() + ".kernarg.offset.align.down");

158 AdjustedArgTy = Builder.getInt32Ty();

159 } else {

160 ArgPtr = Builder.CreateConstInBoundsGEP1_64(

161 Builder.getInt8Ty(), KernArgSegment, EltOffset,

162 Arg.getName() + ".kernarg.offset");

163 AdjustedArgTy = ArgTy;

164 }

165

166 if (IsV3 && Size >= 32) {

168

169 AdjustedArgTy = V4Ty;

170 }

171

173 Builder.CreateAlignedLoad(AdjustedArgTy, ArgPtr, AdjustedAlign);

174 Load->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(Ctx, {}));

175

177

178 if (Arg.hasAttribute(Attribute::NoUndef))

179 Load->setMetadata(LLVMContext::MD_noundef, MDNode::get(Ctx, {}));

180

181 if (Arg.hasAttribute(Attribute::Range)) {

183 Arg.getAttribute(Attribute::Range).getValueAsConstantRange();

184 Load->setMetadata(LLVMContext::MD_range,

186 }

187

189 if (Arg.hasNonNullAttr())

190 Load->setMetadata(LLVMContext::MD_nonnull, MDNode::get(Ctx, {}));

191

192 uint64_t DerefBytes = Arg.getDereferenceableBytes();

193 if (DerefBytes != 0) {

194 Load->setMetadata(

195 LLVMContext::MD_dereferenceable,

198 ConstantInt::get(Builder.getInt64Ty(), DerefBytes))));

199 }

200

201 uint64_t DerefOrNullBytes = Arg.getDereferenceableOrNullBytes();

202 if (DerefOrNullBytes != 0) {

203 Load->setMetadata(

204 LLVMContext::MD_dereferenceable_or_null,

206 MDB.createConstant(ConstantInt::get(Builder.getInt64Ty(),

207 DerefOrNullBytes))));

208 }

209

210 if (MaybeAlign ParamAlign = Arg.getParamAlign()) {

211 Load->setMetadata(

212 LLVMContext::MD_align,

214 Builder.getInt64Ty(), ParamAlign->value()))));

215 }

216 }

217

218

219

220 if (DoShiftOpt) {

221 Value *ExtractBits = OffsetDiff == 0 ?

222 Load : Builder.CreateLShr(Load, OffsetDiff * 8);

223

225 Value *Trunc = Builder.CreateTrunc(ExtractBits, ArgIntTy);

226 Value *NewVal = Builder.CreateBitCast(Trunc, ArgTy,

227 Arg.getName() + ".load");

229 } else if (IsV3) {

230 Value *Shuf = Builder.CreateShuffleVector(Load, ArrayRef{0, 1, 2},

231 Arg.getName() + ".load");

233 } else {

234 Load->setName(Arg.getName() + ".load");

235 Arg.replaceAllUsesWith(Load);

236 }

237 }

238

241

242 return true;

243}

244

// Legacy pass manager entry point, invoked with the function \p F to process.
// It looks up a required analysis and reads the TargetMachine from it.
// NOTE(review): listing line 248 (the return statement) is missing from this
// view; presumably it forwards F and TM to the static lowerKernelArguments
// helper and returns its result — confirm against upstream.
245bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {

// NOTE(review): the template argument of getAnalysis appears to have been
// stripped during extraction; presumably TargetPassConfig — TODO confirm.
246 auto &TPC = getAnalysis();

// The target machine is taken from the analysis result obtained above.
247 const TargetMachine &TM = TPC.getTM();

249}

250

252 "AMDGPU Lower Kernel Arguments", false, false)

255

256char AMDGPULowerKernelArguments::ID = 0;

257

259 return new AMDGPULowerKernelArguments();

260}

261

266

269 return PA;

270 }

271

273}

static BasicBlock::iterator getInsertPt(BasicBlock &BB)

Definition AMDGPULowerKernelArguments.cpp:47

static bool lowerKernelArguments(Function &F, const TargetMachine &TM)

Definition AMDGPULowerKernelArguments.cpp:61

MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL

This file contains the simple types necessary to represent the attributes associated with functions a...

static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

static bool runOnFunction(Function &F, bool PostInlining)

AMD GCN specific subclass of TargetSubtarget.

ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

Target-Independent Code Generator Pass Configuration Options pass.

PreservedAnalyses run(Function &, FunctionAnalysisManager &)

Definition AMDGPULowerKernelArguments.cpp:263

an instruction to allocate memory on the stack

LLVM_ABI bool isStaticAlloca() const

Return true if this alloca is in the entry block of the function and is a constant size.

Represent the analysis usage information of a pass.

AnalysisUsage & addRequired()

void setPreservesAll()

Set by analyses that do not transform their input at all.

This class represents an incoming formal argument to a Function.

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

static LLVM_ABI Attribute getWithDereferenceableBytes(LLVMContext &Context, uint64_t Bytes)

static LLVM_ABI Attribute getWithAlignment(LLVMContext &Context, Align Alignment)

Return a uniquified Attribute object that has the specific alignment set.

LLVM Basic Block Representation.

LLVM_ABI const_iterator getFirstInsertionPt() const

Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI instruction.

InstListType::iterator iterator

Instruction iterators...

Represents analyses that only rely on functions' control flow.

void addRetAttr(Attribute::AttrKind Kind)

Adds the attribute to the return value.

This class represents a function call, abstracting a target machine's calling convention.

This class represents a range of values.

A parsed version of the target data layout string, and methods for querying it.

static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)

FunctionPass class - This class is used to implement most global optimizations.

This provides a uniform API for creating instructions and inserting them into a basic block: either a...

Class to represent integer types.

This is an important class for using LLVM in a threaded context.

An instruction for reading from memory.

LLVM_ABI ConstantAsMetadata * createConstant(Constant *C)

Return the given constant as metadata.

LLVM_ABI MDNode * createRange(const APInt &Lo, const APInt &Hi)

Return metadata describing the range [Lo, Hi).

static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)

A set of analyses that are preserved following a run of a transformation pass.

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

PreservedAnalyses & preserveSet()

Mark an analysis set as preserved.

Primary interface to the complete machine description for the target machine.

const STC & getSubtarget(const Function &F) const

This method returns a pointer to the specified type of TargetSubtargetInfo.

Target-Independent Code Generator Pass Configuration Options.

The instances of the Type class are immutable: once they are created, they are never changed.

static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)

bool isAggregateType() const

Return true if the type is an aggregate type.

static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)

LLVM Value Representation.

Type * getType() const

All values are typed, get the type of this value.

LLVM_ABI void replaceAllUsesWith(Value *V)

Change all uses of this to point to a new Value.

@ REGION_ADDRESS

Address space for region memory. (GDS)

@ LOCAL_ADDRESS

Address space for local memory.

unsigned ID

LLVM IR allows arbitrary numbers to be used as calling convention identifiers.

@ AMDGPU_KERNEL

Used for AMDGPU code object kernels.

This is an optimization pass for GlobalISel generic memory operations.

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

constexpr T alignDown(U Value, V Align, W Skew=0)

Returns the largest unsigned integer that is less than or equal to Value and is congruent to Skew modulo Align.

FunctionPass * createAMDGPULowerKernelArgumentsPass()

Definition AMDGPULowerKernelArguments.cpp:258

bool isa(const From &Val)

isa - Return true if the parameter to the template is an instance of one of the template type arguments.

uint64_t alignTo(uint64_t Size, Align A)

Returns a multiple of A needed to store Size bytes.

Align commonAlignment(Align A, uint64_t Offset)

Returns the alignment that satisfies both alignments.

AnalysisManager< Function > FunctionAnalysisManager

Convenience typedef for the Function analysis manager.

This struct is a compact representation of a valid (non-zero power of two) alignment.

This struct is a compact representation of a valid (power of two) or undefined (0) alignment.