LLVM: lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

25

26using namespace llvm;

27

35 }

36

37 if (!ArgAlign)

38 ArgAlign = DL.getABITypeAlign(Ty);

39

40 return std::pair(Ty, *ArgAlign);

41}

42

43

45 const Function &EnqueuedBlock) {

46 const MDNode *Associated =

47 EnqueuedBlock.getMetadata(LLVMContext::MD_associated);

48 if (!Associated)

49 return "";

50

52 auto *RuntimeHandle =

54 if (!RuntimeHandle ||

55 RuntimeHandle->getSection() != ".amdgpu.kernel.runtime.handle")

56 return "";

57

61 return Name.str().str();

62}

63

64namespace llvm {

65

67 "amdgpu-dump-hsa-metadata",

68 cl::desc("Dump AMDGPU HSA Metadata"));

70 "amdgpu-verify-hsa-metadata",

71 cl::desc("Verify AMDGPU HSA Metadata"));

72

74

75

76

77

78

80 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';

81}

82

84 errs() << "AMDGPU HSA Metadata Parser Test: ";

85

87

88 if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) {

89 errs() << "FAIL\n";

90 return;

91 }

92

93 std::string ToHSAMetadataString;

95 FromHSAMetadataString.toYAML(StrOS);

96

97 errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';

98 if (HSAMetadataString != ToHSAMetadataString) {

99 errs() << "Original input: " << HSAMetadataString << '\n'

100 << "Produced output: " << StrOS.str() << '\n';

101 }

102}

103

104std::optional

107 .Case("read_only", StringRef("read_only"))

111}

112

128 default:

129 return std::nullopt;

130 }

131}

132

136 if (TypeQual.contains("pipe"))

137 return "pipe";

138

140 .Case("image1d_t", "image")

141 .Case("image1d_array_t", "image")

142 .Case("image1d_buffer_t", "image")

143 .Case("image2d_t", "image")

144 .Case("image2d_array_t", "image")

145 .Case("image2d_array_depth_t", "image")

146 .Case("image2d_array_msaa_t", "image")

147 .Case("image2d_array_msaa_depth_t", "image")

148 .Case("image2d_depth_t", "image")

149 .Case("image2d_msaa_t", "image")

150 .Case("image2d_msaa_depth_t", "image")

151 .Case("image3d_t", "image")

152 .Case("sampler_t", "sampler")

153 .Case("queue_t", "queue")

156 ? "dynamic_shared_pointer"

157 : "global_buffer")

158 : "by_value");

159}

160

163 switch (Ty->getTypeID()) {

167

168 auto BitWidth = Ty->getIntegerBitWidth();

170 case 8:

171 return "char";

172 case 16:

173 return "short";

174 case 32:

175 return "int";

176 case 64:

177 return "long";

178 default:

180 }

181 }

183 return "half";

185 return "float";

187 return "double";

190 auto *ElTy = VecTy->getElementType();

191 auto NumElements = VecTy->getNumElements();

193 }

194 default:

195 return "unknown";

196 }

197}

198

202 if (Node->getNumOperands() != 3)

203 return Dims;

204

205 for (auto &Op : Node->operands())

206 Dims.push_back(Dims.getDocument()->getNode(

208 return Dims;

209}

210

217

223

225 auto *Node = Mod.getNamedMetadata("llvm.printf.fmts");

227 return;

228

230 for (auto *Op : Node->operands())

231 if (Op->getNumOperands())

232 Printf.push_back(Printf.getDocument()->getNode(

233 cast(Op->getOperand(0))->getString(), true));

235}

236

239

240 auto *Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");

241 if (Node || Node->getNumOperands())

242 return;

243 auto *Op0 = Node->getOperand(0);

244 if (Op0->getNumOperands() <= 1)

245 return;

246

253 Kern[".language_version"] = LanguageVersion;

254}

255

260 if (auto *Node = Func.getMetadata("reqd_work_group_size"))

262 if (auto *Node = Func.getMetadata("work_group_size_hint"))

264 if (auto *Node = Func.getMetadata("vec_type_hint")) {

269 true);

270 }

271

273 if (!HandleName.empty()) {

274 Kern[".device_enqueue_symbol"] =

276 }

277

278 if (Func.hasFnAttribute("device-init"))

280 else if (Func.hasFnAttribute("device-fini"))

282}

283

289 for (auto &Arg : Func.args()) {

290 if (Arg.hasAttribute("amdgpu-hidden-argument"))

291 continue;

292

294 }

295

297

298 Kern[".args"] = Args;

299}

300

304 const auto *Func = Arg.getParent();

307

309 Node = Func->getMetadata("kernel_arg_name");

310 if (Node && ArgNo < Node->getNumOperands())

314

316 Node = Func->getMetadata("kernel_arg_type");

317 if (Node && ArgNo < Node->getNumOperands())

319

321 Node = Func->getMetadata("kernel_arg_base_type");

322 if (Node && ArgNo < Node->getNumOperands())

324

326

329 ActAccQual = "read_only";

330 else if (Arg.hasAttribute(Attribute::WriteOnly))

331 ActAccQual = "write_only";

332 }

333

335 Node = Func->getMetadata("kernel_arg_access_qual");

336 if (Node && ArgNo < Node->getNumOperands())

338

340 Node = Func->getMetadata("kernel_arg_type_qual");

341 if (Node && ArgNo < Node->getNumOperands())

343

344 const DataLayout &DL = Func->getDataLayout();

345

348

349

353 }

354

355

359

362 PointeeAlign, Name, TypeName, BaseTypeName, ActAccQual,

363 AccQual, TypeQual);

364}

365

371 auto Arg = Args.getDocument()->getMapNode();

372

373 if (!Name.empty())

374 Arg[".name"] = Arg.getDocument()->getNode(Name, true);

375 if (!TypeName.empty())

376 Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, true);

377 auto Size = DL.getTypeAllocSize(Ty);

378 Arg[".size"] = Arg.getDocument()->getNode(Size);

380 Arg[".offset"] = Arg.getDocument()->getNode(Offset);

382 Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, true);

383 if (PointeeAlign)

384 Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign->value());

385

388

389 if (ValueKind == "global_buffer" || ValueKind == "dynamic_shared_pointer")

390 Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier,

391 true);

392

394 Arg[".access"] = Arg.getDocument()->getNode(*AQ, true);

395

397 Arg[".actual_access"] = Arg.getDocument()->getNode(*AAQ, true);

398

400 TypeQual.split(SplitTypeQuals, " ", -1, false);

402 if (Key == "const")

403 Arg[".is_const"] = Arg.getDocument()->getNode(true);

404 else if (Key == "restrict")

405 Arg[".is_restrict"] = Arg.getDocument()->getNode(true);

406 else if (Key == "volatile")

407 Arg[".is_volatile"] = Arg.getDocument()->getNode(true);

408 else if (Key == "pipe")

409 Arg[".is_pipe"] = Arg.getDocument()->getNode(true);

410 }

411

412 Args.push_back(Arg);

413}

414

419

420 unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);

421 if (!HiddenArgNumBytes)

422 return;

423

424 const Module *M = Func.getParent();

425 auto &DL = M->getDataLayout();

427

429

430 if (HiddenArgNumBytes >= 8)

432 Args);

433 if (HiddenArgNumBytes >= 16)

435 Args);

436 if (HiddenArgNumBytes >= 24)

438 Args);

439

440 auto *Int8PtrTy =

442

443 if (HiddenArgNumBytes >= 32) {

444

445

446

447 if (M->getNamedMetadata("llvm.printf.fmts"))

449 Args);

450 else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr"))

452 Args);

453 else

455 }

456

457

458

459 if (HiddenArgNumBytes >= 40) {

460 if (!Func.hasFnAttribute("amdgpu-no-default-queue")) {

462 Args);

463 } else {

465 }

466 }

467

468 if (HiddenArgNumBytes >= 48) {

469 if (!Func.hasFnAttribute("amdgpu-no-completion-action")) {

471 Args);

472 } else {

474 }

475 }

476

477

478 if (HiddenArgNumBytes >= 56) {

479 if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {

481 Args);

482 } else {

484 }

485 }

486}

487

491 unsigned CodeObjectVersion) const {

495

497

498 Align MaxKernArgAlign;

499 Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode(

501 Kern[".group_segment_fixed_size"] =

502 Kern.getDocument()->getNode(ProgramInfo.LDSSize);

503 DelayedExprs->assignDocNode(Kern[".private_segment_fixed_size"],

506 DelayedExprs->assignDocNode(Kern[".uses_dynamic_stack"],

509 }

510

512 Kern[".workgroup_processor_mode"] =

513 Kern.getDocument()->getNode(ProgramInfo.WgpMode);

514

515

516 Kern[".kernarg_segment_align"] =

517 Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value());

518 Kern[".wavefront_size"] =

524

525

529 }

530

531 Kern[".max_flat_workgroup_size"] =

532 Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());

533

534 uint32_t NumWGY = MFI.getMaxNumWorkGroupsY();

535 uint32_t NumWGZ = MFI.getMaxNumWorkGroupsZ();

536 uint32_t NumWGX = MFI.getMaxNumWorkGroupsX();

537

538

539 if (NumWGX != std::numeric_limits<uint32_t>::max() && NumWGX != 0)

540 Kern[".max_num_workgroups_x"] = Kern.getDocument()->getNode(NumWGX);

541

542 if (NumWGY != std::numeric_limits<uint32_t>::max() && NumWGY != 0)

543 Kern[".max_num_workgroups_y"] = Kern.getDocument()->getNode(NumWGY);

544

545 if (NumWGZ != std::numeric_limits<uint32_t>::max() && NumWGZ != 0)

546 Kern[".max_num_workgroups_z"] = Kern.getDocument()->getNode(NumWGZ);

547

548 Kern[".sgpr_spill_count"] =

549 Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());

550 Kern[".vgpr_spill_count"] =

551 Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs());

552

553 return Kern;

554}

555

560

569

572 std::string HSAMetadataString;

575

580}

581

587 return;

588

589 auto CodeObjectVersion =

591 auto Kern = getHSAKernelProps(MF, ProgramInfo, CodeObjectVersion);

592

593 auto Kernels =

595

597 {

598 Kern[".name"] = Kern.getDocument()->getNode(Func.getName());

599 Kern[".symbol"] = Kern.getDocument()->getNode(

600 (Twine(Func.getName()) + Twine(".kd")).str(), true);

604 }

605

606 Kernels.push_back(Kern);

607}

608

609

610

611

612

619

624

625

626 if (ST.getImplicitArgNumBytes(Func) == 0)

627 return;

628

629 const Module *M = Func.getParent();

630 auto &DL = M->getDataLayout();

632

636

641

645

649

650

652

653 Offset += 8;

654

658

660

661 Offset += 6;

662 auto *Int8PtrTy =

664

665 if (M->getNamedMetadata("llvm.printf.fmts")) {

667 Args);

668 } else {

669 Offset += 8;

670 }

671

672 if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) {

674 Args);

675 } else {

676 Offset += 8;

677 }

678

679 if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {

681 Args);

682 } else {

683 Offset += 8;

684 }

685

686 if (!Func.hasFnAttribute("amdgpu-no-heap-ptr"))

688 else

689 Offset += 8;

690

691 if (!Func.hasFnAttribute("amdgpu-no-default-queue")) {

693 Args);

694 } else {

695 Offset += 8;

696 }

697

698 if (!Func.hasFnAttribute("amdgpu-no-completion-action")) {

700 Args);

701 } else {

702 Offset += 8;

703 }

704

705

708 Args);

709 } else {

710 Offset += 4;

711 }

712

713 Offset += 68;

714

715

716

717 if (!ST.hasApertureRegs()) {

720 } else {

721 Offset += 8;

722 }

723

726}

727

732

734 if (Func.getFnAttribute("uniform-work-group-size").getValueAsBool())

736}

737

738

739

740

741

748

753

756 if (Attr.isFixedDims()) {

761 Kern[".cluster_dims"] = ClusterDimsNode;

762 }

763}

764

765}

766}

MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL

AMD GCN specific subclass of TargetSubtarget.

Module.h This file contains the declarations for the Module class.

Defines struct to track resource usage and hardware flags for kernels and entry functions.

bool isDynamicLDSUsed() const

unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const

unsigned getWavefrontSize() const

TargetLoweringObjectFile * getObjFileLowering() const override

virtual bool EmitHSAMetadata(msgpack::Document &HSAMetadata, bool Strict)

Emit HSA Metadata.

std::string toString() const

This class represents an incoming formal argument to a Function.

LLVM_ABI Type * getParamByRefType() const

If this is a byref argument, return its type.

LLVM_ABI bool hasNoAliasAttr() const

Return true if this argument has the noalias attribute.

LLVM_ABI bool hasByRefAttr() const

Return true if this argument has the byref attribute.

LLVM_ABI bool onlyReadsMemory() const

Return true if this argument has the readonly or readnone attribute.

LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const

Check if an argument has a given attribute.

const Function * getParent() const

unsigned getArgNo() const

Return the index of this formal argument in its containing function.

LLVM_ABI MaybeAlign getParamAlign() const

If this is a byval or inalloca argument, return its alignment.

A parsed version of the target data layout string in and methods for querying it.

MDNode * getMetadata(unsigned KindID) const

Get the current metadata attachments for the given kind, if any.

const MDOperand & getOperand(unsigned I) const

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

Function & getFunction()

Return the LLVM function that this machine code represents.

Ty * getInfo()

getInfo - Keep track of various per-function pieces of information for backends that would like to do...

const TargetMachine & getTarget() const

getTarget - Return the target machine this machine code is compiled with

A Module instance is used to store all the information related to an LLVM module.

static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)

This constructs a pointer to an object of the specified type in a numbered address space.

This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...

GCNUserSGPRUsageInfo & getUserSGPRInfo()

AMDGPU::ClusterDimsAttr getClusterDims() const

SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

StringRef - Represent a constant reference to a string, i.e.

std::pair< StringRef, StringRef > split(char Separator) const

Split into two substrings around the first occurrence of a separator character.

bool contains(StringRef Other) const

Return true if the given string is a substring of *this, and false otherwise.

A switch()-like statement whose cases are string literals.

StringSwitch & Case(StringLiteral S, T Value)

Mangler & getMangler() const

void getNameWithPrefix(SmallVectorImpl< char > &Name, const GlobalValue *GV, Mangler &Mang, bool MayAlwaysUsePrivate=false) const

Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...

The instances of the Type class are immutable: once they are created, they are never changed.

static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)

static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)

bool isPointerTy() const

True if this is an instance of PointerType.

@ HalfTyID

16-bit floating point type

@ FloatTyID

32-bit floating point type

@ IntegerTyID

Arbitrary bit width integers.

@ FixedVectorTyID

Fixed width SIMD vector type.

@ DoubleTyID

64-bit floating point type

static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)

Type * getType() const

All values are typed, get the type of this value.

LLVM_ABI StringRef getName() const

Return a constant reference to the value's name.

A DocNode that is an array.

void push_back(DocNode N)

ArrayDocNode & getArray(bool Convert=false)

Get an ArrayDocNode for an array node.

Document * getDocument() const

Simple in-memory representation of a document of msgpack objects with ability to find and create arra...

DocNode getNode()

Create a nil node associated with this Document.

ArrayDocNode getArrayNode()

Create an empty Array node associated with this Document.

LLVM_ABI void toYAML(raw_ostream &OS)

Convert MsgPack Document to YAML text.

LLVM_ABI bool fromYAML(StringRef S)

Read YAML text into the MsgPack document. Returns false on failure.

A raw_ostream that writes to an std::string.

std::string & str()

Returns the string's reference.

@ REGION_ADDRESS

Address space for region memory. (GDS)

@ LOCAL_ADDRESS

Address space for local memory.

@ CONSTANT_ADDRESS

Address space for constant memory (VTX2).

@ FLAT_ADDRESS

Address space for flat memory.

@ GLOBAL_ADDRESS

Address space for global memory (RAT0, VTX0).

@ PRIVATE_ADDRESS

Address space for private memory.

constexpr uint32_t VersionMajorV5

HSA metadata major version for code object V5.

constexpr uint32_t VersionMinorV4

HSA metadata minor version for code object V4.

constexpr uint32_t VersionMinorV5

HSA metadata minor version for code object V5.

constexpr uint32_t VersionMinorV6

HSA metadata minor version for code object V6.

constexpr uint32_t VersionMajorV6

HSA metadata major version for code object V6.

constexpr uint32_t VersionMajorV4

HSA metadata major version for code object V4.

unsigned getAMDHSACodeObjectVersion(const Module &M)

@ AMDGPU_KERNEL

Used for AMDGPU code object kernels.

@ SPIR_KERNEL

Used for SPIR kernel functions.

std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)

Extract a Value from Metadata.

This is an optimization pass for GlobalISel generic memory operations.

static cl::opt< bool > VerifyHSAMetadata("amdgpu-verify-hsa-metadata", cl::desc("Verify AMDGPU HSA Metadata"))

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty

static cl::opt< bool > DumpHSAMetadata("amdgpu-dump-hsa-metadata", cl::desc("Dump AMDGPU HSA Metadata"))

LLVM_GET_TYPE_NAME_CONSTEXPR StringRef getTypeName()

We provide a function which tries to compute the (demangled) name of a type statically.

FunctionAddr VTableAddr uintptr_t uintptr_t Version

bool isa(const From &Val)

isa - Return true if the parameter to the template is an instance of one of the template type argu...

LLVM_ABI raw_fd_ostream & errs()

This returns a reference to a raw_ostream for standard error.

@ Mod

The access may modify the value stored in memory.

uint64_t alignTo(uint64_t Size, Align A)

Returns a multiple of A needed to store Size bytes.

DWARFExpression::Operation Op

constexpr unsigned BitWidth

decltype(auto) cast(const From &Val)

cast - Return the argument parameter cast to the specified type.

This struct is a compact representation of a valid (non-zero power of two) alignment.

This struct is a compact representation of a valid (power of two) or undefined (0) alignment.

Align valueOrOne() const

For convenience, returns a valid alignment or 1 if undefined.

Track resource usage for kernels / entry functions.

const MCExpr * NumAccVGPR

const MCExpr * DynamicCallStack

const MCExpr * ScratchSize