LLVM: lib/Target/AMDGPU/AMDGPUAttributor.cpp Source File

//===- AMDGPUAttributor.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass uses Attributor framework to deduce AMDGPU attributes.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/Attributor.h"

#define DEBUG_TYPE "amdgpu-attributor"

using namespace llvm;

26 "amdgpu-indirect-call-specialization-threshold",

28 "A threshold controls whether an indirect call will be specialized"),

30

#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,

enum ImplicitArgumentPositions {
#include "AMDGPUAttributes.def"
  LAST_ARG_POS
};

#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,

enum ImplicitArgumentMask {
  UNKNOWN_INTRINSIC = 0,
#include "AMDGPUAttributes.def"
  NOT_IMPLICIT_INPUT,
  ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
};

#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
static constexpr std::pair<ImplicitArgumentMask, StringLiteral>
    ImplicitAttrs[] = {
#include "AMDGPUAttributes.def"
};
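// The AMDGPU_ATTRIBUTE X-macro stamps out one enumerator and one table entry
// per implicit argument. For a .def line such as
//   AMDGPU_ATTRIBUTE(DISPATCH_PTR, "amdgpu-no-dispatch-ptr")
// the three includes above produce DISPATCH_PTR_POS, the bit
// DISPATCH_PTR = 1 << DISPATCH_PTR_POS, and the table entry
// {DISPATCH_PTR, "amdgpu-no-dispatch-ptr"}. Note that the attribute strings
// are negative ("amdgpu-no-*"): a set bit means the function is assumed *not*
// to need the corresponding implicit argument.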

// Returns the implicit-argument mask for a given intrinsic, i.e. which
// preloaded kernel arguments the intrinsic (transitively) requires.
static ImplicitArgumentMask
intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
                    bool HasApertureRegs, bool SupportsGetDoorBellID,
                    unsigned CodeObjectVersion) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return WORKITEM_ID_X;
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return WORKGROUP_ID_X;
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return WORKITEM_ID_Y;
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return WORKITEM_ID_Z;
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return WORKGROUP_ID_Y;
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return WORKGROUP_ID_Z;
  case Intrinsic::amdgcn_cluster_id_x:
    NonKernelOnly = true;
    return CLUSTER_ID_X;
  case Intrinsic::amdgcn_cluster_id_y:
    return CLUSTER_ID_Y;
  case Intrinsic::amdgcn_cluster_id_z:
    return CLUSTER_ID_Z;
  case Intrinsic::amdgcn_lds_kernel_id:
    return LDS_KERNEL_ID;
  case Intrinsic::amdgcn_dispatch_ptr:
    return DISPATCH_PTR;
  case Intrinsic::amdgcn_dispatch_id:
    return DISPATCH_ID;
  case Intrinsic::amdgcn_implicitarg_ptr:
    return IMPLICIT_ARG_PTR;
  // Need queue_ptr anyway. And under V5, we also need implicitarg_ptr to
  // reach the queue pointer.
  case Intrinsic::amdgcn_queue_ptr:
    NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
    return QUEUE_PTR;
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
    if (HasApertureRegs)
      return NOT_IMPLICIT_INPUT;
    // Under V5, we need implicitarg_ptr + offsets to access private_base or
    // shared_base. For pre-V5, however, we need to access them through
    // queue_ptr + offsets.
    return CodeObjectVersion >= AMDGPU::AMDHSA_COV5 ? IMPLICIT_ARG_PTR
                                                    : QUEUE_PTR;
  case Intrinsic::trap:
  case Intrinsic::debugtrap:
  case Intrinsic::ubsantrap:
    if (SupportsGetDoorBellID) // GetDoorbellID is supported since V4.
      return CodeObjectVersion >= AMDGPU::AMDHSA_COV4 ? NOT_IMPLICIT_INPUT
                                                      : QUEUE_PTR;
    NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
    return QUEUE_PTR;
  default:
    return UNKNOWN_INTRINSIC;
  }
}
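// The two out-parameters encode special cases: NonKernelOnly marks IDs that
// are free hardware inputs in kernels (so the attribute only matters for
// callable functions), and NeedsImplicit marks cases where code object V5+
// reaches the data through implicitarg_ptr instead of queue_ptr.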

static bool castRequiresQueuePtr(unsigned SrcAS) {
  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}

static bool isDSAddress(const Constant *C) {
  const GlobalValue *GV = dyn_cast<GlobalValue>(C);
  if (!GV)
    return false;
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
}

/// Returns true if sanitizer attributes are present on a function.
static bool hasSanitizerAttributes(const Function &F) {
  return F.hasFnAttribute(Attribute::SanitizeAddress) ||
         F.hasFnAttribute(Attribute::SanitizeThread) ||
         F.hasFnAttribute(Attribute::SanitizeMemory) ||
         F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
         F.hasFnAttribute(Attribute::SanitizeMemTag);
}
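// AMDGPUInformationCache extends the Attributor's InformationCache with
// subtarget queries (aperture registers, doorbell support, wave and
// work-group limits) plus a memoized scan of constants for DS globals and
// private/local-to-flat address space casts.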

namespace {

class AMDGPUInformationCache : public InformationCache {
public:
  AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
                         BumpPtrAllocator &Allocator,
                         SetVector<Function *> *CGSCC, TargetMachine &TM)
      : InformationCache(M, AG, Allocator, CGSCC), TM(TM),
        CodeObjectVersion(AMDGPU::getAMDHSACodeObjectVersion(M)) {}

  TargetMachine &TM;

  enum ConstantStatus : uint8_t {
    NONE = 0,
    DS_GLOBAL = 1 << 0,
    ADDR_SPACE_CAST_PRIVATE_TO_FLAT = 1 << 1,
    ADDR_SPACE_CAST_LOCAL_TO_FLAT = 1 << 2,
    ADDR_SPACE_CAST_BOTH_TO_FLAT =
        ADDR_SPACE_CAST_PRIVATE_TO_FLAT | ADDR_SPACE_CAST_LOCAL_TO_FLAT
  };

164

165 bool hasApertureRegs(Function &F) {

166 const GCNSubtarget &ST = TM.getSubtarget(F);

167 return ST.hasApertureRegs();

168 }

169

170

171 bool supportsGetDoorbellID(Function &F) {

172 const GCNSubtarget &ST = TM.getSubtarget(F);

173 return ST.supportsGetDoorbellID();

174 }

175

176 std::optional<std::pair<unsigned, unsigned>>

177 getFlatWorkGroupSizeAttr(const Function &F) const {

179 if (!R)

180 return std::nullopt;

181 return std::make_pair(R->first, *(R->second));

182 }

183

184 std::pair<unsigned, unsigned>

185 getDefaultFlatWorkGroupSize(const Function &F) const {

186 const GCNSubtarget &ST = TM.getSubtarget(F);

187 return ST.getDefaultFlatWorkGroupSize(F.getCallingConv());

188 }

189

190 std::pair<unsigned, unsigned>

191 getMaximumFlatWorkGroupRange(const Function &F) {

192 const GCNSubtarget &ST = TM.getSubtarget(F);

193 return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};

194 }

195

196 SmallVector getMaxNumWorkGroups(const Function &F) {

197 const GCNSubtarget &ST = TM.getSubtarget(F);

198 return ST.getMaxNumWorkGroups(F);

199 }

200

  /// Get code object version.
  unsigned getCodeObjectVersion() const { return CodeObjectVersion; }

  /// Get the effective value of "amdgpu-waves-per-eu" for the function,
  /// accounting for the interaction with the passed value to use for the flat
  /// work group sizes.
  std::pair<unsigned, unsigned>
  getWavesPerEU(const Function &F,
                std::pair<unsigned, unsigned> FlatWorkGroupSize) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getWavesPerEU(FlatWorkGroupSize, getLDSSize(F), F);
  }

  std::optional<std::pair<unsigned, unsigned>>
  getWavesPerEUAttr(const Function &F) {
    auto Val = AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu",
                                               /*OnlyFirstRequired=*/true);
    if (!Val)
      return std::nullopt;
    if (!Val->second) {
      const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
      Val->second = ST.getMaxWavesPerEU();
    }
    return std::make_pair(Val->first, *(Val->second));
  }

  std::pair<unsigned, unsigned>
  getEffectiveWavesPerEU(const Function &F,
                         std::pair<unsigned, unsigned> WavesPerEU,
                         std::pair<unsigned, unsigned> FlatWorkGroupSize) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getEffectiveWavesPerEU(WavesPerEU, FlatWorkGroupSize,
                                     getLDSSize(F));
  }

  unsigned getMaxWavesPerEU(const Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.getMaxWavesPerEU();
  }

  unsigned getMaxAddrSpace() const override {
    return AMDGPUAS::MAX_AMDGPU_ADDRESS;
  }

private:
  /// Check if the ConstantExpr \p CE uses an addrspacecast from private or
  /// local to flat.
  static uint8_t visitConstExpr(const ConstantExpr *CE) {
    uint8_t Status = NONE;

    if (CE->getOpcode() == Instruction::AddrSpaceCast) {
      unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
      if (SrcAS == AMDGPUAS::PRIVATE_ADDRESS)
        Status |= ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
      else if (SrcAS == AMDGPUAS::LOCAL_ADDRESS)
        Status |= ADDR_SPACE_CAST_LOCAL_TO_FLAT;
    }

    return Status;
  }

  /// Returns the minimum amount of LDS space used by a workgroup running
  /// function \p F.
  static unsigned getLDSSize(const Function &F) {
    return AMDGPU::getIntegerPairAttribute(F, "amdgpu-lds-size",
                                           {0, UINT32_MAX}, true)
        .first;
  }
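  // getConstantAccess below walks the operands of a constant depth-first,
  // OR-ing together the ConstantStatus bits of everything reachable, and
  // memoizes the result per constant so repeated queries stay cheap.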

  /// Get the constant access bitmap for \p C.
  uint8_t getConstantAccess(const Constant *C,
                            SmallPtrSetImpl<const Constant *> &Visited) {
    auto It = ConstantStatus.find(C);
    if (It != ConstantStatus.end())
      return It->second;

    uint8_t Result = 0;
    if (isDSAddress(C))
      Result = DS_GLOBAL;

    if (const auto *CE = dyn_cast<ConstantExpr>(C))
      Result |= visitConstExpr(CE);

    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC || !Visited.insert(OpC).second)
        continue;

      Result |= getConstantAccess(OpC, Visited);
    }

    ConstantStatus[C] = Result;
    return Result;
  }

public:
  /// Returns true if \p Fn needs the queue pointer because of \p C.
  bool needsQueuePtr(const Constant *C, Function &Fn) {
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
    bool HasAperture = hasApertureRegs(Fn);

    // No need to explore the constants.
    if (!IsNonEntryFunc && HasAperture)
      return false;

    SmallPtrSet<const Constant *, 8> Visited;
    uint8_t Access = getConstantAccess(C, Visited);

    // We need to trap on DS globals in non-entry functions.
    if (IsNonEntryFunc && (Access & DS_GLOBAL))
      return true;

    return !HasAperture && (Access & ADDR_SPACE_CAST_BOTH_TO_FLAT);
  }

  bool checkConstForAddrSpaceCastFromPrivate(const Constant *C) {
    SmallPtrSet<const Constant *, 8> Visited;
    uint8_t Access = getConstantAccess(C, Visited);
    return Access & ADDR_SPACE_CAST_PRIVATE_TO_FLAT;
  }

private:
  /// Used to determine if the Constant needs the queue pointer.
  DenseMap<const Constant *, uint8_t> ConstantStatus;
  const unsigned CodeObjectVersion;
};

struct AAAMDAttributes
    : public StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
                          AbstractAttribute> {
  using Base = StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,
                            AbstractAttribute>;

  AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDAttributes &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  /// See AbstractAttribute::getName().
  StringRef getName() const override { return "AAAMDAttributes"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDAttributes.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDAttributes::ID = 0;
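// Each AA* below follows the same Attributor pattern: initialize() seeds the
// state from existing IR attributes, updateImpl() runs until a fixpoint while
// consulting callers or callees, and manifest() writes the deduced result
// back as a function attribute.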

struct AAUniformWorkGroupSize
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  /// See AbstractAttribute::getName().
  StringRef getName() const override { return "AAUniformWorkGroupSize"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAUniformWorkGroupSize.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAUniformWorkGroupSize::ID = 0;

struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
  AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
      : AAUniformWorkGroupSize(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();

    if (CC != CallingConv::AMDGPU_KERNEL)
      return;

    bool InitialValue = false;
    if (F->hasFnAttribute("uniform-work-group-size"))
      InitialValue =
          F->getFnAttribute("uniform-work-group-size").getValueAsString() ==
          "true";

    if (InitialValue)
      indicateOptimisticFixpoint();
    else
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << "\n");

      const auto *CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
      if (!CallerInfo || !CallerInfo->isValidState())
        return false;

      Change = Change | clampStateAndIndicateChange(this->getState(),
                                                    CallerInfo->getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
                                      getAssumed() ? "true" : "false"));
    return A.manifestAttrs(getIRPosition(), AttrList,
                           /*ForceReplace=*/true);
  }

  bool isValidState() const override {
    // This state is always valid, even when the state is false.
    return true;
  }

  const std::string getAsStr(Attributor *) const override {
    return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}
};

AAUniformWorkGroupSize &
AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
  llvm_unreachable(
      "AAUniformWorkGroupSize is only valid for function position");
}
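// AAAMDAttributesFunction starts from the optimistic assumption that no
// implicit kernel argument is needed (all "amdgpu-no-*" bits assumed), then
// removes bits as evidence appears: intrinsic calls, address space casts,
// loads from specific implicitarg_ptr offsets, or callees that already lost
// the bit.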

struct AAAMDAttributesFunction : public AAAMDAttributes {
  AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDAttributes(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();

    // If the function requires the implicit arg pointer due to sanitizers,
    // assume it is needed even if it is explicitly marked as not requiring
    // it.
    const bool HasSanitizerAttrs = hasSanitizerAttributes(*F);
    if (HasSanitizerAttrs) {
      removeAssumedBits(IMPLICIT_ARG_PTR);
      removeAssumedBits(HOSTCALL_PTR);
      removeAssumedBits(FLAT_SCRATCH_INIT);
    }

    for (auto Attr : ImplicitAttrs) {
      if (HasSanitizerAttrs &&
          (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR ||
           Attr.first == FLAT_SCRATCH_INIT))
        continue;

      if (F->hasFnAttribute(Attr.second))
        addKnownBits(Attr.first);
    }

    if (F->isDeclaration())
      return;

    // Ignore functions with graphics calling conventions, these are currently
    // not allowed to have kernel arguments.
    if (AMDGPU::isGraphics(F->getCallingConv())) {
      indicatePessimisticFixpoint();
      return;
    }
  }

  ChangeStatus updateImpl(Attributor &A) override {
    Function *F = getAssociatedFunction();
    // The current assumed state used to determine a change.
    auto OrigAssumed = getAssumed();

    // Check for Intrinsics and propagate attributes.
    const AACallEdges *AAEdges = A.getAAFor<AACallEdges>(
        *this, this->getIRPosition(), DepClassTy::REQUIRED);
    if (!AAEdges || !AAEdges->getState().isValidState() ||
        AAEdges->hasNonAsmUnknownCallee())
      return indicatePessimisticFixpoint();

    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());

    bool NeedsImplicit = false;
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
    bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F);
    unsigned COV = InfoCache.getCodeObjectVersion();

    for (Function *Callee : AAEdges->getOptimisticEdges()) {
      Intrinsic::ID IID = Callee->getIntrinsicID();
      if (IID == Intrinsic::not_intrinsic) {
        const AAAMDAttributes *AAAMD = A.getAAFor<AAAMDAttributes>(
            *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
        if (!AAAMD || !AAAMD->isValidState())
          return indicatePessimisticFixpoint();
        *this &= *AAAMD;
        continue;
      }

      bool NonKernelOnly = false;
      ImplicitArgumentMask AttrMask =
          intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
                              HasApertureRegs, SupportsGetDoorbellID, COV);
      if (AttrMask == UNKNOWN_INTRINSIC) {
        // Assume intrinsics without nocallback may invoke a function which
        // accesses implicit arguments.
        if (!Callee->hasFnAttribute(Attribute::NoCallback))
          return indicatePessimisticFixpoint();
        continue;
      }

      if (AttrMask != NOT_IMPLICIT_INPUT) {
        if ((IsNonEntryFunc || !NonKernelOnly))
          removeAssumedBits(AttrMask);
      }
    }

    // Need implicitarg_ptr to access queue_ptr, private_base, and shared_base.
    if (NeedsImplicit)
      removeAssumedBits(IMPLICIT_ARG_PTR);

    if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) {
      // Under V5, we need implicitarg_ptr + offsets to access private_base or
      // shared_base. We do not actually need queue_ptr.
      if (COV >= 5)
        removeAssumedBits(IMPLICIT_ARG_PTR);
      else
        removeAssumedBits(QUEUE_PTR);
    }

    if (funcRetrievesMultigridSyncArg(A, COV)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) &&
             "multigrid_sync_arg needs implicitarg_ptr");
      removeAssumedBits(MULTIGRID_SYNC_ARG);
    }

    if (funcRetrievesHostcallPtr(A, COV)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
      removeAssumedBits(HOSTCALL_PTR);
    }

    if (funcRetrievesHeapPtr(A, COV)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");
      removeAssumedBits(HEAP_PTR);
    }

    if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A, COV)) {
      assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr");
      removeAssumedBits(QUEUE_PTR);
    }

    if (isAssumed(LDS_KERNEL_ID) && funcRetrievesLDSKernelId(A)) {
      removeAssumedBits(LDS_KERNEL_ID);
    }

    if (isAssumed(DEFAULT_QUEUE) && funcRetrievesDefaultQueue(A, COV))
      removeAssumedBits(DEFAULT_QUEUE);

    if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(A, COV))
      removeAssumedBits(COMPLETION_ACTION);

    if (isAssumed(FLAT_SCRATCH_INIT) && needFlatScratchInit(A))
      removeAssumedBits(FLAT_SCRATCH_INIT);

    return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
                                       : ChangeStatus::UNCHANGED;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    for (auto Attr : ImplicitAttrs) {
      if (isKnown(Attr.first))
        AttrList.push_back(Attribute::get(Ctx, Attr.second));
    }

    return A.manifestAttrs(getIRPosition(), AttrList,
                           /*ForceReplace=*/true);
  }

  const std::string getAsStr(Attributor *) const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << "AMDInfo[";
    for (auto Attr : ImplicitAttrs)
      if (isAssumed(Attr.first))
        OS << ' ' << Attr.second;
    OS << " ]";
    return OS.str();
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

private:
  bool checkForQueuePtr(Attributor &A) {
    Function *F = getAssociatedFunction();
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    bool NeedsQueuePtr = false;

    auto CheckAddrSpaceCasts = [&](Instruction &I) {
      unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
      if (castRequiresQueuePtr(SrcAS)) {
        NeedsQueuePtr = true;
        return false;
      }
      return true;
    };

    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);

    // Check for addrspacecasts from private or local to flat: these need the
    // queue pointer unless the subtarget has aperture registers.
    if (!HasApertureRegs) {
      bool UsedAssumedInformation = false;
      A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
                                {Instruction::AddrSpaceCast},
                                UsedAssumedInformation);
    }

    // If the instruction scan already found a cast, there is nothing else to
    // check.
    if (NeedsQueuePtr)
      return true;

    if (!IsNonEntryFunc && HasApertureRegs)
      return false;

    for (BasicBlock &BB : *F) {
      for (Instruction &I : BB) {
        for (const Use &U : I.operands()) {
          if (const auto *C = dyn_cast<Constant>(U)) {
            if (InfoCache.needsQueuePtr(C, *F))
              return true;
          }
        }
      }
    }

    return false;
  }
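  // The helpers below check whether the function loads particular byte ranges
  // of the implicit kernel argument area. Each implicit argument lives at a
  // COV-dependent offset, so a load of 8 bytes starting at that offset means
  // the corresponding pointer is actually retrieved.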

  bool funcRetrievesMultigridSyncArg(Attributor &A, unsigned COV) {
    auto Pos = llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition(COV);
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesHostcallPtr(Attributor &A, unsigned COV) {
    auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition(COV);
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesDefaultQueue(Attributor &A, unsigned COV) {
    auto Pos = llvm::AMDGPU::getDefaultQueueImplicitArgPosition(COV);
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesCompletionAction(Attributor &A, unsigned COV) {
    auto Pos = llvm::AMDGPU::getCompletionActionImplicitArgPosition(COV);
    AA::RangeTy Range(Pos, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesHeapPtr(Attributor &A, unsigned COV) {
    if (COV < 5)
      return false;
    AA::RangeTy Range(AMDGPU::ImplicitArg::HEAP_PTR_OFFSET, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesQueuePtr(Attributor &A, unsigned COV) {
    if (COV < 5)
      return false;
    AA::RangeTy Range(AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET, 8);
    return funcRetrievesImplicitKernelArg(A, Range);
  }

  bool funcRetrievesImplicitKernelArg(Attributor &A, AA::RangeTy Range) {
    // Check if this is a call to the implicitarg_ptr builtin and it
    // is used to retrieve the hostcall pointer. The implicit arg for
    // hostcall is not used only if every use of the implicitarg_ptr
    // is a load that clearly does not retrieve any byte of the
    // hostcall pointer. We check this by tracing all the uses of the
    // initial call to the implicitarg_ptr intrinsic.
    auto DoesNotLeadToKernelArgLoc = [&](Instruction &I) {
      auto &Call = cast<CallBase>(I);
      if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
        return true;

      const auto *PointerInfoAA = A.getAAFor<AAPointerInfo>(
          *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED);
      if (!PointerInfoAA || !PointerInfoAA->getState().isValidState())
        return false;

      return PointerInfoAA->forallInterferingAccesses(
          Range, [](const AAPointerInfo::Access &Acc, bool IsExact) {
            return Acc.getRemoteInst()->isDroppable();
          });
    };

    bool UsedAssumedInformation = false;
    return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this,
                                              UsedAssumedInformation);
  }

  bool funcRetrievesLDSKernelId(Attributor &A) {
    auto DoesNotRetrieve = [&](Instruction &I) {
      auto &Call = cast<CallBase>(I);
      return Call.getIntrinsicID() != Intrinsic::amdgcn_lds_kernel_id;
    };
    bool UsedAssumedInformation = false;
    return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this,
                                              UsedAssumedInformation);
  }

  // Returns true if FlatScratchInit is needed, i.e. no-flat-scratch-init is
  // not to be set.
  bool needFlatScratchInit(Attributor &A) {
    assert(isAssumed(FLAT_SCRATCH_INIT)); // only called if the bit is assumed

    // Check all AddrSpaceCast instructions. FlatScratchInit is needed if
    // there is a cast from PRIVATE_ADDRESS.
    auto AddrSpaceCastNotFromPrivate = [](Instruction &I) {
      return cast<AddrSpaceCastInst>(I).getSrcAddressSpace() !=
             AMDGPUAS::PRIVATE_ADDRESS;
    };

    bool UsedAssumedInformation = false;
    if (!A.checkForAllInstructions(AddrSpaceCastNotFromPrivate, *this,
                                   {Instruction::AddrSpaceCast},
                                   UsedAssumedInformation))
      return true;

    // Check for addrspacecasts from PRIVATE_ADDRESS in constant expressions.
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    Function *F = getAssociatedFunction();
    for (Instruction &I : instructions(F)) {
      for (const Use &U : I.operands()) {
        if (const auto *C = dyn_cast<Constant>(U)) {
          if (InfoCache.checkConstForAddrSpaceCastFromPrivate(C))
            return true;
        }
      }
    }

    // Finally, check callees. This is called on each callee; returning false
    // means the callee should not have "amdgpu-no-flat-scratch-init".
    auto CheckForNoFlatScratchInit = [&](Instruction &I) {
      const auto &CB = cast<CallBase>(I);
      const Function *Callee = CB.getCalledFunction();

      // Callee == 0 for inline asm or indirect call with known callees.
      // In the latter case, updateImpl() already checked the callees and we
      // know their FLAT_SCRATCH_INIT bit is set. If the function has an
      // indirect call with unknown callees, the bit was already removed in
      // updateImpl() and execution does not reach here.
      if (!Callee)
        return true;

      return Callee->getIntrinsicID() !=
             Intrinsic::amdgcn_addrspacecast_nonnull;
    };

    UsedAssumedInformation = false;
    // If any callee needs FlatScratchInit, checkForAllCallLikeInstructions
    // returns false, in which case this function returns true.
    return !A.checkForAllCallLikeInstructions(CheckForNoFlatScratchInit, *this,
                                              UsedAssumedInformation);
  }
};

AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
  llvm_unreachable("AAAMDAttributes is only valid for function position");
}
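// The size-range attributes below keep a 32-bit integer range as the assumed
// state and intersect it with each caller's range via
// clampStateAndIndicateChange, so a callee ends up no less constrained than
// any function it can be reached from; manifest() only writes the attribute
// when the clamped range differs from the subtarget default.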

/// Base class to derive different size ranges.
struct AAAMDSizeRangeAttribute
    : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
  using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;

  StringRef AttrName;

  AAAMDSizeRangeAttribute(const IRPosition &IRP, Attributor &A,
                          StringRef AttrName)
      : Base(IRP, 32), AttrName(AttrName) {}

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

  template <class AttributeImpl> ChangeStatus updateImplImpl(Attributor &A) {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << '\n');

      const auto *CallerInfo = A.getAAFor<AttributeImpl>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
      if (!CallerInfo || !CallerInfo->isValidState())
        return false;

      Change |=
          clampStateAndIndicateChange(this->getState(), CallerInfo->getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this,
                                /*RequireAllCallSites=*/true,
                                AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  /// Clamp the assumed range to the default value [Min, Max] and emit the
  /// attribute if it is not the same as the default.
  ChangeStatus
  emitAttributeIfNotDefaultAfterClamp(Attributor &A,
                                      std::pair<unsigned, unsigned> Default) {
    auto [Min, Max] = Default;
    unsigned Lower = getAssumed().getLower().getZExtValue();
    unsigned Upper = getAssumed().getUpper().getZExtValue();

    // Clamp the range to the default value.
    if (Lower < Min)
      Lower = Min;
    if (Upper > Max + 1)
      Upper = Max + 1;

    // No manifest if the value is the same as the default after clamping.
    if (Lower == Min && Upper == Max + 1)
      return ChangeStatus::UNCHANGED;

    Function *F = getAssociatedFunction();
    LLVMContext &Ctx = F->getContext();
    SmallString<10> Buffer;
    raw_svector_ostream OS(Buffer);
    OS << Lower << ',' << Upper - 1;
    return A.manifestAttrs(getIRPosition(),
                           {Attribute::get(Ctx, AttrName, OS.str())},
                           /*ForceReplace=*/true);
  }

  const std::string getAsStr(Attributor *) const override {
    std::string Str;
    raw_string_ostream OS(Str);
    OS << getName() << '[';
    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
    OS << ']';
    return OS.str();
  }
};

/// Propagate amdgpu-flat-work-group-size attribute.
struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
  AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
      : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-flat-work-group-size") {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    bool HasAttr = false;
    auto Range = InfoCache.getDefaultFlatWorkGroupSize(*F);
    auto MaxRange = InfoCache.getMaximumFlatWorkGroupRange(*F);

    if (auto Attr = InfoCache.getFlatWorkGroupSizeAttr(*F)) {
      // We only consider an attribute that is not the maximum range because
      // the front end always emits the attribute, unfortunately, and
      // sometimes it emits the maximum range.
      if (*Attr != MaxRange) {
        Range = *Attr;
        HasAttr = true;
      }
    }

    // We don't want to directly clamp the state if it's the max range because
    // that is basically the worst state.
    if (Range == MaxRange)
      return;

    auto [Min, Max] = Range;
    ConstantRange CR(APInt(32, Min), APInt(32, Max + 1));
    IntegerRangeState IRS(CR);
    clampStateAndIndicateChange(this->getState(), IRS);

    if (HasAttr || AMDGPU::isEntryFunctionCC(F->getCallingConv()))
      indicateOptimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    return updateImplImpl<AAAMDFlatWorkGroupSize>(A);
  }

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
                                                   Attributor &A);

  ChangeStatus manifest(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    return emitAttributeIfNotDefaultAfterClamp(
        A, InfoCache.getMaximumFlatWorkGroupRange(*F));
  }

  /// See AbstractAttribute::getName()
  StringRef getName() const override { return "AAAMDFlatWorkGroupSize"; }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDFlatWorkGroupSize.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};

const char AAAMDFlatWorkGroupSize::ID = 0;

AAAMDFlatWorkGroupSize &
AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
  llvm_unreachable(
      "AAAMDFlatWorkGroupSize is only valid for function position");
}

struct TupleDecIntegerRangeState : public AbstractState {
  DecIntegerState<uint32_t> X, Y, Z;

  bool isValidState() const override {
    return X.isValidState() && Y.isValidState() && Z.isValidState();
  }

  bool isAtFixpoint() const override {
    return X.isAtFixpoint() && Y.isAtFixpoint() && Z.isAtFixpoint();
  }

  ChangeStatus indicateOptimisticFixpoint() override {
    return X.indicateOptimisticFixpoint() | Y.indicateOptimisticFixpoint() |
           Z.indicateOptimisticFixpoint();
  }

  ChangeStatus indicatePessimisticFixpoint() override {
    return X.indicatePessimisticFixpoint() | Y.indicatePessimisticFixpoint() |
           Z.indicatePessimisticFixpoint();
  }

  TupleDecIntegerRangeState operator^=(const TupleDecIntegerRangeState &Other) {
    X ^= Other.X;
    Y ^= Other.Y;
    Z ^= Other.Z;
    return *this;
  }

  bool operator==(const TupleDecIntegerRangeState &Other) const {
    return X == Other.X && Y == Other.Y && Z == Other.Z;
  }

  TupleDecIntegerRangeState &getAssumed() { return *this; }
  const TupleDecIntegerRangeState &getAssumed() const { return *this; }
};
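// Merging call sites with ^= joins the three dimensions pessimistically, so
// a callee reachable from two kernels with amdgpu-max-num-workgroups of
// 8,4,1 and 4,8,1 ends up with the per-dimension bound that is valid for
// both launches, 8,8,1.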

using AAAMDMaxNumWorkgroupsState =
    StateWrapper<TupleDecIntegerRangeState, AbstractAttribute, uint32_t>;

/// Propagate amdgpu-max-num-workgroups attribute.
struct AAAMDMaxNumWorkgroups
    : public StateWrapper<TupleDecIntegerRangeState, AbstractAttribute> {
  using Base = StateWrapper<TupleDecIntegerRangeState, AbstractAttribute>;

  AAAMDMaxNumWorkgroups(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    SmallVector<unsigned> MaxNumWorkgroups = InfoCache.getMaxNumWorkGroups(*F);

    X.takeKnownMinimum(MaxNumWorkgroups[0]);
    Y.takeKnownMinimum(MaxNumWorkgroups[1]);
    Z.takeKnownMinimum(MaxNumWorkgroups[2]);

    if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAAMDMaxNumWorkgroups] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << '\n');

      const auto *CallerInfo = A.getAAFor<AAAMDMaxNumWorkgroups>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
      if (!CallerInfo || !CallerInfo->isValidState())
        return false;

      Change |=
          clampStateAndIndicateChange(this->getState(), CallerInfo->getState());
      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this,
                                /*RequireAllCallSites=*/true,
                                AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDMaxNumWorkgroups &createForPosition(const IRPosition &IRP,
                                                  Attributor &A);

  ChangeStatus manifest(Attributor &A) override {
    Function *F = getAssociatedFunction();
    LLVMContext &Ctx = F->getContext();
    SmallString<32> Buffer;
    raw_svector_ostream OS(Buffer);
    OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed();

    return A.manifestAttrs(
        getIRPosition(),
        {Attribute::get(Ctx, "amdgpu-max-num-workgroups", OS.str())},
        /*ForceReplace=*/true);
  }

  StringRef getName() const override { return "AAAMDMaxNumWorkgroups"; }

  const std::string getAsStr(Attributor *) const override {
    std::string Buffer = "AAAMDMaxNumWorkgroupsState[";
    raw_string_ostream OS(Buffer);
    OS << X.getAssumed() << ',' << Y.getAssumed() << ',' << Z.getAssumed()
       << ']';
    return OS.str();
  }

  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDMaxNumWorkgroups.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  void trackStatistics() const override {}

  /// Unique ID (due to the unique address)
  static const char ID;
};

const char AAAMDMaxNumWorkgroups::ID = 0;

AAAMDMaxNumWorkgroups &
AAAMDMaxNumWorkgroups::createForPosition(const IRPosition &IRP, Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDMaxNumWorkgroups(IRP, A);
  llvm_unreachable("AAAMDMaxNumWorkgroups is only valid for function position");
}

/// Propagate amdgpu-waves-per-eu attribute.
struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
  AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A)
      : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-waves-per-eu") {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    // If the attribute exists, we will honor it if it is not the default.
    if (auto Attr = InfoCache.getWavesPerEUAttr(*F)) {
      std::pair<unsigned, unsigned> MaxWavesPerEURange{
          1U, InfoCache.getMaxWavesPerEU(*F)};
      if (*Attr != MaxWavesPerEURange) {
        auto [Min, Max] = *Attr;
        ConstantRange Range(APInt(32, Min), APInt(32, Max + 1));
        IntegerRangeState RangeState(Range);
        this->getState() = RangeState;
        indicateOptimisticFixpoint();
        return;
      }
    }

    if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      Function *Func = getAssociatedFunction();
      LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
                        << "->" << Func->getName() << '\n');
      (void)Func;

      const auto *CallerAA = A.getAAFor<AAAMDWavesPerEU>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
      if (!CallerAA || !CallerAA->isValidState())
        return false;

      ConstantRange Assumed = getAssumed();
      unsigned Min = std::max(Assumed.getLower().getZExtValue(),
                              CallerAA->getAssumed().getLower().getZExtValue());
      unsigned Max = std::max(Assumed.getUpper().getZExtValue(),
                              CallerAA->getAssumed().getUpper().getZExtValue());
      ConstantRange Range(APInt(32, Min), APInt(32, Max));
      IntegerRangeState RangeState(Range);
      getState() = RangeState;
      Change |= getState() == Assumed ? ChangeStatus::UNCHANGED
                                      : ChangeStatus::CHANGED;

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      return indicatePessimisticFixpoint();

    return Change;
  }

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDWavesPerEU &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  ChangeStatus manifest(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
    return emitAttributeIfNotDefaultAfterClamp(
        A, {1U, InfoCache.getMaxWavesPerEU(*F)});
  }

  /// See AbstractAttribute::getName()
  StringRef getName() const override { return "AAAMDWavesPerEU"; }

  /// See AbstractAttribute::getIdAddr()
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDWavesPerEU.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};

const char AAAMDWavesPerEU::ID = 0;

AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDWavesPerEU(IRP, A);
  llvm_unreachable("AAAMDWavesPerEU is only valid for function position");
}

// Returns the minimum number of AGPRs required to allocate the inline asm.
static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
                                             const CallBase &Call) {
  unsigned ArgNo = 0;
  unsigned ResNo = 0;
  unsigned AGPRDefCount = 0;
  unsigned AGPRUseCount = 0;
  unsigned MaxPhysReg = 0;
  const DataLayout &DL = Call.getDataLayout();

  for (const InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
    Type *Ty = nullptr;
    switch (CI.Type) {
    case InlineAsm::isOutput: {
      Ty = Call.getType();
      if (auto *STy = dyn_cast<StructType>(Ty))
        Ty = STy->getElementType(ResNo);
      ++ResNo;
      break;
    }
    case InlineAsm::isInput: {
      Ty = Call.getArgOperand(ArgNo++)->getType();
      break;
    }
    case InlineAsm::isLabel:
      continue;
    case InlineAsm::isClobber:
      // Parse the physical register reference below.
      break;
    }

    for (StringRef Code : CI.Codes) {
      unsigned RegCount = 0;
      if (Code.starts_with("a")) {
        // Virtual register constraint ("a"): compute the number of registers
        // from the operand type.
        RegCount = divideCeil(DL.getTypeSizeInBits(Ty), 32);
      } else {
        // Physical register reference.
        auto [Kind, RegIdx, NumRegs] = AMDGPU::parseAsmConstraintPhysReg(Code);
        if (Kind == 'a') {
          RegCount = NumRegs;
          MaxPhysReg = std::max(MaxPhysReg, std::min(RegIdx + NumRegs, 256u));
        }

        continue;
      }

      if (CI.Type == InlineAsm::isOutput) {
        // Defs are aligned to the register tuple size.
        AGPRDefCount = alignTo(AGPRDefCount, RegCount);

        AGPRDefCount += RegCount;
        if (CI.isEarlyClobber) {
          AGPRUseCount = alignTo(AGPRUseCount, RegCount);
          AGPRUseCount += RegCount;
        }
      } else {
        AGPRUseCount = alignTo(AGPRUseCount, RegCount);
        AGPRUseCount += RegCount;
      }
    }
  }

  unsigned MaxVirtReg = std::max(AGPRUseCount, AGPRDefCount);

  // Conservatively assume the required virtual registers are allocated on top
  // of any referenced physical registers.
  return std::min(MaxVirtReg + MaxPhysReg, 256u);
}
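// For example, asm with an "=a" constraint on a <4 x i32> result and an "a"
// constraint on a float input counts 4 AGPRs of defs against 1 AGPR of uses,
// so the virtual demand is max(4, 1) = 4; a reference to a physical tuple
// like "{a[0:3]}" instead raises the physical watermark MaxPhysReg to 4.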

/// Deduce the minimum number of AGPRs a function requires
/// ("amdgpu-agpr-alloc").
struct AAAMDGPUMinAGPRAlloc
    : public StateWrapper<DecIntegerState<>, AbstractAttribute> {
  using Base = StateWrapper<DecIntegerState<>, AbstractAttribute>;
  AAAMDGPUMinAGPRAlloc(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  static AAAMDGPUMinAGPRAlloc &createForPosition(const IRPosition &IRP,
                                                 Attributor &A) {
    if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
      return *new (A.Allocator) AAAMDGPUMinAGPRAlloc(IRP, A);
    llvm_unreachable(
        "AAAMDGPUMinAGPRAlloc is only valid for function position");
  }

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    auto [MinNumAGPR, MaxNumAGPR] =
        AMDGPU::getIntegerPairAttribute(*F, "amdgpu-agpr-alloc", {~0u, ~0u},
                                        /*OnlyFirstRequired=*/true);
    if (MinNumAGPR == 0)
      indicateOptimisticFixpoint();
  }

  const std::string getAsStr(Attributor *A) const override {
    std::string Str = "amdgpu-agpr-alloc=";
    raw_string_ostream OS(Str);
    OS << getAssumed();
    return OS.str();
  }

  void trackStatistics() const override {}

  ChangeStatus updateImpl(Attributor &A) override {
    DecIntegerState<> Maximum;

    // Check for cases which require allocation of AGPRs. The only cases where
    // AGPRs are required are direct references to AGPRs, i.e. inline asm and
    // special intrinsics.
    auto CheckForMinAGPRAllocs = [&](Instruction &I) {
      const auto &CB = cast<CallBase>(I);
      const Value *CalleeOp = CB.getCalledOperand();

      if (const auto *IA = dyn_cast<InlineAsm>(CalleeOp)) {
        // Technically, the inline asm could be invoking a call to an unknown
        // external function that requires AGPRs, but ignore that.
        unsigned NumRegs = inlineAsmGetNumRequiredAGPRs(IA, CB);
        Maximum.takeAssumedMaximum(NumRegs);
        return true;
      }

      switch (CB.getIntrinsicID()) {
      case Intrinsic::not_intrinsic:
        break;
      case Intrinsic::write_register:
      case Intrinsic::read_register:
      case Intrinsic::read_volatile_register: {
        const MDString *RegName = cast<MDString>(
            cast<MDNode>(
                cast<MetadataAsValue>(CB.getArgOperand(0))->getMetadata())
                ->getOperand(0));
        auto [Kind, RegIdx, NumRegs] =
            AMDGPU::parseAsmPhysRegName(RegName->getString());
        if (Kind == 'a')
          Maximum.takeAssumedMaximum(std::min(RegIdx + NumRegs, 256u));

        return true;
      }
      default:
        // Some intrinsics may use AGPRs, but if we have a choice, we are not
        // required to use AGPRs. Assume nocallback intrinsics cannot reach
        // functions that use AGPRs.
        return CB.hasFnAttr(Attribute::NoCallback);
      }

      const auto *CBEdges = A.getAAFor<AACallEdges>(
          *this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
      if (!CBEdges || CBEdges->hasUnknownCallee()) {
        Maximum.indicatePessimisticFixpoint();
        return false;
      }

      for (const Function *PossibleCallee : CBEdges->getOptimisticEdges()) {
        const auto *CalleeInfo = A.getAAFor<AAAMDGPUMinAGPRAlloc>(
            *this, IRPosition::function(*PossibleCallee),
            DepClassTy::REQUIRED);
        if (!CalleeInfo || !CalleeInfo->isValidState()) {
          Maximum.indicatePessimisticFixpoint();
          return false;
        }

        Maximum.takeAssumedMaximum(CalleeInfo->getAssumed());
      }

      return true;
    };

    bool UsedAssumedInformation = false;
    if (!A.checkForAllCallLikeInstructions(CheckForMinAGPRAllocs, *this,
                                           UsedAssumedInformation))
      return indicatePessimisticFixpoint();

    return clampStateAndIndicateChange(getState(), Maximum);
  }

  ChangeStatus manifest(Attributor &A) override {
    LLVMContext &Ctx = getAssociatedFunction()->getContext();
    SmallString<4> Buffer;
    raw_svector_ostream OS(Buffer);
    OS << getAssumed();

    return A.manifestAttrs(
        getIRPosition(), {Attribute::get(Ctx, "amdgpu-agpr-alloc", OS.str())});
  }

  StringRef getName() const override { return "AAAMDGPUMinAGPRAlloc"; }
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDGPUMinAGPRAlloc.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  static const char ID;
};

const char AAAMDGPUMinAGPRAlloc::ID = 0;

/// An abstract attribute to propagate the "amdgpu-cluster-dims" attribute.
struct AAAMDGPUClusterDims
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAAMDGPUClusterDims(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDGPUClusterDims &createForPosition(const IRPosition &IRP,
                                                Attributor &A);

  /// See AbstractAttribute::getName().
  StringRef getName() const override { return "AAAMDGPUClusterDims"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDGPUClusterDims.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  virtual const AMDGPU::ClusterDimsAttr &getClusterDims() const = 0;

  /// Unique ID (due to the unique address)
  static const char ID;
};

const char AAAMDGPUClusterDims::ID = 0;

struct AAAMDGPUClusterDimsFunction : public AAAMDGPUClusterDims {
  AAAMDGPUClusterDimsFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDGPUClusterDims(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    assert(F && "empty associated function");

    Attr = AMDGPU::ClusterDimsAttr::get(*F);

    // Entry functions keep whatever the launch declares.
    if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) {
      if (Attr.isUnknown())
        indicatePessimisticFixpoint();
      else
        indicateOptimisticFixpoint();
    }
  }

  const std::string getAsStr(Attributor *A) const override {
    if (!getAssumed() || Attr.isUnknown())
      return "unknown";
    if (Attr.isNoCluster())
      return "no";
    if (Attr.isVariableDims())
      return "variable";
    return Attr.to_string();
  }

  void trackStatistics() const override {}

  ChangeStatus updateImpl(Attributor &A) override {
    auto OldState = Attr;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      const auto *CallerAA = A.getAAFor<AAAMDGPUClusterDims>(
          *this, IRPosition::function(*CS.getInstruction()->getFunction()),
          DepClassTy::REQUIRED);
      if (!CallerAA || !CallerAA->isValidState())
        return false;

      return merge(CallerAA->getClusterDims());
    };

    bool UsedAssumedInformation = false;
    if (!A.checkForAllCallSites(CheckCallSite, *this,
                                /*RequireAllCallSites=*/true,
                                UsedAssumedInformation))
      return indicatePessimisticFixpoint();

    return OldState == Attr ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED;
  }

  ChangeStatus manifest(Attributor &A) override {
    if (Attr.isUnknown())
      return ChangeStatus::UNCHANGED;
    return A.manifestAttrs(
        getIRPosition(),
        {Attribute::get(getAssociatedFunction()->getContext(), AttrName,
                        Attr.to_string())},
        /*ForceReplace=*/true);
  }

  const AMDGPU::ClusterDimsAttr &getClusterDims() const override {
    return Attr;
  }

private:
  bool merge(const AMDGPU::ClusterDimsAttr &Other) {
    // Case 1: Both sides carry no information. Nothing changes.
    if (Attr.isUnknown() && Other.isUnknown())
      return true;

    // Case 2: We know nothing yet; inherit the caller's cluster dims.
    if (Attr.isUnknown()) {
      Attr = Other;
      return true;
    }

    // Case 3: The caller adds no information; keep what we have.
    if (Other.isUnknown())
      return true;

    // Case 4: Both sides agree.
    if (Attr == Other)
      return true;

    // Case 5: One side uses clusters and the other does not; there is no
    // consistent value, so give up.
    if (Attr.isNoCluster() || Other.isNoCluster())
      return false;

    // Case 6: Both use clusters but with different dimensions; the dimensions
    // still exist but their value becomes variable. (The factory name used
    // here is approximate.)
    Attr = AMDGPU::ClusterDimsAttr::getVariableDims();
    return true;
  }

  AMDGPU::ClusterDimsAttr Attr;

  static constexpr char AttrName[] = "amdgpu-cluster-dims";
};

AAAMDGPUClusterDims &
AAAMDGPUClusterDims::createForPosition(const IRPosition &IRP, Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDGPUClusterDimsFunction(IRP, A);
  llvm_unreachable("AAAMDGPUClusterDims is only valid for function position");
}
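// runImpl wires everything together: it seeds one abstract attribute of each
// relevant kind per function, restricts the Attributor to the allowed set
// below, and runs the fixpoint iteration; the return value reports whether
// any IR was changed.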

static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
                    AMDGPUAttributorOptions Options,
                    ThinOrFullLTOPhase LTOPhase) {
  SetVector<Function *> Functions;
  for (Function &F : M) {
    if (!F.isIntrinsic())
      Functions.insert(&F);
  }

  CallGraphUpdater CGUpdater;
  BumpPtrAllocator Allocator;
  AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, TM);
  DenseSet<const char *> Allowed(
      {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
       &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
       &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID,
       &AAAMDGPUMinAGPRAlloc::ID, &AACallEdges::ID, &AAPointerInfo::ID,
       &AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID,
       &AAAddressSpace::ID, &AANoAliasAddrSpace::ID, &AAIndirectCallInfo::ID,
       &AAAMDGPUClusterDims::ID, &AAAlign::ID});

  AttributorConfig AC(CGUpdater);
  AC.IsClosedWorldModule = Options.IsClosedWorld;
  AC.Allowed = &Allowed;
  AC.IsModulePass = true;
  AC.DefaultInitializeLiveInternals = false;
  AC.IndirectCalleeSpecializationCallback =
      [](Attributor &A, const AbstractAttribute &AA, CallBase &CB,
         Function &Callee, unsigned NumAssumedCallees) {
        return !AMDGPU::isEntryFunctionCC(Callee.getCallingConv()) &&
               (NumAssumedCallees <= IndirectCallSpecializationThreshold);
      };
  AC.IPOAmendableCB = [](const Function &F) {
    return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
  };

  Attributor A(Functions, InfoCache, AC);

  LLVM_DEBUG({
    StringRef LTOPhaseStr = to_string(LTOPhase);
    dbgs() << "[AMDGPUAttributor] Running at phase " << LTOPhaseStr << '\n'
           << "[AMDGPUAttributor] Module " << M.getName() << " is "
           << (AC.IsClosedWorldModule ? "" : "not ")
           << "assumed to be a closed world.\n";
  });

  for (auto *F : Functions) {
    A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(*F));
    A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(*F));
    A.getOrCreateAAFor<AAAMDMaxNumWorkgroups>(IRPosition::function(*F));
    CallingConv::ID CC = F->getCallingConv();
    if (!AMDGPU::isEntryFunctionCC(CC) && !AMDGPU::isGraphics(CC)) {
      A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(*F));
      A.getOrCreateAAFor<AAAMDWavesPerEU>(IRPosition::function(*F));
    }

    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(*F);
    if (!F->isDeclaration() && ST.hasClusters())
      A.getOrCreateAAFor<AAAMDGPUClusterDims>(IRPosition::function(*F));

    if (ST.hasGFX90AInsts())
      A.getOrCreateAAFor<AAAMDGPUMinAGPRAlloc>(IRPosition::function(*F));

    for (auto &I : instructions(F)) {
      Value *Ptr = nullptr;
      if (auto *LI = dyn_cast<LoadInst>(&I))
        Ptr = LI->getPointerOperand();
      else if (auto *SI = dyn_cast<StoreInst>(&I))
        Ptr = SI->getPointerOperand();
      else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I))
        Ptr = RMW->getPointerOperand();
      else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I))
        Ptr = CmpX->getPointerOperand();

      if (Ptr) {
        A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(*Ptr));
        A.getOrCreateAAFor<AANoAliasAddrSpace>(IRPosition::value(*Ptr));
      } else if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
        if (II->getIntrinsicID() == Intrinsic::amdgcn_make_buffer_rsrc)
          A.getOrCreateAAFor<AAAlign>(IRPosition::value(*II));
      }
    }
  }

  return A.run() == ChangeStatus::CHANGED;
}
} // namespace
