LLVM: lib/Target/AMDGPU/AMDGPUTargetMachine.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

71#include "llvm/IR/IntrinsicsAMDGPU.h"

102#include

103

104using namespace llvm;

106

107namespace {

108class SGPRRegisterRegAlloc : public RegisterRegAllocBase {

109public:

112};

113

114class VGPRRegisterRegAlloc : public RegisterRegAllocBase {

115public:

118};

119

120class WWMRegisterRegAlloc : public RegisterRegAllocBase {

121public:

124};

125

131}

132

137 return !static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(RC);

138}

139

146 return !static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(RC) &&

148}

149

150

152

153

154

155static llvm::once_flag InitializeDefaultSGPRRegisterAllocatorFlag;

156static llvm::once_flag InitializeDefaultVGPRRegisterAllocatorFlag;

157static llvm::once_flag InitializeDefaultWWMRegisterAllocatorFlag;

158

159static SGPRRegisterRegAlloc

160defaultSGPRRegAlloc("default",

161 "pick SGPR register allocator based on -O option",

163

164static cl::opt<SGPRRegisterRegAlloc::FunctionPassCtor, false,

167 cl::desc("Register allocator to use for SGPRs"));

168

169static cl::opt<VGPRRegisterRegAlloc::FunctionPassCtor, false,

172 cl::desc("Register allocator to use for VGPRs"));

173

174static cl::opt<WWMRegisterRegAlloc::FunctionPassCtor, false,

176 WWMRegAlloc("wwm-regalloc", cl::Hidden,

178 cl::desc("Register allocator to use for WWM registers"));

179

180static void initializeDefaultSGPRRegisterAllocatorOnce() {

182

183 if (!Ctor) {

184 Ctor = SGPRRegAlloc;

185 SGPRRegisterRegAlloc::setDefault(SGPRRegAlloc);

186 }

187}

188

189static void initializeDefaultVGPRRegisterAllocatorOnce() {

191

192 if (!Ctor) {

193 Ctor = VGPRRegAlloc;

194 VGPRRegisterRegAlloc::setDefault(VGPRRegAlloc);

195 }

196}

197

198static void initializeDefaultWWMRegisterAllocatorOnce() {

200

201 if (!Ctor) {

202 Ctor = WWMRegAlloc;

203 WWMRegisterRegAlloc::setDefault(WWMRegAlloc);

204 }

205}

206

207static FunctionPass *createBasicSGPRRegisterAllocator() {

209}

210

211static FunctionPass *createGreedySGPRRegisterAllocator() {

213}

214

215static FunctionPass *createFastSGPRRegisterAllocator() {

217}

218

219static FunctionPass *createBasicVGPRRegisterAllocator() {

221}

222

223static FunctionPass *createGreedyVGPRRegisterAllocator() {

225}

226

227static FunctionPass *createFastVGPRRegisterAllocator() {

229}

230

231static FunctionPass *createBasicWWMRegisterAllocator() {

233}

234

235static FunctionPass *createGreedyWWMRegisterAllocator() {

237}

238

239static FunctionPass *createFastWWMRegisterAllocator() {

241}

242

243static SGPRRegisterRegAlloc basicRegAllocSGPR(

244 "basic", "basic register allocator", createBasicSGPRRegisterAllocator);

245static SGPRRegisterRegAlloc greedyRegAllocSGPR(

246 "greedy", "greedy register allocator", createGreedySGPRRegisterAllocator);

247

248static SGPRRegisterRegAlloc fastRegAllocSGPR(

249 "fast", "fast register allocator", createFastSGPRRegisterAllocator);

250

251

252static VGPRRegisterRegAlloc basicRegAllocVGPR(

253 "basic", "basic register allocator", createBasicVGPRRegisterAllocator);

254static VGPRRegisterRegAlloc greedyRegAllocVGPR(

255 "greedy", "greedy register allocator", createGreedyVGPRRegisterAllocator);

256

257static VGPRRegisterRegAlloc fastRegAllocVGPR(

258 "fast", "fast register allocator", createFastVGPRRegisterAllocator);

259static WWMRegisterRegAlloc basicRegAllocWWMReg("basic",

260 "basic register allocator",

261 createBasicWWMRegisterAllocator);

262static WWMRegisterRegAlloc

263 greedyRegAllocWWMReg("greedy", "greedy register allocator",

264 createGreedyWWMRegisterAllocator);

265static WWMRegisterRegAlloc fastRegAllocWWMReg("fast", "fast register allocator",

266 createFastWWMRegisterAllocator);

267

269 return Phase == ThinOrFullLTOPhase::FullLTOPreLink ||

270 Phase == ThinOrFullLTOPhase::ThinLTOPreLink;

271}

272}

273

276 cl::desc("Run early if-conversion"),

278

281 cl::desc("Run pre-RA exec mask optimizations"),

283

286 cl::desc("Lower GPU ctor / dtors to globals on the device."),

288

289

291 "amdgpu-load-store-vectorizer",

292 cl::desc("Enable load store vectorizer"),

295

296

298 "amdgpu-scalarize-global-loads",

299 cl::desc("Enable global load scalarization"),

302

303

305 "amdgpu-internalize-symbols",

306 cl::desc("Enable elimination of non-kernel functions and unused globals"),

309

310

312 "amdgpu-early-inline-all",

313 cl::desc("Inline all functions early"),

316

318 "amdgpu-enable-remove-incompatible-functions", cl::Hidden,

319 cl::desc("Enable removal of functions when they"

320 "use features not supported by the target GPU"),

322

324 "amdgpu-sdwa-peephole",

325 cl::desc("Enable SDWA peepholer"),

327

329 "amdgpu-dpp-combine",

330 cl::desc("Enable DPP combiner"),

332

333

335 cl::desc("Enable AMDGPU Alias Analysis"),

337

338

340 "amdgpu-simplify-libcall",

341 cl::desc("Enable amdgpu library simplifications"),

344

346 "amdgpu-ir-lower-kernel-arguments",

347 cl::desc("Lower kernel argument loads in IR pass"),

350

352 "amdgpu-reassign-regs",

353 cl::desc("Enable register reassign optimizations on gfx10+"),

356

358 "amdgpu-opt-vgpr-liverange",

359 cl::desc("Enable VGPR liverange optimizations for if-else structure"),

361

363 "amdgpu-atomic-optimizer-strategy",

364 cl::desc("Select DPP or Iterative strategy for scan"),

365 cl::init(ScanOptions::Iterative),

367 clEnumValN(ScanOptions::DPP, "DPP", "Use DPP operations for scan"),

368 clEnumValN(ScanOptions::Iterative, "Iterative",

369 "Use Iterative approach for scan"),

370 clEnumValN(ScanOptions::None, "None", "Disable atomic optimizer")));

371

372

374 "amdgpu-mode-register",

375 cl::desc("Enable mode register pass"),

378

379

382 cl::desc("Enable s_delay_alu insertion"),

384

385

388 cl::desc("Enable VOPD, dual issue of VALU in wave32"),

390

391

395 cl::desc("Enable machine DCE inside regalloc"));

396

398 cl::desc("Adjust wave priority"),

400

402 "amdgpu-scalar-ir-passes",

403 cl::desc("Enable scalar IR passes"),

406

409 cl::desc("Enable lowering of lds to global memory pass "

410 "and asan instrument resulting IR."),

412

414 "amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"),

417

419 "amdgpu-enable-pre-ra-optimizations",

420 cl::desc("Enable Pre-RA optimizations pass"), cl::init(true),

422

424 "amdgpu-enable-promote-kernel-arguments",

425 cl::desc("Enable promotion of flat kernel pointer arguments to global"),

427

429 "amdgpu-enable-image-intrinsic-optimizer",

430 cl::desc("Enable image intrinsic optimizer pass"), cl::init(true),

432

435 cl::desc("Enable loop data prefetch on AMDGPU"),

437

440 cl::desc("Select custom AMDGPU scheduling strategy."),

442

444 "amdgpu-enable-rewrite-partial-reg-uses",

445 cl::desc("Enable rewrite partial reg uses pass"), cl::init(true),

447

449 "amdgpu-enable-hipstdpar",

450 cl::desc("Enable HIP Standard Parallelism Offload support"), cl::init(false),

452

455 cl::desc("Enable AMDGPUAttributorPass"),

457

459 "new-reg-bank-select",

460 cl::desc("Run amdgpu-regbankselect and amdgpu-regbanklegalize instead of "

461 "regbankselect"),

463

465 "amdgpu-link-time-closed-world",

466 cl::desc("Whether has closed-world assumption at link time"),

468

470

473

551}

552

553static std::unique_ptr createTLOF(const Triple &TT) {

554 return std::make_unique();

555}

556

559}

560

567 if (ST.shouldClusterStores())

572 return DAG;

573}

574

580 return DAG;

581}

582

587 C, std::make_unique(C));

589 if (ST.shouldClusterStores())

592 return DAG;

593}

594

601 if (ST.shouldClusterStores())

603 return DAG;

604}

605

609}

610

616 if (ST.shouldClusterStores())

619 return DAG;

620}

621

625

628 "Run GCN scheduler to maximize occupancy",

630

634

636 "gcn-max-memory-clause", "Run GCN scheduler to maximize memory clause",

638

640 "gcn-iterative-max-occupancy-experimental",

641 "Run GCN scheduler to maximize occupancy (experimental)",

643

645 "gcn-iterative-minreg",

646 "Run GCN iterative scheduler for minimal register usage (experimental)",

648

650 "gcn-iterative-ilp",

651 "Run GCN iterative scheduler for ILP scheduling (experimental)",

653

656

657 return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"

658 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1";

659 }

660

661

662

663

664

665

666

667 return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"

668 "-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-"

669 "v32:32-v48:64-v96:"

670 "128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-"

671 "G1-ni:7:8:9";

672}

673

676 if (!GPU.empty())

677 return GPU;

678

679

681 return TT.getOS() == Triple::AMDHSA ? "generic-hsa" : "generic";

682

683 return "r600";

684}

685

687

688

690}

691

695 std::optionalReloc::Model RM,

696 std::optionalCodeModel::Model CM,

702 TLOF(createTLOF(getTargetTriple())) {

709 }

710}

711

714

716

718 Attribute GPUAttr = F.getFnAttribute("target-cpu");

720}

721

723 Attribute FSAttr = F.getFnAttribute("target-features");

724

727}

728

729

731 if (const Function *F = dyn_cast(&GV))

732 return F->isDeclaration() || F->getName().starts_with("__asan_") ||

733 F->getName().starts_with("__sanitizer_") ||

735

738}

739

742}

743

746 if (Params.empty())

754 if (Result)

755 return *Result;

757}

758

762 while (!Params.empty()) {

764 std::tie(ParamName, Params) = Params.split(';');

765 if (ParamName == "closed-world") {

766 Result.IsClosedWorld = true;

767 } else {

768 return make_error(

769 formatv("invalid AMDGPUAttributor pass parameter '{0}' ", ParamName)

770 .str(),

772 }

773 }

774 return Result;

775}

776

778

779#define GET_PASS_REGISTRY "AMDGPUPassRegistry.def"

781

786 });

787

792

794 return;

795

797

798

802 }

803

806 });

807

811 return;

812

816 });

817

821 return;

822

824

825

826

827

831

832

833

835

836

837

839

841

842

843

845 }

846

848 });

849

850

857 }

858 });

859

862

863

864

870

874 }

880 }

881 }

882 });

883

886 if (FilterName == "sgpr")

887 return onlyAllocateSGPRs;

888 if (FilterName == "vgpr")

889 return onlyAllocateVGPRs;

890 if (FilterName == "wwm")

891 return onlyAllocateWWMRegs;

892 return nullptr;

893 });

894}

895

900 ? -1

901 : 0;

902}

903

905 unsigned DestAS) const {

908}

909

911 const auto *LD = dyn_cast(V);

912 if (!LD)

914

915

917

918 const auto *Ptr = LD->getPointerOperand();

921

922

923

924

926}

927

928std::pair<const Value *, unsigned>

930 if (auto *II = dyn_cast(V)) {

931 switch (II->getIntrinsicID()) {

932 case Intrinsic::amdgcn_is_shared:

934 case Intrinsic::amdgcn_is_private:

936 default:

937 break;

938 }

939 return std::pair(nullptr, -1);

940 }

941

942

943

946 const_cast<Value *>(V),

948 m_Not(m_IntrinsicIntrinsic::amdgcn\_is\_private(

951

952 return std::pair(nullptr, -1);

953}

954

955unsigned

957 switch (Kind) {

967 }

969}

970

972 Module &M, unsigned NumParts,

973 function_ref<void(std::unique_ptr MPart)> ModuleCallback) {

974

975

976

977

982

987

991 return true;

992}

993

994

995

996

997

1001 std::optionalReloc::Model RM,

1002 std::optionalCodeModel::Model CM,

1005

1010

1012 SubtargetKey.append(FS);

1013

1014 auto &I = SubtargetMap[SubtargetKey];

1015 if (I) {

1016

1017

1018

1020 I = std::make_unique(TargetTriple, GPU, FS, *this);

1021 }

1022

1024

1025 return I.get();

1026}

1027

1031}

1032

1038 return CGPB.buildPipeline(MPM, Out, DwoOut, FileType);

1039}

1040

1041

1042

1043

1044

1047}

1048

1049namespace {

1050

1052public:

1055

1056

1057

1058 setRequiresCodeGenSCCOrder(true);

1060 }

1061

1063 return getTM();

1064 }

1065

1068

1072 C, std::make_unique(C),

1073 true);

1076 if (ST.shouldClusterStores())

1080 if (isPassEnabled(EnableVOPD, CodeGenOptLevel::Less))

1082 return DAG;

1083 }

1084

1085 bool addPreISel() override;

1086 void addMachineSSAOptimization() override;

1087 bool addILPOpts() override;

1088 bool addInstSelector() override;

1089 bool addIRTranslator() override;

1090 void addPreLegalizeMachineIR() override;

1091 bool addLegalizeMachineIR() override;

1092 void addPreRegBankSelect() override;

1093 bool addRegBankSelect() override;

1094 void addPreGlobalInstructionSelect() override;

1095 bool addGlobalInstructionSelect() override;

1096 void addFastRegAlloc() override;

1097 void addOptimizedRegAlloc() override;

1098

1099 FunctionPass *createSGPRAllocPass(bool Optimized);

1100 FunctionPass *createVGPRAllocPass(bool Optimized);

1101 FunctionPass *createWWMRegAllocPass(bool Optimized);

1102 FunctionPass *createRegAllocPass(bool Optimized) override;

1103

1104 bool addRegAssignAndRewriteFast() override;

1105 bool addRegAssignAndRewriteOptimized() override;

1106

1107 bool addPreRewrite() override;

1108 void addPostRegAlloc() override;

1109 void addPreSched2() override;

1110 void addPreEmitPass() override;

1111};

1112

1113}

1114

1117

1118

1121

1124}

1125

1129 else

1131}

1132

1137

1138

1140

1141

1143

1145

1146

1148}

1149

1152

1156

1157

1161

1165

1168

1169

1170

1172

1173

1176

1177

1180

1181

1183

1184

1187

1188

1191 }

1192

1195

1196

1201 }

1202

1204

1207

1210

1216 AAR.addAAResult(WrapperPass->getResult());

1217 }));

1218 }

1219

1221

1223 }

1224

1225

1226

1229 }

1230

1232

1233

1234

1235

1236

1237

1238

1239

1240

1241

1242

1243

1244

1247}

1248

1251

1252

1254 }

1255

1259

1261

1262

1263

1264

1265

1266

1267

1268

1269

1270

1271

1272

1273

1274

1275

1276

1278

1279

1281 }

1282

1284

1287

1288

1289

1290

1291

1293}

1294

1298 return false;

1299}

1300

1303 return false;

1304}

1305

1307

1308 return false;

1309}

1310

1316 if (ST.shouldClusterStores())

1318 return DAG;

1319}

1320

1321

1322

1323

1324

1328 if (ST.enableSIScheduler())

1330

1332 C->MF->getFunction().getFnAttribute("amdgpu-sched-strategy");

1336

1337 if (SchedStrategy == "max-ilp")

1339

1340 if (SchedStrategy == "max-memory-clause")

1342

1344}

1345

1346bool GCNPassConfig::addPreISel() {

1348

1351

1354

1355

1356

1361

1364

1365

1366

1368

1370

1373

1374 return false;

1375}

1376

1377void GCNPassConfig::addMachineSSAOptimization() {

1379

1380

1381

1382

1383

1384

1385

1386

1396 }

1399}

1400

1401bool GCNPassConfig::addILPOpts() {

1404

1406 return false;

1407}

1408

1409bool GCNPassConfig::addInstSelector() {

1413 return false;

1414}

1415

1416bool GCNPassConfig::addIRTranslator() {

1418 return false;

1419}

1420

1421void GCNPassConfig::addPreLegalizeMachineIR() {

1425}

1426

1427bool GCNPassConfig::addLegalizeMachineIR() {

1429 return false;

1430}

1431

1432void GCNPassConfig::addPreRegBankSelect() {

1436}

1437

1438bool GCNPassConfig::addRegBankSelect() {

1442 } else {

1444 }

1445 return false;

1446}

1447

1448void GCNPassConfig::addPreGlobalInstructionSelect() {

1451}

1452

1453bool GCNPassConfig::addGlobalInstructionSelect() {

1455 return false;

1456}

1457

1458void GCNPassConfig::addFastRegAlloc() {

1459

1460

1461

1462

1463

1464

1466

1468

1470}

1471

1472void GCNPassConfig::addOptimizedRegAlloc() {

1475

1476

1477

1478

1479

1482

1483

1484

1485

1487

1490

1493

1494

1495

1497

1500

1501

1502

1505

1507}

1508

1509bool GCNPassConfig::addPreRewrite() {

1512 return true;

1513}

1514

1515FunctionPass *GCNPassConfig::createSGPRAllocPass(bool Optimized) {

1516

1517 llvm::call_once(InitializeDefaultSGPRRegisterAllocatorFlag,

1518 initializeDefaultSGPRRegisterAllocatorOnce);

1519

1522 return Ctor();

1523

1524 if (Optimized)

1526

1528}

1529

1530FunctionPass *GCNPassConfig::createVGPRAllocPass(bool Optimized) {

1531

1532 llvm::call_once(InitializeDefaultVGPRRegisterAllocatorFlag,

1533 initializeDefaultVGPRRegisterAllocatorOnce);

1534

1537 return Ctor();

1538

1539 if (Optimized)

1540 return createGreedyVGPRRegisterAllocator();

1541

1542 return createFastVGPRRegisterAllocator();

1543}

1544

1545FunctionPass *GCNPassConfig::createWWMRegAllocPass(bool Optimized) {

1546

1547 llvm::call_once(InitializeDefaultWWMRegisterAllocatorFlag,

1548 initializeDefaultWWMRegisterAllocatorOnce);

1549

1552 return Ctor();

1553

1554 if (Optimized)

1555 return createGreedyWWMRegisterAllocator();

1556

1557 return createFastWWMRegisterAllocator();

1558}

1559

1560FunctionPass *GCNPassConfig::createRegAllocPass(bool Optimized) {

1562}

1563

1565 "-regalloc not supported with amdgcn. Use -sgpr-regalloc, -wwm-regalloc, "

1566 "and -vgpr-regalloc";

1567

1568bool GCNPassConfig::addRegAssignAndRewriteFast() {

1569 if (!usingDefaultRegAlloc())

1571

1573

1574 addPass(createSGPRAllocPass(false));

1575

1576

1578

1579

1581

1582

1583 addPass(createWWMRegAllocPass(false));

1584

1587

1588

1589 addPass(createVGPRAllocPass(false));

1590

1591 return true;

1592}

1593

1594bool GCNPassConfig::addRegAssignAndRewriteOptimized() {

1595 if (!usingDefaultRegAlloc())

1597

1599

1600 addPass(createSGPRAllocPass(true));

1601

1602

1603

1604

1605

1607

1608

1609

1610

1612

1613

1615

1616

1618

1619

1620 addPass(createWWMRegAllocPass(true));

1624

1625

1626 addPass(createVGPRAllocPass(true));

1627

1628 addPreRewrite();

1630

1632

1633 return true;

1634}

1635

1636void GCNPassConfig::addPostRegAlloc() {

1641}

1642

1643void GCNPassConfig::addPreSched2() {

1647}

1648

1649void GCNPassConfig::addPreEmitPass() {

1654

1656

1659

1665

1666

1667

1668

1669

1670

1671

1672

1674

1677

1680}

1681

1683 return new GCNPassConfig(*this, PM);

1684}

1685

1690}

1691

1695 return SIMachineFunctionInfo::create(

1697}

1698

1701}

1702

1708}

1709

1718

1720 return true;

1721

1722 if (MFI->Occupancy == 0) {

1723

1724 MFI->Occupancy = ST.getOccupancyWithWorkGroupSizes(MF).second;

1725 }

1726

1730 SourceRange = RegName.SourceRange;

1731 return true;

1732 }

1733 RegVal = TempReg;

1734

1735 return false;

1736 };

1737

1740 return RegName.Value.empty() && parseRegister(RegName, RegVal);

1741 };

1742

1743 if (parseOptionalRegister(YamlMFI.VGPRForAGPRCopy, MFI->VGPRForAGPRCopy))

1744 return true;

1745

1746 if (parseOptionalRegister(YamlMFI.SGPRForEXECCopy, MFI->SGPRForEXECCopy))

1747 return true;

1748

1750 MFI->LongBranchReservedReg))

1751 return true;

1752

1754

1759 "incorrect register class for field", RegName.Value,

1760 {}, {});

1761 SourceRange = RegName.SourceRange;

1762 return true;

1763 };

1764

1765 if (parseRegister(YamlMFI.ScratchRSrcReg, MFI->ScratchRSrcReg) ||

1766 parseRegister(YamlMFI.FrameOffsetReg, MFI->FrameOffsetReg) ||

1767 parseRegister(YamlMFI.StackPtrOffsetReg, MFI->StackPtrOffsetReg))

1768 return true;

1769

1770 if (MFI->ScratchRSrcReg != AMDGPU::PRIVATE_RSRC_REG &&

1771 !AMDGPU::SGPR_128RegClass.contains(MFI->ScratchRSrcReg)) {

1772 return diagnoseRegisterClass(YamlMFI.ScratchRSrcReg);

1773 }

1774

1775 if (MFI->FrameOffsetReg != AMDGPU::FP_REG &&

1776 !AMDGPU::SGPR_32RegClass.contains(MFI->FrameOffsetReg)) {

1777 return diagnoseRegisterClass(YamlMFI.FrameOffsetReg);

1778 }

1779

1780 if (MFI->StackPtrOffsetReg != AMDGPU::SP_REG &&

1781 !AMDGPU::SGPR_32RegClass.contains(MFI->StackPtrOffsetReg)) {

1783 }

1784

1787 if (parseRegister(YamlReg, ParsedReg))

1788 return true;

1789

1791 }

1792

1795 }

1798 }

1799

1800 for (const auto &YamlRegStr : YamlMFI.SpillPhysVGPRS) {

1802 if (parseRegister(YamlRegStr, ParsedReg))

1803 return true;

1804 MFI->SpillPhysVGPRs.push_back(ParsedReg);

1805 }

1806

1807 auto parseAndCheckArgument = [&](const std::optionalyaml::SIArgument &A,

1810 unsigned SystemSGPRs) {

1811

1812 if (A)

1813 return false;

1814

1815 if (A->IsRegister) {

1818 SourceRange = A->RegisterName.SourceRange;

1819 return true;

1820 }

1821 if (!RC.contains(Reg))

1822 return diagnoseRegisterClass(A->RegisterName);

1824 } else

1826

1827 if (A->Mask)

1829

1830 MFI->NumUserSGPRs += UserSGPRs;

1831 MFI->NumSystemSGPRs += SystemSGPRs;

1832 return false;

1833 };

1834

1836 (parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentBuffer,

1837 AMDGPU::SGPR_128RegClass,

1839 parseAndCheckArgument(YamlMFI.ArgInfo->DispatchPtr,

1840 AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchPtr,

1841 2, 0) ||

1842 parseAndCheckArgument(YamlMFI.ArgInfo->QueuePtr, AMDGPU::SReg_64RegClass,

1843 MFI->ArgInfo.QueuePtr, 2, 0) ||

1844 parseAndCheckArgument(YamlMFI.ArgInfo->KernargSegmentPtr,

1845 AMDGPU::SReg_64RegClass,

1847 parseAndCheckArgument(YamlMFI.ArgInfo->DispatchID,

1848 AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchID,

1849 2, 0) ||

1850 parseAndCheckArgument(YamlMFI.ArgInfo->FlatScratchInit,

1851 AMDGPU::SReg_64RegClass,

1853 parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentSize,

1854 AMDGPU::SGPR_32RegClass,

1856 parseAndCheckArgument(YamlMFI.ArgInfo->LDSKernelId,

1857 AMDGPU::SGPR_32RegClass,

1859 parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDX,

1860 AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDX,

1861 0, 1) ||

1862 parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDY,

1863 AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDY,

1864 0, 1) ||

1865 parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDZ,

1866 AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDZ,

1867 0, 1) ||

1868 parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupInfo,

1869 AMDGPU::SGPR_32RegClass,

1871 parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentWaveByteOffset,

1872 AMDGPU::SGPR_32RegClass,

1874 parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitArgPtr,

1875 AMDGPU::SReg_64RegClass,

1877 parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitBufferPtr,

1878 AMDGPU::SReg_64RegClass,

1880 parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDX,

1881 AMDGPU::VGPR_32RegClass,

1883 parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDY,

1884 AMDGPU::VGPR_32RegClass,

1886 parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDZ,

1887 AMDGPU::VGPR_32RegClass,

1889 return true;

1890

1891 if (ST.hasIEEEMode())

1893 if (ST.hasDX10ClampMode())

1895

1896

1903

1910

1913

1914 return false;

1915}

1916

1917

1918

1919

1920

1926

1927

1928

1929 disablePass<StackMapLivenessPass, FuncletLayoutPass,

1931}

1932

1936

1940

1943

1944

1945

1947

1950

1952

1953

1956

1959

1960

1964

1966

1971

1972

1973

1974

1976

1977

1978 }

1979

1981

1982

1983

1984

1985

1986

1987

1988

1989

1990

1991

1992

1993

1996}

1997

1999

2000

2001

2004

2005

2006

2007

2008

2009

2010

2011

2012

2013

2014

2015

2016

2017

2018

2019

2020

2022

2024

2027

2028

2029

2030

2031

2033}

2034

2036

2039

2042

2044

2045

2046

2047

2052

2054

2056

2057

2058

2059

2061

2063

2066

2067

2068

2070}

2071

2075

2077}

2078

2080 CreateMCStreamer) const {

2081

2082}

2083

2089}

2090

2092 AddMachinePass &addPass) const {

2094

2098 }

2105 }

2108}

2109

2115}

2116

2119 if (Opt.getNumOccurrences())

2120 return Opt;

2122 return false;

2123 return Opt;

2124}

2125

2129 else

2131}

2132

2134 AddIRPass &addPass) const {

2137

2139

2140

2141

2143

2144

2145

2147

2148

2150

2151

2152

2154}

unsigned const MachineRegisterInfo * MRI

aarch64 falkor hwpf fix Falkor HW Prefetch Fix Late Phase

static cl::opt< bool > EnableEarlyIfConversion("aarch64-enable-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"), cl::init(true))

This is the AMGPU address space based alias analysis pass.

Defines an instruction selector for the AMDGPU target.

Analyzes if a function potentially memory bound and if a kernel kernel may benefit from limiting numb...

static cl::opt< bool > EnableDCEInRA("amdgpu-dce-in-ra", cl::init(true), cl::Hidden, cl::desc("Enable machine DCE inside regalloc"))

static cl::opt< bool, true > EnableLowerModuleLDS("amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"), cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true), cl::Hidden)

static MachineSchedRegistry GCNMaxMemoryClauseSchedRegistry("gcn-max-memory-clause", "Run GCN scheduler to maximize memory clause", createGCNMaxMemoryClauseMachineScheduler)

static MachineSchedRegistry SISchedRegistry("si", "Run SI's custom scheduler", createSIMachineScheduler)

static ScheduleDAGInstrs * createIterativeILPMachineScheduler(MachineSchedContext *C)

static cl::opt< bool > EarlyInlineAll("amdgpu-early-inline-all", cl::desc("Inline all functions early"), cl::init(false), cl::Hidden)

static cl::opt< bool > EnableSwLowerLDS("amdgpu-enable-sw-lower-lds", cl::desc("Enable lowering of lds to global memory pass " "and asan instrument resulting IR."), cl::init(true), cl::Hidden)

static cl::opt< bool > EnableLowerKernelArguments("amdgpu-ir-lower-kernel-arguments", cl::desc("Lower kernel argument loads in IR pass"), cl::init(true), cl::Hidden)

static ScheduleDAGInstrs * createGCNMaxILPMachineScheduler(MachineSchedContext *C)

static cl::opt< bool > EnableSDWAPeephole("amdgpu-sdwa-peephole", cl::desc("Enable SDWA peepholer"), cl::init(true))

static MachineSchedRegistry GCNMinRegSchedRegistry("gcn-iterative-minreg", "Run GCN iterative scheduler for minimal register usage (experimental)", createMinRegScheduler)

static cl::opt< bool > EnableImageIntrinsicOptimizer("amdgpu-enable-image-intrinsic-optimizer", cl::desc("Enable image intrinsic optimizer pass"), cl::init(true), cl::Hidden)

static cl::opt< bool > HasClosedWorldAssumption("amdgpu-link-time-closed-world", cl::desc("Whether has closed-world assumption at link time"), cl::init(false), cl::Hidden)

static ScheduleDAGInstrs * createGCNMaxMemoryClauseMachineScheduler(MachineSchedContext *C)

static cl::opt< bool > EnableSIModeRegisterPass("amdgpu-mode-register", cl::desc("Enable mode register pass"), cl::init(true), cl::Hidden)

static cl::opt< std::string > AMDGPUSchedStrategy("amdgpu-sched-strategy", cl::desc("Select custom AMDGPU scheduling strategy."), cl::Hidden, cl::init(""))

static cl::opt< bool > EnableDPPCombine("amdgpu-dpp-combine", cl::desc("Enable DPP combiner"), cl::init(true))

static MachineSchedRegistry IterativeGCNMaxOccupancySchedRegistry("gcn-iterative-max-occupancy-experimental", "Run GCN scheduler to maximize occupancy (experimental)", createIterativeGCNMaxOccupancyMachineScheduler)

static cl::opt< bool > EnableSetWavePriority("amdgpu-set-wave-priority", cl::desc("Adjust wave priority"), cl::init(false), cl::Hidden)

static cl::opt< bool > LowerCtorDtor("amdgpu-lower-global-ctor-dtor", cl::desc("Lower GPU ctor / dtors to globals on the device."), cl::init(true), cl::Hidden)

static cl::opt< bool > OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden, cl::desc("Run pre-RA exec mask optimizations"), cl::init(true))

static cl::opt< bool > EnablePromoteKernelArguments("amdgpu-enable-promote-kernel-arguments", cl::desc("Enable promotion of flat kernel pointer arguments to global"), cl::Hidden, cl::init(true))

static cl::opt< bool > EnableRewritePartialRegUses("amdgpu-enable-rewrite-partial-reg-uses", cl::desc("Enable rewrite partial reg uses pass"), cl::init(true), cl::Hidden)

static cl::opt< bool > EnableLibCallSimplify("amdgpu-simplify-libcall", cl::desc("Enable amdgpu library simplifications"), cl::init(true), cl::Hidden)

static MachineSchedRegistry GCNMaxILPSchedRegistry("gcn-max-ilp", "Run GCN scheduler to maximize ilp", createGCNMaxILPMachineScheduler)

static cl::opt< bool > InternalizeSymbols("amdgpu-internalize-symbols", cl::desc("Enable elimination of non-kernel functions and unused globals"), cl::init(false), cl::Hidden)

static cl::opt< bool > EnableAMDGPUAttributor("amdgpu-attributor-enable", cl::desc("Enable AMDGPUAttributorPass"), cl::init(true), cl::Hidden)

static LLVM_READNONE StringRef getGPUOrDefault(const Triple &TT, StringRef GPU)

static Reloc::Model getEffectiveRelocModel(std::optional< Reloc::Model > RM)

Expected< AMDGPUAttributorOptions > parseAMDGPUAttributorPassOptions(StringRef Params)

static cl::opt< bool > EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden, cl::desc("Enable AMDGPU Alias Analysis"), cl::init(true))

static Expected< ScanOptions > parseAMDGPUAtomicOptimizerStrategy(StringRef Params)

static ScheduleDAGInstrs * createMinRegScheduler(MachineSchedContext *C)

static cl::opt< bool > EnableHipStdPar("amdgpu-enable-hipstdpar", cl::desc("Enable HIP Standard Parallelism Offload support"), cl::init(false), cl::Hidden)

static cl::opt< bool > EnableInsertDelayAlu("amdgpu-enable-delay-alu", cl::desc("Enable s_delay_alu insertion"), cl::init(true), cl::Hidden)

static ScheduleDAGInstrs * createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C)

static cl::opt< bool > EnableLoadStoreVectorizer("amdgpu-load-store-vectorizer", cl::desc("Enable load store vectorizer"), cl::init(true), cl::Hidden)

static bool mustPreserveGV(const GlobalValue &GV)

Predicate for Internalize pass.

static cl::opt< bool > EnableLoopPrefetch("amdgpu-loop-prefetch", cl::desc("Enable loop data prefetch on AMDGPU"), cl::Hidden, cl::init(false))

LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget()

static cl::opt< bool > NewRegBankSelect("new-reg-bank-select", cl::desc("Run amdgpu-regbankselect and amdgpu-regbanklegalize instead of " "regbankselect"), cl::init(false), cl::Hidden)

static cl::opt< bool > RemoveIncompatibleFunctions("amdgpu-enable-remove-incompatible-functions", cl::Hidden, cl::desc("Enable removal of functions when they" "use features not supported by the target GPU"), cl::init(true))

static cl::opt< bool > EnableScalarIRPasses("amdgpu-scalar-ir-passes", cl::desc("Enable scalar IR passes"), cl::init(true), cl::Hidden)

static cl::opt< bool > EnableRegReassign("amdgpu-reassign-regs", cl::desc("Enable register reassign optimizations on gfx10+"), cl::init(true), cl::Hidden)

static cl::opt< bool > OptVGPRLiveRange("amdgpu-opt-vgpr-liverange", cl::desc("Enable VGPR liverange optimizations for if-else structure"), cl::init(true), cl::Hidden)

static ScheduleDAGInstrs * createSIMachineScheduler(MachineSchedContext *C)

static cl::opt< bool > EnablePreRAOptimizations("amdgpu-enable-pre-ra-optimizations", cl::desc("Enable Pre-RA optimizations pass"), cl::init(true), cl::Hidden)

static cl::opt< ScanOptions > AMDGPUAtomicOptimizerStrategy("amdgpu-atomic-optimizer-strategy", cl::desc("Select DPP or Iterative strategy for scan"), cl::init(ScanOptions::Iterative), cl::values(clEnumValN(ScanOptions::DPP, "DPP", "Use DPP operations for scan"), clEnumValN(ScanOptions::Iterative, "Iterative", "Use Iterative approach for scan"), clEnumValN(ScanOptions::None, "None", "Disable atomic optimizer")))

static cl::opt< bool > EnableVOPD("amdgpu-enable-vopd", cl::desc("Enable VOPD, dual issue of VALU in wave32"), cl::init(true), cl::Hidden)

static cl::opt< bool > EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"), cl::init(false))

static ScheduleDAGInstrs * createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C)

static MachineSchedRegistry GCNILPSchedRegistry("gcn-iterative-ilp", "Run GCN iterative scheduler for ILP scheduling (experimental)", createIterativeILPMachineScheduler)

static cl::opt< bool > ScalarizeGlobal("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden)

static const char RegAllocOptNotSupportedMessage[]

static MachineSchedRegistry GCNMaxOccupancySchedRegistry("gcn-max-occupancy", "Run GCN scheduler to maximize occupancy", createGCNMaxOccupancyMachineScheduler)

The AMDGPU TargetMachine interface definition for hw codegen targets.

This file declares the AMDGPU-specific subclass of TargetLoweringObjectFile.

This file a TargetTransformInfo::Concept conforming object specific to the AMDGPU target machine.

Provides passes to inlining "always_inline" functions.

static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")

This header provides classes for managing passes over SCCs of the call graph.

Analysis containing CSE Info

Provides analysis for continuously CSEing during GISel passes.

#define clEnumValN(ENUMVAL, FLAGNAME, DESC)

#define LLVM_EXTERNAL_VISIBILITY

This file provides the interface for a simple, fast CSE pass.

static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")

This file defines the class GCNIterativeScheduler, which uses an iterative approach to find a best sc...

This file provides the interface for LLVM's Global Value Numbering pass which eliminates fully redund...

AcceleratorCodeSelection - Identify all functions reachable from a kernel, removing those that are un...

This file declares the IRTranslator pass.

This header defines various interfaces for pass management in LLVM.

static std::string computeDataLayout()

This file provides the interface for LLVM's Loop Data Prefetching Pass.

unsigned const TargetRegisterInfo * TRI

uint64_t IntrinsicInst * II

static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")

CGSCCAnalysisManager CGAM

FunctionAnalysisManager FAM

ModuleAnalysisManager MAM

PassInstrumentationCallbacks PIC

PassBuilder PB(Machine, PassOpts->PTO, std::nullopt, &PIC)

static bool isLTOPreLink(ThinOrFullLTOPhase Phase)

The AMDGPU TargetMachine interface definition for hw codegen targets.

This file describes the interface of the MachineFunctionPass responsible for assigning the generic vi...

assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

SI Machine Scheduler interface.

static FunctionPass * useDefaultRegisterAllocator()

-regalloc=... command line option.

Target-Independent Code Generator Pass Configuration Options pass.

LLVM IR instance of the generic uniformity analysis.

static std::unique_ptr< TargetLoweringObjectFile > createTLOF()

A manager for alias analyses.

void registerFunctionAnalysis()

Register a specific AA result.

void addAAResult(AAResultT &AAResult)

Register a specific AA result.

Legacy wrapper pass to provide the AMDGPUAAResult object.

Analysis pass providing a never-invalidated alias analysis result.

Error addInstSelector(AddMachinePass &) const

void addMachineSSAOptimization(AddMachinePass &) const

void addEarlyCSEOrGVNPass(AddIRPass &) const

void addStraightLineScalarOptimizationPasses(AddIRPass &) const

AMDGPUCodeGenPassBuilder(GCNTargetMachine &TM, const CGPassBuilderOption &Opts, PassInstrumentationCallbacks *PIC)

void addIRPasses(AddIRPass &) const

void addPreISel(AddIRPass &addPass) const

void addAsmPrinter(AddMachinePass &, CreateMCStreamer) const

void addCodeGenPrepare(AddIRPass &) const

void addILPOpts(AddMachinePass &) const

void addPostRegAlloc(AddMachinePass &) const

bool isPassEnabled(const cl::opt< bool > &Opt, CodeGenOptLevel Level=CodeGenOptLevel::Default) const

Check if a pass is enabled given Opt option.

Lower llvm.global_ctors and llvm.global_dtors to special kernels.

AMDGPUTargetMachine & getAMDGPUTargetMachine() const

std::unique_ptr< CSEConfigBase > getCSEConfig() const override

Returns the CSEConfig object to use for the current optimization level.

ScheduleDAGInstrs * createMachineScheduler(MachineSchedContext *C) const override

Create an instance of ScheduleDAGInstrs to be run within the standard MachineScheduler pass for this ...

bool isPassEnabled(const cl::opt< bool > &Opt, CodeGenOptLevel Level=CodeGenOptLevel::Default) const

Check if a pass is enabled given Opt option.

bool addPreISel() override

Methods with trivial inline returns are convenient points in the common codegen pass pipeline where t...

bool addInstSelector() override

addInstSelector - This method should install an instruction selector pass, which converts from LLVM c...

bool addGCPasses() override

addGCPasses - Add late codegen passes that analyze code for garbage collection.

void addStraightLineScalarOptimizationPasses()

AMDGPUPassConfig(TargetMachine &TM, PassManagerBase &PM)

void addIRPasses() override

Add common target configurable passes that perform LLVM IR to IR transforms following machine indepen...

void addEarlyCSEOrGVNPass()

void addCodeGenPrepare() override

Add pass to prepare the LLVM IR for code generation.

Splits the module M into N linkable partitions.

static int64_t getNullPointerValue(unsigned AddrSpace)

Get the integer value of a null pointer in the given address space.

unsigned getAddressSpaceForPseudoSourceKind(unsigned Kind) const override

getAddressSpaceForPseudoSourceKind - Given the kind of memory (e.g.

const TargetSubtargetInfo * getSubtargetImpl() const

void registerDefaultAliasAnalyses(AAManager &) override

Allow the target to register alias analyses with the AAManager for use with the new pass manager.

~AMDGPUTargetMachine() override

std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const override

If the specified predicate checks whether a generic pointer falls within a specified address space,...

StringRef getFeatureString(const Function &F) const

static bool EnableFunctionCalls

AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, std::optional< Reloc::Model > RM, std::optional< CodeModel::Model > CM, CodeGenOptLevel OL)

bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override

Returns true if a cast between SrcAS and DestAS is a noop.

void registerPassBuilderCallbacks(PassBuilder &PB) override

Allow the target to modify the pass pipeline.

static bool EnableLowerModuleLDS

StringRef getGPUName(const Function &F) const

unsigned getAssumedAddrSpace(const Value *V) const override

If the specified generic pointer could be assumed as a pointer to a specific address space,...

bool splitModule(Module &M, unsigned NumParts, function_ref< void(std::unique_ptr< Module > MPart)> ModuleCallback) override

Entry point for module splitting.

Inlines functions marked as "always_inline".

A container for analyses that lazily runs them and caches their results.

StringRef getValueAsString() const

Return the attribute's value as a string.

bool isValid() const

Return true if the attribute is any kind of attribute.

Allocate memory in an ever growing pool, as if by bump-pointer.

This class provides access to building LLVM's passes.

void addPostRegAlloc(AddMachinePass &) const

This method may be implemented by targets that want to run passes after register allocation pass pipe...

void addILPOpts(AddMachinePass &) const

Add passes that optimize instruction level parallelism for out-of-order targets.

Error buildPipeline(ModulePassManager &MPM, raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut, CodeGenFileType FileType) const

void addMachineSSAOptimization(AddMachinePass &) const

Methods with trivial inline returns are convenient points in the common codegen pass pipeline where t...

void addCodeGenPrepare(AddIRPass &) const

Add pass to prepare the LLVM IR for code generation.

void disablePass()

Allow the target to disable a specific pass by default.

void addIRPasses(AddIRPass &) const

Add common target configurable passes that perform LLVM IR to IR transforms following machine indepen...

implements a set of functionality in the TargetMachine class for targets that make use of the indepen...

void removeDeadConstantUsers() const

If there are any dead constant users dangling off of this constant, remove them.

This pass is required by interprocedural register allocation.

Lightweight error class with error context and mandatory checking.

static ErrorSuccess success()

Create a success value.

Tagged union holding either a T or a Error.

FunctionPass class - This class is used to implement most global optimizations.

@ SCHEDULE_LEGACYMAXOCCUPANCY

const SIRegisterInfo * getRegisterInfo() const override

TargetTransformInfo getTargetTransformInfo(const Function &F) const override

Get a TargetTransformInfo implementation for the target.

void registerMachineRegisterInfoCallback(MachineFunction &MF) const override

bool parseMachineFunctionInfo(const yaml::MachineFunctionInfo &, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) const override

Parse out the target's MachineFunctionInfo from the YAML representation.

yaml::MachineFunctionInfo * convertFuncInfoToYAML(const MachineFunction &MF) const override

Allocate and initialize an instance of the YAML representation of the MachineFunctionInfo.

Error buildCodeGenPipeline(ModulePassManager &MPM, raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut, CodeGenFileType FileType, const CGPassBuilderOption &Opts, PassInstrumentationCallbacks *PIC) override

yaml::MachineFunctionInfo * createDefaultFuncInfoYAML() const override

Allocate and return a default initialized instance of the YAML representation for the MachineFunction...

TargetPassConfig * createPassConfig(PassManagerBase &PM) override

Create a pass configuration object to be used by addPassToEmitX methods for generating a pipeline of ...

GCNTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, std::optional< Reloc::Model > RM, std::optional< CodeModel::Model > CM, CodeGenOptLevel OL, bool JIT)

MachineFunctionInfo * createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F, const TargetSubtargetInfo *STI) const override

Create the target's instance of MachineFunctionInfo.

The core GVN pass object.

Pass to remove unused function declarations.

This pass is responsible for selecting generic machine instructions to target-specific instructions.

A pass that internalizes all functions and variables other than those that must be preserved accordin...

Converts loops into loop-closed SSA form.

This pass implements the localization mechanism described at the top of this file.

An optimization pass inserting data prefetches in loops.

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

MachineRegisterInfo & getRegInfo()

getRegInfo - Return information about the registers currently in use.

Ty * getInfo()

getInfo - Keep track of various per-function pieces of information for backends that would like to do...

MachineRegisterInfo - Keep track of information for virtual and physical registers,...

void addDelegate(Delegate *delegate)

MachineSchedRegistry provides a selection of available machine instruction schedulers.

This interface provides simple read-only access to a block of memory, and provides simple methods for...

virtual StringRef getBufferIdentifier() const

Return an identifier for this buffer, typically the filename it was read from.

A Module instance is used to store all the information related to an LLVM module.

static const OptimizationLevel O0

Disable as many optimizations as possible.

unsigned getSpeedupLevel() const

static const OptimizationLevel O1

Optimize quickly without destroying debuggability.

This class provides access to building LLVM's passes.

void registerPipelineEarlySimplificationEPCallback(const std::function< void(ModulePassManager &, OptimizationLevel, ThinOrFullLTOPhase)> &C)

Register a callback for a default optimizer pipeline extension point.

void registerPipelineStartEPCallback(const std::function< void(ModulePassManager &, OptimizationLevel)> &C)

Register a callback for a default optimizer pipeline extension point.

void crossRegisterProxies(LoopAnalysisManager &LAM, FunctionAnalysisManager &FAM, CGSCCAnalysisManager &CGAM, ModuleAnalysisManager &MAM, MachineFunctionAnalysisManager *MFAM=nullptr)

Cross register the analysis managers through their proxies.

void registerOptimizerLastEPCallback(const std::function< void(ModulePassManager &, OptimizationLevel, ThinOrFullLTOPhase)> &C)

Register a callback for a default optimizer pipeline extension point.

void registerPeepholeEPCallback(const std::function< void(FunctionPassManager &, OptimizationLevel)> &C)

Register a callback for a default optimizer pipeline extension point.

void registerCGSCCOptimizerLateEPCallback(const std::function< void(CGSCCPassManager &, OptimizationLevel)> &C)

Register a callback for a default optimizer pipeline extension point.

void registerRegClassFilterParsingCallback(const std::function< RegAllocFilterFunc(StringRef)> &C)

Register callbacks to parse target specific filter field if regalloc pass needs it.

void registerModuleAnalyses(ModuleAnalysisManager &MAM)

Registers all available module analysis passes.

void registerFullLinkTimeOptimizationLastEPCallback(const std::function< void(ModulePassManager &, OptimizationLevel)> &C)

Register a callback for a default optimizer pipeline extension point.

void registerFunctionAnalyses(FunctionAnalysisManager &FAM)

Registers all available function analysis passes.

LLVM_ATTRIBUTE_MINSIZE std::enable_if_t<!std::is_same_v< PassT, PassManager > > addPass(PassT &&Pass)

PreservedAnalyses run(IRUnitT &IR, AnalysisManagerT &AM, ExtraArgTs... ExtraArgs)

Run all of the passes in this manager over the given unit of IR.

PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...

static PassRegistry * getPassRegistry()

getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...

Pass interface - Implemented by all 'passes'.

@ ExternalSymbolCallEntry

This pass implements the reg bank selector pass used in the GlobalISel pipeline.

RegisterPassParser class - Handle the addition of new machine passes.

RegisterRegAllocBase class - Track the registration of register allocators.

FunctionPass *(*)() FunctionPassCtor

Wrapper class representing virtual and physical registers.

This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...

bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange)

void setFlag(Register Reg, uint8_t Flag)

bool checkFlag(Register Reg, uint8_t Flag) const

void reserveWWMRegister(Register Reg)

Instances of this class encapsulate one diagnostic report, allowing printing to a raw_ostream as a ca...

Represents a location in source code.

Represents a range in source code.

A ScheduleDAG for scheduling lists of MachineInstr.

ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...

ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...

void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)

Add a postprocessing step to the DAG builder.

const TargetInstrInfo * TII

Target instruction information.

const TargetRegisterInfo * TRI

Target processor register info.

Move instructions into successor blocks when possible.

SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...

void append(StringRef RHS)

Append from a StringRef.

unsigned getMainFileID() const

const MemoryBuffer * getMemoryBuffer(unsigned i) const

StringRef - Represent a constant reference to a string, i.e.

std::pair< StringRef, StringRef > split(char Separator) const

Split into two substrings around the first occurrence of a separator character.

constexpr bool empty() const

empty - Check if the string is empty.

bool consume_front(StringRef Prefix)

Returns true if this StringRef has the given prefix and removes that prefix.

A switch()-like statement whose cases are string literals.

StringSwitch & Case(StringLiteral S, T Value)

StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)

Primary interface to the complete machine description for the target machine.

CodeGenOptLevel getOptLevel() const

Returns the optimization level: None, Less, Default, or Aggressive.

Triple TargetTriple

Triple string, CPU name, and target feature strings the TargetMachine instance is created with.

const Triple & getTargetTriple() const

const MCSubtargetInfo * getMCSubtargetInfo() const

StringRef getTargetFeatureString() const

StringRef getTargetCPU() const

std::unique_ptr< const MCSubtargetInfo > STI

void resetTargetOptions(const Function &F) const

Reset the target options based on the function's attributes.

std::unique_ptr< const MCRegisterInfo > MRI

Target-Independent Code Generator Pass Configuration Options.

virtual void addCodeGenPrepare()

Add pass to prepare the LLVM IR for code generation.

virtual bool addILPOpts()

Add passes that optimize instruction level parallelism for out-of-order targets.

virtual void addPostRegAlloc()

This method may be implemented by targets that want to run passes after register allocation pass pipe...

CodeGenOptLevel getOptLevel() const

virtual void addOptimizedRegAlloc()

addOptimizedRegAlloc - Add passes related to register allocation.

virtual void addIRPasses()

Add common target configurable passes that perform LLVM IR to IR transforms following machine indepen...

virtual void addFastRegAlloc()

addFastRegAlloc - Add the minimum set of target-independent passes that are required for fast registe...

virtual void addMachineSSAOptimization()

addMachineSSAOptimization - Add standard passes that optimize machine instructions in SSA form.

void disablePass(AnalysisID PassID)

Allow the target to disable a specific standard pass by default.

AnalysisID addPass(AnalysisID PassID)

Utilities for targets to add passes to the pass manager.

TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...

TargetSubtargetInfo - Generic base class for all target subtargets.

This pass provides access to the codegen interfaces that are needed for IR-level transformations.

Target - Wrapper for Target specific information.

Triple - Helper class for working with autoconf configuration names.

ArchType getArch() const

Get the parsed architecture type of this triple.

bool isAMDGCN() const

Tests whether the target is AMDGCN.

LLVM Value Representation.

An efficient, type-erasing, non-owning reference to a callable.

PassManagerBase - An abstract interface to allow code to add passes to a pass manager without having ...

An abstract base class for streams implementations that also support a pwrite operation.

Interfaces for registering analysis passes, producing common pass manager configurations,...

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

@ REGION_ADDRESS

Address space for region memory. (GDS)

@ LOCAL_ADDRESS

Address space for local memory.

@ CONSTANT_ADDRESS

Address space for constant memory (VTX2).

@ FLAT_ADDRESS

Address space for flat memory.

@ GLOBAL_ADDRESS

Address space for global memory (RAT0, VTX0).

@ PRIVATE_ADDRESS

Address space for private memory.

bool isFlatGlobalAddrSpace(unsigned AS)

bool isEntryFunctionCC(CallingConv::ID CC)

@ C

The default llvm calling convention, compatible with C.

BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)

Matches an And with LHS and RHS in either order.

bool match(Val *V, const Pattern &P)

deferredval_ty< Value > m_Deferred(Value *const &V)

Like m_Specific(), but works if the specific value to match is determined as part of the same match()...

class_match< Value > m_Value()

Match an arbitrary value and ignore it.

BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)

Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.

ValuesClass values(OptsTy... Options)

Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...

initializer< Ty > init(const Ty &Val)

LocationClass< Ty > location(Ty &L)

This is an optimization pass for GlobalISel generic memory operations.

FunctionPass * createFlattenCFGPass()

void initializeSIFormMemoryClausesPass(PassRegistry &)

FunctionPass * createFastRegisterAllocator()

FastRegisterAllocation Pass - This pass register allocates as fast as possible.

char & EarlyMachineLICMID

This pass performs loop invariant code motion on machine instructions.

ImmutablePass * createAMDGPUAAWrapperPass()

char & PostRAHazardRecognizerID

PostRAHazardRecognizer - This pass runs the post-ra hazard recognizer.

std::function< bool(const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, const Register Reg)> RegAllocFilterFunc

Filter function for register classes during regalloc.

FunctionPass * createAMDGPUSetWavePriorityPass()

void initializeGCNCreateVOPDPass(PassRegistry &)

char & GCNPreRAOptimizationsID

char & GCLoweringID

GCLowering Pass - Used by gc.root to perform its default lowering operations.

void initializeGCNPreRAOptimizationsPass(PassRegistry &)

Pass * createLoadStoreVectorizerPass()

Create a legacy pass manager instance of the LoadStoreVectorizer pass.

ModulePass * createExpandVariadicsPass(ExpandVariadicsMode)

void initializeGCNRewritePartialRegUsesPass(llvm::PassRegistry &)

void initializeAMDGPUAttributorLegacyPass(PassRegistry &)

FunctionPass * createSIAnnotateControlFlowLegacyPass()

Create the annotation pass.

FunctionPass * createSIModeRegisterPass()

void initializeSILowerWWMCopiesLegacyPass(PassRegistry &)

FunctionPass * createGreedyRegisterAllocator()

Greedy register allocation pass - This pass implements a global register allocator for optimized buil...

void initializeAMDGPUAAWrapperPassPass(PassRegistry &)

void initializeSIShrinkInstructionsLegacyPass(PassRegistry &)

ModulePass * createAMDGPULowerBufferFatPointersPass()

void initializeR600ClauseMergePassPass(PassRegistry &)

void initializeSIModeRegisterPass(PassRegistry &)

ModulePass * createAMDGPUCtorDtorLoweringLegacyPass()

ModulePass * createAMDGPUSwLowerLDSLegacyPass(const AMDGPUTargetMachine *TM=nullptr)

void initializeAMDGPURewriteUndefForPHILegacyPass(PassRegistry &)

FunctionPass * createAMDGPUPreLegalizeCombiner(bool IsOptNone)

char & GCNRewritePartialRegUsesID

FunctionPass * createAMDGPUPostLegalizeCombiner(bool IsOptNone)

void initializeAMDGPUSwLowerLDSLegacyPass(PassRegistry &)

std::error_code inconvertibleErrorCode()

The value returned by this function can be returned from convertToErrorCode for Error values where no...

void initializeGCNPreRALongBranchRegPass(PassRegistry &)

void initializeSILowerSGPRSpillsLegacyPass(PassRegistry &)

std::unique_ptr< ScheduleDAGMutation > createIGroupLPDAGMutation(AMDGPU::SchedulingPhase Phase)

Phase specifes whether or not this is a reentry into the IGroupLPDAGMutation.

void initializeAMDGPUDAGToDAGISelLegacyPass(PassRegistry &)

FunctionPass * createNaryReassociatePass()

char & PatchableFunctionID

This pass implements the "patchable-function" attribute.

char & SIOptimizeExecMaskingLegacyID

char & PostRASchedulerID

PostRAScheduler - This pass performs post register allocation scheduling.

void initializeR600ExpandSpecialInstrsPassPass(PassRegistry &)

void initializeR600PacketizerPass(PassRegistry &)

std::unique_ptr< ScheduleDAGMutation > createVOPDPairingMutation()

ModulePass * createAMDGPUAlwaysInlinePass(bool GlobalOpt=true)

void initializeSIPreEmitPeepholePass(PassRegistry &)

void initializeSIFoldOperandsLegacyPass(PassRegistry &)

char & SILoadStoreOptimizerLegacyID

void initializeAMDGPUGlobalISelDivergenceLoweringPass(PassRegistry &)

std::unique_ptr< CSEConfigBase > getStandardCSEConfigForOpt(CodeGenOptLevel Level)

Target & getTheR600Target()

The target for R600 GPUs.

char & MachineSchedulerID

MachineScheduler - This pass schedules machine instructions.

Pass * createStructurizeCFGPass(bool SkipUniformRegions=false)

When SkipUniformRegions is true the structurizer will not structurize regions that only contain uniform...

void initializeGCNNSAReassignPass(PassRegistry &)

char & PostMachineSchedulerID

PostMachineScheduler - This pass schedules machine instructions postRA.

void initializeAMDGPUOpenCLEnqueuedBlockLoweringLegacyPass(PassRegistry &)

void initializeSIInsertWaitcntsPass(PassRegistry &)

ScheduleDAGMILive * createGenericSchedLive(MachineSchedContext *C)

Create the standard converging machine scheduler.

char & SIFormMemoryClausesID

void initializeSILoadStoreOptimizerLegacyPass(PassRegistry &)

void initializeAMDGPULowerModuleLDSLegacyPass(PassRegistry &)

void initializeAMDGPUCtorDtorLoweringLegacyPass(PassRegistry &)

char & EarlyIfConverterLegacyID

EarlyIfConverter - This pass performs if-conversion on SSA form by inserting cmov instructions.

void initializeAMDGPURegBankCombinerPass(PassRegistry &)

void initializeSILateBranchLoweringPass(PassRegistry &)

ThinOrFullLTOPhase

This enumerates the LLVM full LTO or ThinLTO optimization phases.

char & AMDGPUUnifyDivergentExitNodesID

FunctionPass * createAMDGPUAtomicOptimizerPass(ScanOptions ScanStrategy)

FunctionPass * createAMDGPUPreloadKernArgPrologLegacyPass()

char & SIOptimizeVGPRLiveRangeLegacyID

char & ShadowStackGCLoweringID

ShadowStackGCLowering - Implements the custom lowering mechanism used by the shadow stack GC.

void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &)

void initializeAMDGPUExternalAAWrapperPass(PassRegistry &)

auto formatv(bool Validate, const char *Fmt, Ts &&...Vals)

void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &)

CodeModel::Model getEffectiveCodeModel(std::optional< CodeModel::Model > CM, CodeModel::Model Default)

Helper method for getting the code model, returning Default if CM does not have a value.

char & SILateBranchLoweringPassID

char & BranchRelaxationPassID

BranchRelaxation - This pass replaces branches that need to jump further than is supported by a branc...

FunctionPass * createSinkingPass()

CGSCCToFunctionPassAdaptor createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false, bool NoRerun=false)

A function to deduce a function pass type and wrap it in the templated adaptor.

void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &)

CodeGenFileType

These enums are meant to be passed into addPassesToEmitFile to indicate what type of file to emit,...

void initializeSIPostRABundlerPass(PassRegistry &)

void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry &)

char & GCNDPPCombineLegacyID

void initializeSIWholeQuadModePass(PassRegistry &)

std::unique_ptr< ScheduleDAGMutation > createStoreClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, bool ReorderWhileClustering=false)

If ReorderWhileClustering is set to true, no attempt will be made to reduce reordering due to store c...

FunctionPass * createLoopDataPrefetchPass()

FunctionPass * createAMDGPULowerKernelArgumentsPass()

char & AMDGPUInsertDelayAluID

Pass * createAMDGPUAnnotateKernelFeaturesPass()

void report_fatal_error(Error Err, bool gen_crash_diag=true)

Report a serious error, calling any installed error handler.

std::unique_ptr< ScheduleDAGMutation > createAMDGPUMacroFusionDAGMutation()

Note that you have to add: DAG.addMutation(createAMDGPUMacroFusionDAGMutation()); to AMDGPUPassConfig...

char & StackMapLivenessID

StackMapLiveness - This pass analyses the register live-out set of stackmap/patchpoint intrinsics and...

char & SILowerWWMCopiesLegacyID

FunctionPass * createUnifyLoopExitsPass()

char & SIOptimizeExecMaskingPreRAID

FunctionPass * createFixIrreduciblePass()

char & FuncletLayoutID

This pass lays out funclets contiguously.

void initializeSIInsertHardClausesPass(PassRegistry &)

char & DetectDeadLanesID

This pass adds dead/undef flags after analyzing subregister lanes.

void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &)

CodeGenOptLevel

Code generation optimization level.

void initializeAMDGPUReserveWWMRegsPass(PassRegistry &)

ModulePass * createAMDGPUPrintfRuntimeBinding()

char & StackSlotColoringID

StackSlotColoring - This pass performs stack slot coloring.

void initializeSIMemoryLegalizerPass(PassRegistry &)

Pass * createAlwaysInlinerLegacyPass(bool InsertLifetime=true)

Create a legacy pass manager instance of a pass to inline and remove functions marked as "always_inli...

void initializeR600ControlFlowFinalizerPass(PassRegistry &)

void initializeAMDGPUImageIntrinsicOptimizerPass(PassRegistry &)

void initializeSILowerControlFlowLegacyPass(PassRegistry &)

char & SIPreAllocateWWMRegsLegacyID

ModulePass * createAMDGPULowerModuleLDSLegacyPass(const AMDGPUTargetMachine *TM=nullptr)

void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &)

char & AMDGPUReserveWWMRegsID

FunctionPass * createAMDGPUPromoteAlloca()

FunctionPass * createSeparateConstOffsetFromGEPPass(bool LowerGEP=false)

char & SIPreEmitPeepholeID

ModulePass * createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *)

void initializeGCNRegPressurePrinterPass(PassRegistry &)

void initializeSILowerI1CopiesLegacyPass(PassRegistry &)

char & SILowerSGPRSpillsLegacyID

void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &)

FunctionPass * createBasicRegisterAllocator()

BasicRegisterAllocation Pass - This pass implements a degenerate global register allocator using the ...

void initializeGlobalISel(PassRegistry &)

Initialize all passes linked into the GlobalISel library.

char & SILowerControlFlowLegacyID

ModulePass * createR600OpenCLImageTypeLoweringPass()

FunctionPass * createAMDGPUCodeGenPreparePass()

void initializeSIAnnotateControlFlowLegacyPass(PassRegistry &)

ModulePass * createAMDGPUOpenCLEnqueuedBlockLoweringLegacyPass()

FunctionPass * createAMDGPUISelDag(TargetMachine &TM, CodeGenOptLevel OptLevel)

This pass converts a legalized DAG into a AMDGPU-specific.

void initializeSIPreAllocateWWMRegsLegacyPass(PassRegistry &)

void initializeSIFixVGPRCopiesLegacyPass(PassRegistry &)

Target & getTheGCNTarget()

The target for GCN GPUs.

void initializeSIFixSGPRCopiesLegacyPass(PassRegistry &)

void initializeAMDGPUAtomicOptimizerPass(PassRegistry &)

FunctionPass * createGVNPass()

Create a legacy GVN pass.

FunctionPass * createAMDGPURegBankSelectPass()

FunctionPass * createAMDGPURegBankCombiner(bool IsOptNone)

FunctionPass * createAMDGPURegBankLegalizePass()

char & MachineCSELegacyID

MachineCSE - This pass performs global CSE on machine instructions.

std::unique_ptr< ScheduleDAGMutation > createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, bool ReorderWhileClustering=false)

If ReorderWhileClustering is set to true, no attempt will be made to reduce reordering due to store c...

void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry &)

void initializeAMDGPUMarkLastScratchLoadPass(PassRegistry &)

char & LiveVariablesID

LiveVariables pass - This pass computes the set of blocks in which each variable is live and sets mac...

void initializeAMDGPUCodeGenPreparePass(PassRegistry &)

FunctionPass * createAMDGPURewriteUndefForPHILegacyPass()

void initializeSIOptimizeExecMaskingLegacyPass(PassRegistry &)

void call_once(once_flag &flag, Function &&F, Args &&... ArgList)

Execute the function specified as a parameter once.

FunctionPass * createSILowerI1CopiesLegacyPass()

void initializeAMDGPULowerKernelAttributesPass(PassRegistry &)

char & SIInsertHardClausesID

void initializeAMDGPUResourceUsageAnalysisPass(PassRegistry &)

char & SIFixSGPRCopiesLegacyID

void initializeGCNDPPCombineLegacyPass(PassRegistry &)

FunctionPass * createInferAddressSpacesPass(unsigned AddressSpace=~0u)

char & SIPeepholeSDWALegacyID

char & VirtRegRewriterID

VirtRegRewriter pass.

char & SIFoldOperandsLegacyID

FunctionPass * createLowerSwitchPass()

void initializeAMDGPUPreloadKernArgPrologLegacyPass(PassRegistry &)

FunctionPass * createVirtRegRewriter(bool ClearVirtRegs=true)

void initializeR600VectorRegMergerPass(PassRegistry &)

ImmutablePass * createExternalAAWrapperPass(std::function< void(Pass &, Function &, AAResults &)> Callback)

A wrapper pass around a callback which can be used to populate the AAResults in the AAResultsWrapperP...

FunctionPass * createAMDGPUGlobalISelDivergenceLoweringPass()

FunctionPass * createSIMemoryLegalizerPass()

void initializeAMDGPULateCodeGenPrepareLegacyPass(PassRegistry &)

void initializeSIOptimizeVGPRLiveRangeLegacyPass(PassRegistry &)

void initializeSIPeepholeSDWALegacyPass(PassRegistry &)

void initializeAMDGPURegBankLegalizePass(PassRegistry &)

char & TwoAddressInstructionPassID

TwoAddressInstruction - This pass reduces two-address instructions to use two operands.

void initializeAMDGPURegBankSelectPass(PassRegistry &)

FunctionPass * createAMDGPULateCodeGenPrepareLegacyPass()

FunctionPass * createAtomicExpandLegacyPass()

AtomicExpandPass - At IR level this pass replace atomic instructions with __atomic_* library calls,...

MCRegisterInfo * createGCNMCRegisterInfo(AMDGPUDwarfFlavour DwarfFlavour)

FunctionPass * createStraightLineStrengthReducePass()

FunctionPass * createAMDGPUImageIntrinsicOptimizerPass(const TargetMachine *)

void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry &)

void initializeAMDGPULowerBufferFatPointersPass(PassRegistry &)

FunctionPass * createSIInsertWaitcntsPass()

FunctionPass * createAMDGPUAnnotateUniformValuesLegacy()

FunctionPass * createEarlyCSEPass(bool UseMemorySSA=false)

char & PHIEliminationID

PHIElimination - This pass eliminates machine instruction PHI nodes by inserting copy instructions.

bool parseNamedRegisterReference(PerFunctionMIParsingState &PFS, Register &Reg, StringRef Src, SMDiagnostic &Error)

FunctionPass * createSIShrinkInstructionsLegacyPass()

char & AMDGPUMarkLastScratchLoadID

char & RenameIndependentSubregsID

This pass detects subregister lanes in a virtual register that are used independently of other lanes ...

void initializeAMDGPUAnnotateUniformValuesLegacyPass(PassRegistry &)

std::unique_ptr< ScheduleDAGMutation > createAMDGPUExportClusteringDAGMutation()

void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry &)

void initializeAMDGPUPromoteAllocaPass(PassRegistry &)

void initializeAMDGPURemoveIncompatibleFunctionsLegacyPass(PassRegistry &)

void initializeAMDGPUInsertDelayAluPass(PassRegistry &)

void initializeAMDGPUUnifyMetadataPass(PassRegistry &)

void initializeAMDGPUAlwaysInlinePass(PassRegistry &)

char & DeadMachineInstructionElimID

DeadMachineInstructionElim - This pass removes dead machine instructions.

char & AMDGPUPerfHintAnalysisLegacyID

char & GCNPreRALongBranchRegID

void initializeAMDGPUPromoteKernelArgumentsPass(PassRegistry &)

ArgDescriptor PrivateSegmentBuffer

ArgDescriptor WorkGroupIDY

ArgDescriptor WorkGroupIDZ

ArgDescriptor PrivateSegmentSize

ArgDescriptor ImplicitArgPtr

ArgDescriptor PrivateSegmentWaveByteOffset

ArgDescriptor WorkGroupInfo

ArgDescriptor WorkItemIDZ

ArgDescriptor WorkItemIDY

ArgDescriptor LDSKernelId

ArgDescriptor KernargSegmentPtr

ArgDescriptor WorkItemIDX

ArgDescriptor FlatScratchInit

ArgDescriptor DispatchPtr

ArgDescriptor ImplicitBufferPtr

ArgDescriptor WorkGroupIDX

static ArgDescriptor createStack(unsigned Offset, unsigned Mask=~0u)

static ArgDescriptor createArg(const ArgDescriptor &Arg, unsigned Mask)

static ArgDescriptor createRegister(Register Reg, unsigned Mask=~0u)

bool RequiresCodeGenSCCOrder

DenormalModeKind Input

Denormal treatment kind for floating point instruction inputs in the default floating-point environme...

@ PreserveSign

The sign of a flushed-to-zero number is preserved in the sign of 0.

@ IEEE

IEEE-754 denormal numbers preserved.

DenormalModeKind Output

Denormal flushing mode for floating point instruction results in the default floating point environme...

A simple and fast domtree-based CSE pass.

MachineFunctionInfo - This class can be derived from and used by targets to hold private target-speci...

MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...

This class manages callbacks registration, as well as provides a way for PassInstrumentation to pass ...

StringMap< VRegInfo * > VRegInfosNamed

DenseMap< Register, VRegInfo * > VRegInfos

RegisterTargetMachine - Helper template for registering a target machine implementation,...

A utility pass template to force an analysis result to be available.

bool DX10Clamp

Used by the vector ALU to force DX10-style treatment of NaNs: when set, clamp NaN to zero; otherwise,...

DenormalMode FP64FP16Denormals

If this is set, neither input nor output denormals are flushed for both f64 and f16/v2f16 instructions...

bool IEEE

Floating point opcodes that support exception flag gathering quiet and propagate signaling NaN inputs...

DenormalMode FP32Denormals

If this is set, neither input nor output denormals are flushed for most f32 instructions.

The llvm::once_flag structure.

Targets should override this in a way that mirrors the implementation of llvm::MachineFunctionInfo.

StringValue SGPRForEXECCopy

SmallVector< StringValue > WWMReservedRegs

StringValue FrameOffsetReg

StringValue LongBranchReservedReg

StringValue VGPRForAGPRCopy

std::optional< SIArgumentInfo > ArgInfo

SmallVector< StringValue, 2 > SpillPhysVGPRS

StringValue ScratchRSrcReg

StringValue StackPtrOffsetReg

bool FP64FP16OutputDenormals

bool FP64FP16InputDenormals

A wrapper around std::string which contains a source range that's being set during parsing.