AMDGPUTargetMachine.cpp Source File (original) (raw)

69#include "llvm/IR/IntrinsicsAMDGPU.h"

100#include

101

102using namespace llvm;

104

105namespace {

106class SGPRRegisterRegAlloc : public RegisterRegAllocBase {

107public:

110};

111

112class VGPRRegisterRegAlloc : public RegisterRegAllocBase {

113public:

116};

117

118class WWMRegisterRegAlloc : public RegisterRegAllocBase {

119public:

122};

123

129}

130

135 return !static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(RC);

136}

137

144 return !static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(RC) &&

146}

147

148

150

151

152

153static llvm::once_flag InitializeDefaultSGPRRegisterAllocatorFlag;

154static llvm::once_flag InitializeDefaultVGPRRegisterAllocatorFlag;

155static llvm::once_flag InitializeDefaultWWMRegisterAllocatorFlag;

156

157static SGPRRegisterRegAlloc

158defaultSGPRRegAlloc("default",

159 "pick SGPR register allocator based on -O option",

161

162static cl::opt<SGPRRegisterRegAlloc::FunctionPassCtor, false,

165 cl::desc("Register allocator to use for SGPRs"));

166

167static cl::opt<VGPRRegisterRegAlloc::FunctionPassCtor, false,

170 cl::desc("Register allocator to use for VGPRs"));

171

172static cl::opt<WWMRegisterRegAlloc::FunctionPassCtor, false,

174 WWMRegAlloc("wwm-regalloc", cl::Hidden,

176 cl::desc("Register allocator to use for WWM registers"));

177

178static void initializeDefaultSGPRRegisterAllocatorOnce() {

180

181 if (!Ctor) {

182 Ctor = SGPRRegAlloc;

183 SGPRRegisterRegAlloc::setDefault(SGPRRegAlloc);

184 }

185}

186

187static void initializeDefaultVGPRRegisterAllocatorOnce() {

189

190 if (!Ctor) {

191 Ctor = VGPRRegAlloc;

192 VGPRRegisterRegAlloc::setDefault(VGPRRegAlloc);

193 }

194}

195

196static void initializeDefaultWWMRegisterAllocatorOnce() {

198

199 if (!Ctor) {

200 Ctor = WWMRegAlloc;

201 WWMRegisterRegAlloc::setDefault(WWMRegAlloc);

202 }

203}

204

205static FunctionPass *createBasicSGPRRegisterAllocator() {

207}

208

209static FunctionPass *createGreedySGPRRegisterAllocator() {

211}

212

213static FunctionPass *createFastSGPRRegisterAllocator() {

215}

216

217static FunctionPass *createBasicVGPRRegisterAllocator() {

219}

220

221static FunctionPass *createGreedyVGPRRegisterAllocator() {

223}

224

225static FunctionPass *createFastVGPRRegisterAllocator() {

227}

228

229static FunctionPass *createBasicWWMRegisterAllocator() {

231}

232

233static FunctionPass *createGreedyWWMRegisterAllocator() {

235}

236

237static FunctionPass *createFastWWMRegisterAllocator() {

239}

240

241static SGPRRegisterRegAlloc basicRegAllocSGPR(

242 "basic", "basic register allocator", createBasicSGPRRegisterAllocator);

243static SGPRRegisterRegAlloc greedyRegAllocSGPR(

244 "greedy", "greedy register allocator", createGreedySGPRRegisterAllocator);

245

246static SGPRRegisterRegAlloc fastRegAllocSGPR(

247 "fast", "fast register allocator", createFastSGPRRegisterAllocator);

248

249

250static VGPRRegisterRegAlloc basicRegAllocVGPR(

251 "basic", "basic register allocator", createBasicVGPRRegisterAllocator);

252static VGPRRegisterRegAlloc greedyRegAllocVGPR(

253 "greedy", "greedy register allocator", createGreedyVGPRRegisterAllocator);

254

255static VGPRRegisterRegAlloc fastRegAllocVGPR(

256 "fast", "fast register allocator", createFastVGPRRegisterAllocator);

257static WWMRegisterRegAlloc basicRegAllocWWMReg("basic",

258 "basic register allocator",

259 createBasicWWMRegisterAllocator);

260static WWMRegisterRegAlloc

261 greedyRegAllocWWMReg("greedy", "greedy register allocator",

262 createGreedyWWMRegisterAllocator);

263static WWMRegisterRegAlloc fastRegAllocWWMReg("fast", "fast register allocator",

264 createFastWWMRegisterAllocator);

265

267 return Phase == ThinOrFullLTOPhase::FullLTOPreLink ||

268 Phase == ThinOrFullLTOPhase::ThinLTOPreLink;

269}

270}

271

274 cl::desc("Run early if-conversion"),

276

279 cl::desc("Run pre-RA exec mask optimizations"),

281

284 cl::desc("Lower GPU ctor / dtors to globals on the device."),

286

287

289 "amdgpu-load-store-vectorizer",

290 cl::desc("Enable load store vectorizer"),

293

294

296 "amdgpu-scalarize-global-loads",

297 cl::desc("Enable global load scalarization"),

300

301

303 "amdgpu-internalize-symbols",

304 cl::desc("Enable elimination of non-kernel functions and unused globals"),

307

308

310 "amdgpu-early-inline-all",

311 cl::desc("Inline all functions early"),

314

316 "amdgpu-enable-remove-incompatible-functions", cl::Hidden,

317 cl::desc("Enable removal of functions when they"

318 "use features not supported by the target GPU"),

320

322 "amdgpu-sdwa-peephole",

323 cl::desc("Enable SDWA peepholer"),

325

327 "amdgpu-dpp-combine",

328 cl::desc("Enable DPP combiner"),

330

331

333 cl::desc("Enable AMDGPU Alias Analysis"),

335

336

338 "amdgpu-simplify-libcall",

339 cl::desc("Enable amdgpu library simplifications"),

342

344 "amdgpu-ir-lower-kernel-arguments",

345 cl::desc("Lower kernel argument loads in IR pass"),

348

350 "amdgpu-reassign-regs",

351 cl::desc("Enable register reassign optimizations on gfx10+"),

354

356 "amdgpu-opt-vgpr-liverange",

357 cl::desc("Enable VGPR liverange optimizations for if-else structure"),

359

361 "amdgpu-atomic-optimizer-strategy",

362 cl::desc("Select DPP or Iterative strategy for scan"),

363 cl::init(ScanOptions::Iterative),

365 clEnumValN(ScanOptions::DPP, "DPP", "Use DPP operations for scan"),

366 clEnumValN(ScanOptions::Iterative, "Iterative",

367 "Use Iterative approach for scan"),

368 clEnumValN(ScanOptions::None, "None", "Disable atomic optimizer")));

369

370

372 "amdgpu-mode-register",

373 cl::desc("Enable mode register pass"),

376

377

380 cl::desc("Enable s_delay_alu insertion"),

382

383

386 cl::desc("Enable VOPD, dual issue of VALU in wave32"),

388

389

393 cl::desc("Enable machine DCE inside regalloc"));

394

396 cl::desc("Adjust wave priority"),

398

400 "amdgpu-scalar-ir-passes",

401 cl::desc("Enable scalar IR passes"),

404

407 cl::desc("Enable lowering of lds to global memory pass "

408 "and asan instrument resulting IR."),

410

412 "amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"),

415

417 "amdgpu-enable-pre-ra-optimizations",

418 cl::desc("Enable Pre-RA optimizations pass"), cl::init(true),

420

422 "amdgpu-enable-promote-kernel-arguments",

423 cl::desc("Enable promotion of flat kernel pointer arguments to global"),

425

427 "amdgpu-enable-image-intrinsic-optimizer",

428 cl::desc("Enable image intrinsic optimizer pass"), cl::init(true),

430

433 cl::desc("Enable loop data prefetch on AMDGPU"),

435

438 cl::desc("Select custom AMDGPU scheduling strategy."),

440

442 "amdgpu-enable-rewrite-partial-reg-uses",

443 cl::desc("Enable rewrite partial reg uses pass"), cl::init(true),

445

447 "amdgpu-enable-hipstdpar",

448 cl::desc("Enable HIP Standard Parallelism Offload support"), cl::init(false),

450

453 cl::desc("Enable AMDGPUAttributorPass"),

455

457 "new-reg-bank-select",

458 cl::desc("Run amdgpu-regbankselect and amdgpu-regbanklegalize instead of "

459 "regbankselect"),

461

463 "amdgpu-link-time-closed-world",

464 cl::desc("Whether has closed-world assumption at link time"),

466

468

471

549}

550

551static std::unique_ptr createTLOF(const Triple &TT) {

552 return std::make_unique();

553}

554

557}

558

565 if (ST.shouldClusterStores())

570 return DAG;

571}

572

578 return DAG;

579}

580

585 C, std::make_unique(C));

587 if (ST.shouldClusterStores())

590 return DAG;

591}

592

599 if (ST.shouldClusterStores())

601 return DAG;

602}

603

607}

608

614 if (ST.shouldClusterStores())

617 return DAG;

618}

619

623

626 "Run GCN scheduler to maximize occupancy",

628

632

634 "gcn-max-memory-clause", "Run GCN scheduler to maximize memory clause",

636

638 "gcn-iterative-max-occupancy-experimental",

639 "Run GCN scheduler to maximize occupancy (experimental)",

641

643 "gcn-iterative-minreg",

644 "Run GCN iterative scheduler for minimal register usage (experimental)",

646

648 "gcn-iterative-ilp",

649 "Run GCN iterative scheduler for ILP scheduling (experimental)",

651

654

655 return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"

656 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1";

657 }

658

659

660

661

662

663

664

665 return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"

666 "-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-"

667 "v32:32-v48:64-v96:"

668 "128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-"

669 "G1-ni:7:8:9";

670}

671

674 if (!GPU.empty())

675 return GPU;

676

677

679 return TT.getOS() == Triple::AMDHSA ? "generic-hsa" : "generic";

680

681 return "r600";

682}

683

685

686

688}

689

693 std::optionalReloc::Model RM,

694 std::optionalCodeModel::Model CM,

700 TLOF(createTLOF(getTargetTriple())) {

707 }

708}

709

712

714

716 Attribute GPUAttr = F.getFnAttribute("target-cpu");

718}

719

721 Attribute FSAttr = F.getFnAttribute("target-features");

722

725}

726

727

729 if (const Function *F = dyn_cast(&GV))

730 return F->isDeclaration() || F->getName().starts_with("__asan_") ||

731 F->getName().starts_with("__sanitizer_") ||

733

736}

737

740}

741

744 if (Params.empty())

752 if (Result)

753 return *Result;

755}

756

760 while (!Params.empty()) {

762 std::tie(ParamName, Params) = Params.split(';');

763 if (ParamName == "closed-world") {

764 Result.IsClosedWorld = true;

765 } else {

766 return make_error(

767 formatv("invalid AMDGPUAttributor pass parameter '{0}' ", ParamName)

768 .str(),

770 }

771 }

772 return Result;

773}

774

776

777#define GET_PASS_REGISTRY "AMDGPUPassRegistry.def"

779

784 });

785

790

792 return;

793

795

796

800 }

801

804 });

805

809 return;

810

814 });

815

819 return;

820

822

823

824

825

829

830

831

833

834

835

837

839

840

841

843 }

844

846 });

847

848

855 }

856 });

857

860

861

862

868

872 }

878 }

879 }

880 });

881

884 if (FilterName == "sgpr")

885 return onlyAllocateSGPRs;

886 if (FilterName == "vgpr")

887 return onlyAllocateVGPRs;

888 if (FilterName == "wwm")

889 return onlyAllocateWWMRegs;

890 return nullptr;

891 });

892}

893

898 ? -1

899 : 0;

900}

901

903 unsigned DestAS) const {

906}

907

909 const auto *LD = dyn_cast(V);

910 if (!LD)

912

913

915

916 const auto *Ptr = LD->getPointerOperand();

919

920

921

922

924}

925

926std::pair<const Value *, unsigned>

928 if (auto *II = dyn_cast(V)) {

929 switch (II->getIntrinsicID()) {

930 case Intrinsic::amdgcn_is_shared:

932 case Intrinsic::amdgcn_is_private:

934 default:

935 break;

936 }

937 return std::pair(nullptr, -1);

938 }

939

940

941

944 const_cast<Value *>(V),

946 m_Not(m_IntrinsicIntrinsic::amdgcn\_is\_private(

949

950 return std::pair(nullptr, -1);

951}

952

953unsigned

955 switch (Kind) {

965 }

967}

968

970 Module &M, unsigned NumParts,

971 function_ref<void(std::unique_ptr MPart)> ModuleCallback) {

972

973

974

975

980

985

989 return true;

990}

991

992

993

994

995

999 std::optionalReloc::Model RM,

1000 std::optionalCodeModel::Model CM,

1003

1008

1010 SubtargetKey.append(FS);

1011

1012 auto &I = SubtargetMap[SubtargetKey];

1013 if () {

1014

1015

1016

1018 I = std::make_unique(TargetTriple, GPU, FS, *this);

1019 }

1020

1022

1023 return I.get();

1024}

1025

1029}

1030

1036 return CGPB.buildPipeline(MPM, Out, DwoOut, FileType);

1037}

1038

1039

1040

1041

1042

1045}

1046

1047namespace {

1048

1050public:

1053

1054

1055

1056 setRequiresCodeGenSCCOrder(true);

1058 }

1059

1061 return getTM();

1062 }

1063

1066

1070 C, std::make_unique(C),

1071 true);

1074 if (ST.shouldClusterStores())

1076 DAG->addMutation(ST.createFillMFMAShadowMutation(DAG->TII));

1079 if (isPassEnabled(EnableVOPD, CodeGenOptLevel::Less))

1081 return DAG;

1082 }

1083

1084 bool addPreISel() override;

1085 void addMachineSSAOptimization() override;

1086 bool addILPOpts() override;

1087 bool addInstSelector() override;

1088 bool addIRTranslator() override;

1089 void addPreLegalizeMachineIR() override;

1090 bool addLegalizeMachineIR() override;

1091 void addPreRegBankSelect() override;

1092 bool addRegBankSelect() override;

1093 void addPreGlobalInstructionSelect() override;

1094 bool addGlobalInstructionSelect() override;

1095 void addFastRegAlloc() override;

1096 void addOptimizedRegAlloc() override;

1097

1098 FunctionPass *createSGPRAllocPass(bool Optimized);

1099 FunctionPass *createVGPRAllocPass(bool Optimized);

1100 FunctionPass *createWWMRegAllocPass(bool Optimized);

1101 FunctionPass *createRegAllocPass(bool Optimized) override;

1102

1103 bool addRegAssignAndRewriteFast() override;

1104 bool addRegAssignAndRewriteOptimized() override;

1105

1106 bool addPreRewrite() override;

1107 void addPostRegAlloc() override;

1108 void addPreSched2() override;

1109 void addPreEmitPass() override;

1110};

1111

1112}

1113

1116

1117

1120

1123}

1124

1128 else

1130}

1131

1136

1137

1139

1140

1142

1144

1145

1147}

1148

1151

1155

1156

1160

1164

1167

1168

1169

1171

1172

1175

1176

1179

1180

1182

1183

1186

1187

1190 }

1191

1194

1195

1200 }

1201

1203

1206

1209

1215 AAR.addAAResult(WrapperPass->getResult());

1216 }));

1217 }

1218

1220

1222 }

1223

1224

1225

1228 }

1229

1231

1232

1233

1234

1235

1236

1237

1238

1239

1240

1241

1242

1243

1246}

1247

1250

1251

1253 }

1254

1258

1260

1261

1262

1263

1264

1265

1266

1267

1268

1269

1270

1271

1272

1273

1274

1275

1277

1278

1280 }

1281

1283

1286

1287

1288

1289

1290

1292}

1293

1297 return false;

1298}

1299

1302 return false;

1303}

1304

1306

1307 return false;

1308}

1309

1315 if (ST.shouldClusterStores())

1317 return DAG;

1318}

1319

1320

1321

1322

1323

1327 if (ST.enableSIScheduler())

1329

1331 C->MF->getFunction().getFnAttribute("amdgpu-sched-strategy");

1335

1336 if (SchedStrategy == "max-ilp")

1338

1339 if (SchedStrategy == "max-memory-clause")

1341

1343}

1344

1345bool GCNPassConfig::addPreISel() {

1347

1350

1353

1354

1355

1360

1363

1364

1365

1367

1369

1372

1373 return false;

1374}

1375

1376void GCNPassConfig::addMachineSSAOptimization() {

1378

1379

1380

1381

1382

1383

1384

1385

1395 }

1398}

1399

1400bool GCNPassConfig::addILPOpts() {

1403

1405 return false;

1406}

1407

1408bool GCNPassConfig::addInstSelector() {

1412 return false;

1413}

1414

1415bool GCNPassConfig::addIRTranslator() {

1417 return false;

1418}

1419

1420void GCNPassConfig::addPreLegalizeMachineIR() {

1424}

1425

1426bool GCNPassConfig::addLegalizeMachineIR() {

1428 return false;

1429}

1430

1431void GCNPassConfig::addPreRegBankSelect() {

1435}

1436

1437bool GCNPassConfig::addRegBankSelect() {

1441 } else {

1443 }

1444 return false;

1445}

1446

1447void GCNPassConfig::addPreGlobalInstructionSelect() {

1450}

1451

1452bool GCNPassConfig::addGlobalInstructionSelect() {

1454 return false;

1455}

1456

1457void GCNPassConfig::addFastRegAlloc() {

1458

1459

1460

1461

1462

1463

1465

1467

1469}

1470

1471void GCNPassConfig::addOptimizedRegAlloc() {

1474

1475

1476

1477

1478

1481

1482

1483

1484

1486

1489

1492

1493

1494

1496

1499

1500

1501

1504

1506}

1507

1508bool GCNPassConfig::addPreRewrite() {

1511 return true;

1512}

1513

1514FunctionPass *GCNPassConfig::createSGPRAllocPass(bool Optimized) {

1515

1516 llvm::call_once(InitializeDefaultSGPRRegisterAllocatorFlag,

1517 initializeDefaultSGPRRegisterAllocatorOnce);

1518

1521 return Ctor();

1522

1523 if (Optimized)

1525

1527}

1528

1529FunctionPass *GCNPassConfig::createVGPRAllocPass(bool Optimized) {

1530

1531 llvm::call_once(InitializeDefaultVGPRRegisterAllocatorFlag,

1532 initializeDefaultVGPRRegisterAllocatorOnce);

1533

1536 return Ctor();

1537

1538 if (Optimized)

1539 return createGreedyVGPRRegisterAllocator();

1540

1541 return createFastVGPRRegisterAllocator();

1542}

1543

1544FunctionPass *GCNPassConfig::createWWMRegAllocPass(bool Optimized) {

1545

1546 llvm::call_once(InitializeDefaultWWMRegisterAllocatorFlag,

1547 initializeDefaultWWMRegisterAllocatorOnce);

1548

1551 return Ctor();

1552

1553 if (Optimized)

1554 return createGreedyWWMRegisterAllocator();

1555

1556 return createFastWWMRegisterAllocator();

1557}

1558

1559FunctionPass *GCNPassConfig::createRegAllocPass(bool Optimized) {

1561}

1562

1564 "-regalloc not supported with amdgcn. Use -sgpr-regalloc, -wwm-regalloc, "

1565 "and -vgpr-regalloc";

1566

1567bool GCNPassConfig::addRegAssignAndRewriteFast() {

1568 if (!usingDefaultRegAlloc())

1570

1572

1573 addPass(createSGPRAllocPass(false));

1574

1575

1577

1578

1580

1581

1582 addPass(createWWMRegAllocPass(false));

1583

1586

1587

1588 addPass(createVGPRAllocPass(false));

1589

1590 return true;

1591}

1592

1593bool GCNPassConfig::addRegAssignAndRewriteOptimized() {

1594 if (!usingDefaultRegAlloc())

1596

1598

1599 addPass(createSGPRAllocPass(true));

1600

1601

1602

1603

1604

1606

1607

1608

1609

1611

1612

1614

1615

1617

1618

1619 addPass(createWWMRegAllocPass(true));

1623

1624

1625 addPass(createVGPRAllocPass(true));

1626

1627 addPreRewrite();

1629

1631

1632 return true;

1633}

1634

1635void GCNPassConfig::addPostRegAlloc() {

1640}

1641

1642void GCNPassConfig::addPreSched2() {

1646}

1647

1648void GCNPassConfig::addPreEmitPass() {

1653

1655

1658

1664

1665

1666

1667

1668

1669

1670

1671

1673

1676

1679}

1680

1682 return new GCNPassConfig(*this, PM);

1683}

1684

1689}

1690

1694 return SIMachineFunctionInfo::create(

1696}

1697

1700}

1701

1707}

1708

1717

1719 return true;

1720

1721 if (MFI->Occupancy == 0) {

1722

1724 }

1725

1729 SourceRange = RegName.SourceRange;

1730 return true;

1731 }

1732 RegVal = TempReg;

1733

1734 return false;

1735 };

1736

1739 return RegName .Value.empty() && parseRegister(RegName, RegVal);

1740 };

1741

1742 if (parseOptionalRegister(YamlMFI.VGPRForAGPRCopy, MFI->VGPRForAGPRCopy))

1743 return true;

1744

1745 if (parseOptionalRegister(YamlMFI.SGPRForEXECCopy, MFI->SGPRForEXECCopy))

1746 return true;

1747

1749 MFI->LongBranchReservedReg))

1750 return true;

1751

1753

1758 "incorrect register class for field", RegName.Value,

1759 {}, {});

1760 SourceRange = RegName.SourceRange;

1761 return true;

1762 };

1763

1764 if (parseRegister(YamlMFI.ScratchRSrcReg, MFI->ScratchRSrcReg) ||

1765 parseRegister(YamlMFI.FrameOffsetReg, MFI->FrameOffsetReg) ||

1766 parseRegister(YamlMFI.StackPtrOffsetReg, MFI->StackPtrOffsetReg))

1767 return true;

1768

1769 if (MFI->ScratchRSrcReg != AMDGPU::PRIVATE_RSRC_REG &&

1770 !AMDGPU::SGPR_128RegClass.contains(MFI->ScratchRSrcReg)) {

1771 return diagnoseRegisterClass(YamlMFI.ScratchRSrcReg);

1772 }

1773

1774 if (MFI->FrameOffsetReg != AMDGPU::FP_REG &&

1775 !AMDGPU::SGPR_32RegClass.contains(MFI->FrameOffsetReg)) {

1776 return diagnoseRegisterClass(YamlMFI.FrameOffsetReg);

1777 }

1778

1779 if (MFI->StackPtrOffsetReg != AMDGPU::SP_REG &&

1780 !AMDGPU::SGPR_32RegClass.contains(MFI->StackPtrOffsetReg)) {

1782 }

1783

1786 if (parseRegister(YamlReg, ParsedReg))

1787 return true;

1788

1790 }

1791

1794 }

1797 }

1798

1799 for (const auto &YamlRegStr : YamlMFI.SpillPhysVGPRS) {

1801 if (parseRegister(YamlRegStr, ParsedReg))

1802 return true;

1803 MFI->SpillPhysVGPRs.push_back(ParsedReg);

1804 }

1805

1806 auto parseAndCheckArgument = [&](const std::optionalyaml::SIArgument &A,

1809 unsigned SystemSGPRs) {

1810

1811 if ()

1812 return false;

1813

1814 if (A->IsRegister) {

1817 SourceRange = A->RegisterName.SourceRange;

1818 return true;

1819 }

1820 if (!RC.contains(Reg))

1821 return diagnoseRegisterClass(A->RegisterName);

1823 } else

1825

1826 if (A->Mask)

1828

1829 MFI->NumUserSGPRs += UserSGPRs;

1830 MFI->NumSystemSGPRs += SystemSGPRs;

1831 return false;

1832 };

1833

1835 (parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentBuffer,

1836 AMDGPU::SGPR_128RegClass,

1838 parseAndCheckArgument(YamlMFI.ArgInfo->DispatchPtr,

1839 AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchPtr,

1840 2, 0) ||

1841 parseAndCheckArgument(YamlMFI.ArgInfo->QueuePtr, AMDGPU::SReg_64RegClass,

1842 MFI->ArgInfo.QueuePtr, 2, 0) ||

1843 parseAndCheckArgument(YamlMFI.ArgInfo->KernargSegmentPtr,

1844 AMDGPU::SReg_64RegClass,

1846 parseAndCheckArgument(YamlMFI.ArgInfo->DispatchID,

1847 AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchID,

1848 2, 0) ||

1849 parseAndCheckArgument(YamlMFI.ArgInfo->FlatScratchInit,

1850 AMDGPU::SReg_64RegClass,

1852 parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentSize,

1853 AMDGPU::SGPR_32RegClass,

1855 parseAndCheckArgument(YamlMFI.ArgInfo->LDSKernelId,

1856 AMDGPU::SGPR_32RegClass,

1858 parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDX,

1859 AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDX,

1860 0, 1) ||

1861 parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDY,

1862 AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDY,

1863 0, 1) ||

1864 parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDZ,

1865 AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDZ,

1866 0, 1) ||

1867 parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupInfo,

1868 AMDGPU::SGPR_32RegClass,

1870 parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentWaveByteOffset,

1871 AMDGPU::SGPR_32RegClass,

1873 parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitArgPtr,

1874 AMDGPU::SReg_64RegClass,

1876 parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitBufferPtr,

1877 AMDGPU::SReg_64RegClass,

1879 parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDX,

1880 AMDGPU::VGPR_32RegClass,

1882 parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDY,

1883 AMDGPU::VGPR_32RegClass,

1885 parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDZ,

1886 AMDGPU::VGPR_32RegClass,

1888 return true;

1889

1890 if (ST.hasIEEEMode())

1892 if (ST.hasDX10ClampMode())

1894

1895

1902

1909

1912

1913 return false;

1914}

1915

1916

1917

1918

1919

1925

1926

1927

1928 disablePass<StackMapLivenessPass, FuncletLayoutPass,

1930}

1931

1935

1939

1942

1943

1944

1946

1949

1951

1952

1955

1958

1959

1963

1965

1970

1971

1972

1973

1975

1976

1977 }

1978

1980

1981

1982

1983

1984

1985

1986

1987

1988

1989

1990

1991

1992

1995}

1996

1998

1999

2000

2003

2004

2005

2006

2007

2008

2009

2010

2011

2012

2013

2014

2015

2016

2017

2018

2019

2021

2023

2026

2027

2028

2029

2030

2032}

2033

2035

2038

2041

2043

2044

2045

2046

2051

2053

2055

2056

2057

2058

2060

2062

2065

2066

2067

2069}

2070

2074

2076}

2077

2079 CreateMCStreamer) const {

2080

2081}

2082

2088}

2089

2091 AddMachinePass &addPass) const {

2093

2097 }

2104 }

2107}

2108

2110

2114}

2115

2118 if (Opt.getNumOccurrences())

2119 return Opt;

2121 return false;

2122 return Opt;

2123}

2124

2128 else

2130}

2131

2133 AddIRPass &addPass) const {

2136

2138

2139

2140

2142

2143

2144

2146

2147

2149

2150

2151

2153}

unsigned const MachineRegisterInfo * MRI

aarch64 falkor hwpf fix Falkor HW Prefetch Fix Late Phase

static cl::opt< bool > EnableEarlyIfConversion("aarch64-enable-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"), cl::init(true))

This is the AMDGPU address space based alias analysis pass.

Defines an instruction selector for the AMDGPU target.

Analyzes if a function is potentially memory bound and if a kernel may benefit from limiting numb...

static cl::opt< bool > EnableDCEInRA("amdgpu-dce-in-ra", cl::init(true), cl::Hidden, cl::desc("Enable machine DCE inside regalloc"))

static cl::opt< bool, true > EnableLowerModuleLDS("amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"), cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true), cl::Hidden)

static MachineSchedRegistry GCNMaxMemoryClauseSchedRegistry("gcn-max-memory-clause", "Run GCN scheduler to maximize memory clause", createGCNMaxMemoryClauseMachineScheduler)

static MachineSchedRegistry SISchedRegistry("si", "Run SI's custom scheduler", createSIMachineScheduler)

static ScheduleDAGInstrs * createIterativeILPMachineScheduler(MachineSchedContext *C)

static cl::opt< bool > EarlyInlineAll("amdgpu-early-inline-all", cl::desc("Inline all functions early"), cl::init(false), cl::Hidden)

static cl::opt< bool > EnableSwLowerLDS("amdgpu-enable-sw-lower-lds", cl::desc("Enable lowering of lds to global memory pass " "and asan instrument resulting IR."), cl::init(true), cl::Hidden)

static cl::opt< bool > EnableLowerKernelArguments("amdgpu-ir-lower-kernel-arguments", cl::desc("Lower kernel argument loads in IR pass"), cl::init(true), cl::Hidden)

static ScheduleDAGInstrs * createGCNMaxILPMachineScheduler(MachineSchedContext *C)

static cl::opt< bool > EnableSDWAPeephole("amdgpu-sdwa-peephole", cl::desc("Enable SDWA peepholer"), cl::init(true))

static MachineSchedRegistry GCNMinRegSchedRegistry("gcn-iterative-minreg", "Run GCN iterative scheduler for minimal register usage (experimental)", createMinRegScheduler)

static cl::opt< bool > EnableImageIntrinsicOptimizer("amdgpu-enable-image-intrinsic-optimizer", cl::desc("Enable image intrinsic optimizer pass"), cl::init(true), cl::Hidden)

static cl::opt< bool > HasClosedWorldAssumption("amdgpu-link-time-closed-world", cl::desc("Whether has closed-world assumption at link time"), cl::init(false), cl::Hidden)

static ScheduleDAGInstrs * createGCNMaxMemoryClauseMachineScheduler(MachineSchedContext *C)

static cl::opt< bool > EnableSIModeRegisterPass("amdgpu-mode-register", cl::desc("Enable mode register pass"), cl::init(true), cl::Hidden)

static cl::opt< std::string > AMDGPUSchedStrategy("amdgpu-sched-strategy", cl::desc("Select custom AMDGPU scheduling strategy."), cl::Hidden, cl::init(""))

static cl::opt< bool > EnableDPPCombine("amdgpu-dpp-combine", cl::desc("Enable DPP combiner"), cl::init(true))

static MachineSchedRegistry IterativeGCNMaxOccupancySchedRegistry("gcn-iterative-max-occupancy-experimental", "Run GCN scheduler to maximize occupancy (experimental)", createIterativeGCNMaxOccupancyMachineScheduler)

static cl::opt< bool > EnableSetWavePriority("amdgpu-set-wave-priority", cl::desc("Adjust wave priority"), cl::init(false), cl::Hidden)

static cl::opt< bool > LowerCtorDtor("amdgpu-lower-global-ctor-dtor", cl::desc("Lower GPU ctor / dtors to globals on the device."), cl::init(true), cl::Hidden)

static cl::opt< bool > OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden, cl::desc("Run pre-RA exec mask optimizations"), cl::init(true))

static cl::opt< bool > EnablePromoteKernelArguments("amdgpu-enable-promote-kernel-arguments", cl::desc("Enable promotion of flat kernel pointer arguments to global"), cl::Hidden, cl::init(true))

static cl::opt< bool > EnableRewritePartialRegUses("amdgpu-enable-rewrite-partial-reg-uses", cl::desc("Enable rewrite partial reg uses pass"), cl::init(true), cl::Hidden)

static cl::opt< bool > EnableLibCallSimplify("amdgpu-simplify-libcall", cl::desc("Enable amdgpu library simplifications"), cl::init(true), cl::Hidden)

static MachineSchedRegistry GCNMaxILPSchedRegistry("gcn-max-ilp", "Run GCN scheduler to maximize ilp", createGCNMaxILPMachineScheduler)

static cl::opt< bool > InternalizeSymbols("amdgpu-internalize-symbols", cl::desc("Enable elimination of non-kernel functions and unused globals"), cl::init(false), cl::Hidden)

static cl::opt< bool > EnableAMDGPUAttributor("amdgpu-attributor-enable", cl::desc("Enable AMDGPUAttributorPass"), cl::init(true), cl::Hidden)

static LLVM_READNONE StringRef getGPUOrDefault(const Triple &TT, StringRef GPU)

static Reloc::Model getEffectiveRelocModel(std::optional< Reloc::Model > RM)

Expected< AMDGPUAttributorOptions > parseAMDGPUAttributorPassOptions(StringRef Params)

static cl::opt< bool > EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden, cl::desc("Enable AMDGPU Alias Analysis"), cl::init(true))

static Expected< ScanOptions > parseAMDGPUAtomicOptimizerStrategy(StringRef Params)

static ScheduleDAGInstrs * createMinRegScheduler(MachineSchedContext *C)

static cl::opt< bool > EnableHipStdPar("amdgpu-enable-hipstdpar", cl::desc("Enable HIP Standard Parallelism Offload support"), cl::init(false), cl::Hidden)

static cl::opt< bool > EnableInsertDelayAlu("amdgpu-enable-delay-alu", cl::desc("Enable s_delay_alu insertion"), cl::init(true), cl::Hidden)

static ScheduleDAGInstrs * createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C)

static cl::opt< bool > EnableLoadStoreVectorizer("amdgpu-load-store-vectorizer", cl::desc("Enable load store vectorizer"), cl::init(true), cl::Hidden)

static bool mustPreserveGV(const GlobalValue &GV)

Predicate for Internalize pass.

static cl::opt< bool > EnableLoopPrefetch("amdgpu-loop-prefetch", cl::desc("Enable loop data prefetch on AMDGPU"), cl::Hidden, cl::init(false))

LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget()

static cl::opt< bool > NewRegBankSelect("new-reg-bank-select", cl::desc("Run amdgpu-regbankselect and amdgpu-regbanklegalize instead of " "regbankselect"), cl::init(false), cl::Hidden)

static cl::opt< bool > RemoveIncompatibleFunctions("amdgpu-enable-remove-incompatible-functions", cl::Hidden, cl::desc("Enable removal of functions when they" "use features not supported by the target GPU"), cl::init(true))

static cl::opt< bool > EnableScalarIRPasses("amdgpu-scalar-ir-passes", cl::desc("Enable scalar IR passes"), cl::init(true), cl::Hidden)

static cl::opt< bool > EnableRegReassign("amdgpu-reassign-regs", cl::desc("Enable register reassign optimizations on gfx10+"), cl::init(true), cl::Hidden)

static cl::opt< bool > OptVGPRLiveRange("amdgpu-opt-vgpr-liverange", cl::desc("Enable VGPR liverange optimizations for if-else structure"), cl::init(true), cl::Hidden)

static ScheduleDAGInstrs * createSIMachineScheduler(MachineSchedContext *C)

static cl::opt< bool > EnablePreRAOptimizations("amdgpu-enable-pre-ra-optimizations", cl::desc("Enable Pre-RA optimizations pass"), cl::init(true), cl::Hidden)

static cl::opt< ScanOptions > AMDGPUAtomicOptimizerStrategy("amdgpu-atomic-optimizer-strategy", cl::desc("Select DPP or Iterative strategy for scan"), cl::init(ScanOptions::Iterative), cl::values(clEnumValN(ScanOptions::DPP, "DPP", "Use DPP operations for scan"), clEnumValN(ScanOptions::Iterative, "Iterative", "Use Iterative approach for scan"), clEnumValN(ScanOptions::None, "None", "Disable atomic optimizer")))

static cl::opt< bool > EnableVOPD("amdgpu-enable-vopd", cl::desc("Enable VOPD, dual issue of VALU in wave32"), cl::init(true), cl::Hidden)

static cl::opt< bool > EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"), cl::init(false))

static ScheduleDAGInstrs * createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C)

static MachineSchedRegistry GCNILPSchedRegistry("gcn-iterative-ilp", "Run GCN iterative scheduler for ILP scheduling (experimental)", createIterativeILPMachineScheduler)

static cl::opt< bool > ScalarizeGlobal("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden)

static const char RegAllocOptNotSupportedMessage[]

static MachineSchedRegistry GCNMaxOccupancySchedRegistry("gcn-max-occupancy", "Run GCN scheduler to maximize occupancy", createGCNMaxOccupancyMachineScheduler)

The AMDGPU TargetMachine interface definition for hw codegen targets.

This file declares the AMDGPU-specific subclass of TargetLoweringObjectFile.

This file describes a TargetTransformInfo::Concept conforming object specific to the AMDGPU target machine.

Provides passes for inlining "always_inline" functions.

static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")

This header provides classes for managing passes over SCCs of the call graph.

Analysis containing CSE Info

Provides analysis for continuously CSEing during GISel passes.

#define clEnumValN(ENUMVAL, FLAGNAME, DESC)

#define LLVM_EXTERNAL_VISIBILITY

This file provides the interface for a simple, fast CSE pass.

static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")

This file defines the class GCNIterativeScheduler, which uses an iterative approach to find a best sc...

This file provides the interface for LLVM's Global Value Numbering pass which eliminates fully redund...

AcceleratorCodeSelection - Identify all functions reachable from a kernel, removing those that are un...

This file declares the IRTranslator pass.

This header defines various interfaces for pass management in LLVM.

static std::string computeDataLayout()

This file provides the interface for LLVM's Loop Data Prefetching Pass.

unsigned const TargetRegisterInfo * TRI

uint64_t IntrinsicInst * II

static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")

CGSCCAnalysisManager CGAM

FunctionAnalysisManager FAM

ModuleAnalysisManager MAM

PassInstrumentationCallbacks PIC

PassBuilder PB(Machine, PassOpts->PTO, std::nullopt, &PIC)

static bool isLTOPreLink(ThinOrFullLTOPhase Phase)

The AMDGPU TargetMachine interface definition for hw codegen targets.

This file describes the interface of the MachineFunctionPass responsible for assigning the generic vi...

assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

SI Machine Scheduler interface.

static FunctionPass * useDefaultRegisterAllocator()

-regalloc=... command line option.

Target-Independent Code Generator Pass Configuration Options pass.

LLVM IR instance of the generic uniformity analysis.

static std::unique_ptr< TargetLoweringObjectFile > createTLOF()

A manager for alias analyses.

void registerFunctionAnalysis()

void addAAResult(AAResultT &AAResult)

Legacy wrapper pass to provide the AMDGPUAAResult object.

Analysis pass providing a never-invalidated alias analysis result.

Error addInstSelector(AddMachinePass &) const

void addMachineSSAOptimization(AddMachinePass &) const

void addEarlyCSEOrGVNPass(AddIRPass &) const

void addStraightLineScalarOptimizationPasses(AddIRPass &) const

AMDGPUCodeGenPassBuilder(GCNTargetMachine &TM, const CGPassBuilderOption &Opts, PassInstrumentationCallbacks *PIC)

void addIRPasses(AddIRPass &) const

void addPreISel(AddIRPass &addPass) const

void addAsmPrinter(AddMachinePass &, CreateMCStreamer) const

void addCodeGenPrepare(AddIRPass &) const

void addILPOpts(AddMachinePass &) const

void addPostRegAlloc(AddMachinePass &) const

bool isPassEnabled(const cl::opt< bool > &Opt, CodeGenOptLevel Level=CodeGenOptLevel::Default) const

Check if a pass is enabled given Opt option.

Lower llvm.global_ctors and llvm.global_dtors to special kernels.

uint32_t getLDSSize() const

AMDGPUTargetMachine & getAMDGPUTargetMachine() const

std::unique_ptr< CSEConfigBase > getCSEConfig() const override

Returns the CSEConfig object to use for the current optimization level.

ScheduleDAGInstrs * createMachineScheduler(MachineSchedContext *C) const override

Create an instance of ScheduleDAGInstrs to be run within the standard MachineScheduler pass for this ...

bool isPassEnabled(const cl::opt< bool > &Opt, CodeGenOptLevel Level=CodeGenOptLevel::Default) const

Check if a pass is enabled given Opt option.

bool addPreISel() override

Methods with trivial inline returns are convenient points in the common codegen pass pipeline where t...

bool addInstSelector() override

addInstSelector - This method should install an instruction selector pass, which converts from LLVM c...

bool addGCPasses() override

addGCPasses - Add late codegen passes that analyze code for garbage collection.

void addStraightLineScalarOptimizationPasses()

AMDGPUPassConfig(TargetMachine &TM, PassManagerBase &PM)

void addIRPasses() override

Add common target configurable passes that perform LLVM IR to IR transforms following machine indepen...

void addEarlyCSEOrGVNPass()

void addCodeGenPrepare() override

Add pass to prepare the LLVM IR for code generation.

Splits the module M into N linkable partitions.

static int64_t getNullPointerValue(unsigned AddrSpace)

Get the integer value of a null pointer in the given address space.

unsigned getAddressSpaceForPseudoSourceKind(unsigned Kind) const override

getAddressSpaceForPseudoSourceKind - Given the kind of memory (e.g.

const TargetSubtargetInfo * getSubtargetImpl() const

void registerDefaultAliasAnalyses(AAManager &) override

Allow the target to register alias analyses with the AAManager for use with the new pass manager.

~AMDGPUTargetMachine() override

std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const override

If the specified predicate checks whether a generic pointer falls within a specified address space,...

StringRef getFeatureString(const Function &F) const

static bool EnableFunctionCalls

AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, std::optional< Reloc::Model > RM, std::optional< CodeModel::Model > CM, CodeGenOptLevel OL)

bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override

Returns true if a cast between SrcAS and DestAS is a noop.

void registerPassBuilderCallbacks(PassBuilder &PB) override

Allow the target to modify the pass pipeline.

static bool EnableLowerModuleLDS

StringRef getGPUName(const Function &F) const

unsigned getAssumedAddrSpace(const Value *V) const override

If the specified generic pointer could be assumed as a pointer to a specific address space,...

bool splitModule(Module &M, unsigned NumParts, function_ref< void(std::unique_ptr< Module > MPart)> ModuleCallback) override

Entry point for module splitting.

Inlines functions marked as "always_inline".

A container for analyses that lazily runs them and caches their results.

StringRef getValueAsString() const

Return the attribute's value as a string.

bool isValid() const

Return true if the attribute is any kind of attribute.

Allocate memory in an ever growing pool, as if by bump-pointer.

This class provides access to building LLVM's passes.

void addPostRegAlloc(AddMachinePass &) const

This method may be implemented by targets that want to run passes after register allocation pass pipe...

void addILPOpts(AddMachinePass &) const

Add passes that optimize instruction level parallelism for out-of-order targets.

Error buildPipeline(ModulePassManager &MPM, raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut, CodeGenFileType FileType) const

void addMachineSSAOptimization(AddMachinePass &) const

Methods with trivial inline returns are convenient points in the common codegen pass pipeline where t...

void addCodeGenPrepare(AddIRPass &) const

Add pass to prepare the LLVM IR for code generation.

void disablePass()

Allow the target to disable a specific pass by default.

void addIRPasses(AddIRPass &) const

Add common target configurable passes that perform LLVM IR to IR transforms following machine indepen...

implements a set of functionality in the TargetMachine class for targets that make use of the indepen...

void removeDeadConstantUsers() const

If there are any dead constant users dangling off of this constant, remove them.

This pass is required by interprocedural register allocation.

Lightweight error class with error context and mandatory checking.

static ErrorSuccess success()

Create a success value.

Tagged union holding either a T or a Error.

FunctionPass class - This class is used to implement most global optimizations.

@ SCHEDULE_LEGACYMAXOCCUPANCY

const SIRegisterInfo * getRegisterInfo() const override

TargetTransformInfo getTargetTransformInfo(const Function &F) const override

Get a TargetTransformInfo implementation for the target.

void registerMachineRegisterInfoCallback(MachineFunction &MF) const override

bool parseMachineFunctionInfo(const yaml::MachineFunctionInfo &, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) const override

Parse out the target's MachineFunctionInfo from the YAML representation.

yaml::MachineFunctionInfo * convertFuncInfoToYAML(const MachineFunction &MF) const override

Allocate and initialize an instance of the YAML representation of the MachineFunctionInfo.

Error buildCodeGenPipeline(ModulePassManager &MPM, raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut, CodeGenFileType FileType, const CGPassBuilderOption &Opts, PassInstrumentationCallbacks *PIC) override

yaml::MachineFunctionInfo * createDefaultFuncInfoYAML() const override

Allocate and return a default initialized instance of the YAML representation for the MachineFunction...

TargetPassConfig * createPassConfig(PassManagerBase &PM) override

Create a pass configuration object to be used by addPassToEmitX methods for generating a pipeline of ...

GCNTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, std::optional< Reloc::Model > RM, std::optional< CodeModel::Model > CM, CodeGenOptLevel OL, bool JIT)

MachineFunctionInfo * createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F, const TargetSubtargetInfo *STI) const override

Create the target's instance of MachineFunctionInfo.

The core GVN pass object.

Pass to remove unused function declarations.

This pass is responsible for selecting generic machine instructions to target-specific instructions.

A pass that internalizes all functions and variables other than those that must be preserved accordin...

Converts loops into loop-closed SSA form.

This pass implements the localization mechanism described at the top of this file.

An optimization pass inserting data prefetches in loops.

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

MachineRegisterInfo & getRegInfo()

getRegInfo - Return information about the registers currently in use.

Function & getFunction()

Return the LLVM function that this machine code represents.

Ty * getInfo()

getInfo - Keep track of various per-function pieces of information for backends that would like to do...

MachineRegisterInfo - Keep track of information for virtual and physical registers,...

void addDelegate(Delegate *delegate)

MachineSchedRegistry provides a selection of available machine instruction schedulers.

This interface provides simple read-only access to a block of memory, and provides simple methods for...

virtual StringRef getBufferIdentifier() const

Return an identifier for this buffer, typically the filename it was read from.

A Module instance is used to store all the information related to an LLVM module.

static const OptimizationLevel O0

Disable as many optimizations as possible.

unsigned getSpeedupLevel() const

static const OptimizationLevel O1

Optimize quickly without destroying debuggability.

This class provides access to building LLVM's passes.

void registerPipelineEarlySimplificationEPCallback(const std::function< void(ModulePassManager &, OptimizationLevel, ThinOrFullLTOPhase)> &C)

void registerPipelineStartEPCallback(const std::function< void(ModulePassManager &, OptimizationLevel)> &C)

void crossRegisterProxies(LoopAnalysisManager &LAM, FunctionAnalysisManager &FAM, CGSCCAnalysisManager &CGAM, ModuleAnalysisManager &MAM, MachineFunctionAnalysisManager *MFAM=nullptr)

Cross register the analysis managers through their proxies.

void registerOptimizerLastEPCallback(const std::function< void(ModulePassManager &, OptimizationLevel, ThinOrFullLTOPhase)> &C)

void registerPeepholeEPCallback(const std::function< void(FunctionPassManager &, OptimizationLevel)> &C)

void registerCGSCCOptimizerLateEPCallback(const std::function< void(CGSCCPassManager &, OptimizationLevel)> &C)

void registerRegClassFilterParsingCallback(const std::function< RegAllocFilterFunc(StringRef)> &C)

void registerModuleAnalyses(ModuleAnalysisManager &MAM)

Registers all available module analysis passes.

void registerFullLinkTimeOptimizationLastEPCallback(const std::function< void(ModulePassManager &, OptimizationLevel)> &C)

void registerFunctionAnalyses(FunctionAnalysisManager &FAM)

Registers all available function analysis passes.

LLVM_ATTRIBUTE_MINSIZE std::enable_if_t<!std::is_same_v< PassT, PassManager > > addPass(PassT &&Pass)

PreservedAnalyses run(IRUnitT &IR, AnalysisManagerT &AM, ExtraArgTs... ExtraArgs)

Run all of the passes in this manager over the given unit of IR.

PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...

static PassRegistry * getPassRegistry()

getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...

Pass interface - Implemented by all 'passes'.

@ ExternalSymbolCallEntry

This pass implements the reg bank selector pass used in the GlobalISel pipeline.

RegisterPassParser class - Handle the addition of new machine passes.

RegisterRegAllocBase class - Track the registration of register allocators.

FunctionPass *(*)() FunctionPassCtor

Wrapper class representing virtual and physical registers.

This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...

bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange)

void setFlag(Register Reg, uint8_t Flag)

bool checkFlag(Register Reg, uint8_t Flag) const

void reserveWWMRegister(Register Reg)

Instances of this class encapsulate one diagnostic report, allowing printing to a raw_ostream as a ca...

Represents a location in source code.

Represents a range in source code.

A ScheduleDAG for scheduling lists of MachineInstr.

ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...

ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...

void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)

Add a postprocessing step to the DAG builder.

const TargetInstrInfo * TII

Target instruction information.

const TargetRegisterInfo * TRI

Target processor register info.

Move instructions into successor blocks when possible.

SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...

void append(StringRef RHS)

Append from a StringRef.

unsigned getMainFileID() const

const MemoryBuffer * getMemoryBuffer(unsigned i) const

StringRef - Represent a constant reference to a string, i.e.

std::pair< StringRef, StringRef > split(char Separator) const

Split into two substrings around the first occurrence of a separator character.

constexpr bool empty() const

empty - Check if the string is empty.

bool consume_front(StringRef Prefix)

Returns true if this StringRef has the given prefix and removes that prefix.

A switch()-like statement whose cases are string literals.

StringSwitch & Case(StringLiteral S, T Value)

StringSwitch & Cases(StringLiteral S0, StringLiteral S1, T Value)

Primary interface to the complete machine description for the target machine.

CodeGenOptLevel getOptLevel() const

Returns the optimization level: None, Less, Default, or Aggressive.

Triple TargetTriple

Triple string, CPU name, and target feature strings the TargetMachine instance is created with.

const Triple & getTargetTriple() const

const MCSubtargetInfo * getMCSubtargetInfo() const

StringRef getTargetFeatureString() const

StringRef getTargetCPU() const

std::unique_ptr< const MCSubtargetInfo > STI

void resetTargetOptions(const Function &F) const

Reset the target options based on the function's attributes.

std::unique_ptr< const MCRegisterInfo > MRI

Target-Independent Code Generator Pass Configuration Options.

virtual void addCodeGenPrepare()

Add pass to prepare the LLVM IR for code generation.

virtual bool addILPOpts()

Add passes that optimize instruction level parallelism for out-of-order targets.

virtual void addPostRegAlloc()

This method may be implemented by targets that want to run passes after register allocation pass pipe...

CodeGenOptLevel getOptLevel() const

virtual void addOptimizedRegAlloc()

addOptimizedRegAlloc - Add passes related to register allocation.

virtual void addIRPasses()

Add common target configurable passes that perform LLVM IR to IR transforms following machine indepen...

virtual void addFastRegAlloc()

addFastRegAlloc - Add the minimum set of target-independent passes that are required for fast registe...

virtual void addMachineSSAOptimization()

addMachineSSAOptimization - Add standard passes that optimize machine instructions in SSA form.

void disablePass(AnalysisID PassID)

Allow the target to disable a specific standard pass by default.

AnalysisID addPass(AnalysisID PassID)

Utilities for targets to add passes to the pass manager.

TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...

TargetSubtargetInfo - Generic base class for all target subtargets.

This pass provides access to the codegen interfaces that are needed for IR-level transformations.

Target - Wrapper for Target specific information.

Triple - Helper class for working with autoconf configuration names.

ArchType getArch() const

Get the parsed architecture type of this triple.

bool isAMDGCN() const

Tests whether the target is AMDGCN.

LLVM Value Representation.

An efficient, type-erasing, non-owning reference to a callable.

PassManagerBase - An abstract interface to allow code to add passes to a pass manager without having ...

An abstract base class for streams implementations that also support a pwrite operation.

Interfaces for registering analysis passes, producing common pass manager configurations,...

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

@ REGION_ADDRESS

Address space for region memory. (GDS)

@ LOCAL_ADDRESS

Address space for local memory.

@ CONSTANT_ADDRESS

Address space for constant memory (VTX2).

@ FLAT_ADDRESS

Address space for flat memory.

@ GLOBAL_ADDRESS

Address space for global memory (RAT0, VTX0).

@ PRIVATE_ADDRESS

Address space for private memory.

bool isFlatGlobalAddrSpace(unsigned AS)

bool isEntryFunctionCC(CallingConv::ID CC)

@ C

The default llvm calling convention, compatible with C.

BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)

Matches an And with LHS and RHS in either order.

bool match(Val *V, const Pattern &P)

deferredval_ty< Value > m_Deferred(Value *const &V)

Like m_Specific(), but works if the specific value to match is determined as part of the same match()...

class_match< Value > m_Value()

Match an arbitrary value and ignore it.

BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)

Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.

ValuesClass values(OptsTy... Options)

Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...

initializer< Ty > init(const Ty &Val)

LocationClass< Ty > location(Ty &L)

This is an optimization pass for GlobalISel generic memory operations.

FunctionPass * createFlattenCFGPass()

void initializeSIFormMemoryClausesPass(PassRegistry &)

FunctionPass * createFastRegisterAllocator()

FastRegisterAllocation Pass - This pass register allocates as fast as possible.

char & EarlyMachineLICMID

This pass performs loop invariant code motion on machine instructions.

ImmutablePass * createAMDGPUAAWrapperPass()

char & PostRAHazardRecognizerID

PostRAHazardRecognizer - This pass runs the post-ra hazard recognizer.

std::function< bool(const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, const Register Reg)> RegAllocFilterFunc

Filter function for register classes during regalloc.

FunctionPass * createAMDGPUSetWavePriorityPass()

void initializeGCNCreateVOPDPass(PassRegistry &)

char & GCNPreRAOptimizationsID

char & GCLoweringID

GCLowering Pass - Used by gc.root to perform its default lowering operations.

void initializeGCNPreRAOptimizationsPass(PassRegistry &)

Pass * createLoadStoreVectorizerPass()

Create a legacy pass manager instance of the LoadStoreVectorizer pass.

ModulePass * createExpandVariadicsPass(ExpandVariadicsMode)

void initializeGCNRewritePartialRegUsesPass(llvm::PassRegistry &)

void initializeAMDGPUAttributorLegacyPass(PassRegistry &)

FunctionPass * createSIAnnotateControlFlowLegacyPass()

Create the annotation pass.

FunctionPass * createSIModeRegisterPass()

FunctionPass * createGreedyRegisterAllocator()

Greedy register allocation pass - This pass implements a global register allocator for optimized buil...

void initializeAMDGPUAAWrapperPassPass(PassRegistry &)

void initializeSIShrinkInstructionsLegacyPass(PassRegistry &)

ModulePass * createAMDGPULowerBufferFatPointersPass()

void initializeR600ClauseMergePassPass(PassRegistry &)

void initializeSIModeRegisterPass(PassRegistry &)

ModulePass * createAMDGPUCtorDtorLoweringLegacyPass()

ModulePass * createAMDGPUSwLowerLDSLegacyPass(const AMDGPUTargetMachine *TM=nullptr)

void initializeAMDGPURewriteUndefForPHILegacyPass(PassRegistry &)

FunctionPass * createAMDGPUPreLegalizeCombiner(bool IsOptNone)

char & GCNRewritePartialRegUsesID

FunctionPass * createAMDGPUPostLegalizeCombiner(bool IsOptNone)

void initializeAMDGPUSwLowerLDSLegacyPass(PassRegistry &)

std::error_code inconvertibleErrorCode()

The value returned by this function can be returned from convertToErrorCode for Error values where no...

void initializeGCNPreRALongBranchRegPass(PassRegistry &)

void initializeSILowerSGPRSpillsLegacyPass(PassRegistry &)

std::unique_ptr< ScheduleDAGMutation > createIGroupLPDAGMutation(AMDGPU::SchedulingPhase Phase)

Phase specifies whether or not this is a reentry into the IGroupLPDAGMutation.

void initializeAMDGPUDAGToDAGISelLegacyPass(PassRegistry &)

FunctionPass * createNaryReassociatePass()

char & PatchableFunctionID

This pass implements the "patchable-function" attribute.

char & SIOptimizeExecMaskingLegacyID

char & PostRASchedulerID

PostRAScheduler - This pass performs post register allocation scheduling.

void initializeR600ExpandSpecialInstrsPassPass(PassRegistry &)

void initializeR600PacketizerPass(PassRegistry &)

std::unique_ptr< ScheduleDAGMutation > createVOPDPairingMutation()

ModulePass * createAMDGPUAlwaysInlinePass(bool GlobalOpt=true)

void initializeSIPreEmitPeepholePass(PassRegistry &)

void initializeSIFoldOperandsLegacyPass(PassRegistry &)

char & SILoadStoreOptimizerLegacyID

char & SILowerWWMCopiesID

void initializeSIFixVGPRCopiesPass(PassRegistry &)

void initializeAMDGPUGlobalISelDivergenceLoweringPass(PassRegistry &)

std::unique_ptr< CSEConfigBase > getStandardCSEConfigForOpt(CodeGenOptLevel Level)

Target & getTheR600Target()

The target for R600 GPUs.

char & MachineSchedulerID

MachineScheduler - This pass schedules machine instructions.

Pass * createStructurizeCFGPass(bool SkipUniformRegions=false)

When SkipUniformRegions is true the structurizer will not structurize regions that only contain uniform...

void initializeSILowerWWMCopiesPass(PassRegistry &)

void initializeGCNNSAReassignPass(PassRegistry &)

char & PostMachineSchedulerID

PostMachineScheduler - This pass schedules machine instructions postRA.

void initializeAMDGPUOpenCLEnqueuedBlockLoweringLegacyPass(PassRegistry &)

void initializeSIInsertWaitcntsPass(PassRegistry &)

ScheduleDAGMILive * createGenericSchedLive(MachineSchedContext *C)

Create the standard converging machine scheduler.

char & SIFormMemoryClausesID

void initializeSILoadStoreOptimizerLegacyPass(PassRegistry &)

void initializeAMDGPULowerModuleLDSLegacyPass(PassRegistry &)

void initializeAMDGPUCtorDtorLoweringLegacyPass(PassRegistry &)

char & EarlyIfConverterLegacyID

EarlyIfConverter - This pass performs if-conversion on SSA form by inserting cmov instructions.

void initializeAMDGPURegBankCombinerPass(PassRegistry &)

void initializeSILateBranchLoweringPass(PassRegistry &)

ThinOrFullLTOPhase

This enumerates the LLVM full LTO or ThinLTO optimization phases.

char & AMDGPUUnifyDivergentExitNodesID

FunctionPass * createAMDGPUAtomicOptimizerPass(ScanOptions ScanStrategy)

FunctionPass * createAMDGPUPreloadKernArgPrologLegacyPass()

char & SIOptimizeVGPRLiveRangeLegacyID

char & ShadowStackGCLoweringID

ShadowStackGCLowering - Implements the custom lowering mechanism used by the shadow stack GC.

void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &)

void initializeAMDGPUExternalAAWrapperPass(PassRegistry &)

auto formatv(bool Validate, const char *Fmt, Ts &&...Vals)

void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &)

CodeModel::Model getEffectiveCodeModel(std::optional< CodeModel::Model > CM, CodeModel::Model Default)

Helper method for getting the code model, returning Default if CM does not have a value.

char & SILateBranchLoweringPassID

char & BranchRelaxationPassID

BranchRelaxation - This pass replaces branches that need to jump further than is supported by a branc...

FunctionPass * createSinkingPass()

CGSCCToFunctionPassAdaptor createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass, bool EagerlyInvalidate=false, bool NoRerun=false)

A function to deduce a function pass type and wrap it in the templated adaptor.

void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &)

CodeGenFileType

These enums are meant to be passed into addPassesToEmitFile to indicate what type of file to emit,...

void initializeSIPostRABundlerPass(PassRegistry &)

void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry &)

char & GCNDPPCombineLegacyID

void initializeSIWholeQuadModePass(PassRegistry &)

std::unique_ptr< ScheduleDAGMutation > createStoreClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, bool ReorderWhileClustering=false)

If ReorderWhileClustering is set to true, no attempt will be made to reduce reordering due to store c...

FunctionPass * createLoopDataPrefetchPass()

FunctionPass * createAMDGPULowerKernelArgumentsPass()

char & AMDGPUInsertDelayAluID

Pass * createAMDGPUAnnotateKernelFeaturesPass()

void report_fatal_error(Error Err, bool gen_crash_diag=true)

Report a serious error, calling any installed error handler.

std::unique_ptr< ScheduleDAGMutation > createAMDGPUMacroFusionDAGMutation()

Note that you have to add: DAG.addMutation(createAMDGPUMacroFusionDAGMutation()); to AMDGPUPassConfig...

char & StackMapLivenessID

StackMapLiveness - This pass analyses the register live-out set of stackmap/patchpoint intrinsics and...

FunctionPass * createUnifyLoopExitsPass()

char & SIOptimizeExecMaskingPreRAID

FunctionPass * createFixIrreduciblePass()

char & FuncletLayoutID

This pass lays out funclets contiguously.

void initializeSIInsertHardClausesPass(PassRegistry &)

char & DetectDeadLanesID

This pass adds dead/undef flags after analyzing subregister lanes.

void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &)

CodeGenOptLevel

Code generation optimization level.

void initializeAMDGPUReserveWWMRegsPass(PassRegistry &)

ModulePass * createAMDGPUPrintfRuntimeBinding()

char & StackSlotColoringID

StackSlotColoring - This pass performs stack slot coloring.

void initializeSIMemoryLegalizerPass(PassRegistry &)

Pass * createAlwaysInlinerLegacyPass(bool InsertLifetime=true)

Create a legacy pass manager instance of a pass to inline and remove functions marked as "always_inli...

void initializeR600ControlFlowFinalizerPass(PassRegistry &)

void initializeAMDGPUImageIntrinsicOptimizerPass(PassRegistry &)

void initializeSILowerControlFlowLegacyPass(PassRegistry &)

char & SIPreAllocateWWMRegsLegacyID

ModulePass * createAMDGPULowerModuleLDSLegacyPass(const AMDGPUTargetMachine *TM=nullptr)

void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &)

char & AMDGPUReserveWWMRegsID

FunctionPass * createAMDGPUPromoteAlloca()

FunctionPass * createSeparateConstOffsetFromGEPPass(bool LowerGEP=false)

char & SIPreEmitPeepholeID

ModulePass * createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *)

void initializeGCNRegPressurePrinterPass(PassRegistry &)

void initializeSILowerI1CopiesLegacyPass(PassRegistry &)

char & SILowerSGPRSpillsLegacyID

void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &)

FunctionPass * createBasicRegisterAllocator()

BasicRegisterAllocation Pass - This pass implements a degenerate global register allocator using the ...

void initializeGlobalISel(PassRegistry &)

Initialize all passes linked into the GlobalISel library.

char & SILowerControlFlowLegacyID

ModulePass * createR600OpenCLImageTypeLoweringPass()

FunctionPass * createAMDGPUCodeGenPreparePass()

void initializeSIAnnotateControlFlowLegacyPass(PassRegistry &)

ModulePass * createAMDGPUOpenCLEnqueuedBlockLoweringLegacyPass()

FunctionPass * createAMDGPUISelDag(TargetMachine &TM, CodeGenOptLevel OptLevel)

This pass converts a legalized DAG into an AMDGPU-specific DAG.

void initializeSIPreAllocateWWMRegsLegacyPass(PassRegistry &)

Target & getTheGCNTarget()

The target for GCN GPUs.

void initializeSIFixSGPRCopiesLegacyPass(PassRegistry &)

void initializeAMDGPUAtomicOptimizerPass(PassRegistry &)

FunctionPass * createGVNPass()

Create a legacy GVN pass.

FunctionPass * createAMDGPURegBankSelectPass()

FunctionPass * createAMDGPURegBankCombiner(bool IsOptNone)

FunctionPass * createAMDGPURegBankLegalizePass()

char & MachineCSELegacyID

MachineCSE - This pass performs global CSE on machine instructions.

std::unique_ptr< ScheduleDAGMutation > createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, bool ReorderWhileClustering=false)

If ReorderWhileClustering is set to true, no attempt will be made to reduce reordering due to store c...

void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry &)

void initializeAMDGPUMarkLastScratchLoadPass(PassRegistry &)

char & LiveVariablesID

LiveVariables pass - This pass computes the set of blocks in which each variable is live and sets mac...

void initializeAMDGPUCodeGenPreparePass(PassRegistry &)

FunctionPass * createAMDGPURewriteUndefForPHILegacyPass()

void initializeSIOptimizeExecMaskingLegacyPass(PassRegistry &)

void call_once(once_flag &flag, Function &&F, Args &&... ArgList)

Execute the function specified as a parameter once.

FunctionPass * createSILowerI1CopiesLegacyPass()

void initializeAMDGPULowerKernelAttributesPass(PassRegistry &)

char & SIInsertHardClausesID

void initializeAMDGPUResourceUsageAnalysisPass(PassRegistry &)

char & SIFixSGPRCopiesLegacyID

void initializeGCNDPPCombineLegacyPass(PassRegistry &)

FunctionPass * createInferAddressSpacesPass(unsigned AddressSpace=~0u)

char & SIPeepholeSDWALegacyID

char & VirtRegRewriterID

VirtRegRewriter pass.

char & SIFoldOperandsLegacyID

FunctionPass * createLowerSwitchPass()

void initializeAMDGPUPreloadKernArgPrologLegacyPass(PassRegistry &)

FunctionPass * createVirtRegRewriter(bool ClearVirtRegs=true)

void initializeR600VectorRegMergerPass(PassRegistry &)

ImmutablePass * createExternalAAWrapperPass(std::function< void(Pass &, Function &, AAResults &)> Callback)

A wrapper pass around a callback which can be used to populate the AAResults in the AAResultsWrapperP...

FunctionPass * createAMDGPUGlobalISelDivergenceLoweringPass()

FunctionPass * createSIMemoryLegalizerPass()

void initializeAMDGPULateCodeGenPrepareLegacyPass(PassRegistry &)

void initializeSIOptimizeVGPRLiveRangeLegacyPass(PassRegistry &)

void initializeSIPeepholeSDWALegacyPass(PassRegistry &)

void initializeAMDGPURegBankLegalizePass(PassRegistry &)

char & TwoAddressInstructionPassID

TwoAddressInstruction - This pass reduces two-address instructions to use two operands.

void initializeAMDGPURegBankSelectPass(PassRegistry &)

FunctionPass * createAMDGPULateCodeGenPrepareLegacyPass()

FunctionPass * createAtomicExpandLegacyPass()

AtomicExpandPass - At IR level this pass replaces atomic instructions with __atomic_* library calls,...

MCRegisterInfo * createGCNMCRegisterInfo(AMDGPUDwarfFlavour DwarfFlavour)

FunctionPass * createStraightLineStrengthReducePass()

FunctionPass * createAMDGPUImageIntrinsicOptimizerPass(const TargetMachine *)

void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry &)

void initializeAMDGPULowerBufferFatPointersPass(PassRegistry &)

FunctionPass * createSIInsertWaitcntsPass()

FunctionPass * createAMDGPUAnnotateUniformValuesLegacy()

FunctionPass * createEarlyCSEPass(bool UseMemorySSA=false)

char & PHIEliminationID

PHIElimination - This pass eliminates machine instruction PHI nodes by inserting copy instructions.

bool parseNamedRegisterReference(PerFunctionMIParsingState &PFS, Register &Reg, StringRef Src, SMDiagnostic &Error)

FunctionPass * createSIShrinkInstructionsLegacyPass()

char & AMDGPUMarkLastScratchLoadID

char & RenameIndependentSubregsID

This pass detects subregister lanes in a virtual register that are used independently of other lanes ...

void initializeAMDGPUAnnotateUniformValuesLegacyPass(PassRegistry &)

std::unique_ptr< ScheduleDAGMutation > createAMDGPUExportClusteringDAGMutation()

void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry &)

void initializeAMDGPUPromoteAllocaPass(PassRegistry &)

void initializeAMDGPURemoveIncompatibleFunctionsLegacyPass(PassRegistry &)

void initializeAMDGPUInsertDelayAluPass(PassRegistry &)

void initializeAMDGPUUnifyMetadataPass(PassRegistry &)

void initializeAMDGPUAlwaysInlinePass(PassRegistry &)

char & DeadMachineInstructionElimID

DeadMachineInstructionElim - This pass removes dead machine instructions.

char & AMDGPUPerfHintAnalysisLegacyID

char & GCNPreRALongBranchRegID

void initializeAMDGPUPromoteKernelArgumentsPass(PassRegistry &)

ArgDescriptor PrivateSegmentBuffer

ArgDescriptor WorkGroupIDY

ArgDescriptor WorkGroupIDZ

ArgDescriptor PrivateSegmentSize

ArgDescriptor ImplicitArgPtr

ArgDescriptor PrivateSegmentWaveByteOffset

ArgDescriptor WorkGroupInfo

ArgDescriptor WorkItemIDZ

ArgDescriptor WorkItemIDY

ArgDescriptor LDSKernelId

ArgDescriptor KernargSegmentPtr

ArgDescriptor WorkItemIDX

ArgDescriptor FlatScratchInit

ArgDescriptor DispatchPtr

ArgDescriptor ImplicitBufferPtr

ArgDescriptor WorkGroupIDX

static ArgDescriptor createStack(unsigned Offset, unsigned Mask=~0u)

static ArgDescriptor createArg(const ArgDescriptor &Arg, unsigned Mask)

static ArgDescriptor createRegister(Register Reg, unsigned Mask=~0u)

bool RequiresCodeGenSCCOrder

DenormalModeKind Input

Denormal treatment kind for floating point instruction inputs in the default floating-point environme...

@ PreserveSign

The sign of a flushed-to-zero number is preserved in the sign of 0.

@ IEEE

IEEE-754 denormal numbers preserved.

DenormalModeKind Output

Denormal flushing mode for floating point instruction results in the default floating point environme...

A simple and fast domtree-based CSE pass.

MachineFunctionInfo - This class can be derived from and used by targets to hold private target-speci...

MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...

This class manages callbacks registration, as well as provides a way for PassInstrumentation to pass ...

StringMap< VRegInfo * > VRegInfosNamed

DenseMap< Register, VRegInfo * > VRegInfos

RegisterTargetMachine - Helper template for registering a target machine implementation,...

A utility pass template to force an analysis result to be available.

bool DX10Clamp

Used by the vector ALU to force DX10-style treatment of NaNs: when set, clamp NaN to zero; otherwise,...

DenormalMode FP64FP16Denormals

If this is set, neither input nor output denormals are flushed for both f64 and f16/v2f16 instructions...

bool IEEE

Floating point opcodes that support exception flag gathering quiet and propagate signaling NaN inputs...

DenormalMode FP32Denormals

If this is set, neither input nor output denormals are flushed for most f32 instructions.

The llvm::once_flag structure.

Targets should override this in a way that mirrors the implementation of llvm::MachineFunctionInfo.

StringValue SGPRForEXECCopy

SmallVector< StringValue > WWMReservedRegs

StringValue FrameOffsetReg

StringValue LongBranchReservedReg

StringValue VGPRForAGPRCopy

std::optional< SIArgumentInfo > ArgInfo

SmallVector< StringValue, 2 > SpillPhysVGPRS

StringValue ScratchRSrcReg

StringValue StackPtrOffsetReg

bool FP64FP16OutputDenormals

bool FP64FP16InputDenormals

A wrapper around std::string which contains a source range that's being set during parsing.