LLVM: lib/Target/X86/X86LowerAMXIntrinsics.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

31#include "llvm/IR/IntrinsicsX86.h"

39

40using namespace llvm;

41using namespace PatternMatch;

42

43#define DEBUG_TYPE "lower-amx-intrinsics"

44

// Debug-only helper: the whole definition sits inside #ifndef NDEBUG.
// NOTE(review): the declaration line is missing from this listing, and the
// dyn_cast target type has been stripped. The visible body returns true only
// when the cast of Ty succeeds and the resulting type has exactly 256 elements
// whose element type is a 32-bit integer; otherwise it returns false.
45#ifndef NDEBUG

47 if (auto *FVT = dyn_cast(Ty))

48 return FVT->getNumElements() == 256 &&

49 FVT->getElementType()->isIntegerTy(32);

50 return false;

51}

52#endif

53

56 cl::desc("X86: enable AMX scalarizition."));

57

// File-local implementation class for lowering the AMX tile intrinsics.
// The constructor stores three things: Func, a dominator-tree updater (DTU)
// and a LoopInfo handle (LI). LI is null-checked before every use in the
// member definitions, so it may be absent.
// NOTE(review): several member declarations are missing from this listing and
// the bare `template` keywords below have lost their parameter lists.
58namespace {

59class X86LowerAMXIntrinsics {

61

62public:

// Constructor initializer list: binds the three members described above.
64 : Func(F), DTU(DomTU), LI(LoopI) {}

66

67private:

73 template

// Constrained member template: only enabled for the five tile dot-product
// intrinsic IDs listed in the enable_if condition.
77 template <Intrinsic::ID IntrID>

78 std::enable_if_t<IntrID == Intrinsic::x86_tdpbssd_internal ||

79 IntrID == Intrinsic::x86_tdpbsud_internal ||

80 IntrID == Intrinsic::x86_tdpbusd_internal ||

81 IntrID == Intrinsic::x86_tdpbuud_internal ||

82 IntrID == Intrinsic::x86_tdpbf16ps_internal,

// Lowers a single tile load or tile store intrinsic call.
87 template

88 bool lowerTileLoadStore(Instruction *TileLoadStore);

// Same five-ID constraint as above, here with a bool result type.
89 template <Intrinsic::ID IntrID>

90 std::enable_if_t<IntrID == Intrinsic::x86_tdpbssd_internal ||

91 IntrID == Intrinsic::x86_tdpbsud_internal ||

92 IntrID == Intrinsic::x86_tdpbusd_internal ||

93 IntrID == Intrinsic::x86_tdpbuud_internal ||

94 IntrID == Intrinsic::x86_tdpbf16ps_internal,

95 bool>

// Lowers a single tile-zero intrinsic call.
97 bool lowerTileZero(Instruction *TileZero);

98};

99}

100

112

// NOTE(review): the head of this function is missing from this listing; it is
// presumably createLoop, judging by the call sites that pass
// (Preheader, Exit, Bound, Step, Name, B, L) and receive a BasicBlock*.
// The visible tail creates the induction-variable PHI, wires the latch
// increment and exit test, records the CFG edge changes, and returns Body.
//
// Induction variable: a PHI of type I16Ty placed before Header's terminator.
117 PHINode::Create(I16Ty, 2, Name + ".iv", Header->getTerminator()->getIterator());

// The IV is 0 when Header is entered from Preheader.
118 IV->addIncoming(ConstantInt::get(I16Ty, 0), Preheader);

119

// In the latch: advance the IV by Step and compare the new value with Bound.
120 B.SetInsertPoint(Latch);

121 Value *Inc = B.CreateAdd(IV, Step, Name + ".step");

122 Value *Cond = B.CreateICmpNE(Inc, Bound, Name + ".cond");

// Back edge: the incremented value reaches the PHI from Latch.
124 IV->addIncoming(Inc, Latch);

125

// Report the edge changes to the dominator-tree updater: the Preheader->Tmp
// edge goes away; Preheader->Header, Header->Body, Body->Latch, the
// Latch->Header back edge and the Latch->Exit edge are added.
129 DTU.applyUpdatesPermissive({

130 {DominatorTree::Delete, Preheader, Tmp},

131 {DominatorTree::Insert, Header, Body},

132 {DominatorTree::Insert, Body, Latch},

133 {DominatorTree::Insert, Latch, Header},

134 {DominatorTree::Insert, Latch, Exit},

135 {DominatorTree::Insert, Preheader, Header},

136 });

// Only when LoopInfo is available: register the three blocks with loop L.
137 if (LI) {

138 L->addBasicBlockToLoop(Header, *LI);

139 L->addBasicBlockToLoop(Body, *LI);

140 L->addBasicBlockToLoop(Latch, *LI);

141 }

// Callers receive the body block.
142 return Body;

143}

144

// Builds a two-level loop nest (rows, then columns) between Start and End
// that handles one 32-bit element per iteration.
//  - Load (IsTileLoad): the element loaded from EltPtr is inserted into a
//    V256I32Ty vector carried through PHI nodes; the updated vector is
//    returned.
//  - Store: the element extracted from the source vector is stored to EltPtr
//    and nullptr is returned.
// NOTE(review): the parameter list and several statements (e.g. the
// definitions of RowLatch, the loop headers and EltPtr) are missing from this
// listing.
145template

146Value *X86LowerAMXIntrinsics::createTileLoadStoreLoops(

// Prefix used when naming the generated loop blocks.
149 std::string IntrinName = IsTileLoad ? "tileload" : "tilestore";

// Loop objects are allocated only when LoopInfo is available; otherwise
// nullptr is passed to createLoop.
150 Loop *RowLoop = nullptr;

151 Loop *ColLoop = nullptr;

152 if (LI) {

153 RowLoop = LI->AllocateLoop();

154 ColLoop = LI->AllocateLoop();

// The row loop becomes a child of the loop containing Start, or a top-level
// loop when Start is not inside any loop.
156 if (Loop *ParentL = LI->getLoopFor(Start))

157 ParentL->addChildLoop(RowLoop);

158 else

159 LI->addTopLevelLoop(RowLoop);

160 }

161

// Outer loop: bound Row, step 1.
162 BasicBlock *RowBody = createLoop(Start, End, Row, B.getInt16(1),

163 IntrinName + ".scalarize.rows", B, RowLoop);

165

// Inner loop nested in the row body: bound Col, step 1.
166 BasicBlock *ColBody = createLoop(RowBody, RowLatch, Col, B.getInt16(1),

167 IntrinName + ".scalarize.cols", B, ColLoop);

168

// The current row/column values are the first instruction of each loop
// header.
172 Value *CurrentRow = &*RowLoopHeader->begin();

173 Value *CurrentCol = &*ColLoopHeader->begin();

174 Type *EltTy = B.getInt32Ty();

176

177

178

179

// Zero-extend the loop counters to Stride's type, then form
// CurrentRow * Stride + CurrentCol.
181 Value *CurrentRowZExt = B.CreateZExt(CurrentRow, Stride->getType());

182 Value *CurrentColZExt = B.CreateZExt(CurrentCol, Stride->getType());

184 B.CreateAdd(B.CreateMul(CurrentRowZExt, Stride), CurrentColZExt);

// Position inside the vector: CurrentRow * 16 + CurrentCol.
186 Value *Idx = B.CreateAdd(B.CreateMul(CurrentRow, B.getInt16(16)), CurrentCol);

187 if (IsTileLoad) {

188

189

190

// Row-level PHI for the vector under construction; it is VecZero on entry
// from Start.
193 PHINode *VecCPhiRowLoop = B.CreatePHI(V256I32Ty, 2, "vec.phi.row");

194 VecCPhiRowLoop->addIncoming(VecZero, Start);

195

196

197

198

// Column-level PHI, seeded from the row-level PHI on entry from RowBody.
200 PHINode *VecPhi = B.CreatePHI(V256I32Ty, 2, "vec.phi");

201 VecPhi->addIncoming(VecCPhiRowLoop, RowBody);

202

203

204

205

206

207

// Load one element and place it at Idx in the running vector.
209 Value *Elt = B.CreateLoad(EltTy, EltPtr);

210 Value *ResVec = B.CreateInsertElement(VecPhi, Elt, Idx);

// The updated vector flows back into both PHIs over the loop back edges.
211 VecPhi->addIncoming(ResVec, ColLoopLatch);

212 VecCPhiRowLoop->addIncoming(ResVec, RowLatch);

213

214 return ResVec;

215 } else {

// Store path: Tile is cast (target type stripped in this listing) and its
// operand 0 is used as the source vector.
216 auto *BitCast = cast(Tile);

217 Value *Vec = BitCast->getOperand(0);

219

220

221

222

223

// Extract the element at Idx and write it to memory.
225 Value *Elt = B.CreateExtractElement(Vec, Idx);

226

227 B.CreateStore(Elt, EltPtr);

228 return nullptr;

229 }

230}

231

// Builds a three-level loop nest (rows, columns, inner) between Start and End
// for one of the five tile dot-product intrinsics, and returns the vector
// NewVecD assembled from the updated accumulator elements.
// NOTE(review): the function name/parameter line and several statements (the
// loop latch/header lookups, the vector type definitions, and the left-hand
// sides of the IdxC/IdxA/IdxB computations) are missing from this listing.
232template <Intrinsic::ID IntrID>

233std::enable_if_t<IntrID == Intrinsic::x86_tdpbssd_internal ||

234 IntrID == Intrinsic::x86_tdpbsud_internal ||

235 IntrID == Intrinsic::x86_tdpbusd_internal ||

236 IntrID == Intrinsic::x86_tdpbuud_internal ||

237 IntrID == Intrinsic::x86_tdpbf16ps_internal,

// Pick the name prefix for the generated loop blocks from the intrinsic ID.
243 std::string IntrinName;

244 switch (IntrID) {

245 case Intrinsic::x86_tdpbssd_internal:

246 IntrinName = "tiledpbssd";

247 break;

248 case Intrinsic::x86_tdpbsud_internal:

249 IntrinName = "tiledpbsud";

250 break;

251 case Intrinsic::x86_tdpbusd_internal:

252 IntrinName = "tiledpbusd";

253 break;

254 case Intrinsic::x86_tdpbuud_internal:

255 IntrinName = "tiledpbuud";

256 break;

257 case Intrinsic::x86_tdpbf16ps_internal:

258 IntrinName = "tiledpbf16ps";

259 break;

260 }

// Loop objects exist only when LoopInfo is available.
261 Loop *RowLoop = nullptr;

262 Loop *ColLoop = nullptr;

263 Loop *InnerLoop = nullptr;

264 if (LI) {

265 RowLoop = LI->AllocateLoop();

266 ColLoop = LI->AllocateLoop();

267 InnerLoop = LI->AllocateLoop();

// The row loop is attached to the loop containing Start, or registered as a
// top-level loop when there is none.
270 if (Loop *ParentL = LI->getLoopFor(Start))

271 ParentL->addChildLoop(RowLoop);

272 else

273 LI->addTopLevelLoop(RowLoop);

274 }

275

// Outermost loop: bound Row, step 1.
276 BasicBlock *RowBody = createLoop(Start, End, Row, B.getInt16(1),

277 IntrinName + ".scalarize.rows", B, RowLoop);

279

// Middle loop: bound Col, step 1.
280 BasicBlock *ColBody = createLoop(RowBody, RowLatch, Col, B.getInt16(1),

281 IntrinName + ".scalarize.cols", B, ColLoop);

282

284

// Innermost loop: bound K, step 1.
287 createLoop(ColBody, ColLoopLatch, K, B.getInt16(1),

288 IntrinName + ".scalarize.inner", B, InnerLoop);

289

// Each loop's current counter is the first instruction of its header.
294 Value *CurrentRow = &*RowLoopHeader->begin();

295 Value *CurrentCol = &*ColLoopHeader->begin();

296 Value *CurrentInner = &*InnerLoopHeader->begin();

297

// Acc, LHS and RHS are each cast (target type stripped in this listing) and
// their operand 0 is taken as the underlying vector: VecC, VecA and VecB.
299 auto *BitCastAcc = cast(Acc);

300 Value *VecC = BitCastAcc->getOperand(0);

302

303

304

305 auto *BitCastLHS = cast(LHS);

306 Value *VecA = BitCastLHS->getOperand(0);

308 auto *BitCastRHS = cast(RHS);

309 Value *VecB = BitCastRHS->getOperand(0);

311

312

313

314

315

316

317

// Row-level PHIs: the C vector starts as VecC, the D vector starts as VecZero
// when entered from Start.
319 PHINode *VecCPhiRowLoop = B.CreatePHI(V256I32Ty, 2, "vec.c.phi.row");

320 VecCPhiRowLoop->addIncoming(VecC, Start);

322 PHINode *VecDPhiRowLoop = B.CreatePHI(V256I32Ty, 2, "vec.d.phi.row");

323 VecDPhiRowLoop->addIncoming(VecZero, Start);

324

325

326

327

328

329

330

331

332

333

334

// Column-level PHIs, seeded from the row-level PHIs on entry from RowBody.
336 PHINode *VecCPhiColLoop = B.CreatePHI(V256I32Ty, 2, "vec.c.phi.col");

337 VecCPhiColLoop->addIncoming(VecCPhiRowLoop, RowBody);

338 PHINode *VecDPhiColLoop = B.CreatePHI(V256I32Ty, 2, "vec.d.phi.col");

339 VecDPhiColLoop->addIncoming(VecDPhiRowLoop, RowBody);

// CurrentRow * 16 + CurrentCol (presumably IdxC; the assignment target is
// missing from this listing).
341 B.CreateAdd(B.CreateMul(CurrentRow, B.getInt16(16)), CurrentCol);

342

343

344

345

346

347

// Inner-loop PHI for the C vector, seeded from the column-level PHI.
349 PHINode *VecCPhi = B.CreatePHI(V256I32Ty, 2, "vec.c.inner.phi");

350 VecCPhi->addIncoming(VecCPhiColLoop, ColBody);

351

// CurrentRow * 16 + CurrentInner and CurrentInner * 16 + CurrentCol
// (presumably IdxA and IdxB respectively; assignment targets are missing).
354 B.CreateAdd(B.CreateMul(CurrentRow, B.getInt16(16)), CurrentInner);

356 B.CreateAdd(B.CreateMul(CurrentInner, B.getInt16(16)), CurrentCol);

357 Value *NewVecC = nullptr;

358

// Integer variants (everything except tdpbf16ps).
359 if (IntrID != Intrinsic::x86_tdpbf16ps_internal) {

360

361

362

363

364

365

366

367

368

369

370

371

372

373

// Pull out the accumulator element and the A/B elements; the A and B
// elements are bitcast to V4I8Ty.
376 Value *EltC = B.CreateExtractElement(VecCPhi, IdxC);

377 Value *EltA = B.CreateExtractElement(VecA, IdxA);

378 Value *SubVecA = B.CreateBitCast(EltA, V4I8Ty);

379 Value *EltB = B.CreateExtractElement(VecB, IdxB);

380 Value *SubVecB = B.CreateBitCast(EltB, V4I8Ty);

// Widen both operands to V4I32Ty. The extension kind depends on the
// intrinsic: ssd sign-extends both; sud sign-extends A and zero-extends B;
// usd zero-extends A and sign-extends B; uud zero-extends both.
381 Value *SEXTSubVecB = nullptr;

382 Value *SEXTSubVecA = nullptr;

383 switch (IntrID) {

384 case Intrinsic::x86_tdpbssd_internal:

385 SEXTSubVecB = B.CreateSExt(SubVecB, V4I32Ty);

386 SEXTSubVecA = B.CreateSExt(SubVecA, V4I32Ty);

387 break;

388 case Intrinsic::x86_tdpbsud_internal:

389 SEXTSubVecB = B.CreateZExt(SubVecB, V4I32Ty);

390 SEXTSubVecA = B.CreateSExt(SubVecA, V4I32Ty);

391 break;

392 case Intrinsic::x86_tdpbusd_internal:

393 SEXTSubVecB = B.CreateSExt(SubVecB, V4I32Ty);

394 SEXTSubVecA = B.CreateZExt(SubVecA, V4I32Ty);

395 break;

396 case Intrinsic::x86_tdpbuud_internal:

397 SEXTSubVecB = B.CreateZExt(SubVecB, V4I32Ty);

398 SEXTSubVecA = B.CreateZExt(SubVecA, V4I32Ty);

399 break;

400 default:

402 }

// Multiply lane-wise, sum the lanes, add the sum to the accumulator element
// and write the result back at IdxC.
403 Value *SubVecR = B.CreateAddReduce(B.CreateMul(SEXTSubVecA, SEXTSubVecB));

404 Value *ResElt = B.CreateAdd(EltC, SubVecR);

405 NewVecC = B.CreateInsertElement(VecCPhi, ResElt, IdxC);

406 } else {

407

408

409

410

411

412

413

414

415

416

417

418

419

420

421

422

423

424

425

426

427

428

// tdpbf16ps variant: the accumulator element is reinterpreted as float and
// the A/B elements are bitcast to V2I16Ty.
431 Value *EltC = B.CreateExtractElement(VecCPhi, IdxC);

432 Value *EltCF32 = B.CreateBitCast(EltC, B.getFloatTy());

433 Value *EltA = B.CreateExtractElement(VecA, IdxA);

434 Value *SubVecA = B.CreateBitCast(EltA, V2I16Ty);

435 Value *EltB = B.CreateExtractElement(VecB, IdxB);

436 Value *SubVecB = B.CreateBitCast(EltB, V2I16Ty);

// Shuffle each 2-lane value with ZeroV2I16 using mask {2, 0, 3, 1} (indices
// 2 and 3 select from the second shuffle operand), producing a 4-lane value
// that is then bitcast to V2F32Ty.
438 int ShuffleMask[4] = {2, 0, 3, 1};

439 auto ShuffleArray = ArrayRef(ShuffleMask);

440 Value *AV2F32 = B.CreateBitCast(

441 B.CreateShuffleVector(SubVecA, ZeroV2I16, ShuffleArray), V2F32Ty);

442 Value *BV2F32 = B.CreateBitCast(

443 B.CreateShuffleVector(SubVecB, ZeroV2I16, ShuffleArray), V2F32Ty);

// Floating-point multiply, then an fadd reduction that starts from the
// accumulator value; the float result is bitcast back to i32 and stored at
// IdxC.
444 Value *SubVecR = B.CreateFAddReduce(EltCF32, B.CreateFMul(AV2F32, BV2F32));

445 Value *ResElt = B.CreateBitCast(SubVecR, B.getInt32Ty());

446 NewVecC = B.CreateInsertElement(VecCPhi, ResElt, IdxC);

447 }

448

449

450

451

452

// Copy the freshly updated element at IdxC from the C vector into the D
// vector.
454 Value *NewEltC = B.CreateExtractElement(NewVecC, IdxC);

455 Value *NewVecD = B.CreateInsertElement(VecDPhiColLoop, NewEltC, IdxC);

456

// Close the PHI cycles: the updated C vector feeds all three C PHIs and the
// updated D vector feeds both D PHIs over the matching loop latches.
457 VecCPhi->addIncoming(NewVecC, InnerLoopLatch);

458 VecCPhiRowLoop->addIncoming(NewVecC, RowLatch);

459 VecCPhiColLoop->addIncoming(NewVecC, ColLoopLatch);

460 VecDPhiRowLoop->addIncoming(NewVecD, RowLatch);

461 VecDPhiColLoop->addIncoming(NewVecD, ColLoopLatch);

462

463 return NewVecD;

464}

465

// Lowers one tile dot-product intrinsic call (TileDP) by generating the loop
// nest from createTileDPLoops and replacing uses with the resulting vector.
// Returns true.
// NOTE(review): the operand-matching code, the builder declarations and the
// loop over users are missing from this listing.
466template <Intrinsic::ID IntrID>

467std::enable_if_t<IntrID == Intrinsic::x86_tdpbssd_internal ||

468 IntrID == Intrinsic::x86_tdpbsud_internal ||

469 IntrID == Intrinsic::x86_tdpbusd_internal ||

470 IntrID == Intrinsic::x86_tdpbuud_internal ||

471 IntrID == Intrinsic::x86_tdpbf16ps_internal,

472 bool>

473X86LowerAMXIntrinsics::lowerTileDP(Instruction *TileDP) {

// Code that must precede the generated loops is emitted right before the
// intrinsic call.
479 PreBuilder.SetInsertPoint(TileDP);

480

481

482

// N and K are shifted right by 2, i.e. divided by 4 (presumably converting
// byte counts into counts of 32-bit elements - TODO confirm).
483 Value *NDWord = PreBuilder.CreateLShr(N, PreBuilder.getInt16(2));

484 Value *KDWord = PreBuilder.CreateLShr(K, PreBuilder.getInt16(2));

// Split the enclosing block at InsertI; the new block is named "continue".
// The dominator-tree updater and LoopInfo are passed along to SplitBlock.
487 SplitBlock(InsertI->getParent(), InsertI, &DTU, LI, nullptr, "continue");

// Generate the loop nest between Start and End.
489 Value *ResVec = createTileDPLoops(Start, End, Builder, M, NDWord,

490 KDWord, C, A, B);

491

492

// After the loops, at the first non-PHI position in End, the result vector
// is bitcast to the x86_amx type.
493 Builder.SetInsertPoint(End, End->getFirstNonPHIIt());

495 Builder.CreateBitCast(ResVec, Type::getX86_AMXTy(Builder.getContext()));

496

// Each selected instruction I (selection logic not visible here) has its
// uses redirected to the result vector and is then erased.
501 I->replaceAllUsesWith(ResVec);

502 I->eraseFromParent();

503 }

504 }

507 return true;

508}

509

// Lowers one tile load or tile store intrinsic call (selected by IsTileLoad)
// by generating the loop nest from createTileLoadStoreLoops. Returns true.
// NOTE(review): the template parameter list, the operand declarations, the
// tails of both match(...) calls and the loop over users are missing from
// this listing.
510template

511bool X86LowerAMXIntrinsics::lowerTileLoadStore(Instruction *TileLoadStore) {

// Match the call against the load or the store intrinsic pattern.
513 if (IsTileLoad)

514 match(TileLoadStore,

515 m_IntrinsicIntrinsic::x86\_tileloadd64\_internal(

517 else

518 match(TileLoadStore, m_IntrinsicIntrinsic::x86\_tilestored64\_internal(

521

// Emit the scaled operands right before the intrinsic call. Both N and Stride
// are shifted right by 2, i.e. divided by 4 (presumably bytes to 32-bit
// elements - TODO confirm).
524 PreBuilder.SetInsertPoint(TileLoadStore);

525 Value *NDWord = PreBuilder.CreateLShr(N, PreBuilder.getInt16(2));

526 Value *StrideDWord = PreBuilder.CreateLShr(Stride, PreBuilder.getInt64(2));

// Split the enclosing block at InsertI; the new block is named "continue".
529 SplitBlock(InsertI->getParent(), InsertI, &DTU, LI, nullptr, "continue");

// The tile operand is only passed for stores; loads pass nullptr.
531 Value *ResVec = createTileLoadStoreLoops(

532 Start, End, Builder, M, NDWord, Ptr, StrideDWord,

533 IsTileLoad ? nullptr : Tile);

// Only loads produce a value that needs to replace existing uses.
534 if (IsTileLoad) {

535

536

// At the first non-PHI position in End, bitcast the loaded vector to the
// x86_amx type.
537 Builder.SetInsertPoint(End, End->getFirstNonPHIIt());

539 Builder.CreateBitCast(ResVec, Type::getX86_AMXTy(Builder.getContext()));

540

// Each selected instruction I (selection logic not visible here) has its
// uses redirected to the result vector and is then erased.
545 I->replaceAllUsesWith(ResVec);

546 I->eraseFromParent();

547 }

548 }

550 }

552 return true;

553}

554

// Lowers one tile-zero intrinsic call. In the visible code, each selected
// instruction I has its uses replaced with VecZero and is then erased; the
// function returns true.
// NOTE(review): the definition of VecZero and the loop that selects I are
// missing from this listing.
555bool X86LowerAMXIntrinsics::lowerTileZero(Instruction *TileZero) {

563 I->replaceAllUsesWith(VecZero);

564 I->eraseFromParent();

565 }

566 }

568 return true;

569}

570

// Entry point of the lowering. Works in two phases: first select the AMX
// intrinsic calls of interest, then lower every entry of WorkList through the
// matching lowerTile* routine. Returns true if any lowering routine returned
// true.
// NOTE(review): the WorkList declaration, the enclosing iteration headers and
// the statement executed for a selected call (presumably appending it to
// WorkList) are missing from this listing.
571bool X86LowerAMXIntrinsics::visit() {

// C accumulates the "changed" result.
572 bool C = false;

// Selection phase: the iterator is advanced while the current instruction is
// examined; only intrinsic calls with one of the eight IDs below are
// selected, everything else falls through to the default case.
576 if (auto *Inst = dyn_cast(&*II++)) {

577 switch (Inst->getIntrinsicID()) {

578 case Intrinsic::x86_tdpbssd_internal:

579 case Intrinsic::x86_tdpbsud_internal:

580 case Intrinsic::x86_tdpbusd_internal:

581 case Intrinsic::x86_tdpbuud_internal:

582 case Intrinsic::x86_tileloadd64_internal:

583 case Intrinsic::x86_tilestored64_internal:

584 case Intrinsic::x86_tilezero_internal:

585 case Intrinsic::x86_tdpbf16ps_internal:

587 break;

588 default:

589 break;

590 }

591 }

592 }

593 }

594

// Lowering phase: dispatch on the intrinsic ID. The lowering call is the left
// operand of ||, so it always runs even when C is already true.
595 for (auto *Inst : WorkList) {

596 switch (Inst->getIntrinsicID()) {

597 case Intrinsic::x86_tdpbssd_internal:

598 C = lowerTileDPIntrinsic::x86\_tdpbssd\_internal(Inst) || C;

599 break;

600 case Intrinsic::x86_tdpbsud_internal:

601 C = lowerTileDPIntrinsic::x86\_tdpbsud\_internal(Inst) || C;

602 break;

603 case Intrinsic::x86_tdpbusd_internal:

604 C = lowerTileDPIntrinsic::x86\_tdpbusd\_internal(Inst) || C;

605 break;

606 case Intrinsic::x86_tdpbuud_internal:

607 C = lowerTileDPIntrinsic::x86\_tdpbuud\_internal(Inst) || C;

608 break;

609 case Intrinsic::x86_tdpbf16ps_internal:

610 C = lowerTileDPIntrinsic::x86\_tdpbf16ps\_internal(Inst) || C;

611 break;

// NOTE(review): the template argument that distinguishes load from store has
// been stripped from the two calls below.
612 case Intrinsic::x86_tileloadd64_internal:

613 C = lowerTileLoadStore(Inst) || C;

614 break;

615 case Intrinsic::x86_tilestored64_internal:

616 C = lowerTileLoadStore(Inst) || C;

617 break;

618 case Intrinsic::x86_tilezero_internal:

619 C = lowerTileZero(Inst) || C;

620 break;

621 default:

623 }

624 }

625

626 return C;

627}

628

// Legacy pass-manager wrapper (a FunctionPass) around X86LowerAMXIntrinsics.
// NOTE(review): the runOnFunction signature, parts of its early-exit
// conditions, the constructor body and the trailing member functions are
// missing from this listing.
629namespace {

630class X86LowerAMXIntrinsicsLegacyPass : public FunctionPass {

631public:

// Pass identification; defined after the class.
632 static char ID;

633

634 X86LowerAMXIntrinsicsLegacyPass() : FunctionPass(ID) {

637 }

638

// Early exits that report "no change". The second one involves the
// function's OptimizeNone attribute; the remainder of that condition is not
// visible here.
641 return false;

643 if (F.hasFnAttribute(Attribute::OptimizeNone) &&

645 return false;

646

// Both analyses are optional: DT and LI stay nullptr when the corresponding
// wrapper pass is not available.
647 auto *DTWP = getAnalysisIfAvailable();

648 auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;

649 auto *LIWP = getAnalysisIfAvailable();

650 auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;

// The dominator-tree updater is created with the Lazy update strategy.
651 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);

652

// Run the lowering and report whether it changed anything.
653 X86LowerAMXIntrinsics LAT(F, DTU, LI);

654 return LAT.visit();

655 }

657

662 }

663};

664}

665

// Human-readable pass name.
666static const char PassName[] = "Lower AMX intrinsics";

// Definition of the legacy pass's static ID member.
667char X86LowerAMXIntrinsicsLegacyPass::ID = 0;

// NOTE(review): the start of the pass-registration macro invocation and the
// factory function's signature are missing from this listing; only their
// tails are visible below.
669 false, false)

673

// Tail of the factory function: returns a newly allocated legacy pass.
675 return new X86LowerAMXIntrinsicsLegacyPass();

676}

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.

uint64_t IntrinsicInst * II

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

const SmallVectorImpl< MachineOperand > & Cond

assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

void visit(MachineFunction &MF, MachineBasicBlock &Start, std::function< void(MachineBasicBlock *)> op)

Target-Independent Code Generator Pass Configuration Options pass.

This pass exposes codegen information to IR-level passes.

static cl::opt< bool > X86ScalarizeAMX("enable-x86-scalar-amx", cl::init(false), cl::Hidden, cl::desc("X86: enable AMX scalarizition."))

static bool isV256I32Ty(Type *Ty)

static const char PassName[]

static const uint32_t IV[8]

Represent the analysis usage information of a pass.

AnalysisUsage & addRequired()

AnalysisUsage & addPreserved()

Add the specified Pass class to the set of analyses preserved by this pass.

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

LLVM Basic Block Representation.

iterator begin()

Instruction iterator methods.

static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)

Creates a new BasicBlock.

const BasicBlock * getSinglePredecessor() const

Return the predecessor of this block if it has a single predecessor block.

const BasicBlock * getSingleSuccessor() const

Return the successor of this block if it has a single successor.

const Function * getParent() const

Return the enclosing method, or null if none.

InstListType::iterator iterator

Instruction iterators...

LLVMContext & getContext() const

Get the context in which this basic block lives.

const Instruction * getTerminator() const LLVM_READONLY

Returns the terminator instruction if the block is well formed or null if the block is not well forme...

Conditional or Unconditional Branch instruction.

static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)

BasicBlock * getSuccessor(unsigned i) const

void setSuccessor(unsigned idx, BasicBlock *NewSucc)

static Constant * getNullValue(Type *Ty)

Constructor to create a '0' constant of arbitrary type.

Legacy analysis pass which computes a DominatorTree.

Class to represent fixed width SIMD vectors.

static FixedVectorType * get(Type *ElementType, unsigned NumElts)

FunctionPass class - This class is used to implement most global optimizations.

virtual bool runOnFunction(Function &F)=0

runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.

Common base class shared among various IRBuilders.

This provides a uniform API for creating instructions and inserting them into a basic block: either a...

InstListType::iterator eraseFromParent()

This method unlinks 'this' from the containing basic block and deletes it.

This is an important class for using LLVM in a threaded context.

void addChildLoop(LoopT *NewChild)

Add the specified loop to be a child of this loop.

The legacy pass manager's analysis pass to compute loop information.

Represents a single loop in the control flow graph.

void addIncoming(Value *V, BasicBlock *BB)

Add an incoming value to the end of the PHI list.

static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)

Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...

static PassRegistry * getPassRegistry()

getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...

virtual void getAnalysisUsage(AnalysisUsage &) const

getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...

virtual StringRef getPassName() const

getPassName - Return a nice clean name for a pass.

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

StringRef - Represent a constant reference to a string, i.e.

Primary interface to the complete machine description for the target machine.

Target-Independent Code Generator Pass Configuration Options.

The instances of the Type class are immutable: once they are created, they are never changed.

static Type * getX86_AMXTy(LLVMContext &C)

static IntegerType * getInt16Ty(LLVMContext &C)

A Use represents the edge between a Value definition and its users.

LLVM Value Representation.

Type * getType() const

All values are typed, get the type of this value.

void replaceAllUsesWith(Value *V)

Change all uses of this to point to a new Value.

iterator_range< use_iterator > uses()

const ParentTy * getParent() const

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

@ C

The default llvm calling convention, compatible with C.

unsigned ID

LLVM IR allows to use arbitrary numbers as calling convention identifiers.

bool match(Val *V, const Pattern &P)

CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)

Matches BitCast.

class_match< Value > m_Value()

Match an arbitrary value and ignore it.

initializer< Ty > init(const Ty &Val)

This is an optimization pass for GlobalISel generic memory operations.

FunctionPass * createX86LowerAMXIntrinsicsPass()

The pass transforms amx intrinsics to scalar operation if the function has optnone attribute or it is...

void initializeX86LowerAMXIntrinsicsLegacyPassPass(PassRegistry &)

iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)

Make a range that does early increment to allow mutation of the underlying range without disrupting i...

BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)

Split the specified block at the specified instruction.

iterator_range< df_iterator< T > > depth_first(const T &G)