LLVM: lib/Target/X86/X86PreTileConfig.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

42

43using namespace llvm;

44

45#define DEBUG_TYPE "tile-pre-config"

46

51 ": Failed to config tile register, please define the shape earlier");

52}

53

54namespace {

55

56struct MIRef {

57 MachineInstr *MI = nullptr;

58 MachineBasicBlock *MBB = nullptr;

59

60 size_t Pos = 0;

61 MIRef() = default;

62 MIRef(MachineBasicBlock *MBB) : MBB(MBB) {

63 for (auto I = MBB->begin(), E = MBB->end(); I != E && I->isPHI();

64 ++I, ++Pos)

65 MI = &*I;

66 }

67 MIRef(MachineInstr *MI)

69 Pos(std::distance(MBB->instr_begin(), ++MI->getIterator())) {}

70 MIRef(MachineInstr *MI, MachineBasicBlock *MBB)

71 : MI(MI), MBB(MBB),

72 Pos(std::distance(MBB->instr_begin(), ++MI->getIterator())) {}

73 MIRef(MachineInstr *MI, MachineBasicBlock *MBB, size_t Pos)

74 : MI(MI), MBB(MBB), Pos(Pos) {}

75 operator bool() const { return MBB != nullptr; }

77 return MI == RHS.MI && MBB == RHS.MBB;

78 }

79 bool operator!=(const MIRef &RHS) const { return !(*this == RHS); }

81

82

83 return std::tie(MBB, Pos) < std::tie(RHS.MBB, RHS.Pos);

84 }

86

87

88 return std::tie(MBB, Pos) > std::tie(RHS.MBB, RHS.Pos);

89 }

90};

91

// Per-basic-block facts that runOnMachineFunction stores in BBVisitedInfo.
92struct BBInfo {

// Position of an AMX instruction recorded during the block scan; a default
// (unset) MIRef means nothing has been recorded for this block.
93 MIRef FirstAMX;

// Most recent call in the block for which isDestructiveCall returned true.
94 MIRef LastCall;

// Set on a block when a predecessor reached over a non-back edge either has a
// recorded AMX instruction or has this flag set itself.
95 bool HasAMXRegLiveIn = false;

// Set while walking predecessors of the blocks listed in ShapeBBs; the
// insertion loop refuses to use a block as an insert point while it is set.
96 bool TileCfgForbidden = false;

// Set when an AMX instruction is seen with no recorded LastCall ahead of it in
// the block, and propagated to predecessors that have no LastCall either.
97 bool NeedTileCfgLiveIn = false;

98};

99

101 MachineRegisterInfo *MRI = nullptr;

102 const MachineLoopInfo *MLI = nullptr;

103 SmallPtrSet<MachineInstr *, 8> DefVisited;

104 DenseMap<MachineBasicBlock *, BBInfo> BBVisitedInfo;

105 DenseMap<MachineBasicBlock *, SmallVector<MIRef, 8>> ShapeBBs;

106

107

// Inspects the register-mask operand of MI and reports whether any bit of
// UsableRegs is still set afterwards. UsableRegs is taken by value, so the
// caller's bit vector is never modified.
// Returns false when MI carries no register-mask operand.
// NOTE(review): listing lines 109 and 113 are missing from this extract. Line
// 109 must define Iter from the operand search below; line 113 presumably
// updates UsableRegs from the mask found -- confirm against the upstream file.
108 bool isDestructiveCall(MachineInstr &MI, BitVector UsableRegs) {

110 MI.operands(), [](MachineOperand &MO) { return MO.isRegMask(); });

// No register mask on this instruction: nothing to evaluate.
111 if (Iter == MI.operands_end())

112 return false;

114 return !UsableRegs.none();

115 }

116

117

// Classifies MI as an AMX instruction. On the register-class path it also
// calls collectShapeInfo(MI) before returning true; the opcodes listed in the
// switch return true without collecting shape information.
118 bool isAMXInstruction(MachineInstr &MI) {

// PHIs, debug instructions and anything with fewer than three operands are
// rejected up front (collectShapeInfo reads operands 1 and 2).
119 if (MI.isPHI() || MI.isDebugInstr() || MI.getNumOperands() < 3)

120 return false;

// Opcodes accepted purely by opcode, without looking at operand 0.
121 switch (MI.getOpcode()) {

122 case X86::PTILESTOREDV:

123 case X86::PTCVTROWD2PSrreV:

124 case X86::PTCVTROWD2PSrriV:

125 case X86::PTCVTROWPS2BF16HrreV:

126 case X86::PTCVTROWPS2BF16HrriV:

127 case X86::PTCVTROWPS2BF16LrreV:

128 case X86::PTCVTROWPS2BF16LrriV:

129 case X86::PTCVTROWPS2PHHrreV:

130 case X86::PTCVTROWPS2PHHrriV:

131 case X86::PTCVTROWPS2PHLrreV:

132 case X86::PTCVTROWPS2PHLrriV:

133 case X86::PTILEMOVROWrreV:

134 case X86::PTILEMOVROWrriV:

135 return true;

136 }

137

138

139

// Every other instruction is judged by its first operand.
140 MachineOperand &MO = MI.getOperand(0);

// NOTE(review): listing line 141, the condition guarding this early return,
// is missing from this extract -- restore it from the upstream file.
142 return false;

143

// Only a first operand whose register class is TILE qualifies.
144 if (MRI->getRegClass(MO.getReg())->getID() != X86::TILERegClassID)

145 return false;

146

// Side effect: record the shape definitions feeding this instruction.
147 collectShapeInfo(MI);

148 return true;

149 }

150

151

152 bool isLoopBackEdge(MachineBasicBlock *Header, MachineBasicBlock *Bottom) {

153 if (!MLI->isLoopHeader(Header))

154 return false;

155 auto *ML = MLI->getLoopFor(Header);

156 if (ML->contains(Bottom) && ML->isLoopLatch(Bottom))

157 return true;

158

159 return false;

160 }

161

162

163 void collectShapeInfo(MachineInstr &MI);

164

165

// Moves shape-defining instructions recorded in Shapes to just before this
// block's FirstAMX instruction, then appends to Shapes a reference to the
// instruction that ends up directly ahead of the insert point.
// Returns false as soon as one candidate cannot be moved; candidates handled
// in earlier iterations have already been moved at that point.
// Fix: the parameter type had lost its template argument in this listing;
// SmallVectorImpl<MIRef> matches the ShapeBBs value type passed by the caller
// and the MIRef pushed at the end of this function.
// NOTE(review): listing lines 168 (definition of FirstShapeBelowAMX), 175 (the
// condition guarding `continue`) and 186 are missing from this extract --
// restore them from the upstream file.
166 bool hoistShapesInBB(MachineBasicBlock *MBB, SmallVectorImpl<MIRef> &Shapes) {

167 MIRef &FirstAMX = BBVisitedInfo[MBB].FirstAMX;

// Everything is re-inserted immediately before the recorded AMX instruction.
169 auto InsertPoint = FirstAMX.MI->getIterator();

170 for (auto I = FirstShapeBelowAMX, E = Shapes.end(); I != E; ++I) {

171

// Instructions that may load or store are never moved.
172 if (I->MI->mayLoadOrStore())

173 return false;

174 for (auto &MO : I->MI->operands()) {

176 continue;

177

178

// A register operand whose definition orders after FirstAMX blocks the move.
179 if (MO.isReg() && MIRef(MRI->getVRegDef(MO.getReg())) > FirstAMX)

180 return false;

181

182 }

// Detach the instruction and re-insert it ahead of the AMX instruction.
183 MBB->insert(InsertPoint, I->MI->removeFromParent());

184 }

185

// Record the instruction now sitting directly before the insert point.
187 Shapes.push_back(MIRef(&*--InsertPoint, MBB));

188 return true;

189 }

190

191public:

// Constructs the pass, forwarding the static pass ID to MachineFunctionPass.
192 X86PreTileConfig() : MachineFunctionPass(ID) {}

193

194

195 StringRef getPassName() const override {

196 return "Tile Register Pre-configure";

197 }

198

199

// Declares the analyses this pass depends on.
// NOTE(review): listing lines 201 and 203 are missing from this extract, and
// the template argument of addRequired on line 202 appears to have been
// stripped (runOnMachineFunction fetches a loop-info analysis via
// getAnalysis().getLI()) -- restore all three from the upstream file.
200 void getAnalysisUsage(AnalysisUsage &AU) const override {

202 AU.addRequired();

204 }

205

206

207 void releaseMemory() override {

208 ShapeBBs.clear();

209 DefVisited.clear();

210 BBVisitedInfo.clear();

211 }

212

213

214 bool runOnMachineFunction(MachineFunction &MF) override;

215

216 static char ID;

217};

218

219}

220

221char X86PreTileConfig::ID = 0;

222

224 "Tile Register Pre-configure", false, false)

228

// Walks backwards from the registers in operands 1 and 2 of MI to the
// instructions that define them and records those definitions in ShapeBBs,
// keyed by the defining block.
// NOTE(review): listing lines 230 (opening of the RecordShape lambda), 233
// (definition of the insertion position I), 238 (declaration of WorkList),
// 242 (definition of DefMI) and 244 (definition of DefMBB) are missing from
// this extract -- restore them from the upstream file.
229void X86PreTileConfig::collectShapeInfo(MachineInstr &MI) {

231 MIRef MIR(MI, MBB);

232 auto &Refs = ShapeBBs[MBB];

// Insert at position I unless an equal reference already sits there, so a
// definition is never listed twice for the same block.
234 if (I == Refs.end() || *I != MIR)

235 Refs.insert(I, MIR);

236 };

237

// The work list starts with the registers of operands 1 and 2.
239 {MI.getOperand(1).getReg(), MI.getOperand(2).getReg()});

240 while (!WorkList.empty()) {

241 Register R = WorkList.pop_back_val();

243 assert(DefMI && "R must has one define instruction");

// Move-immediates are not recorded, and each definition is handled only once
// per function (DefVisited is cleared in releaseMemory).
245 if (DefMI->isMoveImmediate() || !DefVisited.insert(DefMI).second)

246 continue;

247

248 if (DefMI->isPHI()) {

// PHI operands are read in pairs starting at index 1: register at I, incoming
// block at I + 1.
249 for (unsigned I = 1; I < DefMI->getNumOperands(); I += 2)

// A value arriving over a loop back edge is not followed; the PHI itself is
// recorded instead.
250 if (isLoopBackEdge(DefMBB, DefMI->getOperand(I + 1).getMBB()))

251 RecordShape(DefMI, DefMBB);

252 else

// Otherwise keep searching through the incoming register.
253 WorkList.push_back(DefMI->getOperand(I).getReg());

254 } else {

// Any non-PHI definition is recorded directly.
255 RecordShape(DefMI, DefMBB);

256 }

257 }

258}

259

// Scans every block for AMX instructions and AMX-clobbering calls, works out
// which points need a tile configuration, and processes those points.
// Returns false without inserting anything when the function does not use the
// ManagedRA AMX programming model, when no configuration point is needed, or
// on the two early-exit paths in the shape loop; returns true otherwise.
//
// Fixes applied in this block:
//  * `if (!Info.FirstAMX)`: the unnegated test could never fire, because a
//    default MIRef converts to false, so FirstAMX was never recorded.
//  * `else if (!Info.NeedTileCfgLiveIn)`: the unnegated test only re-set a
//    flag that was already true, so nothing was propagated to predecessors.
//  * `if (!Info.TileCfgForbidden && ...)`: same pattern; the flag could never
//    become true anywhere in the pass.
//  * getInfo / getSubtarget regain the template arguments implied by the
//    declared types on the same lines.
// NOTE(review): many listing lines are missing from this extract (numbering
// gaps such as 272, 276, 288, 300, 303, 306, 313-314, 317, 323, 341, 346, 349,
// 352-353, 357, 362, 370, 372, 376, 384, 399, 407, 410-412, 415-419, 424-431,
// 434). They are left missing here; restore them from the upstream file.
260bool X86PreTileConfig::runOnMachineFunction(MachineFunction &MF) {

261 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();

262

// Only functions using the ManagedRA programming model are processed.
263 if (X86FI->getAMXProgModel() != AMXProgModelEnum::ManagedRA)

264 return false;

265

266 const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();

267 const TargetInstrInfo *TII = ST.getInstrInfo();

268 const TargetRegisterInfo *TRI = ST.getRegisterInfo();

269 const TargetRegisterClass *RC = TRI->getRegClass(X86::TILERegClassID);

270

// Bit vector with one bit per target register; TMM registers get set below.
271 BitVector AMXRegs(TRI->getNumRegs());

273 AMXRegs.set(X86::TMM0 + I);

274

275

// NOTE(review): the template argument of getAnalysis appears to have been
// stripped from this line by the extraction.
277 MLI = &getAnalysis().getLI();

278 SmallSet<MIRef, 8> CfgNeedInsert;

279 SmallVector<MachineBasicBlock *, 8> CfgLiveInBBs;

280 for (auto &MBB : MF) {

// Pos is the 1-based index of the current instruction within the block.
281 size_t Pos = 0;

282 auto &Info = BBVisitedInfo[&MBB];

283 for (auto &MI : MBB) {

284 ++Pos;

285 if (isAMXInstruction(MI)) {

286

287 if (Info.LastCall)

289 else

// No clobbering call seen earlier in this block, so the configuration has to
// be live on entry.
290 Info.NeedTileCfgLiveIn = true;

291

// Record only the first AMX instruction of the block.
292 if (!Info.FirstAMX)

293 Info.FirstAMX = MIRef(&MI, &MBB, Pos);

294 } else if (MI.isCall() && isDestructiveCall(MI, AMXRegs)) {

295

// Later clobbering calls overwrite earlier ones, so this ends up as the last.
296 Info.LastCall = MIRef(&MI, &MBB, Pos);

297 }

298 }

299 if (Info.NeedTileCfgLiveIn) {

301 CfgNeedInsert.insert(MIRef(&MBB));

302 else

304 }

// Mark successors reached over non-back edges as having AMX state live in.
305 if (Info.FirstAMX || Info.HasAMXRegLiveIn)

307 if (!isLoopBackEdge(Succ, &MBB))

308 BBVisitedInfo[Succ].HasAMXRegLiveIn = true;

309 }

310

311

// Propagate the live-in requirement backwards through predecessors.
312 while (!CfgLiveInBBs.empty()) {

315 auto &Info = BBVisitedInfo[Pred];

316 if (Info.LastCall) {

// Visit each predecessor only once: act only while the flag is still clear.
318 } else if (!Info.NeedTileCfgLiveIn) {

319 Info.NeedTileCfgLiveIn = true;

320 if (Pred == &MF.front())

321 CfgNeedInsert.insert(MIRef(Pred));

322 else

324 }

325 }

326 }

327

328

// Nothing requires a tile configuration: leave the function untouched.
329 if (CfgNeedInsert.empty())

330 return false;

331

332

333 SmallVector<MachineBasicBlock *, 8> WorkList;

334 for (auto &I : ShapeBBs) {

335 auto &Info = BBVisitedInfo[I.first];

336

// A block that defines shapes while AMX state is already live in is rejected.
337 if (Info.HasAMXRegLiveIn) {

338

339

340

342 return false;

343 }

// If the last shape definition orders after the first AMX instruction, the
// shapes must be hoisted above it; give up when that is not possible.
344 if (Info.FirstAMX && Info.FirstAMX < I.second.back() &&

345 !hoistShapesInBB(I.first, I.second)) {

347 return false;

348 }

350 }

351 while (!WorkList.empty()) {

354 auto &Info = BBVisitedInfo[Pred];

// Flag each predecessor once (back edges excluded) as forbidden for insertion.
355 if (!Info.TileCfgForbidden && !isLoopBackEdge(MBB, Pred)) {

356 Info.TileCfgForbidden = true;

358 }

359 }

360 }

361

363 SmallSet<MIRef, 8> VisitedOrInserted;

// Stack object sized and aligned for the tile configuration data.
364 int SS = MF.getFrameInfo().CreateStackObject(

365 ST.getTileConfigSize(), ST.getTileConfigAlignment(), false);

366

367

368 for (const auto &I : CfgNeedInsert) {

369 SmallSet<MIRef, 8> InsertPoints;

371 while (!WorkList.empty()) {

373 if (!VisitedOrInserted.count(I)) {

374 if (!BBVisitedInfo[I.MBB].TileCfgForbidden) {

375

377 } else {

378

// Remember the forbidden point so it is not examined again.
379 VisitedOrInserted.insert(I);

380

381

// Look further down through successors that need the configuration live in.
382 for (auto *Succ : I.MBB->successors())

383 if (BBVisitedInfo[Succ].NeedTileCfgLiveIn)

385 }

386 }

387 }

388

389

390 for (MIRef I : InsertPoints) {

391

// Never place the point ahead of the last shape definition of its block.
392 auto It = ShapeBBs.find(I.MBB);

393 if (It != ShapeBBs.end() && I < It->second.back())

394 I = It->second.back();

395

396

// Handle each final point at most once.
397 if (VisitedOrInserted.insert(I).second) {

398 auto II = I.MI ? I.MI->getIterator() : I.MBB->instr_begin();

400 SS);

401 }

402 }

403 }

404

405

// The remaining code works on the entry block and picks a vector register
// width from the subtarget features: 512-bit, then 256-bit, then 128-bit.
406 MachineBasicBlock &MBB = MF.front();

408 if (ST.hasAVX512()) {

409 Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass);

413 } else if (ST.hasAVX2()) {

414 Register Ymm = MRI->createVirtualRegister(&X86::VR256RegClass);

420 } else {

421 assert(ST.hasSSE2() && "AMX should assume SSE2 enabled");

422 unsigned StoreOpc = ST.hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;

423 Register Xmm = MRI->createVirtualRegister(&X86::VR128RegClass);

432 }

433

435

436 return true;

437}

438

440 return new X86PreTileConfig();

441}

unsigned const MachineRegisterInfo * MRI

MachineInstrBuilder MachineInstrBuilder & DefMI

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL

static const Function * getParent(const Value *V)

static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

Analysis containing CSE Info

const HexagonInstrInfo * TII

Module.h This file contains the declarations for the Module class.

Register const TargetRegisterInfo * TRI

Promote Memory to Register

uint64_t IntrinsicInst * II

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

This file defines the SmallSet class.

static void emitErrorMsg(MachineFunction &MF)

Definition X86PreTileConfig.cpp:47

AnalysisUsage & addRequired()

void setPreservesAll()

Set by analyses that do not transform their input at all.

void clearBitsInMask(const uint32_t *Mask, unsigned MaskWords=~0u)

clearBitsInMask - Clear any bits in this vector that are set in Mask.

bool none() const

none - Returns true if none of the bits are set.

FunctionPass class - This class is used to implement most global optimizations.

LLVMContext & getContext() const

getContext - Return a reference to the LLVMContext associated with this function.

This is an important class for using LLVM in a threaded context.

LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)

emitError - Emit an error message to the currently installed error handler with optional location inf...

LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)

Insert MI into the instruction list before I, possibly inside a bundle.

iterator_range< succ_iterator > successors()

iterator_range< pred_iterator > predecessors()

MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...

void getAnalysisUsage(AnalysisUsage &AU) const override

getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

StringRef getName() const

getName - Return the name of the corresponding LLVM function.

MachineRegisterInfo & getRegInfo()

getRegInfo - Return information about the registers currently in use.

Function & getFunction()

Return the LLVM function that this machine code represents.

Ty * getInfo()

getInfo - Keep track of various per-function pieces of information for backends that would like to do...

const MachineInstrBuilder & addImm(int64_t Val) const

Add a new immediate operand.

const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const

Add a new virtual register operand.

Representation of each machine instruction.

bool isReg() const

isReg - Tests if this is a MO_Register operand.

Register getReg() const

getReg - Returns the register number.

Wrapper class representing virtual and physical registers.

constexpr bool isVirtual() const

Return true if the specified register number is in the virtual register namespace.

std::pair< iterator, bool > insert(PtrType Ptr)

Inserts Ptr if and only if there is no element in the container equal to Ptr.

size_type count(const T &V) const

count - Return 1 if the element is in the set, 0 otherwise.

std::pair< const_iterator, bool > insert(const T &V)

insert - Insert an element into the set if it isn't already there.

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

unsigned getNumRegs() const

Return the number of registers in this class.

AMXProgModelEnum getAMXProgModel() const

self_iterator getIterator()

This is an optimization pass for GlobalISel generic memory operations.

bool operator<(int64_t V1, const APSInt &V2)

MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)

Builder interface. Specify how to create the initial instruction itself.

bool operator!=(uint64_t V1, const APInt &V2)

static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)

addFrameReference - This function is used to add a reference to the base of an abstract object on the...

bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)

bool operator>(int64_t V1, const APSInt &V2)

class LLVM_GSL_OWNER SmallVector

Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...

auto lower_bound(R &&Range, T &&Value)

Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...

auto find_if(R &&Range, UnaryPredicate P)

Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.

FunctionPass * createX86PreTileConfigPass()

Return a pass that inserts pseudo tile config instructions.

Definition X86PreTileConfig.cpp:439