LLVM: lib/Target/X86/X86PreTileConfig.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
42
43using namespace llvm;
44
45#define DEBUG_TYPE "tile-pre-config"
46
51 ": Failed to config tile register, please define the shape earlier");
52}
53
54namespace {
55
// Lightweight reference to a position inside a MachineBasicBlock: the
// instruction, its owning block, and its ordinal position within the block.
// Used to order events (first AMX instruction, last destructive call,
// shape-defining instructions) within and across blocks.
// NOTE(review): this listing is a garbled doxygen scrape — the init-list of
// MIRef(MachineInstr *) and the signatures of operator==/operator</operator>
// were dropped by the extraction; verify against upstream source.
56struct MIRef {
57 MachineInstr *MI = nullptr;
58 MachineBasicBlock *MBB = nullptr;
59
// Ordinal of MI within MBB; the MachineInstr* constructors compute it as
// distance from instr_begin() to one-past-MI (i.e. index + 1).
60 size_t Pos = 0;
61 MIRef() = default;
// Construct from a block: MI ends up pointing at the last leading PHI in
// the block (if any), with Pos counting the PHIs walked.
62 MIRef(MachineBasicBlock *MBB) : MBB(MBB) {
63 for (auto I = MBB->begin(), E = MBB->end(); I != E && I->isPHI();
64 ++I, ++Pos)
65 MI = &*I;
66 }
67 MIRef(MachineInstr *MI)
// NOTE(review): extraction dropped the init-list line here (presumably
// ": MI(MI), MBB(MI->getParent())," by analogy with the two-arg ctor below).
69 Pos(std::distance(MBB->instr_begin(), ++MI->getIterator())) {}
70 MIRef(MachineInstr *MI, MachineBasicBlock *MBB)
71 : MI(MI), MBB(MBB),
72 Pos(std::distance(MBB->instr_begin(), ++MI->getIterator())) {}
73 MIRef(MachineInstr *MI, MachineBasicBlock *MBB, size_t Pos)
74 : MI(MI), MBB(MBB), Pos(Pos) {}
// A MIRef is "valid" iff it refers to some block.
75 operator bool() const { return MBB != nullptr; }
// NOTE(review): the operator== signature line was dropped by extraction.
77 return MI == RHS.MI && MBB == RHS.MBB;
78 }
79 bool operator!=(const MIRef &RHS) const { return !(*this == RHS); }
81
82
// Order by block pointer first, then by position inside the block.
// NOTE(review): the operator< signature line was dropped by extraction.
83 return std::tie(MBB, Pos) < std::tie(RHS.MBB, RHS.Pos);
84 }
86
87
// NOTE(review): the operator> signature line was dropped by extraction.
88 return std::tie(MBB, Pos) > std::tie(RHS.MBB, RHS.Pos);
89 }
90};
91
// Per-MachineBasicBlock facts collected while scanning the function
// (see runOnMachineFunction below).
92struct BBInfo {
// First AMX instruction in this block, if any.
93 MIRef FirstAMX;
// Last call in this block that may clobber AMX registers
// (per isDestructiveCall).
94 MIRef LastCall;
// Some AMX register is live into this block from a predecessor.
95 bool HasAMXRegLiveIn = false;
// Tile-config insertion must avoid this block (propagated backwards
// from blocks whose shapes could not be handled).
96 bool TileCfgForbidden = false;
// The tile configuration must already be in effect on entry to this block.
97 bool NeedTileCfgLiveIn = false;
98};
99
// Cached analyses and scratch state for one runOnMachineFunction invocation.
// NOTE(review): the enclosing class header line (original line 100,
// presumably "class X86PreTileConfig : public MachineFunctionPass {") is
// missing from this scrape; these are its members.
101 MachineRegisterInfo *MRI = nullptr;
// Loop info used to recognize loop back edges (see isLoopBackEdge).
102 const MachineLoopInfo *MLI = nullptr;
// Shape-defining instructions already visited by collectShapeInfo.
103 SmallPtrSet<MachineInstr *, 8> DefVisited;
// Per-block scan results; cleared in releaseMemory.
104 DenseMap<MachineBasicBlock *, BBInfo> BBVisitedInfo;
// For each block, the MIRefs of shape-defining instructions recorded by
// collectShapeInfo (inserted in order, duplicates skipped).
105 DenseMap<MachineBasicBlock *, SmallVector<MIRef, 8>> ShapeBBs;
106
107
// Return true if call MI may clobber any of the registers in UsableRegs
// (the caller passes the set of AMX TMM registers; UsableRegs is taken by
// value because it is mutated locally).
// NOTE(review): extraction dropped original lines 109 and 113 — per the
// scraped tail blurbs these were presumably "auto Iter = llvm::find_if("
// and a UsableRegs.clearBitsInMask(Iter->getRegMask()) call; verify
// against upstream.
108 bool isDestructiveCall(MachineInstr &MI, BitVector UsableRegs) {
// Locate the call's register-mask operand; a call with no regmask is
// treated as non-destructive.
110 MI.operands(), [](MachineOperand &MO) { return MO.isRegMask(); });
111 if (Iter == MI.operands_end())
112 return false;
// After masking out preserved registers, any bit still set means some
// AMX register is clobbered by the call.
114 return !UsableRegs.none();
115 }
116
117
// Decide whether MI is an AMX instruction that requires tile configuration,
// and, if so, record its shape operands via collectShapeInfo.
118 bool isAMXInstruction(MachineInstr &MI) {
// PHIs, debug instructions and short instructions can never be AMX users.
119 if (MI.isPHI() || MI.isDebugInstr() || MI.getNumOperands() < 3)
120 return false;
// These opcodes are AMX by construction, independent of operand classes.
121 switch (MI.getOpcode()) {
122 case X86::PTILESTOREDV:
123 case X86::PTCVTROWD2PSrreV:
124 case X86::PTCVTROWD2PSrriV:
125 case X86::PTCVTROWPS2BF16HrreV:
126 case X86::PTCVTROWPS2BF16HrriV:
127 case X86::PTCVTROWPS2BF16LrreV:
128 case X86::PTCVTROWPS2BF16LrriV:
129 case X86::PTCVTROWPS2PHHrreV:
130 case X86::PTCVTROWPS2PHHrriV:
131 case X86::PTCVTROWPS2PHLrreV:
132 case X86::PTCVTROWPS2PHLrriV:
133 case X86::PTILEMOVROWrreV:
134 case X86::PTILEMOVROWrriV:
135 return true;
136 }
137
138
139
// Otherwise treat MI as AMX iff its first def is a virtual TILE-class reg.
140 MachineOperand &MO = MI.getOperand(0);
// NOTE(review): extraction dropped the guard on original line 141
// (presumably "if (!MO.isReg() || !MO.getReg().isVirtual())").
142 return false;
143
144 if (MRI->getRegClass(MO.getReg())->getID() != X86::TILERegClassID)
145 return false;
146
// Record the row/col shape operands of this tile def for later hoisting.
147 collectShapeInfo(MI);
148 return true;
149 }
150
151
152 bool isLoopBackEdge(MachineBasicBlock *Header, MachineBasicBlock *Bottom) {
153 if (!MLI->isLoopHeader(Header))
154 return false;
155 auto *ML = MLI->getLoopFor(Header);
156 if (ML->contains(Bottom) && ML->isLoopLatch(Bottom))
157 return true;
158
159 return false;
160 }
161
162
// Walk the shape (row/col) operands of AMX def MI back to their defining
// instructions and record them in ShapeBBs; defined out-of-line below.
163 void collectShapeInfo(MachineInstr &MI);
164
165
// Try to hoist the shape-defining instructions of MBB that sit below the
// block's first AMX instruction up to just before it, so a tile config can
// precede all shape uses. Returns false (failure) if a shape def may
// load/store or depends on a value defined after the first AMX instruction.
// NOTE(review): garbled scrape — the SmallVectorImpl element type
// (presumably <MIRef>), the line initializing FirstShapeBelowAMX
// (presumably a llvm::lower_bound over Shapes), and the operand guard at
// original line 175 were dropped; verify against upstream.
166 bool hoistShapesInBB(MachineBasicBlock *MBB, SmallVectorImpl &Shapes) {
167 MIRef &FirstAMX = BBVisitedInfo[MBB].FirstAMX;
// Insert hoisted defs immediately before the first AMX instruction.
169 auto InsertPoint = FirstAMX.MI->getIterator();
170 for (auto I = FirstShapeBelowAMX, E = Shapes.end(); I != E; ++I) {
171
// Memory-touching shape defs cannot be safely reordered.
172 if (I->MI->mayLoadOrStore())
173 return false;
174 for (auto &MO : I->MI->operands()) {
176 continue;
177
178
// A shape operand defined after FirstAMX would be used before its def
// once hoisted — give up.
179 if (MO.isReg() && MIRef(MRI->getVRegDef(MO.getReg())) > FirstAMX)
180 return false;
181
182 }
// Move the shape def to just before the first AMX instruction.
183 MBB->insert(InsertPoint, I->MI->removeFromParent())
184 }
185
// Record the final (last hoisted) shape position for this block.
187 Shapes.push_back(MIRef(&*--InsertPoint, MBB));
188 return true;
189 }
190
191public:
// Default-construct the pass, registering with its static pass ID.
192 X86PreTileConfig() : MachineFunctionPass(ID) {}
193
194
195 StringRef getPassName() const override {
196 return "Tile Register Pre-configure";
197 }
198
199
// Declare the analyses this pass requires.
// NOTE(review): the scrape dropped original lines 201 and 203, and the
// addRequired call below lost its template argument (per the tail blurbs,
// presumably AU.setPreservesAll() plus
// AU.addRequired<MachineLoopInfoWrapperPass>()); verify against upstream.
200 void getAnalysisUsage(AnalysisUsage &AU) const override {
202 AU.addRequired();
204 }
205
206
207 void releaseMemory() override {
208 ShapeBBs.clear();
209 DefVisited.clear();
210 BBVisitedInfo.clear();
211 }
212
213
// Main entry point; defined out-of-line below.
214 bool runOnMachineFunction(MachineFunction &MF) override;
215
// Pass identification: the address of ID uniquely identifies this pass.
216 static char ID;
217};
218
// end anonymous namespace
219}
220
221char X86PreTileConfig::ID = 0;
222
// NOTE(review): the scrape dropped the INITIALIZE_PASS_BEGIN /
// INITIALIZE_PASS_DEPENDENCY / INITIALIZE_PASS_END macro lines around this
// fragment (original lines 223, 225-227); only one argument line survives.
224 "Tile Register Pre-configure", false, false)
228
// Collect the row/col shape-defining instructions reachable from AMX def MI
// (operands 1 and 2) and record them per-block in ShapeBBs. PHIs are
// followed through their incoming values unless the incoming edge is a loop
// back edge, in which case the PHI itself is recorded as the shape point.
// NOTE(review): garbled scrape — original lines 230-231 (RecordShape lambda
// header), 233 (a lower_bound over Refs), 238 (the WorkList declaration)
// and 242/244 (DefMI/DefMBB initialization) were dropped; verify upstream.
229void X86PreTileConfig::collectShapeInfo(MachineInstr &MI) {
// Insert MIR into this block's shape list, skipping duplicates.
232 auto &Refs = ShapeBBs[MBB];
234 if (I == Refs.end() || *I != MIR)
235 Refs.insert(I, MIR);
236 };
237
// Seed the worklist with MI's two shape (row, col) registers.
239 {MI.getOperand(1).getReg(), MI.getOperand(2).getReg()});
240 while (!WorkList.empty()) {
241 Register R = WorkList.pop_back_val();
243 assert(DefMI && "R must has one define instruction");
// Immediates need no config ordering; skip already-visited defs.
245 if (DefMI->isMoveImmediate() || !DefVisited.insert(DefMI).second)
246 continue;
247
248 if (DefMI->isPHI()) {
// Walk PHI incoming (value, block) pairs.
249 for (unsigned I = 1; I < DefMI->getNumOperands(); I += 2)
// Stop at loop back edges to avoid cycling; record the PHI itself.
250 if (isLoopBackEdge(DefMBB, DefMI->getOperand(I + 1).getMBB()))
251 RecordShape(DefMI, DefMBB);
252 else
253 WorkList.push_back(DefMI->getOperand(I).getReg());
254 } else {
255 RecordShape(DefMI, DefMBB);
256 }
257 }
258}
259
// Pre-RA pass body: find every point where the AMX tile configuration must
// be (re)materialized, create a stack slot for the config data, insert the
// config pseudo at the chosen points, and zero-initialize the config memory
// in the entry block. Returns true iff the function was modified.
// NOTE(review): this is a garbled doxygen scrape — many lines were dropped
// (e.g. 272, 276, 283, 288, 300, 303, 306, 313-314, 317, 323, 341, 346,
// 349, 352-353, 357, 362, 370, 372, 376, 384, 399, 407, 410-412, 415-419,
// 424-431, 434); verify every inference below against upstream.
260bool X86PreTileConfig::runOnMachineFunction(MachineFunction &MF) {
261 X86FI = MF.getInfo();
262
// Only the managed-RA AMX programming model needs this pass.
263 if (X86FI->getAMXProgModel() != AMXProgModelEnum::ManagedRA)
264 return false;
265
266 const X86Subtarget &ST = MF.getSubtarget();
267 const TargetInstrInfo *TII = ST.getInstrInfo();
268 const TargetRegisterInfo *TRI = ST.getRegisterInfo();
269 const TargetRegisterClass *RC = TRI->getRegClass(X86::TILERegClassID);
270
// Build the set of physical AMX tile registers (TMM0 + I).
271 BitVector AMXRegs(TRI->getNumRegs());
273 AMXRegs.set(X86::TMM0 + I);
274
275
277 MLI = &getAnalysis().getLI();
278 SmallSet<MIRef, 8> CfgNeedInsert;
279 SmallVector<MachineBasicBlock *, 8> CfgLiveInBBs;
// Pass 1: per-block scan. Record each block's first AMX instruction, its
// last AMX-clobbering call, and whether it needs the config live-in.
280 for (auto &MBB : MF) {
281 size_t Pos = 0;
282 auto &Info = BBVisitedInfo[&MBB];
284 ++Pos;
285 if (isAMXInstruction(MI)) {
286
// An AMX use after a destructive call forces a reconfig after that call;
// otherwise the config must be live into the block.
287 if (Info.LastCall)
289 else
290 Info.NeedTileCfgLiveIn = true;
291
// NOTE(review): the "!" was lost here (presumably "if (!Info.FirstAMX)").
292 if (.FirstAMX)
293 Info.FirstAMX = MIRef(&MI, &MBB, Pos);
294 } else if (MI.isCall() && isDestructiveCall(MI, AMXRegs)) {
295
296 Info.LastCall = MIRef(&MI, &MBB, Pos);
297 }
298 }
// Entry block needing the config => insert at its top; otherwise propagate
// the live-in requirement to predecessors via CfgLiveInBBs.
299 if (Info.NeedTileCfgLiveIn) {
301 CfgNeedInsert.insert(MIRef(&MBB));
302 else
304 }
// Forward-propagate AMX liveness to successors, ignoring loop back edges.
305 if (Info.FirstAMX || Info.HasAMXRegLiveIn)
307 if (!isLoopBackEdge(Succ, &MBB))
308 BBVisitedInfo[Succ].HasAMXRegLiveIn = true;
309 }
310
311
// Pass 2: backward-propagate NeedTileCfgLiveIn through predecessors. A
// destructive call in a predecessor becomes a (re)config point instead.
312 while (!CfgLiveInBBs.empty()) {
315 auto &Info = BBVisitedInfo[Pred];
316 if (Info.LastCall) {
// NOTE(review): "!" lost (presumably "} else if (!Info.NeedTileCfgLiveIn) {").
318 } else if (.NeedTileCfgLiveIn) {
319 Info.NeedTileCfgLiveIn = true;
320 if (Pred == &MF.front())
321 CfgNeedInsert.insert(MIRef(Pred));
322 else
324 }
325 }
326 }
327
328
// No config points needed — nothing to do.
329 if (CfgNeedInsert.empty())
330 return false;
331
332
// Validate shape availability: shapes in a block with AMX live-in cannot
// be reordered before the config (the dropped lines presumably emit the
// "define the shape earlier" error); also try hoisting shapes above the
// block's first AMX instruction.
333 SmallVector<MachineBasicBlock *, 8> WorkList;
334 for (auto &I : ShapeBBs) {
335 auto &Info = BBVisitedInfo[I.first];
336
337 if (Info.HasAMXRegLiveIn) {
338
339
340
342 return false;
343 }
344 if (Info.FirstAMX && Info.FirstAMX < I.second.back() &&
345 !hoistShapesInBB(I.first, I.second)) {
347 return false;
348 }
350 }
// Backward-propagate TileCfgForbidden from shape blocks, skipping loop
// back edges.
351 while (!WorkList.empty()) {
354 auto &Info = BBVisitedInfo[Pred];
// NOTE(review): "!" lost (presumably "if (!Info.TileCfgForbidden && ...").
355 if (.TileCfgForbidden && !isLoopBackEdge(MBB, Pred)) {
356 Info.TileCfgForbidden = true;
358 }
359 }
360 }
361
// Create the stack slot holding the tile configuration data.
363 SmallSet<MIRef, 8> VisitedOrInserted;
364 int SS = MF.getFrameInfo().CreateStackObject(
365 ST.getTileConfigSize(), ST.getTileConfigAlignment(), false);
366
367
// For each required config point, push it past forbidden blocks into
// successors that still need the config live-in, collecting InsertPoints.
368 for (const auto &I : CfgNeedInsert) {
369 SmallSet<MIRef, 8> InsertPoints;
371 while (!WorkList.empty()) {
373 if (!VisitedOrInserted.count(I)) {
374 if (!BBVisitedInfo[I.MBB].TileCfgForbidden) {
375
377 } else {
378
379 VisitedOrInserted.insert(I);
380
381
382 for (auto *Succ : I.MBB->successors())
383 if (BBVisitedInfo[Succ].NeedTileCfgLiveIn)
385 }
386 }
387 }
388
389
// Materialize the config at each insert point, after any shape defs in
// the same block (the dropped line 399 presumably BuildMI's the config
// pseudo with addFrameReference(..., SS)).
390 for (MIRef I : InsertPoints) {
391
392 auto It = ShapeBBs.find(I.MBB);
393 if (It != ShapeBBs.end() && I < It->second.back())
394 I = It->second.back();
395
396
397 if (VisitedOrInserted.insert(I).second) {
398 auto II = I.MI ? I.MI->getIterator() : I.MBB->instr_begin();
400 SS);
401 }
402 }
403 }
404
405
// Zero-initialize the config stack slot in the entry block using the
// widest available vector stores (ZMM / YMM / XMM depending on subtarget);
// the dropped lines presumably contain the BuildMI store sequences.
406 MachineBasicBlock &MBB = MF.front();
408 if (ST.hasAVX512()) {
409 Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass);
413 } else if (ST.hasAVX2()) {
414 Register Ymm = MRI->createVirtualRegister(&X86::VR256RegClass);
420 } else {
421 assert(ST.hasSSE2() && "AMX should assume SSE2 enabled");
422 unsigned StoreOpc = ST.hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
423 Register Xmm = MRI->createVirtualRegister(&X86::VR128RegClass);
432 }
433
435
436 return true;
437}
438
// Factory for the pre-tile-config pass ("Return a pass that insert pseudo
// tile config instruction", per the scraped doxygen blurb).
// NOTE(review): the signature line (original line 439, presumably
// "FunctionPass *llvm::createX86PreTileConfigPass() {") was dropped by the
// extraction; only the body survives.
440 return new X86PreTileConfig();
441}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
const HexagonInstrInfo * TII
Module.h This file contains the declarations for the Module class.
Register const TargetRegisterInfo * TRI
Promote Memory to Register
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the SmallSet class.
static void emitErrorMsg(MachineFunction &MF)
Definition X86PreTileConfig.cpp:47
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
void clearBitsInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
clearBitsInMask - Clear any bits in this vector that are set in Mask.
bool none() const
none - Returns true if none of the bits are set.
FunctionPass class - This class is used to implement most global optimizations.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
This is an important class for using LLVM in a threaded context.
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location information.
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator_range< succ_iterator > successors()
iterator_range< pred_iterator > predecessors()
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do so.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
Wrapper class representing virtual and physical registers.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
unsigned getNumRegs() const
Return the number of registers in this class.
AMXProgModelEnum getAMXProgModel() const
self_iterator getIterator()
This is an optimization pass for GlobalISel generic memory operations.
bool operator<(int64_t V1, const APSInt &V2)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool operator!=(uint64_t V1, const APInt &V2)
static const MachineInstrBuilder & addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset=0, bool mem=true)
addFrameReference - This function is used to add a reference to the base of an abstract object on the...
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
bool operator>(int64_t V1, const APSInt &V2)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
FunctionPass * createX86PreTileConfigPass()
Return a pass that insert pseudo tile config instruction.
Definition X86PreTileConfig.cpp:439