LLVM: lib/Target/X86/X86CallFrameOptimization.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
49
50using namespace llvm;
51
52#define DEBUG_TYPE "x86-cf-opt"
53
56 cl::desc("Avoid optimizing x86 call frames for size"),
58
59namespace {
60
62public:
64
65 bool runOnMachineFunction(MachineFunction &MF) override;
66
67 static char ID;
68
69private:
70
71 struct CallContext {
72 CallContext() : FrameSetup(nullptr), ArgStoreVector(4, nullptr) {}
73
74
76
77
78 MachineInstr *Call = nullptr;
79
80
81 MachineInstr *SPCopy = nullptr;
82
83
84 int64_t ExpectedDist = 0;
85
86
87 SmallVector<MachineInstr *, 4> ArgStoreVector;
88
89
90 bool NoStackParams = false;
91
92
93 bool UsePush = false;
94 };
95
96 typedef SmallVector<CallContext, 8> ContextVector;
97
98 bool isLegal(MachineFunction &MF);
99
100 bool isProfitable(MachineFunction &MF, ContextVector &CallSeqMap);
101
102 void collectCallInfo(MachineFunction &MF, MachineBasicBlock &MBB,
104
105 void adjustCallSequence(MachineFunction &MF, const CallContext &Context);
106
109
110 enum InstClassification { Convert, Skip, Exit };
111
112 InstClassification classifyInstruction(MachineBasicBlock &MBB,
114 const X86RegisterInfo &RegInfo,
115 const DenseSet &UsedRegs);
116
117 StringRef getPassName() const override { return "X86 Optimize Call Frame"; }
118
119 const X86InstrInfo *TII = nullptr;
120 const X86FrameLowering *TFL = nullptr;
121 const X86Subtarget *STI = nullptr;
122 MachineRegisterInfo *MRI = nullptr;
123 unsigned SlotSize = 0;
124 unsigned Log2SlotSize = 0;
125};
126
127}
128char X86CallFrameOptimization::ID = 0;
130 "X86 Call Frame Optimization", false, false)
131
132
133
134
137 return false;
138
139
140
141
142 if (STI->isTargetDarwin() &&
143 (!MF.getLandingPads().empty() ||
144 (MF.getFunction().needsUnwindTableEntry() && !TFL->hasFP(MF))))
145 return false;
146
147
148
149 if (STI->isTargetWin64())
150 return false;
151
152
153
154
155
156
157
158
159
160
161
162
163
164 unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode();
165 unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
166 bool EmitStackProbeCall = STI->getTargetLowering()->hasStackProbeSymbol(MF);
167 unsigned StackProbeSize = STI->getTargetLowering()->getStackProbeSize(MF);
169 bool InsideFrameSequence = false;
171 if (MI.getOpcode() == FrameSetupOpcode) {
172 if (TII->getFrameSize(MI) >= StackProbeSize && EmitStackProbeCall)
173 return false;
174 if (InsideFrameSequence)
175 return false;
176 InsideFrameSequence = true;
177 } else if (MI.getOpcode() == FrameDestroyOpcode) {
178 if (!InsideFrameSequence)
179 return false;
180 InsideFrameSequence = false;
181 }
182 }
183
184 if (InsideFrameSequence)
185 return false;
186 }
187
188 return true;
189}
190
191
192
193bool X86CallFrameOptimization::isProfitable(MachineFunction &MF,
194 ContextVector &CallSeqVector) {
195
196
197
199 if (CannotReserveFrame)
200 return true;
201
203
204 int64_t Advantage = 0;
205 for (const auto &CC : CallSeqVector) {
206
207
208
209 if (CC.NoStackParams)
210 continue;
211
212 if (!CC.UsePush) {
213
214
215
216
217
218
219 Advantage -= 6;
220 } else {
221
222
223 Advantage -= 3;
224
225 if ((StackAlign, CC.ExpectedDist))
226 Advantage -= 3;
227
228
229 Advantage += (CC.ExpectedDist >> Log2SlotSize) * 3;
230 }
231 }
232
233 return Advantage >= 0;
234}
235
236bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) {
241
242 const X86RegisterInfo &RegInfo = *STI->getRegisterInfo();
245 Log2SlotSize = Log2_32(SlotSize);
246
247 if (skipFunction(MF.getFunction()) || !isLegal(MF))
248 return false;
249
251
253
254 ContextVector CallSeqVector;
255
256 for (auto &MBB : MF)
258 if (MI.getOpcode() == FrameSetupOpcode) {
261 CallSeqVector.push_back(Context);
262 }
263
265 return false;
266
267 for (const auto &CC : CallSeqVector) {
268 if (CC.UsePush) {
269 adjustCallSequence(MF, CC);
271 }
272 }
273
275}
276
277X86CallFrameOptimization::InstClassification
278X86CallFrameOptimization::classifyInstruction(
280 const X86RegisterInfo &RegInfo, const DenseSet &UsedRegs) {
282 return Exit;
283
284
285
286 switch (MI->getOpcode()) {
287 case X86::AND16mi:
288 case X86::AND32mi:
289 case X86::AND64mi32: {
291 return ImmOp.getImm() == 0 ? Convert : Exit;
292 }
293 case X86::OR16mi:
294 case X86::OR32mi:
295 case X86::OR64mi32: {
297 return ImmOp.getImm() == -1 ? Convert : Exit;
298 }
299 case X86::MOV32mi:
300 case X86::MOV32mr:
301 case X86::MOV64mi32:
302 case X86::MOV64mr:
303 return Convert;
304 }
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331 if (MI->isCall() || MI->mayStore())
332 return Exit;
333
334 for (const MachineOperand &MO : MI->operands()) {
335 if (!MO.isReg())
336 continue;
339 continue;
341 return Exit;
342 if (MO.isDef()) {
343 for (MCRegister U : UsedRegs)
344 if (RegInfo.regsOverlap(Reg, U))
345 return Exit;
346 }
347 }
348
349 return Skip;
350}
351
352void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
353 MachineBasicBlock &MBB,
356
357
358 const X86RegisterInfo &RegInfo = *STI->getRegisterInfo();
359
360
363 Context.FrameSetup = FrameSetup;
364
365
366
367 unsigned int MaxAdjust = TII->getFrameSize(*FrameSetup) >> Log2SlotSize;
368
369
370 if (!MaxAdjust) {
371 Context.NoStackParams = true;
372 return;
373 }
374
375
376
377
378 while (I->getOpcode() == X86::LEA32r || I->isDebugInstr())
379 ++I;
380
382 auto StackPtrCopyInst = MBB.end();
383
384
385
386
387
388
389
390
391 for (auto J = I; !J->isCall(); ++J)
392 if (J->isCopy() && J->getOperand(0).isReg() && J->getOperand(1).isReg() &&
393 J->getOperand(1).getReg() == StackPtr) {
394 StackPtrCopyInst = J;
397 break;
398 }
399
400
401
402
403
404 if (MaxAdjust > 4)
405 Context.ArgStoreVector.resize(MaxAdjust, nullptr);
406
407 DenseSet UsedRegs;
408
409 for (InstClassification Classification = Skip; Classification != Exit; ++I) {
410
411 if (I == StackPtrCopyInst)
412 continue;
413 Classification = classifyInstruction(MBB, I, RegInfo, UsedRegs);
414 if (Classification != Convert)
415 continue;
416
417
418
419
420
421
422
423
424
432 return;
433
434 int64_t StackDisp = I->getOperand(X86::AddrDisp).getImm();
435 assert(StackDisp >= 0 &&
436 "Negative stack displacement when passing parameters");
437
438
439 if (StackDisp & (SlotSize - 1))
440 return;
441 StackDisp >>= Log2SlotSize;
442
443 assert((size_t)StackDisp < Context.ArgStoreVector.size() &&
444 "Function call has more parameters than the stack is adjusted for.");
445
446
447 if (Context.ArgStoreVector[StackDisp] != nullptr)
448 return;
449 Context.ArgStoreVector[StackDisp] = &*I;
450
451 for (const MachineOperand &MO : I->uses()) {
452 if (!MO.isReg())
453 continue;
457 }
458 }
459
460 --I;
461
462
463
464 if (I == MBB.end() || ->isCall())
465 return;
466
469 return;
470
471
472
473 auto MMI = Context.ArgStoreVector.begin(), MME = Context.ArgStoreVector.end();
474 for (; MMI != MME; ++MMI, Context.ExpectedDist += SlotSize)
475 if (*MMI == nullptr)
476 break;
477
478
479 if (MMI == Context.ArgStoreVector.begin())
480 return;
481
482
483
484 for (; MMI != MME; ++MMI)
485 if (*MMI != nullptr)
486 return;
487
489}
490
491void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
492 const CallContext &Context) {
493
494
495
497 MachineBasicBlock &MBB = *(FrameSetup->getParent());
498 TII->setFrameAdjustment(*FrameSetup, Context.ExpectedDist);
499
500 const DebugLoc &DL = FrameSetup->getDebugLoc();
501 bool Is64Bit = STI->is64Bit();
502
503
504
505 for (int Idx = (Context.ExpectedDist >> Log2SlotSize) - 1; Idx >= 0; --Idx) {
509 unsigned PushOpcode;
510 switch (Store->getOpcode()) {
511 default:
513 case X86::AND16mi:
514 case X86::AND32mi:
515 case X86::AND64mi32:
516 case X86::OR16mi:
517 case X86::OR32mi:
518 case X86::OR64mi32:
519 case X86::MOV32mi:
520 case X86::MOV64mi32:
521 PushOpcode = Is64Bit ? X86::PUSH64i32 : X86::PUSH32i;
523 Push->cloneMemRefs(MF, *Store);
524 break;
525 case X86::MOV32mr:
526 case X86::MOV64mr: {
528
529
530
531 if (Is64Bit && Store->getOpcode() == X86::MOV32mr) {
532 Register UndefReg = MRI->createVirtualRegister(&X86::GR64RegClass);
533 Reg = MRI->createVirtualRegister(&X86::GR64RegClass);
537 .add(PushOp)
538 .addImm(X86::sub_32bit);
539 }
540
541
542
543 bool SlowPUSHrmm = STI->slowTwoMemOps();
544
545
546
547 MachineInstr *DefMov = nullptr;
548 if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) {
549 PushOpcode = Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm;
551
554 Push->addOperand(DefMov->getOperand(i));
555 Push->cloneMergedMemRefs(MF, {DefMov, &*Store});
557 } else {
558 PushOpcode = Is64Bit ? X86::PUSH64r : X86::PUSH32r;
562 Push->cloneMemRefs(MF, *Store);
563 }
564 break;
565 }
566 }
567
568
569
570
571 if (!TFL->hasFP(MF))
575
577 }
578
579
580
581 if (Context.SPCopy && MRI->use_empty(Context.SPCopy->getOperand(0).getReg()))
582 Context.SPCopy->eraseFromParent();
583
584
585
586 X86MachineFunctionInfo *FuncInfo = MF.getInfo();
588}
589
590MachineInstr *X86CallFrameOptimization::canFoldIntoRegPush(
592
593
594
595
596
597
598
599
600
601
603 return nullptr;
604
605
606 if (->hasOneNonDBGUse(Reg))
607 return nullptr;
608
609 MachineInstr &DefMI = *MRI->getVRegDef(Reg);
610
611
612
613 if ((DefMI.getOpcode() != X86::MOV32rm &&
614 DefMI.getOpcode() != X86::MOV64rm) ||
615 DefMI.getParent() != FrameSetup->getParent())
616 return nullptr;
617
618
619
621 if (I->isLoadFoldBarrier())
622 return nullptr;
623
625}
626
628 return new X86CallFrameOptimization();
629}
unsigned const MachineRegisterInfo * MRI
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
const TargetInstrInfo & TII
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file defines the DenseSet and SmallDenseSet classes.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
Promote Memory to Register
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
This file defines the SmallVector class.
static bool isProfitable(const StableFunctionMap::StableFunctionEntries &SFS)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
static cl::opt< bool > NoX86CFOpt("no-x86-call-frame-opt", cl::desc("Avoid optimizing x86 call frames for size"), cl::init(false), cl::Hidden)
FunctionPass class - This class is used to implement most global optimizations.
static MCCFIInstruction createAdjustCfaOffset(MCSymbol *L, int64_t Adjustment, SMLoc Loc={})
.cfi_adjust_cfa_offset Same as .cfi_def_cfa_offset, but Offset is a relative value that is added/subt...
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
MachineInstrBundleIterator< MachineInstr > iterator
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Register getReg() const
getReg - Returns the register number.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
unsigned getCallFrameSetupOpcode() const
These methods return the opcode of the frame setup/destroy instructions if they exist (-1 otherwise).
unsigned getCallFrameDestroyOpcode() const
int64_t getFrameSize(const MachineInstr &I) const
Returns size of the frame associated with the given frame instruction.
void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const MCCFIInstruction &CFIInst, MachineInstr::MIFlag Flag=MachineInstr::NoFlags) const
Wraps up getting a CFI index and building a MachineInstr for it.
void setHasPushSequences(bool HasPush)
Register getStackRegister() const
unsigned getSlotSize() const
const X86InstrInfo * getInstrInfo() const override
const X86RegisterInfo * getRegisterInfo() const override
const X86FrameLowering * getFrameLowering() const override
std::pair< iterator, bool > insert(const ValueT &V)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
FunctionPass * createX86CallFrameOptimization()
Return a pass that optimizes the code-size of x86 call sequences.
Definition X86CallFrameOptimization.cpp:627