LLVM: lib/Target/X86/X86CallFrameOptimization.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

45#include

46#include

47#include

48#include

49

50using namespace llvm;

51

52#define DEBUG_TYPE "x86-cf-opt"

53

56 cl::desc("Avoid optimizing x86 call frames for size"),

58

59namespace {

60

62public:

64

65 bool runOnMachineFunction(MachineFunction &MF) override;

66

67 static char ID;

68

69private:

70

71 struct CallContext {

72 CallContext() : FrameSetup(nullptr), ArgStoreVector(4, nullptr) {}

73

74

76

77

78 MachineInstr *Call = nullptr;

79

80

81 MachineInstr *SPCopy = nullptr;

82

83

84 int64_t ExpectedDist = 0;

85

86

87 SmallVector<MachineInstr *, 4> ArgStoreVector;

88

89

90 bool NoStackParams = false;

91

92

93 bool UsePush = false;

94 };

95

96 typedef SmallVector<CallContext, 8> ContextVector;

97

98 bool isLegal(MachineFunction &MF);

99

100 bool isProfitable(MachineFunction &MF, ContextVector &CallSeqMap);

101

102 void collectCallInfo(MachineFunction &MF, MachineBasicBlock &MBB,

104

105 void adjustCallSequence(MachineFunction &MF, const CallContext &Context);

106

109

110 enum InstClassification { Convert, Skip, Exit };

111

112 InstClassification classifyInstruction(MachineBasicBlock &MBB,

114 const X86RegisterInfo &RegInfo,

115 const DenseSet &UsedRegs);

116

117 StringRef getPassName() const override { return "X86 Optimize Call Frame"; }

118

119 const X86InstrInfo *TII = nullptr;

120 const X86FrameLowering *TFL = nullptr;

121 const X86Subtarget *STI = nullptr;

122 MachineRegisterInfo *MRI = nullptr;

123 unsigned SlotSize = 0;

124 unsigned Log2SlotSize = 0;

125};

126

127}

128char X86CallFrameOptimization::ID = 0;

130 "X86 Call Frame Optimization", false, false)

131

132

133

134

137 return false;

138

139

140

141

142 if (STI->isTargetDarwin() &&

143 (!MF.getLandingPads().empty() ||

144 (MF.getFunction().needsUnwindTableEntry() && !TFL->hasFP(MF))))

145 return false;

146

147

148

149 if (STI->isTargetWin64())

150 return false;

151

152

153

154

155

156

157

158

159

160

161

162

163

164 unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode();

165 unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();

166 bool EmitStackProbeCall = STI->getTargetLowering()->hasStackProbeSymbol(MF);

167 unsigned StackProbeSize = STI->getTargetLowering()->getStackProbeSize(MF);

169 bool InsideFrameSequence = false;

171 if (MI.getOpcode() == FrameSetupOpcode) {

172 if (TII->getFrameSize(MI) >= StackProbeSize && EmitStackProbeCall)

173 return false;

174 if (InsideFrameSequence)

175 return false;

176 InsideFrameSequence = true;

177 } else if (MI.getOpcode() == FrameDestroyOpcode) {

178 if (!InsideFrameSequence)

179 return false;

180 InsideFrameSequence = false;

181 }

182 }

183

184 if (InsideFrameSequence)

185 return false;

186 }

187

188 return true;

189}

190

191

192

193bool X86CallFrameOptimization::isProfitable(MachineFunction &MF,

194 ContextVector &CallSeqVector) {

195

196

197

199 if (CannotReserveFrame)

200 return true;

201

203

204 int64_t Advantage = 0;

205 for (const auto &CC : CallSeqVector) {

206

207

208

209 if (CC.NoStackParams)

210 continue;

211

212 if (!CC.UsePush) {

213

214

215

216

217

218

219 Advantage -= 6;

220 } else {

221

222

223 Advantage -= 3;

224

225 if (!isAligned(StackAlign, CC.ExpectedDist))

226 Advantage -= 3;

227

228

229 Advantage += (CC.ExpectedDist >> Log2SlotSize) * 3;

230 }

231 }

232

233 return Advantage >= 0;

234}

235

236bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) {

241

242 const X86RegisterInfo &RegInfo = *STI->getRegisterInfo();

245 Log2SlotSize = Log2_32(SlotSize);

246

247 if (skipFunction(MF.getFunction()) || !isLegal(MF))

248 return false;

249

251

253

254 ContextVector CallSeqVector;

255

256 for (auto &MBB : MF)

257 for (auto &MI : MBB)

258 if (MI.getOpcode() == FrameSetupOpcode) {

261 CallSeqVector.push_back(Context);

262 }

263

265 return false;

266

267 for (const auto &CC : CallSeqVector) {

268 if (CC.UsePush) {

269 adjustCallSequence(MF, CC);

271 }

272 }

273

275}

276

277X86CallFrameOptimization::InstClassification

278X86CallFrameOptimization::classifyInstruction(

280 const X86RegisterInfo &RegInfo, const DenseSet &UsedRegs) {

282 return Exit;

283

284

285

286 switch (MI->getOpcode()) {

287 case X86::AND16mi:

288 case X86::AND32mi:

289 case X86::AND64mi32: {

291 return ImmOp.getImm() == 0 ? Convert : Exit;

292 }

293 case X86::OR16mi:

294 case X86::OR32mi:

295 case X86::OR64mi32: {

297 return ImmOp.getImm() == -1 ? Convert : Exit;

298 }

299 case X86::MOV32mi:

300 case X86::MOV32mr:

301 case X86::MOV64mi32:

302 case X86::MOV64mr:

303 return Convert;

304 }

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331 if (MI->isCall() || MI->mayStore())

332 return Exit;

333

334 for (const MachineOperand &MO : MI->operands()) {

335 if (!MO.isReg())

336 continue;

339 continue;

341 return Exit;

342 if (MO.isDef()) {

343 for (MCRegister U : UsedRegs)

344 if (RegInfo.regsOverlap(Reg, U))

345 return Exit;

346 }

347 }

348

349 return Skip;

350}

351

352void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,

353 MachineBasicBlock &MBB,

356

357

358 const X86RegisterInfo &RegInfo = *STI->getRegisterInfo();

359

360

363 Context.FrameSetup = FrameSetup;

364

365

366

367 unsigned int MaxAdjust = TII->getFrameSize(*FrameSetup) >> Log2SlotSize;

368

369

370 if (!MaxAdjust) {

371 Context.NoStackParams = true;

372 return;

373 }

374

375

376

377

378 while (I->getOpcode() == X86::LEA32r || I->isDebugInstr())

379 ++I;

380

382 auto StackPtrCopyInst = MBB.end();

383

384

385

386

387

388

389

390

391 for (auto J = I; !J->isCall(); ++J)

392 if (J->isCopy() && J->getOperand(0).isReg() && J->getOperand(1).isReg() &&

393 J->getOperand(1).getReg() == StackPtr) {

394 StackPtrCopyInst = J;

397 break;

398 }

399

400

401

402

403

404 if (MaxAdjust > 4)

405 Context.ArgStoreVector.resize(MaxAdjust, nullptr);

406

407 DenseSet UsedRegs;

408

409 for (InstClassification Classification = Skip; Classification != Exit; ++I) {

410

411 if (I == StackPtrCopyInst)

412 continue;

413 Classification = classifyInstruction(MBB, I, RegInfo, UsedRegs);

414 if (Classification != Convert)

415 continue;

416

417

418

419

420

421

422

423

424

432 return;

433

434 int64_t StackDisp = I->getOperand(X86::AddrDisp).getImm();

435 assert(StackDisp >= 0 &&

436 "Negative stack displacement when passing parameters");

437

438

439 if (StackDisp & (SlotSize - 1))

440 return;

441 StackDisp >>= Log2SlotSize;

442

443 assert((size_t)StackDisp < Context.ArgStoreVector.size() &&

444 "Function call has more parameters than the stack is adjusted for.");

445

446

447 if (Context.ArgStoreVector[StackDisp] != nullptr)

448 return;

449 Context.ArgStoreVector[StackDisp] = &*I;

450

451 for (const MachineOperand &MO : I->uses()) {

452 if (!MO.isReg())

453 continue;

457 }

458 }

459

460 --I;

461

462

463

464 if (I == MBB.end() || !I->isCall())

465 return;

466

469 return;

470

471

472

473 auto MMI = Context.ArgStoreVector.begin(), MME = Context.ArgStoreVector.end();

474 for (; MMI != MME; ++MMI, Context.ExpectedDist += SlotSize)

475 if (*MMI == nullptr)

476 break;

477

478

479 if (MMI == Context.ArgStoreVector.begin())

480 return;

481

482

483

484 for (; MMI != MME; ++MMI)

485 if (*MMI != nullptr)

486 return;

487

489}

490

491void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,

492 const CallContext &Context) {

493

494

495

497 MachineBasicBlock &MBB = *(FrameSetup->getParent());

498 TII->setFrameAdjustment(*FrameSetup, Context.ExpectedDist);

499

500 const DebugLoc &DL = FrameSetup->getDebugLoc();

501 bool Is64Bit = STI->is64Bit();

502

503

504

505 for (int Idx = (Context.ExpectedDist >> Log2SlotSize) - 1; Idx >= 0; --Idx) {

509 unsigned PushOpcode;

510 switch (Store->getOpcode()) {

511 default:

513 case X86::AND16mi:

514 case X86::AND32mi:

515 case X86::AND64mi32:

516 case X86::OR16mi:

517 case X86::OR32mi:

518 case X86::OR64mi32:

519 case X86::MOV32mi:

520 case X86::MOV64mi32:

521 PushOpcode = Is64Bit ? X86::PUSH64i32 : X86::PUSH32i;

523 Push->cloneMemRefs(MF, *Store);

524 break;

525 case X86::MOV32mr:

526 case X86::MOV64mr: {

528

529

530

531 if (Is64Bit && Store->getOpcode() == X86::MOV32mr) {

532 Register UndefReg = MRI->createVirtualRegister(&X86::GR64RegClass);

533 Reg = MRI->createVirtualRegister(&X86::GR64RegClass);

537 .add(PushOp)

538 .addImm(X86::sub_32bit);

539 }

540

541

542

543 bool SlowPUSHrmm = STI->slowTwoMemOps();

544

545

546

547 MachineInstr *DefMov = nullptr;

548 if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) {

549 PushOpcode = Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm;

551

554 Push->addOperand(DefMov->getOperand(i));

555 Push->cloneMergedMemRefs(MF, {DefMov, &*Store});

557 } else {

558 PushOpcode = Is64Bit ? X86::PUSH64r : X86::PUSH32r;

562 Push->cloneMemRefs(MF, *Store);

563 }

564 break;

565 }

566 }

567

568

569

570

571 if (!TFL->hasFP(MF))

573 MBB, std::next(Push), DL,

575

577 }

578

579

580

581 if (Context.SPCopy && MRI->use_empty(Context.SPCopy->getOperand(0).getReg()))

582 Context.SPCopy->eraseFromParent();

583

584

585

586 X86MachineFunctionInfo *FuncInfo = MF.getInfo();

588}

589

590MachineInstr *X86CallFrameOptimization::canFoldIntoRegPush(

592

593

594

595

596

597

598

599

600

601

603 return nullptr;

604

605

606 if (!MRI->hasOneNonDBGUse(Reg))

607 return nullptr;

608

609 MachineInstr &DefMI = *MRI->getVRegDef(Reg);

610

611

612

613 if ((DefMI.getOpcode() != X86::MOV32rm &&

614 DefMI.getOpcode() != X86::MOV64rm) ||

615 DefMI.getParent() != FrameSetup->getParent())

616 return nullptr;

617

618

619

621 if (I->isLoadFoldBarrier())

622 return nullptr;

623

625}

626

628 return new X86CallFrameOptimization();

629}

unsigned const MachineRegisterInfo * MRI

for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))

MachineInstrBuilder MachineInstrBuilder & DefMI

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

const TargetInstrInfo & TII

MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL

This file defines the DenseSet and SmallDenseSet classes.

const size_t AbstractManglingParser< Derived, Alloc >::NumOps

Promote Memory to Register

#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)

This file defines the SmallVector class.

static bool isProfitable(const StableFunctionMap::StableFunctionEntries &SFS)

static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)

Returns the opcode of Values or ~0 if they do not all agree.

static cl::opt< bool > NoX86CFOpt("no-x86-call-frame-opt", cl::desc("Avoid optimizing x86 call frames for size"), cl::init(false), cl::Hidden)

FunctionPass class - This class is used to implement most global optimizations.

static MCCFIInstruction createAdjustCfaOffset(MCSymbol *L, int64_t Adjustment, SMLoc Loc={})

.cfi_adjust_cfa_offset Same as .cfi_def_cfa_offset, but Offset is a relative value that is added/subt...

unsigned getNumOperands() const

Return the number of declared MachineOperands for this MachineInstruction.

const MCInstrDesc & get(unsigned Opcode) const

Return the machine instruction descriptor that corresponds to the specified instruction opcode.

LLVM_ABI instr_iterator erase(instr_iterator I)

Remove an instruction from the instruction list and delete it.

MachineInstrBundleIterator< MachineInstr > iterator

bool hasVarSizedObjects() const

This method may be called any time after instruction selection is complete to determine if the stack ...

MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

MachineFrameInfo & getFrameInfo()

getFrameInfo - Return the frame info object for the current function.

MachineRegisterInfo & getRegInfo()

getRegInfo - Return information about the registers currently in use.

Function & getFunction()

Return the LLVM function that this machine code represents.

Ty * getInfo()

getInfo - Keep track of various per-function pieces of information for backends that would like to do...

const MachineInstrBuilder & addImm(int64_t Val) const

Add a new immediate operand.

const MachineInstrBuilder & add(const MachineOperand &MO) const

const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const

Add a new virtual register operand.

MachineInstr * getInstr() const

If conversion operators fail, use this method to get the MachineInstr explicitly.

Representation of each machine instruction.

const MCInstrDesc & getDesc() const

Returns the target instruction descriptor of this MachineInstr.

LLVM_ABI void eraseFromParent()

Unlink 'this' from the containing basic block and delete it.

const MachineOperand & getOperand(unsigned i) const

Register getReg() const

getReg - Returns the register number.

MCRegister asMCReg() const

Utility to check-convert this value to a MCRegister.

constexpr bool isVirtual() const

Return true if the specified register number is in the virtual register namespace.

constexpr bool isPhysical() const

Return true if the specified register number is in the physical register namespace.

bool hasFP(const MachineFunction &MF) const

hasFP - Return true if the specified function should have a dedicated frame pointer register.

Align getStackAlign() const

getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...

unsigned getCallFrameSetupOpcode() const

These methods return the opcode of the frame setup/destroy instructions if they exist (-1 otherwise).

unsigned getCallFrameDestroyOpcode() const

int64_t getFrameSize(const MachineInstr &I) const

Returns size of the frame associated with the given frame instruction.

void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const MCCFIInstruction &CFIInst, MachineInstr::MIFlag Flag=MachineInstr::NoFlags) const

Wraps up getting a CFI index and building a MachineInstr for it.

void setHasPushSequences(bool HasPush)

Register getStackRegister() const

unsigned getSlotSize() const

const X86InstrInfo * getInstrInfo() const override

const X86RegisterInfo * getRegisterInfo() const override

const X86FrameLowering * getFrameLowering() const override

std::pair< iterator, bool > insert(const ValueT &V)

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

constexpr char Align[]

Key for Kernel::Arg::Metadata::mAlign.

unsigned ID

LLVM IR allows arbitrary numbers to be used as calling convention identifiers.

initializer< Ty > init(const Ty &Val)

This is an optimization pass for GlobalISel generic memory operations.

MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)

Builder interface. Specify how to create the initial instruction itself.

bool isAligned(Align Lhs, uint64_t SizeInBytes)

Checks that SizeInBytes is a multiple of the alignment.

unsigned Log2_32(uint32_t Value)

Return the floor log base 2 of the specified value, -1 if the value is zero.

constexpr bool isPowerOf2_32(uint32_t Value)

Return true if the argument is a power of two > 0.

FunctionPass * createX86CallFrameOptimization()

Return a pass that optimizes the code-size of x86 call sequences.

Definition X86CallFrameOptimization.cpp:627