LLVM: lib/CodeGen/MachineLICM.cpp Source File (original) (raw)

52#include

53#include

54#include

55

56using namespace llvm;

57

58#define DEBUG_TYPE "machinelicm"

59

62 cl::desc("MachineLICM should avoid speculation"),

64

67 cl::desc("MachineLICM should hoist even cheap instructions"),

69

72 cl::desc("Hoist invariant stores"),

74

76 cl::desc("Hoist invariant loads"),

78

79

80

83 cl::desc("Do not hoist instructions if target"

84 "block is N times hotter than the source."),

86

88

91 cl::desc("Disable hoisting instructions to"

92 " hotter blocks"),

95 "disable the feature"),

97 "enable the feature when using profile data"),

99 "enable the feature with/wo profile data")));

100

102 "Number of machine instructions hoisted out of loops");

104 "Number of instructions hoisted in low reg pressure situation");

106 "Number of high latency instructions hoisted");

108 "Number of hoisted machine instructions CSEed");

110 "Number of machine instructions hoisted out of loops post regalloc");

112 "Number of stores of const phys reg hoisted out of loops");

114 "Number of instructions not hoisted due to block frequency");

115

116namespace {

117 enum HoistResult { NotHoisted = 1, Hoisted = 2, ErasedMI = 4 };

118

119 class MachineLICMImpl {

120 const TargetInstrInfo *TII = nullptr;

121 const TargetLoweringBase *TLI = nullptr;

122 const TargetRegisterInfo *TRI = nullptr;

123 const MachineFrameInfo *MFI = nullptr;

124 MachineRegisterInfo *MRI = nullptr;

125 TargetSchedModel SchedModel;

126 bool PreRegAlloc = false;

127 bool HasProfileData = false;

128 Pass *LegacyPass;

130

131

132 AliasAnalysis *AA = nullptr;

133 MachineBlockFrequencyInfo *MBFI = nullptr;

134 MachineLoopInfo *MLI = nullptr;

135 MachineDomTreeUpdater *MDTU = nullptr;

136

137

138 bool Changed = false;

139 bool FirstInLoop = false;

140

141

142

143 SmallDenseMap<MachineLoop *, bool> AllowedToHoistLoads;

144

145

146 DenseMap<MachineLoop *, SmallVector<MachineBasicBlock *, 8>> ExitBlockMap;

147

148 bool isExitBlock(MachineLoop *CurLoop, const MachineBasicBlock *MBB) {

149 auto [It, Inserted] = ExitBlockMap.try_emplace(CurLoop);

150 if (Inserted) {

151 SmallVector<MachineBasicBlock *, 8> ExitBlocks;

153 It->second = std::move(ExitBlocks);

154 }

156 }

157

158

159 SmallDenseSet RegSeen;

160 SmallVector<unsigned, 8> RegPressure;

161

162

163

164 SmallVector<unsigned, 8> RegLimit;

165

166

168

169

170 DenseMap<MachineBasicBlock *,

171 DenseMap<unsigned, std::vector<MachineInstr *>>>

172 CSEMap;

173

174 enum {

175 SpeculateFalse = 0,

176 SpeculateTrue = 1,

177 SpeculateUnknown = 2

178 };

179

180

181

182

183 unsigned SpeculationState = SpeculateUnknown;

184

185 public:

186 MachineLICMImpl(bool PreRegAlloc, Pass *LegacyPass,

188 : PreRegAlloc(PreRegAlloc), LegacyPass(LegacyPass), MFAM(MFAM) {

189 assert((LegacyPass || MFAM) && "LegacyPass or MFAM must be provided");

190 assert(!(LegacyPass && MFAM) &&

191 "LegacyPass and MFAM cannot be provided at the same time");

192 }

193

194 bool run(MachineFunction &MF);

195

196 void releaseMemory() {

197 RegSeen.clear();

198 RegPressure.clear();

199 RegLimit.clear();

200 BackTrace.clear();

201 CSEMap.clear();

202 ExitBlockMap.clear();

203 }

204

205 private:

206

207 struct CandidateInfo {

208 MachineInstr *MI;

210 int FI;

211

212 CandidateInfo(MachineInstr *mi, Register def, int fi)

213 : MI(mi), Def(def), FI(fi) {}

214 };

215

216 void HoistRegionPostRA(MachineLoop *CurLoop);

217

218 void HoistPostRA(MachineInstr *MI, Register Def, MachineLoop *CurLoop);

219

220 void ProcessMI(MachineInstr *MI, BitVector &RUDefs, BitVector &RUClobbers,

221 SmallDenseSet &StoredFIs,

222 SmallVectorImpl &Candidates,

223 MachineLoop *CurLoop);

224

225 void AddToLiveIns(MCRegister Reg, MachineLoop *CurLoop);

226

227 bool IsLICMCandidate(MachineInstr &I, MachineLoop *CurLoop);

228

229 bool IsLoopInvariantInst(MachineInstr &I, MachineLoop *CurLoop);

230

231 bool HasLoopPHIUse(const MachineInstr *MI, MachineLoop *CurLoop);

232

233 bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, Register Reg,

234 MachineLoop *CurLoop) const;

235

236 bool IsCheapInstruction(MachineInstr &MI) const;

237

238 bool CanCauseHighRegPressure(const SmallDenseMap<unsigned, int> &Cost,

239 bool Cheap);

240

241 void UpdateBackTraceRegPressure(const MachineInstr *MI);

242

243 bool IsProfitableToHoist(MachineInstr &MI, MachineLoop *CurLoop);

244

245 bool IsGuaranteedToExecute(MachineBasicBlock *BB, MachineLoop *CurLoop);

246

247 void EnterScope(MachineBasicBlock *MBB);

248

249 void ExitScope(MachineBasicBlock *MBB);

250

251 void ExitScopeIfDone(

253 DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren,

254 const DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap);

255

256 void HoistOutOfLoop(MachineDomTreeNode *HeaderN, MachineLoop *CurLoop);

257

258 void InitRegPressure(MachineBasicBlock *BB);

259

260 SmallDenseMap<unsigned, int> calcRegisterCost(const MachineInstr *MI,

261 bool ConsiderSeen,

262 bool ConsiderUnseenAsDef);

263

264 void UpdateRegPressure(const MachineInstr *MI,

265 bool ConsiderUnseenAsDef = false);

266

267 MachineInstr *ExtractHoistableLoad(MachineInstr *MI, MachineLoop *CurLoop);

268

269 MachineInstr *LookForDuplicate(const MachineInstr *MI,

270 std::vector<MachineInstr *> &PrevMIs);

271

272 bool

273 EliminateCSE(MachineInstr *MI,

274 DenseMap<unsigned, std::vector<MachineInstr *>>::iterator &CI);

275

276 bool MayCSE(MachineInstr *MI);

277

278 unsigned Hoist(MachineInstr *MI, MachineBasicBlock *Preheader,

279 MachineLoop *CurLoop);

280

281 void InitCSEMap(MachineBasicBlock *BB);

282

283 void InitializeLoadsHoistableLoops();

284

285 bool isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,

286 MachineBasicBlock *TgtBlock);

287 MachineBasicBlock *getOrCreatePreheader(MachineLoop *CurLoop);

288 };

289

291 bool PreRegAlloc;

292

293 public:

294 MachineLICMBase(char &ID, bool PreRegAlloc)

295 : MachineFunctionPass(ID), PreRegAlloc(PreRegAlloc) {}

296

297 bool runOnMachineFunction(MachineFunction &MF) override;

298

299 void getAnalysisUsage(AnalysisUsage &AU) const override {

300 AU.addRequired();

302 AU.addRequired();

303 AU.addRequired();

305 AU.addPreserved();

307 }

308 };

309

310 class MachineLICM : public MachineLICMBase {

311 public:

312 static char ID;

313 MachineLICM() : MachineLICMBase(ID, false) {

315 }

316 };

317

318 class EarlyMachineLICM : public MachineLICMBase {

319 public:

320 static char ID;

321 EarlyMachineLICM() : MachineLICMBase(ID, true) {

323 }

324 };

325

326}

327

328char MachineLICM::ID;

329char EarlyMachineLICM::ID;

330

333

335 "Machine Loop Invariant Code Motion", false, false)

341 "Machine Loop Invariant Code Motion", false, false)

342

344 "Early Machine Loop Invariant Code Motion", false, false)

350 "Early Machine Loop Invariant Code Motion", false, false)

351

352bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) {

353 if (skipFunction(MF.getFunction()))

354 return false;

355

356 MachineLICMImpl Impl(PreRegAlloc, this, nullptr);

357 return Impl.run(MF);

358}

359

360#define GET_RESULT(RESULT, GETTER, INFIX) \

361 ((LegacyPass) \

362 ? &LegacyPass->getAnalysis<RESULT##INFIX##WrapperPass>().GETTER() \

363 : &MFAM->getResult<RESULT##Analysis>(MF))

364

366 AA = MFAM != nullptr

368 .getManager()

372 MachineDomTreeUpdater::UpdateStrategy::Lazy);

373 MDTU = &DTU;

377 : nullptr;

378

379 Changed = FirstInLoop = false;

381 TII = ST.getInstrInfo();

382 TLI = ST.getTargetLowering();

383 TRI = ST.getRegisterInfo();

386 SchedModel.init(&ST);

387

389

390 if (PreRegAlloc)

391 LLVM_DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");

392 else

393 LLVM_DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");

395

396 if (PreRegAlloc) {

397

398 unsigned NumRPS = TRI->getNumRegPressureSets();

399 RegPressure.resize(NumRPS);

401 RegLimit.resize(NumRPS);

402 for (unsigned i = 0, e = NumRPS; i != e; ++i)

403 RegLimit[i] = TRI->getRegPressureSetLimit(MF, i);

404 }

405

407 InitializeLoadsHoistableLoops();

408

410 while (!Worklist.empty()) {

412

413 if (!PreRegAlloc) {

414 HoistRegionPostRA(CurLoop);

415 } else {

416

417

419 FirstInLoop = true;

420 HoistOutOfLoop(N, CurLoop);

421 CSEMap.clear();

422 }

423 }

424 releaseMemory();

426}

427

428

430

431

432 if (MI->mayStore())

433 return false;

434

435

436 if (MI->memoperands_empty())

437 return true;

439 if (MemOp->isStore() || MemOp->getPseudoValue())

440 continue;

443 if (Value->getFrameIndex() == FI)

444 return true;

445 }

446 }

447 return false;

448}

449

453

454

455

456

457

458

459

460

461

462

463

464

465

466

467

468

469

470

471

472

473

474

475

476

477

478

479

480

481

482

483 BitVector RUsFromRegsNotInMask(TRI.getNumRegUnits());

484 const unsigned NumRegs = TRI.getNumRegs();

485 const unsigned MaskWords = (NumRegs + 31) / 32;

486 for (unsigned K = 0; K < MaskWords; ++K) {

487 const uint32_t Word = Mask[K];

488 for (unsigned Bit = 0; Bit < 32; ++Bit) {

489 const unsigned PhysReg = (K * 32) + Bit;

490 if (PhysReg == NumRegs)

491 break;

492

493 if (PhysReg && !((Word >> Bit) & 1)) {

494 for (MCRegUnit Unit : TRI.regunits(PhysReg))

495 RUsFromRegsNotInMask.set(static_cast<unsigned>(Unit));

496 }

497 }

498 }

499

500 RUs |= RUsFromRegsNotInMask;

501}

502

503

504

505void MachineLICMImpl::ProcessMI(MachineInstr *MI, BitVector &RUDefs,

506 BitVector &RUClobbers,

507 SmallDenseSet &StoredFIs,

508 SmallVectorImpl &Candidates,

509 MachineLoop *CurLoop) {

510 bool RuledOut = false;

511 bool HasNonInvariantUse = false;

513 for (const MachineOperand &MO : MI->operands()) {

514 if (MO.isFI()) {

515

516 int FI = MO.getIndex();

517 if (!StoredFIs.count(FI) &&

520 StoredFIs.insert(FI);

521 HasNonInvariantUse = true;

522 continue;

523 }

524

525

526

527 if (MO.isRegMask()) {

529 continue;

530 }

531

532 if (!MO.isReg())

533 continue;

535 if (Reg)

536 continue;

538

539 if (!MO.isDef()) {

540 if (!HasNonInvariantUse) {

541 for (MCRegUnit Unit : TRI->regunits(Reg)) {

542

543

544 if (RUDefs.test(static_cast<unsigned>(Unit)) ||

545 RUClobbers.test(static_cast<unsigned>(Unit))) {

546 HasNonInvariantUse = true;

547 break;

548 }

549 }

550 }

551 continue;

552 }

553

554

555 if (!MO.isDead()) {

556 if (Def)

557 RuledOut = true;

558 else

560 }

561

562

563

564

565 for (MCRegUnit Unit : TRI->regunits(Reg)) {

566 if (RUDefs.test(static_cast<unsigned>(Unit))) {

567 RUClobbers.set(static_cast<unsigned>(Unit));

568 RuledOut = true;

569 } else if (RUClobbers.test(static_cast<unsigned>(Unit))) {

570

571

572 RuledOut = true;

573 }

574

575 RUDefs.set(static_cast<unsigned>(Unit));

576 }

577 }

578

579

580

581 if (Def && !RuledOut) {

582 int FI = std::numeric_limits::min();

583 if ((!HasNonInvariantUse && IsLICMCandidate(*MI, CurLoop)) ||

585 Candidates.push_back(CandidateInfo(MI, Def, FI));

586 }

587}

588

589

590

591void MachineLICMImpl::HoistRegionPostRA(MachineLoop *CurLoop) {

592 MachineBasicBlock *Preheader = getOrCreatePreheader(CurLoop);

593 if (!Preheader)

594 return;

595

596 unsigned NumRegUnits = TRI->getNumRegUnits();

597 BitVector RUDefs(NumRegUnits);

598 BitVector RUClobbers(NumRegUnits);

599

601 SmallDenseSet StoredFIs;

602

603

604

605 for (MachineBasicBlock *BB : CurLoop->getBlocks()) {

606

607

609 if (ML && ML->getHeader()->isEHPad()) continue;

610

611

612

613

614 for (const auto &LI : BB->liveins()) {

615 for (MCRegUnit Unit : TRI->regunits(LI.PhysReg))

616 RUDefs.set(static_cast<unsigned>(Unit));

617 }

618

619

620 if (const uint32_t *Mask = BB->getBeginClobberMask(TRI))

622

623

624 if (BB->isEHPad()) {

625 const MachineFunction &MF = *BB->getParent();

629 for (MCRegUnit Unit : TRI->regunits(Reg))

630 RUClobbers.set(static_cast<unsigned>(Unit));

632 for (MCRegUnit Unit : TRI->regunits(Reg))

633 RUClobbers.set(static_cast<unsigned>(Unit));

634 }

635

636 SpeculationState = SpeculateUnknown;

637 for (MachineInstr &MI : *BB)

638 ProcessMI(&MI, RUDefs, RUClobbers, StoredFIs, Candidates, CurLoop);

639 }

640

641

642 BitVector TermRUs(NumRegUnits);

644 if (TI != Preheader->end()) {

645 for (const MachineOperand &MO : TI->operands()) {

646 if (!MO.isReg())

647 continue;

649 if (Reg)

650 continue;

651 for (MCRegUnit Unit : TRI->regunits(Reg))

652 TermRUs.set(static_cast<unsigned>(Unit));

653 }

654 }

655

656

657

658

659

660

661

662

663

664 for (CandidateInfo &Candidate : Candidates) {

665 if (Candidate.FI != std::numeric_limits::min() &&

666 StoredFIs.count(Candidate.FI))

667 continue;

668

670 bool Safe = true;

671 for (MCRegUnit Unit : TRI->regunits(Def)) {

672 if (RUClobbers.test(static_cast<unsigned>(Unit)) ||

673 TermRUs.test(static_cast<unsigned>(Unit))) {

674 Safe = false;

675 break;

676 }

677 }

678

679 if (!Safe)

680 continue;

681

682 MachineInstr *MI = Candidate.MI;

683 for (const MachineOperand &MO : MI->all_uses()) {

684 if (!MO.getReg())

685 continue;

686 for (MCRegUnit Unit : TRI->regunits(MO.getReg())) {

687 if (RUDefs.test(static_cast<unsigned>(Unit)) ||

688 RUClobbers.test(static_cast<unsigned>(Unit))) {

689

690

691 Safe = false;

692 break;

693 }

694 }

695

696 if (!Safe)

697 break;

698 }

699

700 if (Safe)

701 HoistPostRA(MI, Candidate.Def, CurLoop);

702 }

703}

704

705

706

707void MachineLICMImpl::AddToLiveIns(MCRegister Reg, MachineLoop *CurLoop) {

708 for (MachineBasicBlock *BB : CurLoop->getBlocks()) {

709 if (!BB->isLiveIn(Reg))

710 BB->addLiveIn(Reg);

711 for (MachineInstr &MI : *BB) {

712 for (MachineOperand &MO : MI.all_uses()) {

713 if (!MO.getReg())

714 continue;

715 if (TRI->regsOverlap(Reg, MO.getReg()))

716 MO.setIsKill(false);

717 }

718 }

719 }

720}

721

722

723

724void MachineLICMImpl::HoistPostRA(MachineInstr *MI, Register Def,

725 MachineLoop *CurLoop) {

727

728

729

732 << *MI);

733

734

735 MachineBasicBlock *MBB = MI->getParent();

737

738

739

740

741 assert(MI->isDebugInstr() && "Should not hoist debug inst");

743

744

745

746

747 AddToLiveIns(Def, CurLoop);

748

749 ++NumPostRAHoisted;

751}

752

753

754

755bool MachineLICMImpl::IsGuaranteedToExecute(MachineBasicBlock *BB,

756 MachineLoop *CurLoop) {

757 if (SpeculationState != SpeculateUnknown)

758 return SpeculationState == SpeculateFalse;

759

760 if (BB != CurLoop->getHeader()) {

761

762 SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks;

764 for (MachineBasicBlock *CurrentLoopExitingBlock : CurrentLoopExitingBlocks)

766 SpeculationState = SpeculateTrue;

767 return false;

768 }

769 }

770

771 SpeculationState = SpeculateFalse;

772 return true;

773}

774

775void MachineLICMImpl::EnterScope(MachineBasicBlock *MBB) {

777

778

779 BackTrace.push_back(RegPressure);

780}

781

782void MachineLICMImpl::ExitScope(MachineBasicBlock *MBB) {

785}

786

787

788

789

790void MachineLICMImpl::ExitScopeIfDone(

792 DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren,

793 const DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap) {

794 if (OpenChildren[Node])

795 return;

796

797 for(;;) {

798 ExitScope(Node->getBlock());

799

801 if (!Parent || --OpenChildren[Parent] != 0)

802 break;

803 Node = Parent;

804 }

805}

806

807

808

809

810

812 MachineLoop *CurLoop) {

813 MachineBasicBlock *Preheader = getOrCreatePreheader(CurLoop);

814 if (!Preheader)

815 return;

816

819 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;

820 DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;

821

822

824 while (!WorkList.empty()) {

826 assert(Node && "Null dominator tree node?");

827 MachineBasicBlock *BB = Node->getBlock();

828

829

830

832 if (ML && ML->getHeader()->isEHPad())

833 continue;

834

835

837 continue;

838

839 Scopes.push_back(Node);

840 unsigned NumChildren = Node->getNumChildren();

841

842

843

844

846 NumChildren = 0;

847

848 OpenChildren[Node] = NumChildren;

849 if (NumChildren) {

850

851

852

854 ParentMap[Child] = Node;

856 }

857 }

858 }

859

860 if (Scopes.size() == 0)

861 return;

862

863

865 BackTrace.clear();

866 InitRegPressure(Preheader);

867

868

870 MachineBasicBlock *MBB = Node->getBlock();

871

872 EnterScope(MBB);

873

874

875 SpeculationState = SpeculateUnknown;

877 unsigned HoistRes = HoistResult::NotHoisted;

878 HoistRes = Hoist(&MI, Preheader, CurLoop);

879 if (HoistRes & HoistResult::NotHoisted) {

880

881

883 for (MachineLoop *L = MLI->getLoopFor(MI.getParent()); L != CurLoop;

884 L = L->getParentLoop())

886

887 while (!InnerLoopWorkList.empty()) {

888 MachineLoop *InnerLoop = InnerLoopWorkList.pop_back_val();

889 MachineBasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();

890 if (InnerLoopPreheader) {

891 HoistRes = Hoist(&MI, InnerLoopPreheader, InnerLoop);

892 if (HoistRes & HoistResult::Hoisted)

893 break;

894 }

895 }

896 }

897

898 if (HoistRes & HoistResult::ErasedMI)

899 continue;

900

901 UpdateRegPressure(&MI);

902 }

903

904

905 ExitScopeIfDone(Node, OpenChildren, ParentMap);

906 }

907}

908

912

913

914

915

916void MachineLICMImpl::InitRegPressure(MachineBasicBlock *BB) {

918

919

920

921

922

924 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;

928 }

929

930 for (const MachineInstr &MI : *BB)

931 UpdateRegPressure(&MI, true);

932}

933

934

935void MachineLICMImpl::UpdateRegPressure(const MachineInstr *MI,

936 bool ConsiderUnseenAsDef) {

937 auto Cost = calcRegisterCost(MI, true, ConsiderUnseenAsDef);

938 for (const auto &[Class, Weight] : Cost) {

939 if (static_cast<int>(RegPressure[Class]) < -Weight)

941 else

943 }

944}

945

946

947

948

949

950

951

952SmallDenseMap<unsigned, int>

953MachineLICMImpl::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,

954 bool ConsiderUnseenAsDef) {

955 SmallDenseMap<unsigned, int> Cost;

956 if (MI->isImplicitDef())

958 for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {

959 const MachineOperand &MO = MI->getOperand(i);

961 continue;

964 continue;

965

966

967 bool isNew = ConsiderSeen ? RegSeen.insert(Reg).second : false;

968 const TargetRegisterClass *RC = MRI->getRegClass(Reg);

969

970 RegClassWeight W = TRI->getRegClassWeight(RC);

971 int RCCost = 0;

973 RCCost = W.RegWeight;

974 else {

976 if (isNew && !isKill && ConsiderUnseenAsDef)

977

978 RCCost = W.RegWeight;

979 else if (!isNew && isKill)

980 RCCost = -W.RegWeight;

981 }

982 if (RCCost == 0)

983 continue;

984 const int *PS = TRI->getRegClassPressureSets(RC);

985 for (; *PS != -1; ++PS)

986 Cost[*PS] += RCCost;

987 }

989}

990

991

992

994 assert(MI.mayLoad() && "Expected MI that loads!");

995

996

997

998 if (MI.memoperands_empty())

999 return true;

1000

1003 if (PSV->isGOT() || PSV->isConstantPool())

1004 return true;

1005

1006 return false;

1007}

1008

1009

1010

1011

1012

1013

1014

1015

1019

1020 bool FoundCallerPresReg = false;

1021 if (MI.mayStore() || MI.hasUnmodeledSideEffects() ||

1022 (MI.getNumOperands() == 0))

1023 return false;

1024

1025

1027 if (MO.isReg()) {

1029

1030

1031 if (Reg.isVirtual())

1033 if (Reg.isVirtual())

1034 return false;

1035 if (TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *MI.getMF()))

1036 return false;

1037 else

1038 FoundCallerPresReg = true;

1039 } else if (!MO.isImm()) {

1040 return false;

1041 }

1042 }

1043 return FoundCallerPresReg;

1044}

1045

1046

1047

1048

1049

1053

1054

1055

1056 if (MI.isCopy())

1057 return false;

1058

1060

1061 Register CopySrcReg = MI.getOperand(1).getReg();

1063 return false;

1064

1065 if (TRI->isCallerPreservedPhysReg(CopySrcReg.asMCReg(), *MF))

1066 return false;

1067

1068 Register CopyDstReg = MI.getOperand(0).getReg();

1069

1070 assert(CopyDstReg.isVirtual() && "copy dst is not a virtual reg");

1071

1074 return true;

1075 }

1076 return false;

1077}

1078

1079

1080

1081bool MachineLICMImpl::IsLICMCandidate(MachineInstr &I, MachineLoop *CurLoop) {

1082

1083 bool DontMoveAcrossStore = HoistConstLoads || !AllowedToHoistLoads[CurLoop];

1084 if ((I.isSafeToMove(DontMoveAcrossStore)) &&

1086 LLVM_DEBUG(dbgs() << "LICM: Instruction not safe to move.\n");

1087 return false;

1088 }

1089

1090

1091

1092

1093

1094

1095

1097 !IsGuaranteedToExecute(I.getParent(), CurLoop)) {

1098 LLVM_DEBUG(dbgs() << "LICM: Load not guaranteed to execute.\n");

1099 return false;

1100 }

1101

1102

1103

1104

1105

1106 if (I.isConvergent())

1107 return false;

1108

1110 return false;

1111

1112 return true;

1113}

1114

1115

1116bool MachineLICMImpl::IsLoopInvariantInst(MachineInstr &I,

1117 MachineLoop *CurLoop) {

1118 if (!IsLICMCandidate(I, CurLoop)) {

1119 LLVM_DEBUG(dbgs() << "LICM: Instruction not a LICM candidate\n");

1120 return false;

1121 }

1123}

1124

1125

1126

1127bool MachineLICMImpl::HasLoopPHIUse(const MachineInstr *MI,

1128 MachineLoop *CurLoop) {

1130 do {

1131 MI = Work.pop_back_val();

1132 for (const MachineOperand &MO : MI->all_defs()) {

1135 continue;

1136 for (MachineInstr &UseMI : MRI->use_instructions(Reg)) {

1137

1138 if (UseMI.isPHI()) {

1139

1140

1142 return true;

1143

1144

1145

1147 return true;

1148 continue;

1149 }

1150

1152 Work.push_back(&UseMI);

1153 }

1154 }

1155 } while (!Work.empty());

1156 return false;

1157}

1158

1159

1160

1161bool MachineLICMImpl::HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,

1163 MachineLoop *CurLoop) const {

1164 if (MRI->use_nodbg_empty(Reg))

1165 return false;

1166

1167 for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) {

1168 if (UseMI.isCopyLike())

1169 continue;

1171 continue;

1172 for (unsigned i = 0, e = UseMI.getNumOperands(); i != e; ++i) {

1173 const MachineOperand &MO = UseMI.getOperand(i);

1175 continue;

1177 if (MOReg != Reg)

1178 continue;

1179

1181 return true;

1182 }

1183

1184

1185 break;

1186 }

1187

1188 return false;

1189}

1190

1191

1192

1193bool MachineLICMImpl::IsCheapInstruction(MachineInstr &MI) const {

1195 return true;

1196

1197 bool isCheap = false;

1198 unsigned NumDefs = MI.getDesc().getNumDefs();

1199 for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {

1200 MachineOperand &DefMO = MI.getOperand(i);

1201 if (!DefMO.isReg() || !DefMO.isDef())

1202 continue;

1203 --NumDefs;

1206 continue;

1207

1209 return false;

1210 isCheap = true;

1211 }

1212

1213 return isCheap;

1214}

1215

1216

1217

1218bool MachineLICMImpl::CanCauseHighRegPressure(

1219 const SmallDenseMap<unsigned, int> &Cost, bool CheapInstr) {

1220 for (const auto &[Class, Weight] : Cost) {

1221 if (Weight <= 0)

1222 continue;

1223

1224 int Limit = RegLimit[Class];

1225

1226

1227

1229 return true;

1230

1231 for (const auto &RP : BackTrace)

1232 if (static_cast<int>(RP[Class]) + Weight >= Limit)

1233 return true;

1234 }

1235

1236 return false;

1237}

1238

1239

1240

1241

1242void MachineLICMImpl::UpdateBackTraceRegPressure(const MachineInstr *MI) {

1243

1244

1245 auto Cost = calcRegisterCost(MI, false,

1246 false);

1247

1248

1249 for (auto &RP : BackTrace)

1250 for (const auto &[Class, Weight] : Cost)

1252}

1253

1254

1255

1256bool MachineLICMImpl::IsProfitableToHoist(MachineInstr &MI,

1257 MachineLoop *CurLoop) {

1258 if (MI.isImplicitDef())

1259 return true;

1260

1261

1262

1263

1264

1265

1266

1267

1268

1269

1270

1271

1272

1274 return true;

1275

1276 bool CheapInstr = IsCheapInstruction(MI);

1277 bool CreatesCopy = HasLoopPHIUse(&MI, CurLoop);

1278

1279

1280 if (CheapInstr && CreatesCopy) {

1281 LLVM_DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI);

1282 return false;

1283 }

1284

1285

1286

1288 return true;

1289

1290

1291

1292 for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {

1293 const MachineOperand &MO = MI.getOperand(i);

1295 continue;

1298 continue;

1299 if (MO.isDef() && HasHighOperandLatency(MI, i, Reg, CurLoop)) {

1301 ++NumHighLatency;

1302 return true;

1303 }

1304 }

1305

1306

1307

1308

1309

1310

1311

1312 auto Cost = calcRegisterCost(&MI, false,

1313 false);

1314

1315

1316

1317 if (!CanCauseHighRegPressure(Cost, CheapInstr)) {

1319 ++NumLowRP;

1320 return true;

1321 }

1322

1323

1324 if (CreatesCopy) {

1325 LLVM_DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI);

1326 return false;

1327 }

1328

1329

1330

1331

1333 (!IsGuaranteedToExecute(MI.getParent(), CurLoop) && !MayCSE(&MI))) {

1335 return false;

1336 }

1337

1338

1339

1340

1341 if (MI.isCopy() || MI.isRegSequence()) {

1342 Register DefReg = MI.getOperand(0).getReg();

1345 [this](const MachineOperand &UseOp) {

1346 return !UseOp.isReg() || UseOp.getReg().isVirtual() ||

1347 MRI->isConstantPhysReg(UseOp.getReg());

1348 }) &&

1349 IsLoopInvariantInst(MI, CurLoop) &&

1350 any_of(MRI->use_nodbg_instructions(DefReg),

1351 [&CurLoop, this, DefReg,

1353 if (!CurLoop->contains(&UseMI))

1354 return false;

1355

1356

1357

1358

1359

1360 if (CanCauseHighRegPressure(Cost, false) &&

1361 !CurLoop->isLoopInvariant(UseMI, DefReg))

1362 return false;

1363

1364 return true;

1365 }))

1366 return true;

1367 }

1368

1369

1370

1372 MI.isDereferenceableInvariantLoad()) {

1373 LLVM_DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);

1374 return false;

1375 }

1376

1377 return true;

1378}

1379

1380

1381

1382

1383MachineInstr *MachineLICMImpl::ExtractHoistableLoad(MachineInstr *MI,

1384 MachineLoop *CurLoop) {

1385

1386 if (MI->canFoldAsLoad())

1387 return nullptr;

1388

1389

1390

1391

1392 if (MI->isDereferenceableInvariantLoad())

1393 return nullptr;

1394

1395

1396 unsigned LoadRegIndex;

1397 unsigned NewOpc =

1399 true,

1400 false,

1401 &LoadRegIndex);

1402 if (NewOpc == 0) return nullptr;

1403 const MCInstrDesc &MID = TII->get(NewOpc);

1404 MachineFunction &MF = *MI->getMF();

1405 const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex);

1406

1408

1409 SmallVector<MachineInstr *, 2> NewMIs;

1411 true,

1412 false, NewMIs);

1415 "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "

1416 "succeeded!");

1418 "Unfolded a load into multiple instructions!");

1419 MachineBasicBlock *MBB = MI->getParent();

1423

1424

1425 if (!IsLoopInvariantInst(*NewMIs[0], CurLoop) ||

1426 !IsProfitableToHoist(*NewMIs[0], CurLoop)) {

1427 NewMIs[0]->eraseFromParent();

1428 NewMIs[1]->eraseFromParent();

1429 return nullptr;

1430 }

1431

1432

1433 UpdateRegPressure(NewMIs[1]);

1434

1435

1436

1437

1438 if (MI->shouldUpdateAdditionalCallInfo())

1440

1441 MI->eraseFromParent();

1442 return NewMIs[0];

1443}

1444

1445

1446

1447

1448void MachineLICMImpl::InitCSEMap(MachineBasicBlock *BB) {

1449 for (MachineInstr &MI : *BB)

1451}

1452

1453

1454

1455void MachineLICMImpl::InitializeLoadsHoistableLoops() {

1458

1459

1460

1461 while (!Worklist.empty()) {

1462 auto *L = Worklist.pop_back_val();

1463 AllowedToHoistLoads[L] = true;

1466 }

1467

1468

1469

1470

1471

1472

1473

1474

1475 for (auto *Loop : reverse(LoopsInPreOrder)) {

1476 for (auto *MBB : Loop->blocks()) {

1477

1478 if (!AllowedToHoistLoads[Loop])

1479 continue;

1480 for (auto &MI : *MBB) {

1481 if (MI.isLoadFoldBarrier() && MI.mayStore() && MI.isCall() &&

1482 !(MI.mayLoad() && MI.hasOrderedMemoryRef()))

1483 continue;

1484 for (MachineLoop *L = Loop; L != nullptr; L = L->getParentLoop())

1485 AllowedToHoistLoads[L] = false;

1486 break;

1487 }

1488 }

1489 }

1490}

1491

1492

1493

1494MachineInstr *

1495MachineLICMImpl::LookForDuplicate(const MachineInstr *MI,

1496 std::vector<MachineInstr *> &PrevMIs) {

1497 for (MachineInstr *PrevMI : PrevMIs)

1499 return PrevMI;

1500

1501 return nullptr;

1502}

1503

1504

1505

1506

1507

1508bool MachineLICMImpl::EliminateCSE(

1509 MachineInstr *MI,

1510 DenseMap<unsigned, std::vector<MachineInstr *>>::iterator &CI) {

1511

1512

1513 if (MI->isImplicitDef())

1514 return false;

1515

1516

1517

1518 if (MI->mayLoad() && MI->isDereferenceableInvariantLoad())

1519 return false;

1520

1521 if (MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {

1523

1524

1525

1526 SmallVector<unsigned, 2> Defs;

1527 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {

1528 const MachineOperand &MO = MI->getOperand(i);

1529

1530

1532 MO.getReg() == Dup->getOperand(i).getReg()) &&

1533 "Instructions with different phys regs are not identical!");

1534

1537 }

1538

1540 for (unsigned i = 0, e = Defs.size(); i != e; ++i) {

1541 unsigned Idx = Defs[i];

1543 Register DupReg = Dup->getOperand(Idx).getReg();

1544 OrigRCs.push_back(MRI->getRegClass(DupReg));

1545

1546 if (MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) {

1547

1548 for (unsigned j = 0; j != i; ++j)

1549 MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]);

1550 return false;

1551 }

1552 }

1553

1554 for (unsigned Idx : Defs) {

1556 Register DupReg = Dup->getOperand(Idx).getReg();

1557 MRI->replaceRegWith(Reg, DupReg);

1558 MRI->clearKillFlags(DupReg);

1559

1560 if (MRI->use_nodbg_empty(DupReg))

1561 Dup->getOperand(Idx).setIsDead(false);

1562 }

1563

1564 MI->eraseFromParent();

1565 ++NumCSEed;

1566 return true;

1567 }

1568 return false;

1569}

1570

1571

1572

1573bool MachineLICMImpl::MayCSE(MachineInstr *MI) {

1574 if (MI->mayLoad() && MI->isDereferenceableInvariantLoad())

1575 return false;

1576

1577 unsigned Opcode = MI->getOpcode();

1578 for (auto &Map : CSEMap) {

1579

1581 DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI =

1582 Map.second.find(Opcode);

1583

1584

1585 if (CI == Map.second.end() || MI->isImplicitDef())

1586 continue;

1587 if (LookForDuplicate(MI, CI->second) != nullptr)

1588 return true;

1589 }

1590 }

1591

1592 return false;

1593}

1594

1595

1596

1597

1598unsigned MachineLICMImpl::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader,

1599 MachineLoop *CurLoop) {

1600 MachineBasicBlock *SrcBlock = MI->getParent();

1601

1602

1605 isTgtHotterThanSrc(SrcBlock, Preheader)) {

1606 ++NumNotHoistedDueToHotness;

1607 return HoistResult::NotHoisted;

1608 }

1609

1610 bool HasExtractHoistableLoad = false;

1611 if (!IsLoopInvariantInst(*MI, CurLoop) ||

1612 !IsProfitableToHoist(*MI, CurLoop)) {

1613

1614 MI = ExtractHoistableLoad(MI, CurLoop);

1615 if (MI)

1616 return HoistResult::NotHoisted;

1617 HasExtractHoistableLoad = true;

1618 }

1619

1620

1621

1622 if (MI->mayStore())

1623 NumStoreConst++;

1624

1625

1626

1628 dbgs() << "Hoisting " << *MI;

1629 if (MI->getParent()->getBasicBlock())

1633 dbgs() << "\n";

1634 });

1635

1636

1637

1638 if (FirstInLoop) {

1639 InitCSEMap(Preheader);

1640 FirstInLoop = false;

1641 }

1642

1643

1644 unsigned Opcode = MI->getOpcode();

1645 bool HasCSEDone = false;

1646 for (auto &Map : CSEMap) {

1647

1649 DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI =

1650 Map.second.find(Opcode);

1651 if (CI != Map.second.end()) {

1652 if (EliminateCSE(MI, CI)) {

1653 HasCSEDone = true;

1654 break;

1655 }

1656 }

1657 }

1658 }

1659

1660 if (!HasCSEDone) {

1661

1663

1664

1665

1666

1667 assert(MI->isDebugInstr() && "Should not hoist debug inst");

1669

1670

1671 UpdateBackTraceRegPressure(MI);

1672

1673

1674

1675

1676 for (MachineOperand &MO : MI->all_defs())

1678 MRI->clearKillFlags(MO.getReg());

1679

1680 CSEMap[Preheader][Opcode].push_back(MI);

1681 }

1682

1683 ++NumHoisted;

1685

1686 if (HasCSEDone || HasExtractHoistableLoad)

1687 return HoistResult::Hoisted | HoistResult::ErasedMI;

1688 return HoistResult::Hoisted;

1689}

1690

1691

1692MachineBasicBlock *MachineLICMImpl::getOrCreatePreheader(MachineLoop *CurLoop) {

1693

1694

1695 if (MachineBasicBlock *Preheader = CurLoop->getLoopPreheader())

1696 return Preheader;

1697

1698

1699

1701 MachineBasicBlock *NewPreheader = Pred->SplitCriticalEdge(

1702 CurLoop->getHeader(), LegacyPass, MFAM, nullptr, MDTU);

1703 if (NewPreheader)

1705 return NewPreheader;

1706 }

1707

1708 return nullptr;

1709}

1710

1711

1712

1713bool MachineLICMImpl::isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,

1714 MachineBasicBlock *TgtBlock) {

1715

1718

1719

1720 if (!SrcBF)

1721 return true;

1722

1723 double Ratio = (double)DstBF / SrcBF;

1724

1725

1727}

1728

1729template <typename DerivedT, bool PreRegAlloc>

1732 bool Changed = MachineLICMImpl(PreRegAlloc, nullptr, &MFAM).run(MF);

1737 return PA;

1738}