LLVM: lib/CodeGen/MachineLICM.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

53#include

54#include

55#include

56#include

57

58using namespace llvm;

59

60#define DEBUG_TYPE "machinelicm"

61

64 cl::desc("MachineLICM should avoid speculation"),

66

69 cl::desc("MachineLICM should hoist even cheap instructions"),

71

74 cl::desc("Hoist invariant stores"),

76

78 cl::desc("Hoist invariant loads"),

80

81

82

85 cl::desc("Do not hoist instructions if target"

86 "block is N times hotter than the source."),

88

90

93 cl::desc("Disable hoisting instructions to"

94 " hotter blocks"),

97 "disable the feature"),

99 "enable the feature when using profile data"),

101 "enable the feature with/wo profile data")));

102

104 "Number of machine instructions hoisted out of loops");

106 "Number of instructions hoisted in low reg pressure situation");

108 "Number of high latency instructions hoisted");

110 "Number of hoisted machine instructions CSEed");

112 "Number of machine instructions hoisted out of loops post regalloc");

114 "Number of stores of const phys reg hoisted out of loops");

116 "Number of instructions not hoisted due to block frequency");

117

118namespace {

119 enum HoistResult { NotHoisted = 1, Hoisted = 2, ErasedMI = 4 };

120

121 class MachineLICMImpl {

128 bool PreRegAlloc = false;

129 bool HasProfileData = false;

130 Pass *LegacyPass;

132

133

134 AliasAnalysis *AA = nullptr;

136 MachineLoopInfo *MLI = nullptr;

138

139

140 bool Changed = false;

141 bool FirstInLoop = false;

142

143

144

146

147

149

152 if (Inserted) {

155 It->second = std::move(ExitBlocks);

156 }

158 }

159

160

163

164

165

167

168

170

171

174 CSEMap;

175

176 enum {

177 SpeculateFalse = 0,

178 SpeculateTrue = 1,

179 SpeculateUnknown = 2

180 };

181

182

183

184

185 unsigned SpeculationState = SpeculateUnknown;

186

187 public:

188 MachineLICMImpl(bool PreRegAlloc, Pass *LegacyPass,

190 : PreRegAlloc(PreRegAlloc), LegacyPass(LegacyPass), MFAM(MFAM) {

191 assert((LegacyPass || MFAM) && "LegacyPass or MFAM must be provided");

192 assert(!(LegacyPass && MFAM) &&

193 "LegacyPass and MFAM cannot be provided at the same time");

194 }

195

197

198 void releaseMemory() {

201 RegLimit.clear();

202 BackTrace.clear();

203 CSEMap.clear();

204 ExitBlockMap.clear();

205 }

206

207 private:

208

209 struct CandidateInfo {

211 unsigned Def;

212 int FI;

213

214 CandidateInfo(MachineInstr *mi, unsigned def, int fi)

215 : MI(mi), Def(def), FI(fi) {}

216 };

217

218 void HoistRegionPostRA(MachineLoop *CurLoop,

220

223

228

230

232

234

236

239

241

243 bool Cheap);

244

245 void UpdateBackTraceRegPressure(const MachineInstr *MI);

246

248

250

251 bool isTriviallyReMaterializable(const MachineInstr &MI) const;

252

254

256

257 void ExitScopeIfDone(

261

264

266

268 bool ConsiderSeen,

269 bool ConsiderUnseenAsDef);

270

272 bool ConsiderUnseenAsDef = false);

273

275

277 std::vector<MachineInstr *> &PrevMIs);

278

279 bool

281 DenseMap<unsigned, std::vector<MachineInstr *>>::iterator &CI);

282

284

287

289

290 void InitializeLoadsHoistableLoops();

291

296 };

297

299 bool PreRegAlloc;

300

301 public:

302 MachineLICMBase(char &ID, bool PreRegAlloc)

304

306

315 }

316 };

317

318 class MachineLICM : public MachineLICMBase {

319 public:

320 static char ID;

321 MachineLICM() : MachineLICMBase(ID, false) {

323 }

324 };

325

326 class EarlyMachineLICM : public MachineLICMBase {

327 public:

328 static char ID;

329 EarlyMachineLICM() : MachineLICMBase(ID, true) {

331 }

332 };

333

334}

335

336char MachineLICM::ID;

337char EarlyMachineLICM::ID;

338

341

343 "Machine Loop Invariant Code Motion", false, false)

350

359

360bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) {

361 if (skipFunction(MF.getFunction()))

362 return false;

363

364 MachineLICMImpl Impl(PreRegAlloc, this, nullptr);

365 return Impl.run(MF);

366}

367

368#define GET_RESULT(RESULT, GETTER, INFIX) \

369 ((LegacyPass) \

370 ? &LegacyPass->getAnalysis<RESULT##INFIX##WrapperPass>().GETTER() \

371 : &MFAM->getResult<RESULT##Analysis>(MF))

372

374 AA = MFAM != nullptr

376 .getManager()

380 MachineDomTreeUpdater::UpdateStrategy::Lazy);

381 MDTU = &DTU;

385 : nullptr;

386

387 Changed = FirstInLoop = false;

389 TII = ST.getInstrInfo();

390 TLI = ST.getTargetLowering();

391 TRI = ST.getRegisterInfo();

394 SchedModel.init(&ST);

395

397

398 if (PreRegAlloc)

399 LLVM_DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");

400 else

401 LLVM_DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");

403

404 if (PreRegAlloc) {

405

406 unsigned NumRPS = TRI->getNumRegPressureSets();

407 RegPressure.resize(NumRPS);

408 std::fill(RegPressure.begin(), RegPressure.end(), 0);

409 RegLimit.resize(NumRPS);

410 for (unsigned i = 0, e = NumRPS; i != e; ++i)

411 RegLimit[i] = TRI->getRegPressureSetLimit(MF, i);

412 }

413

415 InitializeLoadsHoistableLoops();

416

418 while (!Worklist.empty()) {

421

422 if (!PreRegAlloc)

423 HoistRegionPostRA(CurLoop, CurPreheader);

424 else {

425

426

428 FirstInLoop = true;

429 HoistOutOfLoop(N, CurLoop, CurPreheader);

430 CSEMap.clear();

431 }

432 }

433 releaseMemory();

434 return Changed;

435}

436

437

439

440

441 if (MI->mayStore())

442 return false;

443

444

445 if (MI->memoperands_empty())

446 return true;

448 if (MemOp->isStore() || MemOp->getPseudoValue())

449 continue;

451 dyn_cast(MemOp->getPseudoValue())) {

452 if (Value->getFrameIndex() == FI)

453 return true;

454 }

455 }

456 return false;

457}

458

462

463

464

465

466

467

468

469

470

471

472

473

474

475

476

477

478

479

480

481

482

483

484

485

486

487

488

489

490

491

492 BitVector RUsFromRegsNotInMask(TRI.getNumRegUnits());

493 const unsigned NumRegs = TRI.getNumRegs();

494 const unsigned MaskWords = (NumRegs + 31) / 32;

495 for (unsigned K = 0; K < MaskWords; ++K) {

496 const uint32_t Word = Mask[K];

497 for (unsigned Bit = 0; Bit < 32; ++Bit) {

498 const unsigned PhysReg = (K * 32) + Bit;

499 if (PhysReg == NumRegs)

500 break;

501

502 if (PhysReg && !((Word >> Bit) & 1)) {

504 RUsFromRegsNotInMask.set(*RUI);

505 }

506 }

507 }

508

509 RUs |= RUsFromRegsNotInMask;

510}

511

512

513

519 bool RuledOut = false;

520 bool HasNonInvariantUse = false;

521 unsigned Def = 0;

523 if (MO.isFI()) {

524

525 int FI = MO.getIndex();

526 if (!StoredFIs.count(FI) &&

529 StoredFIs.insert(FI);

530 HasNonInvariantUse = true;

531 continue;

532 }

533

534

535

536 if (MO.isRegMask()) {

538 continue;

539 }

540

541 if (!MO.isReg())

542 continue;

544 if (!Reg)

545 continue;

546 assert(Reg.isPhysical() && "Not expecting virtual register!");

547

548 if (!MO.isDef()) {

549 if (!HasNonInvariantUse) {

551

552

553 if (RUDefs.test(*RUI) || RUClobbers.test(*RUI)) {

554 HasNonInvariantUse = true;

555 break;

556 }

557 }

558 }

559 continue;

560 }

561

562 if (MO.isImplicit()) {

564 RUClobbers.set(*RUI);

565 if (!MO.isDead())

566

567 RuledOut = true;

568

569

570 continue;

571 }

572

573

574

575 if (Def)

576 RuledOut = true;

577 else

579

580

581

582

584 if (RUDefs.test(*RUI)) {

585 RUClobbers.set(*RUI);

586 RuledOut = true;

587 } else if (RUClobbers.test(*RUI)) {

588

589

590 RuledOut = true;

591 }

592

593 RUDefs.set(*RUI);

594 }

595 }

596

597

598

599 if (Def && !RuledOut) {

600 int FI = std::numeric_limits::min();

601 if ((!HasNonInvariantUse && IsLICMCandidate(*MI, CurLoop)) ||

603 Candidates.push_back(CandidateInfo(MI, Def, FI));

604 }

605}

606

607

608

609void MachineLICMImpl::HoistRegionPostRA(MachineLoop *CurLoop,

611 MachineBasicBlock *Preheader = getCurPreheader(CurLoop, CurPreheader);

612 if (!Preheader)

613 return;

614

615 unsigned NumRegUnits = TRI->getNumRegUnits();

616 BitVector RUDefs(NumRegUnits);

617 BitVector RUClobbers(NumRegUnits);

618

621

622

623

625

626

628 if (ML && ML->getHeader()->isEHPad()) continue;

629

630

631

632

633 for (const auto &LI : BB->liveins()) {

635 RUDefs.set(*RUI);

636 }

637

638

639 if (const uint32_t *Mask = BB->getBeginClobberMask(TRI))

641

642 SpeculationState = SpeculateUnknown;

644 ProcessMI(&MI, RUDefs, RUClobbers, StoredFIs, Candidates, CurLoop);

645 }

646

647

650 if (TI != Preheader->end()) {

652 if (!MO.isReg())

653 continue;

655 if (!Reg)

656 continue;

658 TermRUs.set(*RUI);

659 }

660 }

661

662

663

664

665

666

667

668

669

670 for (CandidateInfo &Candidate : Candidates) {

671 if (Candidate.FI != std::numeric_limits::min() &&

672 StoredFIs.count(Candidate.FI))

673 continue;

674

675 unsigned Def = Candidate.Def;

676 bool Safe = true;

678 if (RUClobbers.test(*RUI) || TermRUs.test(*RUI)) {

679 Safe = false;

680 break;

681 }

682 }

683

684 if (!Safe)

685 continue;

686

689 if (!MO.getReg())

690 continue;

692 if (RUDefs.test(*RUI) || RUClobbers.test(*RUI)) {

693

694

695 Safe = false;

696 break;

697 }

698 }

699

700 if (!Safe)

701 break;

702 }

703

704 if (Safe)

705 HoistPostRA(MI, Candidate.Def, CurLoop, CurPreheader);

706 }

707}

708

709

710

713 if (!BB->isLiveIn(Reg))

714 BB->addLiveIn(Reg);

717 if (!MO.getReg())

718 continue;

719 if (TRI->regsOverlap(Reg, MO.getReg()))

720 MO.setIsKill(false);

721 }

722 }

723 }

724}

725

726

727

728void MachineLICMImpl::HoistPostRA(MachineInstr *MI, unsigned Def,

731 MachineBasicBlock *Preheader = getCurPreheader(CurLoop, CurPreheader);

732

733

734

737 << *MI);

738

739

742

743

744

745

746 assert(MI->isDebugInstr() && "Should not hoist debug inst");

748

749

750

751

752 AddToLiveIns(Def, CurLoop);

753

754 ++NumPostRAHoisted;

755 Changed = true;

756}

757

758

759

760bool MachineLICMImpl::IsGuaranteedToExecute(MachineBasicBlock *BB,

762 if (SpeculationState != SpeculateUnknown)

763 return SpeculationState == SpeculateFalse;

764

765 if (BB != CurLoop->getHeader()) {

766

769 for (MachineBasicBlock *CurrentLoopExitingBlock : CurrentLoopExitingBlocks)

771 SpeculationState = SpeculateTrue;

772 return false;

773 }

774 }

775

776 SpeculationState = SpeculateFalse;

777 return true;

778}

779

780

781

782

783

784bool MachineLICMImpl::isTriviallyReMaterializable(

786 if (TII->isTriviallyReMaterializable(MI))

787 return false;

788

790 if (MO.getReg().isVirtual())

791 return false;

792 }

793

794 return true;

795}

796

799

800

801 BackTrace.push_back(RegPressure);

802}

803

807}

808

809

810

811

812void MachineLICMImpl::ExitScopeIfDone(

816 if (OpenChildren[Node])

817 return;

818

819 for(;;) {

820 ExitScope(Node->getBlock());

821

823 if (!Parent || --OpenChildren[Parent] != 0)

824 break;

825 Node = Parent;

826 }

827}

828

829

830

831

832

836 MachineBasicBlock *Preheader = getCurPreheader(CurLoop, CurPreheader);

837 if (!Preheader)

838 return;

839

844

845

847 while (!WorkList.empty()) {

849 assert(Node && "Null dominator tree node?");

851

852

853

855 if (ML && ML->getHeader()->isEHPad())

856 continue;

857

858

860 continue;

861

863 unsigned NumChildren = Node->getNumChildren();

864

865

866

867

869 NumChildren = 0;

870

871 OpenChildren[Node] = NumChildren;

872 if (NumChildren) {

873

874

875

877 ParentMap[Child] = Node;

879 }

880 }

881 }

882

883 if (Scopes.size() == 0)

884 return;

885

886

888 BackTrace.clear();

889 InitRegPressure(Preheader);

890

891

894

895 EnterScope(MBB);

896

897

898 SpeculationState = SpeculateUnknown;

900 unsigned HoistRes = HoistResult::NotHoisted;

901 HoistRes = Hoist(&MI, Preheader, CurLoop);

902 if (HoistRes & HoistResult::NotHoisted) {

903

904

907 L = L->getParentLoop())

909

910 while (!InnerLoopWorkList.empty()) {

913 if (InnerLoopPreheader) {

914 HoistRes = Hoist(&MI, InnerLoopPreheader, InnerLoop);

915 if (HoistRes & HoistResult::Hoisted)

916 break;

917 }

918 }

919 }

920

921 if (HoistRes & HoistResult::ErasedMI)

922 continue;

923

924 UpdateRegPressure(&MI);

925 }

926

927

928 ExitScopeIfDone(Node, OpenChildren, ParentMap);

929 }

930}

931

934}

935

936

937

938

941

942

943

944

945

951 }

952

954 UpdateRegPressure(&MI, true);

955}

956

957

958void MachineLICMImpl::UpdateRegPressure(const MachineInstr *MI,

959 bool ConsiderUnseenAsDef) {

960 auto Cost = calcRegisterCost(MI, true, ConsiderUnseenAsDef);

961 for (const auto &RPIdAndCost : Cost) {

962 unsigned Class = RPIdAndCost.first;

963 if (static_cast<int>(RegPressure[Class]) < -RPIdAndCost.second)

965 else

967 }

968}

969

970

971

972

973

974

975

977MachineLICMImpl::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,

978 bool ConsiderUnseenAsDef) {

980 if (MI->isImplicitDef())

982 for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {

985 continue;

987 if (Reg.isVirtual())

988 continue;

989

990

991 bool isNew = ConsiderSeen ? RegSeen.insert(Reg).second : false;

993

995 int RCCost = 0;

997 RCCost = W.RegWeight;

998 else {

1000 if (isNew && !isKill && ConsiderUnseenAsDef)

1001

1002 RCCost = W.RegWeight;

1003 else if (!isNew && isKill)

1004 RCCost = -W.RegWeight;

1005 }

1006 if (RCCost == 0)

1007 continue;

1008 const int *PS = TRI->getRegClassPressureSets(RC);

1009 for (; *PS != -1; ++PS)

1010 Cost[*PS] += RCCost;

1011 }

1012 return Cost;

1013}

1014

1015

1016

1018 assert(MI.mayLoad() && "Expected MI that loads!");

1019

1020

1021

1022 if (MI.memoperands_empty())

1023 return true;

1024

1027 if (PSV->isGOT() || PSV->isConstantPool())

1028 return true;

1029

1030 return false;

1031}

1032

1033

1034

1035

1036

1037

1038

1039

1043

1044 bool FoundCallerPresReg = false;

1045 if (MI.mayStore() || MI.hasUnmodeledSideEffects() ||

1046 (MI.getNumOperands() == 0))

1047 return false;

1048

1049

1051 if (MO.isReg()) {

1053

1054

1055 if (Reg.isVirtual())

1056 Reg = TRI->lookThruCopyLike(MO.getReg(), MRI);

1057 if (Reg.isVirtual())

1058 return false;

1059 if (TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *MI.getMF()))

1060 return false;

1061 else

1062 FoundCallerPresReg = true;

1063 } else if (!MO.isImm()) {

1064 return false;

1065 }

1066 }

1067 return FoundCallerPresReg;

1068}

1069

1070

1071

1072

1073

1077

1078

1079

1080 if (MI.isCopy())

1081 return false;

1082

1084

1085 Register CopySrcReg = MI.getOperand(1).getReg();

1087 return false;

1088

1089 if (TRI->isCallerPreservedPhysReg(CopySrcReg.asMCReg(), *MF))

1090 return false;

1091

1092 Register CopyDstReg = MI.getOperand(0).getReg();

1093

1094 assert(CopyDstReg.isVirtual() && "copy dst is not a virtual reg");

1095

1098 return true;

1099 }

1100 return false;

1101}

1102

1103

1104

1106

1107 bool DontMoveAcrossStore = HoistConstLoads || !AllowedToHoistLoads[CurLoop];

1108 if ((I.isSafeToMove(DontMoveAcrossStore)) &&

1110 LLVM_DEBUG(dbgs() << "LICM: Instruction not safe to move.\n");

1111 return false;

1112 }

1113

1114

1115

1116

1117

1118

1119

1121 !IsGuaranteedToExecute(I.getParent(), CurLoop)) {

1122 LLVM_DEBUG(dbgs() << "LICM: Load not guaranteed to execute.\n");

1123 return false;

1124 }

1125

1126

1127

1128

1129

1130 if (I.isConvergent())

1131 return false;

1132

1133 if (TII->shouldHoist(I, CurLoop))

1134 return false;

1135

1136 return true;

1137}

1138

1139

1140bool MachineLICMImpl::IsLoopInvariantInst(MachineInstr &I,

1142 if (!IsLICMCandidate(I, CurLoop)) {

1143 LLVM_DEBUG(dbgs() << "LICM: Instruction not a LICM candidate\n");

1144 return false;

1145 }

1147}

1148

1149

1150

1151bool MachineLICMImpl::HasLoopPHIUse(const MachineInstr *MI,

1154 do {

1155 MI = Work.pop_back_val();

1158 if (Reg.isVirtual())

1159 continue;

1161

1162 if (UseMI.isPHI()) {

1163

1164

1166 return true;

1167

1168

1169

1171 return true;

1172 continue;

1173 }

1174

1176 Work.push_back(&UseMI);

1177 }

1178 }

1179 } while (!Work.empty());

1180 return false;

1181}

1182

1183

1184

1185bool MachineLICMImpl::HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,

1188 if (MRI->use_nodbg_empty(Reg))

1189 return false;

1190

1192 if (UseMI.isCopyLike())

1193 continue;

1195 continue;

1196 for (unsigned i = 0, e = UseMI.getNumOperands(); i != e; ++i) {

1199 continue;

1201 if (MOReg != Reg)

1202 continue;

1203

1204 if (TII->hasHighOperandLatency(SchedModel, MRI, MI, DefIdx, UseMI, i))

1205 return true;

1206 }

1207

1208

1209 break;

1210 }

1211

1212 return false;

1213}

1214

1215

1216

1217bool MachineLICMImpl::IsCheapInstruction(MachineInstr &MI) const {

1219 return true;

1220

1221 bool isCheap = false;

1222 unsigned NumDefs = MI.getDesc().getNumDefs();

1223 for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {

1225 if (!DefMO.isReg() || !DefMO.isDef())

1226 continue;

1227 --NumDefs;

1229 if (Reg.isPhysical())

1230 continue;

1231

1232 if (TII->hasLowDefLatency(SchedModel, MI, i))

1233 return false;

1234 isCheap = true;

1235 }

1236

1237 return isCheap;

1238}

1239

1240

1241

1242bool MachineLICMImpl::CanCauseHighRegPressure(

1244 for (const auto &RPIdAndCost : Cost) {

1245 if (RPIdAndCost.second <= 0)

1246 continue;

1247

1248 unsigned Class = RPIdAndCost.first;

1249 int Limit = RegLimit[Class];

1250

1251

1252

1254 return true;

1255

1256 for (const auto &RP : BackTrace)

1257 if (static_cast<int>(RP[Class]) + RPIdAndCost.second >= Limit)

1258 return true;

1259 }

1260

1261 return false;

1262}

1263

1264

1265

1266

1267void MachineLICMImpl::UpdateBackTraceRegPressure(const MachineInstr *MI) {

1268

1269

1270 auto Cost = calcRegisterCost(MI, false,

1271 false);

1272

1273

1274 for (auto &RP : BackTrace)

1275 for (const auto &RPIdAndCost : Cost)

1276 RP[RPIdAndCost.first] += RPIdAndCost.second;

1277}

1278

1279

1280

1281bool MachineLICMImpl::IsProfitableToHoist(MachineInstr &MI,

1283 if (MI.isImplicitDef())

1284 return true;

1285

1286

1287

1288

1289

1290

1291

1292

1293

1294

1295

1296

1297

1299 return true;

1300

1301 bool CheapInstr = IsCheapInstruction(MI);

1302 bool CreatesCopy = HasLoopPHIUse(&MI, CurLoop);

1303

1304

1305 if (CheapInstr && CreatesCopy) {

1306 LLVM_DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI);

1307 return false;

1308 }

1309

1310

1311

1312 if (isTriviallyReMaterializable(MI))

1313 return true;

1314

1315

1316

1317 for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {

1320 continue;

1322 if (Reg.isVirtual())

1323 continue;

1324 if (MO.isDef() && HasHighOperandLatency(MI, i, Reg, CurLoop)) {

1326 ++NumHighLatency;

1327 return true;

1328 }

1329 }

1330

1331

1332

1333

1334

1335

1336

1337 auto Cost = calcRegisterCost(&MI, false,

1338 false);

1339

1340

1341

1342 if (!CanCauseHighRegPressure(Cost, CheapInstr)) {

1344 ++NumLowRP;

1345 return true;

1346 }

1347

1348

1349 if (CreatesCopy) {

1350 LLVM_DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI);

1351 return false;

1352 }

1353

1354

1355

1356

1358 (!IsGuaranteedToExecute(MI.getParent(), CurLoop) && !MayCSE(&MI))) {

1360 return false;

1361 }

1362

1363

1364

1365

1366 if (MI.isCopy() || MI.isRegSequence()) {

1367 Register DefReg = MI.getOperand(0).getReg();

1371 return !UseOp.isReg() || UseOp.getReg().isVirtual() ||

1372 MRI->isConstantPhysReg(UseOp.getReg());

1373 }) &&

1374 IsLoopInvariantInst(MI, CurLoop) &&

1375 any_of(MRI->use_nodbg_instructions(DefReg),

1376 [&CurLoop, this, DefReg,

1378 if (!CurLoop->contains(&UseMI))

1379 return false;

1380

1381

1382

1383

1384

1385 if (CanCauseHighRegPressure(Cost, false) &&

1386 !CurLoop->isLoopInvariant(UseMI, DefReg))

1387 return false;

1388

1389 return true;

1390 }))

1391 return true;

1392 }

1393

1394

1395

1396 if (!isTriviallyReMaterializable(MI) &&

1397 MI.isDereferenceableInvariantLoad()) {

1398 LLVM_DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);

1399 return false;

1400 }

1401

1402 return true;

1403}

1404

1405

1406

1407

1410

1411 if (MI->canFoldAsLoad())

1412 return nullptr;

1413

1414

1415

1416

1417 if (MI->isDereferenceableInvariantLoad())

1418 return nullptr;

1419

1420

1421 unsigned LoadRegIndex;

1422 unsigned NewOpc =

1423 TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(),

1424 true,

1425 false,

1426 &LoadRegIndex);

1427 if (NewOpc == 0) return nullptr;

1431

1433

1435 bool Success = TII->unfoldMemoryOperand(MF, *MI, Reg,

1436 true,

1437 false, NewMIs);

1440 "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "

1441 "succeeded!");

1443 "Unfolded a load into multiple instructions!");

1448

1449

1450 if (!IsLoopInvariantInst(*NewMIs[0], CurLoop) ||

1451 !IsProfitableToHoist(*NewMIs[0], CurLoop)) {

1452 NewMIs[0]->eraseFromParent();

1453 NewMIs[1]->eraseFromParent();

1454 return nullptr;

1455 }

1456

1457

1458 UpdateRegPressure(NewMIs[1]);

1459

1460

1461

1462

1463 if (MI->shouldUpdateAdditionalCallInfo())

1465

1466 MI->eraseFromParent();

1467 return NewMIs[0];

1468}

1469

1470

1471

1472

1476}

1477

1478

1479

1480void MachineLICMImpl::InitializeLoadsHoistableLoops() {

1483

1484

1485

1486 while (!Worklist.empty()) {

1487 auto *L = Worklist.pop_back_val();

1488 AllowedToHoistLoads[L] = true;

1490 Worklist.insert(Worklist.end(), L->getSubLoops().begin(),

1491 L->getSubLoops().end());

1492 }

1493

1494

1495

1496

1497

1498

1499

1500

1501 for (auto *Loop : reverse(LoopsInPreOrder)) {

1503

1504 if (!AllowedToHoistLoads[Loop])

1505 continue;

1506 for (auto &MI : *MBB) {

1507 if (MI.isLoadFoldBarrier() && MI.mayStore() && MI.isCall() &&

1508 !(MI.mayLoad() && MI.hasOrderedMemoryRef()))

1509 continue;

1511 AllowedToHoistLoads[L] = false;

1512 break;

1513 }

1514 }

1515 }

1516}

1517

1518

1519

1521MachineLICMImpl::LookForDuplicate(const MachineInstr *MI,

1522 std::vector<MachineInstr *> &PrevMIs) {

1524 if (TII->produceSameValue(*MI, *PrevMI, (PreRegAlloc ? MRI : nullptr)))

1525 return PrevMI;

1526

1527 return nullptr;

1528}

1529

1530

1531

1532

1533

1534bool MachineLICMImpl::EliminateCSE(

1536 DenseMap<unsigned, std::vector<MachineInstr *>>::iterator &CI) {

1537

1538

1539 if (MI->isImplicitDef())

1540 return false;

1541

1542

1543

1544 if (MI->mayLoad() && MI->isDereferenceableInvariantLoad())

1545 return false;

1546

1547 if (MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {

1549

1550

1551

1553 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {

1555

1556

1558 MO.getReg() == Dup->getOperand(i).getReg()) &&

1559 "Instructions with different phys regs are not identical!");

1560

1563 }

1564

1566 for (unsigned i = 0, e = Defs.size(); i != e; ++i) {

1567 unsigned Idx = Defs[i];

1569 Register DupReg = Dup->getOperand(Idx).getReg();

1570 OrigRCs.push_back(MRI->getRegClass(DupReg));

1571

1572 if (MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) {

1573

1574 for (unsigned j = 0; j != i; ++j)

1575 MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]);

1576 return false;

1577 }

1578 }

1579

1580 for (unsigned Idx : Defs) {

1582 Register DupReg = Dup->getOperand(Idx).getReg();

1583 MRI->replaceRegWith(Reg, DupReg);

1584 MRI->clearKillFlags(DupReg);

1585

1586 if (MRI->use_nodbg_empty(DupReg))

1587 Dup->getOperand(Idx).setIsDead(false);

1588 }

1589

1590 MI->eraseFromParent();

1591 ++NumCSEed;

1592 return true;

1593 }

1594 return false;

1595}

1596

1597

1598

1600 if (MI->mayLoad() && MI->isDereferenceableInvariantLoad())

1601 return false;

1602

1603 unsigned Opcode = MI->getOpcode();

1604 for (auto &Map : CSEMap) {

1605

1608 Map.second.find(Opcode);

1609

1610

1611 if (CI == Map.second.end() || MI->isImplicitDef())

1612 continue;

1613 if (LookForDuplicate(MI, CI->second) != nullptr)

1614 return true;

1615 }

1616 }

1617

1618 return false;

1619}

1620

1621

1622

1623

1627

1628

1631 isTgtHotterThanSrc(SrcBlock, Preheader)) {

1632 ++NumNotHoistedDueToHotness;

1633 return HoistResult::NotHoisted;

1634 }

1635

1636 bool HasExtractHoistableLoad = false;

1637 if (!IsLoopInvariantInst(*MI, CurLoop) ||

1638 !IsProfitableToHoist(*MI, CurLoop)) {

1639

1640 MI = ExtractHoistableLoad(MI, CurLoop);

1641 if (MI)

1642 return HoistResult::NotHoisted;

1643 HasExtractHoistableLoad = true;

1644 }

1645

1646

1647

1648 if (MI->mayStore())

1649 NumStoreConst++;

1650

1651

1652

1654 dbgs() << "Hoisting " << *MI;

1655 if (MI->getParent()->getBasicBlock())

1659 dbgs() << "\n";

1660 });

1661

1662

1663

1664 if (FirstInLoop) {

1665 InitCSEMap(Preheader);

1666 FirstInLoop = false;

1667 }

1668

1669

1670 unsigned Opcode = MI->getOpcode();

1671 bool HasCSEDone = false;

1672 for (auto &Map : CSEMap) {

1673

1676 Map.second.find(Opcode);

1677 if (CI != Map.second.end()) {

1678 if (EliminateCSE(MI, CI)) {

1679 HasCSEDone = true;

1680 break;

1681 }

1682 }

1683 }

1684 }

1685

1686 if (!HasCSEDone) {

1687

1689

1690

1691

1692

1693 assert(MI->isDebugInstr() && "Should not hoist debug inst");

1695

1696

1697 UpdateBackTraceRegPressure(MI);

1698

1699

1700

1701

1704 MRI->clearKillFlags(MO.getReg());

1705

1706 CSEMap[Preheader][Opcode].push_back(MI);

1707 }

1708

1709 ++NumHoisted;

1710 Changed = true;

1711

1712 if (HasCSEDone || HasExtractHoistableLoad)

1713 return HoistResult::Hoisted | HoistResult::ErasedMI;

1714 return HoistResult::Hoisted;

1715}

1716

1717

1719MachineLICMImpl::getCurPreheader(MachineLoop *CurLoop,

1721

1722

1723

1724

1726 return nullptr;

1727

1728 if (!CurPreheader) {

1730 if (!CurPreheader) {

1732 if (!Pred) {

1734 return nullptr;

1735 }

1736

1738 MFAM, nullptr, MDTU);

1739 if (!CurPreheader) {

1741 return nullptr;

1742 }

1743 }

1744 }

1745 return CurPreheader;

1746}

1747

1748

1749

1750bool MachineLICMImpl::isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,

1752

1755

1756

1757 if (!SrcBF)

1758 return true;

1759

1760 double Ratio = (double)DstBF / SrcBF;

1761

1762

1764}

1765

1766template <typename DerivedT, bool PreRegAlloc>

1769 bool Changed = MachineLICMImpl(PreRegAlloc, nullptr, &MFAM).run(MF);

1770 if (!Changed)

1774 return PA;

1775}

1776

unsigned const MachineRegisterInfo * MRI

MachineInstrBuilder & UseMI

This file implements the BitVector class.

COFF::MachineTypes Machine

Analysis containing CSE Info

#define clEnumValN(ENUMVAL, FLAGNAME, DESC)

Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx

This file defines the DenseMap class.

const HexagonInstrInfo * TII

static bool isExitBlock(BasicBlock *BB, const SmallVectorImpl< BasicBlock * > &ExitBlocks)

Return true if the specified block is in the list.

Machine Loop Invariant Code false early machinelicm

static cl::opt< bool > HoistConstStores("hoist-const-stores", cl::desc("Hoist invariant stores"), cl::init(true), cl::Hidden)

#define GET_RESULT(RESULT, GETTER, INFIX)

static cl::opt< UseBFI > DisableHoistingToHotterBlocks("disable-hoisting-to-hotter-blocks", cl::desc("Disable hoisting instructions to" " hotter blocks"), cl::init(UseBFI::PGO), cl::Hidden, cl::values(clEnumValN(UseBFI::None, "none", "disable the feature"), clEnumValN(UseBFI::PGO, "pgo", "enable the feature when using profile data"), clEnumValN(UseBFI::All, "all", "enable the feature with/wo profile data")))

static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI)

Return true if this machine instruction loads from global offset table or constant pool.

static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI)

static cl::opt< bool > HoistConstLoads("hoist-const-loads", cl::desc("Hoist invariant loads"), cl::init(true), cl::Hidden)

Machine Loop Invariant Code Motion

static cl::opt< bool > AvoidSpeculation("avoid-speculation", cl::desc("MachineLICM should avoid speculation"), cl::init(true), cl::Hidden)

static bool InstructionStoresToFI(const MachineInstr *MI, int FI)

Return true if instruction stores to the specified frame.

static bool isCopyFeedingInvariantStore(const MachineInstr &MI, const MachineRegisterInfo *MRI, const TargetRegisterInfo *TRI)

static void applyBitsNotInRegMaskToRegUnitsMask(const TargetRegisterInfo &TRI, BitVector &RUs, const uint32_t *Mask)

static cl::opt< bool > HoistCheapInsts("hoist-cheap-insts", cl::desc("MachineLICM should hoist even cheap instructions"), cl::init(false), cl::Hidden)

static bool isInvariantStore(const MachineInstr &MI, const TargetRegisterInfo *TRI, const MachineRegisterInfo *MRI)

static cl::opt< unsigned > BlockFrequencyRatioThreshold("block-freq-ratio-threshold", cl::desc("Do not hoist instructions if target" "block is N times hotter than the source."), cl::init(100), cl::Hidden)

unsigned const TargetRegisterInfo * TRI

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB

const SmallVectorImpl< MachineOperand > & Cond

assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

This file defines the SmallVector class.

This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

#define STATISTIC(VARNAME, DESC)

This file describes how to lower LLVM code to machine code.

A manager for alias analyses.

A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.

AAResults & getAAResults()

A container for analyses that lazily runs them and caches their results.

PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)

Get the result of an analysis pass for a given IR unit.

Represent the analysis usage information of a pass.

AnalysisUsage & addRequired()

AnalysisUsage & addPreserved()

Add the specified Pass class to the set of analyses preserved by this pass.

bool test(unsigned Idx) const

uint64_t getFrequency() const

Returns the frequency as a fixpoint number scaled by the entry frequency.

ValueT lookup(const_arg_type_t< KeyT > Val) const

lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...

std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)

Base class for the actual dominator tree node.

DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const

getNode - return the (Post)DominatorTree node for the specified basic block.

A specialized PseudoSourceValue for holding FixedStack values, which must include a frame index.

bool hasProfileData(bool IncludeSynthetic=false) const

Return true if the function is annotated with profile data.

DomTreeT & getDomTree()

Flush DomTree updates and return DomTree.

Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override

TargetInstrInfo overrides.

bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override

Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....

bool isAsCheapAsAMove(const MachineInstr &MI) const override

bool contains(const LoopT *L) const

Return true if the specified loop is contained within in this loop.

void getExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const

Return all of the successor blocks of this loop.

void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const

Return all blocks inside the loop that have successors outside of the loop.

BlockT * getHeader() const

iterator_range< block_iterator > blocks() const

BlockT * getLoopPredecessor() const

If the given loop's header has exactly one unique predecessor outside the loop, return it.

BlockT * getLoopPreheader() const

If there is a preheader for this loop, return it.

ArrayRef< BlockT * > getBlocks() const

Get a list of the basic blocks which make up this loop.

LoopT * getLoopFor(const BlockT *BB) const

Return the inner most loop that BB lives in.

Represents a single loop in the control flow graph.

Describe properties that are true of each instruction in the target description file.

bool isValid() const

Returns true if this iterator is not yet at the end.

Wrapper class representing physical registers. Should be passed by value.

unsigned pred_size() const

MachineBasicBlock * SplitCriticalEdge(MachineBasicBlock *Succ, Pass &P, std::vector< SparseBitVector<> > *LiveInSets=nullptr, MachineDomTreeUpdater *MDTU=nullptr)

Split the critical edge from this block to the given successor block, and return the newly created bl...

instr_iterator insert(instr_iterator I, MachineInstr *M)

Insert MI into the instruction list before I, possibly inside a bundle.

const BasicBlock * getBasicBlock() const

Return the LLVM basic block that this instance corresponded to originally.

iterator getFirstTerminator()

Returns an iterator to the first terminator instruction of this basic block.

unsigned succ_size() const

pred_iterator pred_begin()

void splice(iterator Where, MachineBasicBlock *Other, iterator From)

Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...

MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...

BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const

getblockFreq - Return block frequency.

Analysis pass which computes a MachineDominatorTree.

DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...

bool dominates(const MachineInstr *A, const MachineInstr *B) const

The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.

bool isSpillSlotObjectIndex(int ObjectIdx) const

Returns true if the specified index corresponds to a spill slot.

MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...

void getAnalysisUsage(AnalysisUsage &AU) const override

getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.

virtual bool runOnMachineFunction(MachineFunction &MF)=0

runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

StringRef getName() const

getName - Return the name of the corresponding LLVM function.

MachineFrameInfo & getFrameInfo()

getFrameInfo - Return the frame info object for the current function.

MachineRegisterInfo & getRegInfo()

getRegInfo - Return information about the registers currently in use.

Function & getFunction()

Return the LLVM function that this machine code represents.

void eraseAdditionalCallInfo(const MachineInstr *MI)

Following functions update call site info.

Representation of each machine instruction.

PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)

Analysis pass that exposes the MachineLoopInfo for a machine function.

bool isLoopInvariant(MachineInstr &I, const Register ExcludeReg=0) const

Returns true if the instruction is loop invariant.

A description of a memory reference used in the backend.

MachineOperand class - Representation of each machine instruction operand.

bool isReg() const

isReg - Tests if this is a MO_Register operand.

bool isImm() const

isImm - Tests if this is a MO_Immediate operand.

Register getReg() const

getReg - Returns the register number.

MachineRegisterInfo - Keep track of information for virtual and physical registers,...

static PassRegistry * getPassRegistry()

getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...

Pass interface - Implemented by all 'passes'.

AnalysisType & getAnalysis() const

getAnalysis() - This function is used by subclasses to get to the analysis information ...

A set of analyses that are preserved following a run of a transformation pass.

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

Special value supplied for machine level alias analysis.

Wrapper class representing virtual and physical registers.

MCRegister asMCReg() const

Utility to check-convert this value to a MCRegister.

constexpr bool isVirtual() const

Return true if the specified register number is in the virtual register namespace.

constexpr bool isPhysical() const

Return true if the specified register number is in the physical register namespace.

Implements a dense probed hash-table based set with some number of buckets stored inline.

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

TargetInstrInfo - Interface to description of machine instruction set.

This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...

TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...

Provide an instruction scheduling machine model to CodeGen passes.

void init(const TargetSubtargetInfo *TSInfo)

Initialize the machine model for instruction scheduling.

TargetSubtargetInfo - Generic base class for all target subtargets.

LLVM Value Representation.

std::pair< iterator, bool > insert(const ValueT &V)

size_type count(const_arg_type_t< ValueT > V) const

Return 1 if the specified key is in the set, 0 otherwise.

Reg

All possible values of the reg field in the ModR/M byte.

ValuesClass values(OptsTy... Options)

Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...

initializer< Ty > init(const Ty &Val)

PointerTypeMap run(const Module &M)

Compute the PointerTypeMap for the module M.

NodeAddr< DefNode * > Def

This is an optimization pass for GlobalISel generic memory operations.

char & EarlyMachineLICMID

This pass performs loop invariant code motion on machine instructions.

bool all_of(R &&range, UnaryPredicate P)

Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.

iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)

Make a range that does early increment to allow mutation of the underlying range without disrupting i...

PreservedAnalyses getMachineFunctionPassPreservedAnalyses()

Returns the minimum set of Analyses that all machine function passes must preserve.

bool any_of(R &&range, UnaryPredicate P)

Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.

auto reverse(ContainerTy &&C)

raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

void initializeMachineLICMPass(PassRegistry &)

bool is_contained(R &&Range, const E &Element)

Returns true if Element is found in Range.

char & MachineLICMID

This pass performs loop invariant code motion on machine instructions.

void initializeEarlyMachineLICMPass(PassRegistry &)

Printable printMBBReference(const MachineBasicBlock &MBB)

Prints a machine basic block reference.

Each TargetRegisterClass has a per register weight, and weight limit which must be less than the limi...