LLVM: lib/CodeGen/ModuloSchedule.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

22

23#define DEBUG_TYPE "pipeliner"

24using namespace llvm;

25

28 cl::desc("Swap target blocks of a conditional branch for MVE expander"));

29

34

35

36

37

38

39

40

43 assert(Phi.isPHI() && "Expecting a Phi.");

44

47 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)

48 if (Phi.getOperand(i + 1).getMBB() != Loop)

49 InitVal = Phi.getOperand(i).getReg();

50 else

51 LoopVal = Phi.getOperand(i).getReg();

52

53 assert(InitVal && LoopVal && "Unexpected Phi structure.");

54}

55

56

58 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)

59 if (Phi.getOperand(i + 1).getMBB() != LoopBB)

60 return Phi.getOperand(i).getReg();

62}

63

64

66 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)

67 if (Phi.getOperand(i + 1).getMBB() == LoopBB)

68 return Phi.getOperand(i).getReg();

70}

71

73 BB = Schedule.getLoop()->getTopBlock();

74 Preheader = *BB->pred_begin();

75 if (Preheader == BB)

76 Preheader = *std::next(BB->pred_begin());

77

78

79

81 int DefStage = Schedule.getStage(MI);

84 unsigned MaxDiff = 0;

85 bool PhiIsSwapped = false;

88 int UseStage = Schedule.getStage(UseMI);

89 unsigned Diff = 0;

90 if (UseStage != -1 && UseStage >= DefStage)

91 Diff = UseStage - DefStage;

92 if (MI->isPHI()) {

93 if (isLoopCarried(*MI))

94 ++Diff;

95 else

96 PhiIsSwapped = true;

97 }

98 MaxDiff = std::max(Diff, MaxDiff);

99 }

100 RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);

101 }

102 }

103

104 generatePipelinedLoop();

105}

106

107void ModuloScheduleExpander::generatePipelinedLoop() {

108 LoopInfo = TII->analyzeLoopForPipelining(BB);

110

111

113

114 unsigned MaxStageCount = Schedule.getNumStages() - 1;

115

116

117

118

119

120 ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];

121

122

123

124

125 ValueMapTy *VRMapPhi = new ValueMapTy[(MaxStageCount + 1) * 2];

126

127 InstrMapTy InstrMap;

128

130

131

132 generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs);

135

136

137

139 if (CI->isPHI())

140 continue;

141 unsigned StageNum = Schedule.getStage(CI);

143 updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap);

146 InstrMap[NewMI] = CI;

147 }

148

149

150

153 updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);

156 InstrMap[NewMI] = &MI;

157 }

158

159 NewKernel = KernelBB;

162

163 generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap,

164 InstrMap, MaxStageCount, MaxStageCount, false);

165 generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, VRMapPhi,

166 InstrMap, MaxStageCount, MaxStageCount, false);

167

169

170 SmallVector<MachineBasicBlock *, 4> EpilogBBs;

171

172 generateEpilog(MaxStageCount, KernelBB, BB, VRMap, VRMapPhi, EpilogBBs,

173 PrologBBs);

174

175

176

177 splitLifetimes(KernelBB, EpilogBBs);

178

179

180 removeDeadInstructions(KernelBB, EpilogBBs);

181

182

183 addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);

184

185 delete[] VRMap;

186 delete[] VRMapPhi;

187}

188

190

191 for (auto &I : *BB)

192 LIS.RemoveMachineInstrFromMaps(I);

193 BB->clear();

194 BB->eraseFromParent();

195}

196

197

198void ModuloScheduleExpander::generateProlog(unsigned LastStage,

200 ValueMapTy *VRMap,

201 MBBVectorTy &PrologBBs) {

203 InstrMapTy InstrMap;

204

205

206

207

208 for (unsigned i = 0; i < LastStage; ++i) {

209

210

216 PredBB = NewBB;

218

219

220

221 for (int StageNum = i; StageNum >= 0; --StageNum) {

224 BBI != BBE; ++BBI) {

225 if (Schedule.getStage(&*BBI) == StageNum) {

226 if (BBI->isPHI())

227 continue;

229 cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum);

230 updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap);

233 InstrMap[NewMI] = &*BBI;

234 }

235 }

236 }

237 rewritePhiValues(NewBB, i, VRMap, InstrMap);

239 dbgs() << "prolog:\n";

240 NewBB->dump();

241 });

242 }

243

245

246

247

248 unsigned numBranches = TII->removeBranch(*Preheader);

249 if (numBranches) {

251 TII->insertBranch(*Preheader, PrologBBs[0], nullptr, Cond, DebugLoc());

252 }

253}

254

255

256

257

258void ModuloScheduleExpander::generateEpilog(

260 ValueMapTy *VRMap, ValueMapTy *VRMapPhi, MBBVectorTy &EpilogBBs,

261 MBBVectorTy &PrologBBs) {

262

263

264 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;

266 bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);

267 assert(!checkBranch && "generateEpilog must be able to analyze the branch");

268 if (checkBranch)

269 return;

270

272 if (*LoopExitI == KernelBB)

273 ++LoopExitI;

274 assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");

275 MachineBasicBlock *LoopExitBB = *LoopExitI;

276

277 MachineBasicBlock *PredBB = KernelBB;

278 MachineBasicBlock *EpilogStart = LoopExitBB;

279 InstrMapTy InstrMap;

280

281

282

283

284 int EpilogStage = LastStage + 1;

285 for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {

286 MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock();

288 MF.insert(BB->getIterator(), NewBB);

289

292 LIS.insertMBBInMaps(NewBB);

293

294 if (EpilogStart == LoopExitBB)

295 EpilogStart = NewBB;

296

297

298

299 for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {

300 for (auto &BBI : *BB) {

301 if (BBI.isPHI())

302 continue;

303 MachineInstr *In = &BBI;

304 if ((unsigned)Schedule.getStage(In) == StageNum) {

305

306

307 MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);

308 updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap);

310 LIS.InsertMachineInstrInMaps(*NewMI);

311 InstrMap[NewMI] = In;

312 }

313 }

314 }

315 generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap,

316 InstrMap, LastStage, EpilogStage, i == 1);

317 generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, VRMapPhi,

318 InstrMap, LastStage, EpilogStage, i == 1);

319 PredBB = NewBB;

320

322 dbgs() << "epilog:\n";

323 NewBB->dump();

324 });

325 }

326

327

329

330

331

332 TII->removeBranch(*KernelBB);

333 assert((OrigBB == TBB || OrigBB == FBB) &&

334 "Unable to determine looping branch direction");

335 if (OrigBB != TBB)

336 TII->insertBranch(*KernelBB, EpilogStart, KernelBB, Cond, DebugLoc());

337 else

338 TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());

339

340 if (EpilogBBs.size() > 0) {

341 MachineBasicBlock *LastEpilogBB = EpilogBBs.back();

343 TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());

344 }

345}

346

347

348

354 if (O.getParent()->getParent() != MBB)

355 O.setReg(ToReg);

356}

357

358

359

363 if (MO.getParent()->getParent() != BB)

364 return true;

365 return false;

366}

367

368

369

370

371void ModuloScheduleExpander::generateExistingPhis(

373 MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,

374 unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {

375

376

377

378 unsigned PrologStage = 0;

379 unsigned PrevStage = 0;

380 bool InKernel = (LastStageNum == CurStageNum);

381 if (InKernel) {

382 PrologStage = LastStageNum - 1;

383 PrevStage = CurStageNum;

384 } else {

385 PrologStage = LastStageNum - (CurStageNum - LastStageNum);

386 PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;

387 }

388

390 BBE = BB->getFirstNonPHI();

391 BBI != BBE; ++BBI) {

392 Register Def = BBI->getOperand(0).getReg();

393

396 getPhiRegs(*BBI, BB, InitVal, LoopVal);

397

399

400

402 if (auto It = VRMap[LastStageNum].find(LoopVal);

403 It != VRMap[LastStageNum].end())

404 PhiOp2 = It->second;

405

406 int StageScheduled = Schedule.getStage(&*BBI);

407 int LoopValStage = Schedule.getStage(MRI.getVRegDef(LoopVal));

408 unsigned NumStages = getStagesForReg(Def, CurStageNum);

409 if (NumStages == 0) {

410

411

412 Register NewReg = VRMap[PrevStage][LoopVal];

413 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, 0, &*BBI, Def,

414 InitVal, NewReg);

415 auto It = VRMap[CurStageNum].find(LoopVal);

416 if (It != VRMap[CurStageNum].end()) {

418 VRMap[CurStageNum][Def] = Reg;

419 }

420 }

421

422

423

424

425 unsigned MaxPhis = PrologStage + 2;

426 if (!InKernel && (int)PrologStage <= LoopValStage)

427 MaxPhis = std::max((int)MaxPhis - LoopValStage, 1);

428 unsigned NumPhis = std::min(NumStages, MaxPhis);

429

431 unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;

432

433

434

435

436

437 int StageDiff = 0;

438 if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&

439 NumPhis == 1)

440 StageDiff = 1;

441

442

443 if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)

444 StageDiff = StageScheduled - LoopValStage;

445 for (unsigned np = 0; np < NumPhis; ++np) {

446

447

448

449 if (np > PrologStage || StageScheduled >= (int)LastStageNum)

450 PhiOp1 = InitVal;

451

452 else if (PrologStage >= AccessStage + StageDiff + np &&

453 VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)

454 PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];

455

456

457 else if (PrologStage >= AccessStage + StageDiff + np) {

458

459

460 PhiOp1 = LoopVal;

461 MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);

462 int Indirects = 1;

463 while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {

464 int PhiStage = Schedule.getStage(InstOp1);

465 if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)

467 else

469 InstOp1 = MRI.getVRegDef(PhiOp1);

470 int PhiOpStage = Schedule.getStage(InstOp1);

471 int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);

472 if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np) {

473 auto &M = VRMap[PrologStage - StageAdj - Indirects - np];

474 if (auto It = M.find(PhiOp1); It != M.end()) {

475 PhiOp1 = It->second;

476 break;

477 }

478 }

479 ++Indirects;

480 }

481 } else

482 PhiOp1 = InitVal;

483

484

485 if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))

486 if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)

488

489 MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);

490 bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();

491

492

493

494 if (!InKernel) {

495 int StageDiffAdj = 0;

496 if (LoopValStage != -1 && StageScheduled > LoopValStage)

497 StageDiffAdj = StageScheduled - LoopValStage;

498

499

500 if (np == 0 && PrevStage == LastStageNum &&

501 (StageScheduled != 0 || LoopValStage != 0) &&

502 VRMap[PrevStage - StageDiffAdj].count(LoopVal))

503 PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];

504

505

506 else if (np > 0 && PrevStage == LastStageNum &&

507 VRMap[PrevStage - np + 1].count(Def))

508 PhiOp2 = VRMap[PrevStage - np + 1][Def];

509

510 else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&

511 VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))

512 PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];

513

514

515 else if (VRMap[PrevStage - np].count(Def) &&

516 (!LoopDefIsPhi || (PrevStage != LastStageNum) ||

517 (LoopValStage == StageScheduled)))

518 PhiOp2 = VRMap[PrevStage - np][Def];

519 }

520

521

522

523

524

525 if (LoopDefIsPhi) {

526 if (static_cast<int>(PrologStage - np) >= StageScheduled) {

527 int LVNumStages = getStagesForPhi(LoopVal);

528 int StageDiff = (StageScheduled - LoopValStage);

529 LVNumStages -= StageDiff;

530

531 if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {

532 NewReg = PhiOp2;

533 unsigned ReuseStage = CurStageNum;

534 if (isLoopCarried(*PhiInst))

535 ReuseStage -= LVNumStages;

536

537

538 if (VRMap[ReuseStage - np].count(LoopVal)) {

539 NewReg = VRMap[ReuseStage - np][LoopVal];

540

541 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI,

542 Def, NewReg);

543

544 VRMap[CurStageNum - np][Def] = NewReg;

545 PhiOp2 = NewReg;

546 if (VRMap[LastStageNum - np - 1].count(LoopVal))

547 PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];

548

549 if (IsLast && np == NumPhis - 1)

551 continue;

552 }

553 }

554 }

555 if (InKernel && StageDiff > 0 &&

556 VRMap[CurStageNum - StageDiff - np].count(LoopVal))

557 PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];

558 }

559

560 const TargetRegisterClass *RC = MRI.getRegClass(Def);

561 NewReg = MRI.createVirtualRegister(RC);

562

563 MachineInstrBuilder NewPhi =

565 TII->get(TargetOpcode::PHI), NewReg);

568 LIS.InsertMachineInstrInMaps(*NewPhi);

569 if (np == 0)

570 InstrMap[NewPhi] = &*BBI;

571

572

573

574

576 if (InKernel && VRMap[PrevStage - np].count(LoopVal))

577 PrevReg = VRMap[PrevStage - np][LoopVal];

578 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,

579 NewReg, PrevReg);

580

581 if (VRMap[CurStageNum - np].count(Def)) {

583 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, R,

584 NewReg);

585 }

586

587

588

589

590 if (IsLast && np == NumPhis - 1)

592

593

594 if (InKernel)

595 PhiOp2 = NewReg;

596

597

598 VRMap[CurStageNum - np][Def] = NewReg;

599 }

600

601 while (NumPhis++ < NumStages) {

602 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, NumPhis, &*BBI, Def,

603 NewReg, 0);

604 }

605

606

607

608 if (NumStages == 0 && IsLast) {

609 auto &CurStageMap = VRMap[CurStageNum];

610 auto It = CurStageMap.find(LoopVal);

611 if (It != CurStageMap.end())

613 }

614 }

615}

616

617

618

619

620void ModuloScheduleExpander::generatePhis(

622 MachineBasicBlock *KernelBB, ValueMapTy *VRMap, ValueMapTy *VRMapPhi,

623 InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,

624 bool IsLast) {

625

626

627 unsigned PrologStage = 0;

628 unsigned PrevStage = 0;

629 unsigned StageDiff = CurStageNum - LastStageNum;

630 bool InKernel = (StageDiff == 0);

631 if (InKernel) {

632 PrologStage = LastStageNum - 1;

633 PrevStage = CurStageNum;

634 } else {

635 PrologStage = LastStageNum - StageDiff;

636 PrevStage = LastStageNum + StageDiff - 1;

637 }

638

640 BBE = BB->instr_end();

641 BBI != BBE; ++BBI) {

642 for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {

643 MachineOperand &MO = BBI->getOperand(i);

645 continue;

646

647 int StageScheduled = Schedule.getStage(&*BBI);

648 assert(StageScheduled != -1 && "Expecting scheduled instruction.");

650 unsigned NumPhis = getStagesForReg(Def, CurStageNum);

651

652

653

654 if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&

656 NumPhis = 1;

657 if (!InKernel && (unsigned)StageScheduled > PrologStage)

658 continue;

659

661 if (InKernel) {

662 PhiOp2 = VRMap[PrevStage][Def];

663 if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))

664 if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)

666 }

667

668

669 if (NumPhis > PrologStage + 1 - StageScheduled)

670 NumPhis = PrologStage + 1 - StageScheduled;

671 for (unsigned np = 0; np < NumPhis; ++np) {

672

673

674

675

676

677

678

679

680

681

682

683

684

685

686

687

688

689

690

691

692

693

694 Register PhiOp1 = VRMap[PrologStage][Def];

695 if (np <= PrologStage)

696 PhiOp1 = VRMap[PrologStage - np][Def];

697 if (!InKernel) {

698 if (PrevStage == LastStageNum && np == 0)

699 PhiOp2 = VRMap[LastStageNum][Def];

700 else

701 PhiOp2 = VRMapPhi[PrevStage - np][Def];

702 }

703

704 const TargetRegisterClass *RC = MRI.getRegClass(Def);

705 Register NewReg = MRI.createVirtualRegister(RC);

706

707 MachineInstrBuilder NewPhi =

709 TII->get(TargetOpcode::PHI), NewReg);

712 LIS.InsertMachineInstrInMaps(*NewPhi);

713 if (np == 0)

714 InstrMap[NewPhi] = &*BBI;

715

716

717

718 if (InKernel) {

719 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp1,

720 NewReg);

721 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp2,

722 NewReg);

723

724 PhiOp2 = NewReg;

725 VRMapPhi[PrevStage - np - 1][Def] = NewReg;

726 } else {

727 VRMapPhi[CurStageNum - np][Def] = NewReg;

728 if (np == NumPhis - 1)

729 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,

730 NewReg);

731 }

732 if (IsLast && np == NumPhis - 1)

734 }

735 }

736 }

737}

738

739

740

741

742

743void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,

744 MBBVectorTy &EpilogBBs) {

745

746

750 MI != ME;) {

751

752 if (MI->isInlineAsm()) {

753 ++MI;

754 continue;

755 }

756 bool SawStore = false;

757

758

759 if (MI->isSafeToMove(SawStore) && MI->isPHI()) {

760 ++MI;

761 continue;

762 }

763 bool used = true;

764 for (const MachineOperand &MO : MI->all_defs()) {

766

769 if (used)

770 break;

771 continue;

772 }

773 unsigned realUses = 0;

774 for (const MachineOperand &U : MRI.use_operands(reg)) {

775

776

777 if (U.getParent()->getParent() != BB) {

778 realUses++;

779 used = true;

780 break;

781 }

782 }

783 if (realUses > 0)

784 break;

785 used = false;

786 }

787 if (!used) {

788 LIS.RemoveMachineInstrFromMaps(*MI);

789 MI++->eraseFromParent();

790 continue;

791 }

792 ++MI;

793 }

794

795

797 Register reg = MI.getOperand(0).getReg();

798 if (MRI.use_begin(reg) == MRI.use_end()) {

799 LIS.RemoveMachineInstrFromMaps(MI);

800 MI.eraseFromParent();

801 }

802 }

803}

804

805

806

807

808

809

810

811

812

813

814

815void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB,

816 MBBVectorTy &EpilogBBs) {

817 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();

818 for (auto &PHI : KernelBB->phis()) {

820

821

823 E = MRI.use_instr_end();

824 I != E; ++I) {

825 if (I->isPHI() && I->getParent() == KernelBB) {

826

828 if (!LCDef)

829 continue;

830 MachineInstr *MI = MRI.getVRegDef(LCDef);

831 if (MI || MI->getParent() != KernelBB || MI->isPHI())

832 continue;

833

834

838 if (BBJ.readsRegister(Def, nullptr)) {

839

840 if (!SplitReg) {

841 SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));

842 MachineInstr *newCopy =

843 BuildMI(*KernelBB, MI, MI->getDebugLoc(),

844 TII->get(TargetOpcode::COPY), SplitReg)

846 LIS.InsertMachineInstrInMaps(*newCopy);

847 }

848 BBJ.substituteRegister(Def, SplitReg, 0, *TRI);

849 }

850 if (!SplitReg)

851 continue;

852

853 for (auto &Epilog : EpilogBBs)

855 if (I.readsRegister(Def, nullptr))

856 I.substituteRegister(Def, SplitReg, 0, *TRI);

857 break;

858 }

859 }

860 }

861}

862

863

864

865

866void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,

867 MBBVectorTy &PrologBBs,

869 MBBVectorTy &EpilogBBs,

870 ValueMapTy *VRMap) {

871 assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");

872 MachineBasicBlock *LastPro = KernelBB;

873 MachineBasicBlock *LastEpi = KernelBB;

874

875

876

877 unsigned MaxIter = PrologBBs.size() - 1;

878 for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {

879

880

881 MachineBasicBlock *Prolog = PrologBBs[j];

882 MachineBasicBlock *Epilog = EpilogBBs[i];

883

885 std::optional StaticallyGreater =

886 LoopInfo->createTripCountGreaterCondition(j + 1, *Prolog, Cond);

887 unsigned numAdded = 0;

888 if (!StaticallyGreater) {

891 } else if (*StaticallyGreater == false) {

893 Prolog->removeSuccessor(LastPro);

896 Epilog->removePHIsIncomingValuesForPredecessor(*LastEpi);

897

898 if (LastPro != LastEpi) {

899 for (auto &MI : *LastEpi)

900 LIS.RemoveMachineInstrFromMaps(MI);

901 LastEpi->clear();

902 LastEpi->eraseFromParent();

903 }

904 if (LastPro == KernelBB) {

905 LoopInfo->disposed(&LIS);

906 NewKernel = nullptr;

907 }

908 for (auto &MI : *LastPro)

909 LIS.RemoveMachineInstrFromMaps(MI);

910 LastPro->clear();

911 LastPro->eraseFromParent();

912 } else {

913 numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());

914 Epilog->removePHIsIncomingValuesForPredecessor(*Prolog);

915 }

919 E = Prolog->instr_rend();

920 I != E && numAdded > 0; ++I, --numAdded)

921 updateInstruction(&*I, false, j, 0, VRMap);

922 }

923

924 if (NewKernel) {

925 LoopInfo->setPreheader(PrologBBs[MaxIter]);

926 LoopInfo->adjustTripCount(-(MaxIter + 1));

927 }

928}

929

930

931

932bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {

933 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();

934 const MachineOperand *BaseOp;

936 bool OffsetIsScalable;

937 if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))

938 return false;

939

940

941 if (OffsetIsScalable)

942 return false;

943

944 if (!BaseOp->isReg())

945 return false;

946

948

949 MachineRegisterInfo &MRI = MF.getRegInfo();

950

951 MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);

952 if (BaseDef && BaseDef->isPHI()) {

954 BaseDef = MRI.getVRegDef(BaseReg);

955 }

956 if (!BaseDef)

957 return false;

958

959 int D = 0;

960 if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)

961 return false;

962

963 Delta = D;

964 return true;

965}

966

967

968

969

970void ModuloScheduleExpander::updateMemOperands(MachineInstr &NewMI,

972 unsigned Num) {

973 if (Num == 0)

974 return;

975

976

978 return;

980 for (MachineMemOperand *MMO : NewMI.memoperands()) {

981

982 if (MMO->isVolatile() || MMO->isAtomic() ||

983 (MMO->isInvariant() && MMO->isDereferenceable()) ||

984 (!MMO->getValue())) {

986 continue;

987 }

988 unsigned Delta;

989 if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {

990 int64_t AdjOffset = Delta * Num;

992 MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()));

993 } else {

994 NewMMOs.push_back(MF.getMachineMemOperand(

996 }

997 }

999}

1000

1001

1002

1004 unsigned CurStageNum,

1005 unsigned InstStageNum) {

1006 MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);

1007 updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);

1008 return NewMI;

1009}

1010

1011

1012

1013

1014MachineInstr *ModuloScheduleExpander::cloneAndChangeInstr(

1015 MachineInstr *OldMI, unsigned CurStageNum, unsigned InstStageNum) {

1016 MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);

1017 auto It = InstrChanges.find(OldMI);

1018 if (It != InstrChanges.end()) {

1019 std::pair<Register, int64_t> RegAndOffset = It->second;

1020 unsigned BasePos, OffsetPos;

1021 if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))

1022 return nullptr;

1024 MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);

1025 if (Schedule.getStage(LoopDef) > (signed)InstStageNum)

1026 NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);

1028 }

1029 updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);

1030 return NewMI;

1031}

1032

1033

1034

1035void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,

1036 bool LastDef,

1037 unsigned CurStageNum,

1038 unsigned InstrStageNum,

1039 ValueMapTy *VRMap) {

1040 for (MachineOperand &MO : NewMI->operands()) {

1042 continue;

1044 if (MO.isDef()) {

1045

1046 const TargetRegisterClass *RC = MRI.getRegClass(reg);

1047 Register NewReg = MRI.createVirtualRegister(RC);

1049 VRMap[CurStageNum][reg] = NewReg;

1050 if (LastDef)

1052 } else if (MO.isUse()) {

1053 MachineInstr *Def = MRI.getVRegDef(reg);

1054

1055 int DefStageNum = Schedule.getStage(Def);

1056 unsigned StageNum = CurStageNum;

1057 if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {

1058

1059 unsigned StageDiff = (InstrStageNum - DefStageNum);

1060

1061 StageNum -= StageDiff;

1062 }

1063 if (auto It = VRMap[StageNum].find(reg); It != VRMap[StageNum].end())

1064 MO.setReg(It->second);

1065 }

1066 }

1067}

1068

1069

1070

1071

1073 SmallPtrSet<MachineInstr *, 8> Visited;

1074 MachineInstr *Def = MRI.getVRegDef(Reg);

1075 while (Def->isPHI()) {

1076 if (!Visited.insert(Def).second)

1077 break;

1078 for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)

1079 if (Def->getOperand(i + 1).getMBB() == BB) {

1080 Def = MRI.getVRegDef(Def->getOperand(i).getReg());

1081 break;

1082 }

1083 }

1084 return Def;

1085}

1086

1087

1088Register ModuloScheduleExpander::getPrevMapVal(

1089 unsigned StageNum, unsigned PhiStage, Register LoopVal, unsigned LoopStage,

1092 if (StageNum > PhiStage) {

1093 MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);

1094 if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))

1095

1096 PrevVal = VRMap[StageNum - 1][LoopVal];

1097 else if (VRMap[StageNum].count(LoopVal))

1098

1099

1100 PrevVal = VRMap[StageNum][LoopVal];

1101 else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)

1102

1103 PrevVal = LoopVal;

1104 else if (StageNum == PhiStage + 1)

1105

1107 else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)

1108

1109 PrevVal =

1110 getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),

1111 LoopStage, VRMap, BB);

1112 }

1113 return PrevVal;

1114}

1115

1116

1117

1118

1119

1120void ModuloScheduleExpander::rewritePhiValues(MachineBasicBlock *NewBB,

1121 unsigned StageNum,

1122 ValueMapTy *VRMap,

1123 InstrMapTy &InstrMap) {

1124 for (auto &PHI : BB->phis()) {

1128 Register PhiDef = PHI.getOperand(0).getReg();

1129

1130 unsigned PhiStage = (unsigned)Schedule.getStage(MRI.getVRegDef(PhiDef));

1131 unsigned LoopStage = (unsigned)Schedule.getStage(MRI.getVRegDef(LoopVal));

1132 unsigned NumPhis = getStagesForPhi(PhiDef);

1133 if (NumPhis > StageNum)

1134 NumPhis = StageNum;

1135 for (unsigned np = 0; np <= NumPhis; ++np) {

1137 getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);

1138 if (!NewVal)

1139 NewVal = InitVal;

1140 rewriteScheduledInstr(NewBB, InstrMap, StageNum - np, np, &PHI, PhiDef,

1141 NewVal);

1142 }

1143 }

1144}

1145

1146

1147

1148

1149void ModuloScheduleExpander::rewriteScheduledInstr(

1150 MachineBasicBlock *BB, InstrMapTy &InstrMap, unsigned CurStageNum,

1153 bool InProlog = (CurStageNum < (unsigned)Schedule.getNumStages() - 1);

1154 int StagePhi = Schedule.getStage(Phi) + PhiNum;

1155

1156

1157 for (MachineOperand &UseOp :

1159 MachineInstr *UseMI = UseOp.getParent();

1161 continue;

1164 continue;

1166 continue;

1167 }

1169 assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");

1170 MachineInstr *OrigMI = OrigInstr->second;

1171 int StageSched = Schedule.getStage(OrigMI);

1172 int CycleSched = Schedule.getCycle(OrigMI);

1174

1175 if (StagePhi == StageSched && Phi->isPHI()) {

1176 int CyclePhi = Schedule.getCycle(Phi);

1177 if (PrevReg && InProlog)

1178 ReplaceReg = PrevReg;

1179 else if (PrevReg && !isLoopCarried(*Phi) &&

1180 (CyclePhi <= CycleSched || OrigMI->isPHI()))

1181 ReplaceReg = PrevReg;

1182 else

1183 ReplaceReg = NewReg;

1184 }

1185

1186

1187 if (!InProlog && StagePhi + 1 == StageSched && !isLoopCarried(*Phi))

1188 ReplaceReg = NewReg;

1189 if (StagePhi > StageSched && Phi->isPHI())

1190 ReplaceReg = NewReg;

1191 if (!InProlog && Phi->isPHI() && StagePhi < StageSched)

1192 ReplaceReg = NewReg;

1193 if (ReplaceReg) {

1194 const TargetRegisterClass *NRC =

1195 MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));

1196 if (NRC)

1197 UseOp.setReg(ReplaceReg);

1198 else {

1199 Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));

1201 TII->get(TargetOpcode::COPY), SplitReg)

1202 .addReg(ReplaceReg);

1203 UseOp.setReg(SplitReg);

1204 LIS.InsertMachineInstrInMaps(*newCopy);

1205 }

1206 }

1207 }

1208}

1209

1210bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) {

1211 if (Phi.isPHI())

1212 return false;

1213 int DefCycle = Schedule.getCycle(&Phi);

1214 int DefStage = Schedule.getStage(&Phi);

1215

1218 getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);

1219 MachineInstr *Use = MRI.getVRegDef(LoopVal);

1220 if (!Use || Use->isPHI())

1221 return true;

1222 int LoopCycle = Schedule.getCycle(Use);

1223 int LoopStage = Schedule.getStage(Use);

1224 return (LoopCycle > DefCycle) || (LoopStage <= DefStage);

1225}

1226

1227

1228

1229

1230

1231

1232

1233

1234namespace {

1235

1236

1238 LiveIntervals *LIS, bool KeepSingleSrcPhi = false) {

1244 if (MRI.use_empty(MI.getOperand(0).getReg())) {

1245 if (LIS)

1247 MI.eraseFromParent();

1249 } else if (!KeepSingleSrcPhi && MI.getNumExplicitOperands() == 3) {

1251 MRI.constrainRegClass(MI.getOperand(1).getReg(),

1252 MRI.getRegClass(MI.getOperand(0).getReg()));

1253 assert(ConstrainRegClass &&

1254 "Expected a valid constrained register class!");

1255 (void)ConstrainRegClass;

1256 MRI.replaceRegWith(MI.getOperand(0).getReg(),

1257 MI.getOperand(1).getReg());

1258 if (LIS)

1260 MI.eraseFromParent();

1262 }

1263 }

1264 }

1265}

1266

1267

1268

1269class KernelRewriter {

1270 ModuloSchedule &S;

1271 MachineBasicBlock *BB;

1272 MachineBasicBlock *PreheaderBB, *ExitBB;

1273 MachineRegisterInfo &MRI;

1274 const TargetInstrInfo *TII;

1275 LiveIntervals *LIS;

1276

1277

1278 DenseMap<const TargetRegisterClass *, Register> Undefs;

1279

1280

1281 DenseMap<std::pair<Register, Register>, Register> Phis;

1282

1283 DenseMap<Register, Register> UndefPhis;

1284

1285

1286

1288

1289

1290

1292 const TargetRegisterClass *RC = nullptr);

1293

1294 Register undef(const TargetRegisterClass *RC);

1295

1296public:

1297 KernelRewriter(MachineLoop &L, ModuloSchedule &S, MachineBasicBlock *LoopBB,

1298 LiveIntervals *LIS = nullptr);

1299 void rewrite();

1300};

1301}

1302

1305 : S(S), BB(LoopBB), PreheaderBB(L.getLoopPreheader()),

1306 ExitBB(L.getExitBlock()), MRI(BB->getParent()->getRegInfo()),

1307 TII(BB->getParent()->getSubtarget().getInstrInfo()), LIS(LIS) {

1309 if (PreheaderBB == BB)

1310 PreheaderBB = *std::next(BB->pred_begin());

1311}

1312

1313void KernelRewriter::rewrite() {

1314

1315

1316

1317

1319 MachineInstr *FirstMI = nullptr;

1321 if (MI->isPHI())

1322 continue;

1323 if (MI->getParent())

1324 MI->removeFromParent();

1326 if (!FirstMI)

1327 FirstMI = MI;

1328 }

1329 assert(FirstMI && "Failed to find first MI in schedule");

1330

1331

1332

1334 if (LIS)

1336 (I++)->eraseFromParent();

1337 }

1338

1339

1340 for (MachineInstr &MI : *BB) {

1341 if (MI.isPHI() || MI.isTerminator())

1342 continue;

1343 for (MachineOperand &MO : MI.uses()) {

1345 continue;

1348 }

1349 }

1350 EliminateDeadPhis(BB, MRI, LIS);

1351

1352

1353

1354

1355

1356 for (auto MI = BB->getFirstNonPHI(); MI != BB->end(); ++MI) {

1357 if (MI->isPHI()) {

1358 Register R = MI->getOperand(0).getReg();

1360 continue;

1361 }

1362

1363 for (MachineOperand &Def : MI->defs()) {

1364 for (MachineInstr &MI : MRI.use_instructions(Def.getReg())) {

1365 if (MI.getParent() != BB) {

1367 break;

1368 }

1369 }

1370 }

1371 }

1372}

1373

1376 if (!Producer)

1377 return Reg;

1378

1379 int ConsumerStage = S.getStage(&MI);

1381

1382

1383 if (Producer->getParent() != BB)

1384

1385 return Reg;

1386 int ProducerStage = S.getStage(Producer);

1387 assert(ConsumerStage != -1 &&

1388 "In-loop consumer should always be scheduled!");

1389 assert(ConsumerStage >= ProducerStage);

1390 unsigned StageDiff = ConsumerStage - ProducerStage;

1391

1392 for (unsigned I = 0; I < StageDiff; ++I)

1394 return Reg;

1395 }

1396

1397

1398

1401 auto LoopProducer = Producer;

1402 while (LoopProducer->isPHI() && LoopProducer->getParent() == BB) {

1405 LoopProducer = MRI.getUniqueVRegDef(LoopReg);

1406 assert(LoopProducer);

1407 }

1408 int LoopProducerStage = S.getStage(LoopProducer);

1409

1410 std::optional IllegalPhiDefault;

1411

1412 if (LoopProducerStage == -1) {

1413

1414 } else if (LoopProducerStage > ConsumerStage) {

1415

1416

1417

1418

1419#ifndef NDEBUG

1420 int LoopProducerCycle = S.getCycle(LoopProducer);

1421 int ConsumerCycle = S.getCycle(&MI);

1422#endif

1423 assert(LoopProducerCycle <= ConsumerCycle);

1424 assert(LoopProducerStage == ConsumerStage + 1);

1425

1426

1427

1428

1429

1430

1431 IllegalPhiDefault = Defaults.front();

1433 } else {

1434 assert(ConsumerStage >= LoopProducerStage);

1435 int StageDiff = ConsumerStage - LoopProducerStage;

1436 if (StageDiff > 0) {

1437 LLVM_DEBUG(dbgs() << " -- padding defaults array from " << Defaults.size()

1438 << " to " << (Defaults.size() + StageDiff) << "\n");

1439

1440

1441

1442 Defaults.resize(Defaults.size() + StageDiff,

1443 Defaults.empty() ? std::optional()

1444 : Defaults.back());

1445 }

1446 }

1447

1448

1449 auto DefaultI = Defaults.rbegin();

1450 while (DefaultI != Defaults.rend())

1451 LoopReg = phi(LoopReg, *DefaultI++, MRI.getRegClass(Reg));

1452

1453 if (IllegalPhiDefault) {

1454

1455

1456

1457

1458

1459 auto RC = MRI.getRegClass(Reg);

1461 MachineInstr *IllegalPhi =

1463 .addReg(*IllegalPhiDefault)

1464 .addMBB(PreheaderBB)

1466 .addMBB(BB);

1467

1468

1469 S.setStage(IllegalPhi, LoopProducerStage);

1470 return R;

1471 }

1472

1473 return LoopReg;

1474}

1475

1476Register KernelRewriter::phi(Register LoopReg, std::optional InitReg,

1477 const TargetRegisterClass *RC) {

1478

1479 if (InitReg) {

1480 auto I = Phis.find({LoopReg, *InitReg});

1481 if (I != Phis.end())

1482 return I->second;

1483 } else {

1484 for (auto &KV : Phis) {

1485 if (KV.first.first == LoopReg)

1486 return KV.second;

1487 }

1488 }

1489

1490

1491

1492 auto I = UndefPhis.find(LoopReg);

1493 if (I != UndefPhis.end()) {

1495 if (!InitReg)

1496

1497

1498 return R;

1499

1500 MachineInstr *MI = MRI.getVRegDef(R);

1501 MI->getOperand(1).setReg(*InitReg);

1502 Phis.insert({{LoopReg, *InitReg}, R});

1503 const TargetRegisterClass *ConstrainRegClass =

1504 MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));

1505 assert(ConstrainRegClass && "Expected a valid constrained register class!");

1506 (void)ConstrainRegClass;

1508 return R;

1509 }

1510

1511

1512 if (!RC)

1513 RC = MRI.getRegClass(LoopReg);

1515 if (InitReg) {

1516 const TargetRegisterClass *ConstrainRegClass =

1517 MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));

1518 assert(ConstrainRegClass && "Expected a valid constrained register class!");

1519 (void)ConstrainRegClass;

1520 }

1522 .addReg(InitReg ? *InitReg : undef(RC))

1523 .addMBB(PreheaderBB)

1526 if (!InitReg)

1527 UndefPhis[LoopReg] = R;

1528 else

1529 Phis[{LoopReg, *InitReg}] = R;

1530 return R;

1531}

1532

1533Register KernelRewriter::undef(const TargetRegisterClass *RC) {

1535 if (R == 0) {

1536

1537

1538

1539 R = MRI.createVirtualRegister(RC);

1540 auto *InsertBB = &PreheaderBB->getParent()->front();

1541 BuildMI(*InsertBB, InsertBB->getFirstTerminator(), DebugLoc(),

1542 TII->get(TargetOpcode::IMPLICIT_DEF), R);

1543 }

1544 return R;

1545}

1546

1547namespace {

1548

1549

1550

1551class KernelOperandInfo {

1552 MachineBasicBlock *BB;

1553 MachineRegisterInfo &MRI;

1555 MachineOperand *Source;

1556 MachineOperand *Target;

1557

1558public:

1559 KernelOperandInfo(MachineOperand *MO, MachineRegisterInfo &MRI,

1560 const SmallPtrSetImpl<MachineInstr *> &IllegalPhis)

1564 while (isRegInLoop(MO)) {

1565 MachineInstr *MI = MRI.getVRegDef(MO->getReg());

1566 if (MI->isFullCopy()) {

1567 MO = &MI->getOperand(1);

1568 continue;

1569 }

1570 if (MI->isPHI())

1571 break;

1572

1573 if (IllegalPhis.count(MI)) {

1574 MO = &MI->getOperand(3);

1575 continue;

1576 }

1577

1579 MO = MI->getOperand(2).getMBB() == BB ? &MI->getOperand(1)

1580 : &MI->getOperand(3);

1582 }

1584 }

1585

1587 return PhiDefaults.size() == Other.PhiDefaults.size();

1588 }

1589

1590 void print(raw_ostream &OS) const {

1591 OS << "use of " << *Source << ": distance(" << PhiDefaults.size() << ") in "

1592 << *Source->getParent();

1593 }

1594

1595private:

1596 bool isRegInLoop(MachineOperand *MO) {

1598 MRI.getVRegDef(MO->getReg())->getParent() == BB;

1599 }

1600};

1601}

1602

1603MachineBasicBlock *

1608 else

1610 for (auto I = BB->begin(), NI = NewBB->begin(); I->isTerminator();

1611 ++I, ++NI) {

1616 }

1617 return NewBB;

1618}

1619

1621 int MinStage) {

1623 I != std::next(MB->getFirstNonPHI()->getReverseIterator());) {

1626 if (Stage == -1 || Stage >= MinStage)

1627 continue;

1628

1632

1633

1636 MI->getParent());

1638 }

1639 for (auto &Sub : Subs)

1640 Sub.first->substituteRegister(DefMO.getReg(), Sub.second, 0,

1641 *MRI.getTargetRegisterInfo());

1642 }

1643 if (LIS)

1644 LIS->RemoveMachineInstrFromMaps(*MI);

1645 MI->eraseFromParent();

1646 }

1647}

1648

1655 if (MI.isPHI()) {

1656

1657

1659

1660

1661 Register PhiR = MI.getOperand(0).getReg();

1662 auto RC = MRI.getRegClass(PhiR);

1663 Register NR = MRI.createVirtualRegister(RC);

1665 DebugLoc(), TII->get(TargetOpcode::PHI), NR)

1670 Remaps[PhiR] = NR;

1671 }

1672 }

1674 continue;

1675 MI.removeFromParent();

1676 DestBB->insert(InsertPt, &MI);

1679 BlockMIs.erase({SourceBB, KernelMI});

1680 }

1683 assert(MI.getNumOperands() == 3);

1685

1686

1687 if (getStage(Def) == Stage) {

1688 Register PhiReg = MI.getOperand(0).getReg();

1689 assert(Def->findRegisterDefOperandIdx(MI.getOperand(1).getReg(),

1690 nullptr) != -1);

1691 MRI.replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());

1692 MI.getOperand(0).setReg(PhiReg);

1694 }

1695 }

1696 for (auto *P : PhiToDelete)

1697 P->eraseFromParent();

1699

1700

1703 DestBB->insert(InsertPt, NewMI);

1704 Register OrigR = Phi->getOperand(0).getReg();

1705 Register R = MRI.createVirtualRegister(MRI.getRegClass(OrigR));

1709 Remaps[OrigR] = R;

1713 return R;

1714 };

1717 if (!MO.isReg())

1718 continue;

1719 if (auto It = Remaps.find(MO.getReg()); It != Remaps.end())

1720 MO.setReg(It->second);

1721 else {

1722

1723

1725 if (Use && Use->isPHI() && Use->getParent() == SourceBB) {

1728 }

1729 }

1730 }

1731 }

1732}

1733

1740 for (unsigned I = 0; I < distance; ++I) {

1743 unsigned LoopRegIdx = 3, InitRegIdx = 1;

1745 std::swap(LoopRegIdx, InitRegIdx);

1746 CanonicalUseReg = CanonicalUse->getOperand(LoopRegIdx).getReg();

1747 CanonicalUse = MRI.getVRegDef(CanonicalUseReg);

1748 }

1749 return CanonicalUseReg;

1750}

1751

1757

1758

1759 LS.reset();

1760 for (int I = 0; I < Schedule.getNumStages() - 1; ++I) {

1761 LS[I] = true;

1765 }

1766

1767

1768

1769

1770

1771

1772

1774 EliminateDeadPhis(ExitingBB, MRI, LIS, true);

1775

1776

1777

1778

1779

1780

1781

1782

1783

1784

1785

1786

1787

1788

1789

1790

1791 for (int I = 1; I <= Schedule.getNumStages() - 1; ++I) {

1795

1796

1797 EliminateDeadPhis(B, MRI, LIS, true);

1800 }

1801 for (size_t I = 0; I < Epilogs.size(); I++) {

1802 LS.reset();

1803 for (size_t J = I; J < Epilogs.size(); J++) {

1804 int Iteration = J;

1805 unsigned Stage = Schedule.getNumStages() - 1 + I - J;

1806

1807 for (size_t K = Iteration; K > I; K--)

1809 LS[Stage] = true;

1810 }

1813 }

1814

1815

1816

1817

1818 auto PI = Prologs.begin();

1819 auto EI = Epilogs.begin();

1821 for (; PI != Prologs.end(); ++PI, ++EI) {

1823 (*PI)->addSuccessor(*EI);

1825 Register Reg = MI.getOperand(1).getReg();

1827 if (Use && Use->getParent() == Pred) {

1829 if (CanonicalUse->isPHI()) {

1830

1831

1832

1834 }

1836 }

1839 }

1840 }

1841

1842

1847

1848

1850 for (auto I = B->instr_rbegin();

1851 I != std::next(B->getFirstNonPHI()->getReverseIterator());) {

1854 }

1855 }

1857 if (LIS)

1858 LIS->RemoveMachineInstrFromMaps(*MI);

1859 MI->eraseFromParent();

1860 }

1862

1863

1865 EliminateDeadPhis(B, MRI, LIS);

1866 EliminateDeadPhis(ExitingBB, MRI, LIS);

1867}

1868

1872 if (Exit == BB)

1873 Exit = *std::next(BB->succ_begin());

1874

1876 MF.insert(std::next(BB->getIterator()), NewBB);

1877

1878

1880 auto RC = MRI.getRegClass(MI.getOperand(0).getReg());

1881 Register OldR = MI.getOperand(3).getReg();

1882 Register R = MRI.createVirtualRegister(RC);

1885 if (Use.getParent() != BB)

1888 Use->substituteRegister(OldR, R, 0,

1889 *MRI.getTargetRegisterInfo());

1895 }

1896 BB->replaceSuccessor(Exit, NewBB);

1897 Exit->replacePhiUsesWith(BB, NewBB);

1899

1902 bool CanAnalyzeBr = TII->analyzeBranch(*BB, TBB, FBB, Cond);

1903 (void)CanAnalyzeBr;

1904 assert(CanAnalyzeBr && "Must be able to analyze the loop branch!");

1905 TII->removeBranch(*BB);

1906 TII->insertBranch(*BB, TBB == Exit ? NewBB : TBB, FBB == Exit ? NewBB : FBB,

1908 TII->insertUnconditionalBranch(*NewBB, Exit, DebugLoc());

1909 return NewBB;

1910}

1911

1916 unsigned OpIdx = MI->findRegisterDefOperandIdx(Reg, nullptr);

1918}

1919

1921 if (MI->isPHI()) {

1922

1923

1924 Register PhiR = MI->getOperand(0).getReg();

1925 Register R = MI->getOperand(3).getReg();

1926 int RMIStage = getStage(MRI.getUniqueVRegDef(R));

1927 if (RMIStage != -1 && AvailableStages[MI->getParent()].test(RMIStage))

1928 R = MI->getOperand(1).getReg();

1929 MRI.setRegClass(R, MRI.getRegClass(PhiR));

1930 MRI.replaceRegWith(PhiR, R);

1931

1932

1933 MI->getOperand(0).setReg(PhiR);

1935 return;

1936 }

1937

1939 if (Stage == -1 || LiveStages.count(MI->getParent()) == 0 ||

1941

1942 return;

1943

1947

1948

1951 MI->getParent());

1953 }

1954 for (auto &Sub : Subs)

1955 Sub.first->substituteRegister(DefMO.getReg(), Sub.second, 0,

1956 *MRI.getTargetRegisterInfo());

1957 }

1958 if (LIS)

1959 LIS->RemoveMachineInstrFromMaps(*MI);

1960 MI->eraseFromParent();

1961}

1962

1964

1965 bool KernelDisposed = false;

1966 int TC = Schedule.getNumStages() - 1;

1968 ++PI, ++EI, --TC) {

1974 std::optional StaticallyGreater =

1976 if (!StaticallyGreater) {

1977 LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n");

1978

1980 } else if (*StaticallyGreater == false) {

1981 LLVM_DEBUG(dbgs() << "Static-false: TC > " << TC << "\n");

1982

1983

1984 Prolog->removeSuccessor(Fallthrough);

1986 P.removeOperand(2);

1987 P.removeOperand(1);

1988 }

1990 KernelDisposed = true;

1991 } else {

1992 LLVM_DEBUG(dbgs() << "Static-true: TC > " << TC << "\n");

1993

1996 P.removeOperand(4);

1997 P.removeOperand(3);

1998 }

1999 }

2000 }

2001

2002 if (!KernelDisposed) {

2005 } else {

2007 }

2008}

2009

2014

2016 BB = Schedule.getLoop()->getTopBlock();

2021

2025}

2026

2028 BB = Schedule.getLoop()->getTopBlock();

2030

2031

2032

2033 std::string ScheduleDump;

2037

2038

2039

2040 assert(LIS && "Requires LiveIntervals!");

2045 if (!ExpandedKernel) {

2046

2048 return;

2049 }

2050

2052

2053

2055 KR.rewrite();

2057

2058

2059

2061 for (auto NI = BB->getFirstNonPHI(); NI != BB->end(); ++NI) {

2062 if (NI->isPHI())

2063 IllegalPhis.insert(&*NI);

2064 }

2065

2066

2067

2069 auto OI = ExpandedKernel->begin();

2070 auto NI = BB->begin();

2071 for (; !OI->isTerminator() && !NI->isTerminator(); ++OI, ++NI) {

2072 while (OI->isPHI() || OI->isFullCopy())

2073 ++OI;

2074 while (NI->isPHI() || NI->isFullCopy())

2075 ++NI;

2076 assert(OI->getOpcode() == NI->getOpcode() && "Opcodes don't match?!");

2077

2078 for (auto OOpI = OI->operands_begin(), NOpI = NI->operands_begin();

2079 OOpI != OI->operands_end(); ++OOpI, ++NOpI)

2080 KOIs.emplace_back(KernelOperandInfo(&*OOpI, MRI, IllegalPhis),

2081 KernelOperandInfo(&*NOpI, MRI, IllegalPhis));

2082 }

2083

2084 bool Failed = false;

2085 for (auto &OldAndNew : KOIs) {

2086 if (OldAndNew.first == OldAndNew.second)

2087 continue;

2089 errs() << "Modulo kernel validation error: [\n";

2090 errs() << " [golden] ";

2091 OldAndNew.first.print(errs());

2092 errs() << " ";

2093 OldAndNew.second.print(errs());

2094 errs() << "]\n";

2095 }

2096

2098 errs() << "Golden reference kernel:\n";

2100 errs() << "New kernel:\n";

2102 errs() << ScheduleDump;

2104 "Modulo kernel validation (-pipeliner-experimental-cg) failed");

2105 }

2106

2107

2108

2111}

2112

2114 MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);

2115

2116

2118

2119 return NewMI;

2120}

2121

2122

2123

2124

2128 if (Exit->pred_size() == 1)

2129 return Exit;

2130

2133

2136 MF->insert(Loop->getIterator(), NewExit);

2138

2143 FBB = NewExit;

2144 else if (FBB == Loop)

2145 TBB = NewExit;

2146 else

2148 TII->removeBranch(*Loop);

2150 Loop->replaceSuccessor(Exit, NewExit);

2151 TII->insertUnconditionalBranch(*NewExit, Exit, DebugLoc());

2153

2154 Exit->replacePhiUsesWith(Loop, NewExit);

2155

2156 return NewExit;

2157}

2158

2159

2160

2161

2162void ModuloScheduleExpanderMVE::insertCondBranch(MachineBasicBlock &MBB,

2163 int RequiredTC,

2164 InstrMapTy &LastStage0Insts,

2165 MachineBasicBlock &GreaterThan,

2166 MachineBasicBlock &Otherwise) {

2168 LoopInfo->createRemainingIterationsGreaterCondition(RequiredTC, MBB, Cond,

2169 LastStage0Insts);

2170

2172

2173

2177 } else {

2179 }

2180}

2181

2182

2183

2184

2185

2186

2187void ModuloScheduleExpanderMVE::generatePipelinedLoop() {

2188

2189

2190

2191

2192

2193

2194

2195

2196

2197

2198

2199

2200

2201

2202

2203

2204

2205

2206

2207

2208

2209

2210

2211

2212

2213

2214

2215

2216

2217

2218

2219

2220

2221

2222

2223

2224

2225

2226

2227

2228

2229

2230

2231

2232

2233

2234

2235

2236

2237

2238

2239

2240

2241

2242

2243

2244

2245

2246

2247

2248

2249

2250

2251

2252

2253

2254

2255

2256

2257

2258

2259

2260

2262 assert(LoopInfo && "Must be able to analyze loop!");

2263

2264 calcNumUnroll();

2265

2266 Check = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());

2267 Prolog = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());

2268 NewKernel = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());

2269 Epilog = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());

2270 NewPreheader = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());

2271

2272 MF.insert(OrigKernel->getIterator(), Check);

2274 MF.insert(OrigKernel->getIterator(), Prolog);

2276 MF.insert(OrigKernel->getIterator(), NewKernel);

2278 MF.insert(OrigKernel->getIterator(), Epilog);

2280 MF.insert(OrigKernel->getIterator(), NewPreheader);

2282

2284

2285 NewPreheader->transferSuccessorsAndUpdatePHIs(OrigPreheader);

2287

2288 OrigPreheader->addSuccessor(Check);

2291

2293 Check->addSuccessor(NewPreheader);

2294

2295 Prolog->addSuccessor(NewKernel);

2296

2297 NewKernel->addSuccessor(NewKernel);

2298 NewKernel->addSuccessor(Epilog);

2299

2300 Epilog->addSuccessor(NewPreheader);

2301 Epilog->addSuccessor(NewExit);

2302

2303 InstrMapTy LastStage0Insts;

2304 insertCondBranch(*Check, Schedule.getNumStages() + NumUnroll - 2,

2305 LastStage0Insts, *Prolog, *NewPreheader);

2306

2307

2308

2310 generateProlog(PrologVRMap);

2311 generateKernel(PrologVRMap, KernelVRMap, LastStage0Insts);

2312 generateEpilog(KernelVRMap, EpilogVRMap, LastStage0Insts);

2313}

2314

2315

2316void ModuloScheduleExpanderMVE::updateInstrUse(

2317 MachineInstr *MI, int StageNum, int PhaseNum,

2318 SmallVectorImpl &CurVRMap,

2319 SmallVectorImpl *PrevVRMap) {

2320

2321

2322

2323

2324

2325

2326 for (MachineOperand &UseMO : MI->uses()) {

2327 if (!UseMO.isReg() || !UseMO.getReg().isVirtual())

2328 continue;

2329 int DiffStage = 0;

2330 Register OrigReg = UseMO.getReg();

2331 MachineInstr *DefInst = MRI.getVRegDef(OrigReg);

2332 if (!DefInst || DefInst->getParent() != OrigKernel)

2333 continue;

2336 if (DefInst->isPHI()) {

2337 ++DiffStage;

2339 getPhiRegs(*DefInst, OrigKernel, InitReg, LoopReg);

2340

2341 DefReg = LoopReg;

2342 DefInst = MRI.getVRegDef(LoopReg);

2343 }

2344 unsigned DefStageNum = Schedule.getStage(DefInst);

2345 DiffStage += StageNum - DefStageNum;

2347 if (PhaseNum >= DiffStage && CurVRMap[PhaseNum - DiffStage].count(DefReg))

2348

2349 NewReg = CurVRMap[PhaseNum - DiffStage][DefReg];

2350 else if (!PrevVRMap)

2351

2352

2353 NewReg = InitReg;

2354 else

2355

2356

2357

2358

2359 NewReg = (*PrevVRMap)[PrevVRMap->size() - (DiffStage - PhaseNum)][DefReg];

2360

2361 const TargetRegisterClass *NRC =

2362 MRI.constrainRegClass(NewReg, MRI.getRegClass(OrigReg));

2363 if (NRC)

2364 UseMO.setReg(NewReg);

2365 else {

2366 Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));

2367 MachineInstr *NewCopy = BuildMI(*OrigKernel, MI, MI->getDebugLoc(),

2368 TII->get(TargetOpcode::COPY), SplitReg)

2371 UseMO.setReg(SplitReg);

2372 }

2373 }

2374}

2375

2376

2377

2382 if (LoopVal == Reg)

2383 return Φ

2384 }

2385 return nullptr;

2386}

2387

2388

2389void ModuloScheduleExpanderMVE::generatePhi(

2390 MachineInstr *OrigMI, int UnrollNum,

2391 SmallVectorImpl &PrologVRMap,

2392 SmallVectorImpl &KernelVRMap,

2393 SmallVectorImpl &PhiVRMap) {

2394 int StageNum = Schedule.getStage(OrigMI);

2395 bool UsePrologReg;

2396 if (Schedule.getNumStages() - NumUnroll + UnrollNum - 1 >= StageNum)

2397 UsePrologReg = true;

2398 else if (Schedule.getNumStages() - NumUnroll + UnrollNum == StageNum)

2399 UsePrologReg = false;

2400 else

2401 return;

2402

2403

2404

2405

2406

2407

2408

2409

2410

2411

2412

2413

2414

2415

2416

2417

2418

2419

2420

2421

2422

2423

2424

2425

2426

2427

2428

2429

2430

2431

2432

2433

2434 for (MachineOperand &DefMO : OrigMI->defs()) {

2435 if (!DefMO.isReg() || DefMO.isDead())

2436 continue;

2437 Register OrigReg = DefMO.getReg();

2438 auto NewReg = KernelVRMap[UnrollNum].find(OrigReg);

2439 if (NewReg == KernelVRMap[UnrollNum].end())

2440 continue;

2442 if (UsePrologReg) {

2443 int PrologNum = Schedule.getNumStages() - NumUnroll + UnrollNum - 1;

2444 CorrespondReg = PrologVRMap[PrologNum][OrigReg];

2445 } else {

2447 if (!Phi)

2448 continue;

2449 CorrespondReg = getInitPhiReg(*Phi, OrigKernel);

2450 }

2451

2453 Register PhiReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));

2454 MachineInstr *NewPhi =

2455 BuildMI(*NewKernel, NewKernel->getFirstNonPHI(), DebugLoc(),

2456 TII->get(TargetOpcode::PHI), PhiReg)

2457 .addReg(NewReg->second)

2459 .addReg(CorrespondReg)

2462 PhiVRMap[UnrollNum][OrigReg] = PhiReg;

2463 }

2464}

2465

2468 for (unsigned Idx = 1; Idx < Phi.getNumOperands(); Idx += 2) {

2469 if (Phi.getOperand(Idx).getReg() == OrigReg) {

2470 Phi.getOperand(Idx).setReg(NewReg);

2471 Phi.getOperand(Idx + 1).setMBB(NewMBB);

2472 return;

2473 }

2474 }

2475}

2476

2477

2478void ModuloScheduleExpanderMVE::mergeRegUsesAfterPipeline(Register OrigReg,

2483 E = MRI.use_end();

2484 I != E; ++I) {

2485 MachineOperand &O = *I;

2486 if (O.getParent()->getParent() != OrigKernel &&

2487 O.getParent()->getParent() != Prolog &&

2488 O.getParent()->getParent() != NewKernel &&

2489 O.getParent()->getParent() != Epilog)

2491 if (O.getParent()->getParent() == OrigKernel && O.getParent()->isPHI())

2493 }

2494

2495

2496

2497 if (!UsesAfterLoop.empty()) {

2498 Register PhiReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));

2499 MachineInstr *NewPhi =

2500 BuildMI(*NewExit, NewExit->getFirstNonPHI(), DebugLoc(),

2501 TII->get(TargetOpcode::PHI), PhiReg)

2507

2508 for (MachineOperand *MO : UsesAfterLoop)

2510

2511

2512

2515 }

2516

2517

2518

2519 if (!LoopPhis.empty()) {

2520 for (MachineInstr *Phi : LoopPhis) {

2522 getPhiRegs(*Phi, OrigKernel, InitReg, LoopReg);

2523 Register NewInit = MRI.createVirtualRegister(MRI.getRegClass(InitReg));

2524 MachineInstr *NewPhi =

2525 BuildMI(*NewPreheader, NewPreheader->getFirstNonPHI(),

2526 Phi->getDebugLoc(), TII->get(TargetOpcode::PHI), NewInit)

2532 replacePhiSrc(*Phi, InitReg, NewInit, NewPreheader);

2533 }

2534 }

2535}

2536

2537void ModuloScheduleExpanderMVE::generateProlog(

2538 SmallVectorImpl &PrologVRMap) {

2539 PrologVRMap.clear();

2540 PrologVRMap.resize(Schedule.getNumStages() - 1);

2541 DenseMap<MachineInstr *, std::pair<int, int>> NewMIMap;

2542 for (int PrologNum = 0; PrologNum < Schedule.getNumStages() - 1;

2543 ++PrologNum) {

2544 for (MachineInstr *MI : Schedule.getInstructions()) {

2545 if (MI->isPHI())

2546 continue;

2547 int StageNum = Schedule.getStage(MI);

2548 if (StageNum > PrologNum)

2549 continue;

2551 updateInstrDef(NewMI, PrologVRMap[PrologNum], false);

2552 NewMIMap[NewMI] = {PrologNum, StageNum};

2553 Prolog->push_back(NewMI);

2555 }

2556 }

2557

2558 for (auto I : NewMIMap) {

2559 MachineInstr *MI = I.first;

2560 int PrologNum = I.second.first;

2561 int StageNum = I.second.second;

2562 updateInstrUse(MI, StageNum, PrologNum, PrologVRMap, nullptr);

2563 }

2564

2566 dbgs() << "prolog:\n";

2568 });

2569}

2570

2571void ModuloScheduleExpanderMVE::generateKernel(

2572 SmallVectorImpl &PrologVRMap,

2573 SmallVectorImpl &KernelVRMap, InstrMapTy &LastStage0Insts) {

2574 KernelVRMap.clear();

2575 KernelVRMap.resize(NumUnroll);

2577 PhiVRMap.resize(NumUnroll);

2578 DenseMap<MachineInstr *, std::pair<int, int>> NewMIMap;

2579 for (int UnrollNum = 0; UnrollNum < NumUnroll; ++UnrollNum) {

2580 for (MachineInstr *MI : Schedule.getInstructions()) {

2581 if (MI->isPHI())

2582 continue;

2583 int StageNum = Schedule.getStage(MI);

2585 if (UnrollNum == NumUnroll - 1)

2586 LastStage0Insts[MI] = NewMI;

2587 updateInstrDef(NewMI, KernelVRMap[UnrollNum],

2588 (UnrollNum == NumUnroll - 1 && StageNum == 0));

2589 generatePhi(MI, UnrollNum, PrologVRMap, KernelVRMap, PhiVRMap);

2590 NewMIMap[NewMI] = {UnrollNum, StageNum};

2591 NewKernel->push_back(NewMI);

2593 }

2594 }

2595

2596 for (auto I : NewMIMap) {

2597 MachineInstr *MI = I.first;

2598 int UnrollNum = I.second.first;

2599 int StageNum = I.second.second;

2600 updateInstrUse(MI, StageNum, UnrollNum, KernelVRMap, &PhiVRMap);

2601 }

2602

2603

2604 insertCondBranch(*NewKernel, NumUnroll - 1, LastStage0Insts, *NewKernel,

2606

2608 dbgs() << "kernel:\n";

2609 NewKernel->dump();

2610 });

2611}

2612

2613void ModuloScheduleExpanderMVE::generateEpilog(

2614 SmallVectorImpl &KernelVRMap,

2615 SmallVectorImpl &EpilogVRMap, InstrMapTy &LastStage0Insts) {

2616 EpilogVRMap.clear();

2617 EpilogVRMap.resize(Schedule.getNumStages() - 1);

2618 DenseMap<MachineInstr *, std::pair<int, int>> NewMIMap;

2619 for (int EpilogNum = 0; EpilogNum < Schedule.getNumStages() - 1;

2620 ++EpilogNum) {

2621 for (MachineInstr *MI : Schedule.getInstructions()) {

2622 if (MI->isPHI())

2623 continue;

2624 int StageNum = Schedule.getStage(MI);

2625 if (StageNum <= EpilogNum)

2626 continue;

2628 updateInstrDef(NewMI, EpilogVRMap[EpilogNum], StageNum - 1 == EpilogNum);

2629 NewMIMap[NewMI] = {EpilogNum, StageNum};

2630 Epilog->push_back(NewMI);

2632 }

2633 }

2634

2635 for (auto I : NewMIMap) {

2636 MachineInstr *MI = I.first;

2637 int EpilogNum = I.second.first;

2638 int StageNum = I.second.second;

2639 updateInstrUse(MI, StageNum, EpilogNum, EpilogVRMap, &KernelVRMap);

2640 }

2641

2642

2643

2644

2645

2646 insertCondBranch(*Epilog, 0, LastStage0Insts, *NewPreheader, *NewExit);

2647

2649 dbgs() << "epilog:\n";

2651 });

2652}

2653

2654

2655void ModuloScheduleExpanderMVE::calcNumUnroll() {

2656 DenseMap<MachineInstr *, unsigned> Inst2Idx;

2657 NumUnroll = 1;

2658 for (unsigned I = 0; I < Schedule.getInstructions().size(); ++I)

2659 Inst2Idx[Schedule.getInstructions()[I]] = I;

2660

2661 for (MachineInstr *MI : Schedule.getInstructions()) {

2662 if (MI->isPHI())

2663 continue;

2664 int StageNum = Schedule.getStage(MI);

2665 for (const MachineOperand &MO : MI->uses()) {

2667 continue;

2670 continue;

2671

2672 int NumUnrollLocal = 1;

2674 ++NumUnrollLocal;

2675

2676

2678 }

2679 NumUnrollLocal += StageNum - Schedule.getStage(DefMI);

2680 if (Inst2Idx[MI] <= Inst2Idx[DefMI])

2681 --NumUnrollLocal;

2682 NumUnroll = std::max(NumUnroll, NumUnrollLocal);

2683 }

2684 }

2685 LLVM_DEBUG(dbgs() << "NumUnroll: " << NumUnroll << "\n");

2686}

2687

2688

2689

2690

2691void ModuloScheduleExpanderMVE::updateInstrDef(MachineInstr *NewMI,

2692 ValueMapTy &VRMap,

2693 bool LastDef) {

2694 for (MachineOperand &MO : NewMI->all_defs()) {

2696 continue;

2698 const TargetRegisterClass *RC = MRI.getRegClass(Reg);

2699 Register NewReg = MRI.createVirtualRegister(RC);

2701 VRMap[Reg] = NewReg;

2702 if (LastDef)

2703 mergeRegUsesAfterPipeline(Reg, NewReg);

2704 }

2705}

2706

2708 OrigKernel = Schedule.getLoop()->getTopBlock();

2709 OrigPreheader = Schedule.getLoop()->getLoopPreheader();

2710 OrigExit = Schedule.getLoop()->getExitBlock();

2711

2713

2714 generatePipelinedLoop();

2715}

2716

2717

2719 if (!L.getExitBlock()) {

2720 LLVM_DEBUG(dbgs() << "Can not apply MVE expander: No single exit block.\n");

2721 return false;

2722 }

2723

2726

2727

2728

2731

2732

2736 if (Ref.getParent() != BB || Ref.isPHI()) {

2737 LLVM_DEBUG(dbgs() << "Can not apply MVE expander: A phi result is "

2738 "referenced outside of the loop or by phi.\n");

2739 return false;

2740 }

2741

2742

2743

2744

2747 if (Register(LoopVal).isVirtual() ||

2748 MRI.getVRegDef(LoopVal)->getParent() != BB) {

2750 dbgs() << "Can not apply MVE expander: A phi source value coming "

2751 "from the loop is not defined in the loop.\n");

2752 return false;

2753 }

2754 if (UsedByPhi.count(LoopVal)) {

2755 LLVM_DEBUG(dbgs() << "Can not apply MVE expander: A value defined in the "

2756 "loop is referenced by two or more phis.\n");

2757 return false;

2758 }

2759 UsedByPhi.insert(LoopVal);

2760 }

2761

2762 return true;

2763}

2764

2765

2766

2767

2768

2769

2770

2771

2772

2773

2774

2775

2776

2777

2778namespace {

2780public:

2781 static char ID;

2782

2785 }

2786

2787 bool runOnMachineFunction(MachineFunction &MF) override;

2789

2790 void getAnalysisUsage(AnalysisUsage &AU) const override {

2794 }

2795};

2796}

2797

2798char ModuloScheduleTest::ID = 0;

2799

2801 "Modulo Schedule test pass", false, false)

2806

2807bool ModuloScheduleTest::runOnMachineFunction(MachineFunction &MF) {

2808 MachineLoopInfo &MLI = getAnalysis().getLI();

2809 for (auto *L : MLI) {

2810 if (L->getTopBlock() != L->getBottomBlock())

2811 continue;

2812 runOnLoop(MF, *L);

2813 return false;

2814 }

2815 return false;

2816}

2817

2819 std::pair<StringRef, StringRef> StageAndCycle = getToken(S, "_");

2820 std::pair<StringRef, StringRef> StageTokenAndValue =

2821 getToken(StageAndCycle.first, "-");

2822 std::pair<StringRef, StringRef> CycleTokenAndValue =

2823 getToken(StageAndCycle.second, "-");

2824 if (StageTokenAndValue.first != "Stage" ||

2825 CycleTokenAndValue.first != "_Cycle") {

2827 "Bad post-instr symbol syntax: see comment in ModuloScheduleTest");

2828 return;

2829 }

2830

2831 StageTokenAndValue.second.drop_front().getAsInteger(10, Stage);

2832 CycleTokenAndValue.second.drop_front().getAsInteger(10, Cycle);

2833

2834 dbgs() << " Stage=" << Stage << ", Cycle=" << Cycle << "\n";

2835}

2836

2837void ModuloScheduleTest::runOnLoop(MachineFunction &MF, MachineLoop &L) {

2838 LiveIntervals &LIS = getAnalysis().getLIS();

2839 MachineBasicBlock *BB = L.getTopBlock();

2840 dbgs() << "--- ModuloScheduleTest running on BB#" << BB->getNumber() << "\n";

2841

2842 DenseMap<MachineInstr *, int> Cycle, Stage;

2843 std::vector<MachineInstr *> Instrs;

2844 for (MachineInstr &MI : *BB) {

2845 if (MI.isTerminator())

2846 continue;

2847 Instrs.push_back(&MI);

2848 if (MCSymbol *Sym = MI.getPostInstrSymbol()) {

2849 dbgs() << "Parsing post-instr symbol for " << MI;

2851 }

2852 }

2853

2854 ModuloSchedule MS(MF, &L, std::move(Instrs), std::move(Cycle),

2855 std::move(Stage));

2856 ModuloScheduleExpander MSE(

2858 MSE.expand();

2859 MSE.cleanup();

2860}

2861

2862

2863

2864

2865

2870 OS << "Stage-" << S.getStage(MI) << "_Cycle-" << S.getCycle(MI);

2871 MCSymbol *Sym = MF.getContext().getOrCreateSymbol(OS.str());

2872 MI->setPostInstrSymbol(MF, Sym);

2873 }

2874}

unsigned const MachineRegisterInfo * MRI

MachineInstrBuilder & UseMI

MachineInstrBuilder MachineInstrBuilder & DefMI

static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum, Register ReplaceReg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertTo)

Clone an instruction from MI.

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

const TargetInstrInfo & TII

static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)

static const Function * getParent(const Value *V)

static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")

static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, Register &InitVal, Register &LoopVal)

Return the register values for the operands of a Phi instruction.

static Register getLoopPhiReg(const MachineInstr &Phi, const MachineBasicBlock *LoopBB)

Return the Phi register value that comes the loop block.

Register const TargetRegisterInfo * TRI

Promote Memory to Register

This file provides utility analysis objects describing memory locations.

static bool hasUseAfterLoop(Register Reg, MachineBasicBlock *BB, MachineRegisterInfo &MRI)

Return true if the register has a use that occurs outside the specified loop.

Definition ModuloSchedule.cpp:360

static void replaceRegUsesAfterLoop(Register FromReg, Register ToReg, MachineBasicBlock *MBB, MachineRegisterInfo &MRI)

Replace all uses of FromReg that appear outside the specified basic block with ToReg.

Definition ModuloSchedule.cpp:349

static void replacePhiSrc(MachineInstr &Phi, Register OrigReg, Register NewReg, MachineBasicBlock *NewMBB)

Definition ModuloSchedule.cpp:2466

static MachineInstr * getLoopPhiUser(Register Reg, MachineBasicBlock *Loop)

Return a phi if Reg is referenced by the phi.

Definition ModuloSchedule.cpp:2378

static MachineBasicBlock * createDedicatedExit(MachineBasicBlock *Loop, MachineBasicBlock *Exit, LiveIntervals &LIS)

Create a dedicated exit for Loop.

Definition ModuloSchedule.cpp:2125

static void parseSymbolString(StringRef S, int &Cycle, int &Stage)

Definition ModuloSchedule.cpp:2818

static cl::opt< bool > SwapBranchTargetsMVE("pipeliner-swap-branch-targets-mve", cl::Hidden, cl::init(false), cl::desc("Swap target blocks of a conditional branch for MVE expander"))

static Register getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)

Return the Phi register value that comes from the incoming block.

Definition ModuloSchedule.cpp:57

MachineInstr unsigned OpIdx

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB

const SmallVectorImpl< MachineOperand > & Cond

Remove Loads Into Fake Uses

Represent the analysis usage information of a pass.

AnalysisUsage & addRequired()

iterator find(const_arg_type_t< KeyT > Val)

bool erase(const KeyT &Val)

DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator

Implements a dense probed hash-table based set.

bool hasInterval(Register Reg) const

SlotIndex InsertMachineInstrInMaps(MachineInstr &MI)

void insertMBBInMaps(MachineBasicBlock *MBB)

void RemoveMachineInstrFromMaps(MachineInstr &MI)

void removeInterval(Register Reg)

Interval removal.

static constexpr LocationSize beforeOrAfterPointer()

Any location before or after the base pointer (but still within the underlying object).

Represents a single loop in the control flow graph.

const MCInstrDesc & get(unsigned Opcode) const

Return the machine instruction descriptor that corresponds to the specified instruction opcode.

MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...

static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)

LLVM_ABI void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New)

Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.

instr_iterator instr_begin()

LLVM_ABI void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)

Replace successor OLD with NEW and update probability info.

LLVM_ABI void transferSuccessors(MachineBasicBlock *FromMBB)

Transfers all the successors from MBB to this machine basic block (i.e., copies all the successors Fr...

LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)

Insert MI into the instruction list before I, possibly inside a bundle.

iterator_range< iterator > phis()

Returns a range that iterates over the phis in the basic block.

reverse_instr_iterator instr_rbegin()

int getNumber() const

MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...

void push_back(MachineInstr *MI)

const BasicBlock * getBasicBlock() const

Return the LLVM basic block that this instance corresponded to originally.

succ_iterator succ_begin()

LLVM_ABI iterator getFirstTerminator()

Returns an iterator to the first terminator instruction of this basic block.

LLVM_ABI void dump() const

LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())

Add Succ as a successor of this MachineBasicBlock.

SmallVectorImpl< MachineBasicBlock * >::iterator succ_iterator

LLVM_ABI void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)

Remove successor from the successors list of this MachineBasicBlock.

LLVM_ABI iterator getFirstNonPHI()

Returns a pointer to the first instruction in this block that is not a PHINode instruction.

LLVM_ABI void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const

reverse_instr_iterator instr_rend()

Instructions::iterator instr_iterator

pred_iterator pred_begin()

instr_iterator instr_end()

const MachineFunction * getParent() const

Return the MachineFunction containing this basic block.

iterator_range< iterator > terminators()

LLVM_ABI instr_iterator getFirstInstrTerminator()

Same getFirstTerminator but it ignores bundles and return an instr_iterator instead.

MachineInstrBundleIterator< MachineInstr > iterator

Instructions::reverse_iterator reverse_instr_iterator

MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...

void getAnalysisUsage(AnalysisUsage &AU) const override

getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

MachineRegisterInfo & getRegInfo()

getRegInfo - Return information about the registers currently in use.

const MachineBasicBlock & front() const

MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)

CreateMachineInstr - Allocate a new MachineInstr.

void insert(iterator MBBI, MachineBasicBlock *MBB)

const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const

Add a new virtual register operand.

const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const

Representation of each machine instruction.

mop_range defs()

Returns all explicit operands that are register definitions.

const MachineBasicBlock * getParent() const

filtered_mop_range all_defs()

Returns an iterator range over all operands that are (explicit or implicit) register defs.

unsigned getNumOperands() const

Retuns the total number of operands.

bool memoperands_empty() const

Return true if we don't have any memory operands which described the memory access done by this instr...

LLVM_ABI void setMemRefs(MachineFunction &MF, ArrayRef< MachineMemOperand * > MemRefs)

Assign this MachineInstr's memory reference descriptor list.

LLVM_ABI void dropMemRefs(MachineFunction &MF)

Clear this MachineInstr's memory reference descriptor list.

ArrayRef< MachineMemOperand * > memoperands() const

Access to memory operands of the instruction.

const DebugLoc & getDebugLoc() const

Returns the debug location id of this MachineInstr.

const MachineOperand & getOperand(unsigned i) const

MachineOperand class - Representation of each machine instruction operand.

void setImm(int64_t immVal)

bool isReg() const

isReg - Tests if this is a MO_Register operand.

MachineBasicBlock * getMBB() const

LLVM_ABI void setReg(Register Reg)

Change the register this operand corresponds to.

MachineInstr * getParent()

getParent - Return the instruction that this operand belongs to.

void setMBB(MachineBasicBlock *MBB)

Register getReg() const

getReg - Returns the register number.

static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)

static MachineOperand CreateMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0)

MachineRegisterInfo - Keep track of information for virtual and physical registers,...

defusechain_instr_iterator< true, false, false, true > use_instr_iterator

use_instr_iterator/use_instr_begin/use_instr_end - Walk all uses of the specified register,...

defusechain_iterator< true, false, false, true, false > use_iterator

use_iterator/use_begin/use_end - Walk all uses of the specified register.

void expand()

Definition ModuloSchedule.cpp:2707

static bool canApply(MachineLoop &L)

Check if ModuloScheduleExpanderMVE can be applied to L.

Definition ModuloSchedule.cpp:2718

The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...

MachineBasicBlock * getRewrittenKernel()

Returns the newly rewritten kernel block, or nullptr if this was optimized away.

void cleanup()

Performs final cleanup after expansion.

Definition ModuloSchedule.cpp:189

void expand()

Performs the actual expansion.

Definition ModuloSchedule.cpp:72

DenseMap< MachineInstr *, std::pair< Register, int64_t > > InstrChangesTy

void annotate()

Performs the annotation.

Definition ModuloSchedule.cpp:2866

Represents a schedule for a single-block loop.

int getNumStages() const

Return the number of stages contained in this schedule, which is the largest stage index + 1.

ArrayRef< MachineInstr * > getInstructions()

Return the rescheduled instructions in order.

void print(raw_ostream &OS)

Definition ModuloSchedule.cpp:30

int getCycle(MachineInstr *MI)

Return the cycle that MI is scheduled at, or -1.

void setStage(MachineInstr *MI, int MIStage)

Set the stage of a newly created instruction.

int getStage(MachineInstr *MI)

Return the stage that MI is scheduled in, or -1.

static LLVM_ABI PassRegistry * getPassRegistry()

getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...

std::deque< MachineBasicBlock * > PeeledBack

SmallVector< MachineInstr *, 4 > IllegalPhisToDelete

Illegal phis that need to be deleted once we re-link stages.

void expand()

Definition ModuloSchedule.cpp:2015

DenseMap< MachineInstr *, MachineInstr * > CanonicalMIs

CanonicalMIs and BlockMIs form a bidirectional map between any of the loop kernel clones.

SmallVector< MachineBasicBlock *, 4 > Prologs

All prolog and epilog blocks.

MachineBasicBlock * peelKernel(LoopPeelDirection LPD)

Peels one iteration of the rewritten kernel (BB) in the specified direction.

Definition ModuloSchedule.cpp:1604

ModuloSchedule & Schedule

std::deque< MachineBasicBlock * > PeeledFront

State passed from peelKernel to peelPrologAndEpilogs().

unsigned getStage(MachineInstr *MI)

Helper to get the stage of an instruction in the schedule.

void rewriteUsesOf(MachineInstr *MI)

Change all users of MI, if MI is predicated out (LiveStages[MI->getParent()] == false).

Definition ModuloSchedule.cpp:1920

SmallVector< MachineBasicBlock *, 4 > Epilogs

DenseMap< MachineBasicBlock *, BitVector > AvailableStages

For every block, the stages that are available.

std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > LoopInfo

Target loop info before kernel peeling.

DenseMap< std::pair< MachineBasicBlock *, MachineInstr * >, MachineInstr * > BlockMIs

Register getEquivalentRegisterIn(Register Reg, MachineBasicBlock *BB)

All prolog and epilog blocks are clones of the kernel, so any produced register in one block has an c...

Definition ModuloSchedule.cpp:1913

MachineBasicBlock * Preheader

The original loop preheader.

void rewriteKernel()

Converts BB from the original loop body to the rewritten, pipelined steady-state.

Definition ModuloSchedule.cpp:2010

DenseMap< MachineInstr *, unsigned > PhiNodeLoopIteration

When peeling the epilogue keep track of the distance between the phi nodes and the kernel.

DenseMap< MachineBasicBlock *, BitVector > LiveStages

For every block, the stages that are produced.

const TargetInstrInfo * TII

void filterInstructions(MachineBasicBlock *MB, int MinStage)

Definition ModuloSchedule.cpp:1620

void peelPrologAndEpilogs()

Peel the kernel forwards and backwards to produce prologs and epilogs, and stitch them together.

Definition ModuloSchedule.cpp:1752

MachineBasicBlock * BB

The original loop block that gets rewritten in-place.

void fixupBranches()

Insert branches between prologs, kernel and epilogs.

Definition ModuloSchedule.cpp:1963

MachineBasicBlock * CreateLCSSAExitingBlock()

Create a poor-man's LCSSA by cloning only the PHIs from the kernel block to a block dominated by all ...

Definition ModuloSchedule.cpp:1869

void validateAgainstModuloScheduleExpander()

Runs ModuloScheduleExpander and treats it as a golden input to validate aspects of the code generated...

Definition ModuloSchedule.cpp:2027

Register getPhiCanonicalReg(MachineInstr *CanonicalPhi, MachineInstr *Phi)

Helper function to find the right canonical register for a phi instruction coming from a peeled out p...

Definition ModuloSchedule.cpp:1735

MachineRegisterInfo & MRI

void moveStageBetweenBlocks(MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage)

Definition ModuloSchedule.cpp:1649

Wrapper class representing virtual and physical registers.

constexpr bool isValid() const

constexpr bool isVirtual() const

Return true if the specified register number is in the virtual register namespace.

constexpr bool isPhysical() const

Return true if the specified register number is in the physical register namespace.

size_type count(ConstPtrType Ptr) const

count - Return 1 if the specified pointer is in the set, 0 otherwise.

std::pair< iterator, bool > insert(PtrType Ptr)

Inserts Ptr if and only if there is no element in the container equal to Ptr.

SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.

reference emplace_back(ArgTypes &&... Args)

iterator erase(const_iterator CI)

void push_back(const T &Elt)

reverse_iterator rbegin()

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

StringRef - Represent a constant reference to a string, i.e.

TargetInstrInfo - Interface to description of machine instruction set.

virtual bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const

Reverses the branch condition of the specified condition list, returning false on success and true if...

virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const

Remove the branching code at the end of the specific MBB.

virtual std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const

Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...

virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const

Insert branch code into the end of the specified MachineBasicBlock.

unsigned insertUnconditionalBranch(MachineBasicBlock &MBB, MachineBasicBlock *DestBB, const DebugLoc &DL, int *BytesAdded=nullptr) const

virtual const TargetInstrInfo * getInstrInfo() const

A Use represents the edge between a Value definition and its users.

std::pair< iterator, bool > insert(const ValueT &V)

size_type count(const_arg_type_t< ValueT > V) const

Return 1 if the specified key is in the set, 0 otherwise.

self_iterator getIterator()

This class implements an extremely fast bulk output stream that can only output to a stream.

A raw_ostream that writes to an std::string.

A raw_ostream that writes to an SmallVector or SmallString.

StringRef str() const

Return a StringRef for the vector contents.

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

unsigned ID

LLVM IR allows to use arbitrary numbers as calling convention identifiers.

initializer< Ty > init(const Ty &Val)

NodeAddr< DefNode * > Def

NodeAddr< PhiNode * > Phi

NodeAddr< UseNode * > Use

BaseReg

Stack frame base register. Bit 0 of FREInfo.Info.

This is an optimization pass for GlobalISel generic memory operations.

auto find(R &&Range, const T &Val)

Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.

MachineBasicBlock * PeelSingleBlockLoop(LoopPeelDirection Direction, MachineBasicBlock *Loop, MachineRegisterInfo &MRI, const TargetInstrInfo *TII)

Peels a single block loop.

auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)

Get the size of a range.

MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)

Builder interface. Specify how to create the initial instruction itself.

LLVM_ABI std::pair< StringRef, StringRef > getToken(StringRef Source, StringRef Delimiters=" \t\n\v\f\r")

getToken - This function extracts one token from source, ignoring any leading characters that appear ...

testing::Matcher< const detail::ErrorHolder & > Failed()

iterator_range< T > make_range(T x, T y)

Convenience function for iterating over sub-ranges.

void append_range(Container &C, Range &&R)

Wrapper function to append range R to container C.

iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)

Make a range that does early increment to allow mutation of the underlying range without disrupting i...

bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)

LLVM_ABI void initializeModuloScheduleTestPass(PassRegistry &)

auto reverse(ContainerTy &&C)

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)

class LLVM_GSL_OWNER SmallVector

Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...

LLVM_ABI raw_fd_ostream & errs()

This returns a reference to a raw_ostream for standard error.

@ Ref

The access may reference the value stored in memory.

@ Sub

Subtraction of integers.

auto count(R &&Range, const E &Element)

Wrapper function around std::count to count the number of times an element Element occurs in the give...

DWARFExpression::Operation Op

@ LPD_Back

Peel the last iteration of the loop.

@ LPD_Front

Peel the first iteration of the loop.

void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)

Implement std::swap in terms of BitVector swap.