LLVM: lib/CodeGen/ModuloSchedule.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

21

22#define DEBUG_TYPE "pipeliner"

23using namespace llvm;

24

27 cl::desc("Swap target blocks of a conditional branch for MVE expander"));

28

32}

33

34

35

36

37

38

39

41 unsigned &InitVal, unsigned &LoopVal) {

42 assert(Phi.isPHI() && "Expecting a Phi.");

43

44 InitVal = 0;

45 LoopVal = 0;

46 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)

47 if (Phi.getOperand(i + 1).getMBB() != Loop)

48 InitVal = Phi.getOperand(i).getReg();

49 else

50 LoopVal = Phi.getOperand(i).getReg();

51

52 assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");

53}

54

55

57 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)

58 if (Phi.getOperand(i + 1).getMBB() != LoopBB)

59 return Phi.getOperand(i).getReg();

60 return 0;

61}

62

63

65 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)

66 if (Phi.getOperand(i + 1).getMBB() == LoopBB)

67 return Phi.getOperand(i).getReg();

68 return 0;

69}

70

74 if (Preheader == BB)

75 Preheader = *std::next(BB->pred_begin());

76

77

78

83 unsigned MaxDiff = 0;

84 bool PhiIsSwapped = false;

88 unsigned Diff = 0;

89 if (UseStage != -1 && UseStage >= DefStage)

90 Diff = UseStage - DefStage;

91 if (MI->isPHI()) {

92 if (isLoopCarried(*MI))

93 ++Diff;

94 else

95 PhiIsSwapped = true;

96 }

97 MaxDiff = std::max(Diff, MaxDiff);

98 }

99 RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);

100 }

101 }

102

103 generatePipelinedLoop();

104}

105

106void ModuloScheduleExpander::generatePipelinedLoop() {

109

110

112

113 unsigned MaxStageCount = Schedule.getNumStages() - 1;

114

115

116

117

118

119 ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];

120

121

122

123

124 ValueMapTy *VRMapPhi = new ValueMapTy[(MaxStageCount + 1) * 2];

125

126 InstrMapTy InstrMap;

127

129

130

131 generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs);

134

135

136

138 if (CI->isPHI())

139 continue;

140 unsigned StageNum = Schedule.getStage(CI);

141 MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum);

142 updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap);

144 InstrMap[NewMI] = CI;

145 }

146

147

148

151 updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);

153 InstrMap[NewMI] = &MI;

154 }

155

156 NewKernel = KernelBB;

159

160 generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap,

161 InstrMap, MaxStageCount, MaxStageCount, false);

162 generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, VRMapPhi,

163 InstrMap, MaxStageCount, MaxStageCount, false);

164

166

168

169 generateEpilog(MaxStageCount, KernelBB, BB, VRMap, VRMapPhi, EpilogBBs,

170 PrologBBs);

171

172

173

174 splitLifetimes(KernelBB, EpilogBBs);

175

176

177 removeDeadInstructions(KernelBB, EpilogBBs);

178

179

180 addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);

181

182 delete[] VRMap;

183 delete[] VRMapPhi;

184}

185

187

188 for (auto &I : *BB)

190 BB->clear();

191 BB->eraseFromParent();

192}

193

194

195void ModuloScheduleExpander::generateProlog(unsigned LastStage,

197 ValueMapTy *VRMap,

198 MBBVectorTy &PrologBBs) {

200 InstrMapTy InstrMap;

201

202

203

204

205 for (unsigned i = 0; i < LastStage; ++i) {

206

207

213 PredBB = NewBB;

215

216

217

218 for (int StageNum = i; StageNum >= 0; --StageNum) {

221 BBI != BBE; ++BBI) {

222 if (Schedule.getStage(&*BBI) == StageNum) {

223 if (BBI->isPHI())

224 continue;

226 cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum);

227 updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap);

229 InstrMap[NewMI] = &*BBI;

230 }

231 }

232 }

233 rewritePhiValues(NewBB, i, VRMap, InstrMap);

235 dbgs() << "prolog:\n";

236 NewBB->dump();

237 });

238 }

239

241

242

243

244 unsigned numBranches = TII->removeBranch(*Preheader);

245 if (numBranches) {

248 }

249}

250

251

252

253

254void ModuloScheduleExpander::generateEpilog(

256 ValueMapTy *VRMap, ValueMapTy *VRMapPhi, MBBVectorTy &EpilogBBs,

257 MBBVectorTy &PrologBBs) {

258

259

263 assert(!checkBranch && "generateEpilog must be able to analyze the branch");

264 if (checkBranch)

265 return;

266

268 if (*LoopExitI == KernelBB)

269 ++LoopExitI;

270 assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");

272

275 InstrMapTy InstrMap;

276

277

278

279

280 int EpilogStage = LastStage + 1;

281 for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {

285

289

290 if (EpilogStart == LoopExitBB)

291 EpilogStart = NewBB;

292

293

294

295 for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {

296 for (auto &BBI : *BB) {

297 if (BBI.isPHI())

298 continue;

300 if ((unsigned)Schedule.getStage(In) == StageNum) {

301

302

303 MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);

304 updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap);

306 InstrMap[NewMI] = In;

307 }

308 }

309 }

310 generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap,

311 InstrMap, LastStage, EpilogStage, i == 1);

312 generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, VRMapPhi,

313 InstrMap, LastStage, EpilogStage, i == 1);

314 PredBB = NewBB;

315

317 dbgs() << "epilog:\n";

318 NewBB->dump();

319 });

320 }

321

322

324

325

326

328 assert((OrigBB == TBB || OrigBB == FBB) &&

329 "Unable to determine looping branch direction");

330 if (OrigBB != TBB)

332 else

334

335 if (EpilogBBs.size() > 0) {

339 }

340}

341

342

343

350 if (O.getParent()->getParent() != MBB)

351 O.setReg(ToReg);

354}

355

356

357

361 if (MO.getParent()->getParent() != BB)

362 return true;

363 return false;

364}

365

366

367

368

369void ModuloScheduleExpander::generateExistingPhis(

371 MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,

372 unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {

373

374

375

376 unsigned PrologStage = 0;

377 unsigned PrevStage = 0;

378 bool InKernel = (LastStageNum == CurStageNum);

379 if (InKernel) {

380 PrologStage = LastStageNum - 1;

381 PrevStage = CurStageNum;

382 } else {

383 PrologStage = LastStageNum - (CurStageNum - LastStageNum);

384 PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;

385 }

386

388 BBE = BB->getFirstNonPHI();

389 BBI != BBE; ++BBI) {

390 Register Def = BBI->getOperand(0).getReg();

391

392 unsigned InitVal = 0;

393 unsigned LoopVal = 0;

394 getPhiRegs(*BBI, BB, InitVal, LoopVal);

395

396 unsigned PhiOp1 = 0;

397

398

399 unsigned PhiOp2 = LoopVal;

400 if (auto It = VRMap[LastStageNum].find(LoopVal);

401 It != VRMap[LastStageNum].end())

402 PhiOp2 = It->second;

403

404 int StageScheduled = Schedule.getStage(&*BBI);

406 unsigned NumStages = getStagesForReg(Def, CurStageNum);

407 if (NumStages == 0) {

408

409

410 unsigned NewReg = VRMap[PrevStage][LoopVal];

411 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, 0, &*BBI, Def,

412 InitVal, NewReg);

413 if (VRMap[CurStageNum].count(LoopVal))

414 VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];

415 }

416

417

418

419

420 unsigned MaxPhis = PrologStage + 2;

421 if (!InKernel && (int)PrologStage <= LoopValStage)

422 MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1);

423 unsigned NumPhis = std::min(NumStages, MaxPhis);

424

425 unsigned NewReg = 0;

426 unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;

427

428

429

430

431

432 int StageDiff = 0;

433 if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&

434 NumPhis == 1)

435 StageDiff = 1;

436

437

438 if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)

439 StageDiff = StageScheduled - LoopValStage;

440 for (unsigned np = 0; np < NumPhis; ++np) {

441

442

443

444 if (np > PrologStage || StageScheduled >= (int)LastStageNum)

445 PhiOp1 = InitVal;

446

447 else if (PrologStage >= AccessStage + StageDiff + np &&

448 VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)

449 PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];

450

451

452 else if (PrologStage >= AccessStage + StageDiff + np) {

453

454

455 PhiOp1 = LoopVal;

457 int Indirects = 1;

458 while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {

459 int PhiStage = Schedule.getStage(InstOp1);

460 if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)

462 else

465 int PhiOpStage = Schedule.getStage(InstOp1);

466 int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);

467 if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&

468 VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {

469 PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];

470 break;

471 }

472 ++Indirects;

473 }

474 } else

475 PhiOp1 = InitVal;

476

477

479 if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)

481

483 bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();

484

485

486

487 if (!InKernel) {

488 int StageDiffAdj = 0;

489 if (LoopValStage != -1 && StageScheduled > LoopValStage)

490 StageDiffAdj = StageScheduled - LoopValStage;

491

492

493 if (np == 0 && PrevStage == LastStageNum &&

494 (StageScheduled != 0 || LoopValStage != 0) &&

495 VRMap[PrevStage - StageDiffAdj].count(LoopVal))

496 PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];

497

498

499 else if (np > 0 && PrevStage == LastStageNum &&

500 VRMap[PrevStage - np + 1].count(Def))

501 PhiOp2 = VRMap[PrevStage - np + 1][Def];

502

503 else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&

504 VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))

505 PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];

506

507

508 else if (VRMap[PrevStage - np].count(Def) &&

509 (!LoopDefIsPhi || (PrevStage != LastStageNum) ||

510 (LoopValStage == StageScheduled)))

511 PhiOp2 = VRMap[PrevStage - np][Def];

512 }

513

514

515

516

517

518 if (LoopDefIsPhi) {

519 if (static_cast<int>(PrologStage - np) >= StageScheduled) {

520 int LVNumStages = getStagesForPhi(LoopVal);

521 int StageDiff = (StageScheduled - LoopValStage);

522 LVNumStages -= StageDiff;

523

524 if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {

525 NewReg = PhiOp2;

526 unsigned ReuseStage = CurStageNum;

527 if (isLoopCarried(*PhiInst))

528 ReuseStage -= LVNumStages;

529

530

531 if (VRMap[ReuseStage - np].count(LoopVal)) {

532 NewReg = VRMap[ReuseStage - np][LoopVal];

533

534 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI,

535 Def, NewReg);

536

537 VRMap[CurStageNum - np][Def] = NewReg;

538 PhiOp2 = NewReg;

539 if (VRMap[LastStageNum - np - 1].count(LoopVal))

540 PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];

541

542 if (IsLast && np == NumPhis - 1)

544 continue;

545 }

546 }

547 }

548 if (InKernel && StageDiff > 0 &&

549 VRMap[CurStageNum - StageDiff - np].count(LoopVal))

550 PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];

551 }

552

555

558 TII->get(TargetOpcode::PHI), NewReg);

561 if (np == 0)

562 InstrMap[NewPhi] = &*BBI;

563

564

565

566

567 unsigned PrevReg = 0;

568 if (InKernel && VRMap[PrevStage - np].count(LoopVal))

569 PrevReg = VRMap[PrevStage - np][LoopVal];

570 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,

571 NewReg, PrevReg);

572

573 if (VRMap[CurStageNum - np].count(Def)) {

574 unsigned R = VRMap[CurStageNum - np][Def];

575 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, R,

576 NewReg);

577 }

578

579

580

581

582 if (IsLast && np == NumPhis - 1)

584

585

586 if (InKernel)

587 PhiOp2 = NewReg;

588

589

590 VRMap[CurStageNum - np][Def] = NewReg;

591 }

592

593 while (NumPhis++ < NumStages) {

594 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, NumPhis, &*BBI, Def,

595 NewReg, 0);

596 }

597

598

599

600 if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))

602 }

603}

604

605

606

607

608void ModuloScheduleExpander::generatePhis(

610 MachineBasicBlock *KernelBB, ValueMapTy *VRMap, ValueMapTy *VRMapPhi,

611 InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,

612 bool IsLast) {

613

614

615 unsigned PrologStage = 0;

616 unsigned PrevStage = 0;

617 unsigned StageDiff = CurStageNum - LastStageNum;

618 bool InKernel = (StageDiff == 0);

619 if (InKernel) {

620 PrologStage = LastStageNum - 1;

621 PrevStage = CurStageNum;

622 } else {

623 PrologStage = LastStageNum - StageDiff;

624 PrevStage = LastStageNum + StageDiff - 1;

625 }

626

628 BBE = BB->instr_end();

629 BBI != BBE; ++BBI) {

630 for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {

633 continue;

634

635 int StageScheduled = Schedule.getStage(&*BBI);

636 assert(StageScheduled != -1 && "Expecting scheduled instruction.");

638 unsigned NumPhis = getStagesForReg(Def, CurStageNum);

639

640

641

642 if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&

644 NumPhis = 1;

645 if (!InKernel && (unsigned)StageScheduled > PrologStage)

646 continue;

647

648 unsigned PhiOp2;

649 if (InKernel) {

650 PhiOp2 = VRMap[PrevStage][Def];

652 if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)

654 }

655

656

657 if (NumPhis > PrologStage + 1 - StageScheduled)

658 NumPhis = PrologStage + 1 - StageScheduled;

659 for (unsigned np = 0; np < NumPhis; ++np) {

660

661

662

663

664

665

666

667

668

669

670

671

672

673

674

675

676

677

678

679

680

681

682 unsigned PhiOp1 = VRMap[PrologStage][Def];

683 if (np <= PrologStage)

684 PhiOp1 = VRMap[PrologStage - np][Def];

685 if (!InKernel) {

686 if (PrevStage == LastStageNum && np == 0)

687 PhiOp2 = VRMap[LastStageNum][Def];

688 else

689 PhiOp2 = VRMapPhi[PrevStage - np][Def];

690 }

691

694

697 TII->get(TargetOpcode::PHI), NewReg);

700 if (np == 0)

701 InstrMap[NewPhi] = &*BBI;

702

703

704

705 if (InKernel) {

706 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp1,

707 NewReg);

708 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp2,

709 NewReg);

710

711 PhiOp2 = NewReg;

712 VRMapPhi[PrevStage - np - 1][Def] = NewReg;

713 } else {

714 VRMapPhi[CurStageNum - np][Def] = NewReg;

715 if (np == NumPhis - 1)

716 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,

717 NewReg);

718 }

719 if (IsLast && np == NumPhis - 1)

721 }

722 }

723 }

724}

725

726

727

728

729

730void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,

731 MBBVectorTy &EpilogBBs) {

732

733

737 MI != ME;) {

738

739 if (MI->isInlineAsm()) {

740 ++MI;

741 continue;

742 }

743 bool SawStore = false;

744

745

746 if (MI->isSafeToMove(SawStore) && MI->isPHI()) {

747 ++MI;

748 continue;

749 }

750 bool used = true;

753

756 if (used)

757 break;

758 continue;

759 }

760 unsigned realUses = 0;

762

763

764 if (U.getParent()->getParent() != BB) {

765 realUses++;

766 used = true;

767 break;

768 }

769 }

770 if (realUses > 0)

771 break;

772 used = false;

773 }

774 if (!used) {

776 MI++->eraseFromParent();

777 continue;

778 }

779 ++MI;

780 }

781

782

784 Register reg = MI.getOperand(0).getReg();

787 MI.eraseFromParent();

788 }

789 }

790}

791

792

793

794

795

796

797

798

799

800

801

802void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB,

803 MBBVectorTy &EpilogBBs) {

805 for (auto &PHI : KernelBB->phis()) {

807

808

811 I != E; ++I) {

812 if (I->isPHI() && I->getParent() == KernelBB) {

813

815 if (!LCDef)

816 continue;

818 if (MI || MI->getParent() != KernelBB || MI->isPHI())

819 continue;

820

821

822 unsigned SplitReg = 0;

825 if (BBJ.readsRegister(Def, nullptr)) {

826

827 if (SplitReg == 0) {

829 BuildMI(*KernelBB, MI, MI->getDebugLoc(),

830 TII->get(TargetOpcode::COPY), SplitReg)

832 }

833 BBJ.substituteRegister(Def, SplitReg, 0, *TRI);

834 }

835 if (!SplitReg)

836 continue;

837

838 for (auto &Epilog : EpilogBBs)

840 if (I.readsRegister(Def, nullptr))

841 I.substituteRegister(Def, SplitReg, 0, *TRI);

842 break;

843 }

844 }

845 }

846}

847

848

851 if (MI.isPHI())

852 break;

853 for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)

854 if (MI.getOperand(i + 1).getMBB() == Incoming) {

855 MI.removeOperand(i + 1);

856 MI.removeOperand(i);

857 break;

858 }

859 }

860}

861

862

863

864

865void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,

866 MBBVectorTy &PrologBBs,

868 MBBVectorTy &EpilogBBs,

869 ValueMapTy *VRMap) {

870 assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");

873

874

875

877 unsigned MaxIter = PrologBBs.size() - 1;

878 for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {

879

880

883

885 std::optional StaticallyGreater =

887 unsigned numAdded = 0;

888 if (!StaticallyGreater) {

891 } else if (*StaticallyGreater == false) {

893 Prolog->removeSuccessor(LastPro);

897

898 if (LastPro != LastEpi) {

899 LastEpi->clear();

901 }

902 if (LastPro == KernelBB) {

904 NewKernel = nullptr;

905 }

906 LastPro->clear();

908 } else {

911 }

915 E = Prolog->instr_rend();

916 I != E && numAdded > 0; ++I, --numAdded)

917 updateInstruction(&*I, false, j, 0, VRMap);

918 }

919

920 if (NewKernel) {

921 LoopInfo->setPreheader(PrologBBs[MaxIter]);

922 LoopInfo->adjustTripCount(-(MaxIter + 1));

923 }

924}

925

926

927

928bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {

932 bool OffsetIsScalable;

934 return false;

935

936

937 if (OffsetIsScalable)

938 return false;

939

940 if (!BaseOp->isReg())

941 return false;

942

944

946

948 if (BaseDef && BaseDef->isPHI()) {

950 BaseDef = MRI.getVRegDef(BaseReg);

951 }

952 if (!BaseDef)

953 return false;

954

955 int D = 0;

957 return false;

958

959 Delta = D;

960 return true;

961}

962

963

964

965

966void ModuloScheduleExpander::updateMemOperands(MachineInstr &NewMI,

968 unsigned Num) {

969 if (Num == 0)

970 return;

971

972

974 return;

977

978 if (MMO->isVolatile() || MMO->isAtomic() ||

979 (MMO->isInvariant() && MMO->isDereferenceable()) ||

980 (!MMO->getValue())) {

982 continue;

983 }

984 unsigned Delta;

985 if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {

986 int64_t AdjOffset = Delta * Num;

989 } else {

992 }

993 }

995}

996

997

998

1000 unsigned CurStageNum,

1001 unsigned InstStageNum) {

1003 updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);

1004 return NewMI;

1005}

1006

1007

1008

1009

1010MachineInstr *ModuloScheduleExpander::cloneAndChangeInstr(

1011 MachineInstr *OldMI, unsigned CurStageNum, unsigned InstStageNum) {

1013 auto It = InstrChanges.find(OldMI);

1014 if (It != InstrChanges.end()) {

1015 std::pair<unsigned, int64_t> RegAndOffset = It->second;

1016 unsigned BasePos, OffsetPos;

1018 return nullptr;

1020 MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);

1021 if (Schedule.getStage(LoopDef) > (signed)InstStageNum)

1022 NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);

1024 }

1025 updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);

1026 return NewMI;

1027}

1028

1029

1030

1031void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,

1032 bool LastDef,

1033 unsigned CurStageNum,

1034 unsigned InstrStageNum,

1035 ValueMapTy *VRMap) {

1038 continue;

1040 if (MO.isDef()) {

1041

1043 Register NewReg = MRI.createVirtualRegister(RC);

1045 VRMap[CurStageNum][reg] = NewReg;

1046 if (LastDef)

1048 } else if (MO.isUse()) {

1050

1051 int DefStageNum = Schedule.getStage(Def);

1052 unsigned StageNum = CurStageNum;

1053 if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {

1054

1055 unsigned StageDiff = (InstrStageNum - DefStageNum);

1056

1057 StageNum -= StageDiff;

1058 }

1059 if (auto It = VRMap[StageNum].find(reg); It != VRMap[StageNum].end())

1060 MO.setReg(It->second);

1061 }

1062 }

1063}

1064

1065

1066

1067

1068MachineInstr *ModuloScheduleExpander::findDefInLoop(unsigned Reg) {

1071 while (Def->isPHI()) {

1072 if (!Visited.insert(Def).second)

1073 break;

1074 for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)

1075 if (Def->getOperand(i + 1).getMBB() == BB) {

1076 Def = MRI.getVRegDef(Def->getOperand(i).getReg());

1077 break;

1078 }

1079 }

1080 return Def;

1081}

1082

1083

1084unsigned ModuloScheduleExpander::getPrevMapVal(

1085 unsigned StageNum, unsigned PhiStage, unsigned LoopVal, unsigned LoopStage,

1087 unsigned PrevVal = 0;

1088 if (StageNum > PhiStage) {

1090 if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))

1091

1092 PrevVal = VRMap[StageNum - 1][LoopVal];

1093 else if (VRMap[StageNum].count(LoopVal))

1094

1095

1096 PrevVal = VRMap[StageNum][LoopVal];

1097 else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)

1098

1099 PrevVal = LoopVal;

1100 else if (StageNum == PhiStage + 1)

1101

1103 else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)

1104

1105 PrevVal =

1106 getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),

1107 LoopStage, VRMap, BB);

1108 }

1109 return PrevVal;

1110}

1111

1112

1113

1114

1115

1116void ModuloScheduleExpander::rewritePhiValues(MachineBasicBlock *NewBB,

1117 unsigned StageNum,

1118 ValueMapTy *VRMap,

1119 InstrMapTy &InstrMap) {

1120 for (auto &PHI : BB->phis()) {

1121 unsigned InitVal = 0;

1122 unsigned LoopVal = 0;

1124 Register PhiDef = PHI.getOperand(0).getReg();

1125

1126 unsigned PhiStage = (unsigned)Schedule.getStage(MRI.getVRegDef(PhiDef));

1127 unsigned LoopStage = (unsigned)Schedule.getStage(MRI.getVRegDef(LoopVal));

1128 unsigned NumPhis = getStagesForPhi(PhiDef);

1129 if (NumPhis > StageNum)

1130 NumPhis = StageNum;

1131 for (unsigned np = 0; np <= NumPhis; ++np) {

1132 unsigned NewVal =

1133 getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);

1134 if (!NewVal)

1135 NewVal = InitVal;

1136 rewriteScheduledInstr(NewBB, InstrMap, StageNum - np, np, &PHI, PhiDef,

1137 NewVal);

1138 }

1139 }

1140}

1141

1142

1143

1144

1145void ModuloScheduleExpander::rewriteScheduledInstr(

1146 MachineBasicBlock *BB, InstrMapTy &InstrMap, unsigned CurStageNum,

1147 unsigned PhiNum, MachineInstr *Phi, unsigned OldReg, unsigned NewReg,

1148 unsigned PrevReg) {

1150 int StagePhi = Schedule.getStage(Phi) + PhiNum;

1151

1152

1157 continue;

1160 continue;

1162 continue;

1163 }

1165 assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");

1167 int StageSched = Schedule.getStage(OrigMI);

1168 int CycleSched = Schedule.getCycle(OrigMI);

1169 unsigned ReplaceReg = 0;

1170

1171 if (StagePhi == StageSched && Phi->isPHI()) {

1172 int CyclePhi = Schedule.getCycle(Phi);

1173 if (PrevReg && InProlog)

1174 ReplaceReg = PrevReg;

1175 else if (PrevReg && !isLoopCarried(*Phi) &&

1176 (CyclePhi <= CycleSched || OrigMI->isPHI()))

1177 ReplaceReg = PrevReg;

1178 else

1179 ReplaceReg = NewReg;

1180 }

1181

1182

1183 if (!InProlog && StagePhi + 1 == StageSched && !isLoopCarried(*Phi))

1184 ReplaceReg = NewReg;

1185 if (StagePhi > StageSched && Phi->isPHI())

1186 ReplaceReg = NewReg;

1187 if (!InProlog && Phi->isPHI() && StagePhi < StageSched)

1188 ReplaceReg = NewReg;

1189 if (ReplaceReg) {

1191 MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));

1192 if (NRC)

1193 UseOp.setReg(ReplaceReg);

1194 else {

1195 Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));

1197 SplitReg)

1198 .addReg(ReplaceReg);

1199 UseOp.setReg(SplitReg);

1200 }

1201 }

1202 }

1203}

1204

1205bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) {

1206 if (Phi.isPHI())

1207 return false;

1208 int DefCycle = Schedule.getCycle(&Phi);

1209 int DefStage = Schedule.getStage(&Phi);

1210

1211 unsigned InitVal = 0;

1212 unsigned LoopVal = 0;

1213 getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);

1215 if (Use || Use->isPHI())

1216 return true;

1219 return (LoopCycle > DefCycle) || (LoopStage <= DefStage);

1220}

1221

1222

1223

1224

1225

1226

1227

1228

1229namespace {

1230

1231

1233 LiveIntervals *LIS, bool KeepSingleSrcPhi = false) {

1234 bool Changed = true;

1235 while (Changed) {

1236 Changed = false;

1239 if (MRI.use_empty(MI.getOperand(0).getReg())) {

1240 if (LIS)

1242 MI.eraseFromParent();

1243 Changed = true;

1244 } else if (!KeepSingleSrcPhi && MI.getNumExplicitOperands() == 3) {

1246 MRI.constrainRegClass(MI.getOperand(1).getReg(),

1247 MRI.getRegClass(MI.getOperand(0).getReg()));

1248 assert(ConstrainRegClass &&

1249 "Expected a valid constrained register class!");

1250 (void)ConstrainRegClass;

1251 MRI.replaceRegWith(MI.getOperand(0).getReg(),

1252 MI.getOperand(1).getReg());

1253 if (LIS)

1255 MI.eraseFromParent();

1256 Changed = true;

1257 }

1258 }

1259 }

1260}

1261

1262

1263

1264class KernelRewriter {

1271

1272

1274

1275

1277

1279

1280

1281

1283

1284

1285

1288

1290

1291public:

1294 void rewrite();

1295};

1296}

1297

1300 : S(S), BB(LoopBB), PreheaderBB(L.getLoopPreheader()),

1301 ExitBB(L.getExitBlock()), MRI(BB->getParent()->getRegInfo()),

1302 TII(BB->getParent()->getSubtarget().getInstrInfo()), LIS(LIS) {

1304 if (PreheaderBB == BB)

1305 PreheaderBB = *std::next(BB->pred_begin());

1306}

1307

1308void KernelRewriter::rewrite() {

1309

1310

1311

1312

1316 if (MI->isPHI())

1317 continue;

1318 if (MI->getParent())

1319 MI->removeFromParent();

1321 if (!FirstMI)

1322 FirstMI = MI;

1323 }

1324 assert(FirstMI && "Failed to find first MI in schedule");

1325

1326

1327

1329 if (LIS)

1331 (I++)->eraseFromParent();

1332 }

1333

1334

1336 if (MI.isPHI() || MI.isTerminator())

1337 continue;

1340 continue;

1343 }

1344 }

1345 EliminateDeadPhis(BB, MRI, LIS);

1346

1347

1348

1349

1350

1351 for (auto MI = BB->getFirstNonPHI(); MI != BB->end(); ++MI) {

1352 if (MI->isPHI()) {

1353 Register R = MI->getOperand(0).getReg();

1355 continue;

1356 }

1357

1360 if (MI.getParent() != BB) {

1362 break;

1363 }

1364 }

1365 }

1366 }

1367}

1368

1371 if (!Producer)

1372 return Reg;

1373

1374 int ConsumerStage = S.getStage(&MI);

1376

1377

1378 if (Producer->getParent() != BB)

1379

1380 return Reg;

1381 int ProducerStage = S.getStage(Producer);

1382 assert(ConsumerStage != -1 &&

1383 "In-loop consumer should always be scheduled!");

1384 assert(ConsumerStage >= ProducerStage);

1385 unsigned StageDiff = ConsumerStage - ProducerStage;

1386

1387 for (unsigned I = 0; I < StageDiff; ++I)

1388 Reg = phi(Reg);

1389 return Reg;

1390 }

1391

1392

1393

1396 auto LoopProducer = Producer;

1397 while (LoopProducer->isPHI() && LoopProducer->getParent() == BB) {

1400 LoopProducer = MRI.getUniqueVRegDef(LoopReg);

1401 assert(LoopProducer);

1402 }

1403 int LoopProducerStage = S.getStage(LoopProducer);

1404

1405 std::optional IllegalPhiDefault;

1406

1407 if (LoopProducerStage == -1) {

1408

1409 } else if (LoopProducerStage > ConsumerStage) {

1410

1411

1412

1413

1414#ifndef NDEBUG

1415 int LoopProducerCycle = S.getCycle(LoopProducer);

1416 int ConsumerCycle = S.getCycle(&MI);

1417#endif

1418 assert(LoopProducerCycle <= ConsumerCycle);

1419 assert(LoopProducerStage == ConsumerStage + 1);

1420

1421

1422

1423

1424

1425

1426 IllegalPhiDefault = Defaults.front();

1428 } else {

1429 assert(ConsumerStage >= LoopProducerStage);

1430 int StageDiff = ConsumerStage - LoopProducerStage;

1431 if (StageDiff > 0) {

1432 LLVM_DEBUG(dbgs() << " -- padding defaults array from " << Defaults.size()

1433 << " to " << (Defaults.size() + StageDiff) << "\n");

1434

1435

1436

1437 Defaults.resize(Defaults.size() + StageDiff,

1438 Defaults.empty() ? std::optional()

1439 : Defaults.back());

1440 }

1441 }

1442

1443

1444 auto DefaultI = Defaults.rbegin();

1445 while (DefaultI != Defaults.rend())

1446 LoopReg = phi(LoopReg, *DefaultI++, MRI.getRegClass(Reg));

1447

1448 if (IllegalPhiDefault) {

1449

1450

1451

1452

1453

1454 auto RC = MRI.getRegClass(Reg);

1458 .addReg(*IllegalPhiDefault)

1459 .addMBB(PreheaderBB)

1461 .addMBB(BB);

1462

1463

1464 S.setStage(IllegalPhi, LoopProducerStage);

1465 return R;

1466 }

1467

1468 return LoopReg;

1469}

1470

1471Register KernelRewriter::phi(Register LoopReg, std::optional InitReg,

1473

1474 if (InitReg) {

1475 auto I = Phis.find({LoopReg, *InitReg});

1476 if (I != Phis.end())

1477 return I->second;

1478 } else {

1479 for (auto &KV : Phis) {

1480 if (KV.first.first == LoopReg)

1481 return KV.second;

1482 }

1483 }

1484

1485

1486

1487 auto I = UndefPhis.find(LoopReg);

1488 if (I != UndefPhis.end()) {

1490 if (!InitReg)

1491

1492

1493 return R;

1494

1496 MI->getOperand(1).setReg(*InitReg);

1497 Phis.insert({{LoopReg, *InitReg}, R});

1499 MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));

1500 assert(ConstrainRegClass && "Expected a valid constrained register class!");

1501 (void)ConstrainRegClass;

1502 UndefPhis.erase(I);

1503 return R;

1504 }

1505

1506

1507 if (!RC)

1508 RC = MRI.getRegClass(LoopReg);

1510 if (InitReg) {

1512 MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));

1513 assert(ConstrainRegClass && "Expected a valid constrained register class!");

1514 (void)ConstrainRegClass;

1515 }

1516 BuildMI(*BB, BB->getFirstNonPHI(), DebugLoc(), TII->get(TargetOpcode::PHI), R)

1517 .addReg(InitReg ? *InitReg : undef(RC))

1518 .addMBB(PreheaderBB)

1521 if (!InitReg)

1522 UndefPhis[LoopReg] = R;

1523 else

1524 Phis[{LoopReg, *InitReg}] = R;

1525 return R;

1526}

1527

1530 if (R == 0) {

1531

1532

1533

1534 R = MRI.createVirtualRegister(RC);

1535 auto *InsertBB = &PreheaderBB->getParent()->front();

1536 BuildMI(*InsertBB, InsertBB->getFirstTerminator(), DebugLoc(),

1537 TII->get(TargetOpcode::IMPLICIT_DEF), R);

1538 }

1539 return R;

1540}

1541

1542namespace {

1543

1544

1545

1546class KernelOperandInfo {

1552

1553public:

1559 while (isRegInLoop(MO)) {

1561 if (MI->isFullCopy()) {

1562 MO = &MI->getOperand(1);

1563 continue;

1564 }

1565 if (MI->isPHI())

1566 break;

1567

1568 if (IllegalPhis.count(MI)) {

1569 MO = &MI->getOperand(3);

1570 continue;

1571 }

1572

1574 MO = MI->getOperand(2).getMBB() == BB ? &MI->getOperand(1)

1575 : &MI->getOperand(3);

1577 }

1579 }

1580

1582 return PhiDefaults.size() == Other.PhiDefaults.size();

1583 }

1584

1586 OS << "use of " << *Source << ": distance(" << PhiDefaults.size() << ") in "

1587 << *Source->getParent();

1588 }

1589

1590private:

1593 MRI.getVRegDef(MO->getReg())->getParent() == BB;

1594 }

1595};

1596}

1597

1603 else

1605 for (auto I = BB->begin(), NI = NewBB->begin(); I->isTerminator();

1606 ++I, ++NI) {

1611 }

1612 return NewBB;

1613}

1614

1616 int MinStage) {

1618 I != std::next(MB->getFirstNonPHI()->getReverseIterator());) {

1621 if (Stage == -1 || Stage >= MinStage)

1622 continue;

1623

1627

1628

1631 MI->getParent());

1633 }

1634 for (auto &Sub : Subs)

1635 Sub.first->substituteRegister(DefMO.getReg(), Sub.second, 0,

1637 }

1638 if (LIS)

1640 MI->eraseFromParent();

1641 }

1642}

1643

1650 if (MI.isPHI()) {

1651

1652

1654

1655

1656 Register PhiR = MI.getOperand(0).getReg();

1665 Remaps[PhiR] = NR;

1666 }

1667 }

1669 continue;

1670 MI.removeFromParent();

1671 DestBB->insert(InsertPt, &MI);

1674 BlockMIs.erase({SourceBB, KernelMI});

1675 }

1678 assert(MI.getNumOperands() == 3);

1680

1681

1682 if (getStage(Def) == Stage) {

1683 Register PhiReg = MI.getOperand(0).getReg();

1684 assert(Def->findRegisterDefOperandIdx(MI.getOperand(1).getReg(),

1685 nullptr) != -1);

1687 MI.getOperand(0).setReg(PhiReg);

1689 }

1690 }

1691 for (auto *P : PhiToDelete)

1692 P->eraseFromParent();

1694

1695

1698 DestBB->insert(InsertPt, NewMI);

1699 Register OrigR = Phi->getOperand(0).getReg();

1704 Remaps[OrigR] = R;

1708 return R;

1709 };

1712 if (!MO.isReg())

1713 continue;

1714 if (auto It = Remaps.find(MO.getReg()); It != Remaps.end())

1715 MO.setReg(It->second);

1716 else {

1717

1718

1720 if (Use && Use->isPHI() && Use->getParent() == SourceBB) {

1723 }

1724 }

1725 }

1726 }

1727}

1728

1735 for (unsigned I = 0; I < distance; ++I) {

1738 unsigned LoopRegIdx = 3, InitRegIdx = 1;

1740 std::swap(LoopRegIdx, InitRegIdx);

1741 CanonicalUseReg = CanonicalUse->getOperand(LoopRegIdx).getReg();

1743 }

1744 return CanonicalUseReg;

1745}

1746

1752

1753

1754 LS.reset();

1756 LS[I] = true;

1760 }

1761

1762

1763

1764

1765

1766

1767

1769 EliminateDeadPhis(ExitingBB, MRI, LIS, true);

1770

1771

1772

1773

1774

1775

1776

1777

1778

1779

1780

1781

1782

1783

1784

1785

1790

1791

1792 EliminateDeadPhis(B, MRI, LIS, true);

1795 }

1796 for (size_t I = 0; I < Epilogs.size(); I++) {

1797 LS.reset();

1798 for (size_t J = I; J < Epilogs.size(); J++) {

1799 int Iteration = J;

1801

1802 for (size_t K = Iteration; K > I; K--)

1804 LS[Stage] = true;

1805 }

1808 }

1809

1810

1811

1812

1813 auto PI = Prologs.begin();

1814 auto EI = Epilogs.begin();

1816 for (; PI != Prologs.end(); ++PI, ++EI) {

1818 (*PI)->addSuccessor(*EI);

1820 Register Reg = MI.getOperand(1).getReg();

1822 if (Use && Use->getParent() == Pred) {

1824 if (CanonicalUse->isPHI()) {

1825

1826

1827

1829 }

1831 }

1834 }

1835 }

1836

1837

1842

1843

1845 for (auto I = B->instr_rbegin();

1846 I != std::next(B->getFirstNonPHI()->getReverseIterator());) {

1849 }

1850 }

1852 if (LIS)

1854 MI->eraseFromParent();

1855 }

1857

1858

1860 EliminateDeadPhis(B, MRI, LIS);

1861 EliminateDeadPhis(ExitingBB, MRI, LIS);

1862}

1863

1867 if (Exit == BB)

1869

1872

1873

1876 Register OldR = MI.getOperand(3).getReg();

1880 if (Use.getParent() != BB)

1883 Use->substituteRegister(OldR, R, 0,

1890 }

1892 Exit->replacePhiUsesWith(BB, NewBB);

1894

1898 (void)CanAnalyzeBr;

1899 assert(CanAnalyzeBr && "Must be able to analyze the loop branch!");

1904 return NewBB;

1905}

1906

1911 unsigned OpIdx = MI->findRegisterDefOperandIdx(Reg, nullptr);

1913}

1914

1916 if (MI->isPHI()) {

1917

1918

1919 Register PhiR = MI->getOperand(0).getReg();

1920 Register R = MI->getOperand(3).getReg();

1922 if (RMIStage != -1 && AvailableStages[MI->getParent()].test(RMIStage))

1923 R = MI->getOperand(1).getReg();

1926

1927

1928 MI->getOperand(0).setReg(PhiR);

1930 return;

1931 }

1932

1934 if (Stage == -1 || LiveStages.count(MI->getParent()) == 0 ||

1936

1937 return;

1938

1942

1943

1946 MI->getParent());

1948 }

1949 for (auto &Sub : Subs)

1950 Sub.first->substituteRegister(DefMO.getReg(), Sub.second, 0,

1952 }

1953 if (LIS)

1955 MI->eraseFromParent();

1956}

1957

1959

1960 bool KernelDisposed = false;

1963 ++PI, ++EI, --TC) {

1969 std::optional StaticallyGreater =

1971 if (!StaticallyGreater) {

1972 LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n");

1973

1975 } else if (*StaticallyGreater == false) {

1976 LLVM_DEBUG(dbgs() << "Static-false: TC > " << TC << "\n");

1977

1978

1979 Prolog->removeSuccessor(Fallthrough);

1981 P.removeOperand(2);

1982 P.removeOperand(1);

1983 }

1985 KernelDisposed = true;

1986 } else {

1987 LLVM_DEBUG(dbgs() << "Static-true: TC > " << TC << "\n");

1988

1991 P.removeOperand(4);

1992 P.removeOperand(3);

1993 }

1994 }

1995 }

1996

1997 if (!KernelDisposed) {

2000 } else {

2002 }

2003}

2004

2007 KR.rewrite();

2008}

2009

2016

2020}

2021

2025

2026

2027

2028 std::string ScheduleDump;

2032

2033

2034

2035 assert(LIS && "Requires LiveIntervals!");

2040 if (!ExpandedKernel) {

2041

2043 return;

2044 }

2045

2047

2048

2050 KR.rewrite();

2052

2053

2054

2057 if (NI->isPHI())

2058 IllegalPhis.insert(&*NI);

2059 }

2060

2061

2062

2064 auto OI = ExpandedKernel->begin();

2066 for (; !OI->isTerminator() && !NI->isTerminator(); ++OI, ++NI) {

2067 while (OI->isPHI() || OI->isFullCopy())

2068 ++OI;

2069 while (NI->isPHI() || NI->isFullCopy())

2070 ++NI;

2071 assert(OI->getOpcode() == NI->getOpcode() && "Opcodes don't match?!");

2072

2073 for (auto OOpI = OI->operands_begin(), NOpI = NI->operands_begin();

2074 OOpI != OI->operands_end(); ++OOpI, ++NOpI)

2075 KOIs.emplace_back(KernelOperandInfo(&*OOpI, MRI, IllegalPhis),

2076 KernelOperandInfo(&*NOpI, MRI, IllegalPhis));

2077 }

2078

2079 bool Failed = false;

2080 for (auto &OldAndNew : KOIs) {

2081 if (OldAndNew.first == OldAndNew.second)

2082 continue;

2084 errs() << "Modulo kernel validation error: [\n";

2085 errs() << " [golden] ";

2086 OldAndNew.first.print(errs());

2087 errs() << " ";

2088 OldAndNew.second.print(errs());

2089 errs() << "]\n";

2090 }

2091

2093 errs() << "Golden reference kernel:\n";

2095 errs() << "New kernel:\n";

2097 errs() << ScheduleDump;

2099 "Modulo kernel validation (-pipeliner-experimental-cg) failed");

2100 }

2101

2102

2103

2106}

2107

2110

2111

2113

2114 return NewMI;

2115}

2116

2117

2118

2119

2122 if (Exit->pred_size() == 1)

2123 return Exit;

2124

2127

2130 MF->insert(Loop->getIterator(), NewExit);

2131

2136 FBB = NewExit;

2137 else if (FBB == Loop)

2138 TBB = NewExit;

2139 else

2143 Loop->replaceSuccessor(Exit, NewExit);

2144 TII->insertUnconditionalBranch(*NewExit, Exit, DebugLoc());

2146

2147 Exit->replacePhiUsesWith(Loop, NewExit);

2148

2149 return NewExit;

2150}

2151

2152

2153

2154

2156 int RequiredTC,

2157 InstrMapTy &LastStage0Insts,

2161 LoopInfo->createRemainingIterationsGreaterCondition(RequiredTC, MBB, Cond,

2162 LastStage0Insts);

2163

2165

2166

2170 } else {

2172 }

2173}

2174

2175

2176

2177

2178

2179

2180void ModuloScheduleExpanderMVE::generatePipelinedLoop() {

2181

2182

2183

2184

2185

2186

2187

2188

2189

2190

2191

2192

2193

2194

2195

2196

2197

2198

2199

2200

2201

2202

2203

2204

2205

2206

2207

2208

2209

2210

2211

2212

2213

2214

2215

2216

2217

2218

2219

2220

2221

2222

2223

2224

2225

2226

2227

2228

2229

2230

2231

2232

2233

2234

2235

2236

2237

2238

2239

2240

2241

2242

2243

2244

2245

2246

2247

2248

2249

2250

2251

2252

2253

2255 assert(LoopInfo && "Must be able to analyze loop!");

2256

2257 calcNumUnroll();

2258

2264

2270

2272

2275

2279

2282

2283 Prolog->addSuccessor(NewKernel);

2284

2287

2288 Epilog->addSuccessor(NewPreheader);

2289 Epilog->addSuccessor(NewExit);

2290

2291 InstrMapTy LastStage0Insts;

2292 insertCondBranch(*Check, Schedule.getNumStages() + NumUnroll - 2,

2293 LastStage0Insts, *Prolog, *NewPreheader);

2294

2295

2296

2298 generateProlog(PrologVRMap);

2299 generateKernel(PrologVRMap, KernelVRMap, LastStage0Insts);

2300 generateEpilog(KernelVRMap, EpilogVRMap, LastStage0Insts);

2301}

2302

2303

2304void ModuloScheduleExpanderMVE::updateInstrUse(

2308

2309

2310

2311

2312

2313

2315 if (!UseMO.isReg() || !UseMO.getReg().isVirtual())

2316 continue;

2317 int DiffStage = 0;

2318 Register OrigReg = UseMO.getReg();

2320 if (!DefInst || DefInst->getParent() != OrigKernel)

2321 continue;

2322 unsigned InitReg = 0;

2323 unsigned DefReg = OrigReg;

2324 if (DefInst->isPHI()) {

2325 ++DiffStage;

2326 unsigned LoopReg;

2327 getPhiRegs(*DefInst, OrigKernel, InitReg, LoopReg);

2328

2329 DefReg = LoopReg;

2330 DefInst = MRI.getVRegDef(LoopReg);

2331 }

2332 unsigned DefStageNum = Schedule.getStage(DefInst);

2333 DiffStage += StageNum - DefStageNum;

2335 if (PhaseNum >= DiffStage && CurVRMap[PhaseNum - DiffStage].count(DefReg))

2336

2337 NewReg = CurVRMap[PhaseNum - DiffStage][DefReg];

2338 else if (!PrevVRMap)

2339

2340

2341 NewReg = InitReg;

2342 else

2343

2344

2345

2346

2347 NewReg = (*PrevVRMap)[PrevVRMap->size() - (DiffStage - PhaseNum)][DefReg];

2348

2350 MRI.constrainRegClass(NewReg, MRI.getRegClass(OrigReg));

2351 if (NRC)

2352 UseMO.setReg(NewReg);

2353 else {

2354 Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));

2355 BuildMI(*OrigKernel, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY),

2356 SplitReg)

2358 UseMO.setReg(SplitReg);

2359 }

2360 }

2361}

2362

2363

2364

2367 unsigned InitVal, LoopVal;

2369 if (LoopVal == Reg)

2370 return Φ

2371 }

2372 return nullptr;

2373}

2374

2375

2376void ModuloScheduleExpanderMVE::generatePhi(

2381 int StageNum = Schedule.getStage(OrigMI);

2382 bool UsePrologReg;

2383 if (Schedule.getNumStages() - NumUnroll + UnrollNum - 1 >= StageNum)

2384 UsePrologReg = true;

2385 else if (Schedule.getNumStages() - NumUnroll + UnrollNum == StageNum)

2386 UsePrologReg = false;

2387 else

2388 return;

2389

2390

2391

2392

2393

2394

2395

2396

2397

2398

2399

2400

2401

2402

2403

2404

2405

2406

2407

2408

2409

2410

2411

2412

2413

2414

2415

2416

2417

2418

2419

2420

2422 if (!DefMO.isReg() || DefMO.isDead())

2423 continue;

2424 Register OrigReg = DefMO.getReg();

2425 auto NewReg = KernelVRMap[UnrollNum].find(OrigReg);

2426 if (NewReg == KernelVRMap[UnrollNum].end())

2427 continue;

2429 if (UsePrologReg) {

2430 int PrologNum = Schedule.getNumStages() - NumUnroll + UnrollNum - 1;

2431 CorrespondReg = PrologVRMap[PrologNum][OrigReg];

2432 } else {

2434 if (!Phi)

2435 continue;

2436 CorrespondReg = getInitPhiReg(*Phi, OrigKernel);

2437 }

2438

2440 Register PhiReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));

2442 TII->get(TargetOpcode::PHI), PhiReg)

2443 .addReg(NewReg->second)

2445 .addReg(CorrespondReg)

2447 PhiVRMap[UnrollNum][OrigReg] = PhiReg;

2448 }

2449}

2450

2453 for (unsigned Idx = 1; Idx < Phi.getNumOperands(); Idx += 2) {

2454 if (Phi.getOperand(Idx).getReg() == OrigReg) {

2455 Phi.getOperand(Idx).setReg(NewReg);

2456 Phi.getOperand(Idx + 1).setMBB(NewMBB);

2457 return;

2458 }

2459 }

2460}

2461

2462

2463void ModuloScheduleExpanderMVE::mergeRegUsesAfterPipeline(Register OrigReg,

2468 E = MRI.use_end();

2469 I != E; ++I) {

2471 if (O.getParent()->getParent() != OrigKernel &&

2472 O.getParent()->getParent() != Prolog &&

2473 O.getParent()->getParent() != NewKernel &&

2474 O.getParent()->getParent() != Epilog)

2476 if (O.getParent()->getParent() == OrigKernel && O.getParent()->isPHI())

2478 }

2479

2480

2481

2482 if (!UsesAfterLoop.empty()) {

2483 Register PhiReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));

2485 TII->get(TargetOpcode::PHI), PhiReg)

2490

2493

2496 }

2497

2498

2499

2500 if (!LoopPhis.empty()) {

2502 unsigned InitReg, LoopReg;

2503 getPhiRegs(*Phi, OrigKernel, InitReg, LoopReg);

2504 Register NewInit = MRI.createVirtualRegister(MRI.getRegClass(InitReg));

2506 TII->get(TargetOpcode::PHI), NewInit)

2511 replacePhiSrc(*Phi, InitReg, NewInit, NewPreheader);

2512 }

2513 }

2514}

2515

2516void ModuloScheduleExpanderMVE::generateProlog(

2518 PrologVRMap.clear();

2521 for (int PrologNum = 0; PrologNum < Schedule.getNumStages() - 1;

2522 ++PrologNum) {

2524 if (MI->isPHI())

2525 continue;

2526 int StageNum = Schedule.getStage(MI);

2527 if (StageNum > PrologNum)

2528 continue;

2530 updateInstrDef(NewMI, PrologVRMap[PrologNum], false);

2531 NewMIMap[NewMI] = {PrologNum, StageNum};

2532 Prolog->push_back(NewMI);

2533 }

2534 }

2535

2536 for (auto I : NewMIMap) {

2538 int PrologNum = I.second.first;

2539 int StageNum = I.second.second;

2540 updateInstrUse(MI, StageNum, PrologNum, PrologVRMap, nullptr);

2541 }

2542

2544 dbgs() << "prolog:\n";

2546 });

2547}

2548

2549void ModuloScheduleExpanderMVE::generateKernel(

2552 KernelVRMap.clear();

2553 KernelVRMap.resize(NumUnroll);

2555 PhiVRMap.resize(NumUnroll);

2558 for (int UnrollNum = 0; UnrollNum < NumUnroll; ++UnrollNum) {

2560 if (MI->isPHI())

2561 continue;

2562 int StageNum = Schedule.getStage(MI);

2564 if (UnrollNum == NumUnroll - 1)

2565 LastStage0Insts[MI] = NewMI;

2566 updateInstrDef(NewMI, KernelVRMap[UnrollNum],

2567 (UnrollNum == NumUnroll - 1 && StageNum == 0));

2568 generatePhi(MI, UnrollNum, PrologVRMap, KernelVRMap, PhiVRMap);

2569 NewMIMap[NewMI] = {UnrollNum, StageNum};

2571 }

2572 }

2573

2574 for (auto I : NewMIMap) {

2576 int UnrollNum = I.second.first;

2577 int StageNum = I.second.second;

2578 updateInstrUse(MI, StageNum, UnrollNum, KernelVRMap, &PhiVRMap);

2579 }

2580

2581

2582 insertCondBranch(*NewKernel, NumUnroll - 1, LastStage0Insts, *NewKernel,

2583 *Epilog);

2584

2586 dbgs() << "kernel:\n";

2587 NewKernel->dump();

2588 });

2589}

2590

2591void ModuloScheduleExpanderMVE::generateEpilog(

2594 EpilogVRMap.clear();

2597 for (int EpilogNum = 0; EpilogNum < Schedule.getNumStages() - 1;

2598 ++EpilogNum) {

2600 if (MI->isPHI())

2601 continue;

2602 int StageNum = Schedule.getStage(MI);

2603 if (StageNum <= EpilogNum)

2604 continue;

2606 updateInstrDef(NewMI, EpilogVRMap[EpilogNum], StageNum - 1 == EpilogNum);

2607 NewMIMap[NewMI] = {EpilogNum, StageNum};

2608 Epilog->push_back(NewMI);

2609 }

2610 }

2611

2612 for (auto I : NewMIMap) {

2614 int EpilogNum = I.second.first;

2615 int StageNum = I.second.second;

2616 updateInstrUse(MI, StageNum, EpilogNum, EpilogVRMap, &KernelVRMap);

2617 }

2618

2619

2620

2621

2622

2623 insertCondBranch(*Epilog, 0, LastStage0Insts, *NewPreheader, *NewExit);

2624

2626 dbgs() << "epilog:\n";

2628 });

2629}

2630

2631

2632void ModuloScheduleExpanderMVE::calcNumUnroll() {

2634 NumUnroll = 1;

2637

2639 if (MI->isPHI())

2640 continue;

2641 int StageNum = Schedule.getStage(MI);

2644 continue;

2647 continue;

2648

2649 int NumUnrollLocal = 1;

2651 ++NumUnrollLocal;

2652

2653

2655 }

2656 NumUnrollLocal += StageNum - Schedule.getStage(DefMI);

2657 if (Inst2Idx[MI] <= Inst2Idx[DefMI])

2658 --NumUnrollLocal;

2659 NumUnroll = std::max(NumUnroll, NumUnrollLocal);

2660 }

2661 }

2662 LLVM_DEBUG(dbgs() << "NumUnroll: " << NumUnroll << "\n");

2663}

2664

2665

2666

2667

2668void ModuloScheduleExpanderMVE::updateInstrDef(MachineInstr *NewMI,

2669 ValueMapTy &VRMap,

2670 bool LastDef) {

2673 continue;

2676 Register NewReg = MRI.createVirtualRegister(RC);

2678 VRMap[Reg] = NewReg;

2679 if (LastDef)

2680 mergeRegUsesAfterPipeline(Reg, NewReg);

2681 }

2682}

2683

2688

2690

2691 generatePipelinedLoop();

2692}

2693

2694

2696 if (!L.getExitBlock()) {

2697 LLVM_DEBUG(dbgs() << "Can not apply MVE expander: No single exit block.\n");

2698 return false;

2699 }

2700

2703

2704

2705

2708

2709

2713 if (Ref.getParent() != BB || Ref.isPHI()) {

2714 LLVM_DEBUG(dbgs() << "Can not apply MVE expander: A phi result is "

2715 "referenced outside of the loop or by phi.\n");

2716 return false;

2717 }

2718

2719

2720

2721

2722 unsigned InitVal, LoopVal;

2724 if (Register(LoopVal).isVirtual() ||

2725 MRI.getVRegDef(LoopVal)->getParent() != BB) {

2727 dbgs() << "Can not apply MVE expander: A phi source value coming "

2728 "from the loop is not defined in the loop.\n");

2729 return false;

2730 }

2731 if (UsedByPhi.count(LoopVal)) {

2732 LLVM_DEBUG(dbgs() << "Can not apply MVE expander: A value defined in the "

2733 "loop is referenced by two or more phis.\n");

2734 return false;

2735 }

2736 UsedByPhi.insert(LoopVal);

2737 }

2738

2739 return true;

2740}

2741

2742

2743

2744

2745

2746

2747

2748

2749

2750

2751

2752

2753

2754

2755namespace {

2757public:

2758 static char ID;

2759

2762 }

2763

2764 bool runOnMachineFunction(MachineFunction &MF) override;

2766

2767 void getAnalysisUsage(AnalysisUsage &AU) const override {

2771 }

2772};

2773}

2774

2775char ModuloScheduleTest::ID = 0;

2776

2778 "Modulo Schedule test pass", false, false)

2783

2784bool ModuloScheduleTest::runOnMachineFunction(MachineFunction &MF) {

2785 MachineLoopInfo &MLI = getAnalysis().getLI();

2786 for (auto *L : MLI) {

2787 if (L->getTopBlock() != L->getBottomBlock())

2788 continue;

2789 runOnLoop(MF, *L);

2790 return false;

2791 }

2792 return false;

2793}

2794

2796 std::pair<StringRef, StringRef> StageAndCycle = getToken(S, "_");

2797 std::pair<StringRef, StringRef> StageTokenAndValue =

2798 getToken(StageAndCycle.first, "-");

2799 std::pair<StringRef, StringRef> CycleTokenAndValue =

2800 getToken(StageAndCycle.second, "-");

2801 if (StageTokenAndValue.first != "Stage" ||

2802 CycleTokenAndValue.first != "_Cycle") {

2804 "Bad post-instr symbol syntax: see comment in ModuloScheduleTest");

2805 return;

2806 }

2807

2808 StageTokenAndValue.second.drop_front().getAsInteger(10, Stage);

2809 CycleTokenAndValue.second.drop_front().getAsInteger(10, Cycle);

2810

2811 dbgs() << " Stage=" << Stage << ", Cycle=" << Cycle << "\n";

2812}

2813

2815 LiveIntervals &LIS = getAnalysis().getLIS();

2817 dbgs() << "--- ModuloScheduleTest running on BB#" << BB->getNumber() << "\n";

2818

2820 std::vector<MachineInstr *> Instrs;

2822 if (MI.isTerminator())

2823 continue;

2824 Instrs.push_back(&MI);

2826 dbgs() << "Parsing post-instr symbol for " << MI;

2828 }

2829 }

2830

2832 std::move(Stage));

2835 MSE.expand();

2836 MSE.cleanup();

2837}

2838

2839

2840

2841

2842

2849 MI->setPostInstrSymbol(MF, Sym);

2850 }

2851}

unsigned const MachineRegisterInfo * MRI

MachineInstrBuilder & UseMI

MachineInstrBuilder MachineInstrBuilder & DefMI

static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)

static const Function * getParent(const Value *V)

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")

Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx

std::optional< std::vector< StOtherPiece > > Other

DenseMap< Block *, BlockRelaxAux > Blocks

global merge Global merge function pass

const HexagonInstrInfo * TII

static unsigned getLoopPhiReg(const MachineInstr &Phi, const MachineBasicBlock *LoopBB)

Return the Phi register value that comes the loop block.

static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)

Return the register values for the operands of a Phi instruction.

unsigned const TargetRegisterInfo * TRI

This file provides utility analysis objects describing memory locations.

static MachineBasicBlock * createDedicatedExit(MachineBasicBlock *Loop, MachineBasicBlock *Exit)

Create a dedicated exit for Loop.

static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming)

Remove the incoming block from the Phis in a basic block.

static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)

Return the Phi register value that comes the loop block.

static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg, MachineBasicBlock *MBB, MachineRegisterInfo &MRI, LiveIntervals &LIS)

Replace all uses of FromReg that appear outside the specified basic block with ToReg.

static void replacePhiSrc(MachineInstr &Phi, Register OrigReg, Register NewReg, MachineBasicBlock *NewMBB)

static MachineInstr * getLoopPhiUser(Register Reg, MachineBasicBlock *Loop)

Return a phi if Reg is referenced by the phi.

static void parseSymbolString(StringRef S, int &Cycle, int &Stage)

static cl::opt< bool > SwapBranchTargetsMVE("pipeliner-swap-branch-targets-mve", cl::Hidden, cl::init(false), cl::desc("Swap target blocks of a conditional branch for MVE expander"))

static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB, MachineRegisterInfo &MRI)

Return true if the register has a use that occurs outside the specified loop.

static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)

Return the Phi register value that comes from the incoming block.

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB

const SmallVectorImpl< MachineOperand > & Cond

Remove Loads Into Fake Uses

assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

Represent the analysis usage information of a pass.

AnalysisUsage & addRequired()

This class represents an Operation in the Expression.

iterator find(const_arg_type_t< KeyT > Val)

DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator

Implements a dense probed hash-table based set.

A possibly irreducible generalization of a Loop.

unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override

Remove the branching code at the end of the specific MBB.

bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override

Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....

unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override

Insert branch code into the end of the specified MachineBasicBlock.

bool hasInterval(Register Reg) const

void insertMBBInMaps(MachineBasicBlock *MBB)

void RemoveMachineInstrFromMaps(MachineInstr &MI)

LiveInterval & createEmptyInterval(Register Reg)

Interval creation.

static constexpr LocationSize beforeOrAfterPointer()

Any location before or after the base pointer (but still within the underlying object).

BlockT * getExitBlock() const

If getExitBlocks would return exactly one block, return that block.

BlockT * getLoopPreheader() const

If there is a preheader for this loop, return it.

Represents a single loop in the control flow graph.

MCSymbol * getOrCreateSymbol(const Twine &Name)

Lookup the symbol inside with the specified Name.

const MCInstrDesc & get(unsigned Opcode) const

Return the machine instruction descriptor that corresponds to the specified instruction opcode.

MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...

void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)

Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...

void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New)

Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.

instr_iterator instr_begin()

void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)

Replace successor OLD with NEW and update probability info.

void transferSuccessors(MachineBasicBlock *FromMBB)

Transfers all the successors from MBB to this machine basic block (i.e., copies all the successors Fr...

instr_iterator insert(instr_iterator I, MachineInstr *M)

Insert MI into the instruction list before I, possibly inside a bundle.

iterator_range< iterator > phis()

Returns a range that iterates over the phis in the basic block.

reverse_instr_iterator instr_rbegin()

int getNumber() const

MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...

void push_back(MachineInstr *MI)

const BasicBlock * getBasicBlock() const

Return the LLVM basic block that this instance corresponded to originally.

succ_iterator succ_begin()

iterator getFirstTerminator()

Returns an iterator to the first terminator instruction of this basic block.

void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())

Add Succ as a successor of this MachineBasicBlock.

SmallVectorImpl< MachineBasicBlock * >::iterator succ_iterator

void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)

Remove successor from the successors list of this MachineBasicBlock.

iterator getFirstNonPHI()

Returns a pointer to the first instruction in this block that is not a PHINode instruction.

void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const

reverse_instr_iterator instr_rend()

Instructions::iterator instr_iterator

pred_iterator pred_begin()

void eraseFromParent()

This method unlinks 'this' from the containing function and deletes it.

instr_iterator instr_end()

const MachineFunction * getParent() const

Return the MachineFunction containing this basic block.

iterator_range< iterator > terminators()

instr_iterator getFirstInstrTerminator()

Same getFirstTerminator but it ignores bundles and return an instr_iterator instead.

Instructions::reverse_iterator reverse_instr_iterator

MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...

void getAnalysisUsage(AnalysisUsage &AU) const override

getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)

getMachineMemOperand - Allocate a new MachineMemOperand.

MCContext & getContext() const

MachineRegisterInfo & getRegInfo()

getRegInfo - Return information about the registers currently in use.

MachineInstr * CloneMachineInstr(const MachineInstr *Orig)

Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...

const MachineBasicBlock & front() const

MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)

CreateMachineBasicBlock - Allocate a new MachineBasicBlock.

void insert(iterator MBBI, MachineBasicBlock *MBB)

Register getReg(unsigned Idx) const

Get the register for the operand index.

const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const

Add a new virtual register operand.

const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const

Representation of each machine instruction.

const MachineBasicBlock * getParent() const

unsigned getNumOperands() const

Retuns the total number of operands.

bool memoperands_empty() const

Return true if we don't have any memory operands which described the memory access done by this instr...

void setMemRefs(MachineFunction &MF, ArrayRef< MachineMemOperand * > MemRefs)

Assign this MachineInstr's memory reference descriptor list.

void dropMemRefs(MachineFunction &MF)

Clear this MachineInstr's memory reference descriptor list.

iterator_range< mop_iterator > operands()

iterator_range< mop_iterator > defs()

Returns a range over all explicit operands that are register definitions.

ArrayRef< MachineMemOperand * > memoperands() const

Access to memory operands of the instruction.

const DebugLoc & getDebugLoc() const

Returns the debug location id of this MachineInstr.

const MachineOperand & getOperand(unsigned i) const

iterator_range< filtered_mop_iterator > all_defs()

Returns an iterator range over all operands that are (explicit or implicit) register defs.

MachineBasicBlock * getTopBlock()

Return the "top" block in the loop, which is the first block in the linear layout,...

A description of a memory reference used in the backend.

MachineOperand class - Representation of each machine instruction operand.

void setImm(int64_t immVal)

bool isReg() const

isReg - Tests if this is a MO_Register operand.

MachineBasicBlock * getMBB() const

void setReg(Register Reg)

Change the register this operand corresponds to.

MachineInstr * getParent()

getParent - Return the instruction that this operand belongs to.

void setMBB(MachineBasicBlock *MBB)

Register getReg() const

getReg - Returns the register number.

static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)

static MachineOperand CreateMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0)

defusechain_iterator - This class provides iterator support for machine operands in the function that...

reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...

MachineRegisterInfo - Keep track of information for virtual and physical registers,...

const TargetRegisterClass * getRegClass(Register Reg) const

Return the register class of the specified virtual register.

MachineInstr * getVRegDef(Register Reg) const

getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...

use_instr_iterator use_instr_begin(Register RegNo) const

Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")

createVirtualRegister - Create and return a new virtual register in the function with the specified r...

static use_instr_iterator use_instr_end()

void setRegClass(Register Reg, const TargetRegisterClass *RC)

setRegClass - Set the register class of the specified virtual register.

iterator_range< use_instr_iterator > use_instructions(Register Reg) const

const TargetRegisterInfo * getTargetRegisterInfo() const

use_iterator use_begin(Register RegNo) const

static use_iterator use_end()

iterator_range< use_iterator > use_operands(Register Reg) const

void replaceRegWith(Register FromReg, Register ToReg)

replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.

MachineInstr * getUniqueVRegDef(Register Reg) const

getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...

static bool canApply(MachineLoop &L)

Check if ModuloScheduleExpanderMVE can be applied to L.

The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...

MachineBasicBlock * getRewrittenKernel()

Returns the newly rewritten kernel block, or nullptr if this was optimized away.

void cleanup()

Performs final cleanup after expansion.

void expand()

Performs the actual expansion.

void annotate()

Performs the annotation.

Represents a schedule for a single-block loop.

int getNumStages() const

Return the number of stages contained in this schedule, which is the largest stage index + 1.

MachineLoop * getLoop() const

Return the single-block loop being scheduled.

ArrayRef< MachineInstr * > getInstructions()

Return the rescheduled instructions in order.

void print(raw_ostream &OS)

int getCycle(MachineInstr *MI)

Return the cycle that MI is scheduled at, or -1.

void setStage(MachineInstr *MI, int MIStage)

Set the stage of a newly created instruction.

int getStage(MachineInstr *MI)

Return the stage that MI is scheduled in, or -1.

static PassRegistry * getPassRegistry()

getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...

std::deque< MachineBasicBlock * > PeeledBack

SmallVector< MachineInstr *, 4 > IllegalPhisToDelete

Illegal phis that need to be deleted once we re-link stages.

DenseMap< MachineInstr *, MachineInstr * > CanonicalMIs

CanonicalMIs and BlockMIs form a bidirectional map between any of the loop kernel clones.

SmallVector< MachineBasicBlock *, 4 > Prologs

All prolog and epilog blocks.

MachineBasicBlock * peelKernel(LoopPeelDirection LPD)

Peels one iteration of the rewritten kernel (BB) in the specified direction.

ModuloSchedule & Schedule

std::deque< MachineBasicBlock * > PeeledFront

State passed from peelKernel to peelPrologAndEpilogs().

unsigned getStage(MachineInstr *MI)

Helper to get the stage of an instruction in the schedule.

void rewriteUsesOf(MachineInstr *MI)

Change all users of MI, if MI is predicated out (LiveStages[MI->getParent()] == false).

SmallVector< MachineBasicBlock *, 4 > Epilogs

DenseMap< MachineBasicBlock *, BitVector > AvailableStages

For every block, the stages that are available.

DenseMap< std::pair< MachineBasicBlock *, MachineInstr * >, MachineInstr * > BlockMIs

Register getEquivalentRegisterIn(Register Reg, MachineBasicBlock *BB)

All prolog and epilog blocks are clones of the kernel, so any produced register in one block has an c...

MachineBasicBlock * Preheader

The original loop preheader.

void rewriteKernel()

Converts BB from the original loop body to the rewritten, pipelined steady-state.

DenseMap< MachineInstr *, unsigned > PhiNodeLoopIteration

When peeling the epilogue keep track of the distance between the phi nodes and the kernel.

DenseMap< MachineBasicBlock *, BitVector > LiveStages

For every block, the stages that are produced.

const TargetInstrInfo * TII

void filterInstructions(MachineBasicBlock *MB, int MinStage)

void peelPrologAndEpilogs()

Peel the kernel forwards and backwards to produce prologs and epilogs, and stitch them together.

MachineBasicBlock * BB

The original loop block that gets rewritten in-place.

void fixupBranches()

Insert branches between prologs, kernel and epilogs.

MachineBasicBlock * CreateLCSSAExitingBlock()

Create a poor-man's LCSSA by cloning only the PHIs from the kernel block to a block dominated by all ...

void validateAgainstModuloScheduleExpander()

Runs ModuloScheduleExpander and treats it as a golden input to validate aspects of the code generated...

Register getPhiCanonicalReg(MachineInstr *CanonicalPhi, MachineInstr *Phi)

Helper function to find the right canonical register for a phi instruction coming from a peeled out p...

MachineRegisterInfo & MRI

void moveStageBetweenBlocks(MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage)

Wrapper class representing virtual and physical registers.

constexpr bool isValid() const

constexpr bool isVirtual() const

Return true if the specified register number is in the virtual register namespace.

constexpr bool isPhysical() const

Return true if the specified register number is in the physical register namespace.

A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...

size_type count(ConstPtrType Ptr) const

count - Return 1 if the specified pointer is in the set, 0 otherwise.

std::pair< iterator, bool > insert(PtrType Ptr)

Inserts Ptr if and only if there is no element in the container equal to Ptr.

SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

reference emplace_back(ArgTypes &&... Args)

iterator erase(const_iterator CI)

void push_back(const T &Elt)

reverse_iterator rbegin()

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

StringRef - Represent a constant reference to a string, i.e.

TargetInstrInfo - Interface to description of machine instruction set.

virtual bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const

Reverses the branch condition of the specified condition list, returning false on success and true if...

virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const

Remove the branching code at the end of the specific MBB.

virtual std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const

Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...

virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const

Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....

virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const

Return true if the instruction contains a base register and offset.

virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const

Insert branch code into the end of the specified MachineBasicBlock.

unsigned insertUnconditionalBranch(MachineBasicBlock &MBB, MachineBasicBlock *DestBB, const DebugLoc &DL, int *BytesAdded=nullptr) const

virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const

If the instruction is an increment of a constant value, return the amount.

bool getMemOperandWithOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const

Get the base operand and byte offset of an instruction that reads/writes memory.

TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...

virtual const TargetRegisterInfo * getRegisterInfo() const

getRegisterInfo - If register information is available, return it.

virtual const TargetInstrInfo * getInstrInfo() const

Target - Wrapper for Target specific information.

A Use represents the edge between a Value definition and its users.

std::pair< iterator, bool > insert(const ValueT &V)

size_type count(const_arg_type_t< ValueT > V) const

Return 1 if the specified key is in the set, 0 otherwise.

self_iterator getIterator()

This class implements an extremely fast bulk output stream that can only output to a stream.

A raw_ostream that writes to an std::string.

A raw_ostream that writes to an SmallVector or SmallString.

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

unsigned ID

LLVM IR allows to use arbitrary numbers as calling convention identifiers.

Reg

All possible values of the reg field in the ModR/M byte.

initializer< Ty > init(const Ty &Val)

NodeAddr< PhiNode * > Phi

NodeAddr< DefNode * > Def

const_iterator end(StringRef path LLVM_LIFETIME_BOUND)

Get end iterator over path.

This is an optimization pass for GlobalISel generic memory operations.

auto find(R &&Range, const T &Val)

Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.

MachineBasicBlock * PeelSingleBlockLoop(LoopPeelDirection Direction, MachineBasicBlock *Loop, MachineRegisterInfo &MRI, const TargetInstrInfo *TII)

Peels a single block loop.

auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)

Get the size of a range.

MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)

Builder interface. Specify how to create the initial instruction itself.

testing::Matcher< const detail::ErrorHolder & > Failed()

iterator_range< T > make_range(T x, T y)

Convenience function for iterating over sub-ranges.

iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)

Make a range that does early increment to allow mutation of the underlying range without disrupting i...

bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)

auto reverse(ContainerTy &&C)

raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

void report_fatal_error(Error Err, bool gen_crash_diag=true)

Report a serious error, calling any installed error handler.

raw_fd_ostream & errs()

This returns a reference to a raw_ostream for standard error.

@ Ref

The access may reference the value stored in memory.

auto count(R &&Range, const E &Element)

Wrapper function around std::count to count the number of times an element Element occurs in the give...

OutputIt copy(R &&Range, OutputIt Out)

void initializeModuloScheduleTestPass(PassRegistry &)

@ LPD_Back

Peel the last iteration of the loop.

@ LPD_Front

Peel the first iteration of the loop.

void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)

Implement std::swap in terms of BitVector swap.

Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...