LLVM: lib/CodeGen/ModuloSchedule.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

21

22#define DEBUG_TYPE "pipeliner"

23using namespace llvm;

24

27 cl::desc("Swap target blocks of a conditional branch for MVE expander"));

28

32}

33

34

35

36

37

38

39

41 unsigned &InitVal, unsigned &LoopVal) {

42 assert(Phi.isPHI() && "Expecting a Phi.");

43

44 InitVal = 0;

45 LoopVal = 0;

46 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)

47 if (Phi.getOperand(i + 1).getMBB() != Loop)

48 InitVal = Phi.getOperand(i).getReg();

49 else

50 LoopVal = Phi.getOperand(i).getReg();

51

52 assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");

53}

54

55

57 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)

58 if (Phi.getOperand(i + 1).getMBB() != LoopBB)

59 return Phi.getOperand(i).getReg();

60 return 0;

61}

62

63

65 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)

66 if (Phi.getOperand(i + 1).getMBB() == LoopBB)

67 return Phi.getOperand(i).getReg();

68 return 0;

69}

70

74 if (Preheader == BB)

75 Preheader = *std::next(BB->pred_begin());

76

77

78

83 unsigned MaxDiff = 0;

84 bool PhiIsSwapped = false;

88 unsigned Diff = 0;

89 if (UseStage != -1 && UseStage >= DefStage)

90 Diff = UseStage - DefStage;

91 if (MI->isPHI()) {

92 if (isLoopCarried(*MI))

93 ++Diff;

94 else

95 PhiIsSwapped = true;

96 }

97 MaxDiff = std::max(Diff, MaxDiff);

98 }

99 RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);

100 }

101 }

102

103 generatePipelinedLoop();

104}

105

106void ModuloScheduleExpander::generatePipelinedLoop() {

109

110

112

113 unsigned MaxStageCount = Schedule.getNumStages() - 1;

114

115

116

117

118

119 ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];

120

121

122

123

124 ValueMapTy *VRMapPhi = new ValueMapTy[(MaxStageCount + 1) * 2];

125

126 InstrMapTy InstrMap;

127

129

130

131 generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs);

134

135

136

138 if (CI->isPHI())

139 continue;

140 unsigned StageNum = Schedule.getStage(CI);

141 MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum);

142 updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap);

144 InstrMap[NewMI] = CI;

145 }

146

147

148

151 updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);

153 InstrMap[NewMI] = &MI;

154 }

155

156 NewKernel = KernelBB;

159

160 generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap,

161 InstrMap, MaxStageCount, MaxStageCount, false);

162 generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, VRMapPhi,

163 InstrMap, MaxStageCount, MaxStageCount, false);

164

166

168

169 generateEpilog(MaxStageCount, KernelBB, BB, VRMap, VRMapPhi, EpilogBBs,

170 PrologBBs);

171

172

173

174 splitLifetimes(KernelBB, EpilogBBs);

175

176

177 removeDeadInstructions(KernelBB, EpilogBBs);

178

179

180 addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);

181

182 delete[] VRMap;

183 delete[] VRMapPhi;

184}

185

187

188 for (auto &I : *BB)

190 BB->clear();

191 BB->eraseFromParent();

192}

193

194

195void ModuloScheduleExpander::generateProlog(unsigned LastStage,

197 ValueMapTy *VRMap,

198 MBBVectorTy &PrologBBs) {

200 InstrMapTy InstrMap;

201

202

203

204

205 for (unsigned i = 0; i < LastStage; ++i) {

206

207

213 PredBB = NewBB;

215

216

217

218 for (int StageNum = i; StageNum >= 0; --StageNum) {

221 BBI != BBE; ++BBI) {

222 if (Schedule.getStage(&*BBI) == StageNum) {

223 if (BBI->isPHI())

224 continue;

226 cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum);

227 updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap);

229 InstrMap[NewMI] = &*BBI;

230 }

231 }

232 }

233 rewritePhiValues(NewBB, i, VRMap, InstrMap);

235 dbgs() << "prolog:\n";

236 NewBB->dump();

237 });

238 }

239

241

242

243

244 unsigned numBranches = TII->removeBranch(*Preheader);

245 if (numBranches) {

248 }

249}

250

251

252

253

254void ModuloScheduleExpander::generateEpilog(

256 ValueMapTy *VRMap, ValueMapTy *VRMapPhi, MBBVectorTy &EpilogBBs,

257 MBBVectorTy &PrologBBs) {

258

259

263 assert(!checkBranch && "generateEpilog must be able to analyze the branch");

264 if (checkBranch)

265 return;

266

268 if (*LoopExitI == KernelBB)

269 ++LoopExitI;

270 assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");

272

275 InstrMapTy InstrMap;

276

277

278

279

280 int EpilogStage = LastStage + 1;

281 for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {

285

289

290 if (EpilogStart == LoopExitBB)

291 EpilogStart = NewBB;

292

293

294

295 for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {

296 for (auto &BBI : *BB) {

297 if (BBI.isPHI())

298 continue;

300 if ((unsigned)Schedule.getStage(In) == StageNum) {

301

302

303 MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);

304 updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap);

306 InstrMap[NewMI] = In;

307 }

308 }

309 }

310 generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap,

311 InstrMap, LastStage, EpilogStage, i == 1);

312 generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, VRMapPhi,

313 InstrMap, LastStage, EpilogStage, i == 1);

314 PredBB = NewBB;

315

317 dbgs() << "epilog:\n";

318 NewBB->dump();

319 });

320 }

321

322

324

325

326

328 assert((OrigBB == TBB || OrigBB == FBB) &&

329 "Unable to determine looping branch direction");

330 if (OrigBB != TBB)

332 else

334

335 if (EpilogBBs.size() > 0) {

339 }

340}

341

342

343

350 if (O.getParent()->getParent() != MBB)

351 O.setReg(ToReg);

354}

355

356

357

361 if (MO.getParent()->getParent() != BB)

362 return true;

363 return false;

364}

365

366

367

368

369void ModuloScheduleExpander::generateExistingPhis(

371 MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,

372 unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {

373

374

375

376 unsigned PrologStage = 0;

377 unsigned PrevStage = 0;

378 bool InKernel = (LastStageNum == CurStageNum);

379 if (InKernel) {

380 PrologStage = LastStageNum - 1;

381 PrevStage = CurStageNum;

382 } else {

383 PrologStage = LastStageNum - (CurStageNum - LastStageNum);

384 PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;

385 }

386

388 BBE = BB->getFirstNonPHI();

389 BBI != BBE; ++BBI) {

390 Register Def = BBI->getOperand(0).getReg();

391

392 unsigned InitVal = 0;

393 unsigned LoopVal = 0;

394 getPhiRegs(*BBI, BB, InitVal, LoopVal);

395

396 unsigned PhiOp1 = 0;

397

398

399 unsigned PhiOp2 = LoopVal;

400 if (VRMap[LastStageNum].count(LoopVal))

401 PhiOp2 = VRMap[LastStageNum][LoopVal];

402

403 int StageScheduled = Schedule.getStage(&*BBI);

405 unsigned NumStages = getStagesForReg(Def, CurStageNum);

406 if (NumStages == 0) {

407

408

409 unsigned NewReg = VRMap[PrevStage][LoopVal];

410 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, 0, &*BBI, Def,

411 InitVal, NewReg);

412 if (VRMap[CurStageNum].count(LoopVal))

413 VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];

414 }

415

416

417

418

419 unsigned MaxPhis = PrologStage + 2;

420 if (!InKernel && (int)PrologStage <= LoopValStage)

421 MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1);

422 unsigned NumPhis = std::min(NumStages, MaxPhis);

423

424 unsigned NewReg = 0;

425 unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;

426

427

428

429

430

431 int StageDiff = 0;

432 if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&

433 NumPhis == 1)

434 StageDiff = 1;

435

436

437 if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)

438 StageDiff = StageScheduled - LoopValStage;

439 for (unsigned np = 0; np < NumPhis; ++np) {

440

441

442

443 if (np > PrologStage || StageScheduled >= (int)LastStageNum)

444 PhiOp1 = InitVal;

445

446 else if (PrologStage >= AccessStage + StageDiff + np &&

447 VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)

448 PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];

449

450

451 else if (PrologStage >= AccessStage + StageDiff + np) {

452

453

454 PhiOp1 = LoopVal;

456 int Indirects = 1;

457 while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {

458 int PhiStage = Schedule.getStage(InstOp1);

459 if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)

461 else

464 int PhiOpStage = Schedule.getStage(InstOp1);

465 int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);

466 if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&

467 VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {

468 PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];

469 break;

470 }

471 ++Indirects;

472 }

473 } else

474 PhiOp1 = InitVal;

475

476

478 if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)

480

482 bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();

483

484

485

486 if (!InKernel) {

487 int StageDiffAdj = 0;

488 if (LoopValStage != -1 && StageScheduled > LoopValStage)

489 StageDiffAdj = StageScheduled - LoopValStage;

490

491

492 if (np == 0 && PrevStage == LastStageNum &&

493 (StageScheduled != 0 || LoopValStage != 0) &&

494 VRMap[PrevStage - StageDiffAdj].count(LoopVal))

495 PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];

496

497

498 else if (np > 0 && PrevStage == LastStageNum &&

499 VRMap[PrevStage - np + 1].count(Def))

500 PhiOp2 = VRMap[PrevStage - np + 1][Def];

501

502 else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&

503 VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))

504 PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];

505

506

507 else if (VRMap[PrevStage - np].count(Def) &&

508 (!LoopDefIsPhi || (PrevStage != LastStageNum) ||

509 (LoopValStage == StageScheduled)))

510 PhiOp2 = VRMap[PrevStage - np][Def];

511 }

512

513

514

515

516

517 if (LoopDefIsPhi) {

518 if (static_cast<int>(PrologStage - np) >= StageScheduled) {

519 int LVNumStages = getStagesForPhi(LoopVal);

520 int StageDiff = (StageScheduled - LoopValStage);

521 LVNumStages -= StageDiff;

522

523 if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {

524 NewReg = PhiOp2;

525 unsigned ReuseStage = CurStageNum;

526 if (isLoopCarried(*PhiInst))

527 ReuseStage -= LVNumStages;

528

529

530 if (VRMap[ReuseStage - np].count(LoopVal)) {

531 NewReg = VRMap[ReuseStage - np][LoopVal];

532

533 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI,

534 Def, NewReg);

535

536 VRMap[CurStageNum - np][Def] = NewReg;

537 PhiOp2 = NewReg;

538 if (VRMap[LastStageNum - np - 1].count(LoopVal))

539 PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];

540

541 if (IsLast && np == NumPhis - 1)

543 continue;

544 }

545 }

546 }

547 if (InKernel && StageDiff > 0 &&

548 VRMap[CurStageNum - StageDiff - np].count(LoopVal))

549 PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];

550 }

551

554

557 TII->get(TargetOpcode::PHI), NewReg);

560 if (np == 0)

561 InstrMap[NewPhi] = &*BBI;

562

563

564

565

566 unsigned PrevReg = 0;

567 if (InKernel && VRMap[PrevStage - np].count(LoopVal))

568 PrevReg = VRMap[PrevStage - np][LoopVal];

569 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,

570 NewReg, PrevReg);

571

572 if (VRMap[CurStageNum - np].count(Def)) {

573 unsigned R = VRMap[CurStageNum - np][Def];

574 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, R,

575 NewReg);

576 }

577

578

579

580

581 if (IsLast && np == NumPhis - 1)

583

584

585 if (InKernel)

586 PhiOp2 = NewReg;

587

588

589 VRMap[CurStageNum - np][Def] = NewReg;

590 }

591

592 while (NumPhis++ < NumStages) {

593 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, NumPhis, &*BBI, Def,

594 NewReg, 0);

595 }

596

597

598

599 if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))

601 }

602}

603

604

605

606

607void ModuloScheduleExpander::generatePhis(

609 MachineBasicBlock *KernelBB, ValueMapTy *VRMap, ValueMapTy *VRMapPhi,

610 InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,

611 bool IsLast) {

612

613

614 unsigned PrologStage = 0;

615 unsigned PrevStage = 0;

616 unsigned StageDiff = CurStageNum - LastStageNum;

617 bool InKernel = (StageDiff == 0);

618 if (InKernel) {

619 PrologStage = LastStageNum - 1;

620 PrevStage = CurStageNum;

621 } else {

622 PrologStage = LastStageNum - StageDiff;

623 PrevStage = LastStageNum + StageDiff - 1;

624 }

625

627 BBE = BB->instr_end();

628 BBI != BBE; ++BBI) {

629 for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {

632 continue;

633

634 int StageScheduled = Schedule.getStage(&*BBI);

635 assert(StageScheduled != -1 && "Expecting scheduled instruction.");

637 unsigned NumPhis = getStagesForReg(Def, CurStageNum);

638

639

640

641 if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&

643 NumPhis = 1;

644 if (!InKernel && (unsigned)StageScheduled > PrologStage)

645 continue;

646

647 unsigned PhiOp2;

648 if (InKernel) {

649 PhiOp2 = VRMap[PrevStage][Def];

651 if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)

653 }

654

655

656 if (NumPhis > PrologStage + 1 - StageScheduled)

657 NumPhis = PrologStage + 1 - StageScheduled;

658 for (unsigned np = 0; np < NumPhis; ++np) {

659

660

661

662

663

664

665

666

667

668

669

670

671

672

673

674

675

676

677

678

679

680

681 unsigned PhiOp1 = VRMap[PrologStage][Def];

682 if (np <= PrologStage)

683 PhiOp1 = VRMap[PrologStage - np][Def];

684 if (!InKernel) {

685 if (PrevStage == LastStageNum && np == 0)

686 PhiOp2 = VRMap[LastStageNum][Def];

687 else

688 PhiOp2 = VRMapPhi[PrevStage - np][Def];

689 }

690

693

696 TII->get(TargetOpcode::PHI), NewReg);

699 if (np == 0)

700 InstrMap[NewPhi] = &*BBI;

701

702

703

704 if (InKernel) {

705 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp1,

706 NewReg);

707 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp2,

708 NewReg);

709

710 PhiOp2 = NewReg;

711 VRMapPhi[PrevStage - np - 1][Def] = NewReg;

712 } else {

713 VRMapPhi[CurStageNum - np][Def] = NewReg;

714 if (np == NumPhis - 1)

715 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,

716 NewReg);

717 }

718 if (IsLast && np == NumPhis - 1)

720 }

721 }

722 }

723}

724

725

726

727

728

729void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,

730 MBBVectorTy &EpilogBBs) {

731

732

736 MI != ME;) {

737

738 if (MI->isInlineAsm()) {

739 ++MI;

740 continue;

741 }

742 bool SawStore = false;

743

744

745 if (MI->isSafeToMove(SawStore) && MI->isPHI()) {

746 ++MI;

747 continue;

748 }

749 bool used = true;

752

755 if (used)

756 break;

757 continue;

758 }

759 unsigned realUses = 0;

761

762

763 if (U.getParent()->getParent() != BB) {

764 realUses++;

765 used = true;

766 break;

767 }

768 }

769 if (realUses > 0)

770 break;

771 used = false;

772 }

773 if (!used) {

775 MI++->eraseFromParent();

776 continue;

777 }

778 ++MI;

779 }

780

781

783 Register reg = MI.getOperand(0).getReg();

786 MI.eraseFromParent();

787 }

788 }

789}

790

791

792

793

794

795

796

797

798

799

800

801void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB,

802 MBBVectorTy &EpilogBBs) {

804 for (auto &PHI : KernelBB->phis()) {

806

807

810 I != E; ++I) {

811 if (I->isPHI() && I->getParent() == KernelBB) {

812

814 if (!LCDef)

815 continue;

817 if (MI || MI->getParent() != KernelBB || MI->isPHI())

818 continue;

819

820

821 unsigned SplitReg = 0;

824 if (BBJ.readsRegister(Def, nullptr)) {

825

826 if (SplitReg == 0) {

828 BuildMI(*KernelBB, MI, MI->getDebugLoc(),

829 TII->get(TargetOpcode::COPY), SplitReg)

831 }

832 BBJ.substituteRegister(Def, SplitReg, 0, *TRI);

833 }

834 if (!SplitReg)

835 continue;

836

837 for (auto &Epilog : EpilogBBs)

839 if (I.readsRegister(Def, nullptr))

840 I.substituteRegister(Def, SplitReg, 0, *TRI);

841 break;

842 }

843 }

844 }

845}

846

847

850 if (MI.isPHI())

851 break;

852 for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)

853 if (MI.getOperand(i + 1).getMBB() == Incoming) {

854 MI.removeOperand(i + 1);

855 MI.removeOperand(i);

856 break;

857 }

858 }

859}

860

861

862

863

864void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,

865 MBBVectorTy &PrologBBs,

867 MBBVectorTy &EpilogBBs,

868 ValueMapTy *VRMap) {

869 assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");

872

873

874

876 unsigned MaxIter = PrologBBs.size() - 1;

877 for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {

878

879

882

884 std::optional StaticallyGreater =

886 unsigned numAdded = 0;

887 if (!StaticallyGreater) {

890 } else if (*StaticallyGreater == false) {

892 Prolog->removeSuccessor(LastPro);

896

897 if (LastPro != LastEpi) {

898 LastEpi->clear();

900 }

901 if (LastPro == KernelBB) {

903 NewKernel = nullptr;

904 }

905 LastPro->clear();

907 } else {

910 }

914 E = Prolog->instr_rend();

915 I != E && numAdded > 0; ++I, --numAdded)

916 updateInstruction(&*I, false, j, 0, VRMap);

917 }

918

919 if (NewKernel) {

920 LoopInfo->setPreheader(PrologBBs[MaxIter]);

921 LoopInfo->adjustTripCount(-(MaxIter + 1));

922 }

923}

924

925

926

927bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {

931 bool OffsetIsScalable;

933 return false;

934

935

936 if (OffsetIsScalable)

937 return false;

938

939 if (!BaseOp->isReg())

940 return false;

941

943

945

947 if (BaseDef && BaseDef->isPHI()) {

949 BaseDef = MRI.getVRegDef(BaseReg);

950 }

951 if (!BaseDef)

952 return false;

953

954 int D = 0;

956 return false;

957

958 Delta = D;

959 return true;

960}

961

962

963

964

965void ModuloScheduleExpander::updateMemOperands(MachineInstr &NewMI,

967 unsigned Num) {

968 if (Num == 0)

969 return;

970

971

973 return;

976

977 if (MMO->isVolatile() || MMO->isAtomic() ||

978 (MMO->isInvariant() && MMO->isDereferenceable()) ||

979 (!MMO->getValue())) {

981 continue;

982 }

983 unsigned Delta;

984 if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {

985 int64_t AdjOffset = Delta * Num;

988 } else {

991 }

992 }

994}

995

996

997

999 unsigned CurStageNum,

1000 unsigned InstStageNum) {

1002 updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);

1003 return NewMI;

1004}

1005

1006

1007

1008

1009MachineInstr *ModuloScheduleExpander::cloneAndChangeInstr(

1010 MachineInstr *OldMI, unsigned CurStageNum, unsigned InstStageNum) {

1012 auto It = InstrChanges.find(OldMI);

1013 if (It != InstrChanges.end()) {

1014 std::pair<unsigned, int64_t> RegAndOffset = It->second;

1015 unsigned BasePos, OffsetPos;

1017 return nullptr;

1019 MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);

1020 if (Schedule.getStage(LoopDef) > (signed)InstStageNum)

1021 NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);

1023 }

1024 updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);

1025 return NewMI;

1026}

1027

1028

1029

1030void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,

1031 bool LastDef,

1032 unsigned CurStageNum,

1033 unsigned InstrStageNum,

1034 ValueMapTy *VRMap) {

1037 continue;

1039 if (MO.isDef()) {

1040

1042 Register NewReg = MRI.createVirtualRegister(RC);

1044 VRMap[CurStageNum][reg] = NewReg;

1045 if (LastDef)

1047 } else if (MO.isUse()) {

1049

1050 int DefStageNum = Schedule.getStage(Def);

1051 unsigned StageNum = CurStageNum;

1052 if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {

1053

1054 unsigned StageDiff = (InstrStageNum - DefStageNum);

1055

1056 StageNum -= StageDiff;

1057 }

1058 if (VRMap[StageNum].count(reg))

1059 MO.setReg(VRMap[StageNum][reg]);

1060 }

1061 }

1062}

1063

1064

1065

1066

1067MachineInstr *ModuloScheduleExpander::findDefInLoop(unsigned Reg) {

1070 while (Def->isPHI()) {

1071 if (!Visited.insert(Def).second)

1072 break;

1073 for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)

1074 if (Def->getOperand(i + 1).getMBB() == BB) {

1075 Def = MRI.getVRegDef(Def->getOperand(i).getReg());

1076 break;

1077 }

1078 }

1079 return Def;

1080}

1081

1082

1083unsigned ModuloScheduleExpander::getPrevMapVal(

1084 unsigned StageNum, unsigned PhiStage, unsigned LoopVal, unsigned LoopStage,

1086 unsigned PrevVal = 0;

1087 if (StageNum > PhiStage) {

1089 if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))

1090

1091 PrevVal = VRMap[StageNum - 1][LoopVal];

1092 else if (VRMap[StageNum].count(LoopVal))

1093

1094

1095 PrevVal = VRMap[StageNum][LoopVal];

1096 else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)

1097

1098 PrevVal = LoopVal;

1099 else if (StageNum == PhiStage + 1)

1100

1102 else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)

1103

1104 PrevVal =

1105 getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),

1106 LoopStage, VRMap, BB);

1107 }

1108 return PrevVal;

1109}

1110

1111

1112

1113

1114

1115void ModuloScheduleExpander::rewritePhiValues(MachineBasicBlock *NewBB,

1116 unsigned StageNum,

1117 ValueMapTy *VRMap,

1118 InstrMapTy &InstrMap) {

1119 for (auto &PHI : BB->phis()) {

1120 unsigned InitVal = 0;

1121 unsigned LoopVal = 0;

1123 Register PhiDef = PHI.getOperand(0).getReg();

1124

1125 unsigned PhiStage = (unsigned)Schedule.getStage(MRI.getVRegDef(PhiDef));

1126 unsigned LoopStage = (unsigned)Schedule.getStage(MRI.getVRegDef(LoopVal));

1127 unsigned NumPhis = getStagesForPhi(PhiDef);

1128 if (NumPhis > StageNum)

1129 NumPhis = StageNum;

1130 for (unsigned np = 0; np <= NumPhis; ++np) {

1131 unsigned NewVal =

1132 getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);

1133 if (!NewVal)

1134 NewVal = InitVal;

1135 rewriteScheduledInstr(NewBB, InstrMap, StageNum - np, np, &PHI, PhiDef,

1136 NewVal);

1137 }

1138 }

1139}

1140

1141

1142

1143

1144void ModuloScheduleExpander::rewriteScheduledInstr(

1145 MachineBasicBlock *BB, InstrMapTy &InstrMap, unsigned CurStageNum,

1146 unsigned PhiNum, MachineInstr *Phi, unsigned OldReg, unsigned NewReg,

1147 unsigned PrevReg) {

1149 int StagePhi = Schedule.getStage(Phi) + PhiNum;

1150

1151

1156 continue;

1159 continue;

1161 continue;

1162 }

1164 assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");

1166 int StageSched = Schedule.getStage(OrigMI);

1167 int CycleSched = Schedule.getCycle(OrigMI);

1168 unsigned ReplaceReg = 0;

1169

1170 if (StagePhi == StageSched && Phi->isPHI()) {

1171 int CyclePhi = Schedule.getCycle(Phi);

1172 if (PrevReg && InProlog)

1173 ReplaceReg = PrevReg;

1174 else if (PrevReg && !isLoopCarried(*Phi) &&

1175 (CyclePhi <= CycleSched || OrigMI->isPHI()))

1176 ReplaceReg = PrevReg;

1177 else

1178 ReplaceReg = NewReg;

1179 }

1180

1181

1182 if (!InProlog && StagePhi + 1 == StageSched && !isLoopCarried(*Phi))

1183 ReplaceReg = NewReg;

1184 if (StagePhi > StageSched && Phi->isPHI())

1185 ReplaceReg = NewReg;

1186 if (!InProlog && Phi->isPHI() && StagePhi < StageSched)

1187 ReplaceReg = NewReg;

1188 if (ReplaceReg) {

1190 MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));

1191 if (NRC)

1192 UseOp.setReg(ReplaceReg);

1193 else {

1194 Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));

1196 SplitReg)

1197 .addReg(ReplaceReg);

1198 UseOp.setReg(SplitReg);

1199 }

1200 }

1201 }

1202}

1203

1204bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) {

1205 if (Phi.isPHI())

1206 return false;

1207 int DefCycle = Schedule.getCycle(&Phi);

1208 int DefStage = Schedule.getStage(&Phi);

1209

1210 unsigned InitVal = 0;

1211 unsigned LoopVal = 0;

1212 getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);

1214 if (Use || Use->isPHI())

1215 return true;

1218 return (LoopCycle > DefCycle) || (LoopStage <= DefStage);

1219}

1220

1221

1222

1223

1224

1225

1226

1227

1228namespace {

1229

1230

1232 LiveIntervals *LIS, bool KeepSingleSrcPhi = false) {

1233 bool Changed = true;

1234 while (Changed) {

1235 Changed = false;

1238 if (MRI.use_empty(MI.getOperand(0).getReg())) {

1239 if (LIS)

1241 MI.eraseFromParent();

1242 Changed = true;

1243 } else if (!KeepSingleSrcPhi && MI.getNumExplicitOperands() == 3) {

1245 MRI.constrainRegClass(MI.getOperand(1).getReg(),

1246 MRI.getRegClass(MI.getOperand(0).getReg()));

1247 assert(ConstrainRegClass &&

1248 "Expected a valid constrained register class!");

1249 (void)ConstrainRegClass;

1250 MRI.replaceRegWith(MI.getOperand(0).getReg(),

1251 MI.getOperand(1).getReg());

1252 if (LIS)

1254 MI.eraseFromParent();

1255 Changed = true;

1256 }

1257 }

1258 }

1259}

1260

1261

1262

1263class KernelRewriter {

1270

1271

1273

1274

1276

1278

1279

1280

1282

1283

1284

1287

1289

1290public:

1293 void rewrite();

1294};

1295}

1296

1299 : S(S), BB(LoopBB), PreheaderBB(L.getLoopPreheader()),

1300 ExitBB(L.getExitBlock()), MRI(BB->getParent()->getRegInfo()),

1301 TII(BB->getParent()->getSubtarget().getInstrInfo()), LIS(LIS) {

1303 if (PreheaderBB == BB)

1304 PreheaderBB = *std::next(BB->pred_begin());

1305}

1306

1307void KernelRewriter::rewrite() {

1308

1309

1310

1311

1315 if (MI->isPHI())

1316 continue;

1317 if (MI->getParent())

1318 MI->removeFromParent();

1320 if (!FirstMI)

1321 FirstMI = MI;

1322 }

1323 assert(FirstMI && "Failed to find first MI in schedule");

1324

1325

1326

1328 if (LIS)

1330 (I++)->eraseFromParent();

1331 }

1332

1333

1335 if (MI.isPHI() || MI.isTerminator())

1336 continue;

1339 continue;

1342 }

1343 }

1344 EliminateDeadPhis(BB, MRI, LIS);

1345

1346

1347

1348

1349

1350 for (auto MI = BB->getFirstNonPHI(); MI != BB->end(); ++MI) {

1351 if (MI->isPHI()) {

1352 Register R = MI->getOperand(0).getReg();

1354 continue;

1355 }

1356

1359 if (MI.getParent() != BB) {

1361 break;

1362 }

1363 }

1364 }

1365 }

1366}

1367

1370 if (!Producer)

1371 return Reg;

1372

1373 int ConsumerStage = S.getStage(&MI);

1375

1376

1377 if (Producer->getParent() != BB)

1378

1379 return Reg;

1380 int ProducerStage = S.getStage(Producer);

1381 assert(ConsumerStage != -1 &&

1382 "In-loop consumer should always be scheduled!");

1383 assert(ConsumerStage >= ProducerStage);

1384 unsigned StageDiff = ConsumerStage - ProducerStage;

1385

1386 for (unsigned I = 0; I < StageDiff; ++I)

1387 Reg = phi(Reg);

1388 return Reg;

1389 }

1390

1391

1392

1395 auto LoopProducer = Producer;

1396 while (LoopProducer->isPHI() && LoopProducer->getParent() == BB) {

1399 LoopProducer = MRI.getUniqueVRegDef(LoopReg);

1400 assert(LoopProducer);

1401 }

1402 int LoopProducerStage = S.getStage(LoopProducer);

1403

1404 std::optional IllegalPhiDefault;

1405

1406 if (LoopProducerStage == -1) {

1407

1408 } else if (LoopProducerStage > ConsumerStage) {

1409

1410

1411

1412

1413#ifndef NDEBUG

1414 int LoopProducerCycle = S.getCycle(LoopProducer);

1415 int ConsumerCycle = S.getCycle(&MI);

1416#endif

1417 assert(LoopProducerCycle <= ConsumerCycle);

1418 assert(LoopProducerStage == ConsumerStage + 1);

1419

1420

1421

1422

1423

1424

1425 IllegalPhiDefault = Defaults.front();

1427 } else {

1428 assert(ConsumerStage >= LoopProducerStage);

1429 int StageDiff = ConsumerStage - LoopProducerStage;

1430 if (StageDiff > 0) {

1431 LLVM_DEBUG(dbgs() << " -- padding defaults array from " << Defaults.size()

1432 << " to " << (Defaults.size() + StageDiff) << "\n");

1433

1434

1435

1436 Defaults.resize(Defaults.size() + StageDiff,

1437 Defaults.empty() ? std::optional()

1438 : Defaults.back());

1439 }

1440 }

1441

1442

1443 auto DefaultI = Defaults.rbegin();

1444 while (DefaultI != Defaults.rend())

1445 LoopReg = phi(LoopReg, *DefaultI++, MRI.getRegClass(Reg));

1446

1447 if (IllegalPhiDefault) {

1448

1449

1450

1451

1452

1453 auto RC = MRI.getRegClass(Reg);

1457 .addReg(*IllegalPhiDefault)

1458 .addMBB(PreheaderBB)

1460 .addMBB(BB);

1461

1462

1463 S.setStage(IllegalPhi, LoopProducerStage);

1464 return R;

1465 }

1466

1467 return LoopReg;

1468}

1469

1470Register KernelRewriter::phi(Register LoopReg, std::optional InitReg,

1472

1473 if (InitReg) {

1474 auto I = Phis.find({LoopReg, *InitReg});

1475 if (I != Phis.end())

1476 return I->second;

1477 } else {

1478 for (auto &KV : Phis) {

1479 if (KV.first.first == LoopReg)

1480 return KV.second;

1481 }

1482 }

1483

1484

1485

1486 auto I = UndefPhis.find(LoopReg);

1487 if (I != UndefPhis.end()) {

1489 if (!InitReg)

1490

1491

1492 return R;

1493

1495 MI->getOperand(1).setReg(*InitReg);

1496 Phis.insert({{LoopReg, *InitReg}, R});

1498 MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));

1499 assert(ConstrainRegClass && "Expected a valid constrained register class!");

1500 (void)ConstrainRegClass;

1501 UndefPhis.erase(I);

1502 return R;

1503 }

1504

1505

1506 if (!RC)

1507 RC = MRI.getRegClass(LoopReg);

1509 if (InitReg) {

1511 MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));

1512 assert(ConstrainRegClass && "Expected a valid constrained register class!");

1513 (void)ConstrainRegClass;

1514 }

1515 BuildMI(*BB, BB->getFirstNonPHI(), DebugLoc(), TII->get(TargetOpcode::PHI), R)

1516 .addReg(InitReg ? *InitReg : undef(RC))

1517 .addMBB(PreheaderBB)

1520 if (!InitReg)

1521 UndefPhis[LoopReg] = R;

1522 else

1523 Phis[{LoopReg, *InitReg}] = R;

1524 return R;

1525}

1526

1529 if (R == 0) {

1530

1531

1532

1533 R = MRI.createVirtualRegister(RC);

1534 auto *InsertBB = &PreheaderBB->getParent()->front();

1535 BuildMI(*InsertBB, InsertBB->getFirstTerminator(), DebugLoc(),

1536 TII->get(TargetOpcode::IMPLICIT_DEF), R);

1537 }

1538 return R;

1539}

1540

1541namespace {

1542

1543

1544

1545class KernelOperandInfo {

1551

1552public:

1558 while (isRegInLoop(MO)) {

1560 if (MI->isFullCopy()) {

1561 MO = &MI->getOperand(1);

1562 continue;

1563 }

1564 if (MI->isPHI())

1565 break;

1566

1567 if (IllegalPhis.count(MI)) {

1568 MO = &MI->getOperand(3);

1569 continue;

1570 }

1571

1573 MO = MI->getOperand(2).getMBB() == BB ? &MI->getOperand(1)

1574 : &MI->getOperand(3);

1576 }

1578 }

1579

1581 return PhiDefaults.size() == Other.PhiDefaults.size();

1582 }

1583

1585 OS << "use of " << *Source << ": distance(" << PhiDefaults.size() << ") in "

1586 << *Source->getParent();

1587 }

1588

1589private:

1592 MRI.getVRegDef(MO->getReg())->getParent() == BB;

1593 }

1594};

1595}

1596

1602 else

1604 for (auto I = BB->begin(), NI = NewBB->begin(); I->isTerminator();

1605 ++I, ++NI) {

1610 }

1611 return NewBB;

1612}

1613

1615 int MinStage) {

1617 I != std::next(MB->getFirstNonPHI()->getReverseIterator());) {

1620 if (Stage == -1 || Stage >= MinStage)

1621 continue;

1622

1626

1627

1630 MI->getParent());

1632 }

1633 for (auto &Sub : Subs)

1634 Sub.first->substituteRegister(DefMO.getReg(), Sub.second, 0,

1636 }

1637 if (LIS)

1639 MI->eraseFromParent();

1640 }

1641}

1642

1649 if (MI.isPHI()) {

1650

1651

1653

1654

1655 Register PhiR = MI.getOperand(0).getReg();

1664 Remaps[PhiR] = NR;

1665 }

1666 }

1668 continue;

1669 MI.removeFromParent();

1670 DestBB->insert(InsertPt, &MI);

1673 BlockMIs.erase({SourceBB, KernelMI});

1674 }

1677 assert(MI.getNumOperands() == 3);

1679

1680

1681 if (getStage(Def) == Stage) {

1682 Register PhiReg = MI.getOperand(0).getReg();

1683 assert(Def->findRegisterDefOperandIdx(MI.getOperand(1).getReg(),

1684 nullptr) != -1);

1686 MI.getOperand(0).setReg(PhiReg);

1688 }

1689 }

1690 for (auto *P : PhiToDelete)

1691 P->eraseFromParent();

1693

1694

1697 DestBB->insert(InsertPt, NewMI);

1698 Register OrigR = Phi->getOperand(0).getReg();

1703 Remaps[OrigR] = R;

1707 return R;

1708 };

1711 if (!MO.isReg())

1712 continue;

1715 else {

1716

1717

1719 if (Use && Use->isPHI() && Use->getParent() == SourceBB) {

1722 }

1723 }

1724 }

1725 }

1726}

1727

1734 for (unsigned I = 0; I < distance; ++I) {

1737 unsigned LoopRegIdx = 3, InitRegIdx = 1;

1739 std::swap(LoopRegIdx, InitRegIdx);

1740 CanonicalUseReg = CanonicalUse->getOperand(LoopRegIdx).getReg();

1742 }

1743 return CanonicalUseReg;

1744}

1745

1751

1752

1753 LS.reset();

1755 LS[I] = true;

1759 }

1760

1761

1762

1763

1764

1765

1766

1768 EliminateDeadPhis(ExitingBB, MRI, LIS, true);

1769

1770

1771

1772

1773

1774

1775

1776

1777

1778

1779

1780

1781

1782

1783

1784

1789

1790

1791 EliminateDeadPhis(B, MRI, LIS, true);

1794 }

1795 for (size_t I = 0; I < Epilogs.size(); I++) {

1796 LS.reset();

1797 for (size_t J = I; J < Epilogs.size(); J++) {

1798 int Iteration = J;

1800

1801 for (size_t K = Iteration; K > I; K--)

1803 LS[Stage] = true;

1804 }

1807 }

1808

1809

1810

1811

1812 auto PI = Prologs.begin();

1813 auto EI = Epilogs.begin();

1815 for (; PI != Prologs.end(); ++PI, ++EI) {

1817 (*PI)->addSuccessor(*EI);

1819 Register Reg = MI.getOperand(1).getReg();

1821 if (Use && Use->getParent() == Pred) {

1823 if (CanonicalUse->isPHI()) {

1824

1825

1826

1828 }

1830 }

1833 }

1834 }

1835

1836

1841

1842

1844 for (auto I = B->instr_rbegin();

1845 I != std::next(B->getFirstNonPHI()->getReverseIterator());) {

1848 }

1849 }

1851 if (LIS)

1853 MI->eraseFromParent();

1854 }

1856

1857

1859 EliminateDeadPhis(B, MRI, LIS);

1860 EliminateDeadPhis(ExitingBB, MRI, LIS);

1861}

1862

1866 if (Exit == BB)

1868

1871

1872

1875 Register OldR = MI.getOperand(3).getReg();

1879 if (Use.getParent() != BB)

1882 Use->substituteRegister(OldR, R, 0,

1889 }

1891 Exit->replacePhiUsesWith(BB, NewBB);

1893

1897 (void)CanAnalyzeBr;

1898 assert(CanAnalyzeBr && "Must be able to analyze the loop branch!");

1903 return NewBB;

1904}

1905

1910 unsigned OpIdx = MI->findRegisterDefOperandIdx(Reg, nullptr);

1912}

1913

1915 if (MI->isPHI()) {

1916

1917

1918 Register PhiR = MI->getOperand(0).getReg();

1919 Register R = MI->getOperand(3).getReg();

1921 if (RMIStage != -1 && AvailableStages[MI->getParent()].test(RMIStage))

1922 R = MI->getOperand(1).getReg();

1925

1926

1927 MI->getOperand(0).setReg(PhiR);

1929 return;

1930 }

1931

1933 if (Stage == -1 || LiveStages.count(MI->getParent()) == 0 ||

1935

1936 return;

1937

1941

1942

1945 MI->getParent());

1947 }

1948 for (auto &Sub : Subs)

1949 Sub.first->substituteRegister(DefMO.getReg(), Sub.second, 0,

1951 }

1952 if (LIS)

1954 MI->eraseFromParent();

1955}

1956

1958

1959 bool KernelDisposed = false;

1962 ++PI, ++EI, --TC) {

1968 std::optional StaticallyGreater =

1970 if (!StaticallyGreater) {

1971 LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n");

1972

1974 } else if (*StaticallyGreater == false) {

1975 LLVM_DEBUG(dbgs() << "Static-false: TC > " << TC << "\n");

1976

1977

1978 Prolog->removeSuccessor(Fallthrough);

1980 P.removeOperand(2);

1981 P.removeOperand(1);

1982 }

1984 KernelDisposed = true;

1985 } else {

1986 LLVM_DEBUG(dbgs() << "Static-true: TC > " << TC << "\n");

1987

1990 P.removeOperand(4);

1991 P.removeOperand(3);

1992 }

1993 }

1994 }

1995

1996 if (!KernelDisposed) {

1999 } else {

2001 }

2002}

2003

2006 KR.rewrite();

2007}

2008

2015

2019}

2020

2024

2025

2026

2027 std::string ScheduleDump;

2031

2032

2033

2034 assert(LIS && "Requires LiveIntervals!");

2039 if (!ExpandedKernel) {

2040

2042 return;

2043 }

2044

2046

2047

2049 KR.rewrite();

2051

2052

2053

2056 if (NI->isPHI())

2057 IllegalPhis.insert(&*NI);

2058 }

2059

2060

2061

2063 auto OI = ExpandedKernel->begin();

2065 for (; !OI->isTerminator() && !NI->isTerminator(); ++OI, ++NI) {

2066 while (OI->isPHI() || OI->isFullCopy())

2067 ++OI;

2068 while (NI->isPHI() || NI->isFullCopy())

2069 ++NI;

2070 assert(OI->getOpcode() == NI->getOpcode() && "Opcodes don't match?!");

2071

2072 for (auto OOpI = OI->operands_begin(), NOpI = NI->operands_begin();

2073 OOpI != OI->operands_end(); ++OOpI, ++NOpI)

2074 KOIs.emplace_back(KernelOperandInfo(&*OOpI, MRI, IllegalPhis),

2075 KernelOperandInfo(&*NOpI, MRI, IllegalPhis));

2076 }

2077

2078 bool Failed = false;

2079 for (auto &OldAndNew : KOIs) {

2080 if (OldAndNew.first == OldAndNew.second)

2081 continue;

2083 errs() << "Modulo kernel validation error: [\n";

2084 errs() << " [golden] ";

2085 OldAndNew.first.print(errs());

2086 errs() << " ";

2087 OldAndNew.second.print(errs());

2088 errs() << "]\n";

2089 }

2090

2092 errs() << "Golden reference kernel:\n";

2094 errs() << "New kernel:\n";

2096 errs() << ScheduleDump;

2098 "Modulo kernel validation (-pipeliner-experimental-cg) failed");

2099 }

2100

2101

2102

2105}

2106

2109

2110

2112

2113 return NewMI;

2114}

2115

2116

2117

2118

2121 if (Exit->pred_size() == 1)

2122 return Exit;

2123

2126

2129 MF->insert(Loop->getIterator(), NewExit);

2130

2135 FBB = NewExit;

2136 else if (FBB == Loop)

2137 TBB = NewExit;

2138 else

2142 Loop->replaceSuccessor(Exit, NewExit);

2143 TII->insertUnconditionalBranch(*NewExit, Exit, DebugLoc());

2145

2146 Exit->replacePhiUsesWith(Loop, NewExit);

2147

2148 return NewExit;

2149}

2150

2151

2152

2153

2155 int RequiredTC,

2156 InstrMapTy &LastStage0Insts,

2160 LoopInfo->createRemainingIterationsGreaterCondition(RequiredTC, MBB, Cond,

2161 LastStage0Insts);

2162

2164

2165

2169 } else {

2171 }

2172}

2173

2174

2175

2176

2177

2178

2179void ModuloScheduleExpanderMVE::generatePipelinedLoop() {

2180

2181

2182

2183

2184

2185

2186

2187

2188

2189

2190

2191

2192

2193

2194

2195

2196

2197

2198

2199

2200

2201

2202

2203

2204

2205

2206

2207

2208

2209

2210

2211

2212

2213

2214

2215

2216

2217

2218

2219

2220

2221

2222

2223

2224

2225

2226

2227

2228

2229

2230

2231

2232

2233

2234

2235

2236

2237

2238

2239

2240

2241

2242

2243

2244

2245

2246

2247

2248

2249

2250

2251

2252

2254 assert(LoopInfo && "Must be able to analyze loop!");

2255

2256 calcNumUnroll();

2257

2263

2269

2271

2274

2278

2281

2282 Prolog->addSuccessor(NewKernel);

2283

2286

2287 Epilog->addSuccessor(NewPreheader);

2288 Epilog->addSuccessor(NewExit);

2289

2290 InstrMapTy LastStage0Insts;

2291 insertCondBranch(*Check, Schedule.getNumStages() + NumUnroll - 2,

2292 LastStage0Insts, *Prolog, *NewPreheader);

2293

2294

2295

2297 generateProlog(PrologVRMap);

2298 generateKernel(PrologVRMap, KernelVRMap, LastStage0Insts);

2299 generateEpilog(KernelVRMap, EpilogVRMap, LastStage0Insts);

2300}

2301

2302

2303void ModuloScheduleExpanderMVE::updateInstrUse(

2307

2308

2309

2310

2311

2312

2314 if (!UseMO.isReg() || !UseMO.getReg().isVirtual())

2315 continue;

2316 int DiffStage = 0;

2317 Register OrigReg = UseMO.getReg();

2319 if (!DefInst || DefInst->getParent() != OrigKernel)

2320 continue;

2321 unsigned InitReg = 0;

2322 unsigned DefReg = OrigReg;

2323 if (DefInst->isPHI()) {

2324 ++DiffStage;

2325 unsigned LoopReg;

2326 getPhiRegs(*DefInst, OrigKernel, InitReg, LoopReg);

2327

2328 DefReg = LoopReg;

2329 DefInst = MRI.getVRegDef(LoopReg);

2330 }

2331 unsigned DefStageNum = Schedule.getStage(DefInst);

2332 DiffStage += StageNum - DefStageNum;

2334 if (PhaseNum >= DiffStage && CurVRMap[PhaseNum - DiffStage].count(DefReg))

2335

2336 NewReg = CurVRMap[PhaseNum - DiffStage][DefReg];

2337 else if (!PrevVRMap)

2338

2339

2340 NewReg = InitReg;

2341 else

2342

2343

2344

2345

2346 NewReg = (*PrevVRMap)[PrevVRMap->size() - (DiffStage - PhaseNum)][DefReg];

2347

2349 MRI.constrainRegClass(NewReg, MRI.getRegClass(OrigReg));

2350 if (NRC)

2351 UseMO.setReg(NewReg);

2352 else {

2353 Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));

2354 BuildMI(*OrigKernel, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY),

2355 SplitReg)

2357 UseMO.setReg(SplitReg);

2358 }

2359 }

2360}

2361

2362

2363

2366 unsigned InitVal, LoopVal;

2368 if (LoopVal == Reg)

2369 return Φ

2370 }

2371 return nullptr;

2372}

2373

2374

2375void ModuloScheduleExpanderMVE::generatePhi(

2380 int StageNum = Schedule.getStage(OrigMI);

2381 bool UsePrologReg;

2382 if (Schedule.getNumStages() - NumUnroll + UnrollNum - 1 >= StageNum)

2383 UsePrologReg = true;

2384 else if (Schedule.getNumStages() - NumUnroll + UnrollNum == StageNum)

2385 UsePrologReg = false;

2386 else

2387 return;

2388

2389

2390

2391

2392

2393

2394

2395

2396

2397

2398

2399

2400

2401

2402

2403

2404

2405

2406

2407

2408

2409

2410

2411

2412

2413

2414

2415

2416

2417

2418

2419

2421 if (!DefMO.isReg() || DefMO.isDead())

2422 continue;

2423 Register OrigReg = DefMO.getReg();

2424 auto NewReg = KernelVRMap[UnrollNum].find(OrigReg);

2425 if (NewReg == KernelVRMap[UnrollNum].end())

2426 continue;

2428 if (UsePrologReg) {

2429 int PrologNum = Schedule.getNumStages() - NumUnroll + UnrollNum - 1;

2430 CorrespondReg = PrologVRMap[PrologNum][OrigReg];

2431 } else {

2433 if (!Phi)

2434 continue;

2435 CorrespondReg = getInitPhiReg(*Phi, OrigKernel);

2436 }

2437

2439 Register PhiReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));

2441 TII->get(TargetOpcode::PHI), PhiReg)

2442 .addReg(NewReg->second)

2444 .addReg(CorrespondReg)

2446 PhiVRMap[UnrollNum][OrigReg] = PhiReg;

2447 }

2448}

2449

2452 for (unsigned Idx = 1; Idx < Phi.getNumOperands(); Idx += 2) {

2453 if (Phi.getOperand(Idx).getReg() == OrigReg) {

2454 Phi.getOperand(Idx).setReg(NewReg);

2455 Phi.getOperand(Idx + 1).setMBB(NewMBB);

2456 return;

2457 }

2458 }

2459}

2460

2461

2462void ModuloScheduleExpanderMVE::mergeRegUsesAfterPipeline(Register OrigReg,

2467 E = MRI.use_end();

2468 I != E; ++I) {

2470 if (O.getParent()->getParent() != OrigKernel &&

2471 O.getParent()->getParent() != Prolog &&

2472 O.getParent()->getParent() != NewKernel &&

2473 O.getParent()->getParent() != Epilog)

2475 if (O.getParent()->getParent() == OrigKernel && O.getParent()->isPHI())

2477 }

2478

2479

2480

2481 if (!UsesAfterLoop.empty()) {

2482 Register PhiReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));

2484 TII->get(TargetOpcode::PHI), PhiReg)

2489

2492

2495 }

2496

2497

2498

2499 if (!LoopPhis.empty()) {

2501 unsigned InitReg, LoopReg;

2502 getPhiRegs(*Phi, OrigKernel, InitReg, LoopReg);

2503 Register NewInit = MRI.createVirtualRegister(MRI.getRegClass(InitReg));

2505 TII->get(TargetOpcode::PHI), NewInit)

2510 replacePhiSrc(*Phi, InitReg, NewInit, NewPreheader);

2511 }

2512 }

2513}

2514

2515void ModuloScheduleExpanderMVE::generateProlog(

2517 PrologVRMap.clear();

2520 for (int PrologNum = 0; PrologNum < Schedule.getNumStages() - 1;

2521 ++PrologNum) {

2523 if (MI->isPHI())

2524 continue;

2525 int StageNum = Schedule.getStage(MI);

2526 if (StageNum > PrologNum)

2527 continue;

2529 updateInstrDef(NewMI, PrologVRMap[PrologNum], false);

2530 NewMIMap[NewMI] = {PrologNum, StageNum};

2531 Prolog->push_back(NewMI);

2532 }

2533 }

2534

2535 for (auto I : NewMIMap) {

2537 int PrologNum = I.second.first;

2538 int StageNum = I.second.second;

2539 updateInstrUse(MI, StageNum, PrologNum, PrologVRMap, nullptr);

2540 }

2541

2543 dbgs() << "prolog:\n";

2545 });

2546}

2547

2548void ModuloScheduleExpanderMVE::generateKernel(

2551 KernelVRMap.clear();

2552 KernelVRMap.resize(NumUnroll);

2554 PhiVRMap.resize(NumUnroll);

2557 for (int UnrollNum = 0; UnrollNum < NumUnroll; ++UnrollNum) {

2559 if (MI->isPHI())

2560 continue;

2561 int StageNum = Schedule.getStage(MI);

2563 if (UnrollNum == NumUnroll - 1)

2564 LastStage0Insts[MI] = NewMI;

2565 updateInstrDef(NewMI, KernelVRMap[UnrollNum],

2566 (UnrollNum == NumUnroll - 1 && StageNum == 0));

2567 generatePhi(MI, UnrollNum, PrologVRMap, KernelVRMap, PhiVRMap);

2568 NewMIMap[NewMI] = {UnrollNum, StageNum};

2570 }

2571 }

2572

2573 for (auto I : NewMIMap) {

2575 int UnrollNum = I.second.first;

2576 int StageNum = I.second.second;

2577 updateInstrUse(MI, StageNum, UnrollNum, KernelVRMap, &PhiVRMap);

2578 }

2579

2580

2581 insertCondBranch(*NewKernel, NumUnroll - 1, LastStage0Insts, *NewKernel,

2582 *Epilog);

2583

2585 dbgs() << "kernel:\n";

2586 NewKernel->dump();

2587 });

2588}

2589

2590void ModuloScheduleExpanderMVE::generateEpilog(

2593 EpilogVRMap.clear();

2596 for (int EpilogNum = 0; EpilogNum < Schedule.getNumStages() - 1;

2597 ++EpilogNum) {

2599 if (MI->isPHI())

2600 continue;

2601 int StageNum = Schedule.getStage(MI);

2602 if (StageNum <= EpilogNum)

2603 continue;

2605 updateInstrDef(NewMI, EpilogVRMap[EpilogNum], StageNum - 1 == EpilogNum);

2606 NewMIMap[NewMI] = {EpilogNum, StageNum};

2607 Epilog->push_back(NewMI);

2608 }

2609 }

2610

2611 for (auto I : NewMIMap) {

2613 int EpilogNum = I.second.first;

2614 int StageNum = I.second.second;

2615 updateInstrUse(MI, StageNum, EpilogNum, EpilogVRMap, &KernelVRMap);

2616 }

2617

2618

2619

2620

2621

2622 insertCondBranch(*Epilog, 0, LastStage0Insts, *NewPreheader, *NewExit);

2623

2625 dbgs() << "epilog:\n";

2627 });

2628}

2629

2630

2631void ModuloScheduleExpanderMVE::calcNumUnroll() {

2633 NumUnroll = 1;

2636

2638 if (MI->isPHI())

2639 continue;

2640 int StageNum = Schedule.getStage(MI);

2643 continue;

2646 continue;

2647

2648 int NumUnrollLocal = 1;

2650 ++NumUnrollLocal;

2651

2652

2654 }

2655 NumUnrollLocal += StageNum - Schedule.getStage(DefMI);

2656 if (Inst2Idx[MI] <= Inst2Idx[DefMI])

2657 --NumUnrollLocal;

2658 NumUnroll = std::max(NumUnroll, NumUnrollLocal);

2659 }

2660 }

2661 LLVM_DEBUG(dbgs() << "NumUnroll: " << NumUnroll << "\n");

2662}

2663

2664

2665

2666

2667void ModuloScheduleExpanderMVE::updateInstrDef(MachineInstr *NewMI,

2668 ValueMapTy &VRMap,

2669 bool LastDef) {

2672 continue;

2675 Register NewReg = MRI.createVirtualRegister(RC);

2677 VRMap[Reg] = NewReg;

2678 if (LastDef)

2679 mergeRegUsesAfterPipeline(Reg, NewReg);

2680 }

2681}

2682

2687

2689

2690 generatePipelinedLoop();

2691}

2692

2693

2695 if (!L.getExitBlock()) {

2696 LLVM_DEBUG(dbgs() << "Can not apply MVE expander: No single exit block.\n");

2697 return false;

2698 }

2699

2702

2703

2704

2707

2708

2712 if (Ref.getParent() != BB || Ref.isPHI()) {

2713 LLVM_DEBUG(dbgs() << "Can not apply MVE expander: A phi result is "

2714 "referenced outside of the loop or by phi.\n");

2715 return false;

2716 }

2717

2718

2719

2720

2721 unsigned InitVal, LoopVal;

2723 if (Register(LoopVal).isVirtual() ||

2724 MRI.getVRegDef(LoopVal)->getParent() != BB) {

2726 dbgs() << "Can not apply MVE expander: A phi source value coming "

2727 "from the loop is not defined in the loop.\n");

2728 return false;

2729 }

2730 if (UsedByPhi.count(LoopVal)) {

2731 LLVM_DEBUG(dbgs() << "Can not apply MVE expander: A value defined in the "

2732 "loop is referenced by two or more phis.\n");

2733 return false;

2734 }

2735 UsedByPhi.insert(LoopVal);

2736 }

2737

2738 return true;

2739}

2740

2741

2742

2743

2744

2745

2746

2747

2748

2749

2750

2751

2752

2753

2754namespace {

2756public:

2757 static char ID;

2758

2761 }

2762

2763 bool runOnMachineFunction(MachineFunction &MF) override;

2765

2766 void getAnalysisUsage(AnalysisUsage &AU) const override {

2770 }

2771};

2772}

2773

2774char ModuloScheduleTest::ID = 0;

2775

2777 "Modulo Schedule test pass", false, false)

2782

2783bool ModuloScheduleTest::runOnMachineFunction(MachineFunction &MF) {

2784 MachineLoopInfo &MLI = getAnalysis().getLI();

2785 for (auto *L : MLI) {

2786 if (L->getTopBlock() != L->getBottomBlock())

2787 continue;

2788 runOnLoop(MF, *L);

2789 return false;

2790 }

2791 return false;

2792}

2793

2795 std::pair<StringRef, StringRef> StageAndCycle = getToken(S, "_");

2796 std::pair<StringRef, StringRef> StageTokenAndValue =

2797 getToken(StageAndCycle.first, "-");

2798 std::pair<StringRef, StringRef> CycleTokenAndValue =

2799 getToken(StageAndCycle.second, "-");

2800 if (StageTokenAndValue.first != "Stage" ||

2801 CycleTokenAndValue.first != "_Cycle") {

2803 "Bad post-instr symbol syntax: see comment in ModuloScheduleTest");

2804 return;

2805 }

2806

2807 StageTokenAndValue.second.drop_front().getAsInteger(10, Stage);

2808 CycleTokenAndValue.second.drop_front().getAsInteger(10, Cycle);

2809

2810 dbgs() << " Stage=" << Stage << ", Cycle=" << Cycle << "\n";

2811}

2812

2814 LiveIntervals &LIS = getAnalysis().getLIS();

2816 dbgs() << "--- ModuloScheduleTest running on BB#" << BB->getNumber() << "\n";

2817

2819 std::vector<MachineInstr *> Instrs;

2821 if (MI.isTerminator())

2822 continue;

2823 Instrs.push_back(&MI);

2825 dbgs() << "Parsing post-instr symbol for " << MI;

2827 }

2828 }

2829

2831 std::move(Stage));

2834 MSE.expand();

2835 MSE.cleanup();

2836}

2837

2838

2839

2840

2841

2848 MI->setPostInstrSymbol(MF, Sym);

2849 }

2850}

unsigned const MachineRegisterInfo * MRI

MachineInstrBuilder & UseMI

MachineInstrBuilder MachineInstrBuilder & DefMI

static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)

static const Function * getParent(const Value *V)

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")

Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx

std::optional< std::vector< StOtherPiece > > Other

DenseMap< Block *, BlockRelaxAux > Blocks

global merge Global merge function pass

const HexagonInstrInfo * TII

static unsigned getLoopPhiReg(const MachineInstr &Phi, const MachineBasicBlock *LoopBB)

Return the Phi register value that comes the loop block.

static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)

Return the register values for the operands of a Phi instruction.

unsigned const TargetRegisterInfo * TRI

This file provides utility analysis objects describing memory locations.

static MachineBasicBlock * createDedicatedExit(MachineBasicBlock *Loop, MachineBasicBlock *Exit)

Create a dedicated exit for Loop.

static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming)

Remove the incoming block from the Phis in a basic block.

static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)

Return the Phi register value that comes the loop block.

static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg, MachineBasicBlock *MBB, MachineRegisterInfo &MRI, LiveIntervals &LIS)

Replace all uses of FromReg that appear outside the specified basic block with ToReg.

static void replacePhiSrc(MachineInstr &Phi, Register OrigReg, Register NewReg, MachineBasicBlock *NewMBB)

static MachineInstr * getLoopPhiUser(Register Reg, MachineBasicBlock *Loop)

Return a phi if Reg is referenced by the phi.

static void parseSymbolString(StringRef S, int &Cycle, int &Stage)

static cl::opt< bool > SwapBranchTargetsMVE("pipeliner-swap-branch-targets-mve", cl::Hidden, cl::init(false), cl::desc("Swap target blocks of a conditional branch for MVE expander"))

static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB, MachineRegisterInfo &MRI)

Return true if the register has a use that occurs outside the specified loop.

static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)

Return the Phi register value that comes from the incoming block.

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB

const SmallVectorImpl< MachineOperand > & Cond

Remove Loads Into Fake Uses

assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

Represent the analysis usage information of a pass.

AnalysisUsage & addRequired()

This class represents an Operation in the Expression.

iterator find(const_arg_type_t< KeyT > Val)

DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator

size_type count(const_arg_type_t< KeyT > Val) const

Return 1 if the specified key is in the map, 0 otherwise.

Implements a dense probed hash-table based set.

A possibly irreducible generalization of a Loop.

unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override

Remove the branching code at the end of the specific MBB.

bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override

Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....

unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override

Insert branch code into the end of the specified MachineBasicBlock.

bool hasInterval(Register Reg) const

void insertMBBInMaps(MachineBasicBlock *MBB)

void RemoveMachineInstrFromMaps(MachineInstr &MI)

LiveInterval & createEmptyInterval(Register Reg)

Interval creation.

static constexpr LocationSize beforeOrAfterPointer()

Any location before or after the base pointer (but still within the underlying object).

BlockT * getExitBlock() const

If getExitBlocks would return exactly one block, return that block.

BlockT * getLoopPreheader() const

If there is a preheader for this loop, return it.

Represents a single loop in the control flow graph.

MCSymbol * getOrCreateSymbol(const Twine &Name)

Lookup the symbol inside with the specified Name.

const MCInstrDesc & get(unsigned Opcode) const

Return the machine instruction descriptor that corresponds to the specified instruction opcode.

MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...

void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)

Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...

void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New)

Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.

instr_iterator instr_begin()

void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)

Replace successor OLD with NEW and update probability info.

void transferSuccessors(MachineBasicBlock *FromMBB)

Transfers all the successors from MBB to this machine basic block (i.e., copies all the successors Fr...

instr_iterator insert(instr_iterator I, MachineInstr *M)

Insert MI into the instruction list before I, possibly inside a bundle.

iterator_range< iterator > phis()

Returns a range that iterates over the phis in the basic block.

reverse_instr_iterator instr_rbegin()

int getNumber() const

MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...

void push_back(MachineInstr *MI)

const BasicBlock * getBasicBlock() const

Return the LLVM basic block that this instance corresponded to originally.

succ_iterator succ_begin()

iterator getFirstTerminator()

Returns an iterator to the first terminator instruction of this basic block.

void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())

Add Succ as a successor of this MachineBasicBlock.

SmallVectorImpl< MachineBasicBlock * >::iterator succ_iterator

void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)

Remove successor from the successors list of this MachineBasicBlock.

iterator getFirstNonPHI()

Returns a pointer to the first instruction in this block that is not a PHINode instruction.

void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const

reverse_instr_iterator instr_rend()

Instructions::iterator instr_iterator

pred_iterator pred_begin()

void eraseFromParent()

This method unlinks 'this' from the containing function and deletes it.

instr_iterator instr_end()

const MachineFunction * getParent() const

Return the MachineFunction containing this basic block.

iterator_range< iterator > terminators()

instr_iterator getFirstInstrTerminator()

Same getFirstTerminator but it ignores bundles and return an instr_iterator instead.

Instructions::reverse_iterator reverse_instr_iterator

MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...

void getAnalysisUsage(AnalysisUsage &AU) const override

getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)

getMachineMemOperand - Allocate a new MachineMemOperand.

MCContext & getContext() const

MachineRegisterInfo & getRegInfo()

getRegInfo - Return information about the registers currently in use.

MachineInstr * CloneMachineInstr(const MachineInstr *Orig)

Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...

const MachineBasicBlock & front() const

MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)

CreateMachineBasicBlock - Allocate a new MachineBasicBlock.

void insert(iterator MBBI, MachineBasicBlock *MBB)

Register getReg(unsigned Idx) const

Get the register for the operand index.

const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const

Add a new virtual register operand.

const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const

Representation of each machine instruction.

const MachineBasicBlock * getParent() const

unsigned getNumOperands() const

Retuns the total number of operands.

bool memoperands_empty() const

Return true if we don't have any memory operands which described the memory access done by this instr...

void setMemRefs(MachineFunction &MF, ArrayRef< MachineMemOperand * > MemRefs)

Assign this MachineInstr's memory reference descriptor list.

void dropMemRefs(MachineFunction &MF)

Clear this MachineInstr's memory reference descriptor list.

iterator_range< mop_iterator > operands()

iterator_range< mop_iterator > defs()

Returns a range over all explicit operands that are register definitions.

ArrayRef< MachineMemOperand * > memoperands() const

Access to memory operands of the instruction.

const DebugLoc & getDebugLoc() const

Returns the debug location id of this MachineInstr.

const MachineOperand & getOperand(unsigned i) const

iterator_range< filtered_mop_iterator > all_defs()

Returns an iterator range over all operands that are (explicit or implicit) register defs.

MachineBasicBlock * getTopBlock()

Return the "top" block in the loop, which is the first block in the linear layout,...

A description of a memory reference used in the backend.

MachineOperand class - Representation of each machine instruction operand.

void setImm(int64_t immVal)

bool isReg() const

isReg - Tests if this is a MO_Register operand.

MachineBasicBlock * getMBB() const

void setReg(Register Reg)

Change the register this operand corresponds to.

MachineInstr * getParent()

getParent - Return the instruction that this operand belongs to.

void setMBB(MachineBasicBlock *MBB)

Register getReg() const

getReg - Returns the register number.

static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)

static MachineOperand CreateMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0)

defusechain_iterator - This class provides iterator support for machine operands in the function that...

reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...

MachineRegisterInfo - Keep track of information for virtual and physical registers,...

const TargetRegisterClass * getRegClass(Register Reg) const

Return the register class of the specified virtual register.

MachineInstr * getVRegDef(Register Reg) const

getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...

use_instr_iterator use_instr_begin(Register RegNo) const

Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")

createVirtualRegister - Create and return a new virtual register in the function with the specified r...

static use_instr_iterator use_instr_end()

void setRegClass(Register Reg, const TargetRegisterClass *RC)

setRegClass - Set the register class of the specified virtual register.

iterator_range< use_instr_iterator > use_instructions(Register Reg) const

const TargetRegisterInfo * getTargetRegisterInfo() const

use_iterator use_begin(Register RegNo) const

static use_iterator use_end()

iterator_range< use_iterator > use_operands(Register Reg) const

void replaceRegWith(Register FromReg, Register ToReg)

replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.

MachineInstr * getUniqueVRegDef(Register Reg) const

getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...

static bool canApply(MachineLoop &L)

Check if ModuloScheduleExpanderMVE can be applied to L.

The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...

MachineBasicBlock * getRewrittenKernel()

Returns the newly rewritten kernel block, or nullptr if this was optimized away.

void cleanup()

Performs final cleanup after expansion.

void expand()

Performs the actual expansion.

void annotate()

Performs the annotation.

Represents a schedule for a single-block loop.

int getNumStages() const

Return the number of stages contained in this schedule, which is the largest stage index + 1.

MachineLoop * getLoop() const

Return the single-block loop being scheduled.

ArrayRef< MachineInstr * > getInstructions()

Return the rescheduled instructions in order.

void print(raw_ostream &OS)

int getCycle(MachineInstr *MI)

Return the cycle that MI is scheduled at, or -1.

void setStage(MachineInstr *MI, int MIStage)

Set the stage of a newly created instruction.

int getStage(MachineInstr *MI)

Return the stage that MI is scheduled in, or -1.

static PassRegistry * getPassRegistry()

getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...

std::deque< MachineBasicBlock * > PeeledBack

SmallVector< MachineInstr *, 4 > IllegalPhisToDelete

Illegal phis that need to be deleted once we re-link stages.

DenseMap< MachineInstr *, MachineInstr * > CanonicalMIs

CanonicalMIs and BlockMIs form a bidirectional map between any of the loop kernel clones.

SmallVector< MachineBasicBlock *, 4 > Prologs

All prolog and epilog blocks.

MachineBasicBlock * peelKernel(LoopPeelDirection LPD)

Peels one iteration of the rewritten kernel (BB) in the specified direction.

ModuloSchedule & Schedule

std::deque< MachineBasicBlock * > PeeledFront

State passed from peelKernel to peelPrologAndEpilogs().

unsigned getStage(MachineInstr *MI)

Helper to get the stage of an instruction in the schedule.

void rewriteUsesOf(MachineInstr *MI)

Change all users of MI, if MI is predicated out (LiveStages[MI->getParent()] == false).

SmallVector< MachineBasicBlock *, 4 > Epilogs

DenseMap< MachineBasicBlock *, BitVector > AvailableStages

For every block, the stages that are available.

DenseMap< std::pair< MachineBasicBlock *, MachineInstr * >, MachineInstr * > BlockMIs

Register getEquivalentRegisterIn(Register Reg, MachineBasicBlock *BB)

All prolog and epilog blocks are clones of the kernel, so any produced register in one block has an c...

MachineBasicBlock * Preheader

The original loop preheader.

void rewriteKernel()

Converts BB from the original loop body to the rewritten, pipelined steady-state.

DenseMap< MachineInstr *, unsigned > PhiNodeLoopIteration

When peeling the epilogue keep track of the distance between the phi nodes and the kernel.

DenseMap< MachineBasicBlock *, BitVector > LiveStages

For every block, the stages that are produced.

const TargetInstrInfo * TII

void filterInstructions(MachineBasicBlock *MB, int MinStage)

void peelPrologAndEpilogs()

Peel the kernel forwards and backwards to produce prologs and epilogs, and stitch them together.

MachineBasicBlock * BB

The original loop block that gets rewritten in-place.

void fixupBranches()

Insert branches between prologs, kernel and epilogs.

MachineBasicBlock * CreateLCSSAExitingBlock()

Create a poor-man's LCSSA by cloning only the PHIs from the kernel block to a block dominated by all ...

void validateAgainstModuloScheduleExpander()

Runs ModuloScheduleExpander and treats it as a golden input to validate aspects of the code generated...

Register getPhiCanonicalReg(MachineInstr *CanonicalPhi, MachineInstr *Phi)

Helper function to find the right canonical register for a phi instruction coming from a peeled out p...

MachineRegisterInfo & MRI

void moveStageBetweenBlocks(MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage)

Wrapper class representing virtual and physical registers.

constexpr bool isValid() const

constexpr bool isVirtual() const

Return true if the specified register number is in the virtual register namespace.

constexpr bool isPhysical() const

Return true if the specified register number is in the physical register namespace.

A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...

size_type count(ConstPtrType Ptr) const

count - Return 1 if the specified pointer is in the set, 0 otherwise.

std::pair< iterator, bool > insert(PtrType Ptr)

Inserts Ptr if and only if there is no element in the container equal to Ptr.

SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

reference emplace_back(ArgTypes &&... Args)

iterator erase(const_iterator CI)

void push_back(const T &Elt)

reverse_iterator rbegin()

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

StringRef - Represent a constant reference to a string, i.e.

TargetInstrInfo - Interface to description of machine instruction set.

virtual bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const

Reverses the branch condition of the specified condition list, returning false on success and true if...

virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const

Remove the branching code at the end of the specific MBB.

virtual std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const

Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...

virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const

Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....

virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const

Return true if the instruction contains a base register and offset.

virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const

Insert branch code into the end of the specified MachineBasicBlock.

unsigned insertUnconditionalBranch(MachineBasicBlock &MBB, MachineBasicBlock *DestBB, const DebugLoc &DL, int *BytesAdded=nullptr) const

virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const

If the instruction is an increment of a constant value, return the amount.

bool getMemOperandWithOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const

Get the base operand and byte offset of an instruction that reads/writes memory.

TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...

virtual const TargetRegisterInfo * getRegisterInfo() const

getRegisterInfo - If register information is available, return it.

virtual const TargetInstrInfo * getInstrInfo() const

Target - Wrapper for Target specific information.

A Use represents the edge between a Value definition and its users.

std::pair< iterator, bool > insert(const ValueT &V)

size_type count(const_arg_type_t< ValueT > V) const

Return 1 if the specified key is in the set, 0 otherwise.

self_iterator getIterator()

This class implements an extremely fast bulk output stream that can only output to a stream.

A raw_ostream that writes to an std::string.

A raw_ostream that writes to an SmallVector or SmallString.

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

unsigned ID

LLVM IR allows to use arbitrary numbers as calling convention identifiers.

Reg

All possible values of the reg field in the ModR/M byte.

initializer< Ty > init(const Ty &Val)

NodeAddr< PhiNode * > Phi

NodeAddr< DefNode * > Def

const_iterator end(StringRef path LLVM_LIFETIME_BOUND)

Get end iterator over path.

This is an optimization pass for GlobalISel generic memory operations.

MachineBasicBlock * PeelSingleBlockLoop(LoopPeelDirection Direction, MachineBasicBlock *Loop, MachineRegisterInfo &MRI, const TargetInstrInfo *TII)

Peels a single block loop.

auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)

Get the size of a range.

MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)

Builder interface. Specify how to create the initial instruction itself.

testing::Matcher< const detail::ErrorHolder & > Failed()

iterator_range< T > make_range(T x, T y)

Convenience function for iterating over sub-ranges.

iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)

Make a range that does early increment to allow mutation of the underlying range without disrupting i...

bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)

auto reverse(ContainerTy &&C)

raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

void report_fatal_error(Error Err, bool gen_crash_diag=true)

Report a serious error, calling any installed error handler.

raw_fd_ostream & errs()

This returns a reference to a raw_ostream for standard error.

@ Ref

The access may reference the value stored in memory.

auto count(R &&Range, const E &Element)

Wrapper function around std::count to count the number of times an element Element occurs in the give...

OutputIt copy(R &&Range, OutputIt Out)

void initializeModuloScheduleTestPass(PassRegistry &)

@ LPD_Back

Peel the last iteration of the loop.

@ LPD_Front

Peel the first iteration of the loop.

void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)

Implement std::swap in terms of BitVector swap.

Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...