LLVM: lib/Target/ARM/ARMLowOverheadLoops.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

70

71using namespace llvm;

72

73#define DEBUG_TYPE "arm-low-overhead-loops"

74#define ARM_LOW_OVERHEAD_LOOPS_NAME "ARM Low Overhead Loops pass"

75

78 cl::desc("Disable tail-predication in the ARM LowOverheadLoop pass"),

80

83 cl::desc("Disable omitting 'dls lr, lr' instructions"),

85

88 return PIdx != -1 && MI->getOperand(PIdx + 1).getReg() == ARM::VPR;

89}

90

92 return MI->findRegisterDefOperandIdx(ARM::VPR, nullptr) != -1;

93}

94

96 return MI.findRegisterUseOperandIdx(ARM::VPR, nullptr) != -1;

97}

98

103

109

111 if (MI.isDebugInstr())

112 return false;

114}

115

121

122namespace {

123

125

126 class PostOrderLoopTraversal {

127 MachineLoop &ML;

128 MachineLoopInfo &MLI;

129 SmallPtrSet<MachineBasicBlock*, 4> Visited;

130 SmallVector<MachineBasicBlock*, 4> Order;

131

132 public:

133 PostOrderLoopTraversal(MachineLoop &ML, MachineLoopInfo &MLI)

134 : ML(ML), MLI(MLI) { }

135

136 const SmallVectorImpl<MachineBasicBlock*> &getOrder() const {

137 return Order;

138 }

139

140

141

142 void ProcessLoop() {

143 std::function<void(MachineBasicBlock *)> Search =

144 [this, &Search](MachineBasicBlock *MBB) -> void {

145 if (!Visited.insert(MBB).second)

146 return;

147

149 if (!ML.contains(Succ))

150 continue;

151 Search(Succ);

152 }

153 Order.push_back(MBB);

154 };

155

156

158 ML.getExitBlocks(ExitBlocks);

160

161

162 Search(ML.getHeader());

163

164

165 std::function<void(MachineBasicBlock*)> GetPredecessor =

166 [this, &GetPredecessor] (MachineBasicBlock *MBB) -> void {

167 Order.push_back(MBB);

170 };

171

172 if (auto *Preheader = ML.getLoopPreheader())

173 GetPredecessor(Preheader);

174 else if (auto *Preheader = MLI.findLoopPreheader(&ML, true, true))

175 GetPredecessor(Preheader);

176 }

177 };

178

179 class VPTBlock {

180 SmallVector<MachineInstr *, 4> Insts;

181

182 public:

183 VPTBlock(MachineInstr *MI) { Insts.push_back(MI); }

184

185

186

187 bool hasUniformPredicate() { return getDivergent() == nullptr; }

188

189

190

191 MachineInstr *getDivergent() {

192 SmallVectorImpl<MachineInstr *> &Insts = getInsts();

193 for (unsigned i = 1; i < Insts.size(); ++i) {

194 MachineInstr *Next = Insts[i];

196 return Next;

197 }

198 return nullptr;

199 }

200

201 void insert(MachineInstr *MI) {

202 Insts.push_back(MI);

203

204 assert(Insts.size() <= 5 && "Too many instructions in VPT block!");

205 }

206

208

209 unsigned size() const { return Insts.size(); }

210 SmallVectorImpl<MachineInstr *> &getInsts() { return Insts; }

211 };

212

213

214

215

216

217

218 class VPTState {

219 friend struct LowOverheadLoop;

220

222 SetVector<MachineInstr *> CurrentPredicates;

223 std::map<MachineInstr *, SetVector<MachineInstr *>> PredicatedInsts;

224

226 assert((CurrentPredicates.size() || MI->getParent()->isLiveIn(ARM::VPR))

227 && "Can't begin VPT without predicate");

228 Blocks.emplace_back(MI);

229

230

231

232 PredicatedInsts[MI] = CurrentPredicates;

233 }

234

235 void addInst(MachineInstr *MI) {

236 Blocks.back().insert(MI);

237 PredicatedInsts[MI] = CurrentPredicates;

238 }

239

240 void addPredicate(MachineInstr *MI) {

241 LLVM_DEBUG(dbgs() << "ARM Loops: Adding VPT Predicate: " << *MI);

242 CurrentPredicates.insert(MI);

243 }

244

245 void resetPredicate(MachineInstr *MI) {

246 LLVM_DEBUG(dbgs() << "ARM Loops: Resetting VPT Predicate: " << *MI);

247 CurrentPredicates.clear();

248 CurrentPredicates.insert(MI);

249 }

250

251 public:

252

253 bool isPredicatedOnVCTP(MachineInstr *MI, bool Exclusive = false) {

254 SetVector<MachineInstr *> &Predicates = PredicatedInsts[MI];

255 if (Exclusive && Predicates.size() != 1)

256 return false;

257

259 return false;

261 }

262

263

264 bool isEntryPredicatedOnVCTP(VPTBlock &Block, bool Exclusive = false) {

265 SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts();

266 return isPredicatedOnVCTP(Insts.front(), Exclusive);

267 }

268

269

270

271

272 bool hasImplicitlyValidVPT(VPTBlock &Block, ReachingDefInfo &RDI) {

273 SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts();

274 MachineInstr *VPT = Insts.front();

276 "Expected VPT block to begin with VPT/VPST");

277

278 if (VPT->getOpcode() == ARM::MVE_VPST)

279 return false;

280

281

282

283

285 all_of(Block.getInsts(), [](const MachineInstr *MI) {

286 return !MI->mayStore() && !MI->mayLoad() &&

287 !isHorizontalReduction(*MI) && !isVCTP(MI);

288 }))

289 return true;

290

291 auto IsOperandPredicated = [&](MachineInstr *MI, unsigned Idx) {

293 return Op && PredicatedInsts.count(Op) && isPredicatedOnVCTP(Op);

294 };

295

296 auto IsOperandInvariant = [&](MachineInstr *MI, unsigned Idx) {

297 MachineOperand &MO = MI->getOperand(Idx);

299 return true;

300

301 SmallPtrSet<MachineInstr *, 2> Defs;

303 if (Defs.empty())

304 return true;

305

306 for (auto *Def : Defs)

308 return false;

309 return true;

310 };

311

312

313

314 return (IsOperandPredicated(VPT, 1) || IsOperandPredicated(VPT, 2)) &&

315 (IsOperandPredicated(VPT, 1) || IsOperandInvariant(VPT, 1)) &&

316 (IsOperandPredicated(VPT, 2) || IsOperandInvariant(VPT, 2));

317 }

318

320

321

322

323 for (auto &Block : Blocks) {

324 if (isEntryPredicatedOnVCTP(Block, false) &&

326 return getVPTInstrPredicate(*MI) == ARMVCC::Else;

327 }))

328 continue;

329 if (hasImplicitlyValidVPT(Block, RDI))

330 continue;

331

332 SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts();

333

334

336 "Expected VPT block to start with a VPST or VPT!");

337 if (Insts.size() == 2 && Insts.front()->getOpcode() != ARM::MVE_VPST &&

339 return false;

340

341 for (auto *MI : Insts) {

342

344 return false;

345

347 continue;

348

349

350

351 if (!isPredicatedOnVCTP(MI)) {

353 return false;

354 }

355 }

356 }

357 return true;

358 }

359 };

360

361 struct LowOverheadLoop {

362

363 MachineLoop &ML;

364 MachineBasicBlock *Preheader = nullptr;

365 MachineLoopInfo &MLI;

366 ReachingDefInfo &RDI;

367 const TargetRegisterInfo &TRI;

368 const ARMBaseInstrInfo &TII;

369 MachineFunction *MF = nullptr;

371 MachineBasicBlock *StartInsertBB = nullptr;

372 MachineInstr *Start = nullptr;

373 MachineInstr *Dec = nullptr;

374 MachineInstr *End = nullptr;

375 MachineOperand TPNumElements;

376 SmallVector<MachineInstr *, 4> VCTPs;

377 SmallPtrSet<MachineInstr *, 4> ToRemove;

378 SmallPtrSet<MachineInstr *, 4> BlockMasksToRecompute;

379 SmallPtrSet<MachineInstr *, 4> DoubleWidthResultInstrs;

380 SmallPtrSet<MachineInstr *, 4> VMOVCopies;

381 bool Revert = false;

382 bool CannotTailPredicate = false;

383 VPTState VPTstate;

384

385 LowOverheadLoop(MachineLoop &ML, MachineLoopInfo &MLI, ReachingDefInfo &RDI,

386 const TargetRegisterInfo &TRI, const ARMBaseInstrInfo &TII)

387 : ML(ML), MLI(MLI), RDI(RDI), TRI(TRI), TII(TII),

388 TPNumElements(MachineOperand::CreateImm(0)) {

389 MF = ML.getHeader()->getParent();

390 if (auto *MBB = ML.getLoopPreheader())

391 Preheader = MBB;

392 else if (auto *MBB = MLI.findLoopPreheader(&ML, true, true))

393 Preheader = MBB;

394 }

395

396

397

398

399 bool ValidateMVEInst(MachineInstr *MI);

400

401 void AnalyseMVEInst(MachineInstr *MI) {

402 CannotTailPredicate = !ValidateMVEInst(MI);

403 }

404

405 bool IsTailPredicationLegal() const {

406 // Legal only if the loop is not marked for revert, Start/Dec/End were all

407 // found, a VCTP was recorded, nothing blocked it, and the loop is one block.

408 return !Revert && FoundAllComponents() && !VCTPs.empty() &&

409 !CannotTailPredicate && ML.getNumBlocks() == 1;

410 }

411

412

413

414 bool AddVCTP(MachineInstr *MI);

415

416

417

418

419 bool ValidateTailPredicate();

420

421

422

423 bool ValidateLiveOuts();

424

425

426

427 void Validate(ARMBasicBlockUtils *BBUtils);

428

429 bool FoundAllComponents() const {

430 return Start && Dec && End;

431 }

432

433 SmallVectorImpl &getVPTBlocks() { return VPTstate.Blocks; }

434

435

436

437 MachineOperand &getLoopStartOperand() {

438 if (IsTailPredicationLegal())

439 return TPNumElements;

440 return Start->getOperand(1);

441 }

442

443 unsigned getStartOpcode() const {

445 if (!IsTailPredicationLegal())

446 return IsDo ? ARM::t2DLS : ARM::t2WLS;

447

449 }

450

451 void dump() const {

452 if (Start) dbgs() << "ARM Loops: Found Loop Start: " << *Start;

453 if (Dec) dbgs() << "ARM Loops: Found Loop Dec: " << *Dec;

454 if (End) dbgs() << "ARM Loops: Found Loop End: " << *End;

455 if (!VCTPs.empty()) {

456 dbgs() << "ARM Loops: Found VCTP(s):\n";

457 for (auto *MI : VCTPs)

458 dbgs() << " - " << *MI;

459 }

460 if (!FoundAllComponents())

461 dbgs() << "ARM Loops: Not a low-overhead loop.\n";

462 else if (!(Start && Dec && End))

463 dbgs() << "ARM Loops: Failed to find all loop components.\n";

464 }

465 };

466

468 MachineFunction *MF = nullptr;

469 MachineLoopInfo *MLI = nullptr;

470 ReachingDefInfo *RDI = nullptr;

471 const ARMBaseInstrInfo *TII = nullptr;

472 MachineRegisterInfo *MRI = nullptr;

473 const TargetRegisterInfo *TRI = nullptr;

474 std::unique_ptr BBUtils = nullptr;

475

476 public:

477 static char ID;

478

479 ARMLowOverheadLoops() : MachineFunctionPass(ID) { }

480

481 void getAnalysisUsage(AnalysisUsage &AU) const override {

483 AU.addRequired();

484 AU.addRequired();

486 }

487

488 bool runOnMachineFunction(MachineFunction &MF) override;

489

490 MachineFunctionProperties getRequiredProperties() const override {

491 return MachineFunctionProperties().setNoVRegs().setTracksLiveness();

492 }

493

494 StringRef getPassName() const override {

496 }

497

498 private:

499 bool ProcessLoop(MachineLoop *ML);

500

501 bool RevertNonLoops();

502

503 void RevertWhile(MachineInstr *MI) const;

504 void RevertDo(MachineInstr *MI) const;

505

507

508 void RevertLoopEnd(MachineInstr *MI, bool SkipCmp = false) const;

509

510 void RevertLoopEndDec(MachineInstr *MI) const;

511

512 void ConvertVPTBlocks(LowOverheadLoop &LoLoop);

513

514 MachineInstr *ExpandLoopStart(LowOverheadLoop &LoLoop);

515

516 void Expand(LowOverheadLoop &LoLoop);

517

518 void IterationCountDCE(LowOverheadLoop &LoLoop);

519 };

520}

521

522char ARMLowOverheadLoops::ID = 0;

523

525 false, false)

526

529

530

531

533

535 for (auto *Dead : Killed)

536 BasicBlocks.insert(Dead->getParent());

537

538

539 std::map<MachineInstr *, SmallPtrSet<MachineInstr *, 2>> ITBlocks;

540 for (auto *MBB : BasicBlocks) {

541 for (auto &IT : *MBB) {

542 if (IT.getOpcode() != ARM::t2IT)

543 continue;

545 ITBlocks[&IT]);

546 }

547 }

548

549

550

553 for (auto *Dead : Killed) {

555 Dead->findRegisterUseOperand(ARM::ITSTATE, nullptr)) {

558 auto &CurrentBlock = ITBlocks[IT];

559 CurrentBlock.erase(Dead);

560 if (CurrentBlock.empty())

562 else

564 }

565 }

566 if (!ModifiedITs.empty())

567 return false;

569 return true;

570 };

571

574 return false;

575

579 << " - can also remove:\n";

581 dbgs() << " - " << *Use);

582

584 RDI.collectKilledOperands(MI, Killed);

585 if (WontCorruptITs(Killed, RDI)) {

586 ToRemove.insert_range(Killed);

588 dbgs() << " - " << *Dead);

589 }

590 return true;

591 }

592 return false;

593}

594

595bool LowOverheadLoop::ValidateTailPredicate() {

596 if (!IsTailPredicationLegal()) {

598 dbgs() << "ARM Loops: Didn't find a VCTP instruction.\n";

599 dbgs() << "ARM Loops: Tail-predication is not valid.\n");

600 return false;

601 }

602

603 assert(!VCTPs.empty() && "VCTP instruction expected but is not set");

604 assert(ML.getBlocks().size() == 1 &&

605 "Shouldn't be processing a loop with more than one block");

606

608 LLVM_DEBUG(dbgs() << "ARM Loops: tail-predication is disabled\n");

609 return false;

610 }

611

612 if (!VPTstate.isValid(RDI)) {

613 LLVM_DEBUG(dbgs() << "ARM Loops: Invalid VPT state.\n");

614 return false;

615 }

616

617 if (!ValidateLiveOuts()) {

618 LLVM_DEBUG(dbgs() << "ARM Loops: Invalid live outs.\n");

619 return false;

620 }

621

622

623

624

625

626 MachineInstr *VCTP = VCTPs.back();

627 if (Start->getOpcode() == ARM::t2DoLoopStartTP ||

628 Start->getOpcode() == ARM::t2WhileLoopStartTP) {

629 TPNumElements = Start->getOperand(2);

630 StartInsertPt = Start;

631 StartInsertBB = Start->getParent();

632 } else {

634 MCRegister NumElements = TPNumElements.getReg().asMCReg();

635

636

637

638

640 LLVM_DEBUG(dbgs() << "ARM Loops: VCTP operand is defined in the loop.\n");

641 return false;

642 }

643

644

645

646

647

648 if (StartInsertPt != StartInsertBB->end() &&

650 if (auto *ElemDef =

653 ElemDef->removeFromParent();

654 StartInsertBB->insert(StartInsertPt, ElemDef);

656 << "ARM Loops: Moved element count def: " << *ElemDef);

658 StartInsertPt->removeFromParent();

660 &*StartInsertPt);

661 LLVM_DEBUG(dbgs() << "ARM Loops: Moved start past: " << *ElemDef);

662 } else {

663

664

665

666 MachineOperand Operand = ElemDef->getOperand(1);

671 TPNumElements = Operand;

672 NumElements = TPNumElements.getReg();

673 } else {

675 << "ARM Loops: Unable to move element count to loop "

676 << "start instruction.\n");

677 return false;

678 }

679 }

680 }

681 }

682

683

684

685

686 auto CannotProvideElements = [this](MachineBasicBlock *MBB,

687 MCRegister NumElements) {

689 return false;

690

692 return true;

693

694

696 return true;

697

698 return false;

699 };

700

701

702 MachineBasicBlock *MBB = Preheader;

703 while (MBB && MBB != StartInsertBB) {

704 if (CannotProvideElements(MBB, NumElements)) {

705 LLVM_DEBUG(dbgs() << "ARM Loops: Unable to provide element count.\n");

706 return false;

707 }

709 }

710 }

711

712

713

714

715

716

717

718

719

720

721 if (std::any_of(StartInsertPt, StartInsertBB->end(), shouldInspect)) {

722 LLVM_DEBUG(dbgs() << "ARM Loops: Instruction blocks [W|D]LSTP\n");

723 return false;

724 }

725

726

727

728

729

730 unsigned VCTPVecSize = getVecSize(*VCTP);

731 for (MachineInstr *MI : DoubleWidthResultInstrs) {

733 if (InstrVecSize > VCTPVecSize) {

734 LLVM_DEBUG(dbgs() << "ARM Loops: Double width result larger than VCTP "

735 << "VecSize:\n" << *MI);

736 return false;

737 }

738 }

739

740

741

742

743

744 auto IsValidSub = [](MachineInstr *MI, int ExpectedVecWidth) {

746 };

747

749

750

751

752

755 SmallPtrSet<MachineInstr*, 2> ElementChain;

756 SmallPtrSet<MachineInstr*, 2> Ignore;

758

759 Ignore.insert_range(VCTPs);

760

761 if (TryRemove(Def, RDI, ElementChain, Ignore)) {

762 bool FoundSub = false;

763

764 for (auto *MI : ElementChain) {

766 continue;

767

769 if (FoundSub || !IsValidSub(MI, ExpectedVectorWidth)) {

770 LLVM_DEBUG(dbgs() << "ARM Loops: Unexpected instruction in element"

771 " count: " << *MI);

772 return false;

773 }

774 FoundSub = true;

775 } else {

776 LLVM_DEBUG(dbgs() << "ARM Loops: Unexpected instruction in element"

777 " count: " << *MI);

778 return false;

779 }

780 }

781 ToRemove.insert_range(ElementChain);

782 }

783 }

784

785

786

787

788 if ((Start->getOpcode() == ARM::t2DoLoopStartTP ||

789 Start->getOpcode() == ARM::t2WhileLoopStartTP) &&

790 Preheader && !Preheader->empty() &&

796 }

797 }

798

799 return true;

800}

801

806

807

808

809

815

816

817

818

824

825

826

827

828

830

831

832

834 return true;

835

836 switch (MI.getOpcode()) {

837 default:

838 break;

839

840

841

842 case ARM::MVE_VMVN:

843 case ARM::MVE_VORN:

844

845 case ARM::MVE_VCLZs8:

846 case ARM::MVE_VCLZs16:

847 case ARM::MVE_VCLZs32:

848 return true;

849 }

850 return false;

851}

852

853

854

855

856

857

861 InstSet &FalseLanesZero) {

863 return false;

864

866

867

868 if (MI.mayLoad())

869 return isPredicated;

870

873 Def->getOpcode() == ARM::MVE_VMOVimmi32 &&

874 Def->getOperand(1).getImm() == 0;

875 };

876

878 for (auto &MO : MI.operands()) {

880 continue;

882 continue;

883

885 if (PIdx != -1 && MO.getOperandNo() == PIdx + ARM::SUBOP_vpred_n_tp_reg)

886 continue;

887

888

889

890

891

893 RDI.getGlobalReachingDefs(&MI, MO.getReg(), Defs);

894 if (Defs.empty())

895 return false;

896 for (auto *Def : Defs) {

897 if (Def == &MI || FalseLanesZero.count(Def) || IsZeroInit(Def))

898 continue;

899 if (MO.isUse() && isPredicated)

900 continue;

901 return false;

902 }

903 }

904 LLVM_DEBUG(dbgs() << "ARM Loops: Always False Zeros: " << MI);

905 return true;

906}

907

908bool LowOverheadLoop::ValidateLiveOuts() {

909

910

911

912

913

914

915

916

917

918

919

920

921

922

923

924

925

926

927

928

929

930 const TargetRegisterClass *QPRs = TRI.getRegClass(ARM::MQPRRegClassID);

931 SetVector<MachineInstr *> FalseLanesUnknown;

933 SmallPtrSet<MachineInstr *, 4> Predicated;

934 MachineBasicBlock *Header = ML.getHeader();

935

936 LLVM_DEBUG(dbgs() << "ARM Loops: Validating Live outs\n");

937

938 for (auto &MI : *Header) {

940 continue;

941

943 continue;

944

946 bool retainsOrReduces =

948

949 if (isPredicated)

953 else if (MI.getNumDefs() == 0)

954 continue;

955 else if (!isPredicated && retainsOrReduces) {

956 LLVM_DEBUG(dbgs() << " Unpredicated instruction that retainsOrReduces: " << MI);

957 return false;

958 } else if (!isPredicated && MI.getOpcode() != ARM::MQPRCopy)

959 FalseLanesUnknown.insert(&MI);

960 }

961

963 dbgs() << " Predicated:\n";

965 dbgs() << " " << *I;

966 dbgs() << " FalseLanesZero:\n";

967 for (auto *I : FalseLanesZero)

968 dbgs() << " " << *I;

969 dbgs() << " FalseLanesUnknown:\n";

970 for (auto *I : FalseLanesUnknown)

971 dbgs() << " " << *I;

972 });

973

974 auto HasPredicatedUsers = [this](MachineInstr *MI, const MachineOperand &MO,

975 SmallPtrSetImpl<MachineInstr *> &Predicated) {

976 SmallPtrSet<MachineInstr *, 2> Uses;

978 for (auto *Use : Uses) {

980 return false;

981 }

982 return true;

983 };

984

985

986

987

988

989

990

991 SmallPtrSet<MachineInstr*, 2> NonPredicated;

992 for (auto *MI : reverse(FalseLanesUnknown)) {

993 for (auto &MO : MI->operands()) {

995 continue;

996 if (!HasPredicatedUsers(MI, MO, Predicated)) {

998 << TRI.getRegAsmName(MO.getReg()) << " at " << *MI);

1000 break;

1001 }

1002 }

1003

1006 }

1007

1008 SmallPtrSet<MachineInstr *, 2> LiveOutMIs;

1010 ML.getExitBlocks(ExitBlocks);

1011 assert(ML.getNumBlocks() == 1 && "Expected single block loop!");

1012 assert(ExitBlocks.size() == 1 && "Expected a single exit block");

1013 MachineBasicBlock *ExitBB = ExitBlocks.front();

1014 for (const MachineBasicBlock::RegisterMaskPair &RegMask : ExitBB->liveins()) {

1015

1016

1017 if (RegMask.PhysReg == ARM::VPR) {

1018 LLVM_DEBUG(dbgs() << " VPR is live in to the exit block.");

1019 return false;

1020 }

1021

1022

1023 if (QPRs->contains(RegMask.PhysReg))

1026 }

1027

1028

1029

1030

1031

1032

1033

1034

1036 while (!Worklist.empty()) {

1037 MachineInstr *MI = Worklist.pop_back_val();

1038 if (MI->getOpcode() == ARM::MQPRCopy) {

1039 LLVM_DEBUG(dbgs() << " Must generate copy as VMOV: " << *MI);

1041 MachineInstr *CopySrc =

1043 if (CopySrc)

1044 Worklist.push_back(CopySrc);

1045 } else if (NonPredicated.count(MI) && FalseLanesUnknown.contains(MI)) {

1046 LLVM_DEBUG(dbgs() << " Unable to handle live out: " << *MI);

1047 VMOVCopies.clear();

1048 return false;

1050

1051

1053 if (InactiveIdx != -1) {

1055 MI, MI->getOperand(InactiveIdx).getReg());

1056 if (FalseSrc) {

1058 << " Must check source of false lanes for: " << *MI);

1059 Worklist.push_back(FalseSrc);

1060 }

1061 }

1062 }

1063 }

1064

1065 return true;

1066}

1067

1068void LowOverheadLoop::Validate(ARMBasicBlockUtils *BBUtils) {

1069 if (Revert)

1070 return;

1071

1072

1073

1074 auto ValidateRanges = [](MachineInstr *Start, MachineInstr *End,

1075 ARMBasicBlockUtils *BBUtils, MachineLoop &ML) {

1076 MachineBasicBlock *TgtBB = End->getOpcode() == ARM::t2LoopEnd

1077 ? End->getOperand(1).getMBB()

1078 : End->getOperand(2).getMBB();

1079

1080

1081 if (TgtBB != ML.getHeader()) {

1082 LLVM_DEBUG(dbgs() << "ARM Loops: LoopEnd is not targeting header.\n");

1083 return false;

1084 }

1085

1086

1087

1088 if (BBUtils->getOffsetOf(End) < BBUtils->getOffsetOf(ML.getHeader()) ||

1089 !BBUtils->isBBInRange(End, ML.getHeader(), 4094)) {

1090 LLVM_DEBUG(dbgs() << "ARM Loops: LE offset is out-of-range\n");

1091 return false;

1092 }

1093

1096 if (BBUtils->getOffsetOf(Start) > BBUtils->getOffsetOf(TargetBB) ||

1097 !BBUtils->isBBInRange(Start, TargetBB, 4094)) {

1098 LLVM_DEBUG(dbgs() << "ARM Loops: WLS offset is out-of-range!\n");

1099 return false;

1100 }

1101 }

1102 return true;

1103 };

1104

1106 StartInsertBB = Start->getParent();

1107 LLVM_DEBUG(dbgs() << "ARM Loops: Will insert LoopStart at "

1108 << *StartInsertPt);

1109

1110 Revert = !ValidateRanges(Start, End, BBUtils, ML);

1111 CannotTailPredicate = !ValidateTailPredicate();

1112}

1113

1114bool LowOverheadLoop::AddVCTP(MachineInstr *MI) {

1116 if (VCTPs.empty()) {

1118 return true;

1119 }

1120

1121

1122

1123 MachineInstr *Prev = VCTPs.back();

1126 LLVM_DEBUG(dbgs() << "ARM Loops: Found VCTP with a different reaching "

1127 "definition from the main VCTP");

1128 return false;

1129 }

1131 return true;

1132}

1133

1135

1137 const PseudoSourceValue *PseudoValue = Operand->getPseudoValue();

1140 return FS->getFrameIndex();

1141 }

1142 }

1143 return -1;

1144 };

1145

1146 auto IsStackOp = [GetFrameIndex](MachineInstr *I) {

1147 switch (I->getOpcode()) {

1148 case ARM::MVE_VSTRWU32:

1149 case ARM::MVE_VLDRWU32: {

1150 return I->getOperand(1).getReg() == ARM::SP &&

1151 I->memoperands().size() == 1 &&

1152 GetFrameIndex(I->memoperands().front()) >= 0;

1153 }

1154 default:

1155 return false;

1156 }

1157 };

1158

1159

1160

1161 if (MI->getOpcode() != ARM::MVE_VSTRWU32 || !IsStackOp(MI))

1162 return false;

1163

1164

1165

1166

1167 if (MI->memoperands().size() == 0)

1168 return false;

1169 int FI = GetFrameIndex(MI->memoperands().front());

1170

1171 auto &FrameInfo = MI->getParent()->getParent()->getFrameInfo();

1172 if (FI == -1 || !FrameInfo.isSpillSlotObjectIndex(FI))

1173 return false;

1174

1176 ML->getExitBlocks(Frontier);

1178 unsigned Idx = 0;

1179 while (Idx < Frontier.size()) {

1181 bool LookAtSuccessors = true;

1182 for (auto &I : *BB) {

1183 if (!IsStackOp(&I) || I.memoperands().size() == 0)

1184 continue;

1185 if (GetFrameIndex(I.memoperands().front()) != FI)

1186 continue;

1187

1188

1189 if (I.getOpcode() == ARM::MVE_VSTRWU32) {

1190 LookAtSuccessors = false;

1191 break;

1192 }

1193

1194

1195 if (I.getOpcode() == ARM::MVE_VLDRWU32)

1196 return false;

1197 }

1198

1199 if (LookAtSuccessors) {

1200 for (auto *Succ : BB->successors()) {

1203 }

1204 }

1206 Idx++;

1207 }

1208

1209 return true;

1210}

1211

1212bool LowOverheadLoop::ValidateMVEInst(MachineInstr *MI) {

1213 if (CannotTailPredicate)

1214 return false;

1215

1217 return true;

1218

1219 if (MI->getOpcode() == ARM::MVE_VPSEL ||

1220 MI->getOpcode() == ARM::MVE_VPNOT) {

1221

1222

1223

1224

1225

1226

1227

1228

1229 return false;

1230 }

1231

1232

1234 return false;

1235

1236

1237

1238 const MCInstrDesc &MCID = MI->getDesc();

1239 bool IsUse = false;

1240 unsigned LastOpIdx = MI->getNumOperands() - 1;

1242 const MachineOperand &MO = MI->getOperand(LastOpIdx - Op.index());

1244 continue;

1245

1247 VPTstate.addInst(MI);

1248 IsUse = true;

1249 } else if (MI->getOpcode() != ARM::MVE_VPST) {

1250 LLVM_DEBUG(dbgs() << "ARM Loops: Found instruction using vpr: " << *MI);

1251 return false;

1252 }

1253 }

1254

1255

1256

1257

1258 bool RequiresExplicitPredication =

1260 if (isDomainMVE(MI) && RequiresExplicitPredication) {

1261 if (MI->getOpcode() == ARM::MQPRCopy)

1262 return true;

1264 DoubleWidthResultInstrs.insert(MI);

1265 return true;

1266 }

1267

1269 << "ARM Loops: Can't tail predicate: " << *MI);

1270 return IsUse;

1271 }

1272

1273

1274

1276 return IsUse;

1277

1278

1279

1281

1282

1284 VPTstate.resetPredicate(MI);

1285 else

1286 VPTstate.addPredicate(MI);

1287 }

1288

1289

1290

1292 VPTstate.CreateVPTBlock(MI);

1293

1294 return true;

1295}

1296

1297bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) {

1298 const ARMSubtarget &ST = mf.getSubtarget();

1299 if (!ST.hasLOB()) // Bail out only when the subtarget lacks LOB; the check used to be inverted.

1300 return false;

1301

1302 MF = &mf;

1304

1305 MLI = &getAnalysis().getLI();

1306 RDI = &getAnalysis().getRDI();

1309 TII = ST.getInstrInfo();

1310 TRI = ST.getRegisterInfo();

1311 BBUtils = std::make_unique(*MF);

1312 BBUtils->computeAllBlockSizes();

1313 BBUtils->adjustBBOffsetsAfter(&MF->front());

1314

1316 for (auto *ML : *MLI) {

1317 if (ML->isOutermost())

1319 }

1320 Changed |= RevertNonLoops();

1322}

1323

1324bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {

1326

1327

1328 for (MachineLoop *L : *ML)

1329 Changed |= ProcessLoop(L);

1330

1332 dbgs() << "ARM Loops: Processing loop containing:\n";

1333 if (auto *Preheader = ML->getLoopPreheader())

1335 else if (auto *Preheader = MLI->findLoopPreheader(ML, true, true))

1337 for (auto *MBB : ML->getBlocks())

1339 });

1340

1341

1342

1343 std::function<MachineInstr*(MachineBasicBlock*)> SearchForStart =

1344 [&SearchForStart](MachineBasicBlock *MBB) -> MachineInstr* {

1345 for (auto &MI : *MBB) {

1347 return &MI;

1348 }

1351 return nullptr;

1352 };

1353

1354 LowOverheadLoop LoLoop(*ML, *MLI, *RDI, *TRI, *TII);

1355

1356

1357

1358 if (LoLoop.Preheader)

1359 LoLoop.Start = SearchForStart(LoLoop.Preheader);

1360 else

1362

1363

1364

1365

1366 for (auto *MBB : reverse(ML->getBlocks())) {

1367 for (auto &MI : *MBB) {

1368 if (MI.isDebugValue())

1369 continue;

1370 else if (MI.getOpcode() == ARM::t2LoopDec)

1371 LoLoop.Dec = &MI;

1372 else if (MI.getOpcode() == ARM::t2LoopEnd)

1373 LoLoop.End = &MI;

1374 else if (MI.getOpcode() == ARM::t2LoopEndDec)

1375 LoLoop.End = LoLoop.Dec = &MI;

1377 LoLoop.Start = &MI;

1378 else if (MI.getDesc().isCall()) {

1379

1380

1381

1382 LoLoop.Revert = true;

1384 } else {

1385

1386

1387 LoLoop.AnalyseMVEInst(&MI);

1388 }

1389 }

1390 }

1391

1393 if (!LoLoop.FoundAllComponents()) {

1394 LLVM_DEBUG(dbgs() << "ARM Loops: Didn't find loop start, update, end\n");

1396 }

1397

1398 assert(LoLoop.Start->getOpcode() != ARM::t2WhileLoopStart &&

1399 "Expected t2WhileLoopStart to be removed before regalloc!");

1400

1401 // With a separate Dec and End, LoopDec is removable only if LoopEnd is its

1402 // sole user: revert on more than one use, or when LoopEnd is NOT among them

1403 // (the membership test used to be inverted, reverting the valid case).

1404 if (LoLoop.Dec != LoLoop.End) {

1405 SmallPtrSet<MachineInstr *, 2> Uses;

1407 if (Uses.size() > 1 || !Uses.count(LoLoop.End)) {

1408 LLVM_DEBUG(dbgs() << "ARM Loops: Unable to remove LoopDec.\n");

1409 LoLoop.Revert = true;

1410 }

1411 }

1412 LoLoop.Validate(BBUtils.get());

1413 Expand(LoLoop);

1414 return true;

1415}

1416

1417

1418

1419

1420

1421void ARMLowOverheadLoops::RevertWhile(MachineInstr *MI) const {

1422 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp: " << *MI);

1424 unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ?

1425 ARM::tBcc : ARM::t2Bcc;

1426

1428}

1429

1430void ARMLowOverheadLoops::RevertDo(MachineInstr *MI) const {

1431 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to mov: " << *MI);

1433}

1434

1435bool ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI) const {

1436 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to sub: " << *MI);

1437 MachineBasicBlock *MBB = MI->getParent();

1438 SmallPtrSet<MachineInstr*, 1> Ignore;

1440 if (I->getOpcode() == ARM::t2LoopEnd) {

1442 break;

1443 }

1444 }

1445

1446

1447 bool SetFlags =

1449

1451 return SetFlags;

1452}

1453

1454

1455void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI, bool SkipCmp) const {

1456 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp, br: " << *MI);

1457

1458 MachineBasicBlock *DestBB = MI->getOperand(1).getMBB();

1459 unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ?

1460 ARM::tBcc : ARM::t2Bcc;

1461

1463}

1464

1465

1466void ARMLowOverheadLoops::RevertLoopEndDec(MachineInstr *MI) const {

1467 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to subs, br: " << *MI);

1468 assert(MI->getOpcode() == ARM::t2LoopEndDec && "Expected a t2LoopEndDec!");

1469 MachineBasicBlock *MBB = MI->getParent();

1470

1471 MachineInstrBuilder MIB =

1473 MIB.addDef(ARM::LR);

1474 MIB.add(MI->getOperand(1));

1477 MIB.addReg(ARM::NoRegister);

1478 MIB.addReg(ARM::CPSR);

1480

1481 MachineBasicBlock *DestBB = MI->getOperand(2).getMBB();

1482 unsigned BrOpc =

1483 BBUtils->isBBInRange(MI, DestBB, 254) ? ARM::tBcc : ARM::t2Bcc;

1484

1485

1487 MIB.add(MI->getOperand(2));

1489 MIB.addReg(ARM::CPSR);

1490

1491 MI->eraseFromParent();

1492}

1493

1494

1495

1496

1497

1498

1499

1500

1501

1502

1503

1504

1505

1506

1507

1508

1509

1510

1511

1512

1513

1514

1515

1516

1517

1518void ARMLowOverheadLoops::IterationCountDCE(LowOverheadLoop &LoLoop) {

1519 if (!LoLoop.IsTailPredicationLegal())

1520 return;

1521

1522 LLVM_DEBUG(dbgs() << "ARM Loops: Trying DCE on loop iteration count.\n");

1523

1525 if (!Def) {

1526 LLVM_DEBUG(dbgs() << "ARM Loops: Couldn't find iteration count.\n");

1527 return;

1528 }

1529

1530

1531 SmallPtrSet<MachineInstr*, 4> Killed = { LoLoop.Start, LoLoop.Dec,

1532 LoLoop.End };

1533 if (!TryRemove(Def, *RDI, LoLoop.ToRemove, Killed))

1534 LLVM_DEBUG(dbgs() << "ARM Loops: Unsafe to remove loop iteration count.\n");

1535}

1536

1537MachineInstr* ARMLowOverheadLoops::ExpandLoopStart(LowOverheadLoop &LoLoop) {

1538 LLVM_DEBUG(dbgs() << "ARM Loops: Expanding LoopStart.\n");

1539

1540

1541 IterationCountDCE(LoLoop);

1542

1544 MachineInstr *Start = LoLoop.Start;

1545 MachineBasicBlock *MBB = LoLoop.StartInsertBB;

1546 unsigned Opc = LoLoop.getStartOpcode();

1547 MachineOperand &Count = LoLoop.getLoopStartOperand();

1548

1549

1550 MachineInstr* NewStart;

1552 Count.getReg() == ARM::LR) {

1553 LLVM_DEBUG(dbgs() << "ARM Loops: Didn't insert start: DLS lr, lr");

1554 NewStart = nullptr;

1555 } else {

1556 MachineInstrBuilder MIB =

1558

1559 MIB.addDef(ARM::LR);

1563

1564 LLVM_DEBUG(dbgs() << "ARM Loops: Inserted start: " << *MIB);

1565 NewStart = &*MIB;

1566 }

1567

1568 LoLoop.ToRemove.insert(Start);

1569 return NewStart;

1570}

1571

// Walks every VPT block recorded in LoLoop and, depending on how the block
// relates to the loop's VCTP, strips predicates from its instructions, removes
// its VPST, or folds a VCMP into a new block head. In the visible code,
// instructions are only queued in LoLoop.ToRemove (not erased here), and block
// heads whose mask must be rebuilt are queued in LoLoop.BlockMasksToRecompute.
// Finally, all of LoLoop.VCTPs are queued for removal.
// NOTE(review): this listing is missing a number of original lines (see the
// gaps in the embedded line numbers); comments below hedge where a definition
// or condition is not visible.
1572void ARMLowOverheadLoops::ConvertVPTBlocks(LowOverheadLoop &LoLoop) {

// Helper: un-predicates MI by setting operand PIdx + 1 to register 0.
// Debug instructions are ignored.
1573 auto RemovePredicate = [](MachineInstr *MI) {

1574 if (MI->isDebugInstr())

1575 return;

1576 LLVM_DEBUG(dbgs() << "ARM Loops: Removing predicate from: " << *MI);

// NOTE(review): the definition of PIdx (line 1577) is missing from this
// listing; presumably it is the index of MI's first VPT predicate operand.
1578 assert(PIdx >= 1 && "Trying to unpredicate a non-predicated instruction");

// NOTE(review): the asserted condition (line 1579) and the statement on line
// 1581 are missing; only the assertion message below is visible.
1580 "Expected Then predicate!");

1582 MI->getOperand(PIdx + 1).setReg(0);

1583 };

1584

1585 for (auto &Block : LoLoop.getVPTBlocks()) {

1586 SmallVectorImpl<MachineInstr *> &Insts = Block.getInsts();

1587

// Helper: builds a new instruction immediately before At (in At's block, with
// At's debug location), records it for mask recomputation, queues TheVCMP for
// removal and nulls the caller's pointer (TheVCMP is taken by reference) so
// the same VCMP cannot be replaced twice.
1588 auto ReplaceVCMPWithVPT = [&](MachineInstr *&TheVCMP, MachineInstr *At) {

1589 assert(TheVCMP && "Replacing a removed or non-existent VCMP");

1590

1591 MachineInstrBuilder MIB =

// NOTE(review): the opcode and operand lines of this BuildMI call (1593-1600)
// are missing; the debug message suggests a VPT equivalent of the VCMP is
// created - confirm against the full source.
1592 BuildMI(*At->getParent(), At, At->getDebugLoc(),

1595

1597

1599

1601 LLVM_DEBUG(dbgs() << "ARM Loops: Combining with VCMP to VPT: " << *MIB);

1602 LoLoop.BlockMasksToRecompute.insert(MIB.getInstr());

1603 LoLoop.ToRemove.insert(TheVCMP);

1604 TheVCMP = nullptr;

1605 };

1606

// Case 1: the block's entry is predicated on the VCTP. The VPST at the front
// of the block is always queued for removal at the end of this branch.
1607 if (LoLoop.VPTstate.isEntryPredicatedOnVCTP(Block, true)) {

1608 MachineInstr *VPST = Insts.front();

1609 if (Block.hasUniformPredicate()) {

1610

1611

1612

1613

1614 LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *VPST);

// Insts[0] is the VPST itself, so start un-predicating from index 1.
1615 for (unsigned i = 1; i < Insts.size(); ++i)

1616 RemovePredicate(Insts[i]);

1617 } else {

1618

1619

1620

1621

1622

1623

1624

// Non-uniform predicate: work relative to the block's divergent instruction.
1625 MachineInstr *Divergent = Block.getDivergent();

1626 MachineBasicBlock *MBB = Divergent->getParent();

// NOTE(review): the declaration of DivergentNext (line 1627) is missing;
// presumably it starts at the instruction following Divergent. The loop skips
// over any debug instructions.
1628 while (DivergentNext != MBB->end() && DivergentNext->isDebugInstr())

1629 ++DivergentNext;

1630

// NOTE(review): the second half of this condition (line 1633) is missing.
1631 bool DivergentNextIsPredicated =

1632 DivergentNext != MBB->end() &&

1634

// NOTE(review): the loop header's range (line 1635) is missing; every
// instruction in [I, E) has its predicate removed.
1636 I != E; ++I)

1637 RemovePredicate(&*I);

1638

1639

1640

// NOTE(review): the initialiser of VCMP (line 1642) is missing. VCMP may be
// null, as the check below shows.
1641 MachineInstr *VCMP =

1643

1644 if (DivergentNextIsPredicated) {

1645

1646

1647

// No VCMP to fold: a new instruction is built instead (logged as a created
// VPST) and queued for mask recomputation.
1648 if (!VCMP) {

1649

1650

// NOTE(review): the BuildMI expression (lines 1652-1654) is missing.
1651 MachineInstrBuilder MIB =

1655 LLVM_DEBUG(dbgs() << "ARM Loops: Created VPST: " << *MIB);

1656 LoLoop.BlockMasksToRecompute.insert(MIB.getInstr());

1657 } else {

1658

1659

// Replace the VCMP in place: the new instruction is inserted before the VCMP
// itself, which is then queued for removal.
1660 ReplaceVCMPWithVPT(VCMP, VCMP);

1661 }

1662 }

1663 }

1664 LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *VPST);

1665 LoLoop.ToRemove.insert(VPST);

// Case 2: the block itself contains a VCTP.
1666 } else if (Block.containsVCTP()) {

1667

1668

1669 MachineInstr *VPST = Insts.front();

// A block of size 2 has its VPST removed; otherwise the VPST stays and only
// its block mask is recomputed.
1670 if (Block.size() == 2) {

// NOTE(review): the asserted condition (line 1671) is missing.
1672 "Found a VPST in an otherwise empty vpt block");

1673 LoLoop.ToRemove.insert(VPST);

1674 } else

1675 LoLoop.BlockMasksToRecompute.insert(VPST);

// Case 3: a block headed by a plain VPST; try to fold the VCMP that defines
// its predicate into the block head.
1676 } else if (Insts.front()->getOpcode() == ARM::MVE_VPST) {

1677

1678

1679

1680 MachineInstr *VPST = Insts.front();

// NOTE(review): lines 1681-1686 are missing (the asserted condition, the
// definition of VprDef and the start of the if-condition below).
1683 "The instruction after a VPST must be predicated");

// Never reuse a VPR definition that is already queued for removal.
1687 !LoLoop.ToRemove.contains(VprDef)) {

1688 MachineInstr *VCMP = VprDef;

1689

1690

1691

1692

1693

// NOTE(review): a guarding condition (lines 1694-1697) is missing; the three
// statements below sit inside it, as the extra closing brace on line 1701
// shows.
1698 ReplaceVCMPWithVPT(VCMP, VPST);

1699 LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *VPST);

1700 LoLoop.ToRemove.insert(VPST);

1701 }

1702 }

1703 }

1704 }

1705

// All recorded VCTPs are queued for removal once every block is processed.
1706 LoLoop.ToRemove.insert_range(LoLoop.VCTPs);

1707}

1708

// Finalises one candidate loop. If LoLoop.Revert is set, the loop pseudo
// instructions are reverted through the Revert* helpers. Otherwise the
// MQPRCopy pseudos are expanded, the loop start and end are rewritten, VPT
// blocks are converted when tail-predication is legal, every instruction
// queued in LoLoop.ToRemove is erased, and queued block masks are recomputed.
// Afterwards the loop's blocks are visited in reverse post-order.
// NOTE(review): this listing is missing a number of original lines (see the
// gaps in the embedded line numbers); comments below hedge where code is not
// visible.
1709void ARMLowOverheadLoops::Expand(LowOverheadLoop &LoLoop) {

1710

1711

// Helper: builds the loop-end instruction (MVE_LETP when tail-predication is
// legal, t2LEUpdate otherwise), queues the old Dec and End for removal and
// returns the new instruction.
1712 auto ExpandLoopEnd = [this](LowOverheadLoop &LoLoop) {

1713 MachineInstr *End = LoLoop.End;

1715 unsigned Opc = LoLoop.IsTailPredicationLegal() ?

1716 ARM::MVE_LETP : ARM::t2LEUpdate;

// NOTE(review): the BuildMI call that creates MIB (lines 1717-1718) is
// missing from this listing.
1719 MIB.addDef(ARM::LR);

// Off is 1 when the decrement and the end are the same instruction, else 0.
// NOTE(review): the lines that use Off (1721-1722) are missing; presumably it
// offsets the operand indices read from End.
1720 unsigned Off = LoLoop.Dec == LoLoop.End ? 1 : 0;

1723 LLVM_DEBUG(dbgs() << "ARM Loops: Inserted LE: " << *MIB);

1724 LoLoop.ToRemove.insert(LoLoop.Dec);

1725 LoLoop.ToRemove.insert(End);

1726 return &*MIB;

1727 };

1728

1729

1730

1731

1732

1733

// Helper: inspects the terminator of I's block; when it is an unconditional
// branch other than I itself, its target is examined and the branch may be
// removed.
1734 auto RemoveDeadBranch = [](MachineInstr *I) {

1735 MachineBasicBlock *BB = I->getParent();

// NOTE(review): the definition of Terminator (line 1736) is missing.
1737 if (Terminator->isUnconditionalBranch() && I != Terminator) {

1738 MachineBasicBlock *Succ = Terminator->getOperand(0).getMBB();

// NOTE(review): the condition on Succ (line 1739) and the statement after the
// debug output (line 1741, presumably the erase) are missing.
1740 LLVM_DEBUG(dbgs() << "ARM Loops: Removing branch: " << *Terminator);

1742 }

1743 }

1744 };

1745

1746

1747

// Helper: replaces each MQPRCopy pseudo with two instructions and erases the
// pseudo.
1748 auto ExpandVMOVCopies = [this](SmallPtrSet<MachineInstr *, 4> &VMOVCopies) {

1749 for (auto *MI : VMOVCopies) {

1751 assert(MI->getOpcode() == ARM::MQPRCopy && "Only expected MQPRCOPY!");

1752 MachineBasicBlock *MBB = MI->getParent();

1753 Register Dst = MI->getOperand(0).getReg();

1754 Register Src = MI->getOperand(1).getReg();

// First instruction: operates on D register 2*n, where n is the index of the
// Q register relative to Q0.
// NOTE(review): the BuildMI opcode line (1755) and trailing operands (1758)
// are missing.
1756 ARM::D0 + (Dst - ARM::Q0) * 2)

1757 .addReg(ARM::D0 + (Src - ARM::Q0) * 2)

1759 (void)MIB1;

// Second instruction: same again for D register 2*n + 1.
// NOTE(review): lines 1760-1761 and 1764-1765 are missing.
1762 ARM::D0 + (Dst - ARM::Q0) * 2 + 1)

1763 .addReg(ARM::D0 + (Src - ARM::Q0) * 2 + 1)

1766 (void)MIB2;

1767 MI->eraseFromParent();

1768 }

1769 };

1770

1771 if (LoLoop.Revert) {

// NOTE(review): the condition choosing between RevertWhile and RevertDo
// (line 1772) is missing.
1773 RevertWhile(LoLoop.Start);

1774 else

1775 RevertDo(LoLoop.Start);

1776 if (LoLoop.Dec == LoLoop.End)

1777 RevertLoopEndDec(LoLoop.End);

1778 else

// NOTE(review): the else-branch statement (line 1779) is missing.
1780 } else {

1781 ExpandVMOVCopies(LoLoop.VMOVCopies);

// ExpandLoopStart may return nullptr (when no start instruction is inserted),
// hence the null check before RemoveDeadBranch.
1782 LoLoop.Start = ExpandLoopStart(LoLoop);

1783 if (LoLoop.Start)

1784 RemoveDeadBranch(LoLoop.Start);

1785 LoLoop.End = ExpandLoopEnd(LoLoop);

1786 RemoveDeadBranch(LoLoop.End);

1787 if (LoLoop.IsTailPredicationLegal())

1788 ConvertVPTBlocks(LoLoop);

// Everything queued by the steps above is erased here in one pass.
1789 for (auto *I : LoLoop.ToRemove) {

1791 I->eraseFromParent();

1792 }

1793 for (auto *I : LoLoop.BlockMasksToRecompute) {

1794 LLVM_DEBUG(dbgs() << "ARM Loops: Recomputing VPT/VPST Block Mask: " << *I);

// NOTE(review): the recomputation statements (lines 1795-1796) are missing.
1797 }

1798 }

1799

1800 PostOrderLoopTraversal DFS(LoLoop.ML, *MLI);

1801 DFS.ProcessLoop();

1802 const SmallVectorImpl<MachineBasicBlock*> &PostOrder = DFS.getOrder();

1804

// Visit blocks in reverse post-order.
// NOTE(review): the loop body (line 1806) and lines 1803 and 1809 are missing;
// presumably liveness information is recomputed here.
1805 for (auto *MBB : reverse(PostOrder))

1807

1808

1810}

1811

// Scans every basic block of the function for leftover loop pseudo
// instructions (loop starts, t2LoopDec, t2LoopEnd, t2LoopEndDec) and MQPRCopy
// pseudos, and reverts or expands each one.
// NOTE(review): this listing is missing a number of original lines (see the
// gaps in the embedded line numbers), including the return statement, so the
// meaning of the bool result is not visible here.
1812bool ARMLowOverheadLoops::RevertNonLoops() {

1813 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting any remaining pseudos...\n");

1815

1816 for (auto &MBB : *MF) {

// Per-block work lists, one per pseudo kind.
1817 SmallVector<MachineInstr*, 4> Starts;

1818 SmallVector<MachineInstr*, 4> Decs;

1819 SmallVector<MachineInstr*, 4> Ends;

1820 SmallVector<MachineInstr *, 4> EndDecs;

1821 SmallVector<MachineInstr *, 4> MQPRCopies;

1822

// Classify each instruction by opcode.
// NOTE(review): the first condition and all push_back statements (the odd
// lines 1824-1833) are missing; presumably each match is appended to the
// corresponding list above.
1823 for (auto &I : MBB) {

1826 else if (I.getOpcode() == ARM::t2LoopDec)

1828 else if (I.getOpcode() == ARM::t2LoopEnd)

1830 else if (I.getOpcode() == ARM::t2LoopEndDec)

1832 else if (I.getOpcode() == ARM::MQPRCopy)

1834 }

1835

// Skip blocks with nothing to revert.
// NOTE(review): the start of this condition (line 1836) is missing.
1837 MQPRCopies.empty())

1838 continue;

1839

1841

1842 for (auto *Start : Starts) {

// NOTE(review): the condition choosing between RevertWhile and RevertDo
// (line 1843) is missing.
1844 RevertWhile(Start);

1845 else

1846 RevertDo(Start);

1847 }

// NOTE(review): the body of this loop (line 1849) is missing.
1848 for (auto *Dec : Decs)

1850

// NOTE(review): the body of this loop (line 1852) is missing.
1851 for (auto *End : Ends)

1853 for (auto *End : EndDecs)

1854 RevertLoopEndDec(End);

// Replace each MQPRCopy with an instruction that writes operand 0's register
// and takes operand 1 twice as input, then erase the pseudo.
1855 for (auto *MI : MQPRCopies) {

1857 assert(MI->getOpcode() == ARM::MQPRCopy && "Only expected MQPRCOPY!");

// Note: this local MBB shadows the outer loop variable of the same name.
1858 MachineBasicBlock *MBB = MI->getParent();

// NOTE(review): the BuildMI opcode line (1859) and line 1863 are missing.
1860 MI->getOperand(0).getReg())

1861 .add(MI->getOperand(1))

1862 .add(MI->getOperand(1));

1864 MI->eraseFromParent();

1865 }

1866 }

// NOTE(review): the return statement (line 1867) is missing from this listing.
1868}

1869

1871 return new ARMLowOverheadLoops();

1872}

unsigned const MachineRegisterInfo * MRI

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

const TargetInstrInfo & TII

static int getVecSize(const AMDGPULibFunc &FInfo)

static bool isDomainMVE(MachineInstr *MI)

Definition ARMLowOverheadLoops.cpp:99

static bool isVectorPredicated(MachineInstr *MI)

Definition ARMLowOverheadLoops.cpp:86

ReachingDefInfo & RDI

Definition ARMLowOverheadLoops.cpp:527

static bool canGenerateNonZeros(const MachineInstr &MI)

Definition ARMLowOverheadLoops.cpp:829

static bool isHorizontalReduction(const MachineInstr &MI)

Definition ARMLowOverheadLoops.cpp:116

static bool producesDoubleWidthResult(const MachineInstr &MI)

Definition ARMLowOverheadLoops.cpp:819

static bool hasVPRUse(MachineInstr &MI)

Definition ARMLowOverheadLoops.cpp:95

static bool isRegInClass(const MachineOperand &MO, const TargetRegisterClass *Class)

Definition ARMLowOverheadLoops.cpp:802

static bool ValidateMVEStore(MachineInstr *MI, MachineLoop *ML)

Definition ARMLowOverheadLoops.cpp:1134

static bool isVectorPredicate(MachineInstr *MI)

Definition ARMLowOverheadLoops.cpp:91

static bool retainsPreviousHalfElement(const MachineInstr &MI)

Definition ARMLowOverheadLoops.cpp:810

static bool shouldInspect(MachineInstr &MI)

Definition ARMLowOverheadLoops.cpp:110

static bool producesFalseLanesZero(MachineInstr &MI, const TargetRegisterClass *QPRs, const ReachingDefInfo &RDI, InstSet &FalseLanesZero)

Definition ARMLowOverheadLoops.cpp:858

static cl::opt< bool > DisableTailPredication("arm-loloops-disable-tailpred", cl::Hidden, cl::desc("Disable tail-predication in the ARM LowOverheadLoop pass"), cl::init(false))

ReachingDefInfo InstSet InstSet & Ignore

Definition ARMLowOverheadLoops.cpp:528

if(!RDI.isSafeToRemove(MI, Uses, Ignore)) return false

static int getVecSize(const MachineInstr &MI)

Definition ARMLowOverheadLoops.cpp:104

#define ARM_LOW_OVERHEAD_LOOPS_NAME

Definition ARMLowOverheadLoops.cpp:74

static cl::opt< bool > DisableOmitDLS("arm-disable-omit-dls", cl::Hidden, cl::desc("Disable omitting 'dls lr, lr' instructions"), cl::init(false))

ReachingDefInfo InstSet & ToRemove

Definition ARMLowOverheadLoops.cpp:527

static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))

static const Function * getParent(const Value *V)

static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

This file implements the LivePhysRegs utility for tracking liveness of physical registers.

static ARM::PredBlockMask CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter, MachineBasicBlock::instr_iterator EndIter, SmallVectorImpl< MachineInstr * > &DeadInstructions)

Register const TargetRegisterInfo * TRI

Promote Memory to Register

#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)

Remove Loads Into Fake Uses

static bool isValid(const char C)

Returns true if C is a valid mangled character: <0-9a-zA-Z_>.

This file implements a set that has insertion order iteration characteristics.

AnalysisUsage & addRequired()

LLVM_ABI void setPreservesCFG()

This function should be called by the pass, iff they do not:

FunctionPass class - This class is used to implement most global optimizations.

Describe properties that are true of each instruction in the target description file.

ArrayRef< MCOperandInfo > operands() const

const MCInstrDesc & get(unsigned Opcode) const

Return the machine instruction descriptor that corresponds to the specified instruction opcode.

static MCRegister from(unsigned Val)

Check the provided unsigned value is a valid MCRegister.

unsigned pred_size() const

LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)

Insert MI into the instruction list before I, possibly inside a bundle.

iterator_range< livein_iterator > liveins() const

MachineInstr & instr_back()

pred_iterator pred_begin()

LLVM_ABI bool isLayoutSuccessor(const MachineBasicBlock *MBB) const

Return true if the specified MBB will be emitted immediately after this block, such that if this block exits by falling through, control will transfer to the specified MBB.

iterator_range< succ_iterator > successors()

iterator insertAfter(iterator I, MachineInstr *MI)

Insert MI into the instruction list after I.

MachineInstrBundleIterator< MachineInstr > iterator

MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.

void getAnalysisUsage(AnalysisUsage &AU) const override

getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

StringRef getName() const

getName - Return the name of the corresponding LLVM function.

MachineRegisterInfo & getRegInfo()

getRegInfo - Return information about the registers currently in use.

const MachineFunctionProperties & getProperties() const

Get the function properties.

const MachineBasicBlock & front() const

const MachineInstrBuilder & addImm(int64_t Val) const

Add a new immediate operand.

const MachineInstrBuilder & add(const MachineOperand &MO) const

const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const

Add a new virtual register operand.

const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const

MachineInstr * getInstr() const

If conversion operators fail, use this method to get the MachineInstr explicitly.

const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const

Add a virtual register definition operand.

Representation of each machine instruction.

unsigned getOpcode() const

Returns the opcode of this MachineInstr.

const MachineBasicBlock * getParent() const

const DebugLoc & getDebugLoc() const

Returns the debug location id of this MachineInstr.

const MachineOperand & getOperand(unsigned i) const

A description of a memory reference used in the backend.

MachineOperand class - Representation of each machine instruction operand.

LLVM_ABI unsigned getOperandNo() const

Returns the index of this operand in the instruction that it belongs to.

bool isReg() const

isReg - Tests if this is a MO_Register operand.

Register getReg() const

getReg - Returns the register number.

LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const

Returns true if this operand is identical to the specified operand except for liveness related flags (isKill, isUndef and isDead).

LLVM_ABI void setIsDef(bool Val=true)

Change a def to a use, or a use to a def.

Special value supplied for machine level alias analysis.

This class provides the reaching def analysis.

MachineInstr * getUniqueReachingMIDef(MachineInstr *MI, Register Reg) const

If a single MachineInstr creates the reaching definition, then return it.

bool isReachingDefLiveOut(MachineInstr *MI, Register Reg) const

Return whether the reaching def for MI also is live out of its parent block.

bool isSafeToMoveForwards(MachineInstr *From, MachineInstr *To) const

Return whether From can be moved forwards to just before To.

void getReachingLocalUses(MachineInstr *MI, Register Reg, InstSet &Uses) const

Provides the uses, in the same block as MI, of register that MI defines.

bool hasLocalDefBefore(MachineInstr *MI, Register Reg) const

Provide whether the register has been defined in the same basic block as, and before, MI.

void reset()

Re-run the analysis.

void getGlobalUses(MachineInstr *MI, Register Reg, InstSet &Uses) const

Collect the users of the value stored in Reg, which is defined by MI.

MachineInstr * getMIOperand(MachineInstr *MI, unsigned Idx) const

If a single MachineInstr creates the reaching definition, for MIs operand at Idx, then return it.

bool isSafeToMoveBackwards(MachineInstr *From, MachineInstr *To) const

Return whether From can be moved backwards to just after To.

bool hasSameReachingDef(MachineInstr *A, MachineInstr *B, Register Reg) const

Return whether A and B use the same def of Reg.

void getGlobalReachingDefs(MachineInstr *MI, Register Reg, InstSet &Defs) const

Collect all possible definitions of the value stored in Reg, which is used by MI.

MachineInstr * getLocalLiveOutMIDef(MachineBasicBlock *MBB, Register Reg) const

Return the local MI that produces the live out value for Reg, or nullptr for a non-live out or non-local def.

bool isSafeToDefRegAt(MachineInstr *MI, Register Reg) const

Return whether a MachineInstr could be inserted at MI and safely define the given register without affecting any uses.

MCRegister asMCReg() const

Utility to check-convert this value to a MCRegister.

size_type size() const

Determine the number of elements in the SetVector.

bool contains(const_arg_type key) const

Check if the SetVector contains the given key.

bool insert(const value_type &X)

Insert a new element into the SetVector.

A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...

bool erase(PtrType Ptr)

Remove pointer from the set.

size_type count(ConstPtrType Ptr) const

count - Return 1 if the specified pointer is in the set, 0 otherwise.

void insert_range(Range &&R)

std::pair< iterator, bool > insert(PtrType Ptr)

Inserts Ptr if and only if there is no element in the container equal to Ptr.

bool contains(ConstPtrType Ptr) const

SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

bool contains(Register Reg) const

Return true if the specified register is included in this register class.

A Use represents the edge between a Value definition and its users.

@ ValidForTailPredication

@ RetainsPreviousHalfElement

bool isPredicated(const MCInst &MI, const MCInstrInfo *MCII)

bool isVpred(OperandType op)

initializer< Ty > init(const Ty &Val)

NodeAddr< DefNode * > Def

This is an optimization pass for GlobalISel generic memory operations.

static bool isDoLoopStart(const MachineInstr &MI)

auto drop_begin(T &&RangeOrContainer, size_t N=1)

Return a range covering RangeOrContainer with the first N elements excluded.

void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)

int findVPTInactiveOperandIdx(const MachineInstr &MI)

int findFirstVPTPredOperandIdx(const MachineInstr &MI)

ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI, Register &PredReg)

bool all_of(R &&range, UnaryPredicate P)

Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.

auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)

Get the size of a range.

MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)

Builder interface. Specify how to create the initial instruction itself.

static bool isVCTP(const MachineInstr *MI)

auto enumerate(FirstRange &&First, RestRanges &&...Rest)

Given two or more input ranges, returns a new range whose values are tuples (A, B,...

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

constexpr from_range_t from_range

static bool isVPTOpcode(int Opc)

void append_range(Container &C, Range &&R)

Wrapper function to append range R to container C.

static unsigned getTailPredVectorWidth(unsigned Opcode)

static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)

Get the operands corresponding to the given Pred value.

FunctionPass * createARMLowOverheadLoopsPass()

Definition ARMLowOverheadLoops.cpp:1870

static bool isMovRegOpcode(int Opc)

static bool isSubImmOpcode(int Opc)

bool any_of(R &&range, UnaryPredicate P)

Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.

auto reverse(ContainerTy &&C)

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

FunctionAddr VTableAddr Count

static bool isLoopStart(const MachineInstr &MI)

class LLVM_GSL_OWNER SmallVector

Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).

void RevertWhileLoopStartLR(MachineInstr *MI, const TargetInstrInfo *TII, unsigned BrOpc=ARM::t2Bcc, bool UseCmp=false)

void recomputeLivenessFlags(MachineBasicBlock &MBB)

Recomputes dead and kill flags in MBB.

static unsigned VCTPOpcodeToLSTP(unsigned Opcode, bool IsDoLoop)

void addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB, Register DestReg)

FunctionAddr VTableAddr Next

DWARFExpression::Operation Op

void RevertLoopEnd(MachineInstr *MI, const TargetInstrInfo *TII, unsigned BrOpc=ARM::t2Bcc, bool SkipCmp=false)

void RevertLoopDec(MachineInstr *MI, const TargetInstrInfo *TII, bool SetFlags=false)

MachineBasicBlock * getWhileLoopStartTargetBB(const MachineInstr &MI)

bool is_contained(R &&Range, const E &Element)

Returns true if Element is found in Range.

static bool isWhileLoopStart(const MachineInstr &MI)

static unsigned VCMPOpcodeToVPT(unsigned Opcode)

void RevertDoLoopStart(MachineInstr *MI, const TargetInstrInfo *TII)

int getAddSubImmediate(MachineInstr &MI)

void recomputeVPTBlockMask(MachineInstr &Instr)

void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)

Convenience function for recomputing live-in's for a set of MBBs until the computation converges.

LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)

Prints a machine basic block reference.