LLVM: lib/Target/ARM/ARMLowOverheadLoops.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

70

71using namespace llvm;

72

73#define DEBUG_TYPE "arm-low-overhead-loops"

74#define ARM_LOW_OVERHEAD_LOOPS_NAME "ARM Low Overhead Loops pass"

75

78 cl::desc("Disable tail-predication in the ARM LowOverheadLoop pass"),

80

83 cl::desc("Disable omitting 'dls lr, lr' instructions"),

85

88 return PIdx != -1 && MI->getOperand(PIdx + 1).getReg() == ARM::VPR;

89}

90

92 return MI->findRegisterDefOperandIdx(ARM::VPR, nullptr) != -1;

93}

94

96 return MI.findRegisterUseOperandIdx(ARM::VPR, nullptr) != -1;

97}

98

102}

103

108}

109

111 if (MI.isDebugInstr())

112 return false;

114}

115

120}

121

122namespace {

123

125

126 class PostOrderLoopTraversal {

131

132 public:

134 : ML(ML), MLI(MLI) { }

135

137 return Order;

138 }

139

140

141

142 void ProcessLoop() {

146 return;

147

149 if (ML.contains(Succ))

150 continue;

151 Search(Succ);

152 }

154 };

155

156

158 ML.getExitBlocks(ExitBlocks);

160

161

162 Search(ML.getHeader());

163

164

170 };

171

172 if (auto *Preheader = ML.getLoopPreheader())

173 GetPredecessor(Preheader);

174 else if (auto *Preheader = MLI.findLoopPreheader(&ML, true, true))

175 GetPredecessor(Preheader);

176 }

177 };

178

179 class VPTBlock {

181

182 public:

184

185

186

187 bool hasUniformPredicate() { return getDivergent() == nullptr; }

188

189

190

193 for (unsigned i = 1; i < Insts.size(); ++i) {

196 return Next;

197 }

198 return nullptr;

199 }

200

203

204 assert(Insts.size() <= 5 && "Too many instructions in VPT block!");

205 }

206

208

209 unsigned size() const { return Insts.size(); }

211 };

212

213

214

215

216

217

218 class VPTState {

219 friend struct LowOverheadLoop;

220

223 std::map<MachineInstr *, SetVector<MachineInstr *>> PredicatedInsts;

224

226 assert((CurrentPredicates.size() || MI->getParent()->isLiveIn(ARM::VPR))

227 && "Can't begin VPT without predicate");

229

230

231

232 PredicatedInsts[MI] = CurrentPredicates;

233 }

234

237 PredicatedInsts[MI] = CurrentPredicates;

238 }

239

241 LLVM_DEBUG(dbgs() << "ARM Loops: Adding VPT Predicate: " << *MI);

242 CurrentPredicates.insert(MI);

243 }

244

246 LLVM_DEBUG(dbgs() << "ARM Loops: Resetting VPT Predicate: " << *MI);

247 CurrentPredicates.clear();

248 CurrentPredicates.insert(MI);

249 }

250

251 public:

252

253 bool isPredicatedOnVCTP(MachineInstr *MI, bool Exclusive = false) {

255 if (Exclusive && Predicates.size() != 1)

256 return false;

257

259 return false;

261 }

262

263

264 bool isEntryPredicatedOnVCTP(VPTBlock &Block, bool Exclusive = false) {

266 return isPredicatedOnVCTP(Insts.front(), Exclusive);

267 }

268

269

270

271

276 "Expected VPT block to begin with VPT/VPST");

277

278 if (VPT->getOpcode() == ARM::MVE_VPST)

279 return false;

280

281

282

283

286 return !MI->mayStore() && !MI->mayLoad() &&

287 !isHorizontalReduction(*MI) && !isVCTP(MI);

288 }))

289 return true;

290

293 return Op && PredicatedInsts.count(Op) && isPredicatedOnVCTP(Op);

294 };

295

299 return true;

300

303 if (Defs.empty())

304 return true;

305

306 for (auto *Def : Defs)

308 return false;

309 return true;

310 };

311

312

313

314 return (IsOperandPredicated(VPT, 1) || IsOperandPredicated(VPT, 2)) &&

315 (IsOperandPredicated(VPT, 1) || IsOperandInvariant(VPT, 1)) &&

316 (IsOperandPredicated(VPT, 2) || IsOperandInvariant(VPT, 2));

317 }

318

320

321

322

324 if (isEntryPredicatedOnVCTP(Block, false) &&

326 return getVPTInstrPredicate(*MI) == ARMVCC::Else;

327 }))

328 continue;

329 if (hasImplicitlyValidVPT(Block, RDA))

330 continue;

331

333

334

336 "Expected VPT block to start with a VPST or VPT!");

337 if (Insts.size() == 2 && Insts.front()->getOpcode() != ARM::MVE_VPST &&

339 return false;

340

341 for (auto *MI : Insts) {

342

344 return false;

345

347 continue;

348

349

350

351 if (!isPredicatedOnVCTP(MI)) {

353 return false;

354 }

355 }

356 }

357 return true;

358 }

359 };

360

361 struct LowOverheadLoop {

362

381 bool Revert = false;

382 bool CannotTailPredicate = false;

383 VPTState VPTstate;

384

390 MF = ML.getHeader()->getParent();

391 if (auto *MBB = ML.getLoopPreheader())

392 Preheader = MBB;

394 Preheader = MBB;

395 }

396

397

398

399

401

403 CannotTailPredicate = !ValidateMVEInst(MI);

404 }

405

406 bool IsTailPredicationLegal() const {

407

408

409 return !Revert && FoundAllComponents() && !VCTPs.empty() &&

410 !CannotTailPredicate && ML.getNumBlocks() == 1;

411 }

412

413

414

416

417

418

419

420 bool ValidateTailPredicate();

421

422

423

424 bool ValidateLiveOuts();

425

426

427

429

430 bool FoundAllComponents() const {

431 return Start && Dec && End;

432 }

433

435

436

437

439 if (IsTailPredicationLegal())

440 return TPNumElements;

441 return Start->getOperand(1);

442 }

443

444 unsigned getStartOpcode() const {

446 if (!IsTailPredicationLegal())

447 return IsDo ? ARM::t2DLS : ARM::t2WLS;

448

450 }

451

452 void dump() const {

453 if (Start) dbgs() << "ARM Loops: Found Loop Start: " << *Start;

454 if (Dec) dbgs() << "ARM Loops: Found Loop Dec: " << *Dec;

455 if (End) dbgs() << "ARM Loops: Found Loop End: " << *End;

456 if (!VCTPs.empty()) {

457 dbgs() << "ARM Loops: Found VCTP(s):\n";

458 for (auto *MI : VCTPs)

459 dbgs() << " - " << *MI;

460 }

461 if (!FoundAllComponents())

462 dbgs() << "ARM Loops: Not a low-overhead loop.\n";

463 else if (!(Start && Dec && End))

464 dbgs() << "ARM Loops: Failed to find all loop components.\n";

465 }

466 };

467

475 std::unique_ptr BBUtils = nullptr;

476

477 public:

478 static char ID;

479

481

487 }

488

490

493 MachineFunctionProperties::Property::NoVRegs).set(

494 MachineFunctionProperties::Property::TracksLiveness);

495 }

496

499 }

500

501 private:

503

504 bool RevertNonLoops();

505

508

510

512

514

515 void ConvertVPTBlocks(LowOverheadLoop &LoLoop);

516

517 MachineInstr *ExpandLoopStart(LowOverheadLoop &LoLoop);

518

519 void Expand(LowOverheadLoop &LoLoop);

520

521 void IterationCountDCE(LowOverheadLoop &LoLoop);

522 };

523}

524

525char ARMLowOverheadLoops::ID = 0;

526

528 false, false)

529

532

533

534

536

538 for (auto *Dead : Killed)

539 BasicBlocks.insert(Dead->getParent());

540

541

542 std::map<MachineInstr *, SmallPtrSet<MachineInstr *, 2>> ITBlocks;

543 for (auto *MBB : BasicBlocks) {

544 for (auto &IT : *MBB) {

545 if (IT.getOpcode() != ARM::t2IT)

546 continue;

548 ITBlocks[&IT]);

549 }

550 }

551

552

553

556 for (auto *Dead : Killed) {

558 Dead->findRegisterUseOperand(ARM::ITSTATE, nullptr)) {

561 auto &CurrentBlock = ITBlocks[IT];

562 CurrentBlock.erase(Dead);

563 if (CurrentBlock.empty())

565 else

567 }

568 }

569 if (!ModifiedITs.empty())

570 return false;

571 Killed.insert(RemoveITs.begin(), RemoveITs.end());

572 return true;

573 };

574

577 return false;

578

582 << " - can also remove:\n";

584 dbgs() << " - " << *Use);

585

588 if (WontCorruptITs(Killed, RDA)) {

591 dbgs() << " - " << *Dead);

592 }

593 return true;

594 }

595 return false;

596}

597

598bool LowOverheadLoop::ValidateTailPredicate() {

599 if (!IsTailPredicationLegal()) {

601 dbgs() << "ARM Loops: Didn't find a VCTP instruction.\n";

602 dbgs() << "ARM Loops: Tail-predication is not valid.\n");

603 return false;

604 }

605

606 assert(!VCTPs.empty() && "VCTP instruction expected but is not set");

607 assert(ML.getBlocks().size() == 1 &&

608 "Shouldn't be processing a loop with more than one block");

609

611 LLVM_DEBUG(dbgs() << "ARM Loops: tail-predication is disabled\n");

612 return false;

613 }

614

615 if (!VPTstate.isValid(RDA)) {

616 LLVM_DEBUG(dbgs() << "ARM Loops: Invalid VPT state.\n");

617 return false;

618 }

619

620 if (!ValidateLiveOuts()) {

621 LLVM_DEBUG(dbgs() << "ARM Loops: Invalid live outs.\n");

622 return false;

623 }

624

625

626

627

628

630 if (Start->getOpcode() == ARM::t2DoLoopStartTP ||

631 Start->getOpcode() == ARM::t2WhileLoopStartTP) {

632 TPNumElements = Start->getOperand(2);

633 StartInsertPt = Start;

634 StartInsertBB = Start->getParent();

635 } else {

638

639

640

641

643 LLVM_DEBUG(dbgs() << "ARM Loops: VCTP operand is defined in the loop.\n");

644 return false;

645 }

646

647

648

649

650

651 if (StartInsertPt != StartInsertBB->end() &&

653 if (auto *ElemDef =

656 ElemDef->removeFromParent();

657 StartInsertBB->insert(StartInsertPt, ElemDef);

659 << "ARM Loops: Moved element count def: " << *ElemDef);

661 StartInsertPt->removeFromParent();

663 &*StartInsertPt);

664 LLVM_DEBUG(dbgs() << "ARM Loops: Moved start past: " << *ElemDef);

665 } else {

666

667

668

674 TPNumElements = Operand;

675 NumElements = TPNumElements.getReg();

676 } else {

678 << "ARM Loops: Unable to move element count to loop "

679 << "start instruction.\n");

680 return false;

681 }

682 }

683 }

684 }

685

686

687

688

692 return false;

693

695 return true;

696

697

699 return true;

700

701 return false;

702 };

703

704

706 while (MBB && MBB != StartInsertBB) {

707 if (CannotProvideElements(MBB, NumElements)) {

708 LLVM_DEBUG(dbgs() << "ARM Loops: Unable to provide element count.\n");

709 return false;

710 }

712 }

713 }

714

715

716

717

718

719

720

721

722

723

724 if (std::any_of(StartInsertPt, StartInsertBB->end(), shouldInspect)) {

725 LLVM_DEBUG(dbgs() << "ARM Loops: Instruction blocks [W|D]LSTP\n");

726 return false;

727 }

728

729

730

731

732

733 unsigned VCTPVecSize = getVecSize(*VCTP);

736 if (InstrVecSize > VCTPVecSize) {

737 LLVM_DEBUG(dbgs() << "ARM Loops: Double width result larger than VCTP "

738 << "VecSize:\n" << *MI);

739 return false;

740 }

741 }

742

743

744

745

746

747 auto IsValidSub = [](MachineInstr *MI, int ExpectedVecWidth) {

749 };

750

752

753

754

755

761

763

764 if (TryRemove(Def, RDA, ElementChain, Ignore)) {

765 bool FoundSub = false;

766

767 for (auto *MI : ElementChain) {

769 continue;

770

772 if (FoundSub || !IsValidSub(MI, ExpectedVectorWidth)) {

773 LLVM_DEBUG(dbgs() << "ARM Loops: Unexpected instruction in element"

774 " count: " << *MI);

775 return false;

776 }

777 FoundSub = true;

778 } else {

779 LLVM_DEBUG(dbgs() << "ARM Loops: Unexpected instruction in element"

780 " count: " << *MI);

781 return false;

782 }

783 }

784 ToRemove.insert(ElementChain.begin(), ElementChain.end());

785 }

786 }

787

788

789

790

791 if ((Start->getOpcode() == ARM::t2DoLoopStartTP ||

792 Start->getOpcode() == ARM::t2WhileLoopStartTP) &&

793 Preheader && !Preheader->empty() &&

800 }

801 }

802

803 return true;

804}

805

808 return MO.isReg() && MO.getReg() && Class->contains(MO.getReg());

809}

810

811

812

813

818}

819

820

821

822

827}

828

829

830

831

832

834

835

836

838 return true;

839

840 switch (MI.getOpcode()) {

841 default:

842 break;

843

844

845

846 case ARM::MVE_VMVN:

847 case ARM::MVE_VORN:

848

849 case ARM::MVE_VCLZs8:

850 case ARM::MVE_VCLZs16:

851 case ARM::MVE_VCLZs32:

852 return true;

853 }

854 return false;

855}

856

857

858

859

860

861

865 InstSet &FalseLanesZero) {

867 return false;

868

870

871

872 if (MI.mayLoad())

873 return isPredicated;

874

877 Def->getOpcode() == ARM::MVE_VMOVimmi32 &&

878 Def->getOperand(1).getImm() == 0;

879 };

880

882 for (auto &MO : MI.operands()) {

884 continue;

886 continue;

887

889 if (PIdx != -1 && (int)MO.getOperandNo() == PIdx + 2)

890 continue;

891

892

893

894

895

898 if (Defs.empty())

899 return false;

900 for (auto *Def : Defs) {

901 if (Def == &MI || FalseLanesZero.count(Def) || IsZeroInit(Def))

902 continue;

903 if (MO.isUse() && isPredicated)

904 continue;

905 return false;

906 }

907 }

908 LLVM_DEBUG(dbgs() << "ARM Loops: Always False Zeros: " << MI);

909 return true;

910}

911

912bool LowOverheadLoop::ValidateLiveOuts() {

913

914

915

916

917

918

919

920

921

922

923

924

925

926

927

928

929

930

931

932

933

939

940 LLVM_DEBUG(dbgs() << "ARM Loops: Validating Live outs\n");

941

942 for (auto &MI : *Header) {

944 continue;

945

947 continue;

948

950 bool retainsOrReduces =

952

953 if (isPredicated)

957 else if (MI.getNumDefs() == 0)

958 continue;

959 else if (!isPredicated && retainsOrReduces) {

960 LLVM_DEBUG(dbgs() << " Unpredicated instruction that retainsOrReduces: " << MI);

961 return false;

962 } else if (!isPredicated && MI.getOpcode() != ARM::MQPRCopy)

963 FalseLanesUnknown.insert(&MI);

964 }

965

967 dbgs() << " Predicated:\n";

969 dbgs() << " " << *I;

970 dbgs() << " FalseLanesZero:\n";

971 for (auto *I : FalseLanesZero)

972 dbgs() << " " << *I;

973 dbgs() << " FalseLanesUnknown:\n";

974 for (auto *I : FalseLanesUnknown)

975 dbgs() << " " << *I;

976 });

977

984 return false;

985 }

986 return true;

987 };

988

989

990

991

992

993

994

996 for (auto *MI : reverse(FalseLanesUnknown)) {

997 for (auto &MO : MI->operands()) {

999 continue;

1000 if (!HasPredicatedUsers(MI, MO, Predicated)) {

1002 << TRI.getRegAsmName(MO.getReg()) << " at " << *MI);

1004 break;

1005 }

1006 }

1007

1010 }

1011

1014 ML.getExitBlocks(ExitBlocks);

1015 assert(ML.getNumBlocks() == 1 && "Expected single block loop!");

1016 assert(ExitBlocks.size() == 1 && "Expected a single exit block");

1019

1020

1021 if (RegMask.PhysReg == ARM::VPR) {

1022 LLVM_DEBUG(dbgs() << " VPR is live in to the exit block.");

1023 return false;

1024 }

1025

1026

1027 if (QPRs->contains(RegMask.PhysReg))

1030 }

1031

1032

1033

1034

1035

1036

1037

1038

1040 while (!Worklist.empty()) {

1042 if (MI->getOpcode() == ARM::MQPRCopy) {

1046 if (CopySrc)

1047 Worklist.push_back(CopySrc);

1048 } else if (NonPredicated.count(MI) && FalseLanesUnknown.contains(MI)) {

1049 LLVM_DEBUG(dbgs() << " Unable to handle live out: " << *MI);

1050 VMOVCopies.clear();

1051 return false;

1052 }

1053 }

1054

1055 return true;

1056}

1057

1059 if (Revert)

1060 return;

1061

1062

1063

1067 ? End->getOperand(1).getMBB()

1068 : End->getOperand(2).getMBB();

1069

1070

1071 if (TgtBB != ML.getHeader()) {

1072 LLVM_DEBUG(dbgs() << "ARM Loops: LoopEnd is not targeting header.\n");

1073 return false;

1074 }

1075

1076

1077

1078 if (BBUtils->getOffsetOf(End) < BBUtils->getOffsetOf(ML.getHeader()) ||

1079 !BBUtils->isBBInRange(End, ML.getHeader(), 4094)) {

1080 LLVM_DEBUG(dbgs() << "ARM Loops: LE offset is out-of-range\n");

1081 return false;

1082 }

1083

1086 if (BBUtils->getOffsetOf(Start) > BBUtils->getOffsetOf(TargetBB) ||

1087 !BBUtils->isBBInRange(Start, TargetBB, 4094)) {

1088 LLVM_DEBUG(dbgs() << "ARM Loops: WLS offset is out-of-range!\n");

1089 return false;

1090 }

1091 }

1092 return true;

1093 };

1094

1096 StartInsertBB = Start->getParent();

1097 LLVM_DEBUG(dbgs() << "ARM Loops: Will insert LoopStart at "

1098 << *StartInsertPt);

1099

1100 Revert = !ValidateRanges(Start, End, BBUtils, ML);

1101 CannotTailPredicate = !ValidateTailPredicate();

1102}

1103

1106 if (VCTPs.empty()) {

1108 return true;

1109 }

1110

1111

1112

1116 LLVM_DEBUG(dbgs() << "ARM Loops: Found VCTP with a different reaching "

1117 "definition from the main VCTP");

1118 return false;

1119 }

1121 return true;

1122}

1123

1125

1127 const PseudoSourceValue *PseudoValue = Operand->getPseudoValue();

1129 if (const auto *FS = dyn_cast(PseudoValue)) {

1130 return FS->getFrameIndex();

1131 }

1132 }

1133 return -1;

1134 };

1135

1136 auto IsStackOp = [GetFrameIndex](MachineInstr *I) {

1137 switch (I->getOpcode()) {

1138 case ARM::MVE_VSTRWU32:

1139 case ARM::MVE_VLDRWU32: {

1140 return I->getOperand(1).getReg() == ARM::SP &&

1141 I->memoperands().size() == 1 &&

1142 GetFrameIndex(I->memoperands().front()) >= 0;

1143 }

1144 default:

1145 return false;

1146 }

1147 };

1148

1149

1150

1151 if (MI->getOpcode() != ARM::MVE_VSTRWU32 || !IsStackOp(MI))

1152 return false;

1153

1154

1155

1156

1157 if (MI->memoperands().size() == 0)

1158 return false;

1159 int FI = GetFrameIndex(MI->memoperands().front());

1160

1161 auto &FrameInfo = MI->getParent()->getParent()->getFrameInfo();

1162 if (FI == -1 || !FrameInfo.isSpillSlotObjectIndex(FI))

1163 return false;

1164

1166 ML->getExitBlocks(Frontier);

1168 unsigned Idx = 0;

1169 while (Idx < Frontier.size()) {

1171 bool LookAtSuccessors = true;

1172 for (auto &I : *BB) {

1173 if (!IsStackOp(&I) || I.memoperands().size() == 0)

1174 continue;

1175 if (GetFrameIndex(I.memoperands().front()) != FI)

1176 continue;

1177

1178

1179 if (I.getOpcode() == ARM::MVE_VSTRWU32) {

1180 LookAtSuccessors = false;

1181 break;

1182 }

1183

1184

1185 if (I.getOpcode() == ARM::MVE_VLDRWU32)

1186 return false;

1187 }

1188

1189 if (LookAtSuccessors) {

1190 for (auto *Succ : BB->successors()) {

1193 }

1194 }

1197 }

1198

1199 return true;

1200}

1201

1202bool LowOverheadLoop::ValidateMVEInst(MachineInstr *MI) {

1203 if (CannotTailPredicate)

1204 return false;

1205

1207 return true;

1208

1209 if (MI->getOpcode() == ARM::MVE_VPSEL ||

1210 MI->getOpcode() == ARM::MVE_VPNOT) {

1211

1212

1213

1214

1215

1216

1217

1218

1219 return false;

1220 }

1221

1222

1224 return false;

1225

1226

1227

1229 bool IsUse = false;

1230 unsigned LastOpIdx = MI->getNumOperands() - 1;

1234 continue;

1235

1237 VPTstate.addInst(MI);

1238 IsUse = true;

1239 } else if (MI->getOpcode() != ARM::MVE_VPST) {

1240 LLVM_DEBUG(dbgs() << "ARM Loops: Found instruction using vpr: " << *MI);

1241 return false;

1242 }

1243 }

1244

1245

1246

1247

1248 bool RequiresExplicitPredication =

1250 if (isDomainMVE(MI) && RequiresExplicitPredication) {

1251 if (MI->getOpcode() == ARM::MQPRCopy)

1252 return true;

1254 DoubleWidthResultInstrs.insert(MI);

1255 return true;

1256 }

1257

1259 << "ARM Loops: Can't tail predicate: " << *MI);

1260 return IsUse;

1261 }

1262

1263

1264

1266 return IsUse;

1267

1268

1269

1271

1272

1274 VPTstate.resetPredicate(MI);

1275 else

1276 VPTstate.addPredicate(MI);

1277 }

1278

1279

1280

1282 VPTstate.CreateVPTBlock(MI);

1283

1284 return true;

1285}

1286

1287bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) {

1289 if (ST.hasLOB())

1290 return false;

1291

1292 MF = &mf;

1294

1295 MLI = &getAnalysis().getLI();

1296 RDA = &getAnalysis();

1297 MF->getProperties().set(MachineFunctionProperties::Property::TracksLiveness);

1300 TRI = ST.getRegisterInfo();

1301 BBUtils = std::make_unique(*MF);

1302 BBUtils->computeAllBlockSizes();

1303 BBUtils->adjustBBOffsetsAfter(&MF->front());

1304

1305 bool Changed = false;

1306 for (auto *ML : *MLI) {

1307 if (ML->isOutermost())

1308 Changed |= ProcessLoop(ML);

1309 }

1310 Changed |= RevertNonLoops();

1311 return Changed;

1312}

1313

1314bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {

1315 bool Changed = false;

1316

1317

1319 Changed |= ProcessLoop(L);

1320

1322 dbgs() << "ARM Loops: Processing loop containing:\n";

1323 if (auto *Preheader = ML->getLoopPreheader())

1325 else if (auto *Preheader = MLI->findLoopPreheader(ML, true, true))

1327 for (auto *MBB : ML->getBlocks())

1329 });

1330

1331

1332

1335 for (auto &MI : *MBB) {

1337 return &MI;

1338 }

1341 return nullptr;

1342 };

1343

1344 LowOverheadLoop LoLoop(*ML, *MLI, *RDA, *TRI, *TII);

1345

1346

1347

1348 if (LoLoop.Preheader)

1349 LoLoop.Start = SearchForStart(LoLoop.Preheader);

1350 else

1351 return Changed;

1352

1353

1354

1355

1356 for (auto *MBB : reverse(ML->getBlocks())) {

1357 for (auto &MI : *MBB) {

1358 if (MI.isDebugValue())

1359 continue;

1360 else if (MI.getOpcode() == ARM::t2LoopDec)

1361 LoLoop.Dec = &MI;

1362 else if (MI.getOpcode() == ARM::t2LoopEnd)

1363 LoLoop.End = &MI;

1364 else if (MI.getOpcode() == ARM::t2LoopEndDec)

1365 LoLoop.End = LoLoop.Dec = &MI;

1367 LoLoop.Start = &MI;

1368 else if (MI.getDesc().isCall()) {

1369

1370

1371

1372 LoLoop.Revert = true;

1374 } else {

1375

1376

1377 LoLoop.AnalyseMVEInst(&MI);

1378 }

1379 }

1380 }

1381

1383 if (!LoLoop.FoundAllComponents()) {

1384 LLVM_DEBUG(dbgs() << "ARM Loops: Didn't find loop start, update, end\n");

1385 return Changed;

1386 }

1387

1388 assert(LoLoop.Start->getOpcode() != ARM::t2WhileLoopStart &&

1389 "Expected t2WhileLoopStart to be removed before regalloc!");

1390

1391

1392

1393

1394 if (LoLoop.Dec != LoLoop.End) {

1397 if (Uses.size() > 1 || Uses.count(LoLoop.End)) {

1398 LLVM_DEBUG(dbgs() << "ARM Loops: Unable to remove LoopDec.\n");

1399 LoLoop.Revert = true;

1400 }

1401 }

1402 LoLoop.Validate(BBUtils.get());

1403 Expand(LoLoop);

1404 return true;

1405}

1406

1407

1408

1409

1410

1411void ARMLowOverheadLoops::RevertWhile(MachineInstr *MI) const {

1412 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp: " << *MI);

1414 unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ?

1415 ARM::tBcc : ARM::t2Bcc;

1416

1418}

1419

1420void ARMLowOverheadLoops::RevertDo(MachineInstr *MI) const {

1421 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to mov: " << *MI);

1423}

1424

1425bool ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI) const {

1426 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to sub: " << *MI);

1430 if (I->getOpcode() == ARM::t2LoopEnd) {

1432 break;

1433 }

1434 }

1435

1436

1437 bool SetFlags =

1439

1441 return SetFlags;

1442}

1443

1444

1445void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI, bool SkipCmp) const {

1446 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp, br: " << *MI);

1447

1449 unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ?

1450 ARM::tBcc : ARM::t2Bcc;

1451

1453}

1454

1455

1456void ARMLowOverheadLoops::RevertLoopEndDec(MachineInstr *MI) const {

1457 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to subs, br: " << *MI);

1458 assert(MI->getOpcode() == ARM::t2LoopEndDec && "Expected a t2LoopEndDec!");

1460

1463 MIB.addDef(ARM::LR);

1464 MIB.add(MI->getOperand(1));

1467 MIB.addReg(ARM::NoRegister);

1468 MIB.addReg(ARM::CPSR);

1470

1472 unsigned BrOpc =

1473 BBUtils->isBBInRange(MI, DestBB, 254) ? ARM::tBcc : ARM::t2Bcc;

1474

1475

1477 MIB.add(MI->getOperand(2));

1479 MIB.addReg(ARM::CPSR);

1480

1481 MI->eraseFromParent();

1482}

1483

1484

1485

1486

1487

1488

1489

1490

1491

1492

1493

1494

1495

1496

1497

1498

1499

1500

1501

1502

1503

1504

1505

1506

1507

1508void ARMLowOverheadLoops::IterationCountDCE(LowOverheadLoop &LoLoop) {

1509 if (!LoLoop.IsTailPredicationLegal())

1510 return;

1511

1512 LLVM_DEBUG(dbgs() << "ARM Loops: Trying DCE on loop iteration count.\n");

1513

1515 if (!Def) {

1516 LLVM_DEBUG(dbgs() << "ARM Loops: Couldn't find iteration count.\n");

1517 return;

1518 }

1519

1520

1522 LoLoop.End };

1523 if (!TryRemove(Def, *RDA, LoLoop.ToRemove, Killed))

1524 LLVM_DEBUG(dbgs() << "ARM Loops: Unsafe to remove loop iteration count.\n");

1525}

1526

1527MachineInstr* ARMLowOverheadLoops::ExpandLoopStart(LowOverheadLoop &LoLoop) {

1528 LLVM_DEBUG(dbgs() << "ARM Loops: Expanding LoopStart.\n");

1529

1530

1531 IterationCountDCE(LoLoop);

1532

1536 unsigned Opc = LoLoop.getStartOpcode();

1537 MachineOperand &Count = LoLoop.getLoopStartOperand();

1538

1539

1542 Count.getReg() == ARM::LR) {

1543 LLVM_DEBUG(dbgs() << "ARM Loops: Didn't insert start: DLS lr, lr");

1544 NewStart = nullptr;

1545 } else {

1547 BuildMI(*MBB, InsertPt, Start->getDebugLoc(), TII->get(Opc));

1548

1549 MIB.addDef(ARM::LR);

1550 MIB.add(Count);

1553

1554 LLVM_DEBUG(dbgs() << "ARM Loops: Inserted start: " << *MIB);

1555 NewStart = &*MIB;

1556 }

1557

1558 LoLoop.ToRemove.insert(Start);

1559 return NewStart;

1560}

1561

1562void ARMLowOverheadLoops::ConvertVPTBlocks(LowOverheadLoop &LoLoop) {

1564 if (MI->isDebugInstr())

1565 return;

1566 LLVM_DEBUG(dbgs() << "ARM Loops: Removing predicate from: " << *MI);

1568 assert(PIdx >= 1 && "Trying to unpredicate a non-predicated instruction");

1570 "Expected Then predicate!");

1572 MI->getOperand(PIdx + 1).setReg(0);

1573 };

1574

1575 for (auto &Block : LoLoop.getVPTBlocks()) {

1577

1579 assert(TheVCMP && "Replacing a removed or non-existent VCMP");

1580

1582 BuildMI(*At->getParent(), At, At->getDebugLoc(),

1585

1587

1589

1591 LLVM_DEBUG(dbgs() << "ARM Loops: Combining with VCMP to VPT: " << *MIB);

1592 LoLoop.BlockMasksToRecompute.insert(MIB.getInstr());

1593 LoLoop.ToRemove.insert(TheVCMP);

1594 TheVCMP = nullptr;

1595 };

1596

1597 if (LoLoop.VPTstate.isEntryPredicatedOnVCTP(Block, true)) {

1599 if (Block.hasUniformPredicate()) {

1600

1601

1602

1603

1604 LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *VPST);

1605 for (unsigned i = 1; i < Insts.size(); ++i)

1606 RemovePredicate(Insts[i]);

1607 } else {

1608

1609

1610

1611

1612

1613

1614

1618 while (DivergentNext != MBB->end() && DivergentNext->isDebugInstr())

1619 ++DivergentNext;

1620

1621 bool DivergentNextIsPredicated =

1622 DivergentNext != MBB->end() &&

1624

1626 I != E; ++I)

1627 RemovePredicate(&*I);

1628

1629

1630

1633

1634 if (DivergentNextIsPredicated) {

1635

1636

1637

1638 if (!VCMP) {

1639

1640

1645 LLVM_DEBUG(dbgs() << "ARM Loops: Created VPST: " << *MIB);

1646 LoLoop.BlockMasksToRecompute.insert(MIB.getInstr());

1647 } else {

1648

1649

1650 ReplaceVCMPWithVPT(VCMP, VCMP);

1651 }

1652 }

1653 }

1654 LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *VPST);

1655 LoLoop.ToRemove.insert(VPST);

1656 } else if (Block.containsVCTP()) {

1657

1658

1660 if (Block.size() == 2) {

1662 "Found a VPST in an otherwise empty vpt block");

1663 LoLoop.ToRemove.insert(VPST);

1664 } else

1665 LoLoop.BlockMasksToRecompute.insert(VPST);

1666 } else if (Insts.front()->getOpcode() == ARM::MVE_VPST) {

1667

1668

1669

1673 "The instruction after a VPST must be predicated");

1674 (void)Next;

1677 !LoLoop.ToRemove.contains(VprDef)) {

1679

1680

1681

1682

1683

1688 ReplaceVCMPWithVPT(VCMP, VPST);

1689 LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *VPST);

1690 LoLoop.ToRemove.insert(VPST);

1691 }

1692 }

1693 }

1694 }

1695

1696 LoLoop.ToRemove.insert(LoLoop.VCTPs.begin(), LoLoop.VCTPs.end());

1697}

1698

1699void ARMLowOverheadLoops::Expand(LowOverheadLoop &LoLoop) {

1700

1701

1702 auto ExpandLoopEnd = [this](LowOverheadLoop &LoLoop) {

1705 unsigned Opc = LoLoop.IsTailPredicationLegal() ?

1706 ARM::MVE_LETP : ARM::t2LEUpdate;

1708 TII->get(Opc));

1709 MIB.addDef(ARM::LR);

1710 unsigned Off = LoLoop.Dec == LoLoop.End ? 1 : 0;

1711 MIB.add(End->getOperand(Off + 0));

1712 MIB.add(End->getOperand(Off + 1));

1713 LLVM_DEBUG(dbgs() << "ARM Loops: Inserted LE: " << *MIB);

1714 LoLoop.ToRemove.insert(LoLoop.Dec);

1715 LoLoop.ToRemove.insert(End);

1716 return &*MIB;

1717 };

1718

1719

1720

1721

1722

1723

1730 LLVM_DEBUG(dbgs() << "ARM Loops: Removing branch: " << *Terminator);

1732 }

1733 }

1734 };

1735

1736

1737

1739 for (auto *MI : VMOVCopies) {

1741 assert(MI->getOpcode() == ARM::MQPRCopy && "Only expected MQPRCOPY!");

1743 Register Dst = MI->getOperand(0).getReg();

1744 Register Src = MI->getOperand(1).getReg();

1745 auto MIB1 = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::VMOVD),

1746 ARM::D0 + (Dst - ARM::Q0) * 2)

1747 .addReg(ARM::D0 + (Src - ARM::Q0) * 2)

1749 (void)MIB1;

1751 auto MIB2 = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::VMOVD),

1752 ARM::D0 + (Dst - ARM::Q0) * 2 + 1)

1753 .addReg(ARM::D0 + (Src - ARM::Q0) * 2 + 1)

1756 (void)MIB2;

1757 MI->eraseFromParent();

1758 }

1759 };

1760

1761 if (LoLoop.Revert) {

1763 RevertWhile(LoLoop.Start);

1764 else

1765 RevertDo(LoLoop.Start);

1766 if (LoLoop.Dec == LoLoop.End)

1767 RevertLoopEndDec(LoLoop.End);

1768 else

1770 } else {

1771 ExpandVMOVCopies(LoLoop.VMOVCopies);

1772 LoLoop.Start = ExpandLoopStart(LoLoop);

1773 if (LoLoop.Start)

1774 RemoveDeadBranch(LoLoop.Start);

1775 LoLoop.End = ExpandLoopEnd(LoLoop);

1776 RemoveDeadBranch(LoLoop.End);

1777 if (LoLoop.IsTailPredicationLegal())

1778 ConvertVPTBlocks(LoLoop);

1779 for (auto *I : LoLoop.ToRemove) {

1781 I->eraseFromParent();

1782 }

1783 for (auto *I : LoLoop.BlockMasksToRecompute) {

1784 LLVM_DEBUG(dbgs() << "ARM Loops: Recomputing VPT/VPST Block Mask: " << *I);

1787 }

1788 }

1789

1790 PostOrderLoopTraversal DFS(LoLoop.ML, *MLI);

1791 DFS.ProcessLoop();

1794

1795 for (auto *MBB : reverse(PostOrder))

1797

1798

1800}

1801

1802bool ARMLowOverheadLoops::RevertNonLoops() {

1803 LLVM_DEBUG(dbgs() << "ARM Loops: Reverting any remaining pseudos...\n");

1804 bool Changed = false;

1805

1806 for (auto &MBB : *MF) {

1812

1813 for (auto &I : MBB) {

1816 else if (I.getOpcode() == ARM::t2LoopDec)

1818 else if (I.getOpcode() == ARM::t2LoopEnd)

1820 else if (I.getOpcode() == ARM::t2LoopEndDec)

1822 else if (I.getOpcode() == ARM::MQPRCopy)

1824 }

1825

1827 MQPRCopies.empty())

1828 continue;

1829

1830 Changed = true;

1831

1832 for (auto *Start : Starts) {

1834 RevertWhile(Start);

1835 else

1836 RevertDo(Start);

1837 }

1838 for (auto *Dec : Decs)

1840

1841 for (auto *End : Ends)

1843 for (auto *End : EndDecs)

1844 RevertLoopEndDec(End);

1845 for (auto *MI : MQPRCopies) {

1847 assert(MI->getOpcode() == ARM::MQPRCopy && "Only expected MQPRCOPY!");

1849 auto MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::MVE_VORR),

1850 MI->getOperand(0).getReg())

1851 .add(MI->getOperand(1))

1852 .add(MI->getOperand(1));

1854 MI->eraseFromParent();

1855 }

1856 }

1857 return Changed;

1858}

1859

1861 return new ARMLowOverheadLoops();

1862}

unsigned const MachineRegisterInfo * MRI

static bool isDomainMVE(MachineInstr *MI)

SmallPtrSet< MachineInstr *, 2 > Uses

static bool isVectorPredicated(MachineInstr *MI)

ReachingDefAnalysis & RDA

static bool canGenerateNonZeros(const MachineInstr &MI)

static bool isHorizontalReduction(const MachineInstr &MI)

ReachingDefAnalysis InstSet & ToRemove

static bool producesDoubleWidthResult(const MachineInstr &MI)

static bool hasVPRUse(MachineInstr &MI)

static bool isRegInClass(const MachineOperand &MO, const TargetRegisterClass *Class)

static bool ValidateMVEStore(MachineInstr *MI, MachineLoop *ML)

static bool isVectorPredicate(MachineInstr *MI)

static bool retainsPreviousHalfElement(const MachineInstr &MI)

static bool shouldInspect(MachineInstr &MI)

static cl::opt< bool > DisableTailPredication("arm-loloops-disable-tailpred", cl::Hidden, cl::desc("Disable tail-predication in the ARM LowOverheadLoop pass"), cl::init(false))

static bool producesFalseLanesZero(MachineInstr &MI, const TargetRegisterClass *QPRs, const ReachingDefAnalysis &RDA, InstSet &FalseLanesZero)

static int getVecSize(const MachineInstr &MI)

#define ARM_LOW_OVERHEAD_LOOPS_NAME

static cl::opt< bool > DisableOmitDLS("arm-disable-omit-dls", cl::Hidden, cl::desc("Disable omitting 'dls lr, lr' instructions"), cl::init(false))

ReachingDefAnalysis InstSet InstSet & Ignore

static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))

Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.

DenseMap< Block *, BlockRelaxAux > Blocks

const HexagonInstrInfo * TII

This file implements the LivePhysRegs utility for tracking liveness of physical registers.

static ARM::PredBlockMask CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter, MachineBasicBlock::instr_iterator EndIter, SmallVectorImpl< MachineInstr * > &DeadInstructions)

unsigned const TargetRegisterInfo * TRI

#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)

static bool isValid(const char C)

Returns true if C is a valid mangled character: <0-9a-zA-Z_>.

assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

This file implements a set that has insertion order iteration characteristics.

Represent the analysis usage information of a pass.

AnalysisUsage & addRequired()

void setPreservesCFG()

This function should be called by the pass, iff they do not: (1) add or remove basic blocks from the function; (2) modify terminator instructions in any way.

This class represents an Operation in the Expression.

FunctionPass class - This class is used to implement most global optimizations.

Describe properties that are true of each instruction in the target description file.

ArrayRef< MCOperandInfo > operands() const

Wrapper class representing physical registers. Should be passed by value.

static MCRegister from(unsigned Val)

Check the provided unsigned value is a valid MCRegister.

unsigned pred_size() const

instr_iterator insert(instr_iterator I, MachineInstr *M)

Insert MI into the instruction list before I, possibly inside a bundle.

iterator_range< livein_iterator > liveins() const

MachineInstr & instr_back()

pred_iterator pred_begin()

bool isLayoutSuccessor(const MachineBasicBlock *MBB) const

Return true if the specified MBB will be emitted immediately after this block, such that if this block exits by falling through, control will transfer to the specified MBB.

iterator_range< succ_iterator > successors()

iterator insertAfter(iterator I, MachineInstr *MI)

Insert MI into the instruction list after I.

MachineInstrBundleIterator< MachineInstr > iterator

MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.

void getAnalysisUsage(AnalysisUsage &AU) const override

getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.

virtual bool runOnMachineFunction(MachineFunction &MF)=0

runOnMachineFunction - This method must be overloaded to perform the desired machine code transformation or analysis.

virtual MachineFunctionProperties getRequiredProperties() const

Properties which a MachineFunction may have at a given point in time.

MachineFunctionProperties & set(Property P)

const TargetSubtargetInfo & getSubtarget() const

getSubtarget - Return the subtarget for which this machine code is being compiled.

StringRef getName() const

getName - Return the name of the corresponding LLVM function.

MachineRegisterInfo & getRegInfo()

getRegInfo - Return information about the registers currently in use.

const MachineFunctionProperties & getProperties() const

Get the function properties.

const MachineBasicBlock & front() const

const MachineInstrBuilder & addImm(int64_t Val) const

Add a new immediate operand.

const MachineInstrBuilder & add(const MachineOperand &MO) const

const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const

Add a new virtual register operand.

const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const

MachineInstr * getInstr() const

If conversion operators fail, use this method to get the MachineInstr explicitly.

const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const

Add a virtual register definition operand.

Representation of each machine instruction.

unsigned getOpcode() const

Returns the opcode of this MachineInstr.

const MachineBasicBlock * getParent() const

const DebugLoc & getDebugLoc() const

Returns the debug location id of this MachineInstr.

const MachineOperand & getOperand(unsigned i) const

MachineBasicBlock * findLoopPreheader(MachineLoop *L, bool SpeculativePreheader=false, bool FindMultiLoopPreheader=false) const

Find the block that either is the loop preheader, or could speculatively be used as the preheader.

A description of a memory reference used in the backend.

MachineOperand class - Representation of each machine instruction operand.

unsigned getOperandNo() const

Returns the index of this operand in the instruction that it belongs to.

bool isReg() const

isReg - Tests if this is a MO_Register operand.

Register getReg() const

getReg - Returns the register number.

bool isIdenticalTo(const MachineOperand &Other) const

Returns true if this operand is identical to the specified operand except for liveness related flags (isKill, isUndef and isDead).

void setIsDef(bool Val=true)

Change a def to a use, or a use to a def.

MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.

virtual StringRef getPassName() const

getPassName - Return a nice clean name for a pass.

Special value supplied for machine level alias analysis.

This class provides the reaching def analysis.

MachineInstr * getLocalLiveOutMIDef(MachineBasicBlock *MBB, Register Reg) const

Return the local MI that produces the live out value for Reg, or nullptr for a non-live out or non-local def.

bool isSafeToMoveForwards(MachineInstr *From, MachineInstr *To) const

Return whether From can be moved forwards to just before To.

bool isSafeToRemove(MachineInstr *MI, InstSet &ToRemove) const

Return whether removing this instruction will have no effect on the program, returning the redundant use-def chain.

MachineInstr * getMIOperand(MachineInstr *MI, unsigned Idx) const

If a single MachineInstr creates the reaching definition, for MIs operand at Idx, then return it.

void reset()

Re-run the analysis.

void getGlobalUses(MachineInstr *MI, Register Reg, InstSet &Uses) const

Collect the users of the value stored in Reg, which is defined by MI.

void collectKilledOperands(MachineInstr *MI, InstSet &Dead) const

Assuming MI is dead, recursively search the incoming operands which are killed by MI and collect those that would become dead.

bool isSafeToMoveBackwards(MachineInstr *From, MachineInstr *To) const

Return whether From can be moved backwards to just after To.

bool hasLocalDefBefore(MachineInstr *MI, Register Reg) const

Provide whether the register has been defined in the same basic block as, and before, MI.

bool isReachingDefLiveOut(MachineInstr *MI, Register Reg) const

Return whether the reaching def for MI also is live out of its parent block.

bool isSafeToDefRegAt(MachineInstr *MI, Register Reg) const

Return whether a MachineInstr could be inserted at MI and safely define the given register without affecting the program.

bool hasSameReachingDef(MachineInstr *A, MachineInstr *B, Register Reg) const

Return whether A and B use the same def of Reg.

void getReachingLocalUses(MachineInstr *MI, Register Reg, InstSet &Uses) const

Provides the uses, in the same block as MI, of register that MI defines.

MachineInstr * getUniqueReachingMIDef(MachineInstr *MI, Register Reg) const

If a single MachineInstr creates the reaching definition, then return it.

void getGlobalReachingDefs(MachineInstr *MI, Register Reg, InstSet &Defs) const

Collect all possible definitions of the value stored in Reg, which is used by MI.

Wrapper class representing virtual and physical registers.

MCRegister asMCReg() const

Utility to check-convert this value to a MCRegister.

A vector that has set insertion semantics.

size_type size() const

Determine the number of elements in the SetVector.

void clear()

Completely clear the SetVector.

bool insert(const value_type &X)

Insert a new element into the SetVector.

bool contains(const key_type &key) const

Check if the SetVector contains the given key.

A templated base class for SmallPtrSet which provides the typesafe interface that is common across all small sizes.

bool erase(PtrType Ptr)

Remove pointer from the set.

size_type count(ConstPtrType Ptr) const

count - Return 1 if the specified pointer is in the set, 0 otherwise.

std::pair< iterator, bool > insert(PtrType Ptr)

Inserts Ptr if and only if there is no element in the container equal to Ptr.

bool contains(ConstPtrType Ptr) const

SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.

This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.

bool contains(Register Reg) const

Return true if the specified register is included in this register class.

TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.

A Use represents the edge between a Value definition and its users.

@ ValidForTailPredication

@ RetainsPreviousHalfElement

bool isPredicated(const MCInst &MI, const MCInstrInfo *MCII)

bool isVpred(OperandType op)

unsigned ID

LLVM IR allows arbitrary numbers to be used as calling convention identifiers.

initializer< Ty > init(const Ty &Val)

NodeAddr< DefNode * > Def

This is an optimization pass for GlobalISel generic memory operations.

static bool isDoLoopStart(const MachineInstr &MI)

auto drop_begin(T &&RangeOrContainer, size_t N=1)

Return a range covering RangeOrContainer with the first N elements excluded.

void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)

int findFirstVPTPredOperandIdx(const MachineInstr &MI)

ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI, Register &PredReg)

bool all_of(R &&range, UnaryPredicate P)

Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.

auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)

Get the size of a range.

MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)

Builder interface. Specify how to create the initial instruction itself.

static bool isVCTP(const MachineInstr *MI)

auto enumerate(FirstRange &&First, RestRanges &&...Rest)

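Given two or more input ranges, returns a new range whose values are tuples (A, B, C, ...), such that A is the 0-based index of the item in the sequence, and B, C, ..., are the values from the original input ranges.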

static bool isVPTOpcode(int Opc)

void append_range(Container &C, Range &&R)

Wrapper function to append range R to container C.

static unsigned getTailPredVectorWidth(unsigned Opcode)

static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)

Get the operands corresponding to the given Pred value.

FunctionPass * createARMLowOverheadLoopsPass()

static bool isMovRegOpcode(int Opc)

static bool isSubImmOpcode(int Opc)

bool any_of(R &&range, UnaryPredicate P)

Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.

auto reverse(ContainerTy &&C)

raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

static bool isLoopStart(const MachineInstr &MI)

void RevertWhileLoopStartLR(MachineInstr *MI, const TargetInstrInfo *TII, unsigned BrOpc=ARM::t2Bcc, bool UseCmp=false)

void recomputeLivenessFlags(MachineBasicBlock &MBB)

Recomputes dead and kill flags in MBB.

static unsigned VCTPOpcodeToLSTP(unsigned Opcode, bool IsDoLoop)

void addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB, Register DestReg)

void RevertLoopEnd(MachineInstr *MI, const TargetInstrInfo *TII, unsigned BrOpc=ARM::t2Bcc, bool SkipCmp=false)

void RevertLoopDec(MachineInstr *MI, const TargetInstrInfo *TII, bool SetFlags=false)

MachineBasicBlock * getWhileLoopStartTargetBB(const MachineInstr &MI)

bool is_contained(R &&Range, const E &Element)

Returns true if Element is found in Range.

static bool isWhileLoopStart(const MachineInstr &MI)

static unsigned VCMPOpcodeToVPT(unsigned Opcode)

void RevertDoLoopStart(MachineInstr *MI, const TargetInstrInfo *TII)

int getAddSubImmediate(MachineInstr &MI)

void recomputeVPTBlockMask(MachineInstr &Instr)

void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)

Convenience function for recomputing live-in's for a set of MBBs until the computation converges.

Printable printMBBReference(const MachineBasicBlock &MBB)

Prints a machine basic block reference.

Pair of physical register and lane mask.