LLVM: lib/Transforms/Scalar/LoopStrengthReduce.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

118#include

119#include

120#include

121#include

122#include

123#include

124#include

125#include

126#include

127#include

128

129using namespace llvm;

131

132#define DEBUG_TYPE "loop-reduce"

133

134

135

136

137

139

140

141

142

143

145

146

149 cl::desc("Enable LSR phi elimination"));

150

151

154 cl::desc("Add instruction count to a LSR cost model"));

155

156

159 cl::desc("Narrow LSR complex solution using"

160 " expectation of registers number"));

161

162

163

166 cl::desc("Narrow LSR search space by filtering non-optimal formulae"

167 " with the same ScaledReg and Scale"));

168

171 cl::desc("A flag that overrides the target's preferred addressing mode."),

175 "Prefer pre-indexed addressing mode"),

177 "Prefer post-indexed addressing mode"),

179

182 cl::init(std::numeric_limits<uint16_t>::max()),

183 cl::desc("LSR search space complexity limit"));

184

187 cl::desc("The limit on recursion depth for LSRs setup cost"));

188

191 cl::desc("Attempt to drop solution if it is less profitable"));

192

195 cl::desc("Enable analysis of vscale-relative immediates in LSR"));

196

199 cl::desc("Avoid using scaled registers with vscale-relative addressing"));

200

201#ifndef NDEBUG

202

205 cl::desc("Stress test LSR IV chains"));

206#else

208#endif

209

210namespace {

211

212struct MemAccessTy {

213

215 std::numeric_limits::max();

216

217 Type *MemTy = nullptr;

219

220 MemAccessTy() = default;

221 MemAccessTy(Type *Ty, unsigned AS) : MemTy(Ty), AddrSpace(AS) {}

222

224 return MemTy == Other.MemTy && AddrSpace == Other.AddrSpace;

225 }

226

228

229 static MemAccessTy getUnknown(LLVMContext &Ctx,

230 unsigned AS = UnknownAddressSpace) {

231 return MemAccessTy(Type::getVoidTy(Ctx), AS);

232 }

233

235};

236

237

238class RegSortData {

239public:

240

241

242 SmallBitVector UsedByIndices;

243

244 void print(raw_ostream &OS) const;

245 void dump() const;

246};

247

248

249

251 constexpr Immediate(ScalarTy MinVal, bool Scalable)

252 : FixedOrScalableQuantity(MinVal, Scalable) {}

253

254 constexpr Immediate(const FixedOrScalableQuantity<Immediate, int64_t> &V)

255 : FixedOrScalableQuantity(V) {}

256

257public:

258 constexpr Immediate() = delete;

259

260 static constexpr Immediate getFixed(ScalarTy MinVal) {

261 return {MinVal, false};

262 }

263 static constexpr Immediate getScalable(ScalarTy MinVal) {

264 return {MinVal, true};

265 }

266 static constexpr Immediate get(ScalarTy MinVal, bool Scalable) {

267 return {MinVal, Scalable};

268 }

269 static constexpr Immediate getZero() { return {0, false}; }

270 static constexpr Immediate getFixedMin() {

271 return {std::numeric_limits<int64_t>::min(), false};

272 }

273 static constexpr Immediate getFixedMax() {

274 return {std::numeric_limits<int64_t>::max(), false};

275 }

276 static constexpr Immediate getScalableMin() {

277 return {std::numeric_limits<int64_t>::min(), true};

278 }

279 static constexpr Immediate getScalableMax() {

280 return {std::numeric_limits<int64_t>::max(), true};

281 }

282

283 constexpr bool isLessThanZero() const { return Quantity < 0; }

284

285 constexpr bool isGreaterThanZero() const { return Quantity > 0; }

286

287 constexpr bool isCompatibleImmediate(const Immediate &Imm) const {

288 return isZero() || Imm.isZero() || Imm.Scalable == Scalable;

289 }

290

291 constexpr bool isMin() const {

292 return Quantity == std::numeric_limits::min();

293 }

294

295 constexpr bool isMax() const {

296 return Quantity == std::numeric_limits::max();

297 }

298

299

300 constexpr Immediate addUnsigned(const Immediate &RHS) const {

301 assert(isCompatibleImmediate(RHS) && "Incompatible Immediates");

302 ScalarTy Value = (uint64_t)Quantity + RHS.getKnownMinValue();

303 return {Value, Scalable || RHS.isScalable()};

304 }

305

306 constexpr Immediate subUnsigned(const Immediate &RHS) const {

307 assert(isCompatibleImmediate(RHS) && "Incompatible Immediates");

308 ScalarTy Value = (uint64_t)Quantity - RHS.getKnownMinValue();

309 return {Value, Scalable || RHS.isScalable()};

310 }

311

312

313 constexpr Immediate mulUnsigned(const ScalarTy RHS) const {

314 ScalarTy Value = (uint64_t)Quantity * RHS;

315 return {Value, Scalable};

316 }

317

318

319 const SCEV *getSCEV(ScalarEvolution &SE, Type *Ty) const {

320 const SCEV *S = SE.getConstant(Ty, Quantity);

321 if (Scalable)

323 return S;

324 }

325

326 const SCEV *getNegativeSCEV(ScalarEvolution &SE, Type *Ty) const {

327 const SCEV *NegS = SE.getConstant(Ty, -(uint64_t)Quantity);

328 if (Scalable)

330 return NegS;

331 }

332

333 const SCEV *getUnknownSCEV(ScalarEvolution &SE, Type *Ty) const {

335 if (Scalable)

337 return SU;

338 }

339};

340

341

342

343

344

345struct KeyOrderTargetImmediate {

346 bool operator()(const Immediate &LHS, const Immediate &RHS) const {

347 if (LHS.isScalable() && RHS.isScalable())

348 return false;

349 if (LHS.isScalable() && RHS.isScalable())

350 return true;

351 return LHS.getKnownMinValue() < RHS.getKnownMinValue();

352 }

353};

354

355

356

357

358struct KeyOrderSizeTAndImmediate {

359 bool operator()(const std::pair<size_t, Immediate> &LHS,

360 const std::pair<size_t, Immediate> &RHS) const {

361 size_t LSize = LHS.first;

362 size_t RSize = RHS.first;

363 if (LSize != RSize)

364 return LSize < RSize;

365 return KeyOrderTargetImmediate()(LHS.second, RHS.second);

366 }

367};

368}

369

370#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

371void RegSortData::print(raw_ostream &OS) const {

372 OS << "[NumUses=" << UsedByIndices.count() << ']';

373}

374

377}

378#endif

379

380namespace {

381

382

383class RegUseTracker {

384 using RegUsesTy = DenseMap<const SCEV *, RegSortData>;

385

386 RegUsesTy RegUsesMap;

388

389public:

390 void countRegister(const SCEV *Reg, size_t LUIdx);

391 void dropRegister(const SCEV *Reg, size_t LUIdx);

392 void swapAndDropUse(size_t LUIdx, size_t LastLUIdx);

393

394 bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;

395

396 const SmallBitVector &getUsedByIndices(const SCEV *Reg) const;

397

398 void clear();

399

402

406 const_iterator end() const { return RegSequence.end(); }

407};

408

409}

410

411void

412RegUseTracker::countRegister(const SCEV *Reg, size_t LUIdx) {

413 std::pair<RegUsesTy::iterator, bool> Pair = RegUsesMap.try_emplace(Reg);

414 RegSortData &RSD = Pair.first->second;

415 if (Pair.second)

417 RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));

418 RSD.UsedByIndices.set(LUIdx);

419}

420

421void

422RegUseTracker::dropRegister(const SCEV *Reg, size_t LUIdx) {

423 RegUsesTy::iterator It = RegUsesMap.find(Reg);

424 assert(It != RegUsesMap.end());

425 RegSortData &RSD = It->second;

426 assert(RSD.UsedByIndices.size() > LUIdx);

427 RSD.UsedByIndices.reset(LUIdx);

428}

429

430void

431RegUseTracker::swapAndDropUse(size_t LUIdx, size_t LastLUIdx) {

432 assert(LUIdx <= LastLUIdx);

433

434

435

436 for (auto &Pair : RegUsesMap) {

437 SmallBitVector &UsedByIndices = Pair.second.UsedByIndices;

438 if (LUIdx < UsedByIndices.size())

439 UsedByIndices[LUIdx] =

440 LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : false;

441 UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));

442 }

443}

444

445bool

446RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {

447 RegUsesTy::const_iterator I = RegUsesMap.find(Reg);

448 if (I == RegUsesMap.end())

449 return false;

450 const SmallBitVector &UsedByIndices = I->second.UsedByIndices;

452 if (i == -1) return false;

453 if ((size_t)i != LUIdx) return true;

454 return UsedByIndices.find_next(i) != -1;

455}

456

457const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const {

458 RegUsesTy::const_iterator I = RegUsesMap.find(Reg);

459 assert(I != RegUsesMap.end() && "Unknown register!");

460 return I->second.UsedByIndices;

461}

462

463void RegUseTracker::clear() {

464 RegUsesMap.clear();

466}

467

468namespace {

469

470

471

472struct Formula {

473

474 GlobalValue *BaseGV = nullptr;

475

476

477 Immediate BaseOffset = Immediate::getZero();

478

479

480 bool HasBaseReg = false;

481

482

483 int64_t Scale = 0;

484

485

486

487

488

489

490

491

492

493

494

495

496

497

498

500

501

502

503 const SCEV *ScaledReg = nullptr;

504

505

506

507

508 Immediate UnfoldedOffset = Immediate::getZero();

509

510 Formula() = default;

511

512 void initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);

513

515

516 void canonicalize(const Loop &L);

517

518 bool unscale();

519

520 bool hasZeroEnd() const;

521

522 bool countsDownToZero() const;

523

524 size_t getNumRegs() const;

526

527 void deleteBaseReg(const SCEV *&S);

528

529 bool referencesReg(const SCEV *S) const;

530 bool hasRegsUsedByUsesOtherThan(size_t LUIdx,

531 const RegUseTracker &RegUses) const;

532

533 void print(raw_ostream &OS) const;

534 void dump() const;

535};

536

537}

538

539

544

547 return;

548 }

549

550

552 for (const SCEV *S : Add->operands())

554 return;

555 }

556

557

558 const SCEV *Start, *Step;

559 const Loop *ARLoop;

562 !Start->isZero()) {

565

567 L, Good, Bad, SE);

568 return;

569 }

570

571

573 if (Mul->getOperand(0)->isAllOnesValue()) {

576

582 for (const SCEV *S : MyGood)

584 for (const SCEV *S : MyBad)

586 return;

587 }

588

589

590

592}

593

594

595

596void Formula::initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {

600 if (!Good.empty()) {

601 const SCEV *Sum = SE.getAddExpr(Good);

603 BaseRegs.push_back(Sum);

604 HasBaseReg = true;

605 }

606 if (!Bad.empty()) {

607 const SCEV *Sum = SE.getAddExpr(Bad);

609 BaseRegs.push_back(Sum);

610 HasBaseReg = true;

611 }

612 canonicalize(*L);

613}

614

620

621

622

623

624bool Formula::isCanonical(const Loop &L) const {

625 assert((Scale == 0 || ScaledReg) &&

626 "ScaledReg must be non-null if Scale is non-zero");

627

628 if (!ScaledReg)

629 return BaseRegs.size() <= 1;

630

631 if (Scale != 1)

632 return true;

633

634 if (Scale == 1 && BaseRegs.empty())

635 return false;

636

638 return true;

639

640

641

642

643 return none_of(BaseRegs, [&L](const SCEV *S) {

645 });

646}

647

648

649

650

651

652

653

654void Formula::canonicalize(const Loop &L) {

656 return;

657

658 if (BaseRegs.empty()) {

659

660 assert(ScaledReg && "Expected 1*reg => reg");

661 assert(Scale == 1 && "Expected 1*reg => reg");

662 BaseRegs.push_back(ScaledReg);

663 Scale = 0;

664 ScaledReg = nullptr;

665 return;

666 }

667

668

669 if (!ScaledReg) {

670 ScaledReg = BaseRegs.pop_back_val();

671 Scale = 1;

672 }

673

674

675

676

678 auto I = find_if(BaseRegs, [&L](const SCEV *S) {

680 });

681 if (I != BaseRegs.end())

683 }

685}

686

687

688

689

690

691bool Formula::unscale() {

692 if (Scale != 1)

693 return false;

694 Scale = 0;

695 BaseRegs.push_back(ScaledReg);

696 ScaledReg = nullptr;

697 return true;

698}

699

700bool Formula::hasZeroEnd() const {

701 if (UnfoldedOffset || BaseOffset)

702 return false;

703 if (BaseRegs.size() != 1 || ScaledReg)

704 return false;

705 return true;

706}

707

708bool Formula::countsDownToZero() const {

709 if (!hasZeroEnd())

710 return false;

711 assert(BaseRegs.size() == 1 && "hasZeroEnd should mean one BaseReg");

712 const APInt *StepInt;

714 return false;

716}

717

718

719

720size_t Formula::getNumRegs() const {

721 return !!ScaledReg + BaseRegs.size();

722}

723

724

725

726Type *Formula::getType() const {

727 return !BaseRegs.empty() ? BaseRegs.front()->getType() :

728 ScaledReg ? ScaledReg->getType() :

729 BaseGV ? BaseGV->getType() :

730 nullptr;

731}

732

733

734void Formula::deleteBaseReg(const SCEV *&S) {

735 if (&S != &BaseRegs.back())

737 BaseRegs.pop_back();

738}

739

740

741bool Formula::referencesReg(const SCEV *S) const {

742 return S == ScaledReg || is_contained(BaseRegs, S);

743}

744

745

746

747bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,

748 const RegUseTracker &RegUses) const {

749 if (ScaledReg)

750 if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))

751 return true;

752 for (const SCEV *BaseReg : BaseRegs)

753 if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))

754 return true;

755 return false;

756}

757

758#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

759void Formula::print(raw_ostream &OS) const {

760 bool First = true;

761 if (BaseGV) {

762 if (First) OS << " + "; else First = false;

764 }

765 if (BaseOffset.isNonZero()) {

766 if (First) OS << " + "; else First = false;

767 OS << BaseOffset;

768 }

769 for (const SCEV *BaseReg : BaseRegs) {

770 if (First) OS << " + "; else First = false;

771 OS << "reg(" << *BaseReg << ')';

772 }

773 if (HasBaseReg && BaseRegs.empty()) {

774 if (First) OS << " + "; else First = false;

775 OS << "**error: HasBaseReg**";

776 } else if (!HasBaseReg && !BaseRegs.empty()) {

777 if (First) OS << " + "; else First = false;

778 OS << "**error: !HasBaseReg**";

779 }

780 if (Scale != 0) {

781 if (First) OS << " + "; else First = false;

782 OS << Scale << "*reg(";

783 if (ScaledReg)

784 OS << *ScaledReg;

785 else

786 OS << "";

787 OS << ')';

788 }

789 if (UnfoldedOffset.isNonZero()) {

790 if (First) OS << " + ";

791 OS << "imm(" << UnfoldedOffset << ')';

792 }

793}

794

797}

798#endif

799

800

801

807

808

809

815

816

817

824

825

826

827

828

829

832 bool IgnoreSignificantBits = false) {

833

836

837

839 if (RC) {

841

842

843 if (RA.isAllOnes()) {

844 if (LHS->getType()->isPointerTy())

845 return nullptr;

847 }

848

849 if (RA == 1)

850 return LHS;

851 }

852

853

855 if (!RC)

856 return nullptr;

857 const APInt &LA = C->getAPInt();

859 if (LA.srem(RA) != 0)

860 return nullptr;

862 }

863

864

866 if ((IgnoreSignificantBits || isAddRecSExtable(AR, SE)) && AR->isAffine()) {

868 IgnoreSignificantBits);

869 if (!Step) return nullptr;

871 IgnoreSignificantBits);

872 if (!Start) return nullptr;

873

874

875

877 }

878 return nullptr;

879 }

880

881

885 for (const SCEV *S : Add->operands()) {

887 if (Op) return nullptr;

888 Ops.push_back(Op);

889 }

891 }

892 return nullptr;

893 }

894

895

898

900 if (IgnoreSignificantBits || isMulSExtable(MulRHS, SE)) {

904 if (LC && RC) {

907 if (LOps == ROps)

908 return getExactSDiv(LC, RC, SE, IgnoreSignificantBits);

909 }

910 }

911 }

912

914 bool Found = false;

915 for (const SCEV *S : Mul->operands()) {

916 if (!Found)

918 IgnoreSignificantBits)) {

919 S = Q;

920 Found = true;

921 }

922 Ops.push_back(S);

923 }

925 }

926 return nullptr;

927 }

928

929

930 return nullptr;

931}

932

933

934

938 if (C->getSignificantBits() <= 64) {

940 return Immediate::getFixed(C->getSExtValue());

941 }

945 if (Result.isNonZero())

947 return Result;

951 if (Result.isNonZero())

953

955 return Result;

959 return Immediate::getScalable(C->getSExtValue());

960 }

961 return Immediate::getZero();

962}

963

964

965

970 return GV;

971 }

975 if (Result)

977 return Result;

981 if (Result)

983

985 return Result;

986 }

987 return nullptr;

988}

989

990

991

996 if (SI->getPointerOperand() == OperandVal)

997 isAddress = true;

999

1000

1001 switch (II->getIntrinsicID()) {

1002 case Intrinsic::memset:

1003 case Intrinsic::prefetch:

1004 case Intrinsic::masked_load:

1005 if (II->getArgOperand(0) == OperandVal)

1006 isAddress = true;

1007 break;

1008 case Intrinsic::masked_store:

1009 if (II->getArgOperand(1) == OperandVal)

1010 isAddress = true;

1011 break;

1012 case Intrinsic::memmove:

1013 case Intrinsic::memcpy:

1014 if (II->getArgOperand(0) == OperandVal ||

1015 II->getArgOperand(1) == OperandVal)

1016 isAddress = true;

1017 break;

1018 default: {

1020 if (TTI.getTgtMemIntrinsic(II, IntrInfo)) {

1021 if (IntrInfo.PtrVal == OperandVal)

1022 isAddress = true;

1023 }

1024 }

1025 }

1027 if (RMW->getPointerOperand() == OperandVal)

1028 isAddress = true;

1030 if (CmpX->getPointerOperand() == OperandVal)

1031 isAddress = true;

1032 }

1033 return isAddress;

1034}

1035

1036

1039 MemAccessTy AccessTy = MemAccessTy::getUnknown(Inst->getContext());

1040

1041

1043 AccessTy.MemTy = Ty;

1044

1045

1047 AccessTy.AddrSpace = SI->getPointerAddressSpace();

1049 AccessTy.AddrSpace = LI->getPointerAddressSpace();

1051 AccessTy.AddrSpace = RMW->getPointerAddressSpace();

1053 AccessTy.AddrSpace = CmpX->getPointerAddressSpace();

1055 switch (II->getIntrinsicID()) {

1056 case Intrinsic::prefetch:

1057 case Intrinsic::memset:

1058 AccessTy.AddrSpace = II->getArgOperand(0)->getType()->getPointerAddressSpace();

1059 AccessTy.MemTy = OperandVal->getType();

1060 break;

1061 case Intrinsic::memmove:

1062 case Intrinsic::memcpy:

1064 AccessTy.MemTy = OperandVal->getType();

1065 break;

1066 case Intrinsic::masked_load:

1067 AccessTy.AddrSpace =

1068 II->getArgOperand(0)->getType()->getPointerAddressSpace();

1069 break;

1070 case Intrinsic::masked_store:

1071 AccessTy.AddrSpace =

1072 II->getArgOperand(1)->getType()->getPointerAddressSpace();

1073 break;

1074 default: {

1076 if (TTI.getTgtMemIntrinsic(II, IntrInfo) && IntrInfo.PtrVal) {

1077 AccessTy.AddrSpace

1079 }

1080

1081 break;

1082 }

1083 }

1084 }

1085

1086 return AccessTy;

1087}

1088

1089

1096 return true;

1097 }

1098 return false;

1099}

1100

1101

1102

1103

1104

1105

1106

1107

1108

1109

1113

1118 return false;

1121 Processed, SE);

1124 Processed, SE);

1127 Processed, SE);

1128 default:

1129 break;

1130 }

1131

1132 if (!Processed.insert(S).second)

1133 return false;

1134

1136 for (const SCEV *S : Add->operands()) {

1138 return true;

1139 }

1140 return false;

1141 }

1142

1143 const SCEV *Op0, *Op1;

1145

1148

1149

1150

1152 Value *UVal = U->getValue();

1153 for (User *UR : UVal->users()) {

1154

1156 if (UI && UI->getOpcode() == Instruction::Mul &&

1158 return SE.getSCEV(UI) == S;

1159 }

1160 }

1161 }

1162 }

1163

1166 return false;

1167 }

1168

1169

1170 return true;

1171}

1172

1173namespace {

1174

1175class LSRUse;

1176

1177}

1178

1179

1180

1181

1182

1183

1184

1185

1186

1187

1189 const LSRUse &LU, const Formula &F);

1190

1191

1193 const LSRUse &LU, const Formula &F,

1194 const Loop &L);

1195

1196namespace {

1197

1198

1200 const Loop *L = nullptr;

1201 ScalarEvolution *SE = nullptr;

1202 const TargetTransformInfo *TTI = nullptr;

1203 TargetTransformInfo::LSRCost C;

1205

1206public:

1207 Cost() = delete;

1208 Cost(const Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,

1210 L(L), SE(&SE), TTI(&TTI), AMK(AMK) {

1211 C.Insns = 0;

1212 C.NumRegs = 0;

1213 C.AddRecCost = 0;

1214 C.NumIVMuls = 0;

1215 C.NumBaseAdds = 0;

1216 C.ImmCost = 0;

1217 C.SetupCost = 0;

1218 C.ScaleCost = 0;

1219 }

1220

1221 bool isLess(const Cost &Other) const;

1222

1223 void Lose();

1224

1225#ifndef NDEBUG

1226

1228 return ((C.Insns | C.NumRegs | C.AddRecCost | C.NumIVMuls | C.NumBaseAdds

1229 | C.ImmCost | C.SetupCost | C.ScaleCost) != ~0u)

1230 || ((C.Insns & C.NumRegs & C.AddRecCost & C.NumIVMuls & C.NumBaseAdds

1231 & C.ImmCost & C.SetupCost & C.ScaleCost) == ~0u);

1232 }

1233#endif

1234

1235 bool isLoser() {

1237 return C.NumRegs == ~0u;

1238 }

1239

1240 void RateFormula(const Formula &F, SmallPtrSetImpl<const SCEV *> &Regs,

1241 const DenseSet<const SCEV *> &VisitedRegs, const LSRUse &LU,

1242 bool HardwareLoopProfitable,

1243 SmallPtrSetImpl<const SCEV *> *LoserRegs = nullptr);

1244

1245 void print(raw_ostream &OS) const;

1246 void dump() const;

1247

1248private:

1249 void RateRegister(const Formula &F, const SCEV *Reg,

1250 SmallPtrSetImpl<const SCEV *> &Regs, const LSRUse &LU,

1251 bool HardwareLoopProfitable);

1252 void RatePrimaryRegister(const Formula &F, const SCEV *Reg,

1253 SmallPtrSetImpl<const SCEV *> &Regs,

1254 const LSRUse &LU, bool HardwareLoopProfitable,

1255 SmallPtrSetImpl<const SCEV *> *LoserRegs);

1256};

1257

1258

1259

1260struct LSRFixup {

1261

1263

1264

1265

1266 Value *OperandValToReplace = nullptr;

1267

1268

1269

1270

1272

1273

1274

1275

1276 Immediate Offset = Immediate::getZero();

1277

1278 LSRFixup() = default;

1279

1280 bool isUseFullyOutsideLoop(const Loop *L) const;

1281

1282 void print(raw_ostream &OS) const;

1283 void dump() const;

1284};

1285

1286

1287

1288

1289

1290

1291class LSRUse {

1292 DenseSet<SmallVector<const SCEV *, 4>> Uniquifier;

1293

1294public:

1295

1296

1298 Basic,

1299 Special,

1300 Address,

1301 ICmpZero

1302

1303 };

1304

1305 using SCEVUseKindPair = PointerIntPair<const SCEV *, 2, KindType>;

1306

1308 MemAccessTy AccessTy;

1309

1310

1312

1313

1314 Immediate MinOffset = Immediate::getFixedMax();

1315 Immediate MaxOffset = Immediate::getFixedMin();

1316

1317

1318

1319 bool AllFixupsOutsideLoop = true;

1320

1321

1322

1323

1324 bool AllFixupsUnconditional = true;

1325

1326

1327

1328

1329

1330

1331 bool RigidFormula = false;

1332

1333

1334

1335

1336

1337 Type *WidestFixupType = nullptr;

1338

1339

1340

1341

1343

1344

1345 SmallPtrSet<const SCEV *, 4> Regs;

1346

1347 LSRUse(KindType K, MemAccessTy AT) : Kind(K), AccessTy(AT) {}

1348

1349 LSRFixup &getNewFixup() {

1350 Fixups.push_back(LSRFixup());

1351 return Fixups.back();

1352 }

1353

1354 void pushFixup(LSRFixup &f) {

1355 Fixups.push_back(f);

1356 if (Immediate::isKnownGT(f.Offset, MaxOffset))

1357 MaxOffset = f.Offset;

1358 if (Immediate::isKnownLT(f.Offset, MinOffset))

1359 MinOffset = f.Offset;

1360 }

1361

1362 bool HasFormulaWithSameRegs(const Formula &F) const;

1363 float getNotSelectedProbability(const SCEV *Reg) const;

1364 bool InsertFormula(const Formula &F, const Loop &L);

1365 void DeleteFormula(Formula &F);

1366 void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);

1367

1368 void print(raw_ostream &OS) const;

1369 void dump() const;

1370};

1371

1372}

1373

1375 LSRUse::KindType Kind, MemAccessTy AccessTy,

1376 GlobalValue *BaseGV, Immediate BaseOffset,

1377 bool HasBaseReg, int64_t Scale,

1378 Instruction *Fixup = nullptr);

1379

1382 return 1;

1384 return 0;

1390 return std::accumulate(S->operands().begin(), S->operands().end(), 0,

1391 [&](unsigned i, const SCEV *Reg) {

1392 return i + getSetupCost(Reg, Depth - 1);

1393 });

1397 return 0;

1398}

1399

1400

1401void Cost::RateRegister(const Formula &F, const SCEV *Reg,

1402 SmallPtrSetImpl<const SCEV *> &Regs, const LSRUse &LU,

1403 bool HardwareLoopProfitable) {

1405

1406

1407

1408 if (AR->getLoop() != L) {

1409

1411 return;

1412

1413

1414

1415 if (!AR->getLoop()->contains(L)) {

1416 Lose();

1417 return;

1418 }

1419

1420

1421 ++C.NumRegs;

1422 return;

1423 }

1424

1425 unsigned LoopCost = 1;

1428 const SCEV *Start;

1429 const APInt *Step;

1431

1432

1434 F.BaseOffset.isFixed() &&

1435 *Step == F.BaseOffset.getFixedValue();

1439

1440 if ((CanPreIndex || CanPostIndex) && LU.AllFixupsUnconditional)

1441 LoopCost = 0;

1442 }

1443 }

1444

1445

1446

1447 if (LU.Kind == LSRUse::ICmpZero && F.countsDownToZero() &&

1448 HardwareLoopProfitable)

1449 LoopCost = 0;

1450 C.AddRecCost += LoopCost;

1451

1452

1453

1455 if (!Regs.count(AR->getOperand(1))) {

1456 RateRegister(F, AR->getOperand(1), Regs, LU, HardwareLoopProfitable);

1457 if (isLoser())

1458 return;

1459 }

1460 }

1461 }

1462 ++C.NumRegs;

1463

1464

1465

1467

1468 C.SetupCost = std::min(C.SetupCost, 1 << 16);

1469

1472}

1473

1474

1475

1476

1477void Cost::RatePrimaryRegister(const Formula &F, const SCEV *Reg,

1478 SmallPtrSetImpl<const SCEV *> &Regs,

1479 const LSRUse &LU, bool HardwareLoopProfitable,

1480 SmallPtrSetImpl<const SCEV *> *LoserRegs) {

1481 if (LoserRegs && LoserRegs->count(Reg)) {

1482 Lose();

1483 return;

1484 }

1486 RateRegister(F, Reg, Regs, LU, HardwareLoopProfitable);

1487 if (LoserRegs && isLoser())

1489 }

1490}

1491

1492void Cost::RateFormula(const Formula &F, SmallPtrSetImpl<const SCEV *> &Regs,

1493 const DenseSet<const SCEV *> &VisitedRegs,

1494 const LSRUse &LU, bool HardwareLoopProfitable,

1495 SmallPtrSetImpl<const SCEV *> *LoserRegs) {

1496 if (isLoser())

1497 return;

1498 assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula");

1499

1500 unsigned PrevAddRecCost = C.AddRecCost;

1501 unsigned PrevNumRegs = C.NumRegs;

1502 unsigned PrevNumBaseAdds = C.NumBaseAdds;

1503 if (const SCEV *ScaledReg = F.ScaledReg) {

1504 if (VisitedRegs.count(ScaledReg)) {

1505 Lose();

1506 return;

1507 }

1508 RatePrimaryRegister(F, ScaledReg, Regs, LU, HardwareLoopProfitable,

1509 LoserRegs);

1510 if (isLoser())

1511 return;

1512 }

1513 for (const SCEV *BaseReg : F.BaseRegs) {

1514 if (VisitedRegs.count(BaseReg)) {

1515 Lose();

1516 return;

1517 }

1518 RatePrimaryRegister(F, BaseReg, Regs, LU, HardwareLoopProfitable,

1519 LoserRegs);

1520 if (isLoser())

1521 return;

1522 }

1523

1524

1525 size_t NumBaseParts = F.getNumRegs();

1526 if (NumBaseParts > 1)

1527

1528

1529 C.NumBaseAdds +=

1531 C.NumBaseAdds += (F.UnfoldedOffset.isNonZero());

1532

1533

1535

1536

1537 for (const LSRFixup &Fixup : LU.Fixups) {

1538 if (Fixup.Offset.isCompatibleImmediate(F.BaseOffset)) {

1539 Immediate Offset = Fixup.Offset.addUnsigned(F.BaseOffset);

1540 if (F.BaseGV)

1541 C.ImmCost += 64;

1542

1543 else if (Offset.isNonZero())

1544 C.ImmCost +=

1545 APInt(64, Offset.getKnownMinValue(), true).getSignificantBits();

1546

1547

1548

1549 if (LU.Kind == LSRUse::Address && Offset.isNonZero() &&

1552 C.NumBaseAdds++;

1553 } else {

1554

1555 C.ImmCost += 2048;

1556 }

1557 }

1558

1559

1562 return;

1563 }

1564

1565

1566

1567

1570 if (C.NumRegs > TTIRegNum) {

1571

1572

1573 if (PrevNumRegs > TTIRegNum)

1574 C.Insns += (C.NumRegs - PrevNumRegs);

1575 else

1576 C.Insns += (C.NumRegs - TTIRegNum);

1577 }

1578

1579

1580

1581

1582

1583

1584

1585

1586

1587

1588

1589 if (LU.Kind == LSRUse::ICmpZero && F.hasZeroEnd() &&

1591 C.Insns++;

1592

1593 C.Insns += (C.AddRecCost - PrevAddRecCost);

1594

1595

1596 if (LU.Kind != LSRUse::ICmpZero)

1597 C.Insns += C.NumBaseAdds - PrevNumBaseAdds;

1599}

1600

1601

1602void Cost::Lose() {

1603 C.Insns = std::numeric_limits::max();

1604 C.NumRegs = std::numeric_limits::max();

1605 C.AddRecCost = std::numeric_limits::max();

1606 C.NumIVMuls = std::numeric_limits::max();

1607 C.NumBaseAdds = std::numeric_limits::max();

1608 C.ImmCost = std::numeric_limits::max();

1609 C.SetupCost = std::numeric_limits::max();

1610 C.ScaleCost = std::numeric_limits::max();

1611}

1612

1613

1614bool Cost::isLess(const Cost &Other) const {

1616 C.Insns != Other.C.Insns)

1617 return C.Insns < Other.C.Insns;

1619}

1620

1621#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

1622void Cost::print(raw_ostream &OS) const {

1624 OS << C.Insns << " instruction" << (C.Insns == 1 ? " " : "s ");

1625 OS << C.NumRegs << " reg" << (C.NumRegs == 1 ? "" : "s");

1626 if (C.AddRecCost != 0)

1627 OS << ", with addrec cost " << C.AddRecCost;

1628 if (C.NumIVMuls != 0)

1629 OS << ", plus " << C.NumIVMuls << " IV mul"

1630 << (C.NumIVMuls == 1 ? "" : "s");

1631 if (C.NumBaseAdds != 0)

1632 OS << ", plus " << C.NumBaseAdds << " base add"

1633 << (C.NumBaseAdds == 1 ? "" : "s");

1634 if (C.ScaleCost != 0)

1635 OS << ", plus " << C.ScaleCost << " scale cost";

1636 if (C.ImmCost != 0)

1637 OS << ", plus " << C.ImmCost << " imm cost";

1638 if (C.SetupCost != 0)

1639 OS << ", plus " << C.SetupCost << " setup cost";

1640}

1641

1644}

1645#endif

1646

1647

1648bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {

1649

1651 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)

1652 if (PN->getIncomingValue(i) == OperandValToReplace &&

1653 L->contains(PN->getIncomingBlock(i)))

1654 return false;

1655 return true;

1656 }

1657

1658 return L->contains(UserInst);

1659}

1660

1661#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

1662void LSRFixup::print(raw_ostream &OS) const {

1663 OS << "UserInst=";

1664

1666 OS << "store ";

1667 Store->getOperand(0)->printAsOperand(OS, false);

1670 else

1672

1673 OS << ", OperandValToReplace=";

1674 OperandValToReplace->printAsOperand(OS, false);

1675

1676 for (const Loop *PIL : PostIncLoops) {

1677 OS << ", PostIncLoop=";

1678 PIL->getHeader()->printAsOperand(OS, false);

1679 }

1680

1681 if (Offset.isNonZero())

1682 OS << ", Offset=" << Offset;

1683}

1684

1687}

1688#endif

1689

1690

1691

1692bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {

1694 if (F.ScaledReg) Key.push_back(F.ScaledReg);

1695

1697 return Uniquifier.count(Key);

1698}

1699

1700

1701float LSRUse::getNotSelectedProbability(const SCEV *Reg) const {

1702 unsigned FNum = 0;

1703 for (const Formula &F : Formulae)

1704 if (F.referencesReg(Reg))

1705 FNum++;

1706 return ((float)(Formulae.size() - FNum)) / Formulae.size();

1707}

1708

1709

1710

1711bool LSRUse::InsertFormula(const Formula &F, const Loop &L) {

1712 assert(F.isCanonical(L) && "Invalid canonical representation");

1713

1714 if (!Formulae.empty() && RigidFormula)

1715 return false;

1716

1718 if (F.ScaledReg) Key.push_back(F.ScaledReg);

1719

1721

1722 if (!Uniquifier.insert(Key).second)

1723 return false;

1724

1725

1726 assert((F.ScaledReg || F.ScaledReg->isZero()) &&

1727 "Zero allocated in a scaled register!");

1728#ifndef NDEBUG

1729 for (const SCEV *BaseReg : F.BaseRegs)

1730 assert(BaseReg->isZero() && "Zero allocated in a base register!");

1731#endif

1732

1733

1734 Formulae.push_back(F);

1735

1736

1738 if (F.ScaledReg)

1739 Regs.insert(F.ScaledReg);

1740

1741 return true;

1742}

1743

1744

1745void LSRUse::DeleteFormula(Formula &F) {

1746 if (&F != &Formulae.back())

1748 Formulae.pop_back();

1749}

1750

1751

1752void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {

1753

1754 SmallPtrSet<const SCEV *, 4> OldRegs = std::move(Regs);

1756 for (const Formula &F : Formulae) {

1757 if (F.ScaledReg) Regs.insert(F.ScaledReg);

1759 }

1760

1761

1762 for (const SCEV *S : OldRegs)

1763 if (!Regs.count(S))

1764 RegUses.dropRegister(S, LUIdx);

1765}

1766

1767#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

1768void LSRUse::print(raw_ostream &OS) const {

1769 OS << "LSR Use: Kind=";

1770 switch (Kind) {

1771 case Basic: OS << "Basic"; break;

1772 case Special: OS << "Special"; break;

1773 case ICmpZero: OS << "ICmpZero"; break;

1775 OS << "Address of ";

1777 OS << "pointer";

1778 else {

1779 OS << *AccessTy.MemTy;

1780 }

1781

1782 OS << " in addrspace(" << AccessTy.AddrSpace << ')';

1783 }

1784

1785 OS << ", Offsets={";

1786 bool NeedComma = false;

1787 for (const LSRFixup &Fixup : Fixups) {

1788 if (NeedComma) OS << ',';

1789 OS << Fixup.Offset;

1790 NeedComma = true;

1791 }

1792 OS << '}';

1793

1794 if (AllFixupsOutsideLoop)

1795 OS << ", all-fixups-outside-loop";

1796

1797 if (AllFixupsUnconditional)

1798 OS << ", all-fixups-unconditional";

1799

1800 if (WidestFixupType)

1801 OS << ", widest fixup type: " << *WidestFixupType;

1802}

1803

1806}

1807#endif

1808

1810 LSRUse::KindType Kind, MemAccessTy AccessTy,

1811 GlobalValue *BaseGV, Immediate BaseOffset,

1812 bool HasBaseReg, int64_t Scale,

1814 switch (Kind) {

1815 case LSRUse::Address: {

1816 int64_t FixedOffset =

1817 BaseOffset.isScalable() ? 0 : BaseOffset.getFixedValue();

1818 int64_t ScalableOffset =

1819 BaseOffset.isScalable() ? BaseOffset.getKnownMinValue() : 0;

1820 return TTI.isLegalAddressingMode(AccessTy.MemTy, BaseGV, FixedOffset,

1821 HasBaseReg, Scale, AccessTy.AddrSpace,

1822 Fixup, ScalableOffset);

1823 }

1824 case LSRUse::ICmpZero:

1825

1826

1827 if (BaseGV)

1828 return false;

1829

1830

1831 if (Scale != 0 && HasBaseReg && BaseOffset.isNonZero())

1832 return false;

1833

1834

1835

1836 if (Scale != 0 && Scale != -1)

1837 return false;

1838

1839

1840

1841 if (BaseOffset.isNonZero()) {

1842

1843

1844 if (BaseOffset.isScalable())

1845 return false;

1846

1847

1848

1849

1850

1851 if (Scale == 0)

1852

1853

1854 BaseOffset = BaseOffset.getFixed(-(uint64_t)BaseOffset.getFixedValue());

1855 return TTI.isLegalICmpImmediate(BaseOffset.getFixedValue());

1856 }

1857

1858

1859 return true;

1860

1861 case LSRUse::Basic:

1862

1863 return !BaseGV && Scale == 0 && BaseOffset.isZero();

1864

1865 case LSRUse::Special:

1866

1867 return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset.isZero();

1868 }

1869

1871}

1872

1874 Immediate MinOffset, Immediate MaxOffset,

1875 LSRUse::KindType Kind, MemAccessTy AccessTy,

1876 GlobalValue *BaseGV, Immediate BaseOffset,

1877 bool HasBaseReg, int64_t Scale) {

1878 if (BaseOffset.isNonZero() &&

1879 (BaseOffset.isScalable() != MinOffset.isScalable() ||

1880 BaseOffset.isScalable() != MaxOffset.isScalable()))

1881 return false;

1882

1883 int64_t Base = BaseOffset.getKnownMinValue();

1884 int64_t Min = MinOffset.getKnownMinValue();

1885 int64_t Max = MaxOffset.getKnownMinValue();

1887 return false;

1888 MinOffset = Immediate::get((uint64_t)Base + Min, MinOffset.isScalable());

1890 return false;

1891 MaxOffset = Immediate::get((uint64_t)Base + Max, MaxOffset.isScalable());

1892

1894 HasBaseReg, Scale) &&

1896 HasBaseReg, Scale);

1897}

1898

1900 Immediate MinOffset, Immediate MaxOffset,

1901 LSRUse::KindType Kind, MemAccessTy AccessTy,

1902 const Formula &F, const Loop &L) {

1903

1904

1905

1906

1907

1908

1909

1910 assert((F.isCanonical(L) || F.Scale != 0));

1912 F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale);

1913}

1914

1915

1917 Immediate MaxOffset, LSRUse::KindType Kind,

1918 MemAccessTy AccessTy, GlobalValue *BaseGV,

1919 Immediate BaseOffset, bool HasBaseReg, int64_t Scale) {

1920

1922 BaseOffset, HasBaseReg, Scale) ||

1923

1924

1925 (Scale == 1 &&

1927 BaseGV, BaseOffset, true, 0));

1928}

1929

1931 Immediate MaxOffset, LSRUse::KindType Kind,

1932 MemAccessTy AccessTy, const Formula &F) {

1933 return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV,

1934 F.BaseOffset, F.HasBaseReg, F.Scale);

1935}

1936

1939 if (Offset.isScalable())

1940 return TTI.isLegalAddScalableImmediate(Offset.getKnownMinValue());

1941

1942 return TTI.isLegalAddImmediate(Offset.getFixedValue());

1943}

1944

1946 const LSRUse &LU, const Formula &F) {

1947

1948 if (LU.Kind == LSRUse::Address && TTI.LSRWithInstrQueries()) {

1949 for (const LSRFixup &Fixup : LU.Fixups)

1951 (F.BaseOffset + Fixup.Offset), F.HasBaseReg,

1952 F.Scale, Fixup.UserInst))

1953 return false;

1954 return true;

1955 }

1956

1958 LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg,

1959 F.Scale);

1960}

1961

1963 const LSRUse &LU, const Formula &F,

1964 const Loop &L) {

1965 if (F.Scale)

1966 return 0;

1967

1968

1969

1971 LU.AccessTy, F, L))

1972 return F.Scale != 1;

1973

1974 switch (LU.Kind) {

1975 case LSRUse::Address: {

1976

1977 int64_t ScalableMin = 0, ScalableMax = 0, FixedMin = 0, FixedMax = 0;

1978 if (F.BaseOffset.isScalable()) {

1979 ScalableMin = (F.BaseOffset + LU.MinOffset).getKnownMinValue();

1980 ScalableMax = (F.BaseOffset + LU.MaxOffset).getKnownMinValue();

1981 } else {

1982 FixedMin = (F.BaseOffset + LU.MinOffset).getFixedValue();

1983 FixedMax = (F.BaseOffset + LU.MaxOffset).getFixedValue();

1984 }

1986 LU.AccessTy.MemTy, F.BaseGV, StackOffset::get(FixedMin, ScalableMin),

1987 F.HasBaseReg, F.Scale, LU.AccessTy.AddrSpace);

1989 LU.AccessTy.MemTy, F.BaseGV, StackOffset::get(FixedMax, ScalableMax),

1990 F.HasBaseReg, F.Scale, LU.AccessTy.AddrSpace);

1991

1993 "Legal addressing mode has an illegal cost!");

1994 return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);

1995 }

1996 case LSRUse::ICmpZero:

1997 case LSRUse::Basic:

1998 case LSRUse::Special:

1999

2000

2001 return 0;

2002 }

2003

2005}

2006

2008 LSRUse::KindType Kind, MemAccessTy AccessTy,

2009 GlobalValue *BaseGV, Immediate BaseOffset,

2010 bool HasBaseReg) {

2011

2012 if (BaseOffset.isZero() && !BaseGV)

2013 return true;

2014

2015

2016

2017 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;

2018

2019

2020

2021 if (!HasBaseReg && Scale == 1) {

2022 Scale = 0;

2023 HasBaseReg = true;

2024 }

2025

2026

2027

2028

2029

2030

2031 if (HasBaseReg && BaseOffset.isNonZero() && Kind != LSRUse::ICmpZero &&

2033 Scale = 0;

2034

2036 HasBaseReg, Scale);

2037}

2038

2041 Immediate MaxOffset, LSRUse::KindType Kind,

2042 MemAccessTy AccessTy, const SCEV *S,

2043 bool HasBaseReg) {

2044

2045 if (S->isZero()) return true;

2046

2047

2048

2051

2052

2053 if (!S->isZero()) return false;

2054

2055

2056 if (BaseOffset.isZero() && !BaseGV)

2057 return true;

2058

2059 if (BaseOffset.isScalable())

2060 return false;

2061

2062

2063

2064 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;

2065

2067 BaseOffset, HasBaseReg, Scale);

2068}

2069

2070namespace {

2071

2072

2073

2074

2075

2076

2077

2078

2079

2080

2081struct IVInc {

2083 Value* IVOperand;

2084 const SCEV *IncExpr;

2085

2086 IVInc(Instruction *U, Value *O, const SCEV *E)

2087 : UserInst(U), IVOperand(O), IncExpr(E) {}

2088};

2089

2090

2091

2092struct IVChain {

2094 const SCEV *ExprBase = nullptr;

2095

2096 IVChain() = default;

2097 IVChain(const IVInc &Head, const SCEV *Base)

2098 : Incs(1, Head), ExprBase(Base) {}

2099

2101

2102

2103 const_iterator begin() const {

2105 return std::next(Incs.begin());

2106 }

2107 const_iterator end() const {

2108 return Incs.end();

2109 }

2110

2111

2112 bool hasIncs() const { return Incs.size() >= 2; }

2113

2114

2116

2117

2118 Instruction *tailUserInst() const { return Incs.back().UserInst; }

2119

2120

2121 bool isProfitableIncrement(const SCEV *OperExpr,

2122 const SCEV *IncExpr,

2123 ScalarEvolution&);

2124};

2125

2126

2127

2128

2129struct ChainUsers {

2130 SmallPtrSet<Instruction*, 4> FarUsers;

2131 SmallPtrSet<Instruction*, 4> NearUsers;

2132};

2133

2134

2135class LSRInstance {

2136 IVUsers &IU;

2137 ScalarEvolution &SE;

2138 DominatorTree &DT;

2139 LoopInfo &LI;

2140 AssumptionCache &AC;

2141 TargetLibraryInfo &TLI;

2142 const TargetTransformInfo &TTI;

2143 Loop *const L;

2144 MemorySSAUpdater *MSSAU;

2146 mutable SCEVExpander Rewriter;

2148 bool HardwareLoopProfitable = false;

2149

2150

2151

2152

2153

2155

2156

2157

2158

2159

2160

2161

2162 SetVector<int64_t, SmallVector<int64_t, 8>, SmallSet<int64_t, 8>> Factors;

2163

2164

2165

2166 Cost BaselineCost;

2167

2168

2169 SmallSetVector<Type *, 4> Types;

2170

2171

2173

2174

2175 RegUseTracker RegUses;

2176

2177

2178

2179

2180 static const unsigned MaxChains = 8;

2181

2182

2184

2185

2186 SmallPtrSet<Use*, MaxChains> IVIncSet;

2187

2188

2189 SmallVector<llvm::WeakVH, 2> ScalarEvolutionIVs;

2190

2191

2192

2193

2194

2195 SmallSetVector<Instruction *, 4> InsertedNonLCSSAInsts;

2196

2197 void OptimizeShadowIV();

2198 bool FindIVUserForCond(Instruction *Cond, IVStrideUse *&CondUse);

2199 Instruction *OptimizeMax(ICmpInst *Cond, IVStrideUse *&CondUse);

2200 void OptimizeLoopTermCond();

2201

2202 void ChainInstruction(Instruction *UserInst, Instruction *IVOper,

2203 SmallVectorImpl &ChainUsersVec);

2204 void FinalizeChain(IVChain &Chain);

2205 void CollectChains();

2206 void GenerateIVChain(const IVChain &Chain,

2207 SmallVectorImpl &DeadInsts);

2208

2209 void CollectInterestingTypesAndFactors();

2210 void CollectFixupsAndInitialFormulae();

2211

2212

2213 using UseMapTy = DenseMap<LSRUse::SCEVUseKindPair, size_t>;

2214 UseMapTy UseMap;

2215

2216 bool reconcileNewOffset(LSRUse &LU, Immediate NewOffset, bool HasBaseReg,

2217 LSRUse::KindType Kind, MemAccessTy AccessTy);

2218

2219 std::pair<size_t, Immediate> getUse(const SCEV *&Expr, LSRUse::KindType Kind,

2220 MemAccessTy AccessTy);

2221

2222 void DeleteUse(LSRUse &LU, size_t LUIdx);

2223

2224 LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);

2225

2226 void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);

2227 void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);

2228 void CountRegisters(const Formula &F, size_t LUIdx);

2229 bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);

2230 bool IsFixupExecutedEachIncrement(const LSRFixup &LF) const;

2231

2232 void CollectLoopInvariantFixupsAndFormulae();

2233

2234 void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,

2235 unsigned Depth = 0);

2236

2237 void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,

2238 const Formula &Base, unsigned Depth,

2239 size_t Idx, bool IsScaledReg = false);

2240 void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);

2241 void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,

2242 const Formula &Base, size_t Idx,

2243 bool IsScaledReg = false);

2244 void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);

2245 void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx,

2246 const Formula &Base,

2247 const SmallVectorImpl &Worklist,

2248 size_t Idx, bool IsScaledReg = false);

2249 void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);

2250 void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);

2251 void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);

2252 void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);

2253 void GenerateCrossUseConstantOffsets();

2254 void GenerateAllReuseFormulae();

2255

2256 void FilterOutUndesirableDedicatedRegisters();

2257

2258 size_t EstimateSearchSpaceComplexity() const;

2259 void NarrowSearchSpaceByDetectingSupersets();

2260 void NarrowSearchSpaceByCollapsingUnrolledCode();

2261 void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();

2262 void NarrowSearchSpaceByFilterFormulaWithSameScaledReg();

2263 void NarrowSearchSpaceByFilterPostInc();

2264 void NarrowSearchSpaceByDeletingCostlyFormulas();

2265 void NarrowSearchSpaceByPickingWinnerRegs();

2266 void NarrowSearchSpaceUsingHeuristics();

2267

2268 void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,

2269 Cost &SolutionCost,

2270 SmallVectorImpl<const Formula *> &Workspace,

2271 const Cost &CurCost,

2272 const SmallPtrSet<const SCEV *, 16> &CurRegs,

2273 DenseSet<const SCEV *> &VisitedRegs) const;

2274 void Solve(SmallVectorImpl<const Formula *> &Solution) const;

2275

2278 const SmallVectorImpl<Instruction *> &Inputs) const;

2280 const LSRFixup &LF,

2281 const LSRUse &LU) const;

2282

2283 Value *Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F,

2285 SmallVectorImpl &DeadInsts) const;

2286 void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF,

2287 const Formula &F,

2288 SmallVectorImpl &DeadInsts);

2289 void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F,

2290 SmallVectorImpl &DeadInsts);

2291 void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution);

2292

2293public:

2294 LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT,

2295 LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC,

2296 TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU);

2297

2298 bool getChanged() const { return Changed; }

2299 const SmallVectorImpl &getScalarEvolutionIVs() const {

2300 return ScalarEvolutionIVs;

2301 }

2302

2303 void print_factors_and_types(raw_ostream &OS) const;

2304 void print_fixups(raw_ostream &OS) const;

2305 void print_uses(raw_ostream &OS) const;

2306 void print(raw_ostream &OS) const;

2307 void dump() const;

2308};

2309

2310}

2311

2312

2313

2314void LSRInstance::OptimizeShadowIV() {

2317 return;

2318

2320 UI != E; ) {

2322 ++UI;

2323 Instruction *ShadowUse = CandidateUI->getUser();

2324 Type *DestTy = nullptr;

2325 bool IsSigned = false;

2326

2327

2328

2329

2330

2331

2332

2333

2334

2335

2336

2337

2338

2340 IsSigned = false;

2341 DestTy = UCast->getDestTy();

2342 }

2343 else if (SIToFPInst *SCast = dyn_cast(CandidateUI->getUser())) {

2344 IsSigned = true;

2345 DestTy = SCast->getDestTy();

2346 }

2347 if (!DestTy) continue;

2348

2349

2350

2352

2354 if (!PH) continue;

2356

2357

2358

2359

2361 if (!AR) continue;

2364

2367 if (Mantissa == -1) continue;

2369 continue;

2370

2371 unsigned Entry, Latch;

2374 Latch = 1;

2375 } else {

2377 Latch = 0;

2378 }

2379

2381 if (!Init) continue;

2382 Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?

2385

2386 BinaryOperator *Incr =

2388 if (!Incr) continue;

2389 if (Incr->getOpcode() != Instruction::Add

2390 && Incr->getOpcode() != Instruction::Sub)

2391 continue;

2392

2393

2394 ConstantInt *C = nullptr;

2399 else

2400 continue;

2401

2402 if (C) continue;

2403

2404

2405

2406 if (C->getValue().isStrictlyPositive())

2407 continue;

2408

2409

2412

2413

2414 Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());

2416 Incr->getOpcode() == Instruction::Add ? Instruction::FAdd

2417 : Instruction::FSub,

2418 NewPH, CFP, "IV.S.next.", Incr->getIterator());

2420

2423

2424

2428 break;

2429 }

2430}

2431

2432

2433

2434bool LSRInstance::FindIVUserForCond(Instruction *Cond, IVStrideUse *&CondUse) {

2435 for (IVStrideUse &U : IU)

2436 if (U.getUser() == Cond) {

2437

2438

2439

2440 CondUse = &U;

2441 return true;

2442 }

2443 return false;

2444}

2445

2446

2447

2448

2449

2450

2451

2452

2453

2454

2455

2456

2457

2458

2459

2460

2461

2462

2463

2464

2465

2466

2467

2468

2469

2470

2471

2472

2473

2474

2475

2476

2477

2478

2479

2480

2481

2482

2483

2484

2485

2486

2487

2488

2489

2490

2491

2492

2493

2494Instruction *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse *&CondUse) {

2495

2498 return Cond;

2499

2502

2505 return Cond;

2507

2508

2509 const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);

2510 if (IterationCount != SE.getSCEV(Sel)) return Cond;

2511

2512

2513

2514

2516 const SCEVNAryExpr *Max = nullptr;

2518 Pred = ICmpInst::ICMP_SLE;

2519 Max = S;

2521 Pred = ICmpInst::ICMP_SLT;

2522 Max = S;

2524 Pred = ICmpInst::ICMP_ULT;

2526 } else {

2527

2528 return Cond;

2529 }

2530

2531

2532

2533 if (Max->getNumOperands() != 2)

2534 return Cond;

2535

2536 const SCEV *MaxLHS = Max->getOperand(0);

2537 const SCEV *MaxRHS = Max->getOperand(1);

2538

2539

2540

2541 if (!MaxLHS ||

2542 (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One)))

2543 return Cond;

2544

2545

2546

2547 const SCEV *IV = SE.getSCEV(Cond->getOperand(0));

2550 return Cond;

2551

2553 "Loop condition operand is an addrec in a different loop!");

2554

2555

2556

2557 Value *NewRHS = nullptr;

2558 if (ICmpInst::isTrueWhenEqual(Pred)) {

2559

2562 if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)

2563 NewRHS = BO->getOperand(0);

2566 if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)

2567 NewRHS = BO->getOperand(0);

2568 if (!NewRHS)

2569 return Cond;

2575 NewRHS = SU->getValue();

2576 else

2577

2578 return Cond;

2579

2580

2581

2584

2585

2586

2587 ICmpInst *NewCond = new ICmpInst(Cond->getIterator(), Pred,

2588 Cond->getOperand(0), NewRHS, "scmp");

2589

2590

2592 Cond->replaceAllUsesWith(NewCond);

2593 CondUse->setUser(NewCond);

2595 Cond->eraseFromParent();

2597 if (Cmp->use_empty()) {

2599 Cmp->eraseFromParent();

2600 }

2601 return NewCond;

2602}

2603

2604

2605void

2606LSRInstance::OptimizeLoopTermCond() {

2607 SmallPtrSet<Instruction *, 4> PostIncs;

2608

2609

2610

2611

2612

2613

2614

2615

2616

2617

2618

2619

2620

2621 BasicBlock *LatchBlock = L->getLoopLatch();

2622 SmallVector<BasicBlock*, 8> ExitingBlocks;

2623 L->getExitingBlocks(ExitingBlocks);

2625

2627 return;

2628 }

2629

2630

2631 for (BasicBlock *ExitingBlock : ExitingBlocks) {

2632

2633

2634

2635

2636

2639 continue;

2640

2642

2643

2645 if (Extract)

2647

2648

2650 continue;

2651

2652

2653 IVStrideUse *CondUse = nullptr;

2654 if (!FindIVUserForCond(Cond, CondUse))

2655 continue;

2656

2657

2658

2659

2660

2661

2662

2664 Cond = OptimizeMax(Cmp, CondUse);

2665

2666

2667

2668

2669 if (!DT.dominates(ExitingBlock, LatchBlock))

2670 continue;

2671

2672

2673

2674 if (LatchBlock != ExitingBlock)

2675 for (const IVStrideUse &UI : IU)

2676

2677

2678 if (&UI != CondUse &&

2679 !DT.properlyDominates(UI.getUser()->getParent(), ExitingBlock)) {

2680

2681

2682 const SCEV *A = IU.getStride(*CondUse, L);

2683 const SCEV *B = IU.getStride(UI, L);

2684 if (A || B) continue;

2690 else

2692 }

2693 if (const SCEVConstant *D =

2695 const ConstantInt *C = D->getValue();

2696

2697 if (C->isOne() || C->isMinusOne())

2698 goto decline_post_inc;

2699

2700 if (C->getValue().getSignificantBits() >= 64 ||

2701 C->getValue().isMinSignedValue())

2702 goto decline_post_inc;

2703

2704 if (isAddressUse(TTI, UI.getUser(), UI.getOperandValToReplace())) {

2705 MemAccessTy AccessTy =

2706 getAccessType(TTI, UI.getUser(), UI.getOperandValToReplace());

2707 int64_t Scale = C->getSExtValue();

2709 0,

2710 true, Scale,

2711 AccessTy.AddrSpace))

2712 goto decline_post_inc;

2713 Scale = -Scale;

2715 0,

2716 true, Scale,

2717 AccessTy.AddrSpace))

2718 goto decline_post_inc;

2719 }

2720 }

2721 }

2722

2723 LLVM_DEBUG(dbgs() << " Change loop exiting icmp to use postinc iv: "

2724 << *Cond << '\n');

2725

2726

2727

2728

2730 !Extract) {

2731 if (Cond->hasOneUse()) {

2733 } else {

2734

2737 Cond->setName(L->getHeader()->getName() + ".termcond");

2739

2740

2743 }

2744 }

2745

2746

2747

2748

2751

2753 decline_post_inc:;

2754 }

2755

2756

2757

2758

2759 IVIncInsertPos = L->getLoopLatch()->getTerminator();

2760 for (Instruction *Inst : PostIncs)

2762}

2763

2764

2765

2766bool LSRInstance::reconcileNewOffset(LSRUse &LU, Immediate NewOffset,

2767 bool HasBaseReg, LSRUse::KindType Kind,

2768 MemAccessTy AccessTy) {

2769 Immediate NewMinOffset = LU.MinOffset;

2770 Immediate NewMaxOffset = LU.MaxOffset;

2771 MemAccessTy NewAccessTy = AccessTy;

2772

2773

2774

2775

2776 if (LU.Kind != Kind)

2777 return false;

2778

2779

2780

2781

2782 if (Kind == LSRUse::Address) {

2783 if (AccessTy.MemTy != LU.AccessTy.MemTy) {

2784 NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(),

2785 AccessTy.AddrSpace);

2786 }

2787 }

2788

2789

2790 if (Immediate::isKnownLT(NewOffset, LU.MinOffset)) {

2792 LU.MaxOffset - NewOffset, HasBaseReg))

2793 return false;

2794 NewMinOffset = NewOffset;

2795 } else if (Immediate::isKnownGT(NewOffset, LU.MaxOffset)) {

2797 NewOffset - LU.MinOffset, HasBaseReg))

2798 return false;

2799 NewMaxOffset = NewOffset;

2800 }

2801

2802

2803

2804

2805 if (NewAccessTy.MemTy && NewAccessTy.MemTy->isVoidTy() &&

2806 (NewMinOffset.isScalable() || NewMaxOffset.isScalable()))

2807 return false;

2808

2809

2810 LU.MinOffset = NewMinOffset;

2811 LU.MaxOffset = NewMaxOffset;

2812 LU.AccessTy = NewAccessTy;

2813 return true;

2814}

2815

2816

2817

2818

2819std::pair<size_t, Immediate> LSRInstance::getUse(const SCEV *&Expr,

2820 LSRUse::KindType Kind,

2821 MemAccessTy AccessTy) {

2822 const SCEV *Copy = Expr;

2824

2825

2827 Offset, true)) {

2828 Expr = Copy;

2829 Offset = Immediate::getFixed(0);

2830 }

2831

2832 std::pair<UseMapTy::iterator, bool> P =

2833 UseMap.try_emplace(LSRUse::SCEVUseKindPair(Expr, Kind));

2834 if (P.second) {

2835

2836 size_t LUIdx = P.first->second;

2837 LSRUse &LU = Uses[LUIdx];

2838 if (reconcileNewOffset(LU, Offset, true, Kind, AccessTy))

2839

2840 return std::make_pair(LUIdx, Offset);

2841 }

2842

2843

2844 size_t LUIdx = Uses.size();

2845 P.first->second = LUIdx;

2846 Uses.push_back(LSRUse(Kind, AccessTy));

2847 LSRUse &LU = Uses[LUIdx];

2848

2849 LU.MinOffset = Offset;

2850 LU.MaxOffset = Offset;

2851 return std::make_pair(LUIdx, Offset);

2852}

2853

2854

2855void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {

2856 if (&LU != &Uses.back())

2858 Uses.pop_back();

2859

2860

2861 RegUses.swapAndDropUse(LUIdx, Uses.size());

2862}

2863

2864

2865

2866LSRUse *

2867LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,

2868 const LSRUse &OrigLU) {

2869

2870 for (LSRUse &LU : Uses) {

2871

2872

2873

2874

2875

2876 if (&LU != &OrigLU &&

2877 LU.Kind != LSRUse::ICmpZero &&

2878 LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&

2879 LU.WidestFixupType == OrigLU.WidestFixupType &&

2880 LU.HasFormulaWithSameRegs(OrigF)) {

2881

2882 for (const Formula &F : LU.Formulae) {

2883

2884

2885 if (F.BaseRegs == OrigF.BaseRegs &&

2886 F.ScaledReg == OrigF.ScaledReg &&

2887 F.BaseGV == OrigF.BaseGV &&

2888 F.Scale == OrigF.Scale &&

2889 F.UnfoldedOffset == OrigF.UnfoldedOffset) {

2890 if (F.BaseOffset.isZero())

2891 return &LU;

2892

2893

2894

2895 break;

2896 }

2897 }

2898 }

2899 }

2900

2901

2902 return nullptr;

2903}

2904

2905void LSRInstance::CollectInterestingTypesAndFactors() {

2906 SmallSetVector<const SCEV *, 4> Strides;

2907

2908

2910 for (const IVStrideUse &U : IU) {

2911 const SCEV *Expr = IU.getExpr(U);

2912 if (!Expr)

2913 continue;

2914

2915

2917

2918

2920 do {

2928 }

2929 } while (!Worklist.empty());

2930 }

2931

2932

2933 for (SmallSetVector<const SCEV *, 4>::const_iterator

2934 I = Strides.begin(), E = Strides.end(); I != E; ++I)

2935 for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =

2936 std::next(I); NewStrideIter != E; ++NewStrideIter) {

2937 const SCEV *OldStride = *I;

2938 const SCEV *NewStride = *NewStrideIter;

2939

2945 else

2947 }

2948 if (const SCEVConstant *Factor =

2950 SE, true))) {

2951 if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())

2952 Factors.insert(Factor->getAPInt().getSExtValue());

2953 } else if (const SCEVConstant *Factor =

2955 NewStride,

2956 SE, true))) {

2957 if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())

2958 Factors.insert(Factor->getAPInt().getSExtValue());

2959 }

2960 }

2961

2962

2963

2964 if (Types.size() == 1)

2965 Types.clear();

2966

2968}

2969

2970

2971

2972

2976 for(; OI != OE; ++OI) {

2978 if (!SE.isSCEVable(Oper->getType()))

2979 continue;

2980

2984 break;

2985 }

2986 }

2987 }

2988 return OI;

2989}

2990

2991

2992

2995 return Trunc->getOperand(0);

2996 return Oper;

2997}

2998

2999

3000

3001

3002

3003

3004

3005

3006

3007

3008

3011 default:

3012 return S;

3015 return nullptr;

3023

3024

3025

3027 for (const SCEV *SubExpr : reverse(Add->operands())) {

3028 if (SubExpr->getSCEVType() == scAddExpr)

3030

3031 if (SubExpr->getSCEVType() != scMulExpr)

3032 return SubExpr;

3033 }

3034 return S;

3035 }

3038 }

3040}

3041

3042

3043

3044

3045

3046

3047bool IVChain::isProfitableIncrement(const SCEV *OperExpr,

3048 const SCEV *IncExpr,

3049 ScalarEvolution &SE) {

3050

3052 return true;

3053

3054

3055

3059 return false;

3060 }

3061

3062 SmallPtrSet<const SCEV*, 8> Processed;

3064}

3065

3066

3067

3068

3069

3070

3071

3072

3073

3074

3075

3081 return true;

3082

3083 if (!Chain.hasIncs())

3084 return false;

3085

3086 if (Users.empty()) {

3087 LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";

3089 : Users) { dbgs() << " " << *Inst << "\n"; });

3090 return false;

3091 }

3092 assert(!Chain.Incs.empty() && "empty IV chains are not allowed");

3093

3094

3095 int cost = 1;

3096

3097

3098

3099

3101 && SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {

3102 --cost;

3103 }

3104 const SCEV *LastIncExpr = nullptr;

3105 unsigned NumConstIncrements = 0;

3106 unsigned NumVarIncrements = 0;

3107 unsigned NumReusedIncrements = 0;

3108

3109 if (TTI.isProfitableLSRChainElement(Chain.Incs[0].UserInst))

3110 return true;

3111

3112 for (const IVInc &Inc : Chain) {

3113 if (TTI.isProfitableLSRChainElement(Inc.UserInst))

3114 return true;

3115 if (Inc.IncExpr->isZero())

3116 continue;

3117

3118

3119

3121 ++NumConstIncrements;

3122 continue;

3123 }

3124

3125 if (Inc.IncExpr == LastIncExpr)

3126 ++NumReusedIncrements;

3127 else

3128 ++NumVarIncrements;

3129

3130 LastIncExpr = Inc.IncExpr;

3131 }

3132

3133

3134

3135 if (NumConstIncrements > 1)

3136 --cost;

3137

3138

3139

3140

3141

3142 cost += NumVarIncrements;

3143

3144

3145

3146 cost -= NumReusedIncrements;

3147

3148 LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost

3149 << "\n");

3150

3151 return cost < 0;

3152}

3153

3154

3155void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,

3156 SmallVectorImpl &ChainUsersVec) {

3157

3158

3160 const SCEV *const OperExpr = SE.getSCEV(NextIV);

3161 const SCEV *const OperExprBase = getExprBase(OperExpr);

3162

3163

3164

3165 unsigned ChainIdx = 0, NChains = IVChainVec.size();

3166 const SCEV *LastIncExpr = nullptr;

3167 for (; ChainIdx < NChains; ++ChainIdx) {

3168 IVChain &Chain = IVChainVec[ChainIdx];

3169

3170

3171

3172

3173

3174 if (StressIVChain && Chain.ExprBase != OperExprBase)

3175 continue;

3176

3179 continue;

3180

3181

3183 continue;

3184

3185

3186 const SCEV *PrevExpr = SE.getSCEV(PrevIV);

3187 const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr);

3189 continue;

3190

3191 if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {

3192 LastIncExpr = IncExpr;

3193 break;

3194 }

3195 }

3196

3197

3198 if (ChainIdx == NChains) {

3200 return;

3203 return;

3204 }

3205 LastIncExpr = OperExpr;

3206

3207

3208

3210 return;

3211 ++NChains;

3212 IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),

3213 OperExprBase));

3214 ChainUsersVec.resize(NChains);

3215 LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst

3216 << ") IV=" << *LastIncExpr << "\n");

3217 } else {

3218 LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst

3219 << ") IV+" << *LastIncExpr << "\n");

3220

3221 IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));

3222 }

3223 IVChain &Chain = IVChainVec[ChainIdx];

3224

3225 SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers;

3226

3227 if (!LastIncExpr->isZero()) {

3228 ChainUsersVec[ChainIdx].FarUsers.insert_range(NearUsers);

3229 NearUsers.clear();

3230 }

3231

3232

3233

3234

3235

3236

3237 for (User *U : IVOper->users()) {

3239 if (!OtherUse)

3240 continue;

3241

3242

3243 IVChain::const_iterator IncIter = Chain.Incs.begin();

3244 IVChain::const_iterator IncEnd = Chain.Incs.end();

3245 for( ; IncIter != IncEnd; ++IncIter) {

3246 if (IncIter->UserInst == OtherUse)

3247 break;

3248 }

3249 if (IncIter != IncEnd)

3250 continue;

3251

3254 && IU.isIVUserOrOperand(OtherUse)) {

3255 continue;

3256 }

3257 NearUsers.insert(OtherUse);

3258 }

3259

3260

3261

3262 ChainUsersVec[ChainIdx].FarUsers.erase(UserInst);

3263}

3264

3265

3266

3267

3268

3269

3270

3271

3272

3273

3274

3275

3276

3277

3278

3279

3280

3281

3282

3283

3284

3285

3286

3287void LSRInstance::CollectChains() {

3290

3291 SmallVector<BasicBlock *,8> LatchPath;

3292 BasicBlock *LoopHeader = L->getHeader();

3294 Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) {

3295 LatchPath.push_back(Rung->getBlock());

3296 }

3297 LatchPath.push_back(LoopHeader);

3298

3299

3300 for (BasicBlock *BB : reverse(LatchPath)) {

3301 for (Instruction &I : *BB) {

3302

3304 continue;

3305

3306

3307

3308

3310 continue;

3311

3312

3313 for (unsigned ChainIdx = 0, NChains = IVChainVec.size();

3314 ChainIdx < NChains; ++ChainIdx) {

3315 ChainUsersVec[ChainIdx].NearUsers.erase(&I);

3316 }

3317

3318 SmallPtrSet<Instruction*, 4> UniqueOperands;

3321 while (IVOpIter != IVOpEnd) {

3323 if (UniqueOperands.insert(IVOpInst).second)

3324 ChainInstruction(&I, IVOpInst, ChainUsersVec);

3325 IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);

3326 }

3327 }

3328 }

3329

3330 for (PHINode &PN : L->getHeader()->phis()) {

3332 continue;

3333

3336 if (IncV)

3337 ChainInstruction(&PN, IncV, ChainUsersVec);

3338 }

3339

3340 unsigned ChainIdx = 0;

3341 for (unsigned UsersIdx = 0, NChains = IVChainVec.size();

3342 UsersIdx < NChains; ++UsersIdx) {

3344 ChainUsersVec[UsersIdx].FarUsers, SE, TTI))

3345 continue;

3346

3347 if (ChainIdx != UsersIdx)

3348 IVChainVec[ChainIdx] = IVChainVec[UsersIdx];

3349 FinalizeChain(IVChainVec[ChainIdx]);

3350 ++ChainIdx;

3351 }

3352 IVChainVec.resize(ChainIdx);

3353}

3354

3355void LSRInstance::FinalizeChain(IVChain &Chain) {

3356 assert(!Chain.Incs.empty() && "empty IV chains are not allowed");

3357 LLVM_DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");

3358

3359 for (const IVInc &Inc : Chain) {

3360 LLVM_DEBUG(dbgs() << " Inc: " << *Inc.UserInst << "\n");

3361 auto UseI = find(Inc.UserInst->operands(), Inc.IVOperand);

3362 assert(UseI != Inc.UserInst->op_end() && "cannot find IV operand");

3363 IVIncSet.insert(UseI);

3364 }

3365}

3366

3367

3371 Immediate IncOffset = Immediate::getZero();

3372 if (IncConst) {

3374 return false;

3376 } else {

3377

3380 C->getSignificantBits() > 64)

3381 return false;

3382 IncOffset = Immediate::getScalable(C->getSExtValue());

3383 }

3384

3386 return false;

3387

3388 MemAccessTy AccessTy = getAccessType(TTI, UserInst, Operand);

3389 if (isAlwaysFoldable(TTI, LSRUse::Address, AccessTy, nullptr,

3390 IncOffset, false))

3391 return false;

3392

3393 return true;

3394}

3395

3396

3397

3398void LSRInstance::GenerateIVChain(const IVChain &Chain,

3399 SmallVectorImpl &DeadInsts) {

3400

3401

3402 const IVInc &Head = Chain.Incs[0];

3404

3406 IVOpEnd, L, SE);

3407 Value *IVSrc = nullptr;

3408 while (IVOpIter != IVOpEnd) {

3410

3411

3412

3413

3414

3415

3416

3417

3418

3419 if (SE.getSCEV(*IVOpIter) == Head.IncExpr

3420 || SE.getSCEV(IVSrc) == Head.IncExpr) {

3421 break;

3422 }

3423 IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);

3424 }

3425 if (IVOpIter == IVOpEnd) {

3426

3427 LLVM_DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");

3428 return;

3429 }

3430 assert(IVSrc && "Failed to find IV chain source");

3431

3432 LLVM_DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n");

3435 const SCEV *LeftOverExpr = nullptr;

3436 const SCEV *Accum = SE.getZero(IntTy);

3439

3440 for (const IVInc &Inc : Chain) {

3443 InsertPt = L->getLoopLatch()->getTerminator();

3444

3445

3446

3447 Value *IVOper = IVSrc;

3448 if (!Inc.IncExpr->isZero()) {

3449

3450

3452 Accum = SE.getAddExpr(Accum, IncExpr);

3453 LeftOverExpr = LeftOverExpr ?

3454 SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr;

3455 }

3456

3457

3458 bool FoundBase = false;

3459 for (auto [MapScev, MapIVOper] : reverse(Bases)) {

3460 const SCEV *Remainder = SE.getMinusSCEV(Accum, MapScev);

3462 if (!Remainder->isZero()) {

3464 Value *IncV = Rewriter.expandCodeFor(Remainder, IntTy, InsertPt);

3465 const SCEV *IVOperExpr =

3467 IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);

3468 } else {

3469 IVOper = MapIVOper;

3470 }

3471

3472 FoundBase = true;

3473 break;

3474 }

3475 }

3476 if (!FoundBase && LeftOverExpr && !LeftOverExpr->isZero()) {

3477

3479 Value *IncV = Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);

3482 IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);

3483

3484

3485 if (canFoldIVIncExpr(LeftOverExpr, Inc.UserInst, Inc.IVOperand, TTI)) {

3486 assert(IVTy == IVOper->getType() && "inconsistent IV increment type");

3488 IVSrc = IVOper;

3489 LeftOverExpr = nullptr;

3490 }

3491 }

3492 Type *OperTy = Inc.IVOperand->getType();

3493 if (IVTy != OperTy) {

3495 "cannot extend a chained IV");

3497 IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain");

3498 }

3499 Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);

3502 }

3503

3504

3506 for (PHINode &Phi : L->getHeader()->phis()) {

3507 if (Phi.getType() != IVSrc->getType())

3508 continue;

3510 Phi.getIncomingValueForBlock(L->getLoopLatch()));

3511 if (!PostIncV || (SE.getSCEV(PostIncV) != SE.getSCEV(IVSrc)))

3512 continue;

3513 Value *IVOper = IVSrc;

3515 if (IVTy != PostIncTy) {

3517 IRBuilder<> Builder(L->getLoopLatch()->getTerminator());

3518 Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc());

3519 IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain");

3520 }

3521 Phi.replaceUsesOfWith(PostIncV, IVOper);

3523 }

3524 }

3525}

3526

3527void LSRInstance::CollectFixupsAndInitialFormulae() {

3528 BranchInst *ExitBranch = nullptr;

3529 bool SaveCmp = TTI.canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &TLI);

3530

3531

3532 SmallPtrSet<const SCEV *, 16> Regs;

3533 DenseSet<const SCEV *> VisitedRegs;

3534 DenseSet<size_t> VisitedLSRUse;

3535

3536 for (const IVStrideUse &U : IU) {

3538

3540 find(UserInst->operands(), U.getOperandValToReplace());

3541 assert(UseI != UserInst->op_end() && "cannot find IV operand");

3542 if (IVIncSet.count(UseI)) {

3543 LLVM_DEBUG(dbgs() << "Use is in profitable chain: " << **UseI << '\n');

3544 continue;

3545 }

3546

3547 LSRUse::KindType Kind = LSRUse::Basic;

3548 MemAccessTy AccessTy;

3549 if (isAddressUse(TTI, UserInst, U.getOperandValToReplace())) {

3550 Kind = LSRUse::Address;

3551 AccessTy = getAccessType(TTI, UserInst, U.getOperandValToReplace());

3552 }

3553

3554 const SCEV *S = IU.getExpr(U);

3555 if (!S)

3556 continue;

3558

3559

3560

3561

3562

3563

3564

3566

3567

3569 continue;

3570 if (CI->isEquality()) {

3571

3572

3573 Value *NV = CI->getOperand(1);

3574 if (NV == U.getOperandValToReplace()) {

3575 CI->setOperand(1, CI->getOperand(0));

3576 CI->setOperand(0, NV);

3577 NV = CI->getOperand(1);

3579 }

3580

3581

3582 const SCEV *N = SE.getSCEV(NV);

3584 (NV->getType()->isPointerTy() ||

3586

3587

3589 if (N)

3590 continue;

3591 Kind = LSRUse::ICmpZero;

3593 } else if (L->isLoopInvariant(NV) &&

3596 NV->getType()->isPointerTy()) {

3597

3598

3599

3600

3601

3602

3605 if (N)

3606 continue;

3607 Kind = LSRUse::ICmpZero;

3610 }

3611

3612

3613

3614 for (size_t i = 0, e = Factors.size(); i != e; ++i)

3615 if (Factors[i] != -1)

3616 Factors.insert(-(uint64_t)Factors[i]);

3617 Factors.insert(-1);

3618 }

3619 }

3620

3621

3622 std::pair<size_t, Immediate> P = getUse(S, Kind, AccessTy);

3623 size_t LUIdx = P.first;

3624 Immediate Offset = P.second;

3625 LSRUse &LU = Uses[LUIdx];

3626

3627

3628 LSRFixup &LF = LU.getNewFixup();

3629 LF.UserInst = UserInst;

3630 LF.OperandValToReplace = U.getOperandValToReplace();

3631 LF.PostIncLoops = TmpPostIncLoops;

3633 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);

3634 LU.AllFixupsUnconditional &= IsFixupExecutedEachIncrement(LF);

3635

3636

3637 if (!VisitedLSRUse.count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) {

3638 Formula F;

3639 F.initialMatch(S, L, SE);

3640 BaselineCost.RateFormula(F, Regs, VisitedRegs, LU,

3641 HardwareLoopProfitable);

3642 VisitedLSRUse.insert(LUIdx);

3643 }

3644

3645 if (!LU.WidestFixupType ||

3648 LU.WidestFixupType = LF.OperandValToReplace->getType();

3649

3650

3651 if (LU.Formulae.empty()) {

3652 InsertInitialFormula(S, LU, LUIdx);

3653 CountRegisters(LU.Formulae.back(), LUIdx);

3654 }

3655 }

3656

3658}

3659

3660

3661

3662void LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU,

3663 size_t LUIdx) {

3664

3665 if (Rewriter.isSafeToExpand(S))

3666 LU.RigidFormula = true;

3667

3668 Formula F;

3669 F.initialMatch(S, L, SE);

3670 bool Inserted = InsertFormula(LU, LUIdx, F);

3671 assert(Inserted && "Initial formula already exists!"); (void)Inserted;

3672}

3673

3674

3675

3676void

3677LSRInstance::InsertSupplementalFormula(const SCEV *S,

3678 LSRUse &LU, size_t LUIdx) {

3679 Formula F;

3680 F.BaseRegs.push_back(S);

3681 F.HasBaseReg = true;

3682 bool Inserted = InsertFormula(LU, LUIdx, F);

3683 assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;

3684}

3685

3686

3687void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {

3688 if (F.ScaledReg)

3689 RegUses.countRegister(F.ScaledReg, LUIdx);

3690 for (const SCEV *BaseReg : F.BaseRegs)

3691 RegUses.countRegister(BaseReg, LUIdx);

3692}

3693

3694

3695

3696bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {

3697

3698 assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) &&

3699 "Formula is illegal");

3700

3701 if (!LU.InsertFormula(F, *L))

3702 return false;

3703

3704 CountRegisters(F, LUIdx);

3705 return true;

3706}

3707

3708

3709

3710bool LSRInstance::IsFixupExecutedEachIncrement(const LSRFixup &LF) const {

3711

3712

3714}

3715

3716

3717

3718

3719

3720

3721void

3722LSRInstance::CollectLoopInvariantFixupsAndFormulae() {

3724 SmallPtrSet<const SCEV *, 32> Visited;

3725

3726

3727

3729 return;

3730

3731 while (!Worklist.empty()) {

3733

3734

3735 if (!Visited.insert(S).second)

3736 continue;

3737

3741 Worklist.push_back(C->getOperand());

3746 const Value *V = US->getValue();

3748

3749 if (L->contains(Inst)) continue;

3751

3752 continue;

3753 for (const Use &U : V->uses()) {

3755

3756 if (!UserInst)

3757 continue;

3758

3759 if (UserInst->isEHPad())

3760 continue;

3761

3762

3763 if (UserInst->getParent()->getParent() != L->getHeader()->getParent())

3764 continue;

3765

3770 if (!DT.dominates(L->getHeader(), UseBB))

3771 continue;

3772

3774 continue;

3775

3776

3777

3778

3779

3780

3781

3782

3785 bool HasIncompatibleEHPTerminatedBlock = false;

3787 for (unsigned int I = 0; I < PhiNode->getNumIncomingValues(); I++) {

3788 if (PhiNode->getIncomingValue(I) == ExpectedValue) {

3789 if (PhiNode->getIncomingBlock(I)->getTerminator()->isEHPad()) {

3790 HasIncompatibleEHPTerminatedBlock = true;

3791 break;

3792 }

3793 }

3794 }

3795 if (HasIncompatibleEHPTerminatedBlock) {

3796 continue;

3797 }

3798 }

3799

3800

3802 continue;

3803

3804

3806 const SCEV *UserS = SE.getSCEV(const_cast<Instruction *>(UserInst));

3807

3809 continue;

3810 if (UserS == US) {

3813 continue;

3814 }

3815 }

3816

3818 unsigned OtherIdx = U.getOperandNo();

3819 Value *OtherOp = ICI->getOperand(OtherIdx);

3821 continue;

3822 }

3823

3824

3825

3827 continue;

3828

3829 std::pair<size_t, Immediate> P =

3830 getUse(S, LSRUse::Basic, MemAccessTy());

3831 size_t LUIdx = P.first;

3832 Immediate Offset = P.second;

3833 LSRUse &LU = Uses[LUIdx];

3834 LSRFixup &LF = LU.getNewFixup();

3835 LF.UserInst = const_cast<Instruction *>(UserInst);

3836 LF.OperandValToReplace = U;

3838 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);

3839 LU.AllFixupsUnconditional &= IsFixupExecutedEachIncrement(LF);

3840 if (!LU.WidestFixupType ||

3843 LU.WidestFixupType = LF.OperandValToReplace->getType();

3844 InsertSupplementalFormula(US, LU, LUIdx);

3845 CountRegisters(LU.Formulae.back(), Uses.size() - 1);

3846 break;

3847 }

3848 }

3849 }

3850}

3851

3852

3853

3854

3855

3856

3859 const Loop *L,

3861 unsigned Depth = 0) {

3862

3864 return S;

3865

3867

3868 for (const SCEV *S : Add->operands()) {

3870 if (Remainder)

3871 Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);

3872 }

3873 return nullptr;

3874 }

3875 const SCEV *Start, *Step;

3877 const SCEV *Op1;

3879

3880 if (Start->isZero())

3881 return S;

3882

3884

3885

3888 Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);

3889 Remainder = nullptr;

3890 }

3891 if (Remainder != Start) {

3892 if (!Remainder)

3896

3898 }

3900

3903 if (Remainder)

3905 return nullptr;

3906 }

3907 return S;

3908}

3909

3910

3911

3913 LSRUse &LU, const SCEV *S, const Loop *L,

3915 if (LU.Kind != LSRUse::Address ||

3916 !LU.AccessTy.getType()->isIntOrIntVectorTy())

3917 return false;

3918 const SCEV *Start;

3920 return false;

3921

3922 if (TTI.isIndexedLoadLegal(TTI.MIM_PostInc, S->getType()) ||

3923 TTI.isIndexedStoreLegal(TTI.MIM_PostInc, S->getType())) {

3925 return true;

3926 }

3927 return false;

3928}

3929

3930

3931void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,

3932 const Formula &Base,

3933 unsigned Depth, size_t Idx,

3934 bool IsScaledReg) {

3935 const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];

3936

3937

3938

3939

3941 return;

3943 const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE);

3944 if (Remainder)

3946

3947 if (AddOps.size() == 1)

3948 return;

3949

3951 JE = AddOps.end();

3952 J != JE; ++J) {

3953

3954

3956 continue;

3957

3958

3959

3961 LU.AccessTy, *J, Base.getNumRegs() > 1))

3962 continue;

3963

3964

3966 InnerAddOps.append(std::next(J), std::as_const(AddOps).end());

3967

3968

3969

3970 if (InnerAddOps.size() == 1 &&

3972 LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))

3973 continue;

3974

3975 const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);

3976 if (InnerSum->isZero())

3977 continue;

3978 Formula F = Base;

3979

3980 if (F.UnfoldedOffset.isNonZero() && F.UnfoldedOffset.isScalable())

3981 continue;

3982

3983

3988 F.UnfoldedOffset =

3989 Immediate::getFixed((uint64_t)F.UnfoldedOffset.getFixedValue() +

3991 if (IsScaledReg) {

3992 F.ScaledReg = nullptr;

3993 F.Scale = 0;

3994 } else

3995 F.BaseRegs.erase(F.BaseRegs.begin() + Idx);

3996 } else if (IsScaledReg)

3997 F.ScaledReg = InnerSum;

3998 else

3999 F.BaseRegs[Idx] = InnerSum;

4000

4001

4006 F.UnfoldedOffset =

4007 Immediate::getFixed((uint64_t)F.UnfoldedOffset.getFixedValue() +

4009 else

4010 F.BaseRegs.push_back(*J);

4011

4012

4013 F.canonicalize(*L);

4014

4015 if (InsertFormula(LU, LUIdx, F))

4016

4017

4018

4019

4020

4021

4022 GenerateReassociations(LU, LUIdx, LU.Formulae.back(),

4024 }

4025}

4026

4027

4028void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,

4030 assert(Base.isCanonical(*L) && "Input must be in the canonical form");

4031

4033 return;

4034

4035 for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)

4036 GenerateReassociationsImpl(LU, LUIdx, Base, Depth, i);

4037

4038 if (Base.Scale == 1)

4039 GenerateReassociationsImpl(LU, LUIdx, Base, Depth,

4040 -1, true);

4041}

4042

4043

4044

4045void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,

4046 Formula Base) {

4047

4048 if (Base.BaseRegs.size() + (Base.Scale == 1) +

4049 (Base.UnfoldedOffset.isNonZero()) <=

4050 1)

4051 return;

4052

4053

4054

4055 Base.unscale();

4057 Formula NewBase = Base;

4058 NewBase.BaseRegs.clear();

4059 Type *CombinedIntegerType = nullptr;

4060 for (const SCEV *BaseReg : Base.BaseRegs) {

4063 if (!CombinedIntegerType)

4065 Ops.push_back(BaseReg);

4066 }

4067 else

4068 NewBase.BaseRegs.push_back(BaseReg);

4069 }

4070

4071

4072 if (Ops.size() == 0)

4073 return;

4074

4075

4076

4077 auto GenerateFormula = [&](const SCEV *Sum) {

4078 Formula F = NewBase;

4079

4080

4081

4082

4084 return;

4085

4086 F.BaseRegs.push_back(Sum);

4087 F.canonicalize(*L);

4088 (void)InsertFormula(LU, LUIdx, F);

4089 };

4090

4091

4092 if (Ops.size() > 1) {

4094 GenerateFormula(SE.getAddExpr(OpsCopy));

4095 }

4096

4097

4098

4099 if (NewBase.UnfoldedOffset.isNonZero() && NewBase.UnfoldedOffset.isFixed()) {

4100 assert(CombinedIntegerType && "Missing a type for the unfolded offset");

4102 NewBase.UnfoldedOffset.getFixedValue(), true));

4103 NewBase.UnfoldedOffset = Immediate::getFixed(0);

4105 }

4106}

4107

4108

4109void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,

4110 const Formula &Base, size_t Idx,

4111 bool IsScaledReg) {

4112 const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];

4114 if (G->isZero() || !GV)

4115 return;

4116 Formula F = Base;

4117 F.BaseGV = GV;

4118 if (isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))

4119 return;

4120 if (IsScaledReg)

4121 F.ScaledReg = G;

4122 else

4123 F.BaseRegs[Idx] = G;

4124 (void)InsertFormula(LU, LUIdx, F);

4125}

4126

4127

4128void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,

4129 Formula Base) {

4130

4131 if (Base.BaseGV) return;

4132

4133 for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)

4134 GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, i);

4135 if (Base.Scale == 1)

4136 GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, -1,

4137 true);

4138}

4139

4140

4141void LSRInstance::GenerateConstantOffsetsImpl(

4142 LSRUse &LU, unsigned LUIdx, const Formula &Base,

4143 const SmallVectorImpl &Worklist, size_t Idx, bool IsScaledReg) {

4144

4145 auto GenerateOffset = [&](const SCEV *G, Immediate Offset) {

4146 Formula F = Base;

4147 if (Base.BaseOffset.isCompatibleImmediate(Offset))

4148 return;

4149 F.BaseOffset = Base.BaseOffset.subUnsigned(Offset);

4150

4151 if (isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) {

4152

4153 const SCEV *NewOffset = Offset.getSCEV(SE, G->getType());

4154 const SCEV *NewG = SE.getAddExpr(NewOffset, G);

4155

4156 if (NewG->isZero()) {

4157 if (IsScaledReg) {

4158 F.Scale = 0;

4159 F.ScaledReg = nullptr;

4160 } else

4161 F.deleteBaseReg(F.BaseRegs[Idx]);

4162 F.canonicalize(*L);

4163 } else if (IsScaledReg)

4164 F.ScaledReg = NewG;

4165 else

4166 F.BaseRegs[Idx] = NewG;

4167

4168 (void)InsertFormula(LU, LUIdx, F);

4169 }

4170 };

4171

4172 const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];

4173

4174

4175

4176

4177

4178

4179

4180

4181

4183 const APInt *StepInt;

4187

4188 for (Immediate Offset : Worklist) {

4189 if (Offset.isFixed()) {

4190 Offset = Immediate::getFixed(Offset.getFixedValue() - Step);

4191 GenerateOffset(G, Offset);

4192 }

4193 }

4194 }

4195 }

4196 for (Immediate Offset : Worklist)

4197 GenerateOffset(G, Offset);

4198

4200 if (G->isZero() || Imm.isZero() ||

4201 Base.BaseOffset.isCompatibleImmediate(Imm))

4202 return;

4203 Formula F = Base;

4204 F.BaseOffset = F.BaseOffset.addUnsigned(Imm);

4205 if (isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))

4206 return;

4207 if (IsScaledReg) {

4208 F.ScaledReg = G;

4209 } else {

4210 F.BaseRegs[Idx] = G;

4211

4212

4213 F.canonicalize(*L);

4214 }

4215 (void)InsertFormula(LU, LUIdx, F);

4216}

4217

4218

4219void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,

4220 Formula Base) {

4221

4222

4224 Worklist.push_back(LU.MinOffset);

4225 if (LU.MaxOffset != LU.MinOffset)

4226 Worklist.push_back(LU.MaxOffset);

4227

4228 for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)

4229 GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, i);

4230 if (Base.Scale == 1)

4231 GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, -1,

4232 true);

4233}

4234

4235

4236

4237void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,

4238 Formula Base) {

4239 if (LU.Kind != LSRUse::ICmpZero) return;

4240

4241

4242 Type *IntTy = Base.getType();

4243 if (!IntTy) return;

4245

4246

4247 if (LU.MinOffset != LU.MaxOffset) return;

4248

4249

4250 if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())

4251 return;

4252 for (const SCEV *BaseReg : Base.BaseRegs)

4253 if (BaseReg->getType()->isPointerTy())

4254 return;

4255 assert(Base.BaseGV && "ICmpZero use is not legal!");

4256

4257

4258 for (int64_t Factor : Factors) {

4259

4261 continue;

4262

4263 if (Base.BaseOffset.isMin() && Factor == -1)

4264 continue;

4265

4266 if (Base.BaseOffset.isNonZero() && Base.BaseOffset.isScalable())

4267 continue;

4268 Immediate NewBaseOffset = Base.BaseOffset.mulUnsigned(Factor);

4269 assert(Factor != 0 && "Zero factor not expected!");

4270 if (NewBaseOffset.getFixedValue() / Factor !=

4271 Base.BaseOffset.getFixedValue())

4272 continue;

4273

4276 continue;

4277

4278

4279 Immediate Offset = LU.MinOffset;

4280 if (Offset.isMin() && Factor == -1)

4281 continue;

4283 if (Offset.getFixedValue() / Factor != LU.MinOffset.getFixedValue())

4284 continue;

4285

4288 continue;

4289

4290 Formula F = Base;

4291 F.BaseOffset = NewBaseOffset;

4292

4293

4295 continue;

4296

4297

4298 F.BaseOffset = F.BaseOffset.addUnsigned(Offset).subUnsigned(LU.MinOffset);

4299

4300 const SCEV *FactorS = SE.getConstant(IntTy, Factor);

4301

4302

4303 for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {

4304 F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS);

4305 if (getExactSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i])

4306 goto next;

4307 }

4308

4309

4310 if (F.ScaledReg) {

4311 F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS);

4313 continue;

4314 }

4315

4316

4317 if (F.UnfoldedOffset.isNonZero()) {

4318 if (F.UnfoldedOffset.isMin() && Factor == -1)

4319 continue;

4320 F.UnfoldedOffset = F.UnfoldedOffset.mulUnsigned(Factor);

4321 if (F.UnfoldedOffset.getFixedValue() / Factor !=

4322 Base.UnfoldedOffset.getFixedValue())

4323 continue;

4324

4326 IntTy, F.UnfoldedOffset.getFixedValue()))

4327 continue;

4328 }

4329

4330

4331 (void)InsertFormula(LU, LUIdx, F);

4332 next:;

4333 }

4334}

4335

4336

4337

4338void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {

4339

4340 Type *IntTy = Base.getType();

4341 if (!IntTy) return;

4342

4343

4344

4345 if (Base.Scale != 0 && Base.unscale())

4346 return;

4347

4348 assert(Base.Scale == 0 && "unscale did not did its job!");

4349

4350

4351 for (int64_t Factor : Factors) {

4352 Base.Scale = Factor;

4353 Base.HasBaseReg = Base.BaseRegs.size() > 1;

4354

4355 if (isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,

4357

4358

4359 if (LU.Kind == LSRUse::Basic &&

4360 isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,

4361 LU.AccessTy, Base) &&

4362 LU.AllFixupsOutsideLoop)

4363 LU.Kind = LSRUse::Special;

4364 else

4365 continue;

4366 }

4367

4368

4369 if (LU.Kind == LSRUse::ICmpZero && Base.HasBaseReg &&

4370 Base.BaseOffset.isZero() && Base.BaseGV)

4371 continue;

4372

4373 for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {

4375 if (AR && (AR->getLoop() == L || LU.AllFixupsOutsideLoop)) {

4376 const SCEV *FactorS = SE.getConstant(IntTy, Factor);

4377 if (FactorS->isZero())

4378 continue;

4379

4380

4381 if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true))

4382 if (!Quotient->isZero()) {

4383

4384 Formula F = Base;

4385 F.ScaledReg = Quotient;

4386 F.deleteBaseReg(F.BaseRegs[i]);

4387

4388

4389

4390 if (F.Scale == 1 && (F.BaseRegs.empty() ||

4391 (AR->getLoop() != L && LU.AllFixupsOutsideLoop)))

4392 continue;

4393

4394

4395 if (F.Scale == 1 && LU.AllFixupsOutsideLoop)

4396 F.canonicalize(*L);

4397 (void)InsertFormula(LU, LUIdx, F);

4398 }

4399 }

4400 }

4401 }

4402}

4403

4404

4405

4406

4407

4408

4409static const SCEV *

4411 const SCEV *Expr, Type *ToTy,

4413 const SCEV *Result = nullptr;

4414 for (auto &L : Loops) {

4418 if (!New || (Result && New != Result))

4419 return nullptr;

4420 Result = New;

4421 }

4422

4423 assert(Result && "failed to create expression");

4424 return Result;

4425}

4426

4427

4428void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {

4429

4430 if (Base.BaseGV) return;

4431

4432

4433 Type *DstTy = Base.getType();

4434 if (!DstTy) return;

4436 return;

4437

4438

4439

4440 if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())

4441 return;

4443 [](const SCEV *S) { return S->getType()->isPointerTy(); }))

4444 return;

4445

4447 for (auto &LF : LU.Fixups)

4448 Loops.push_back(LF.PostIncLoops);

4449

4450 for (Type *SrcTy : Types) {

4452 Formula F = Base;

4453

4454

4455

4456

4457

4458 if (F.ScaledReg) {

4459 const SCEV *NewScaledReg =

4461 if (!NewScaledReg || NewScaledReg->isZero())

4462 continue;

4463 F.ScaledReg = NewScaledReg;

4464 }

4465 bool HasZeroBaseReg = false;

4466 for (const SCEV *&BaseReg : F.BaseRegs) {

4467 const SCEV *NewBaseReg =

4469 if (!NewBaseReg || NewBaseReg->isZero()) {

4470 HasZeroBaseReg = true;

4471 break;

4472 }

4474 }

4475 if (HasZeroBaseReg)

4476 continue;

4477

4478

4479

4480 if (F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))

4481 continue;

4482

4483 F.canonicalize(*L);

4484 (void)InsertFormula(LU, LUIdx, F);

4485 }

4486 }

4487}

4488

4489namespace {

4490

4491

4492

4493

4494struct WorkItem {

4495 size_t LUIdx;

4496 Immediate Imm;

4497 const SCEV *OrigReg;

4498

4499 WorkItem(size_t LI, Immediate I, const SCEV *R)

4500 : LUIdx(LI), Imm(I), OrigReg(R) {}

4501

4502 void print(raw_ostream &OS) const;

4503 void dump() const;

4504};

4505

4506}

4507

4508#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

4509void WorkItem::print(raw_ostream &OS) const {

4510 OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx

4511 << " , add offset " << Imm;

4512}

4513

4516}

4517#endif

4518

4519

4520

4521void LSRInstance::GenerateCrossUseConstantOffsets() {

4522

4523 using ImmMapTy = std::map<Immediate, const SCEV *, KeyOrderTargetImmediate>;

4524

4525 DenseMap<const SCEV *, ImmMapTy> Map;

4526 DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;

4528 for (const SCEV *Use : RegUses) {

4529 const SCEV *Reg = Use;

4531 auto Pair = Map.try_emplace(Reg);

4532 if (Pair.second)

4534 Pair.first->second.insert(std::make_pair(Imm, Use));

4535 UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(Use);

4536 }

4537

4538

4539

4540

4542 SmallSet<std::pair<size_t, Immediate>, 32, KeyOrderSizeTAndImmediate>

4543 UniqueItems;

4544 for (const SCEV *Reg : Sequence) {

4545 const ImmMapTy &Imms = Map.find(Reg)->second;

4546

4547

4548 if (Imms.size() == 1)

4549 continue;

4550

4551 LLVM_DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';

4552 for (const auto &Entry

4553 : Imms) dbgs()

4554 << ' ' << Entry.first;

4555 dbgs() << '\n');

4556

4557

4558 for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();

4559 J != JE; ++J) {

4560 const SCEV *OrigReg = J->second;

4561

4562 Immediate JImm = J->first;

4563 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);

4564

4566 UsedByIndicesMap[Reg].count() == 1) {

4567 LLVM_DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg

4568 << '\n');

4569 continue;

4570 }

4571

4572

4573

4574 Immediate First = Imms.begin()->first;

4575 Immediate Last = std::prev(Imms.end())->first;

4576 if (First.isCompatibleImmediate(Last)) {

4577 LLVM_DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg

4578 << "\n");

4579 continue;

4580 }

4581

4582

4583 bool Scalable = First.isScalable() || Last.isScalable();

4584 int64_t FI = First.getKnownMinValue();

4585 int64_t LI = Last.getKnownMinValue();

4586

4587

4588 int64_t Avg = (FI & LI) + ((FI ^ LI) >> 1);

4589

4590

4591 Avg = Avg + ((FI ^ LI) & ((uint64_t)Avg >> 63));

4592 ImmMapTy::const_iterator OtherImms[] = {

4593 Imms.begin(), std::prev(Imms.end()),

4594 Imms.lower_bound(Immediate::get(Avg, Scalable))};

4595 for (const auto &M : OtherImms) {

4596 if (M == J || M == JE) continue;

4597 if (!JImm.isCompatibleImmediate(M->first))

4598 continue;

4599

4600

4601 Immediate Imm = JImm.subUnsigned(M->first);

4602 for (unsigned LUIdx : UsedByIndices.set_bits())

4603

4604 if (UniqueItems.insert(std::make_pair(LUIdx, Imm)).second)

4605 WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));

4606 }

4607 }

4608 }

4609

4610 Map.clear();

4612 UsedByIndicesMap.clear();

4613 UniqueItems.clear();

4614

4615

4616 for (const WorkItem &WI : WorkItems) {

4617 size_t LUIdx = WI.LUIdx;

4618 LSRUse &LU = Uses[LUIdx];

4619 Immediate Imm = WI.Imm;

4620 const SCEV *OrigReg = WI.OrigReg;

4621

4623 const SCEV *NegImmS = Imm.getNegativeSCEV(SE, IntTy);

4625

4626

4627 for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {

4628 Formula F = LU.Formulae[L];

4629

4630

4631

4632

4633 F.unscale();

4634

4635 if (F.ScaledReg == OrigReg) {

4636 if (F.BaseOffset.isCompatibleImmediate(Imm))

4637 continue;

4638 Immediate Offset = F.BaseOffset.addUnsigned(Imm.mulUnsigned(F.Scale));

4639

4640 const SCEV *S = Offset.getNegativeSCEV(SE, IntTy);

4641 if (F.referencesReg(S))

4642 continue;

4643 Formula NewF = F;

4644 NewF.BaseOffset = Offset;

4645 if (isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,

4646 NewF))

4647 continue;

4648 NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);

4649

4650

4651

4652

4654

4655

4656

4657 if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())

4658 continue;

4659 if (C->getValue()->isNegative() !=

4660 (NewF.BaseOffset.isLessThanZero()) &&

4661 (C->getAPInt().abs() * APInt(BitWidth, F.Scale))

4662 .ule(std::abs(NewF.BaseOffset.getFixedValue())))

4663 continue;

4664 }

4665

4666

4667 NewF.canonicalize(*this->L);

4668 (void)InsertFormula(LU, LUIdx, NewF);

4669 } else {

4670

4671 for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {

4672 const SCEV *BaseReg = F.BaseRegs[N];

4673 if (BaseReg != OrigReg)

4674 continue;

4675 Formula NewF = F;

4676 if (!NewF.BaseOffset.isCompatibleImmediate(Imm) ||

4677 !NewF.UnfoldedOffset.isCompatibleImmediate(Imm) ||

4678 !NewF.BaseOffset.isCompatibleImmediate(NewF.UnfoldedOffset))

4679 continue;

4680 NewF.BaseOffset = NewF.BaseOffset.addUnsigned(Imm);

4681 if (isLegalUse(TTI, LU.MinOffset, LU.MaxOffset,

4682 LU.Kind, LU.AccessTy, NewF)) {

4685 continue;

4686 Immediate NewUnfoldedOffset = NewF.UnfoldedOffset.addUnsigned(Imm);

4688 continue;

4689 NewF = F;

4690 NewF.UnfoldedOffset = NewUnfoldedOffset;

4691 }

4692 NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);

4693

4694

4695

4696

4697 for (const SCEV *NewReg : NewF.BaseRegs)

4699 if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())

4700 goto skip_formula;

4701 if ((C->getAPInt() + NewF.BaseOffset.getFixedValue())

4702 .abs()

4703 .slt(std::abs(NewF.BaseOffset.getFixedValue())) &&

4704 (C->getAPInt() + NewF.BaseOffset.getFixedValue())

4705 .countr_zero() >=

4707 NewF.BaseOffset.getFixedValue()))

4708 goto skip_formula;

4709 }

4710

4711

4712 NewF.canonicalize(*this->L);

4713 (void)InsertFormula(LU, LUIdx, NewF);

4714 break;

4715 skip_formula:;

4716 }

4717 }

4718 }

4719 }

4720}

4721

4722

4723void

4724LSRInstance::GenerateAllReuseFormulae() {

4725

4726

4727 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {

4728 LSRUse &LU = Uses[LUIdx];

4729 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)

4730 GenerateReassociations(LU, LUIdx, LU.Formulae[i]);

4731 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)

4732 GenerateCombinations(LU, LUIdx, LU.Formulae[i]);

4733 }

4734 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {

4735 LSRUse &LU = Uses[LUIdx];

4736 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)

4737 GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);

4738 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)

4739 GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);

4740 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)

4741 GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);

4742 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)

4743 GenerateScales(LU, LUIdx, LU.Formulae[i]);

4744 }

4745 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {

4746 LSRUse &LU = Uses[LUIdx];

4747 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)

4748 GenerateTruncates(LU, LUIdx, LU.Formulae[i]);

4749 }

4750

4751 GenerateCrossUseConstantOffsets();

4752

4754 "After generating reuse formulae:\n";

4755 print_uses(dbgs()));

4756}

4757

4758

4759

4760void LSRInstance::FilterOutUndesirableDedicatedRegisters() {

4761 DenseSet<const SCEV *> VisitedRegs;

4762 SmallPtrSet<const SCEV *, 16> Regs;

4763 SmallPtrSet<const SCEV *, 16> LoserRegs;

4764#ifndef NDEBUG

4765 bool ChangedFormulae = false;

4766#endif

4767

4768

4769

4770 using BestFormulaeTy = DenseMap<SmallVector<const SCEV *, 4>, size_t>;

4771

4772 BestFormulaeTy BestFormulae;

4773

4774 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {

4775 LSRUse &LU = Uses[LUIdx];

4777 dbgs() << '\n');

4778

4779 bool Any = false;

4780 for (size_t FIdx = 0, NumForms = LU.Formulae.size();

4781 FIdx != NumForms; ++FIdx) {

4782 Formula &F = LU.Formulae[FIdx];

4783

4784

4785

4786

4787

4788

4789

4790

4791 Cost CostF(L, SE, TTI, AMK);

4793 CostF.RateFormula(F, Regs, VisitedRegs, LU, HardwareLoopProfitable,

4794 &LoserRegs);

4795 if (CostF.isLoser()) {

4796

4797

4798

4799

4800

4801

4803 dbgs() << "\n");

4804 }

4805 else {

4807 for (const SCEV *Reg : F.BaseRegs) {

4808 if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))

4810 }

4811 if (F.ScaledReg &&

4812 RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))

4813 Key.push_back(F.ScaledReg);

4814

4815

4817

4818 std::pair<BestFormulaeTy::const_iterator, bool> P =

4819 BestFormulae.insert(std::make_pair(Key, FIdx));

4820 if (P.second)

4821 continue;

4822

4823 Formula &Best = LU.Formulae[P.first->second];

4824

4825 Cost CostBest(L, SE, TTI, AMK);

4827 CostBest.RateFormula(Best, Regs, VisitedRegs, LU,

4828 HardwareLoopProfitable);

4829 if (CostF.isLess(CostBest))

4832 dbgs() << "\n"

4833 " in favor of formula ";

4834 Best.print(dbgs()); dbgs() << '\n');

4835 }

4836#ifndef NDEBUG

4837 ChangedFormulae = true;

4838#endif

4839 LU.DeleteFormula(F);

4840 --FIdx;

4841 --NumForms;

4842 Any = true;

4843 }

4844

4845

4846 if (Any)

4847 LU.RecomputeRegs(LUIdx, RegUses);

4848

4849

4850 BestFormulae.clear();

4851 }

4852

4854 dbgs() << "\n"

4855 "After filtering out undesirable candidates:\n";

4856 print_uses(dbgs());

4857 });

4858}

4859

4860

4861

4862

4863size_t LSRInstance::EstimateSearchSpaceComplexity() const {

4864 size_t Power = 1;

4865 for (const LSRUse &LU : Uses) {

4866 size_t FSize = LU.Formulae.size();

4869 break;

4870 }

4871 Power *= FSize;

4873 break;

4874 }

4875 return Power;

4876}

4877

4878

4879

4880

4881void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {

4882 if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {

4883 LLVM_DEBUG(dbgs() << "The search space is too complex.\n");

4884

4885 LLVM_DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "

4886 "which use a superset of registers used by other "

4887 "formulae.\n");

4888

4889 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {

4890 LSRUse &LU = Uses[LUIdx];

4891 bool Any = false;

4892 for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {

4893 Formula &F = LU.Formulae[i];

4894 if (F.BaseOffset.isNonZero() && F.BaseOffset.isScalable())

4895 continue;

4896

4897

4898

4900 I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {

4902 Formula NewF = F;

4903

4904

4905 NewF.BaseOffset =

4906 Immediate::getFixed(NewF.BaseOffset.getFixedValue() +

4907 (uint64_t)C->getValue()->getSExtValue());

4908 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +

4909 (I - F.BaseRegs.begin()));

4910 if (LU.HasFormulaWithSameRegs(NewF)) {

4912 dbgs() << '\n');

4913 LU.DeleteFormula(F);

4914 --i;

4915 --e;

4916 Any = true;

4917 break;

4918 }

4921 if (F.BaseGV) {

4922 Formula NewF = F;

4923 NewF.BaseGV = GV;

4924 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +

4925 (I - F.BaseRegs.begin()));

4926 if (LU.HasFormulaWithSameRegs(NewF)) {

4928 dbgs() << '\n');

4929 LU.DeleteFormula(F);

4930 --i;

4931 --e;

4932 Any = true;

4933 break;

4934 }

4935 }

4936 }

4937 }

4938 }

4939 if (Any)

4940 LU.RecomputeRegs(LUIdx, RegUses);

4941 }

4942

4944 }

4945}

4946

4947

4948

4949void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {

4951 return;

4952

4954 dbgs() << "The search space is too complex.\n"

4955 "Narrowing the search space by assuming that uses separated "

4956 "by a constant offset will use the same registers.\n");

4957

4958

4959

4960 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {

4961 LSRUse &LU = Uses[LUIdx];

4962 for (const Formula &F : LU.Formulae) {

4963 if (F.BaseOffset.isZero() || (F.Scale != 0 && F.Scale != 1))

4964 continue;

4965

4966 LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);

4967 if (!LUThatHas)

4968 continue;

4969

4970 if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, false,

4971 LU.Kind, LU.AccessTy))

4972 continue;

4973

4975

4976 LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;

4977 LUThatHas->AllFixupsUnconditional &= LU.AllFixupsUnconditional;

4978

4979

4980 for (LSRFixup &Fixup : LU.Fixups) {

4981 Fixup.Offset += F.BaseOffset;

4982 LUThatHas->pushFixup(Fixup);

4984 }

4985

4986

4987 bool Any = false;

4988 for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {

4989 Formula &F = LUThatHas->Formulae[i];

4990 if (isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,

4991 LUThatHas->Kind, LUThatHas->AccessTy, F)) {

4993 LUThatHas->DeleteFormula(F);

4994 --i;

4995 --e;

4996 Any = true;

4997 }

4998 }

4999

5000 if (Any)

5001 LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);

5002

5003

5004 DeleteUse(LU, LUIdx);

5005 --LUIdx;

5006 --NumUses;

5007 break;

5008 }

5009 }

5010

5012}

5013

5014

5015

5016

5017void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){

5018 if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {

5019 LLVM_DEBUG(dbgs() << "The search space is too complex.\n");

5020

5021 LLVM_DEBUG(dbgs() << "Narrowing the search space by re-filtering out "

5022 "undesirable dedicated registers.\n");

5023

5024 FilterOutUndesirableDedicatedRegisters();

5025

5027 }

5028}

5029

5030

5031

5032

5033

5034

5035

5036

5037

5038

5039void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {

5041 return;

5042

5044 dbgs() << "The search space is too complex.\n"

5045 "Narrowing the search space by choosing the best Formula "

5046 "from the Formulae with the same Scale and ScaledReg.\n");

5047

5048

5049 using BestFormulaeTy = DenseMap<std::pair<const SCEV *, int64_t>, size_t>;

5050

5051 BestFormulaeTy BestFormulae;

5052#ifndef NDEBUG

5053 bool ChangedFormulae = false;

5054#endif

5055 DenseSet<const SCEV *> VisitedRegs;

5056 SmallPtrSet<const SCEV *, 16> Regs;

5057

5058 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {

5059 LSRUse &LU = Uses[LUIdx];

5061 dbgs() << '\n');

5062

5063

5064 auto IsBetterThan = [&](Formula &FA, Formula &FB) {

5065

5066

5067

5068

5069 size_t FARegNum = 0;

5070 for (const SCEV *Reg : FA.BaseRegs) {

5071 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);

5072 FARegNum += (NumUses - UsedByIndices.count() + 1);

5073 }

5074 size_t FBRegNum = 0;

5075 for (const SCEV *Reg : FB.BaseRegs) {

5076 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);

5077 FBRegNum += (NumUses - UsedByIndices.count() + 1);

5078 }

5079 if (FARegNum != FBRegNum)

5080 return FARegNum < FBRegNum;

5081

5082

5083

5084 Cost CostFA(L, SE, TTI, AMK);

5085 Cost CostFB(L, SE, TTI, AMK);

5087 CostFA.RateFormula(FA, Regs, VisitedRegs, LU, HardwareLoopProfitable);

5089 CostFB.RateFormula(FB, Regs, VisitedRegs, LU, HardwareLoopProfitable);

5090 return CostFA.isLess(CostFB);

5091 };

5092

5093 bool Any = false;

5094 for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;

5095 ++FIdx) {

5096 Formula &F = LU.Formulae[FIdx];

5097 if (F.ScaledReg)

5098 continue;

5099 auto P = BestFormulae.insert({{F.ScaledReg, F.Scale}, FIdx});

5100 if (P.second)

5101 continue;

5102

5103 Formula &Best = LU.Formulae[P.first->second];

5104 if (IsBetterThan(F, Best))

5107 dbgs() << "\n"

5108 " in favor of formula ";

5109 Best.print(dbgs()); dbgs() << '\n');

5110#ifndef NDEBUG

5111 ChangedFormulae = true;

5112#endif

5113 LU.DeleteFormula(F);

5114 --FIdx;

5115 --NumForms;

5116 Any = true;

5117 }

5118 if (Any)

5119 LU.RecomputeRegs(LUIdx, RegUses);

5120

5121

5122 BestFormulae.clear();

5123 }

5124

5126 dbgs() << "\n"

5127 "After filtering out undesirable candidates:\n";

5128 print_uses(dbgs());

5129 });

5130}

5131

5132

5133

5134void LSRInstance::NarrowSearchSpaceByFilterPostInc() {

5136 return;

5138 return;

5139

5140 LLVM_DEBUG(dbgs() << "The search space is too complex.\n"

5141 "Narrowing the search space by choosing the lowest "

5142 "register Formula for PostInc Uses.\n");

5143

5144 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {

5145 LSRUse &LU = Uses[LUIdx];

5146

5147 if (LU.Kind != LSRUse::Address)

5148 continue;

5151 continue;

5152

5153 size_t MinRegs = std::numeric_limits<size_t>::max();

5154 for (const Formula &F : LU.Formulae)

5155 MinRegs = std::min(F.getNumRegs(), MinRegs);

5156

5157 bool Any = false;

5158 for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;

5159 ++FIdx) {

5160 Formula &F = LU.Formulae[FIdx];

5161 if (F.getNumRegs() > MinRegs) {

5163 dbgs() << "\n");

5164 LU.DeleteFormula(F);

5165 --FIdx;

5166 --NumForms;

5167 Any = true;

5168 }

5169 }

5170 if (Any)

5171 LU.RecomputeRegs(LUIdx, RegUses);

5172

5174 break;

5175 }

5176

5178}

5179

5180

5181

5182

5183

5184

5185

5186

5187

5188

5189

5190

5191

5192

5193

5194

5195

5196

5197

5198

5199

5200

5201

5202

5203

5204

5205

5206

5207

5208

5209

5210

5211

5212

5213

5214

5215

5216

5217

5218

5219

5220

5221

5222void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {

5224 return;

5225

5226

5227

5228

5229

5230

5231 SmallPtrSet<const SCEV *, 4> UniqRegs;

5232 LLVM_DEBUG(dbgs() << "The search space is too complex.\n");

5233

5234

5235 DenseMap <const SCEV *, float> RegNumMap;

5236 for (const SCEV *Reg : RegUses) {

5238 continue;

5239 float PNotSel = 1;

5240 for (const LSRUse &LU : Uses) {

5241 if (!LU.Regs.count(Reg))

5242 continue;

5243 float P = LU.getNotSelectedProbability(Reg);

5244 if (P != 0.0)

5245 PNotSel *= P;

5246 else

5248 }

5249 RegNumMap.insert(std::make_pair(Reg, PNotSel));

5250 }

5251

5253 dbgs() << "Narrowing the search space by deleting costly formulas\n");

5254

5255

5256 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {

5257 LSRUse &LU = Uses[LUIdx];

5258

5259 if (LU.Formulae.size() < 2)

5260 continue;

5261

5262

5263

5264 float FMinRegNum = LU.Formulae[0].getNumRegs();

5265 float FMinARegNum = LU.Formulae[0].getNumRegs();

5266 size_t MinIdx = 0;

5267 for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {

5268 Formula &F = LU.Formulae[i];

5269 float FRegNum = 0;

5270 float FARegNum = 0;

5271 for (const SCEV *BaseReg : F.BaseRegs) {

5272 if (UniqRegs.count(BaseReg))

5273 continue;

5274 FRegNum += RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);

5276 FARegNum +=

5277 RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);

5278 }

5279 if (const SCEV *ScaledReg = F.ScaledReg) {

5280 if (!UniqRegs.count(ScaledReg)) {

5281 FRegNum +=

5282 RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);

5284 FARegNum +=

5285 RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);

5286 }

5287 }

5288 if (FMinRegNum > FRegNum ||

5289 (FMinRegNum == FRegNum && FMinARegNum > FARegNum)) {

5290 FMinRegNum = FRegNum;

5291 FMinARegNum = FARegNum;

5292 MinIdx = i;

5293 }

5294 }

5295 LLVM_DEBUG(dbgs() << " The formula "; LU.Formulae[MinIdx].print(dbgs());

5296 dbgs() << " with min reg num " << FMinRegNum << '\n');

5297 if (MinIdx != 0)

5298 std::swap(LU.Formulae[MinIdx], LU.Formulae[0]);

5299 while (LU.Formulae.size() != 1) {

5301 dbgs() << '\n');

5302 LU.Formulae.pop_back();

5303 }

5304 LU.RecomputeRegs(LUIdx, RegUses);

5305 assert(LU.Formulae.size() == 1 && "Should be exactly 1 min regs formula");

5306 Formula &F = LU.Formulae[0];

5308

5310 if (F.ScaledReg)

5311 UniqRegs.insert(F.ScaledReg);

5312 }

5314}

5315

5316

5317

5318

5322 MemAccessTy AccessType) {

5323 if (Best->getType() != Reg->getType() ||

5327 return false;

5329 if (!Diff)

5330 return false;

5331

5332 return TTI.isLegalAddressingMode(

5333 AccessType.MemTy, nullptr,

5334 Diff->getSExtValue(),

5335 true, 0, AccessType.AddrSpace) &&

5336 TTI.isLegalAddressingMode(

5337 AccessType.MemTy, nullptr,

5338 -Diff->getSExtValue(),

5339 true, 0, AccessType.AddrSpace);

5340}

5341

5342

5343

5344

5345void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {

5346

5347

5348 SmallPtrSet<const SCEV *, 4> Taken;

5349 while (EstimateSearchSpaceComplexity() >= ComplexityLimit) {

5350

5351

5352 LLVM_DEBUG(dbgs() << "The search space is too complex.\n");

5353

5354

5355

5356 const SCEV *Best = nullptr;

5357 unsigned BestNum = 0;

5358 for (const SCEV *Reg : RegUses) {

5360 continue;

5361 if (!Best) {

5362 Best = Reg;

5363 BestNum = RegUses.getUsedByIndices(Reg).count();

5364 } else {

5365 unsigned Count = RegUses.getUsedByIndices(Reg).count();

5366 if (Count > BestNum) {

5367 Best = Reg;

5368 BestNum = Count;

5369 }

5370

5371

5372

5373

5374 if (Count == BestNum) {

5375 int LUIdx = RegUses.getUsedByIndices(Reg).find_first();

5376 if (LUIdx >= 0 && Uses[LUIdx].Kind == LSRUse::Address &&

5378 Uses[LUIdx].AccessTy)) {

5379 Best = Reg;

5380 BestNum = Count;

5381 }

5382 }

5383 }

5384 }

5385 assert(Best && "Failed to find best LSRUse candidate");

5386

5387 LLVM_DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best

5388 << " will yield profitable reuse.\n");

5390

5391

5392

5393 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {

5394 LSRUse &LU = Uses[LUIdx];

5395 if (!LU.Regs.count(Best)) continue;

5396

5397 bool Any = false;

5398 for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {

5399 Formula &F = LU.Formulae[i];

5400 if (F.referencesReg(Best)) {

5402 LU.DeleteFormula(F);

5403 --e;

5404 --i;

5405 Any = true;

5406 assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?");

5407 continue;

5408 }

5409 }

5410

5411 if (Any)

5412 LU.RecomputeRegs(LUIdx, RegUses);

5413 }

5414

5416 }

5417}

5418

5419

5420

5421

5422

5423void LSRInstance::NarrowSearchSpaceUsingHeuristics() {

5424 NarrowSearchSpaceByDetectingSupersets();

5425 NarrowSearchSpaceByCollapsingUnrolledCode();

5426 NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();

5428 NarrowSearchSpaceByFilterFormulaWithSameScaledReg();

5429 NarrowSearchSpaceByFilterPostInc();

5431 NarrowSearchSpaceByDeletingCostlyFormulas();

5432 else

5433 NarrowSearchSpaceByPickingWinnerRegs();

5434}

5435

5436

5437void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,

5438 Cost &SolutionCost,

5439 SmallVectorImpl<const Formula *> &Workspace,

5440 const Cost &CurCost,

5441 const SmallPtrSet<const SCEV *, 16> &CurRegs,

5442 DenseSet<const SCEV *> &VisitedRegs) const {

5443

5444

5445

5446

5447

5448

5449

5450

5451

5452

5453 const LSRUse &LU = Uses[Workspace.size()];

5454

5455

5456

5457

5458

5459 SmallSetVector<const SCEV *, 4> ReqRegs;

5460 for (const SCEV *S : CurRegs)

5461 if (LU.Regs.count(S))

5463

5464 SmallPtrSet<const SCEV *, 16> NewRegs;

5465 Cost NewCost(L, SE, TTI, AMK);

5466 for (const Formula &F : LU.Formulae) {

5467

5468

5469

5470

5471

5472

5474 int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size());

5475 for (const SCEV *Reg : ReqRegs) {

5476 if ((F.ScaledReg && F.ScaledReg == Reg) ||

5478 --NumReqRegsToFind;

5479 if (NumReqRegsToFind == 0)

5480 break;

5481 }

5482 }

5483 if (NumReqRegsToFind != 0) {

5484

5485

5486 continue;

5487 }

5488 }

5489

5490

5491

5492 NewCost = CurCost;

5493 NewRegs = CurRegs;

5494 NewCost.RateFormula(F, NewRegs, VisitedRegs, LU, HardwareLoopProfitable);

5495 if (NewCost.isLess(SolutionCost)) {

5497 if (Workspace.size() != Uses.size()) {

5498 SolveRecurse(Solution, SolutionCost, Workspace, NewCost,

5499 NewRegs, VisitedRegs);

5500 if (F.getNumRegs() == 1 && Workspace.size() == 1)

5501 VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);

5502 } else {

5504 dbgs() << ".\nRegs:\n";

5505 for (const SCEV *S : NewRegs) dbgs()

5506 << "- " << *S << "\n";

5507 dbgs() << '\n');

5508

5509 SolutionCost = NewCost;

5510 Solution = Workspace;

5511 }

5513 }

5514 }

5515}

5516

5517

5518

5519void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {

5521 Cost SolutionCost(L, SE, TTI, AMK);

5522 SolutionCost.Lose();

5523 Cost CurCost(L, SE, TTI, AMK);

5524 SmallPtrSet<const SCEV *, 16> CurRegs;

5525 DenseSet<const SCEV *> VisitedRegs;

5527

5528

5529 SolveRecurse(Solution, SolutionCost, Workspace, CurCost,

5530 CurRegs, VisitedRegs);

5531 if (Solution.empty()) {

5532 LLVM_DEBUG(dbgs() << "\nNo Satisfactory Solution\n");

5533 return;

5534 }

5535

5536

5538 "The chosen solution requires ";

5539 SolutionCost.print(dbgs()); dbgs() << ":\n";

5540 for (size_t i = 0, e = Uses.size(); i != e; ++i) {

5541 dbgs() << " ";

5543 dbgs() << "\n"

5544 " ";

5545 Solution[i]->print(dbgs());

5546 dbgs() << '\n';

5547 });

5548

5549 assert(Solution.size() == Uses.size() && "Malformed solution!");

5550

5551 const bool EnableDropUnprofitableSolution = [&] {

5554 return true;

5556 return false;

5559 }

5561 }();

5562

5563 if (BaselineCost.isLess(SolutionCost)) {

5564 if (!EnableDropUnprofitableSolution)

5566 dbgs() << "Baseline is more profitable than chosen solution, "

5567 "add option 'lsr-drop-solution' to drop LSR solution.\n");

5568 else {

5569 LLVM_DEBUG(dbgs() << "Baseline is more profitable than chosen "

5570 "solution, dropping LSR solution.\n";);

5571 Solution.clear();

5572 }

5573 }

5574}

5575

5576

5577

5578

5581 const SmallVectorImpl<Instruction *> &Inputs)

5582 const {

5584 while (true) {

5585 bool AllDominate = true;

5587

5588

5590 return IP;

5591

5592 for (Instruction *Inst : Inputs) {

5593 if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {

5594 AllDominate = false;

5595 break;

5596 }

5597

5598

5599 if (Tentative->getParent() == Inst->getParent() &&

5600 (!BetterPos || !DT.dominates(Inst, BetterPos)))

5602 }

5603 if (!AllDominate)

5604 break;

5605 if (BetterPos)

5607 else

5609

5610 const Loop *IPLoop = LI.getLoopFor(IP->getParent());

5611 unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;

5612

5615 if (!Rung) return IP;

5616 Rung = Rung->getIDom();

5617 if (!Rung) return IP;

5618 IDom = Rung->getBlock();

5619

5620

5621 const Loop *IDomLoop = LI.getLoopFor(IDom);

5622 unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;

5623 if (IDomDepth <= IPLoopDepth &&

5624 (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))

5625 break;

5626 }

5627

5629 }

5630

5631 return IP;

5632}

5633

5634

5635

5637 BasicBlock::iterator LowestIP, const LSRFixup &LF, const LSRUse &LU) const {

5638

5639

5640

5641 SmallVector<Instruction *, 4> Inputs;

5644 if (LU.Kind == LSRUse::ICmpZero)

5645 if (Instruction *I =

5648 if (LF.PostIncLoops.count(L)) {

5649 if (LF.isUseFullyOutsideLoop(L))

5650 Inputs.push_back(L->getLoopLatch()->getTerminator());

5651 else

5652 Inputs.push_back(IVIncInsertPos);

5653 }

5654

5655

5656 for (const Loop *PIL : LF.PostIncLoops) {

5657 if (PIL == L) continue;

5658

5659

5662 if (!ExitingBlocks.empty()) {

5664 for (unsigned i = 1, e = ExitingBlocks.size(); i != e; ++i)

5667 }

5668 }

5669

5671 "Insertion point must be a normal instruction");

5672

5673

5674

5676

5677

5679

5680

5681 while (IP->isEHPad()) ++IP;

5682

5683

5684

5685

5686 while (Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP)

5687 ++IP;

5688

5689 return IP;

5690}

5691

5692

5693

5694Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,

5696 SmallVectorImpl &DeadInsts) const {

5697 if (LU.RigidFormula)

5698 return LF.OperandValToReplace;

5699

5700

5701

5702 IP = AdjustInsertPositionForExpand(IP, LF, LU);

5703 Rewriter.setInsertPoint(&*IP);

5704

5705

5706

5707 Rewriter.setPostInc(LF.PostIncLoops);

5708

5709

5710 Type *OpTy = LF.OperandValToReplace->getType();

5711

5712 Type *Ty = F.getType();

5713 if (!Ty)

5714

5715 Ty = OpTy;

5717

5718 Ty = OpTy;

5719

5721

5722

5724

5725

5726 for (const SCEV *Reg : F.BaseRegs) {

5727 assert(Reg->isZero() && "Zero allocated in a base register!");

5728

5729

5732 }

5733

5734

5735 Value *ICmpScaledV = nullptr;

5736 if (F.Scale != 0) {

5737 const SCEV *ScaledS = F.ScaledReg;

5738

5739

5742

5743 if (LU.Kind == LSRUse::ICmpZero) {

5744

5745 if (F.Scale == 1)

5746 Ops.push_back(

5748 else {

5749

5750

5751

5753 "The only scale supported by ICmpZero uses is -1!");

5754 ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr);

5755 }

5756 } else {

5757

5758

5759

5760

5761

5762 if (Ops.empty() && LU.Kind == LSRUse::Address &&

5765 Ops.clear();

5767 }

5769 if (F.Scale != 1)

5770 ScaledS =

5772 Ops.push_back(ScaledS);

5773 }

5774 }

5775

5776

5777 if (F.BaseGV) {

5778

5779 if (Ops.empty()) {

5781 Ops.clear();

5783 }

5785 }

5786

5787

5788

5789 if (Ops.empty()) {

5791 Ops.clear();

5793 }

5794

5795

5796

5797

5798 assert(F.BaseOffset.isCompatibleImmediate(LF.Offset) &&

5799 "Expanding mismatched offsets\n");

5800

5801 Immediate Offset = F.BaseOffset.addUnsigned(LF.Offset);

5802 if (Offset.isNonZero()) {

5803 if (LU.Kind == LSRUse::ICmpZero) {

5804

5805

5806 if (!ICmpScaledV)

5807 ICmpScaledV =

5809 else {

5811 ICmpScaledV = ConstantInt::get(IntTy, Offset.getFixedValue());

5812 }

5813 } else {

5814

5815

5816 Ops.push_back(Offset.getUnknownSCEV(SE, IntTy));

5817 }

5818 }

5819

5820

5821 Immediate UnfoldedOffset = F.UnfoldedOffset;

5822 if (UnfoldedOffset.isNonZero()) {

5823

5824 Ops.push_back(UnfoldedOffset.getUnknownSCEV(SE, IntTy));

5825 }

5826

5827

5828 const SCEV *FullS = Ops.empty() ?

5831 Value *FullV = Rewriter.expandCodeFor(FullS, Ty);

5832

5833

5835

5836

5837

5838

5839 if (LU.Kind == LSRUse::ICmpZero) {

5843 assert(F.BaseGV && "ICmp does not support folding a global value and "

5844 "a scale at the same time!");

5845 if (F.Scale == -1) {

5846 if (ICmpScaledV->getType() != OpTy) {

5849 ICmpScaledV, OpTy, "tmp", CI->getIterator());

5850 ICmpScaledV = Cast;

5851 }

5853 } else {

5854

5855

5856 assert((F.Scale == 0 || F.Scale == 1) &&

5857 "ICmp does not support folding a global value and "

5858 "a scale at the same time!");

5860 -(uint64_t)Offset.getFixedValue());

5861 if (C->getType() != OpTy) {

5865 assert(C && "Cast of ConstantInt should have folded");

5866 }

5867

5869 }

5870 }

5871

5872 return FullV;

5873}

5874

5875

5876

5877

5878void LSRInstance::RewriteForPHI(PHINode *PN, const LSRUse &LU,

5879 const LSRFixup &LF, const Formula &F,

5880 SmallVectorImpl &DeadInsts) {

5881 DenseMap<BasicBlock *, Value *> Inserted;

5882

5885 bool needUpdateFixups = false;

5887

5888

5889

5890

5891

5896 Loop *PNLoop = LI.getLoopFor(Parent);

5897 if (!PNLoop || Parent != PNLoop->getHeader()) {

5898

5901 NewBB =

5903 CriticalEdgeSplittingOptions(&DT, &LI, MSSAU)

5904 .setMergeIdenticalEdges()

5905 .setKeepOneInputPHIs());

5906 } else {

5908 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);

5910 NewBB = NewBBs[0];

5911 }

5912

5913

5914

5915 if (NewBB) {

5916

5917

5918

5919 if (L->contains(BB) && L->contains(PN))

5921

5922

5924 BB = NewBB;

5926

5927 needUpdateFixups = true;

5928 }

5929 }

5930 }

5931

5932 std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =

5934 if (!Pair.second)

5936 else {

5939

5940

5941 Type *OpTy = LF.OperandValToReplace->getType();

5942 if (FullV->getType() != OpTy)

5945 LF.OperandValToReplace->getType(), "tmp",

5947

5948

5949

5950

5952 if (L->contains(I) && L->contains(BB))

5953 InsertedNonLCSSAInsts.insert(I);

5954

5956 Pair.first->second = FullV;

5957 }

5958

5959

5960

5961

5962

5963 if (needUpdateFixups) {

5964 for (LSRUse &LU : Uses)

5965 for (LSRFixup &Fixup : LU.Fixups)

5966

5967

5968

5969 if (Fixup.UserInst == PN) {

5970

5971

5972 bool foundInOriginalPHI = false;

5974 if (val == Fixup.OperandValToReplace) {

5975 foundInOriginalPHI = true;

5976 break;

5977 }

5978

5979

5980 if (foundInOriginalPHI)

5981 continue;

5982

5983

5984

5985

5988 ++I) {

5991 if (val == Fixup.OperandValToReplace)

5992 Fixup.UserInst = NewPN;

5993 }

5994 }

5995 }

5996 }

5997}

5998

5999

6000

6001

6002void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF,

6003 const Formula &F,

6004 SmallVectorImpl &DeadInsts) {

6005

6006

6008 RewriteForPHI(PN, LU, LF, F, DeadInsts);

6009 } else {

6010 Value *FullV = Expand(LU, LF, F, LF.UserInst->getIterator(), DeadInsts);

6011

6012

6013 Type *OpTy = LF.OperandValToReplace->getType();

6014 if (FullV->getType() != OpTy) {

6017 FullV, OpTy, "tmp", LF.UserInst->getIterator());

6018 FullV = Cast;

6019 }

6020

6021

6022

6023

6024

6025

6026 if (LU.Kind == LSRUse::ICmpZero)

6028 else

6030 }

6031

6034}

6035

6036

6037

6038

6039

6040

6042 const LSRFixup &Fixup, const LSRUse &LU,

6045

6046 if (LU.Kind != LSRUse::Address)

6047 return IVIncInsertPos;

6048

6049

6051 Type *Ty = I->getType();

6054 return IVIncInsertPos;

6055

6056

6057

6058 BasicBlock *HoistBlock = I->getParent();

6061 return IVIncInsertPos;

6062

6064}

6065

6066

6067

6068void LSRInstance::ImplementSolution(

6069 const SmallVectorImpl<const Formula *> &Solution) {

6070

6071

6073

6074

6075 for (const IVChain &Chain : IVChainVec) {

6078 }

6079

6080

6081 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx)

6082 for (const LSRFixup &Fixup : Uses[LUIdx].Fixups) {

6085 Rewriter.setIVIncInsertPos(L, InsertPos);

6086 Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], DeadInsts);

6088 }

6089

6090 auto InsertedInsts = InsertedNonLCSSAInsts.takeVector();

6092

6093 for (const IVChain &Chain : IVChainVec) {

6094 GenerateIVChain(Chain, DeadInsts);

6096 }

6097

6098 for (const WeakVH &IV : Rewriter.getInsertedIVs())

6101

6102

6103

6105

6107 &TLI, MSSAU);

6108

6109

6110

6111

6112

6113

6114

6115

6116 for (PHINode &PN : L->getHeader()->phis()) {

6117 BinaryOperator *BO = nullptr;

6118 Value *Start = nullptr, *Step = nullptr;

6120 continue;

6121

6123 case Instruction::Sub:

6125

6126 continue;

6127 break;

6128 case Instruction::Add:

6129 break;

6130 default:

6131 continue;

6132 };

6133

6135

6136

6137 continue;

6138

6140

6141 continue;

6142

6143

6145 [&](Use &U) {return DT.dominates(IVIncInsertPos, U);}))

6146 continue;

6149 }

6150

6151

6152}

6153

6154LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,

6155 DominatorTree &DT, LoopInfo &LI,

6156 const TargetTransformInfo &TTI, AssumptionCache &AC,

6157 TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU)

6158 : IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI), TTI(TTI), L(L),

6161 : TTI.getPreferredAddressingMode(L, &SE)),

6163

6164 if (L->isLoopSimplifyForm())

6165 return;

6166

6167

6169

6170

6171

6172 unsigned NumUsers = 0;

6175 (void)U;

6176 LLVM_DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << U

6177 << "\n");

6178 return;

6179 }

6180

6181

6182

6184 auto FirstNonPHI = PN->getParent()->getFirstNonPHIIt();

6189 return;

6190 }

6191 }

6192

6194 L->getHeader()->printAsOperand(dbgs(), false);

6195 dbgs() << ":\n");

6196

6197

6198

6200 HardwareLoopProfitable =

6201 TTI.isHardwareLoopProfitable(L, SE, AC, &TLI, HWLoopInfo);

6202

6203

6204

6205#if LLVM_ENABLE_ABI_BREAKING_CHECKS

6207#endif

6208 Rewriter.disableCanonicalMode();

6209 Rewriter.enableLSRMode();

6210

6211

6212 OptimizeShadowIV();

6213 OptimizeLoopTermCond();

6214

6215

6216 if (IU.empty()) return;

6217

6218

6219 if (L->isInnermost()) {

6220 LLVM_DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n");

6221 return;

6222 }

6223

6224

6225

6226

6227

6228

6229

6231 CollectChains();

6232 CollectInterestingTypesAndFactors();

6233 CollectFixupsAndInitialFormulae();

6234 CollectLoopInvariantFixupsAndFormulae();

6235

6236 if (Uses.empty())

6237 return;

6238

6240 print_uses(dbgs()));

6241 LLVM_DEBUG(dbgs() << "The baseline solution requires ";

6242 BaselineCost.print(dbgs()); dbgs() << "\n");

6243

6244

6245

6246 GenerateAllReuseFormulae();

6247

6248 FilterOutUndesirableDedicatedRegisters();

6249 NarrowSearchSpaceUsingHeuristics();

6250

6252 Solve(Solution);

6253

6254

6255 Factors.clear();

6256 Types.clear();

6257 RegUses.clear();

6258

6259 if (Solution.empty())

6260 return;

6261

6262#ifndef NDEBUG

6263

6264 for (const LSRUse &LU : Uses) {

6265 for (const Formula &F : LU.Formulae)

6266 assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,

6267 F) && "Illegal formula generated!");

6268 };

6269#endif

6270

6271

6272 ImplementSolution(Solution);

6273}

6274

6275#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

6276void LSRInstance::print_factors_and_types(raw_ostream &OS) const {

6277 if (Factors.empty() && Types.empty()) return;

6278

6279 OS << "LSR has identified the following interesting factors and types: ";

6280 bool First = true;

6281

6282 for (int64_t Factor : Factors) {

6283 if (First) OS << ", ";

6285 OS << '*' << Factor;

6286 }

6287

6288 for (Type *Ty : Types) {

6289 if (First) OS << ", ";

6291 OS << '(' << *Ty << ')';

6292 }

6293 OS << '\n';

6294}

6295

6296void LSRInstance::print_fixups(raw_ostream &OS) const {

6297 OS << "LSR is examining the following fixup sites:\n";

6298 for (const LSRUse &LU : Uses)

6299 for (const LSRFixup &LF : LU.Fixups) {

6300 dbgs() << " ";

6301 LF.print(OS);

6302 OS << '\n';

6303 }

6304}

6305

6306void LSRInstance::print_uses(raw_ostream &OS) const {

6307 OS << "LSR is examining the following uses:\n";

6308 for (const LSRUse &LU : Uses) {

6309 dbgs() << " ";

6310 LU.print(OS);

6311 OS << '\n';

6312 for (const Formula &F : LU.Formulae) {

6313 OS << " ";

6314 F.print(OS);

6315 OS << '\n';

6316 }

6317 }

6318}

6319

6320void LSRInstance::print(raw_ostream &OS) const {

6321 print_factors_and_types(OS);

6322 print_fixups(OS);

6323 print_uses(OS);

6324}

6325

6328}

6329#endif

6330

6331namespace {

6332

6333class LoopStrengthReduce : public LoopPass {

6334public:

6335 static char ID;

6336

6337 LoopStrengthReduce();

6338

6339private:

6340 bool runOnLoop(Loop *L, LPPassManager &LPM) override;

6341 void getAnalysisUsage(AnalysisUsage &AU) const override;

6342};

6343

6344}

6345

6346LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) {

6348}

6349

6350void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {

6351

6352

6354

6358 AU.addRequired();

6359 AU.addPreserved();

6360 AU.addRequired();

6361 AU.addPreserved();

6362 AU.addRequired();

6363 AU.addRequired();

6364

6365

6369 AU.addRequired();

6371}

6372

6373namespace {

6374

6375

6377ToDwarfOpIter(SmallVectorImpl<uint64_t> &Expr) {

6378 llvm::DIExpression::expr_op_iterator Begin =

6379 llvm::DIExpression::expr_op_iterator(Expr.begin());

6380 llvm::DIExpression::expr_op_iterator End =

6381 llvm::DIExpression::expr_op_iterator(Expr.end());

6382 return {Begin, End};

6383}

6384

6385struct SCEVDbgValueBuilder {

6386 SCEVDbgValueBuilder() = default;

6387 SCEVDbgValueBuilder(const SCEVDbgValueBuilder &Base) { clone(Base); }

6388

6389 void clone(const SCEVDbgValueBuilder &Base) {

6390 LocationOps = Base.LocationOps;

6391 Expr = Base.Expr;

6392 }

6393

6394 void clear() {

6395 LocationOps.clear();

6396 Expr.clear();

6397 }

6398

6399

6401

6402 SmallVector<Value *, 2> LocationOps;

6403

6404 void pushOperator(uint64_t Op) { Expr.push_back(Op); }

6405 void pushUInt(uint64_t Operand) { Expr.push_back(Operand); }

6406

6407

6408

6411 auto *It = llvm::find(LocationOps, V);

6412 unsigned ArgIndex = 0;

6413 if (It != LocationOps.end()) {

6414 ArgIndex = std::distance(LocationOps.begin(), It);

6415 } else {

6416 ArgIndex = LocationOps.size();

6418 }

6420 }

6421

6422 void pushValue(const SCEVUnknown *U) {

6424 pushLocation(V);

6425 }

6426

6427 bool pushConst(const SCEVConstant *C) {

6428 if (C->getAPInt().getSignificantBits() > 64)

6429 return false;

6430 Expr.push_back(llvm::dwarf::DW_OP_consts);

6431 Expr.push_back(C->getAPInt().getSExtValue());

6432 return true;

6433 }

6434

6435

6436

6438 return ToDwarfOpIter(Expr);

6439 }

6440

6441

6442

6443 bool pushArithmeticExpr(const llvm::SCEVCommutativeExpr *CommExpr,

6444 uint64_t DwarfOp) {

6446 "Expected arithmetic SCEV type");

6448 unsigned EmitOperator = 0;

6449 for (const auto &Op : CommExpr->operands()) {

6451

6452 if (EmitOperator >= 1)

6453 pushOperator(DwarfOp);

6454 ++EmitOperator;

6455 }

6457 }

6458

6459

6460 bool pushCast(const llvm::SCEVCastExpr *C, bool IsSigned) {

6461 const llvm::SCEV *Inner = C->getOperand(0);

6462 const llvm::Type *Type = C->getType();

6463 uint64_t ToWidth = Type->getIntegerBitWidth();

6464 bool Success = pushSCEV(Inner);

6466 IsSigned ? llvm::dwarf::DW_ATE_signed

6467 : llvm::dwarf::DW_ATE_unsigned};

6468 for (const auto &Op : CastOps)

6469 pushOperator(Op);

6471 }

6472

6473

6474 bool pushSCEV(const llvm::SCEV *S) {

6477 Success &= pushConst(StartInt);

6478

6480 if (U->getValue())

6481 return false;

6482 pushLocation(U->getValue());

6483

6485 Success &= pushArithmeticExpr(MulRec, llvm::dwarf::DW_OP_mul);

6486

6488 Success &= pushSCEV(UDiv->getLHS());

6489 Success &= pushSCEV(UDiv->getRHS());

6490 pushOperator(llvm::dwarf::DW_OP_div);

6491

6493

6496 "Unexpected cast type in SCEV.");

6498

6500 Success &= pushArithmeticExpr(AddExpr, llvm::dwarf::DW_OP_plus);

6501

6503

6504

6505 return false;

6506

6507 } else {

6508 return false;

6509 }

6511 }

6512

6513

6514

6515 bool isIdentityFunction(uint64_t Op, const SCEV *S) {

6517 if (C->getAPInt().getSignificantBits() > 64)

6518 return false;

6519 int64_t I = C->getAPInt().getSExtValue();

6520 switch (Op) {

6521 case llvm::dwarf::DW_OP_plus:

6522 case llvm::dwarf::DW_OP_minus:

6523 return I == 0;

6524 case llvm::dwarf::DW_OP_mul:

6525 case llvm::dwarf::DW_OP_div:

6526 return I == 1;

6527 }

6528 }

6529 return false;

6530 }

6531

6532

6533

6534

6535

6536

6537

6538 bool SCEVToValueExpr(const llvm::SCEVAddRecExpr &SAR, ScalarEvolution &SE) {

6542

6543

6544 if (!isIdentityFunction(llvm::dwarf::DW_OP_mul, Stride)) {

6545 if (!pushSCEV(Stride))

6546 return false;

6547 pushOperator(llvm::dwarf::DW_OP_mul);

6548 }

6549 if (!isIdentityFunction(llvm::dwarf::DW_OP_plus, Start)) {

6550 if (!pushSCEV(Start))

6551 return false;

6552 pushOperator(llvm::dwarf::DW_OP_plus);

6553 }

6554 return true;

6555 }

6556

6557

6558 void createOffsetExpr(int64_t Offset, Value *OffsetValue) {

6559 pushLocation(OffsetValue);

6562 dbgs() << "scev-salvage: Generated IV offset expression. Offset: "

6563 << std::to_string(Offset) << "\n");

6564 }

6565

6566

6567

6568

6569 bool createIterCountExpr(const SCEV *S,

6570 const SCEVDbgValueBuilder &IterationCount,

6571 ScalarEvolution &SE) {

6572

6573

6574

6575

6576

6578 return false;

6579

6580 LLVM_DEBUG(dbgs() << "scev-salvage: Location to salvage SCEV: " << *S

6581 << '\n');

6582

6584 if (!Rec->isAffine())

6585 return false;

6586

6588 return false;

6589

6590

6591

6592 clone(IterationCount);

6593 if (!SCEVToValueExpr(*Rec, SE))

6594 return false;

6595

6596 return true;

6597 }

6598

6599

6600

6601

6602

6603

6604 bool SCEVToIterCountExpr(const llvm::SCEVAddRecExpr &SAR,

6605 ScalarEvolution &SE) {

6609

6610

6611 if (!isIdentityFunction(llvm::dwarf::DW_OP_minus, Start)) {

6612 if (!pushSCEV(Start))

6613 return false;

6614 pushOperator(llvm::dwarf::DW_OP_minus);

6615 }

6616 if (!isIdentityFunction(llvm::dwarf::DW_OP_div, Stride)) {

6617 if (!pushSCEV(Stride))

6618 return false;

6619 pushOperator(llvm::dwarf::DW_OP_div);

6620 }

6621 return true;

6622 }

6623

6624

6625

6626

6627 void appendToVectors(SmallVectorImpl<uint64_t> &DestExpr,

6628 SmallVectorImpl<Value *> &DestLocations) {

6630 "Expected the locations vector to contain the IV");

6631

6632

6633

6635 "Expected the location ops to contain the IV.");

6636

6637

6639 for (const auto &Op : LocationOps) {

6640 auto It = find(DestLocations, Op);

6641 if (It != DestLocations.end()) {

6642

6643 DestIndexMap.push_back(std::distance(DestLocations.begin(), It));

6644 continue;

6645 }

6646

6649 }

6650

6651 for (const auto &Op : expr_ops()) {

6653 Op.appendToVector(DestExpr);

6654 continue;

6655 }

6656

6658

6659

6660 uint64_t NewIndex = DestIndexMap[Op.getArg(0)];

6662 }

6663 }

6664};

6665

6666

6667

6668struct DVIRecoveryRec {

6669 DVIRecoveryRec(DbgVariableRecord *DVR)

6670 : DbgRef(DVR), Expr(DVR->getExpression()), HadLocationArgList(false) {}

6671

6672 DbgVariableRecord *DbgRef;

6673 DIExpression *Expr;

6674 bool HadLocationArgList;

6678

6679 void clear() {

6680 for (auto &RE : RecoveryExprs)

6681 RE.reset();

6682 RecoveryExprs.clear();

6683 }

6684

6685 ~DVIRecoveryRec() { clear(); }

6686};

6687}

6688

6689

6690

6691

6693 auto expr_ops = ToDwarfOpIter(Expr);

6694 unsigned Count = 0;

6695 for (auto Op : expr_ops)

6699}

6700

6701

6702

6703

6704template

6708 "contain any DW_OP_llvm_arg operands.");

6712}

6713

6714

6715template

6720 "Expected expression that references DIArglist locations using "

6721 "DW_OP_llvm_arg operands.");

6723 for (Value *V : Locations)

6726 DbgVal.setRawLocation(llvm::DIArgList::get(DbgVal.getContext(), ValArrayRef));

6728}

6729

6730

6731

6732

6733

6734

6740 if (NumLLVMArgs == 0) {

6741

6744

6745

6747 "Lone LLVM_arg in a DIExpression should refer to location-op 0.");

6750 } else {

6751

6753 }

6754

6755

6756

6757

6760 SalvageExpr = DIExpression::append(SalvageExpr, {dwarf::DW_OP_stack_value});

6762 }

6763}

6764

6765

6766

6767

6768

6769

6773

6774

6777 LLVM_DEBUG(dbgs() << "scev-salvage: restore dbg.value to pre-LSR state\n"

6778 << "scev-salvage: post-LSR: " << *DbgVal << '\n');

6779 assert(DVIRec.Expr && "Expected an expression");

6781

6782

6783

6784 if (!DVIRec.HadLocationArgList) {

6785 assert(DVIRec.LocationOps.size() == 1 &&

6786 "Unexpected number of location ops.");

6787

6788

6789

6790 Value *CachedValue =

6793 } else {

6795 for (WeakVH VH : DVIRec.LocationOps) {

6798 }

6802 }

6803 LLVM_DEBUG(dbgs() << "scev-salvage: pre-LSR: " << *DbgVal << '\n');

6804}

6805

6807 llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec,

6808 const SCEV *SCEVInductionVar,

6809 SCEVDbgValueBuilder IterCountExpr) {

6810

6812 return false;

6813

6814

6815

6816

6817

6819

6820

6821

6823 LocationOpIndexMap.assign(DVIRec.LocationOps.size(), -1);

6825 NewLocationOps.push_back(LSRInductionVar);

6826

6827 for (unsigned i = 0; i < DVIRec.LocationOps.size(); i++) {

6828 WeakVH VH = DVIRec.LocationOps[i];

6829

6830

6831

6834 LocationOpIndexMap[i] = NewLocationOps.size() - 1;

6835 LLVM_DEBUG(dbgs() << "scev-salvage: Location index " << i

6836 << " now at index " << LocationOpIndexMap[i] << "\n");

6837 continue;

6838 }

6839

6840

6841

6844 LLVM_DEBUG(dbgs() << "scev-salvage: SCEV for location at index: " << i

6845 << " refers to a location that is now undef or erased. "

6846 "Salvage abandoned.\n");

6847 return false;

6848 }

6849

6850 LLVM_DEBUG(dbgs() << "scev-salvage: salvaging location at index " << i

6851 << " with SCEV: " << *DVIRec.SCEVs[i] << "\n");

6852

6853 DVIRec.RecoveryExprs[i] = std::make_unique();

6854 SCEVDbgValueBuilder *SalvageExpr = DVIRec.RecoveryExprs[i].get();

6855

6856

6857

6858 if (std::optional Offset =

6860 if (Offset->getSignificantBits() <= 64)

6861 SalvageExpr->createOffsetExpr(Offset->getSExtValue(), LSRInductionVar);

6862 else

6863 return false;

6864 } else if (!SalvageExpr->createIterCountExpr(DVIRec.SCEVs[i], IterCountExpr,

6865 SE))

6866 return false;

6867 }

6868

6869

6870

6873 assert(DVIRec.RecoveryExprs.size() == 1 &&

6874 "Expected only a single recovery expression for an empty "

6875 "DIExpression.");

6876 assert(DVIRec.RecoveryExprs[0] &&

6877 "Expected a SCEVDbgSalvageBuilder for location 0");

6878 SCEVDbgValueBuilder *B = DVIRec.RecoveryExprs[0].get();

6879 B->appendToVectors(NewExpr, NewLocationOps);

6880 }

6881 for (const auto &Op : DVIRec.Expr->expr_ops()) {

6882

6885 continue;

6886 }

6887

6888 uint64_t LocationArgIndex = Op.getArg(0);

6889 SCEVDbgValueBuilder *DbgBuilder =

6890 DVIRec.RecoveryExprs[LocationArgIndex].get();

6891

6892

6893

6894 if (!DbgBuilder) {

6896 assert(LocationOpIndexMap[Op.getArg(0)] != -1 &&

6897 "Expected a positive index for the location-op position.");

6898 NewExpr.push_back(LocationOpIndexMap[Op.getArg(0)]);

6899 continue;

6900 }

6901

6902 DbgBuilder->appendToVectors(NewExpr, NewLocationOps);

6903 }

6904

6906 LLVM_DEBUG(dbgs() << "scev-salvage: Updated DVI: " << *DVIRec.DbgRef << "\n");

6907 return true;

6908}

6909

6910

6911

6914 SmallVector<std::unique_ptr, 2> &DVIToUpdate) {

6915 if (DVIToUpdate.empty())

6916 return;

6917

6918 const llvm::SCEV *SCEVInductionVar = SE.getSCEV(LSRInductionVar);

6919 assert(SCEVInductionVar &&

6920 "Anticipated a SCEV for the post-LSR induction variable");

6921

6924 if (!IVAddRec->isAffine())

6925 return;

6926

6927

6929 return;

6930

6931

6932 SCEVDbgValueBuilder IterCountExpr;

6933 IterCountExpr.pushLocation(LSRInductionVar);

6934 if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE))

6935 return;

6936

6937 LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV: " << *SCEVInductionVar

6938 << '\n');

6939

6940 for (auto &DVIRec : DVIToUpdate) {

6941 SalvageDVI(L, SE, LSRInductionVar, *DVIRec, SCEVInductionVar,

6942 IterCountExpr);

6943 }

6944 }

6945}

6946

6947

6948

6949

6952 SmallVector<std::unique_ptr, 2> &SalvageableDVISCEVs) {

6953 for (const auto &B : L->getBlocks()) {

6954 for (auto &I : *B) {

6956 if (!DbgVal.isDbgValue() && !DbgVal.isDbgAssign())

6957 continue;

6958

6959

6960

6961 if (DbgVal.isKillLocation())

6962 continue;

6963

6964

6965

6966 const auto &HasTranslatableLocationOps =

6968 for (const auto LocOp : DbgValToTranslate.location_ops()) {

6969 if (!LocOp)

6970 return false;

6971

6972 if (!SE.isSCEVable(LocOp->getType()))

6973 return false;

6974

6977 return false;

6978 }

6979 return true;

6980 };

6981

6982 if (!HasTranslatableLocationOps(DbgVal))

6983 continue;

6984

6985 std::unique_ptr NewRec =

6986 std::make_unique(&DbgVal);

6987

6988

6989

6990 NewRec->RecoveryExprs.resize(DbgVal.getNumVariableLocationOps());

6991 for (const auto LocOp : DbgVal.location_ops()) {

6992 NewRec->SCEVs.push_back(SE.getSCEV(LocOp));

6993 NewRec->LocationOps.push_back(LocOp);

6994 NewRec->HadLocationArgList = DbgVal.hasArgList();

6995 }

6996 SalvageableDVISCEVs.push_back(std::move(NewRec));

6997 }

6998 }

6999 }

7000}

7001

7002

7003

7004

7006 const LSRInstance &LSR) {

7007

7008 auto IsSuitableIV = [&](PHINode *P) {

7010 return false;

7013 return false;

7014 };

7015

7016

7017

7018

7019 for (const WeakVH &IV : LSR.getScalarEvolutionIVs()) {

7020 if (IV)

7021 continue;

7022

7023

7025

7026 if (IsSuitableIV(P))

7027 return P;

7028 }

7029

7030 for (PHINode &P : L.getHeader()->phis()) {

7031 if (IsSuitableIV(&P))

7032 return &P;

7033 }

7034 return nullptr;

7035}

7036

7042

7043

7044

7047

7049 std::unique_ptr MSSAU;

7050 if (MSSA)

7051 MSSAU = std::make_unique(MSSA);

7052

7053

7054 const LSRInstance &Reducer =

7055 LSRInstance(L, IU, SE, DT, LI, TTI, AC, TLI, MSSAU.get());

7056 Changed |= Reducer.getChanged();

7057

7058

7063#if LLVM_ENABLE_ABI_BREAKING_CHECKS

7065#endif

7066 unsigned numFolded = Rewriter.replaceCongruentIVs(L, &DT, DeadInsts, &TTI);

7067 Rewriter.clear();

7068 if (numFolded) {

7071 MSSAU.get());

7073 }

7074 }

7075

7076

7077

7078

7079

7080 if (L->isRecursivelyLCSSAForm(DT, LI) && L->getExitBlock()) {

7085 Rewriter.clear();

7086 if (Rewrites) {

7089 MSSAU.get());

7091 }

7092 }

7093

7094 if (SalvageableDVIRecords.empty())

7096

7097

7098

7099

7100 for (const auto &L : LI) {

7103 else {

7104 LLVM_DEBUG(dbgs() << "scev-salvage: SCEV salvaging not possible. An IV "

7105 "could not be identified.\n");

7106 }

7107 }

7108

7109 for (auto &Rec : SalvageableDVIRecords)

7110 Rec->clear();

7111 SalvageableDVIRecords.clear();

7113}

7114

7115bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & ) {

7116 if (skipLoop(L))

7117 return false;

7118

7119 auto &IU = getAnalysis().getIU();

7120 auto &SE = getAnalysis().getSE();

7121 auto &DT = getAnalysis().getDomTree();

7122 auto &LI = getAnalysis().getLoopInfo();

7123 const auto &TTI = getAnalysis().getTTI(

7124 *L->getHeader()->getParent());

7125 auto &AC = getAnalysis().getAssumptionCache(

7126 *L->getHeader()->getParent());

7127 auto &TLI = getAnalysis().getTLI(

7128 *L->getHeader()->getParent());

7129 auto *MSSAAnalysis = getAnalysisIfAvailable();

7131 if (MSSAAnalysis)

7132 MSSA = &MSSAAnalysis->getMSSA();

7134}

7135

7142

7144 if (AR.MSSA)

7146 return PA;

7147}

7148

7149char LoopStrengthReduce::ID = 0;

7150

7152 "Loop Strength Reduction", false, false)

7161

for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

This file implements a class to represent arbitrary precision integral constant values and operations...

Function Alias Analysis false

static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)

static const Function * getParent(const Value *V)

static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")

static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

#define clEnumValN(ENUMVAL, FLAGNAME, DESC)

#define LLVM_DUMP_METHOD

Mark debug helper function definitions like dump() that should not be stripped from debug builds.

This file contains the declarations for the subclasses of Constant, which represent the different fla...

This file defines the DenseMap class.

This file defines the DenseSet and SmallDenseSet classes.

This file contains constants used for implementing Dwarf debug support.

early cse Early CSE w MemorySSA

Module.h This file contains the declarations for the Module class.

This defines the Use class.

iv Induction Variable Users

const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]

static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)

This header provides classes for managing per-loop analyses.

static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec, const SCEV *SCEVInductionVar, SCEVDbgValueBuilder IterCountExpr)

Definition LoopStrengthReduce.cpp:6806

static cl::opt< bool > DropScaledForVScale("lsr-drop-scaled-reg-for-vscale", cl::Hidden, cl::init(true), cl::desc("Avoid using scaled registers with vscale-relative addressing"))

static Value * getWideOperand(Value *Oper)

IVChain logic must consistently peek base TruncInst operands, so wrap it in a convenient helper.

Definition LoopStrengthReduce.cpp:2993

static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE)

Return true if the given add can be sign-extended without changing its value.

Definition LoopStrengthReduce.cpp:810

static bool mayUsePostIncMode(const TargetTransformInfo &TTI, LSRUse &LU, const SCEV *S, const Loop *L, ScalarEvolution &SE)

Return true if the SCEV represents a value that may end up as a post-increment operation.

Definition LoopStrengthReduce.cpp:3912

static void restorePreTransformState(DVIRecoveryRec &DVIRec)

Restore the DVI's pre-LSR arguments. Substitute undef for any erased values.

Definition LoopStrengthReduce.cpp:6775

static Immediate ExtractImmediate(const SCEV *&S, ScalarEvolution &SE)

If S involves the addition of a constant integer value, return that integer value,...

Definition LoopStrengthReduce.cpp:935

static bool containsAddRecDependentOnLoop(const SCEV *S, const Loop &L)

Definition LoopStrengthReduce.cpp:615

static User::op_iterator findIVOperand(User::op_iterator OI, User::op_iterator OE, Loop *L, ScalarEvolution &SE)

Helper for CollectChains that finds an IV operand (computed by an AddRec in this loop) within [OI,...

Definition LoopStrengthReduce.cpp:2974

static cl::opt< TTI::AddressingModeKind > PreferredAddresingMode("lsr-preferred-addressing-mode", cl::Hidden, cl::init(TTI::AMK_None), cl::desc("A flag that overrides the target's preferred addressing mode."), cl::values(clEnumValN(TTI::AMK_None, "none", "Don't prefer any addressing mode"), clEnumValN(TTI::AMK_PreIndexed, "preindexed", "Prefer pre-indexed addressing mode"), clEnumValN(TTI::AMK_PostIndexed, "postindexed", "Prefer post-indexed addressing mode"), clEnumValN(TTI::AMK_All, "all", "Consider all addressing modes")))

static bool isLegalUse(const TargetTransformInfo &TTI, Immediate MinOffset, Immediate MaxOffset, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg, int64_t Scale)

Test whether we know how to expand the current formula.

Definition LoopStrengthReduce.cpp:1916

static void DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &SalvageableDVISCEVs)

Identify and cache salvageable DVI locations and expressions along with the corresponding SCEV(s).

Definition LoopStrengthReduce.cpp:6950

static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE)

Return true if the given mul can be sign-extended without changing its value.

Definition LoopStrengthReduce.cpp:818

static const unsigned MaxSCEVSalvageExpressionSize

Limit the size of expression that SCEV-based salvaging will attempt to translate into a DIExpression.

Definition LoopStrengthReduce.cpp:144

static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE)

Return true if this AddRec is already a phi in its loop.

Definition LoopStrengthReduce.cpp:1090

static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F, const Loop &L)

Definition LoopStrengthReduce.cpp:1962

static cl::opt< bool > InsnsCost("lsr-insns-cost", cl::Hidden, cl::init(true), cl::desc("Add instruction count to a LSR cost model"))

static cl::opt< bool > StressIVChain("stress-ivchain", cl::Hidden, cl::init(false), cl::desc("Stress test LSR IV chains"))

static bool isAddressUse(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)

Returns true if the specified instruction is using the specified value as an address.

Definition LoopStrengthReduce.cpp:992

static GlobalValue * ExtractSymbol(const SCEV *&S, ScalarEvolution &SE)

If S involves the addition of a GlobalValue address, return that symbol, and mutate S to point to a n...

Definition LoopStrengthReduce.cpp:966

static void updateDVIWithLocation(T &DbgVal, Value *Location, SmallVectorImpl< uint64_t > &Ops)

Overwrites DVI with the location and Ops as the DIExpression.

Definition LoopStrengthReduce.cpp:6705

static bool isLegalAddImmediate(const TargetTransformInfo &TTI, Immediate Offset)

Definition LoopStrengthReduce.cpp:1937

static cl::opt< cl::boolOrDefault > AllowDropSolutionIfLessProfitable("lsr-drop-solution", cl::Hidden, cl::desc("Attempt to drop solution if it is less profitable"))

static cl::opt< bool > EnableVScaleImmediates("lsr-enable-vscale-immediates", cl::Hidden, cl::init(true), cl::desc("Enable analysis of vscale-relative immediates in LSR"))

static Instruction * getFixupInsertPos(const TargetTransformInfo &TTI, const LSRFixup &Fixup, const LSRUse &LU, Instruction *IVIncInsertPos, DominatorTree &DT)

Definition LoopStrengthReduce.cpp:6041

static const SCEV * getExprBase(const SCEV *S)

Return an approximation of this SCEV expression's "base", or NULL for any constant.

Definition LoopStrengthReduce.cpp:3009

static bool isAlwaysFoldable(const TargetTransformInfo &TTI, LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, Immediate BaseOffset, bool HasBaseReg)

Definition LoopStrengthReduce.cpp:2007

static llvm::PHINode * GetInductionVariable(const Loop &L, ScalarEvolution &SE, const LSRInstance &LSR)

Ideally pick the PHI IV inserted by ScalarEvolutionExpander.

Definition LoopStrengthReduce.cpp:7005

static bool IsSimplerBaseSCEVForTarget(const TargetTransformInfo &TTI, ScalarEvolution &SE, const SCEV *Best, const SCEV *Reg, MemAccessTy AccessType)

Definition LoopStrengthReduce.cpp:5319

static const unsigned MaxIVUsers

MaxIVUsers is an arbitrary threshold that provides an early opportunity for bail out.

Definition LoopStrengthReduce.cpp:138

static bool isHighCostExpansion(const SCEV *S, SmallPtrSetImpl< const SCEV * > &Processed, ScalarEvolution &SE)

Check if expanding this expression is likely to incur significant cost.

Definition LoopStrengthReduce.cpp:1110

static Value * getValueOrPoison(WeakVH &VH, LLVMContext &C)

Cached location ops may be erased during LSR, in which case a poison is required when restoring from ...

Definition LoopStrengthReduce.cpp:6770

static MemAccessTy getAccessType(const TargetTransformInfo &TTI, Instruction *Inst, Value *OperandVal)

Return the type of the memory being accessed.

Definition LoopStrengthReduce.cpp:1037

static unsigned numLLVMArgOps(SmallVectorImpl< uint64_t > &Expr)

Returns the total number of DW_OP_llvm_arg operands in the expression.

Definition LoopStrengthReduce.cpp:6692

static void DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar, SmallVector< std::unique_ptr< DVIRecoveryRec >, 2 > &DVIToUpdate)

Obtain an expression for the iteration count, then attempt to salvage the dbg.value intrinsics.

Definition LoopStrengthReduce.cpp:6912

static cl::opt< bool > EnablePhiElim("enable-lsr-phielim", cl::Hidden, cl::init(true), cl::desc("Enable LSR phi elimination"))

static void UpdateDbgValue(DVIRecoveryRec &DVIRec, SmallVectorImpl< Value * > &NewLocationOps, SmallVectorImpl< uint64_t > &NewExpr)

Write the new expression and new location ops for the dbg.value.

Definition LoopStrengthReduce.cpp:6735

static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE)

Return true if the given addrec can be sign-extended without changing its value.

Definition LoopStrengthReduce.cpp:802

static void DoInitialMatch(const SCEV *S, Loop *L, SmallVectorImpl< const SCEV * > &Good, SmallVectorImpl< const SCEV * > &Bad, ScalarEvolution &SE)

Recursion helper for initialMatch.

Definition LoopStrengthReduce.cpp:540

static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F)

Check if the addressing mode defined by F is completely folded in LU at isel time.

Definition LoopStrengthReduce.cpp:1945

static cl::opt< bool > LSRExpNarrow("lsr-exp-narrow", cl::Hidden, cl::init(false), cl::desc("Narrow LSR complex solution using" " expectation of registers number"))

static cl::opt< bool > FilterSameScaledReg("lsr-filter-same-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Narrow LSR search space by filtering non-optimal formulae" " with the same ScaledReg and Scale"))

static void updateDVIWithLocations(T &DbgVal, SmallVectorImpl< Value * > &Locations, SmallVectorImpl< uint64_t > &Ops)

Overwrite DVI with locations placed into a DIArglist.

Definition LoopStrengthReduce.cpp:6716

static cl::opt< unsigned > ComplexityLimit("lsr-complexity-limit", cl::Hidden, cl::init(std::numeric_limits< uint16_t >::max()), cl::desc("LSR search space complexity limit"))

static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT, LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC, TargetLibraryInfo &TLI, MemorySSA *MSSA)

Definition LoopStrengthReduce.cpp:7037

static bool isProfitableChain(IVChain &Chain, SmallPtrSetImpl< Instruction * > &Users, ScalarEvolution &SE, const TargetTransformInfo &TTI)

Return true if the number of registers needed for the chain is estimated to be less than the number r...

Definition LoopStrengthReduce.cpp:3076

static const SCEV * CollectSubexprs(const SCEV *S, const SCEVConstant *C, SmallVectorImpl< const SCEV * > &Ops, const Loop *L, ScalarEvolution &SE, unsigned Depth=0)

Split S into subexpressions which can be pulled out into separate registers.

Definition LoopStrengthReduce.cpp:3857

static const SCEV * getExactSDiv(const SCEV *LHS, const SCEV *RHS, ScalarEvolution &SE, bool IgnoreSignificantBits=false)

Return an expression for LHS /s RHS, if it can be determined and if the remainder is known to be zero...

Definition LoopStrengthReduce.cpp:830

static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, Value *Operand, const TargetTransformInfo &TTI)

Return true if the IVInc can be folded into an addressing mode.

Definition LoopStrengthReduce.cpp:3368

static const SCEV * getAnyExtendConsideringPostIncUses(ArrayRef< PostIncLoopSet > Loops, const SCEV *Expr, Type *ToTy, ScalarEvolution &SE)

Extend/Truncate Expr to ToTy considering post-inc uses in Loops.

Definition LoopStrengthReduce.cpp:4410

static unsigned getSetupCost(const SCEV *Reg, unsigned Depth)

Definition LoopStrengthReduce.cpp:1380

static cl::opt< unsigned > SetupCostDepthLimit("lsr-setupcost-depth-limit", cl::Hidden, cl::init(7), cl::desc("The limit on recursion depth for LSRs setup cost"))

This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...

uint64_t IntrinsicInst * II

PowerPC TLS Dynamic Call Fixup

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

This file defines the PointerIntPair class.

const SmallVectorImpl< MachineOperand > & Cond

Remove Loads Into Fake Uses

static bool isValid(const char C)

Returns true if C is a valid mangled character: <0-9a-zA-Z_>.

SI optimize exec mask operations pre RA

This file implements a set that has insertion order iteration characteristics.

This file implements the SmallBitVector class.

This file defines the SmallPtrSet class.

This file defines the SmallSet class.

This file defines the SmallVector class.

This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

static const unsigned UnknownAddressSpace

static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")

static SymbolRef::Type getType(const Symbol *Sym)

This pass exposes codegen information to IR-level passes.

Virtual Register Rewriter

static const uint32_t IV[8]

Class for arbitrary precision integers.

uint64_t getZExtValue() const

Get zero extended value.

bool isNegative() const

Determine sign of this APInt.

LLVM_ABI APInt sdiv(const APInt &RHS) const

Signed division function for APInt.

unsigned getSignificantBits() const

Get the minimum bit size for this signed APInt.

LLVM_ABI APInt srem(const APInt &RHS) const

Function for signed remainder operation.

int64_t getSExtValue() const

Get sign extended value.

PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)

Get the result of an analysis pass for a given IR unit.

Represent the analysis usage information of a pass.

LLVM_ABI AnalysisUsage & addRequiredID(const void *ID)

AnalysisUsage & addPreservedID(const void *ID)

AnalysisUsage & addRequired()

AnalysisUsage & addPreserved()

Add the specified Pass class to the set of analyses preserved by this pass.

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

A cache of @llvm.assume calls within a function.

An instruction that atomically checks whether a specified value is in a memory location,...

an instruction that atomically reads a memory location, combines it with another value,...

LLVM Basic Block Representation.

iterator_range< const_phi_iterator > phis() const

Returns a range that iterates over the phis in the basic block.

InstListType::iterator iterator

Instruction iterators...

void moveBefore(BasicBlock *MovePos)

Unlink this basic block from its current function and insert it into the function that MovePos lives ...

LLVM_ABI bool isLandingPad() const

Return true if this basic block is a landing pad.

const Instruction * getTerminator() const LLVM_READONLY

Returns the terminator instruction if the block is well formed or null if the block is not well forme...

BinaryOps getOpcode() const

static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)

Construct a binary instruction, given the opcode and the two operands.

bool isUnconditional() const

Value * getCondition() const

static LLVM_ABI Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)

Returns the opcode necessary to cast Val into Ty using usual casting rules.

static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)

Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...

Predicate

This enumeration lists the possible predicates for CmpInst subclasses.

Predicate getInversePredicate() const

For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...

static LLVM_ABI bool isValueValidForType(Type *Ty, uint64_t V)

This static method returns true if the type Ty is big enough to represent the value V.

static ConstantInt * getSigned(IntegerType *Ty, int64_t V)

Return a ConstantInt with the specified value for the specified type.

int64_t getSExtValue() const

Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...

uint64_t getZExtValue() const

Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...

static LLVM_ABI Constant * getAllOnesValue(Type *Ty)

static LLVM_ABI DIArgList * get(LLVMContext &Context, ArrayRef< ValueAsMetadata * > Args)

iterator_range< expr_op_iterator > expr_ops() const

static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)

Append the opcodes Ops to DIExpr.

unsigned getNumElements() const

static LLVM_ABI void appendOffset(SmallVectorImpl< uint64_t > &Ops, int64_t Offset)

Append Ops with operations to apply the Offset.

LLVM_ABI bool isComplex() const

Return whether the location is computed on the expression stack, meaning it cannot be a simple regist...

LLVM_ABI LLVMContext & getContext()

Record of a variable value-assignment, aka a non instruction representation of the dbg....

LLVM_ABI bool isKillLocation() const

void setRawLocation(Metadata *NewLocation)

Use of this should generally be avoided; instead, replaceVariableLocationOp and addVariableLocationOp...

void setExpression(DIExpression *NewExpr)

DIExpression * getExpression() const

std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)

std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)

DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const

getNode - return the (Post)DominatorTree node for the specified basic block.

bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const

properlyDominates - Returns true iff A dominates B and A != B.

Legacy analysis pass which computes a DominatorTree.

Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.

LLVM_ABI Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const

Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...

LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const

Return true if the (end of the) basic block BB dominates the use U.

PointerType * getType() const

Global values are always pointers.

IVStrideUse - Keep track of one use of a strided induction variable.

void transformToPostInc(const Loop *L)

transformToPostInc - Transform the expression to post-inc form for the given loop.

Value * getOperandValToReplace() const

getOperandValToReplace - Return the Value of the operand in the user instruction that this IVStrideUs...

void setUser(Instruction *NewUser)

setUser - Assign a new user instruction for this use.

Analysis pass that exposes the IVUsers for a loop.

ilist< IVStrideUse >::const_iterator const_iterator

LLVM_ABI void print(raw_ostream &OS) const

CostType getValue() const

This function is intended to be used as sparingly as possible, since the class provides the full rang...

LLVM_ABI bool isLifetimeStartOrEnd() const LLVM_READONLY

Return true if the instruction is a llvm.lifetime.start or llvm.lifetime.end marker.

LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY

Return the number of successors that this instruction has.

const DebugLoc & getDebugLoc() const

Return the debug location for this node as a DebugLoc.

LLVM_ABI void moveBefore(InstListType::iterator InsertPos)

Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...

bool isEHPad() const

Return true if the instruction is a variety of EH-block.

LLVM_ABI InstListType::iterator eraseFromParent()

This method unlinks 'this' from the containing basic block and deletes it.

LLVM_ABI Type * getAccessType() const LLVM_READONLY

Return the type this instruction accesses in memory, if any.

const char * getOpcodeName() const

unsigned getOpcode() const

Returns a member of one of the enums like Instruction::Add.

void setDebugLoc(DebugLoc Loc)

Set the debug location information for this instruction.

LLVM_ABI const DataLayout & getDataLayout() const

Get the data layout of the module this instruction belongs to.

static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)

This static method is the primary way of constructing an IntegerType.

A wrapper class for inspecting calls to intrinsic functions.

This is an important class for using LLVM in a threaded context.

This class provides an interface for updating the loop pass manager based on mutations to the loop ne...

An instruction for reading from memory.

void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const

Return all blocks inside the loop that have successors outside of the loop.

BlockT * getHeader() const

unsigned getLoopDepth() const

Return the nesting level of this loop.

The legacy pass manager's analysis pass to compute loop information.

PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)

Definition LoopStrengthReduce.cpp:7136

Represents a single loop in the control flow graph.

static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)

An analysis that produces MemorySSA for a function.

Encapsulates MemorySSA, including all data associated with memory accesses.

void addIncoming(Value *V, BasicBlock *BB)

Add an incoming value to the end of the PHI list.

iterator_range< const_block_iterator > blocks() const

op_range incoming_values()

void setIncomingValue(unsigned i, Value *V)

BasicBlock * getIncomingBlock(unsigned i) const

Return incoming basic block number i.

Value * getIncomingValue(unsigned i) const

Return incoming value number x.

static unsigned getIncomingValueNumForOperand(unsigned i)

int getBasicBlockIndex(const BasicBlock *BB) const

Return the first index of the specified basic block in the value list for this PHI.

unsigned getNumIncomingValues() const

Return the number of incoming edges.

static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)

Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...

static LLVM_ABI PassRegistry * getPassRegistry()

getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...

Pass interface - Implemented by all 'passes'.

static LLVM_ABI PoisonValue * get(Type *T)

Static factory methods - Return an 'poison' object of the specified type.

A set of analyses that are preserved following a run of a transformation pass.

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

This node represents an addition of some number of SCEVs.

This node represents a polynomial recurrence on the trip count of the specified loop.

const SCEV * getStart() const

const SCEV * getStepRecurrence(ScalarEvolution &SE) const

Constructs and returns the recurrence indicating how much this expression steps by.

bool isAffine() const

Return true if this represents an expression A + B*x where A and B are loop invariant values.

const Loop * getLoop() const

This class represents a constant integer value.

ConstantInt * getValue() const

const APInt & getAPInt() const

This class uses information about analyze scalars to rewrite expressions in canonical form.

This node represents multiplication of some number of SCEVs.

bool hasNoUnsignedWrap() const

bool hasNoSignedWrap() const

ArrayRef< const SCEV * > operands() const

This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...

This class represents an analyzed expression in the program.

LLVM_ABI ArrayRef< const SCEV * > operands() const

Return operands of this SCEV expression.

unsigned short getExpressionSize() const

LLVM_ABI bool isZero() const

Return true if the expression is a constant zero.

SCEVTypes getSCEVType() const

LLVM_ABI Type * getType() const

Return the LLVM type of this SCEV expression.

The main scalar evolution driver.

LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)

If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...

const SCEV * getZero(Type *Ty)

Return a SCEV for the constant 0 of a specific type.

LLVM_ABI uint64_t getTypeSizeInBits(Type *Ty) const

Return the size in bits of the specified type, for which isSCEVable must return true.

LLVM_ABI const SCEV * getConstant(ConstantInt *V)

LLVM_ABI const SCEV * getSCEV(Value *V)

Return a SCEV expression for the full generality of the specified expression.

LLVM_ABI const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)

Return a SCEV corresponding to a conversion of the input value to the specified type.

LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)

Return true if the value of the given SCEV is unchanging in the specified loop.

LLVM_ABI const SCEV * getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L, SCEV::NoWrapFlags Flags)

Get an add recurrence expression for the specified loop.

LLVM_ABI bool isSCEVable(Type *Ty) const

Test if values of the given type are analyzable within the SCEV framework.

LLVM_ABI Type * getEffectiveSCEVType(Type *Ty) const

Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...

LLVM_ABI const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)

Return LHS-RHS.

LLVM_ABI const SCEV * getAnyExtendExpr(const SCEV *Op, Type *Ty)

getAnyExtendExpr - Return a SCEV for the given operand extended with unspecified bits out to the give...

LLVM_ABI bool containsUndefs(const SCEV *S) const

Return true if the SCEV expression contains an undef value.

LLVM_ABI const SCEV * getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)

LLVM_ABI const SCEV * getVScale(Type *Ty)

LLVM_ABI bool hasComputableLoopEvolution(const SCEV *S, const Loop *L)

Return true if the given SCEV changes value in a known way in the specified loop.

LLVM_ABI const SCEV * getPointerBase(const SCEV *V)

Transitively follow the chain of pointer-type operands until reaching a SCEV that does not have a sin...

LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)

Get a canonical multiply expression, or something simpler if possible.

LLVM_ABI const SCEV * getUnknown(Value *V)

LLVM_ABI std::optional< APInt > computeConstantDifference(const SCEV *LHS, const SCEV *RHS)

Compute LHS - RHS and returns the result as an APInt if it is a constant, and std::nullopt if it isn'...

LLVM_ABI bool properlyDominates(const SCEV *S, const BasicBlock *BB)

Return true if elements that makes up the given SCEV properly dominate the specified basic block.

LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)

Get a canonical add expression, or something simpler if possible.

LLVM_ABI bool containsErasedValue(const SCEV *S) const

Return true if the SCEV expression contains a Value that has been optimised out and is now a nullptr.

LLVMContext & getContext() const

size_type size() const

Determine the number of elements in the SetVector.

iterator end()

Get an iterator to the end of the SetVector.

iterator begin()

Get an iterator to the beginning of the SetVector.

bool insert(const value_type &X)

Insert a new element into the SetVector.

int find_first() const

Returns the index of the first set bit, -1 if none of the bits are set.

iterator_range< const_set_bits_iterator > set_bits() const

int find_next(unsigned Prev) const

Returns the index of the next set bit following the "Prev" bit.

size_type size() const

Returns the number of bits in this bitvector.

void resize(unsigned N, bool t=false)

Grow or shrink the bitvector.

size_type count() const

Returns the number of bits which are set.

A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...

size_type count(ConstPtrType Ptr) const

count - Return 1 if the specified pointer is in the set, 0 otherwise.

void insert_range(Range &&R)

std::pair< iterator, bool > insert(PtrType Ptr)

Inserts Ptr if and only if there is no element in the container equal to Ptr.

std::pair< const_iterator, bool > insert(const T &V)

insert - Insert an element into the set if it isn't already there.

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

void assign(size_type NumElts, ValueParamT Elt)

reference emplace_back(ArgTypes &&... Args)

void reserve(size_type N)

iterator erase(const_iterator CI)

typename SuperClass::const_iterator const_iterator

typename SuperClass::iterator iterator

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

static StackOffset get(int64_t Fixed, int64_t Scalable)

An instruction for storing to memory.

Provides information about what library functions are available for the current target.

Wrapper pass for TargetTransformInfo.

This pass provides access to the codegen interfaces that are needed for IR-level transformations.

LLVM_ABI bool shouldDropLSRSolutionIfLessProfitable() const

Return true if LSR should drop a found solution if it's calculated to be less profitable than the bas...

LLVM_ABI bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const

Return true if LSR cost of C1 is lower than C2.

LLVM_ABI bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const

LLVM_ABI unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const

LLVM_ABI bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0, Instruction *I=nullptr, int64_t ScalableOffset=0) const

Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...

LLVM_ABI bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const

LLVM_ABI bool isTypeLegal(Type *Ty) const

Return true if this type is legal.

LLVM_ABI bool isLegalAddImmediate(int64_t Imm) const

Return true if the specified immediate is legal add immediate, that is the target has add instruction...

LLVM_ABI bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) const

Return true if the target can save a compare for loop count, for example hardware loop saves a compar...

LLVM_ABI unsigned getNumberOfRegisters(unsigned ClassID) const

@ MIM_PostInc

Post-incrementing.

LLVM_ABI bool canMacroFuseCmp() const

Return true if the target can fuse a compare and branch.

AddressingModeKind

Which addressing mode Loop Strength Reduction will try to generate.

@ AMK_PostIndexed

Prefer post-indexed addressing mode.

@ AMK_All

Consider all addressing modes.

@ AMK_PreIndexed

Prefer pre-indexed addressing mode.

@ AMK_None

Don't prefer any addressing mode.

LLVM_ABI bool isTruncateFree(Type *Ty1, Type *Ty2) const

Return true if it's free to truncate a value of type Ty1 to type Ty2.

This class represents a truncation of integer types.

The instances of the Type class are immutable: once they are created, they are never changed.

LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const

Return true if this is a type whose size is a known multiple of vscale.

bool isPointerTy() const

True if this is an instance of PointerType.

LLVM_ABI unsigned getPointerAddressSpace() const

Get the address space of this pointer or pointer vector type.

static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)

LLVMContext & getContext() const

Return the LLVMContext in which this type was uniqued.

LLVM_ABI int getFPMantissaWidth() const

Return the width of the mantissa of this type.

bool isVoidTy() const

Return true if this is 'void'.

void setOperand(unsigned i, Value *Val)

LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)

Replace uses of one Value with another.

Value * getOperand(unsigned i) const

LLVM Value Representation.

Type * getType() const

All values are typed, get the type of this value.

bool hasOneUse() const

Return true if there is exactly one use of this value.

LLVM_ABI void replaceAllUsesWith(Value *V)

Change all uses of this to point to a new Value.

iterator_range< user_iterator > users()

LLVM_ABI void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const

Print the name of this Value out to the specified raw_ostream.

LLVM_ABI LLVMContext & getContext() const

All values hold a context through their type.

iterator_range< use_iterator > uses()

A nullable Value handle that is nullable.

int getNumOccurrences() const

std::pair< iterator, bool > insert(const ValueT &V)

size_type count(const_arg_type_t< ValueT > V) const

Return 1 if the specified key is in the set, 0 otherwise.

const ParentTy * getParent() const

self_iterator getIterator()

This class implements an extremely fast bulk output stream that can only output to a stream.

This provides a very simple, boring adaptor for a begin and end iterator into a range type.

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

unsigned ID

LLVM IR allows to use arbitrary numbers as calling convention identifiers.

@ C

The default llvm calling convention, compatible with C.

@ BasicBlock

Various leaf nodes.

class_match< const SCEVVScale > m_SCEVVScale()

bind_cst_ty m_scev_APInt(const APInt *&C)

Match an SCEV constant and bind it to an APInt.

class_match< const SCEVConstant > m_SCEVConstant()

SCEVAffineAddRec_match< Op0_t, Op1_t, class_match< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)

bind_ty< const SCEVMulExpr > m_scev_Mul(const SCEVMulExpr *&V)

bool match(const SCEV *S, const Pattern &P)

class_match< const Loop > m_Loop()

cst_pred_ty< is_specific_cst > m_scev_SpecificInt(uint64_t V)

Match an SCEV constant with a plain unsigned integer.

class_match< const SCEV > m_SCEV()

ValuesClass values(OptsTy... Options)

Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...

initializer< Ty > init(const Ty &Val)

@ DW_OP_LLVM_arg

Only used in LLVM metadata.

@ DW_OP_LLVM_convert

Only used in LLVM metadata.

Sequence

A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...

DiagnosticInfoOptimizationBase::Argument NV

NodeAddr< PhiNode * > Phi

NodeAddr< UseNode * > Use

friend class Instruction

Iterator for Instructions in a `BasicBlock.

LLVM_ABI iterator begin() const

BaseReg

Stack frame base register. Bit 0 of FREInfo.Info.

unsigned KindType

For isa, dyn_cast, etc operations on TelemetryInfo.

This is an optimization pass for GlobalISel generic memory operations.

auto drop_begin(T &&RangeOrContainer, size_t N=1)

Return a range covering RangeOrContainer with the first N elements excluded.

void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)

FunctionAddr VTableAddr Value

auto find(R &&Range, const T &Val)

Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.

bool all_of(R &&range, UnaryPredicate P)

Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.

Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)

Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...

bool operator!=(uint64_t V1, const APInt &V2)

void append_range(Container &C, Range &&R)

Wrapper function to append range R to container C.

LLVM_ABI char & LoopSimplifyID

bool isa_and_nonnull(const Y &Val)

bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)

int countr_zero(T Val)

Count number of 0's from the least significant bit to the most stopping at the first 1.

DomTreeNodeBase< BasicBlock > DomTreeNode

AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager

The loop analysis manager.

LLVM_ABI bool matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO, Value *&Start, Value *&Step)

Attempt to match a simple first order recurrence cycle of the form: iv = phi Ty [Start,...

auto dyn_cast_or_null(const Y &Val)

bool any_of(R &&range, UnaryPredicate P)

Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.

unsigned Log2_32(uint32_t Value)

Return the floor log base 2 of the specified value, -1 if the value is zero.

LLVM_ABI bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)

Examine each PHI in the given block and delete it if it is dead.

LLVM_ABI void initializeLoopStrengthReducePass(PassRegistry &)

auto reverse(ContainerTy &&C)

LLVM_ABI const SCEV * denormalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE)

Denormalize S to be post-increment for all loops present in Loops.

decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)

void sort(IteratorTy Start, IteratorTy End)

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

bool none_of(R &&Range, UnaryPredicate P)

Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.

FunctionAddr VTableAddr Count

LLVM_ABI Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)

Attempt to constant fold a cast with the specified operand.

class LLVM_GSL_OWNER SmallVector

Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...

bool isa(const From &Val)

isa - Return true if the parameter to the template is an instance of one of the template type argu...

LLVM_ABI void SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef< BasicBlock * > Preds, const char *Suffix, const char *Suffix2, SmallVectorImpl< BasicBlock * > &NewBBs, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)

This method transforms the landing pad, OrigBB, by introducing two new basic blocks into the function...

LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key

LLVM_ABI const SCEV * normalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops, ScalarEvolution &SE, bool CheckInvertible=true)

Normalize S to be post-increment for all loops present in Loops.

LLVM_ABI raw_fd_ostream & errs()

This returns a reference to a raw_ostream for standard error.

iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >

@ First

Helpers to iterate all locations in the MemoryEffectsBase class.

IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >

auto count(R &&Range, const E &Element)

Wrapper function around std::count to count the number of times an element Element occurs in the give...

DWARFExpression::Operation Op

LLVM_ABI Pass * createLoopStrengthReducePass()

Definition LoopStrengthReduce.cpp:7162

LLVM_ABI BasicBlock * SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options=CriticalEdgeSplittingOptions(), const Twine &BBName="")

If this edge is a critical edge, insert a new node to split the critical edge.

LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructionsPermissive(SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())

Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow instructions that are not...

constexpr unsigned BitWidth

LLVM_ABI bool formLCSSAForInstructions(SmallVectorImpl< Instruction * > &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, SmallVectorImpl< PHINode * > *PHIsToRemove=nullptr, SmallVectorImpl< PHINode * > *InsertedPHIs=nullptr)

Ensures LCSSA form for every instruction from the Worklist in the scope of innermost containing loop.

decltype(auto) cast(const From &Val)

cast - Return the argument parameter cast to the specified type.

LLVM_ABI PreservedAnalyses getLoopPassPreservedAnalyses()

Returns the minimum set of Analyses that all loop passes must preserve.

SmallPtrSet< const Loop *, 2 > PostIncLoopSet

auto find_if(R &&Range, UnaryPredicate P)

Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.

LLVM_ABI int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector< WeakTrackingVH, 16 > &DeadInsts)

If the final value of any expressions that are recurrent in the loop can be computed,...

bool is_contained(R &&Range, const E &Element)

Returns true if Element is found in Range.

static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)

Filter the DbgRecord range to DbgVariableRecord types only and downcast.

bool SCEVExprContains(const SCEV *Root, PredTy Pred)

Return true if any node in Root satisfies the predicate Pred.

void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)

Implement std::swap in terms of BitVector swap.

Attributes of a target dependent hardware loop.

The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...

TargetTransformInfo & TTI

Information about a load/store intrinsic defined by the target.

Value * PtrVal

This is the pointer that the intrinsic is loading from or storing to.