LLVM: lib/Transforms/Scalar/LoopStrengthReduce.cpp Source File
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <limits>
#include <map>
#include <numeric>
#include <optional>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "loop-reduce"

134
135
136
137
139
140
141
142
143
145
146
149 cl::desc("Enable LSR phi elimination"));
150
151
154 cl::desc("Add instruction count to a LSR cost model"));
155
156
159 cl::desc("Narrow LSR complex solution using"
160 " expectation of registers number"));
161
162
163
166 cl::desc("Narrow LSR search space by filtering non-optimal formulae"
167 " with the same ScaledReg and Scale"));
168
171 cl::desc("A flag that overrides the target's preferred addressing mode."),
175 "Prefer pre-indexed addressing mode"),
177 "Prefer post-indexed addressing mode"),
179
182 cl::init(std::numeric_limits<uint16_t>::max()),
183 cl::desc("LSR search space complexity limit"));
184
187 cl::desc("The limit on recursion depth for LSRs setup cost"));
188
191 cl::desc("Attempt to drop solution if it is less profitable"));
192
195 cl::desc("Enable analysis of vscale-relative immediates in LSR"));
196
199 cl::desc("Avoid using scaled registers with vscale-relative addressing"));
200
201#ifndef NDEBUG
202
205 cl::desc("Stress test LSR IV chains"));
206#else
208#endif
209
210namespace {
211
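// Describes one memory access: the type being loaded or stored and the address
// space it lives in. LSR passes this to TTI when asking whether a candidate
// addressing mode is legal for a given use.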
struct MemAccessTy {
  static const unsigned UnknownAddressSpace =
      std::numeric_limits<unsigned>::max();

  Type *MemTy = nullptr;
  unsigned AddrSpace = UnknownAddressSpace;

  MemAccessTy() = default;
  MemAccessTy(Type *Ty, unsigned AS) : MemTy(Ty), AddrSpace(AS) {}

  bool operator==(MemAccessTy Other) const {
    return MemTy == Other.MemTy && AddrSpace == Other.AddrSpace;
  }

  bool operator!=(MemAccessTy Other) const { return !(*this == Other); }

  static MemAccessTy getUnknown(LLVMContext &Ctx,
                                unsigned AS = UnknownAddressSpace) {
    return MemAccessTy(Type::getVoidTy(Ctx), AS);
  }

  Type *getType() { return MemTy; }
};

237
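// Bookkeeping for one register candidate (a SCEV): a bit vector recording
// which LSRUse indices reference it.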
class RegSortData {
public:
  SmallBitVector UsedByIndices;

  void print(raw_ostream &OS) const;
  void dump() const;
};
247
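// An Immediate is a signed 64-bit offset that may additionally be scaled by
// vscale (for scalable-vector targets). LSR manipulates offsets through this
// wrapper so fixed and vscale-relative immediates share the same code paths.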
class Immediate : public details::FixedOrScalableQuantity<Immediate, int64_t> {
251 constexpr Immediate(ScalarTy MinVal, bool Scalable)
252 : FixedOrScalableQuantity(MinVal, Scalable) {}
253
254 constexpr Immediate(const FixedOrScalableQuantity<Immediate, int64_t> &V)
255 : FixedOrScalableQuantity(V) {}
256
257public:
258 constexpr Immediate() = delete;
259
260 static constexpr Immediate getFixed(ScalarTy MinVal) {
261 return {MinVal, false};
262 }
263 static constexpr Immediate getScalable(ScalarTy MinVal) {
264 return {MinVal, true};
265 }
266 static constexpr Immediate get(ScalarTy MinVal, bool Scalable) {
267 return {MinVal, Scalable};
268 }
269 static constexpr Immediate getZero() { return {0, false}; }
270 static constexpr Immediate getFixedMin() {
271 return {std::numeric_limits<int64_t>::min(), false};
272 }
273 static constexpr Immediate getFixedMax() {
274 return {std::numeric_limits<int64_t>::max(), false};
275 }
276 static constexpr Immediate getScalableMin() {
277 return {std::numeric_limits<int64_t>::min(), true};
278 }
279 static constexpr Immediate getScalableMax() {
280 return {std::numeric_limits<int64_t>::max(), true};
281 }
282
283 constexpr bool isLessThanZero() const { return Quantity < 0; }
284
285 constexpr bool isGreaterThanZero() const { return Quantity > 0; }
286
287 constexpr bool isCompatibleImmediate(const Immediate &Imm) const {
288 return isZero() || Imm.isZero() || Imm.Scalable == Scalable;
289 }
290
  constexpr bool isMin() const {
    return Quantity == std::numeric_limits<ScalarTy>::min();
  }

  constexpr bool isMax() const {
    return Quantity == std::numeric_limits<ScalarTy>::max();
  }
298
299
300 constexpr Immediate addUnsigned(const Immediate &RHS) const {
301 assert(isCompatibleImmediate(RHS) && "Incompatible Immediates");
302 ScalarTy Value = (uint64_t)Quantity + RHS.getKnownMinValue();
303 return {Value, Scalable || RHS.isScalable()};
304 }
305
306 constexpr Immediate subUnsigned(const Immediate &RHS) const {
307 assert(isCompatibleImmediate(RHS) && "Incompatible Immediates");
308 ScalarTy Value = (uint64_t)Quantity - RHS.getKnownMinValue();
309 return {Value, Scalable || RHS.isScalable()};
310 }
311
312
313 constexpr Immediate mulUnsigned(const ScalarTy RHS) const {
314 ScalarTy Value = (uint64_t)Quantity * RHS;
315 return {Value, Scalable};
316 }
317
318
  const SCEV *getSCEV(ScalarEvolution &SE, Type *Ty) const {
    const SCEV *S = SE.getConstant(Ty, Quantity);
    if (Scalable)
      S = SE.getMulExpr(S, SE.getVScale(S->getType()));
    return S;
  }

  const SCEV *getNegativeSCEV(ScalarEvolution &SE, Type *Ty) const {
    const SCEV *NegS = SE.getConstant(Ty, -(uint64_t)Quantity);
    if (Scalable)
      NegS = SE.getMulExpr(NegS, SE.getVScale(NegS->getType()));
    return NegS;
  }

  const SCEV *getUnknownSCEV(ScalarEvolution &SE, Type *Ty) const {
    const SCEV *SU = SE.getUnknown(ConstantInt::getSigned(Ty, Quantity));
    if (Scalable)
      SU = SE.getMulExpr(SU, SE.getVScale(SU->getType()));
    return SU;
  }
};

341
342
343
344
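// Strict weak ordering used when immediates are map keys: fixed offsets sort
// before scalable ones, then by their known minimum value.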
345struct KeyOrderTargetImmediate {
  bool operator()(const Immediate &LHS, const Immediate &RHS) const {
    if (LHS.isScalable() && !RHS.isScalable())
      return false;
    if (!LHS.isScalable() && RHS.isScalable())
      return true;
    return LHS.getKnownMinValue() < RHS.getKnownMinValue();
  }
353};
354
355
356
357
358struct KeyOrderSizeTAndImmediate {
359 bool operator()(const std::pair<size_t, Immediate> &LHS,
360 const std::pair<size_t, Immediate> &RHS) const {
361 size_t LSize = LHS.first;
362 size_t RSize = RHS.first;
363 if (LSize != RSize)
364 return LSize < RSize;
365 return KeyOrderTargetImmediate()(LHS.second, RHS.second);
366 }
367};
368}
369
370#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
371void RegSortData::print(raw_ostream &OS) const {
372 OS << "[NumUses=" << UsedByIndices.count() << ']';
373}

LLVM_DUMP_METHOD void RegSortData::dump() const {
  print(errs()); errs() << '\n';
}
378#endif
379
380namespace {
381
382
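// Maps register candidates (SCEVs) to the set of uses that reference them, so
// the solver can tell which registers are shared across multiple LSRUses.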
383class RegUseTracker {
384 using RegUsesTy = DenseMap<const SCEV *, RegSortData>;
385
  RegUsesTy RegUsesMap;
  SmallVector<const SCEV *, 16> RegSequence;
389public:
390 void countRegister(const SCEV *Reg, size_t LUIdx);
391 void dropRegister(const SCEV *Reg, size_t LUIdx);
392 void swapAndDropUse(size_t LUIdx, size_t LastLUIdx);
393
394 bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;
395
396 const SmallBitVector &getUsedByIndices(const SCEV *Reg) const;
397
398 void clear();
399

  using iterator = SmallVectorImpl<const SCEV *>::iterator;
  using const_iterator = SmallVectorImpl<const SCEV *>::const_iterator;

  iterator begin() { return RegSequence.begin(); }
  iterator end() { return RegSequence.end(); }
  const_iterator begin() const { return RegSequence.begin(); }
406 const_iterator end() const { return RegSequence.end(); }
407};
408
409}
410
411void
412RegUseTracker::countRegister(const SCEV *Reg, size_t LUIdx) {
413 std::pair<RegUsesTy::iterator, bool> Pair = RegUsesMap.try_emplace(Reg);
414 RegSortData &RSD = Pair.first->second;
  if (Pair.second)
    RegSequence.push_back(Reg);
  RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
418 RSD.UsedByIndices.set(LUIdx);
419}
420
421void
422RegUseTracker::dropRegister(const SCEV *Reg, size_t LUIdx) {
423 RegUsesTy::iterator It = RegUsesMap.find(Reg);
424 assert(It != RegUsesMap.end());
425 RegSortData &RSD = It->second;
426 assert(RSD.UsedByIndices.size() > LUIdx);
427 RSD.UsedByIndices.reset(LUIdx);
428}
429
430void
431RegUseTracker::swapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
432 assert(LUIdx <= LastLUIdx);
433
434
435
436 for (auto &Pair : RegUsesMap) {
437 SmallBitVector &UsedByIndices = Pair.second.UsedByIndices;
438 if (LUIdx < UsedByIndices.size())
439 UsedByIndices[LUIdx] =
440 LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : false;
441 UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));
442 }
443}
444
445bool
446RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
447 RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
448 if (I == RegUsesMap.end())
449 return false;
  const SmallBitVector &UsedByIndices = I->second.UsedByIndices;
  int i = UsedByIndices.find_first();
  if (i == -1) return false;
  if ((size_t)i != LUIdx) return true;
  return UsedByIndices.find_next(i) != -1;
455}
456
457const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const {
458 RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
459 assert(I != RegUsesMap.end() && "Unknown register!");
460 return I->second.UsedByIndices;
461}
462
void RegUseTracker::clear() {
  RegUsesMap.clear();
  RegSequence.clear();
}
467
468namespace {
469
470
471
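// A Formula is one candidate way to materialize the value an IV user needs:
//   BaseGV + BaseOffset + (sum of BaseRegs) + Scale * ScaledReg
// plus an UnfoldedOffset that could not be folded into the addressing mode.
// The solver eventually picks one Formula per LSRUse.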
472struct Formula {
473
474 GlobalValue *BaseGV = nullptr;
475
476
477 Immediate BaseOffset = Immediate::getZero();
478
479
480 bool HasBaseReg = false;
481
482
483 int64_t Scale = 0;
484
485
486
487
488
489
490
491
492
493
494
495
496
497

  SmallVector<const SCEV *, 4> BaseRegs;

501
502
503 const SCEV *ScaledReg = nullptr;
504
505
506
507
508 Immediate UnfoldedOffset = Immediate::getZero();
509
510 Formula() = default;
511
512 void initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
513

  bool isCanonical(const Loop &L) const;

516 void canonicalize(const Loop &L);
517
518 bool unscale();
519
520 bool hasZeroEnd() const;
521
522 bool countsDownToZero() const;
523
524 size_t getNumRegs() const;
  Type *getType() const;

527 void deleteBaseReg(const SCEV *&S);
528
529 bool referencesReg(const SCEV *S) const;
530 bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
531 const RegUseTracker &RegUses) const;
532
533 void print(raw_ostream &OS) const;
534 void dump() const;
535};
536
537}
538
539
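// The recursive matcher below splits an expression into sub-expressions that
// can profitably become their own base registers ("Good") and the parts that
// cannot ("Bad"); Formula::initialMatch() then adds one base register for the
// sum of each list.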
544
547 return;
548 }
549
550
552 for (const SCEV *S : Add->operands())
554 return;
555 }
556
557
558 const SCEV *Start, *Step;
559 const Loop *ARLoop;
562 !Start->isZero()) {
565
567 L, Good, Bad, SE);
568 return;
569 }
570
571
573 if (Mul->getOperand(0)->isAllOnesValue()) {
576
582 for (const SCEV *S : MyGood)
584 for (const SCEV *S : MyBad)
586 return;
587 }
588
589
590
592}
593
594
595
void Formula::initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
  SmallVector<const SCEV *, 4> Good;
  SmallVector<const SCEV *, 4> Bad;
  DoInitialMatch(S, L, Good, Bad, SE);
  if (!Good.empty()) {
    const SCEV *Sum = SE.getAddExpr(Good);
    if (!Sum->isZero())
      BaseRegs.push_back(Sum);
    HasBaseReg = true;
  }
  if (!Bad.empty()) {
    const SCEV *Sum = SE.getAddExpr(Bad);
    if (!Sum->isZero())
      BaseRegs.push_back(Sum);
    HasBaseReg = true;
  }
  canonicalize(*L);
}

620
621
622
623
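// A formula is "canonical" if: with no scaled register there is at most one
// base register; Scale == 1 is only used when a base register is also present;
// and when the scaled register is not an add-recurrence of the current loop,
// none of the base registers may be one either.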
bool Formula::isCanonical(const Loop &L) const {
  assert((Scale == 0 || ScaledReg) &&
         "ScaledReg must be non-null if Scale is non-zero");

  if (!ScaledReg)
    return BaseRegs.size() <= 1;

  if (Scale != 1)
    return true;

  if (Scale == 1 && BaseRegs.empty())
    return false;

  if (const auto *SAR = dyn_cast<SCEVAddRecExpr>(ScaledReg))
    if (SAR->getLoop() == &L)
      return true;

  // If ScaledReg is not an add-recurrence of this loop, no base register may
  // be one either, otherwise the formula would not be canonical.
  return none_of(BaseRegs, [&L](const SCEV *S) {
    return isa<SCEVAddRecExpr>(S) && cast<SCEVAddRecExpr>(S)->getLoop() == &L;
  });
}

648
649
650
651
652
653
void Formula::canonicalize(const Loop &L) {
  if (isCanonical(L))
    return;

  if (BaseRegs.empty()) {
    // No base reg? Use the scaled reg with scale = 1 as such.
    assert(ScaledReg && "Expected 1*reg => reg");
    assert(Scale == 1 && "Expected 1*reg => reg");
    BaseRegs.push_back(ScaledReg);
    Scale = 0;
    ScaledReg = nullptr;
    return;
  }

  // Keep the invariant sum in BaseRegs and one of the variant sums scaled.
  if (!ScaledReg) {
    ScaledReg = BaseRegs.pop_back_val();
    Scale = 1;
  }

  // If ScaledReg is an invariant with respect to L, find the register from
  // BaseRegs containing the recurrent expression related to loop L and swap it
  // with ScaledReg.
  const auto *SAR = dyn_cast<SCEVAddRecExpr>(ScaledReg);
  if (!SAR || SAR->getLoop() != &L) {
    auto I = find_if(BaseRegs, [&L](const SCEV *S) {
      return isa<SCEVAddRecExpr>(S) && cast<SCEVAddRecExpr>(S)->getLoop() == &L;
    });
    if (I != BaseRegs.end())
      std::swap(ScaledReg, *I);
  }
  assert(isCanonical(L) && "Failed to canonicalize?");
}

687
688
689
690
691bool Formula::unscale() {
692 if (Scale != 1)
693 return false;
694 Scale = 0;
695 BaseRegs.push_back(ScaledReg);
696 ScaledReg = nullptr;
697 return true;
698}
699
700bool Formula::hasZeroEnd() const {
701 if (UnfoldedOffset || BaseOffset)
702 return false;
703 if (BaseRegs.size() != 1 || ScaledReg)
704 return false;
705 return true;
706}
707
708bool Formula::countsDownToZero() const {
709 if (!hasZeroEnd())
710 return false;
711 assert(BaseRegs.size() == 1 && "hasZeroEnd should mean one BaseReg");
712 const APInt *StepInt;
714 return false;
716}
717
718
719
720size_t Formula::getNumRegs() const {
721 return !!ScaledReg + BaseRegs.size();
722}
723
724
725
726Type *Formula::getType() const {
727 return !BaseRegs.empty() ? BaseRegs.front()->getType() :
728 ScaledReg ? ScaledReg->getType() :
729 BaseGV ? BaseGV->getType() :
730 nullptr;
731}
732
733
734void Formula::deleteBaseReg(const SCEV *&S) {
  if (&S != &BaseRegs.back())
    std::swap(S, BaseRegs.back());
  BaseRegs.pop_back();
738}
739
740
741bool Formula::referencesReg(const SCEV *S) const {
742 return S == ScaledReg || is_contained(BaseRegs, S);
743}
744
745
746
747bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
748 const RegUseTracker &RegUses) const {
749 if (ScaledReg)
750 if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
751 return true;
752 for (const SCEV *BaseReg : BaseRegs)
753 if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))
754 return true;
755 return false;
756}
757
758#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void Formula::print(raw_ostream &OS) const {
  bool First = true;
  if (BaseGV) {
    if (!First) OS << " + "; else First = false;
    BaseGV->printAsOperand(OS, /*PrintType=*/false);
  }
  if (BaseOffset.isNonZero()) {
    if (!First) OS << " + "; else First = false;
    OS << BaseOffset;
  }
  for (const SCEV *BaseReg : BaseRegs) {
    if (!First) OS << " + "; else First = false;
    OS << "reg(" << *BaseReg << ')';
  }
  if (HasBaseReg && BaseRegs.empty()) {
    if (!First) OS << " + "; else First = false;
    OS << "**error: HasBaseReg**";
  } else if (!HasBaseReg && !BaseRegs.empty()) {
    if (!First) OS << " + "; else First = false;
    OS << "**error: !HasBaseReg**";
  }
  if (Scale != 0) {
    if (!First) OS << " + "; else First = false;
    OS << Scale << "*reg(";
    if (ScaledReg)
      OS << *ScaledReg;
    else
      OS << "<unknown>";
    OS << ')';
  }
  if (UnfoldedOffset.isNonZero()) {
    if (!First) OS << " + ";
    OS << "imm(" << UnfoldedOffset << ')';
  }
}

LLVM_DUMP_METHOD void Formula::dump() const {
  print(errs()); errs() << '\n';
}
798#endif
799
800
801
807
808
809
815
816
817
824
825
826
827
828
829
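// getExactSDiv (below) returns an expression equal to LHS divided by RHS if
// the division is exact and representable, otherwise nullptr. LSR uses it to
// discover integer scaling factors between strides.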
static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
                                ScalarEvolution &SE,
                                bool IgnoreSignificantBits = false) {
  // Handle the trivial case, which works for any SCEV type.
  if (LHS == RHS)
    return SE.getConstant(LHS->getType(), 1);

  // Handle a few RHS special cases.
  const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
  if (RC) {
    const APInt &RA = RC->getAPInt();
    // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do
    // some folding.
843 if (RA.isAllOnes()) {
844 if (LHS->getType()->isPointerTy())
        return nullptr;
      return SE.getMulExpr(LHS, RC);
    }
848
849 if (RA == 1)
850 return LHS;
851 }
852
853
855 if (!RC)
856 return nullptr;
857 const APInt &LA = C->getAPInt();
860 return nullptr;
862 }
863
864
866 if ((IgnoreSignificantBits || isAddRecSExtable(AR, SE)) && AR->isAffine()) {
868 IgnoreSignificantBits);
869 if (!Step) return nullptr;
871 IgnoreSignificantBits);
872 if (!Start) return nullptr;
873
874
875
877 }
878 return nullptr;
879 }
880
881
885 for (const SCEV *S : Add->operands()) {
887 if () return nullptr;
889 }
891 }
892 return nullptr;
893 }
894
895
898
900 if (IgnoreSignificantBits || isMulSExtable(MulRHS, SE)) {
904 if (LC && RC) {
907 if (LOps == ROps)
908 return getExactSDiv(LC, RC, SE, IgnoreSignificantBits);
909 }
910 }
911 }
912
914 bool Found = false;
915 for (const SCEV *S : Mul->operands()) {
916 if (!Found)
918 IgnoreSignificantBits)) {
919 S = Q;
920 Found = true;
921 }
922 Ops.push_back(S);
923 }
925 }
926 return nullptr;
927 }
928
929
930 return nullptr;
931}
932
933
934
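// ExtractImmediate (below) peels a constant (or constant * vscale) addend off
// of S, returning it as an Immediate and leaving the remainder in S;
// ExtractSymbol does the same for a GlobalValue base.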
938 if (C->getSignificantBits() <= 64) {
940 return Immediate::getFixed(C->getSExtValue());
941 }
945 if (Result.isNonZero())
947 return Result;
951 if (Result.isNonZero())
953
955 return Result;
959 return Immediate::getScalable(C->getSExtValue());
960 }
961 return Immediate::getZero();
962}
963
964
965
970 return GV;
971 }
975 if (Result)
977 return Result;
981 if (Result)
983
985 return Result;
986 }
987 return nullptr;
988}
989
/// Returns true if the specified instruction is using the specified value as
/// an address.
static bool isAddressUse(const TargetTransformInfo &TTI, Instruction *Inst,
                         Value *OperandVal) {
  bool isAddress = isa<LoadInst>(Inst);
  if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    if (SI->getPointerOperand() == OperandVal)
997 isAddress = true;
  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    // Check pointer operands of the common memory intrinsics.
1001 switch (II->getIntrinsicID()) {
1002 case Intrinsic::memset:
1003 case Intrinsic::prefetch:
1004 case Intrinsic::masked_load:
1005 if (II->getArgOperand(0) == OperandVal)
1006 isAddress = true;
1007 break;
1008 case Intrinsic::masked_store:
1009 if (II->getArgOperand(1) == OperandVal)
1010 isAddress = true;
1011 break;
1012 case Intrinsic::memmove:
1013 case Intrinsic::memcpy:
1014 if (II->getArgOperand(0) == OperandVal ||
1015 II->getArgOperand(1) == OperandVal)
1016 isAddress = true;
1017 break;
    default: {
      MemIntrinsicInfo IntrInfo;
      if (TTI.getTgtMemIntrinsic(II, IntrInfo)) {
1021 if (IntrInfo.PtrVal == OperandVal)
1022 isAddress = true;
1023 }
1024 }
    }
  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
    if (RMW->getPointerOperand() == OperandVal)
      isAddress = true;
  } else if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) {
    if (CmpX->getPointerOperand() == OperandVal)
      isAddress = true;
1032 }
1033 return isAddress;
1034}
1035

/// Return the type of the memory being accessed.
static MemAccessTy getAccessType(const TargetTransformInfo &TTI,
                                 Instruction *Inst, Value *OperandVal) {
  MemAccessTy AccessTy = MemAccessTy::getUnknown(Inst->getContext());

  // First get the type of memory being accessed.
  if (Type *Ty = getLoadStoreType(Inst))
    AccessTy.MemTy = Ty;

  // Then get the pointer address space.
  if (const StoreInst *SI = dyn_cast<StoreInst>(Inst))
    AccessTy.AddrSpace = SI->getPointerAddressSpace();
  else if (const LoadInst *LI = dyn_cast<LoadInst>(Inst))
    AccessTy.AddrSpace = LI->getPointerAddressSpace();
  else if (const AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst))
    AccessTy.AddrSpace = RMW->getPointerAddressSpace();
  else if (const AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst))
    AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
  else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    switch (II->getIntrinsicID()) {
1056 case Intrinsic::prefetch:
1057 case Intrinsic::memset:
1058 AccessTy.AddrSpace = II->getArgOperand(0)->getType()->getPointerAddressSpace();
1059 AccessTy.MemTy = OperandVal->getType();
1060 break;
1061 case Intrinsic::memmove:
1062 case Intrinsic::memcpy:
1064 AccessTy.MemTy = OperandVal->getType();
1065 break;
1066 case Intrinsic::masked_load:
1067 AccessTy.AddrSpace =
1068 II->getArgOperand(0)->getType()->getPointerAddressSpace();
1069 break;
1070 case Intrinsic::masked_store:
1071 AccessTy.AddrSpace =
1072 II->getArgOperand(1)->getType()->getPointerAddressSpace();
1073 break;
    default: {
      MemIntrinsicInfo IntrInfo;
      if (TTI.getTgtMemIntrinsic(II, IntrInfo) && IntrInfo.PtrVal) {
        AccessTy.AddrSpace
            = IntrInfo.PtrVal->getType()->getPointerAddressSpace();
      }
1080
1081 break;
1082 }
1083 }
1084 }
1085
1086 return AccessTy;
1087}
1088
1089
1096 return true;
1097 }
1098 return false;
1099}
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1113
1118 return false;
1121 Processed, SE);
1124 Processed, SE);
1127 Processed, SE);
1128 default:
1129 break;
1130 }
1131
1132 if (!Processed.insert(S).second)
1133 return false;
1134
1136 for (const SCEV *S : Add->operands()) {
1138 return true;
1139 }
1140 return false;
1141 }
1142
1143 const SCEV *Op0, *Op1;
1145
1148
1149
1150
1152 Value *UVal = U->getValue();
1153 for (User *UR : UVal->users()) {
1154
1156 if (UI && UI->getOpcode() == Instruction::Mul &&
1158 return SE.getSCEV(UI) == S;
1159 }
1160 }
1161 }
1162 }
1163
1166 return false;
1167 }
1168
1169
1170 return true;
1171}
1172
1173namespace {
1174
1175class LSRUse;
1176
1177}
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1189 const LSRUse &LU, const Formula &F);
1190
1191
1193 const LSRUse &LU, const Formula &F,
1194 const Loop &L);
1195
1196namespace {
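// Cost wraps TargetTransformInfo::LSRCost and accumulates the estimated cost
// of a candidate solution: instruction count, register pressure, add-rec,
// setup, immediate and scale costs. isLess() compares two costs via the
// target hook.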
class Cost {
  const Loop *L = nullptr;
1201 ScalarEvolution *SE = nullptr;
1202 const TargetTransformInfo *TTI = nullptr;
1203 TargetTransformInfo::LSRCost C;
1205
1206public:
1207 Cost() = delete;
1208 Cost(const Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,
1210 L(L), SE(&SE), TTI(&TTI), AMK(AMK) {
1211 C.Insns = 0;
1212 C.NumRegs = 0;
1213 C.AddRecCost = 0;
1214 C.NumIVMuls = 0;
1215 C.NumBaseAdds = 0;
1216 C.ImmCost = 0;
1217 C.SetupCost = 0;
1218 C.ScaleCost = 0;
1219 }
1220
1221 bool isLess(const Cost &Other) const;
1222
1223 void Lose();
1224
1225#ifndef NDEBUG
1226
1228 return ((C.Insns | C.NumRegs | C.AddRecCost | C.NumIVMuls | C.NumBaseAdds
1229 | C.ImmCost | C.SetupCost | C.ScaleCost) != ~0u)
1230 || ((C.Insns & C.NumRegs & C.AddRecCost & C.NumIVMuls & C.NumBaseAdds
1231 & C.ImmCost & C.SetupCost & C.ScaleCost) == ~0u);
1232 }
1233#endif
1234
  bool isLoser() {
    return C.NumRegs == ~0u;
  }
1239
1240 void RateFormula(const Formula &F, SmallPtrSetImpl<const SCEV *> &Regs,
1241 const DenseSet<const SCEV *> &VisitedRegs, const LSRUse &LU,
1242 bool HardwareLoopProfitable,
1243 SmallPtrSetImpl<const SCEV *> *LoserRegs = nullptr);
1244
1245 void print(raw_ostream &OS) const;
1246 void dump() const;
1247
1248private:
1249 void RateRegister(const Formula &F, const SCEV *Reg,
1250 SmallPtrSetImpl<const SCEV *> &Regs, const LSRUse &LU,
1251 bool HardwareLoopProfitable);
1252 void RatePrimaryRegister(const Formula &F, const SCEV *Reg,
1253 SmallPtrSetImpl<const SCEV *> &Regs,
1254 const LSRUse &LU, bool HardwareLoopProfitable,
1255 SmallPtrSetImpl<const SCEV *> *LoserRegs);
1256};
1257
1258
1259
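// An LSRFixup records a single use of an IV value that needs rewriting: the
// user instruction, the operand to replace, the set of loops for which the
// post-incremented value should be used, and this use's constant offset
// relative to its LSRUse.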
1260struct LSRFixup {
  /// The instruction which will be updated.
  Instruction *UserInst = nullptr;

1264
1265
1266 Value *OperandValToReplace = nullptr;
1267
1268
1269
1270
1272
1273
1274
1275
1276 Immediate Offset = Immediate::getZero();
1277
1278 LSRFixup() = default;
1279
1280 bool isUseFullyOutsideLoop(const Loop *L) const;
1281
1282 void print(raw_ostream &OS) const;
1283 void dump() const;
1284};
1285
1286
1287
1288
1289
1290
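// An LSRUse groups fixups that can be rewritten using the same formula. It
// records the use kind (Address, ICmpZero, Basic, Special), the access type
// for address uses, the range of fixup offsets, and the candidate Formulae
// the solver chooses among.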
1291class LSRUse {
1292 DenseSet<SmallVector<const SCEV *, 4>> Uniquifier;
1293
1294public:
1295
1296
1298 Basic,
1299 Special,
1300 Address,
1301 ICmpZero
1302
1303 };
1304
1305 using SCEVUseKindPair = PointerIntPair<const SCEV *, 2, KindType>;
1306
  KindType Kind;
  MemAccessTy AccessTy;
1309
  SmallVector<LSRFixup, 8> Fixups;
1312
1313
1314 Immediate MinOffset = Immediate::getFixedMax();
1315 Immediate MaxOffset = Immediate::getFixedMin();
1316
1317
1318
1319 bool AllFixupsOutsideLoop = true;
1320
1321
1322
1323
1324 bool AllFixupsUnconditional = true;
1325
1326
1327
1328
1329
1330
1331 bool RigidFormula = false;
1332
1333
1334
1335
1336
1337 Type *WidestFixupType = nullptr;
1338
1339
1340
1341
  SmallVector<Formula, 12> Formulae;

1344
1345 SmallPtrSet<const SCEV *, 4> Regs;
1346
1347 LSRUse(KindType K, MemAccessTy AT) : Kind(K), AccessTy(AT) {}
1348
1349 LSRFixup &getNewFixup() {
1350 Fixups.push_back(LSRFixup());
1351 return Fixups.back();
1352 }
1353
1354 void pushFixup(LSRFixup &f) {
1355 Fixups.push_back(f);
1356 if (Immediate::isKnownGT(f.Offset, MaxOffset))
1357 MaxOffset = f.Offset;
1358 if (Immediate::isKnownLT(f.Offset, MinOffset))
1359 MinOffset = f.Offset;
1360 }
1361
1362 bool HasFormulaWithSameRegs(const Formula &F) const;
1363 float getNotSelectedProbability(const SCEV *Reg) const;
1364 bool InsertFormula(const Formula &F, const Loop &L);
1365 void DeleteFormula(Formula &F);
1366 void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);
1367
1368 void print(raw_ostream &OS) const;
1369 void dump() const;
1370};
1371
1372}
1373
1375 LSRUse::KindType Kind, MemAccessTy AccessTy,
1376 GlobalValue *BaseGV, Immediate BaseOffset,
1377 bool HasBaseReg, int64_t Scale,
1378 Instruction *Fixup = nullptr);
1379
1382 return 1;
1384 return 0;
1390 return std::accumulate(S->operands().begin(), S->operands().end(), 0,
1391 [&](unsigned i, const SCEV *Reg) {
1392 return i + getSetupCost(Reg, Depth - 1);
1393 });
1397 return 0;
1398}
1399
1400
1401void Cost::RateRegister(const Formula &F, const SCEV *Reg,
1402 SmallPtrSetImpl<const SCEV *> &Regs, const LSRUse &LU,
1403 bool HardwareLoopProfitable) {
1405
1406
1407
1408 if (AR->getLoop() != L) {
1409
1411 return;
1412
1413
1414
1415 if (!AR->getLoop()->contains(L)) {
1416 Lose();
1417 return;
1418 }
1419
1420
1421 ++C.NumRegs;
1422 return;
1423 }
1424
1425 unsigned LoopCost = 1;
1428 const SCEV *Start;
1429 const APInt *Step;
1431
1432
1434 F.BaseOffset.isFixed() &&
1435 *Step == F.BaseOffset.getFixedValue();
1439
1440 if ((CanPreIndex || CanPostIndex) && LU.AllFixupsUnconditional)
1441 LoopCost = 0;
1442 }
1443 }
1444
1445
1446
1447 if (LU.Kind == LSRUse::ICmpZero && F.countsDownToZero() &&
1448 HardwareLoopProfitable)
1449 LoopCost = 0;
1450 C.AddRecCost += LoopCost;
1451
1452
1453
1455 if (!Regs.count(AR->getOperand(1))) {
1456 RateRegister(F, AR->getOperand(1), Regs, LU, HardwareLoopProfitable);
1457 if (isLoser())
1458 return;
1459 }
1460 }
1461 }
1462 ++C.NumRegs;
1463
1464
1465
1467
1468 C.SetupCost = std::min(C.SetupCost, 1 << 16);
1469
1472}
1473
1474
1475
1476
1477void Cost::RatePrimaryRegister(const Formula &F, const SCEV *Reg,
1478 SmallPtrSetImpl<const SCEV *> &Regs,
1479 const LSRUse &LU, bool HardwareLoopProfitable,
1480 SmallPtrSetImpl<const SCEV *> *LoserRegs) {
1481 if (LoserRegs && LoserRegs->count(Reg)) {
1482 Lose();
1483 return;
1484 }
1486 RateRegister(F, Reg, Regs, LU, HardwareLoopProfitable);
1487 if (LoserRegs && isLoser())
1489 }
1490}
1491
1492void Cost::RateFormula(const Formula &F, SmallPtrSetImpl<const SCEV *> &Regs,
1493 const DenseSet<const SCEV *> &VisitedRegs,
1494 const LSRUse &LU, bool HardwareLoopProfitable,
1495 SmallPtrSetImpl<const SCEV *> *LoserRegs) {
1496 if (isLoser())
1497 return;
1498 assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula");
1499
1500 unsigned PrevAddRecCost = C.AddRecCost;
1501 unsigned PrevNumRegs = C.NumRegs;
1502 unsigned PrevNumBaseAdds = C.NumBaseAdds;
1503 if (const SCEV *ScaledReg = F.ScaledReg) {
1504 if (VisitedRegs.count(ScaledReg)) {
1505 Lose();
1506 return;
1507 }
1508 RatePrimaryRegister(F, ScaledReg, Regs, LU, HardwareLoopProfitable,
1509 LoserRegs);
1510 if (isLoser())
1511 return;
1512 }
1513 for (const SCEV *BaseReg : F.BaseRegs) {
1514 if (VisitedRegs.count(BaseReg)) {
1515 Lose();
1516 return;
1517 }
1518 RatePrimaryRegister(F, BaseReg, Regs, LU, HardwareLoopProfitable,
1519 LoserRegs);
1520 if (isLoser())
1521 return;
1522 }
1523
1524
1525 size_t NumBaseParts = F.getNumRegs();
1526 if (NumBaseParts > 1)
1527
1528
1529 C.NumBaseAdds +=
1531 C.NumBaseAdds += (F.UnfoldedOffset.isNonZero());
1532
1533
1535
1536
1537 for (const LSRFixup &Fixup : LU.Fixups) {
1538 if (Fixup.Offset.isCompatibleImmediate(F.BaseOffset)) {
1539 Immediate Offset = Fixup.Offset.addUnsigned(F.BaseOffset);
1540 if (F.BaseGV)
1541 C.ImmCost += 64;
1542
1543 else if (Offset.isNonZero())
1544 C.ImmCost +=
1545 APInt(64, Offset.getKnownMinValue(), true).getSignificantBits();
1546
1547
1548
1549 if (LU.Kind == LSRUse::Address && Offset.isNonZero() &&
1552 C.NumBaseAdds++;
1553 } else {
1554
1555 C.ImmCost += 2048;
1556 }
1557 }
1558
1559
1562 return;
1563 }
1564
1565
1566
1567
1570 if (C.NumRegs > TTIRegNum) {
1571
1572
1573 if (PrevNumRegs > TTIRegNum)
1574 C.Insns += (C.NumRegs - PrevNumRegs);
1575 else
1576 C.Insns += (C.NumRegs - TTIRegNum);
1577 }
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
  if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd() &&
      !TTI->canMacroFuseCmp())
1591 C.Insns++;
1592
1593 C.Insns += (C.AddRecCost - PrevAddRecCost);
1594
1595
1596 if (LU.Kind != LSRUse::ICmpZero)
1597 C.Insns += C.NumBaseAdds - PrevNumBaseAdds;
1599}
1600
1601
void Cost::Lose() {
  C.Insns = std::numeric_limits<unsigned>::max();
  C.NumRegs = std::numeric_limits<unsigned>::max();
  C.AddRecCost = std::numeric_limits<unsigned>::max();
  C.NumIVMuls = std::numeric_limits<unsigned>::max();
  C.NumBaseAdds = std::numeric_limits<unsigned>::max();
  C.ImmCost = std::numeric_limits<unsigned>::max();
  C.SetupCost = std::numeric_limits<unsigned>::max();
  C.ScaleCost = std::numeric_limits<unsigned>::max();
}

1613
bool Cost::isLess(const Cost &Other) const {
  if (InsnsCost.getNumOccurrences() > 0 && InsnsCost &&
      C.Insns != Other.C.Insns)
    return C.Insns < Other.C.Insns;

  return TTI->isLSRCostLess(C, Other.C);
}
1620
1621#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1622void Cost::print(raw_ostream &OS) const {
1624 OS << C.Insns << " instruction" << (C.Insns == 1 ? " " : "s ");
1625 OS << C.NumRegs << " reg" << (C.NumRegs == 1 ? "" : "s");
1626 if (C.AddRecCost != 0)
1627 OS << ", with addrec cost " << C.AddRecCost;
1628 if (C.NumIVMuls != 0)
1629 OS << ", plus " << C.NumIVMuls << " IV mul"
1630 << (C.NumIVMuls == 1 ? "" : "s");
1631 if (C.NumBaseAdds != 0)
1632 OS << ", plus " << C.NumBaseAdds << " base add"
1633 << (C.NumBaseAdds == 1 ? "" : "s");
1634 if (C.ScaleCost != 0)
1635 OS << ", plus " << C.ScaleCost << " scale cost";
1636 if (C.ImmCost != 0)
1637 OS << ", plus " << C.ImmCost << " imm cost";
1638 if (C.SetupCost != 0)
1639 OS << ", plus " << C.SetupCost << " setup cost";
1640}

LLVM_DUMP_METHOD void Cost::dump() const {
  print(errs()); errs() << '\n';
}
1645#endif
1646
1647
1648bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
  // PHI nodes use their value in their incoming blocks.
  if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
1652 if (PN->getIncomingValue(i) == OperandValToReplace &&
1653 L->contains(PN->getIncomingBlock(i)))
1654 return false;
1655 return true;
1656 }
1657
  return !L->contains(UserInst);
1659}
1660
1661#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1662void LSRFixup::print(raw_ostream &OS) const {
1663 OS << "UserInst=";
  // Store is common and interesting enough to be worth special-casing.
  if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
    OS << "store ";
    Store->getOperand(0)->printAsOperand(OS, false);
  } else if (UserInst->getType()->isVoidTy())
    OS << UserInst->getOpcodeName();
  else
    UserInst->printAsOperand(OS, false);

1673 OS << ", OperandValToReplace=";
1674 OperandValToReplace->printAsOperand(OS, false);
1675
1676 for (const Loop *PIL : PostIncLoops) {
1677 OS << ", PostIncLoop=";
1678 PIL->getHeader()->printAsOperand(OS, false);
1679 }
1680
1681 if (Offset.isNonZero())
1682 OS << ", Offset=" << Offset;
1683}

LLVM_DUMP_METHOD void LSRFixup::dump() const {
  print(errs()); errs() << '\n';
}
1688#endif
1689
1690
1691
bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
  SmallVector<const SCEV *, 4> Key = F.BaseRegs;
  if (F.ScaledReg) Key.push_back(F.ScaledReg);
  // Unstable sort by host order ok, because this is only used for uniquifying.
  llvm::sort(Key);
  return Uniquifier.count(Key);
1698}
1699
1700
1701float LSRUse::getNotSelectedProbability(const SCEV *Reg) const {
1702 unsigned FNum = 0;
1703 for (const Formula &F : Formulae)
1704 if (F.referencesReg(Reg))
1705 FNum++;
1706 return ((float)(Formulae.size() - FNum)) / Formulae.size();
1707}
1708
1709
1710
1711bool LSRUse::InsertFormula(const Formula &F, const Loop &L) {
1712 assert(F.isCanonical(L) && "Invalid canonical representation");
1713
1714 if (!Formulae.empty() && RigidFormula)
1715 return false;
1716
  SmallVector<const SCEV *, 4> Key = F.BaseRegs;
  if (F.ScaledReg) Key.push_back(F.ScaledReg);
  // Unstable sort by host order ok, because this is only used for uniquifying.
  llvm::sort(Key);

1722 if (!Uniquifier.insert(Key).second)
1723 return false;
1724
1725
  assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
         "Zero allocated in a scaled register!");
1728#ifndef NDEBUG
1729 for (const SCEV *BaseReg : F.BaseRegs)
    assert(!BaseReg->isZero() && "Zero allocated in a base register!");
1731#endif
1732
1733
1734 Formulae.push_back(F);
1735
1736
1738 if (F.ScaledReg)
1739 Regs.insert(F.ScaledReg);
1740
1741 return true;
1742}
1743
1744
1745void LSRUse::DeleteFormula(Formula &F) {
1746 if (&F != &Formulae.back())
1748 Formulae.pop_back();
1749}
1750
1751
1752void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
1753
1754 SmallPtrSet<const SCEV *, 4> OldRegs = std::move(Regs);
1756 for (const Formula &F : Formulae) {
1757 if (F.ScaledReg) Regs.insert(F.ScaledReg);
1759 }
1760
1761
1762 for (const SCEV *S : OldRegs)
1763 if (!Regs.count(S))
1764 RegUses.dropRegister(S, LUIdx);
1765}
1766
1767#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1768void LSRUse::print(raw_ostream &OS) const {
1769 OS << "LSR Use: Kind=";
1770 switch (Kind) {
1771 case Basic: OS << "Basic"; break;
1772 case Special: OS << "Special"; break;
  case ICmpZero: OS << "ICmpZero"; break;
  case Address:
    OS << "Address of ";
    if (AccessTy.MemTy->isPointerTy())
      OS << "pointer";
1778 else {
1779 OS << *AccessTy.MemTy;
1780 }
1781
1782 OS << " in addrspace(" << AccessTy.AddrSpace << ')';
1783 }
1784
1785 OS << ", Offsets={";
1786 bool NeedComma = false;
1787 for (const LSRFixup &Fixup : Fixups) {
1788 if (NeedComma) OS << ',';
1789 OS << Fixup.Offset;
1790 NeedComma = true;
1791 }
1792 OS << '}';
1793
1794 if (AllFixupsOutsideLoop)
1795 OS << ", all-fixups-outside-loop";
1796
1797 if (AllFixupsUnconditional)
1798 OS << ", all-fixups-unconditional";
1799
1800 if (WidestFixupType)
1801 OS << ", widest fixup type: " << *WidestFixupType;
1802}

LLVM_DUMP_METHOD void LSRUse::dump() const {
  print(errs()); errs() << '\n';
}
1807#endif
1808
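// The next group of helpers asks TTI whether a (BaseGV + BaseOffset +
// HasBaseReg + Scale*reg) combination is legal for a given use kind, and
// whether the immediate parts can be folded into the addressing mode across
// the whole [MinOffset, MaxOffset] range of a use's fixups.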
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
                                 LSRUse::KindType Kind, MemAccessTy AccessTy,
1811 GlobalValue *BaseGV, Immediate BaseOffset,
1812 bool HasBaseReg, int64_t Scale,
1814 switch (Kind) {
1815 case LSRUse::Address: {
1816 int64_t FixedOffset =
1817 BaseOffset.isScalable() ? 0 : BaseOffset.getFixedValue();
1818 int64_t ScalableOffset =
1819 BaseOffset.isScalable() ? BaseOffset.getKnownMinValue() : 0;
1820 return TTI.isLegalAddressingMode(AccessTy.MemTy, BaseGV, FixedOffset,
1821 HasBaseReg, Scale, AccessTy.AddrSpace,
1822 Fixup, ScalableOffset);
1823 }
1824 case LSRUse::ICmpZero:
1825
1826
1827 if (BaseGV)
1828 return false;
1829
1830
1831 if (Scale != 0 && HasBaseReg && BaseOffset.isNonZero())
1832 return false;
1833
1834
1835
1836 if (Scale != 0 && Scale != -1)
1837 return false;
1838
1839
1840
1841 if (BaseOffset.isNonZero()) {
1842
1843
1844 if (BaseOffset.isScalable())
1845 return false;
1846
1847
1848
1849
1850
1851 if (Scale == 0)
1852
1853
1854 BaseOffset = BaseOffset.getFixed(-(uint64_t)BaseOffset.getFixedValue());
1855 return TTI.isLegalICmpImmediate(BaseOffset.getFixedValue());
1856 }
1857
1858
1859 return true;
1860
1861 case LSRUse::Basic:
1862
1863 return !BaseGV && Scale == 0 && BaseOffset.isZero();
1864
1865 case LSRUse::Special:
1866
1867 return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset.isZero();
1868 }

  llvm_unreachable("Invalid LSRUse Kind!");
}
1872
1874 Immediate MinOffset, Immediate MaxOffset,
1875 LSRUse::KindType Kind, MemAccessTy AccessTy,
1876 GlobalValue *BaseGV, Immediate BaseOffset,
1877 bool HasBaseReg, int64_t Scale) {
1878 if (BaseOffset.isNonZero() &&
1879 (BaseOffset.isScalable() != MinOffset.isScalable() ||
1880 BaseOffset.isScalable() != MaxOffset.isScalable()))
1881 return false;
1882
1883 int64_t Base = BaseOffset.getKnownMinValue();
1884 int64_t Min = MinOffset.getKnownMinValue();
1885 int64_t Max = MaxOffset.getKnownMinValue();
1887 return false;
1888 MinOffset = Immediate::get((uint64_t)Base + Min, MinOffset.isScalable());
1890 return false;
1891 MaxOffset = Immediate::get((uint64_t)Base + Max, MaxOffset.isScalable());
1892
1894 HasBaseReg, Scale) &&
1896 HasBaseReg, Scale);
1897}
1898
1900 Immediate MinOffset, Immediate MaxOffset,
1901 LSRUse::KindType Kind, MemAccessTy AccessTy,
1902 const Formula &F, const Loop &L) {
1903
1904
1905
1906
1907
1908
1909
1910 assert((F.isCanonical(L) || F.Scale != 0));
1912 F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale);
1913}
1914
1915
1917 Immediate MaxOffset, LSRUse::KindType Kind,
1918 MemAccessTy AccessTy, GlobalValue *BaseGV,
1919 Immediate BaseOffset, bool HasBaseReg, int64_t Scale) {
1920
1922 BaseOffset, HasBaseReg, Scale) ||
1923
1924
1925 (Scale == 1 &&
1927 BaseGV, BaseOffset, true, 0));
1928}
1929
1931 Immediate MaxOffset, LSRUse::KindType Kind,
1932 MemAccessTy AccessTy, const Formula &F) {
1933 return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV,
1934 F.BaseOffset, F.HasBaseReg, F.Scale);
1935}
1936
1939 if (Offset.isScalable())
1940 return TTI.isLegalAddScalableImmediate(Offset.getKnownMinValue());
1941
1942 return TTI.isLegalAddImmediate(Offset.getFixedValue());
1943}
1944
1946 const LSRUse &LU, const Formula &F) {
1947
1948 if (LU.Kind == LSRUse::Address && TTI.LSRWithInstrQueries()) {
1949 for (const LSRFixup &Fixup : LU.Fixups)
1951 (F.BaseOffset + Fixup.Offset), F.HasBaseReg,
1952 F.Scale, Fixup.UserInst))
1953 return false;
1954 return true;
1955 }
1956
1958 LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg,
1959 F.Scale);
1960}
1961
1963 const LSRUse &LU, const Formula &F,
1964 const Loop &L) {
  if (!F.Scale)
1966 return 0;
1967
1968
1969
1971 LU.AccessTy, F, L))
1972 return F.Scale != 1;
1973
1974 switch (LU.Kind) {
1975 case LSRUse::Address: {
1976
1977 int64_t ScalableMin = 0, ScalableMax = 0, FixedMin = 0, FixedMax = 0;
1978 if (F.BaseOffset.isScalable()) {
1979 ScalableMin = (F.BaseOffset + LU.MinOffset).getKnownMinValue();
1980 ScalableMax = (F.BaseOffset + LU.MaxOffset).getKnownMinValue();
1981 } else {
1982 FixedMin = (F.BaseOffset + LU.MinOffset).getFixedValue();
1983 FixedMax = (F.BaseOffset + LU.MaxOffset).getFixedValue();
1984 }
1986 LU.AccessTy.MemTy, F.BaseGV, StackOffset::get(FixedMin, ScalableMin),
1987 F.HasBaseReg, F.Scale, LU.AccessTy.AddrSpace);
1989 LU.AccessTy.MemTy, F.BaseGV, StackOffset::get(FixedMax, ScalableMax),
1990 F.HasBaseReg, F.Scale, LU.AccessTy.AddrSpace);
1991
    assert(ScaleCostMinOffset.isValid() && ScaleCostMaxOffset.isValid() &&
           "Legal addressing mode has an illegal cost!");
1994 return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
1995 }
1996 case LSRUse::ICmpZero:
1997 case LSRUse::Basic:
1998 case LSRUse::Special:
1999
2000
2001 return 0;
2002 }

  llvm_unreachable("Invalid LSRUse Kind!");
}
2006
2008 LSRUse::KindType Kind, MemAccessTy AccessTy,
2009 GlobalValue *BaseGV, Immediate BaseOffset,
2010 bool HasBaseReg) {
2011
2012 if (BaseOffset.isZero() && !BaseGV)
2013 return true;
2014
2015
2016
2017 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
2018
2019
2020
2021 if (!HasBaseReg && Scale == 1) {
2022 Scale = 0;
2023 HasBaseReg = true;
2024 }
2025
2026
2027
2028
2029
2030
2031 if (HasBaseReg && BaseOffset.isNonZero() && Kind != LSRUse::ICmpZero &&
2033 Scale = 0;
2034
2036 HasBaseReg, Scale);
2037}
2038
2041 Immediate MaxOffset, LSRUse::KindType Kind,
2042 MemAccessTy AccessTy, const SCEV *S,
2043 bool HasBaseReg) {
2044
2045 if (S->isZero()) return true;
2046
2047
2048
2051
2052
2053 if (!S->isZero()) return false;
2054
2055
2056 if (BaseOffset.isZero() && !BaseGV)
2057 return true;
2058
2059 if (BaseOffset.isScalable())
2060 return false;
2061
2062
2063
2064 int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
2065
2067 BaseOffset, HasBaseReg, Scale);
2068}
2069
2070namespace {
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
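// An IVInc is one step of an IV chain: the user instruction, the IV value it
// consumes, and the SCEV increment from the previous step. An IVChain is the
// whole sequence, headed by its first user and the common expression base.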
2081struct IVInc {
2083 Value* IVOperand;
2084 const SCEV *IncExpr;
2085
2086 IVInc(Instruction *U, Value *O, const SCEV *E)
2087 : UserInst(U), IVOperand(O), IncExpr(E) {}
2088};
2089
2090
2091
2092struct IVChain {
2094 const SCEV *ExprBase = nullptr;
2095
2096 IVChain() = default;
2097 IVChain(const IVInc &Head, const SCEV *Base)
2098 : Incs(1, Head), ExprBase(Base) {}
2099
2101
2102
2103 const_iterator begin() const {
2105 return std::next(Incs.begin());
2106 }
2107 const_iterator end() const {
2108 return Incs.end();
2109 }
2110
2111
2112 bool hasIncs() const { return Incs.size() >= 2; }
2113
2114
2116
2117
2118 Instruction *tailUserInst() const { return Incs.back().UserInst; }
2119
2120
2121 bool isProfitableIncrement(const SCEV *OperExpr,
2122 const SCEV *IncExpr,
2123 ScalarEvolution&);
2124};
2125
2126
2127
2128
2129struct ChainUsers {
2130 SmallPtrSet<Instruction*, 4> FarUsers;
2131 SmallPtrSet<Instruction*, 4> NearUsers;
2132};
2133
2134
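// LSRInstance drives the whole transformation for one loop: it collects IV
// chains and fixups, generates and filters candidate formulae, solves for the
// cheapest assignment of formulae to uses, and finally rewrites the IR.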
2135class LSRInstance {
2136 IVUsers &IU;
2137 ScalarEvolution &SE;
2138 DominatorTree &DT;
2139 LoopInfo &LI;
2140 AssumptionCache &AC;
2141 TargetLibraryInfo &TLI;
2142 const TargetTransformInfo &TTI;
2143 Loop *const L;
2144 MemorySSAUpdater *MSSAU;
2146 mutable SCEVExpander Rewriter;
2148 bool HardwareLoopProfitable = false;
2149
2150
2151
2152
2153
  Instruction *IVIncInsertPos = nullptr;
2156
2157
2158
2159
2160
2161
2162 SetVector<int64_t, SmallVector<int64_t, 8>, SmallSet<int64_t, 8>> Factors;
2163
2164
2165
2166 Cost BaselineCost;
2167
2168
2169 SmallSetVector<Type *, 4> Types;
2170
2171
  SmallVector<LSRUse, 16> Uses;

2174
2175 RegUseTracker RegUses;
2176
2177
2178
2179
2180 static const unsigned MaxChains = 8;
2181
2182
  SmallVector<IVChain, MaxChains> IVChainVec;
2185
2186 SmallPtrSet<Use*, MaxChains> IVIncSet;
2187
2188
2189 SmallVector<llvm::WeakVH, 2> ScalarEvolutionIVs;
2190
2191
2192
2193
2194
2195 SmallSetVector<Instruction *, 4> InsertedNonLCSSAInsts;
2196
2197 void OptimizeShadowIV();
2198 bool FindIVUserForCond(Instruction *Cond, IVStrideUse *&CondUse);
2199 Instruction *OptimizeMax(ICmpInst *Cond, IVStrideUse *&CondUse);
2200 void OptimizeLoopTermCond();
2201
2202 void ChainInstruction(Instruction *UserInst, Instruction *IVOper,
2203 SmallVectorImpl &ChainUsersVec);
2204 void FinalizeChain(IVChain &Chain);
2205 void CollectChains();
2206 void GenerateIVChain(const IVChain &Chain,
2207 SmallVectorImpl &DeadInsts);
2208
2209 void CollectInterestingTypesAndFactors();
2210 void CollectFixupsAndInitialFormulae();
2211
2212
2213 using UseMapTy = DenseMap<LSRUse::SCEVUseKindPair, size_t>;
2214 UseMapTy UseMap;
2215
2216 bool reconcileNewOffset(LSRUse &LU, Immediate NewOffset, bool HasBaseReg,
2217 LSRUse::KindType Kind, MemAccessTy AccessTy);
2218
2219 std::pair<size_t, Immediate> getUse(const SCEV *&Expr, LSRUse::KindType Kind,
2220 MemAccessTy AccessTy);
2221
2222 void DeleteUse(LSRUse &LU, size_t LUIdx);
2223
2224 LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);
2225
2226 void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
2227 void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
2228 void CountRegisters(const Formula &F, size_t LUIdx);
2229 bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);
2230 bool IsFixupExecutedEachIncrement(const LSRFixup &LF) const;
2231
2232 void CollectLoopInvariantFixupsAndFormulae();
2233
2234 void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
2235 unsigned Depth = 0);
2236
2237 void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
2238 const Formula &Base, unsigned Depth,
2239 size_t Idx, bool IsScaledReg = false);
2240 void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
2241 void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
2242 const Formula &Base, size_t Idx,
2243 bool IsScaledReg = false);
2244 void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
2245 void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx,
2246 const Formula &Base,
2247 const SmallVectorImpl &Worklist,
2248 size_t Idx, bool IsScaledReg = false);
2249 void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
2250 void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
2251 void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
2252 void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);
2253 void GenerateCrossUseConstantOffsets();
2254 void GenerateAllReuseFormulae();
2255
2256 void FilterOutUndesirableDedicatedRegisters();
2257
2258 size_t EstimateSearchSpaceComplexity() const;
2259 void NarrowSearchSpaceByDetectingSupersets();
2260 void NarrowSearchSpaceByCollapsingUnrolledCode();
2261 void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
2262 void NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
2263 void NarrowSearchSpaceByFilterPostInc();
2264 void NarrowSearchSpaceByDeletingCostlyFormulas();
2265 void NarrowSearchSpaceByPickingWinnerRegs();
2266 void NarrowSearchSpaceUsingHeuristics();
2267
2268 void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
2269 Cost &SolutionCost,
2270 SmallVectorImpl<const Formula *> &Workspace,
2271 const Cost &CurCost,
2272 const SmallPtrSet<const SCEV *, 16> &CurRegs,
2273 DenseSet<const SCEV *> &VisitedRegs) const;
2274 void Solve(SmallVectorImpl<const Formula *> &Solution) const;
2275
2278 const SmallVectorImpl<Instruction *> &Inputs) const;
2280 const LSRFixup &LF,
2281 const LSRUse &LU) const;
2282
2283 Value *Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
2285 SmallVectorImpl &DeadInsts) const;
2286 void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF,
2287 const Formula &F,
2288 SmallVectorImpl &DeadInsts);
2289 void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
2290 SmallVectorImpl &DeadInsts);
2291 void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution);
2292
2293public:
2294 LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT,
2295 LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC,
2296 TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU);
2297
2298 bool getChanged() const { return Changed; }
2299 const SmallVectorImpl &getScalarEvolutionIVs() const {
2300 return ScalarEvolutionIVs;
2301 }
2302
2303 void print_factors_and_types(raw_ostream &OS) const;
2304 void print_fixups(raw_ostream &OS) const;
2305 void print_uses(raw_ostream &OS) const;
2306 void print(raw_ostream &OS) const;
2307 void dump() const;
2308};
2309
2310}
2311
2312
2313
2314void LSRInstance::OptimizeShadowIV() {
2317 return;
2318
2320 UI != E; ) {
2322 ++UI;
2323 Instruction *ShadowUse = CandidateUI->getUser();
2324 Type *DestTy = nullptr;
2325 bool IsSigned = false;
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
    if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
      IsSigned = false;
2341 DestTy = UCast->getDestTy();
2342 }
    else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
2344 IsSigned = true;
2345 DestTy = SCast->getDestTy();
2346 }
2347 if (!DestTy) continue;
2348
2349
2350
2352
2354 if (!PH) continue;
2356
2357
2358
2359
2361 if (!AR) continue;
2364
2367 if (Mantissa == -1) continue;
2369 continue;
2370
2371 unsigned Entry, Latch;
2374 Latch = 1;
2375 } else {
2377 Latch = 0;
2378 }
2379
2381 if (!Init) continue;
2382 Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
2385
2386 BinaryOperator *Incr =
2388 if (!Incr) continue;
2389 if (Incr->getOpcode() != Instruction::Add
2390 && Incr->getOpcode() != Instruction::Sub)
2391 continue;
2392
2393
2394 ConstantInt *C = nullptr;
2399 else
2400 continue;
2401
    if (!C) continue;
2403
2404
2405
    if (!C->getValue().isStrictlyPositive())
2407 continue;
2408
2409
2412
2413
2414 Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
2416 Incr->getOpcode() == Instruction::Add ? Instruction::FAdd
2417 : Instruction::FSub,
2418 NewPH, CFP, "IV.S.next.", Incr->getIterator());
2420
2423
2424
2428 break;
2429 }
2430}
2431
2432
2433
2434bool LSRInstance::FindIVUserForCond(Instruction *Cond, IVStrideUse *&CondUse) {
2435 for (IVStrideUse &U : IU)
2436 if (U.getUser() == Cond) {
2437
2438
2439
2440 CondUse = &U;
2441 return true;
2442 }
2443 return false;
2444}
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
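// OptimizeMax recognizes the pattern indvars produces for a max(n, 1) trip
// count, where the exit compare tests the IV against a select/max, and
// rewrites the compare to test against n directly when the extra bound cannot
// change the result, so the max value is not kept live across the loop.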
2494Instruction *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse *&CondUse) {
2495
2498 return Cond;
2499
2502
2505 return Cond;
2507
2508
2509 const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
2510 if (IterationCount != SE.getSCEV(Sel)) return Cond;
2511
2512
2513
2514
2516 const SCEVNAryExpr *Max = nullptr;
2518 Pred = ICmpInst::ICMP_SLE;
2519 Max = S;
2521 Pred = ICmpInst::ICMP_SLT;
2522 Max = S;
2524 Pred = ICmpInst::ICMP_ULT;
2526 } else {
2527
2528 return Cond;
2529 }
2530
2531
2532
2533 if (Max->getNumOperands() != 2)
2534 return Cond;
2535
2536 const SCEV *MaxLHS = Max->getOperand(0);
2537 const SCEV *MaxRHS = Max->getOperand(1);
2538
2539
2540
2541 if (!MaxLHS ||
2542 (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One)))
2543 return Cond;
2544
2545
2546
2547 const SCEV *IV = SE.getSCEV(Cond->getOperand(0));
2550 return Cond;
2551
2553 "Loop condition operand is an addrec in a different loop!");
2554
2555
2556
2557 Value *NewRHS = nullptr;
2558 if (ICmpInst::isTrueWhenEqual(Pred)) {
2559
2562 if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
2563 NewRHS = BO->getOperand(0);
2566 if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
2567 NewRHS = BO->getOperand(0);
2568 if (!NewRHS)
2569 return Cond;
2575 NewRHS = SU->getValue();
2576 else
2577
2578 return Cond;
2579
2580
2581
2584
2585
2586
2587 ICmpInst *NewCond = new ICmpInst(Cond->getIterator(), Pred,
2588 Cond->getOperand(0), NewRHS, "scmp");
2589
2590
2592 Cond->replaceAllUsesWith(NewCond);
2593 CondUse->setUser(NewCond);
2595 Cond->eraseFromParent();
2597 if (Cmp->use_empty()) {
2599 Cmp->eraseFromParent();
2600 }
2601 return NewCond;
2602}
2603
2604
2605void
2606LSRInstance::OptimizeLoopTermCond() {
2607 SmallPtrSet<Instruction *, 4> PostIncs;
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621 BasicBlock *LatchBlock = L->getLoopLatch();
2622 SmallVector<BasicBlock*, 8> ExitingBlocks;
2623 L->getExitingBlocks(ExitingBlocks);
2625
2627 return;
2628 }
2629
2630
2631 for (BasicBlock *ExitingBlock : ExitingBlocks) {
2632
2633
2634
2635
2636
2639 continue;
2640
2642
2643
2645 if (Extract)
2647
2648
2650 continue;
2651
2652
2653 IVStrideUse *CondUse = nullptr;
2654 if (!FindIVUserForCond(Cond, CondUse))
2655 continue;
2656
2657
2658
2659
2660
2661
2662
2664 Cond = OptimizeMax(Cmp, CondUse);
2665
2666
2667
2668
2669 if (!DT.dominates(ExitingBlock, LatchBlock))
2670 continue;
2671
2672
2673
2674 if (LatchBlock != ExitingBlock)
2675 for (const IVStrideUse &UI : IU)
2676
2677
2678 if (&UI != CondUse &&
2679 !DT.properlyDominates(UI.getUser()->getParent(), ExitingBlock)) {
2680
2681
2682 const SCEV *A = IU.getStride(*CondUse, L);
2683 const SCEV *B = IU.getStride(UI, L);
        if (!A || !B) continue;
2690 else
2692 }
2693 if (const SCEVConstant *D =
2695 const ConstantInt *C = D->getValue();
2696
2697 if (C->isOne() || C->isMinusOne())
2698 goto decline_post_inc;
2699
2700 if (C->getValue().getSignificantBits() >= 64 ||
2701 C->getValue().isMinSignedValue())
2702 goto decline_post_inc;
2703
2704 if (isAddressUse(TTI, UI.getUser(), UI.getOperandValToReplace())) {
2705 MemAccessTy AccessTy =
2706 getAccessType(TTI, UI.getUser(), UI.getOperandValToReplace());
2707 int64_t Scale = C->getSExtValue();
2709 0,
2710 true, Scale,
2711 AccessTy.AddrSpace))
2712 goto decline_post_inc;
2713 Scale = -Scale;
2715 0,
2716 true, Scale,
2717 AccessTy.AddrSpace))
2718 goto decline_post_inc;
2719 }
2720 }
2721 }
2722
2723 LLVM_DEBUG(dbgs() << " Change loop exiting icmp to use postinc iv: "
2724 << *Cond << '\n');
2725
2726
2727
2728
2730 !Extract) {
2731 if (Cond->hasOneUse()) {
2733 } else {
2734
2737 Cond->setName(L->getHeader()->getName() + ".termcond");
2739
2740
2743 }
2744 }
2745
2746
2747
2748
2751
2753 decline_post_inc:;
2754 }
2755
2756
2757
2758
2759 IVIncInsertPos = L->getLoopLatch()->getTerminator();
2760 for (Instruction *Inst : PostIncs)
2762}
2763
2764
2765
2766bool LSRInstance::reconcileNewOffset(LSRUse &LU, Immediate NewOffset,
2767 bool HasBaseReg, LSRUse::KindType Kind,
2768 MemAccessTy AccessTy) {
2769 Immediate NewMinOffset = LU.MinOffset;
2770 Immediate NewMaxOffset = LU.MaxOffset;
2771 MemAccessTy NewAccessTy = AccessTy;
2772
2773
2774
2775
2776 if (LU.Kind != Kind)
2777 return false;
2778
2779
2780
2781
2782 if (Kind == LSRUse::Address) {
2783 if (AccessTy.MemTy != LU.AccessTy.MemTy) {
2784 NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(),
2785 AccessTy.AddrSpace);
2786 }
2787 }
2788
2789
2790 if (Immediate::isKnownLT(NewOffset, LU.MinOffset)) {
2792 LU.MaxOffset - NewOffset, HasBaseReg))
2793 return false;
2794 NewMinOffset = NewOffset;
2795 } else if (Immediate::isKnownGT(NewOffset, LU.MaxOffset)) {
2797 NewOffset - LU.MinOffset, HasBaseReg))
2798 return false;
2799 NewMaxOffset = NewOffset;
2800 }
2801
2802
2803
2804
2805 if (NewAccessTy.MemTy && NewAccessTy.MemTy->isVoidTy() &&
2806 (NewMinOffset.isScalable() || NewMaxOffset.isScalable()))
2807 return false;
2808
2809
2810 LU.MinOffset = NewMinOffset;
2811 LU.MaxOffset = NewMaxOffset;
2812 LU.AccessTy = NewAccessTy;
2813 return true;
2814}
2815
2816
2817
2818
2819std::pair<size_t, Immediate> LSRInstance::getUse(const SCEV *&Expr,
2820 LSRUse::KindType Kind,
2821 MemAccessTy AccessTy) {
2822 const SCEV *Copy = Expr;
2824
2825
2827 Offset, true)) {
2828 Expr = Copy;
2829 Offset = Immediate::getFixed(0);
2830 }
2831
2832 std::pair<UseMapTy::iterator, bool> P =
2833 UseMap.try_emplace(LSRUse::SCEVUseKindPair(Expr, Kind));
  if (!P.second) {
2835
2836 size_t LUIdx = P.first->second;
2837 LSRUse &LU = Uses[LUIdx];
2838 if (reconcileNewOffset(LU, Offset, true, Kind, AccessTy))
2839
2840 return std::make_pair(LUIdx, Offset);
2841 }
2842
2843
2844 size_t LUIdx = Uses.size();
2845 P.first->second = LUIdx;
2846 Uses.push_back(LSRUse(Kind, AccessTy));
2847 LSRUse &LU = Uses[LUIdx];
2848
2849 LU.MinOffset = Offset;
2850 LU.MaxOffset = Offset;
2851 return std::make_pair(LUIdx, Offset);
2852}
2853
2854
2855void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
2856 if (&LU != &Uses.back())
2858 Uses.pop_back();
2859
2860
2861 RegUses.swapAndDropUse(LUIdx, Uses.size());
2862}
2863
2864
2865
2866LSRUse *
2867LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
2868 const LSRUse &OrigLU) {
2869
2870 for (LSRUse &LU : Uses) {
2871
2872
2873
2874
2875
2876 if (&LU != &OrigLU &&
2877 LU.Kind != LSRUse::ICmpZero &&
2878 LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
2879 LU.WidestFixupType == OrigLU.WidestFixupType &&
2880 LU.HasFormulaWithSameRegs(OrigF)) {
2881
2882 for (const Formula &F : LU.Formulae) {
2883
2884
2885 if (F.BaseRegs == OrigF.BaseRegs &&
2886 F.ScaledReg == OrigF.ScaledReg &&
2887 F.BaseGV == OrigF.BaseGV &&
2888 F.Scale == OrigF.Scale &&
2889 F.UnfoldedOffset == OrigF.UnfoldedOffset) {
2890 if (F.BaseOffset.isZero())
2891 return &LU;
2892
2893
2894
2895 break;
2896 }
2897 }
2898 }
2899 }
2900
2901
2902 return nullptr;
2903}
2904
2905void LSRInstance::CollectInterestingTypesAndFactors() {
2906 SmallSetVector<const SCEV *, 4> Strides;
2907
2908
2910 for (const IVStrideUse &U : IU) {
2911 const SCEV *Expr = IU.getExpr(U);
2912 if (!Expr)
2913 continue;
2914
2915
2917
2918
2920 do {
2928 }
2929 } while (!Worklist.empty());
2930 }
2931
2932
2933 for (SmallSetVector<const SCEV *, 4>::const_iterator
2934 I = Strides.begin(), E = Strides.end(); I != E; ++I)
2935 for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
2936 std::next(I); NewStrideIter != E; ++NewStrideIter) {
2937 const SCEV *OldStride = *I;
2938 const SCEV *NewStride = *NewStrideIter;
2939
2945 else
2947 }
2948 if (const SCEVConstant *Factor =
2950 SE, true))) {
2951 if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
2952 Factors.insert(Factor->getAPInt().getSExtValue());
2953 } else if (const SCEVConstant *Factor =
2955 NewStride,
2956 SE, true))) {
2957 if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
2958 Factors.insert(Factor->getAPInt().getSExtValue());
2959 }
2960 }
2961
2962
2963
2964 if (Types.size() == 1)
2965 Types.clear();
2966
2968}
2969
2970
2971
2972
2976 for(; OI != OE; ++OI) {
2978 if (!SE.isSCEVable(Oper->getType()))
2979 continue;
2980
2984 break;
2985 }
2986 }
2987 }
2988 return OI;
2989}
2990
2991
2992
2995 return Trunc->getOperand(0);
2996 return Oper;
2997}
2998
2999
3000
3001
3002
3003
3004
3005
3006
3007
3008
3011 default:
3012 return S;
3015 return nullptr;
3023
3024
3025
3027 for (const SCEV *SubExpr : reverse(Add->operands())) {
3028 if (SubExpr->getSCEVType() == scAddExpr)
3030
3031 if (SubExpr->getSCEVType() != scMulExpr)
3032 return SubExpr;
3033 }
3034 return S;
3035 }
3038 }
3040}
3041
3042
3043
3044
3045
3046
3047bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
3048 const SCEV *IncExpr,
3049 ScalarEvolution &SE) {
3050
3052 return true;
3053
3054
3055
3059 return false;
3060 }
3061
3062 SmallPtrSet<const SCEV*, 8> Processed;
3064}
3065
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
static bool isProfitableChain(IVChain &Chain,
                              SmallPtrSetImpl<Instruction *> &Users,
                              ScalarEvolution &SE,
                              const TargetTransformInfo &TTI) {
  if (StressIVChain)
    return true;
3082
3083 if (!Chain.hasIncs())
3084 return false;
3085
  if (!Users.empty()) {
3087 LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";
3089 : Users) { dbgs() << " " << *Inst << "\n"; });
3090 return false;
3091 }
3092 assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
3093
3094
3095 int cost = 1;
3096
3097
3098
3099
3101 && SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
3102 --cost;
3103 }
3104 const SCEV *LastIncExpr = nullptr;
3105 unsigned NumConstIncrements = 0;
3106 unsigned NumVarIncrements = 0;
3107 unsigned NumReusedIncrements = 0;
3108
3109 if (TTI.isProfitableLSRChainElement(Chain.Incs[0].UserInst))
3110 return true;
3111
3112 for (const IVInc &Inc : Chain) {
3113 if (TTI.isProfitableLSRChainElement(Inc.UserInst))
3114 return true;
3115 if (Inc.IncExpr->isZero())
3116 continue;
3117
3118
3119
3121 ++NumConstIncrements;
3122 continue;
3123 }
3124
3125 if (Inc.IncExpr == LastIncExpr)
3126 ++NumReusedIncrements;
3127 else
3128 ++NumVarIncrements;
3129
3130 LastIncExpr = Inc.IncExpr;
3131 }
3132
3133
3134
3135 if (NumConstIncrements > 1)
3136 --cost;
3137
3138
3139
3140
3141
3142 cost += NumVarIncrements;
3143
3144
3145
3146 cost -= NumReusedIncrements;
3147
3148 LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost
3149 << "\n");
3150
3151 return cost < 0;
3152}
3153
3154
3155void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
3156 SmallVectorImpl &ChainUsersVec) {
3157
3158
3160 const SCEV *const OperExpr = SE.getSCEV(NextIV);
3161 const SCEV *const OperExprBase = getExprBase(OperExpr);
3162
3163
3164
3165 unsigned ChainIdx = 0, NChains = IVChainVec.size();
3166 const SCEV *LastIncExpr = nullptr;
3167 for (; ChainIdx < NChains; ++ChainIdx) {
3168 IVChain &Chain = IVChainVec[ChainIdx];
3169
3170
3171
3172
3173
    if (!StressIVChain && Chain.ExprBase != OperExprBase)
3175 continue;
3176
3179 continue;
3180
3181
3183 continue;
3184
3185
3186 const SCEV *PrevExpr = SE.getSCEV(PrevIV);
3187 const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr);
3189 continue;
3190
3191 if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
3192 LastIncExpr = IncExpr;
3193 break;
3194 }
3195 }
3196
3197
3198 if (ChainIdx == NChains) {
3200 return;
3203 return;
3204 }
3205 LastIncExpr = OperExpr;
3206
3207
3208
3210 return;
3211 ++NChains;
3212 IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
3213 OperExprBase));
3214 ChainUsersVec.resize(NChains);
3215 LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst
3216 << ") IV=" << *LastIncExpr << "\n");
3217 } else {
3218 LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst
3219 << ") IV+" << *LastIncExpr << "\n");
3220
3221 IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
3222 }
3223 IVChain &Chain = IVChainVec[ChainIdx];
3224
3225 SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers;
3226
3227 if (!LastIncExpr->isZero()) {
3228 ChainUsersVec[ChainIdx].FarUsers.insert_range(NearUsers);
3229 NearUsers.clear();
3230 }
3231
3232
3233
3234
3235
3236
3237 for (User *U : IVOper->users()) {
3239 if (!OtherUse)
3240 continue;
3241
3242
3243 IVChain::const_iterator IncIter = Chain.Incs.begin();
3244 IVChain::const_iterator IncEnd = Chain.Incs.end();
3245 for( ; IncIter != IncEnd; ++IncIter) {
3246 if (IncIter->UserInst == OtherUse)
3247 break;
3248 }
3249 if (IncIter != IncEnd)
3250 continue;
3251
3254 && IU.isIVUserOrOperand(OtherUse)) {
3255 continue;
3256 }
3257 NearUsers.insert(OtherUse);
3258 }
3259
3260
3261
3262 ChainUsersVec[ChainIdx].FarUsers.erase(UserInst);
3263}
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284
3285
3286
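// CollectChains walks the blocks on the latch-to-header dominator path in
// header-to-latch order, so chain heads are visited before their increments,
// and builds the vector of candidate IV chains together with their near/far
// user sets; unprofitable chains are discarded at the end.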
3287void LSRInstance::CollectChains() {
3290
3291 SmallVector<BasicBlock *,8> LatchPath;
3292 BasicBlock *LoopHeader = L->getHeader();
3293  for (DomTreeNode *Rung = DT.getNode(L->getLoopLatch());
3294       Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) {
3295 LatchPath.push_back(Rung->getBlock());
3296 }
3297 LatchPath.push_back(LoopHeader);
3298
3299
3300 for (BasicBlock *BB : reverse(LatchPath)) {
3301 for (Instruction &I : *BB) {
3302
3303      if (isa<PHINode>(I) || !IU.isIVUserOrOperand(&I))
3304        continue;
3305
3306
3307
3308
3309      if (SE.isSCEVable(I.getType()) && !isa<SCEVUnknown>(SE.getSCEV(&I)))
3310        continue;
3311
3312
3313 for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
3314 ChainIdx < NChains; ++ChainIdx) {
3315 ChainUsersVec[ChainIdx].NearUsers.erase(&I);
3316 }
3317
3318 SmallPtrSet<Instruction*, 4> UniqueOperands;
3319      User::op_iterator IVOpEnd = I.op_end();
3320      User::op_iterator IVOpIter = findIVOperand(I.op_begin(), IVOpEnd, L, SE);
3321      while (IVOpIter != IVOpEnd) {
3322        Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
3323        if (UniqueOperands.insert(IVOpInst).second)
3324 ChainInstruction(&I, IVOpInst, ChainUsersVec);
3325 IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
3326 }
3327 }
3328 }
3329
3330 for (PHINode &PN : L->getHeader()->phis()) {
3331    if (!SE.isSCEVable(PN.getType()))
3332      continue;
3333
3334    Instruction *IncV = dyn_cast<Instruction>(
3335        PN.getIncomingValueForBlock(L->getLoopLatch()));
3336    if (IncV)
3337 ChainInstruction(&PN, IncV, ChainUsersVec);
3338 }
3339
3340 unsigned ChainIdx = 0;
3341 for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
3342 UsersIdx < NChains; ++UsersIdx) {
3343    if (!isProfitableChain(IVChainVec[UsersIdx],
3344                           ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
3345 continue;
3346
3347 if (ChainIdx != UsersIdx)
3348 IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
3349 FinalizeChain(IVChainVec[ChainIdx]);
3350 ++ChainIdx;
3351 }
3352 IVChainVec.resize(ChainIdx);
3353}
3354
3355void LSRInstance::FinalizeChain(IVChain &Chain) {
3356 assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
3357 LLVM_DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");
3358
3359 for (const IVInc &Inc : Chain) {
3360 LLVM_DEBUG(dbgs() << " Inc: " << *Inc.UserInst << "\n");
3361 auto UseI = find(Inc.UserInst->operands(), Inc.IVOperand);
3362 assert(UseI != Inc.UserInst->op_end() && "cannot find IV operand");
3363 IVIncSet.insert(UseI);
3364 }
3365}
3366
3367 /// Return true if the IVInc can be folded into an addressing mode.
3368 static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
3369                              Value *Operand, const TargetTransformInfo &TTI) {
3370   const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
3371   Immediate IncOffset = Immediate::getZero();
3372   if (IncConst) {
3373     if (IncConst->getAPInt().getSignificantBits() > 64)
3374       return false;
3375     IncOffset = Immediate::getFixed(IncConst->getValue()->getSExtValue());
3376   } else {
3377     // Look for mul(vscale, constant) to detect a scalable increment.
3378     const APInt *C;
3379     if (!match(IncExpr, m_scev_Mul(m_scev_APInt(C), m_SCEVVScale())) ||
3380         C->getSignificantBits() > 64)
3381 return false;
3382 IncOffset = Immediate::getScalable(C->getSExtValue());
3383 }
3384
3385   if (!isAddressUse(TTI, UserInst, Operand))
3386     return false;
3387
3388 MemAccessTy AccessTy = getAccessType(TTI, UserInst, Operand);
3389   if (!isAlwaysFoldable(TTI, LSRUse::Address, AccessTy, /*BaseGV=*/nullptr,
3390 IncOffset, false))
3391 return false;
3392
3393 return true;
3394}
3395
3396
3397
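// GenerateIVChain materializes each increment of a profitable chain as a
// simple add off a single IV source value, reusing previously expanded bases
// where possible and leaving constant increments to be folded into addressing
// modes when canFoldIVIncExpr says the target can absorb them.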
3398void LSRInstance::GenerateIVChain(const IVChain &Chain,
3399                                   SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
3400
3401
3402 const IVInc &Head = Chain.Incs[0];
3403  User::op_iterator IVOpEnd = Head.UserInst->op_end();
3404
3405  User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(),
3406                                             IVOpEnd, L, SE);
3407 Value *IVSrc = nullptr;
3408  while (IVOpIter != IVOpEnd) {
3409    IVSrc = getWideOperand(*IVOpIter);
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419 if (SE.getSCEV(*IVOpIter) == Head.IncExpr
3420 || SE.getSCEV(IVSrc) == Head.IncExpr) {
3421 break;
3422 }
3423 IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
3424 }
3425 if (IVOpIter == IVOpEnd) {
3426
3427 LLVM_DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");
3428 return;
3429 }
3430 assert(IVSrc && "Failed to find IV chain source");
3431
3432 LLVM_DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n");
3433  Type *IVTy = IVSrc->getType();
3434  Type *IntTy = SE.getEffectiveSCEVType(IVTy);
3435  const SCEV *LeftOverExpr = nullptr;
3436 const SCEV *Accum = SE.getZero(IntTy);
3437  SmallVector<std::pair<const SCEV *, Value *>> Bases;
3438  Bases.emplace_back(Accum, IVSrc);
3439
3440 for (const IVInc &Inc : Chain) {
3441    Instruction *InsertPt = Inc.UserInst;
3442    if (isa<PHINode>(InsertPt))
3443      InsertPt = L->getLoopLatch()->getTerminator();
3444
3445
3446
3447 Value *IVOper = IVSrc;
3448 if (!Inc.IncExpr->isZero()) {
3449
3450
3452 Accum = SE.getAddExpr(Accum, IncExpr);
3453 LeftOverExpr = LeftOverExpr ?
3454 SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
3455 }
3456
3457
3458 bool FoundBase = false;
3459 for (auto [MapScev, MapIVOper] : reverse(Bases)) {
3460 const SCEV *Remainder = SE.getMinusSCEV(Accum, MapScev);
3462 if (!Remainder->isZero()) {
3464 Value *IncV = Rewriter.expandCodeFor(Remainder, IntTy, InsertPt);
3465 const SCEV *IVOperExpr =
3467 IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
3468 } else {
3469 IVOper = MapIVOper;
3470 }
3471
3472 FoundBase = true;
3473 break;
3474 }
3475 }
3476 if (!FoundBase && LeftOverExpr && !LeftOverExpr->isZero()) {
3477
3479 Value *IncV = Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
3482 IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
3483
3484
3485      if (!canFoldIVIncExpr(LeftOverExpr, Inc.UserInst, Inc.IVOperand, TTI)) {
3486 assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
3488 IVSrc = IVOper;
3489 LeftOverExpr = nullptr;
3490 }
3491 }
3492 Type *OperTy = Inc.IVOperand->getType();
3493 if (IVTy != OperTy) {
3495 "cannot extend a chained IV");
3497 IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain");
3498 }
3499 Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);
3502 }
3503
3504
3506 for (PHINode &Phi : L->getHeader()->phis()) {
3507 if (Phi.getType() != IVSrc->getType())
3508 continue;
3509    Instruction *PostIncV = dyn_cast<Instruction>(
3510        Phi.getIncomingValueForBlock(L->getLoopLatch()));
3511 if (!PostIncV || (SE.getSCEV(PostIncV) != SE.getSCEV(IVSrc)))
3512 continue;
3513 Value *IVOper = IVSrc;
3514    Type *PostIncTy = PostIncV->getType();
3515    if (IVTy != PostIncTy) {
3517 IRBuilder<> Builder(L->getLoopLatch()->getTerminator());
3518 Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc());
3519 IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain");
3520 }
3521 Phi.replaceUsesOfWith(PostIncV, IVOper);
3523 }
3524 }
3525}
3526
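// CollectFixupsAndInitialFormulae visits every interesting IV use recorded by
// IVUsers, classifies it (Address, ICmpZero, or Basic), attaches a fixup to a
// new or existing LSRUse, and seeds each use with an initial formula matching
// its SCEV expression so later reuse generation has something to start from.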
3527void LSRInstance::CollectFixupsAndInitialFormulae() {
3528 BranchInst *ExitBranch = nullptr;
3529 bool SaveCmp = TTI.canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &TLI);
3530
3531
3532 SmallPtrSet<const SCEV *, 16> Regs;
3533 DenseSet<const SCEV *> VisitedRegs;
3534 DenseSet<size_t> VisitedLSRUse;
3535
3536 for (const IVStrideUse &U : IU) {
3537    Instruction *UserInst = U.getUser();
3538
3539    User::op_iterator UseI =
3540        find(UserInst->operands(), U.getOperandValToReplace());
3541 assert(UseI != UserInst->op_end() && "cannot find IV operand");
3542 if (IVIncSet.count(UseI)) {
3543 LLVM_DEBUG(dbgs() << "Use is in profitable chain: " << **UseI << '\n');
3544 continue;
3545 }
3546
3547 LSRUse::KindType Kind = LSRUse::Basic;
3548 MemAccessTy AccessTy;
3549 if (isAddressUse(TTI, UserInst, U.getOperandValToReplace())) {
3550 Kind = LSRUse::Address;
3551 AccessTy = getAccessType(TTI, UserInst, U.getOperandValToReplace());
3552 }
3553
3554 const SCEV *S = IU.getExpr(U);
3555 if (!S)
3556 continue;
3557    PostIncLoopSet TmpPostIncLoops = U.getPostIncLoops();
3558
3559
3560
3561
3562
3563
3564
3565    if (ICmpInst *CI = dyn_cast<ICmpInst>(UserInst)) {
3566
3567
3568      if (SaveCmp && CI == dyn_cast<ICmpInst>(ExitBranch->getCondition()))
3569        continue;
3570 if (CI->isEquality()) {
3571
3572
3573 Value *NV = CI->getOperand(1);
3574 if (NV == U.getOperandValToReplace()) {
3575 CI->setOperand(1, CI->getOperand(0));
3576 CI->setOperand(0, NV);
3577 NV = CI->getOperand(1);
3579 }
3580
3581
3582 const SCEV *N = SE.getSCEV(NV);
3583        if (SE.isLoopInvariant(N, L) && Rewriter.isSafeToExpand(N) &&
3584            (!NV->getType()->isPointerTy() ||
3585             SE.getPointerBase(N) == SE.getPointerBase(S))) {
3586
3587
3588          N = normalizeForPostIncUse(N, TmpPostIncLoops, SE);
3589          if (!N)
3590            continue;
3591          Kind = LSRUse::ICmpZero;
3592          S = SE.getMinusSCEV(N, S);
3593        } else if (L->isLoopInvariant(NV) &&
3594                   (!isa<Instruction>(NV) ||
3595                    DT.dominates(cast<Instruction>(NV), L->getHeader())) &&
3596                   !NV->getType()->isPointerTy()) {
3597
3598
3599
3600
3601
3602
3605 if ()
3606 continue;
3607 Kind = LSRUse::ICmpZero;
3610 }
3611
3612
3613
3614 for (size_t i = 0, e = Factors.size(); i != e; ++i)
3615 if (Factors[i] != -1)
3616 Factors.insert(-(uint64_t)Factors[i]);
3617 Factors.insert(-1);
3618 }
3619 }
3620
3621
3622 std::pair<size_t, Immediate> P = getUse(S, Kind, AccessTy);
3623 size_t LUIdx = P.first;
3624 Immediate Offset = P.second;
3625 LSRUse &LU = Uses[LUIdx];
3626
3627
3628 LSRFixup &LF = LU.getNewFixup();
3629 LF.UserInst = UserInst;
3630 LF.OperandValToReplace = U.getOperandValToReplace();
3631 LF.PostIncLoops = TmpPostIncLoops;
3633 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3634 LU.AllFixupsUnconditional &= IsFixupExecutedEachIncrement(LF);
3635
3636
3637 if (!VisitedLSRUse.count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) {
3638 Formula F;
3639 F.initialMatch(S, L, SE);
3640 BaselineCost.RateFormula(F, Regs, VisitedRegs, LU,
3641 HardwareLoopProfitable);
3642 VisitedLSRUse.insert(LUIdx);
3643 }
3644
3645 if (!LU.WidestFixupType ||
3648 LU.WidestFixupType = LF.OperandValToReplace->getType();
3649
3650
3651 if (LU.Formulae.empty()) {
3652 InsertInitialFormula(S, LU, LUIdx);
3653 CountRegisters(LU.Formulae.back(), LUIdx);
3654 }
3655 }
3656
3658}
3659
3660
3661
3662void LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU,
3663 size_t LUIdx) {
3664
3665  if (!Rewriter.isSafeToExpand(S))
3666 LU.RigidFormula = true;
3667
3668 Formula F;
3669 F.initialMatch(S, L, SE);
3670 bool Inserted = InsertFormula(LU, LUIdx, F);
3671 assert(Inserted && "Initial formula already exists!"); (void)Inserted;
3672}
3673
3674
3675
3676void
3677LSRInstance::InsertSupplementalFormula(const SCEV *S,
3678 LSRUse &LU, size_t LUIdx) {
3679 Formula F;
3680 F.BaseRegs.push_back(S);
3681 F.HasBaseReg = true;
3682 bool Inserted = InsertFormula(LU, LUIdx, F);
3683 assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
3684}
3685
3686
3687void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
3688 if (F.ScaledReg)
3689 RegUses.countRegister(F.ScaledReg, LUIdx);
3690 for (const SCEV *BaseReg : F.BaseRegs)
3691 RegUses.countRegister(BaseReg, LUIdx);
3692}
3693
3694
3695
3696bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
3697
3698 assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) &&
3699 "Formula is illegal");
3700
3701 if (!LU.InsertFormula(F, *L))
3702 return false;
3703
3704 CountRegisters(F, LUIdx);
3705 return true;
3706}
3707
3708
3709
3710bool LSRInstance::IsFixupExecutedEachIncrement(const LSRFixup &LF) const {
3711
3712
3713  return DT.dominates(LF.UserInst->getParent(), L->getLoopLatch());
3714}
3715
3716
3717
3718
3719
3720
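// CollectLoopInvariantFixupsAndFormulae walks the registers collected so far
// looking for loop-invariant values that also have users inside the loop, and
// gives each such value its own LSRUse plus a supplemental formula so the
// cost model accounts for keeping it live in a register across the loop.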
3721void
3722LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
3723  SmallVector<const SCEV *, 8> Worklist(RegUses.begin(), RegUses.end());
3724  SmallPtrSet<const SCEV *, 32> Visited;
3725
3726
3727
3728  if (AMK == TTI::AMK_PostIndexed)
3729    return;
3730
3731 while (!Worklist.empty()) {
3732    const SCEV *S = Worklist.pop_back_val();
3733
3734
3735 if (!Visited.insert(S).second)
3736 continue;
3737
3741 Worklist.push_back(C->getOperand());
3746 const Value *V = US->getValue();
3748
3749 if (L->contains(Inst)) continue;
3751
3752 continue;
3753 for (const Use &U : V->uses()) {
3755
3756 if (!UserInst)
3757 continue;
3758
3759 if (UserInst->isEHPad())
3760 continue;
3761
3762
3763 if (UserInst->getParent()->getParent() != L->getHeader()->getParent())
3764 continue;
3765
3770 if (!DT.dominates(L->getHeader(), UseBB))
3771 continue;
3772
3774 continue;
3775
3776
3777
3778
3779
3780
3781
3782
3785 bool HasIncompatibleEHPTerminatedBlock = false;
3787 for (unsigned int I = 0; I < PhiNode->getNumIncomingValues(); I++) {
3788 if (PhiNode->getIncomingValue(I) == ExpectedValue) {
3789 if (PhiNode->getIncomingBlock(I)->getTerminator()->isEHPad()) {
3790 HasIncompatibleEHPTerminatedBlock = true;
3791 break;
3792 }
3793 }
3794 }
3795 if (HasIncompatibleEHPTerminatedBlock) {
3796 continue;
3797 }
3798 }
3799
3800
3802 continue;
3803
3804
3806 const SCEV *UserS = SE.getSCEV(const_cast<Instruction *>(UserInst));
3807
3809 continue;
3810 if (UserS == US) {
3813 continue;
3814 }
3815 }
3816
3818 unsigned OtherIdx = .getOperandNo();
3819 Value *OtherOp = ICI->getOperand(OtherIdx);
3821 continue;
3822 }
3823
3824
3825
3827 continue;
3828
3829 std::pair<size_t, Immediate> P =
3830 getUse(S, LSRUse::Basic, MemAccessTy());
3831 size_t LUIdx = P.first;
3832 Immediate Offset = P.second;
3833 LSRUse &LU = Uses[LUIdx];
3834 LSRFixup &LF = LU.getNewFixup();
3835 LF.UserInst = const_cast<Instruction *>(UserInst);
3836 LF.OperandValToReplace = U;
3838 LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
3839 LU.AllFixupsUnconditional &= IsFixupExecutedEachIncrement(LF);
3840 if (!LU.WidestFixupType ||
3843 LU.WidestFixupType = LF.OperandValToReplace->getType();
3844 InsertSupplementalFormula(US, LU, LUIdx);
3845 CountRegisters(LU.Formulae.back(), Uses.size() - 1);
3846 break;
3847 }
3848 }
3849 }
3850}
3851
3852
3853
3854
3855
3856
3859 const Loop *L,
3861 unsigned Depth = 0) {
3862
3864 return S;
3865
3867
3868 for (const SCEV *S : Add->operands()) {
3870 if (Remainder)
3871 Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
3872 }
3873 return nullptr;
3874 }
3875 const SCEV *Start, *Step;
3877 const SCEV *Op1;
3879
3880 if (Start->isZero())
3881 return S;
3882
3884
3885
3888 Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
3889 Remainder = nullptr;
3890 }
3891 if (Remainder != Start) {
3892 if (!Remainder)
3896
3898 }
3900
3903 if (Remainder)
3905 return nullptr;
3906 }
3907 return S;
3908}
3909
3910
3911
3913 LSRUse &LU, const SCEV *S, const Loop *L,
3915 if (LU.Kind != LSRUse::Address ||
3916 !LU.AccessTy.getType()->isIntOrIntVectorTy())
3917 return false;
3918 const SCEV *Start;
3920 return false;
3921
3922 if (TTI.isIndexedLoadLegal(TTI.MIM_PostInc, S->getType()) ||
3923 TTI.isIndexedStoreLegal(TTI.MIM_PostInc, S->getType())) {
3925 return true;
3926 }
3927 return false;
3928}
3929
3930
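// GenerateReassociationsImpl splits a base register that is an add of several
// subexpressions into alternative formulae, hoisting always-foldable pieces
// into the unfolded offset and leaving the remainder in registers, then
// recurses on the newly inserted formula up to the depth limit.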
3931void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
3932 const Formula &Base,
3933 unsigned Depth, size_t Idx,
3934 bool IsScaledReg) {
3935 const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
3936
3937
3938
3939
3941 return;
3943 const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE);
3944 if (Remainder)
3946
3947 if (AddOps.size() == 1)
3948 return;
3949
3951 JE = AddOps.end();
3952 J != JE; ++J) {
3953
3954
3956 continue;
3957
3958
3959
3961 LU.AccessTy, *J, Base.getNumRegs() > 1))
3962 continue;
3963
3964
3966 InnerAddOps.append(std::next(J), std::as_const(AddOps).end());
3967
3968
3969
3970 if (InnerAddOps.size() == 1 &&
3972 LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
3973 continue;
3974
3975 const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
3976 if (InnerSum->isZero())
3977 continue;
3978      Formula F = Base;
3979
3980 if (F.UnfoldedOffset.isNonZero() && F.UnfoldedOffset.isScalable())
3981 continue;
3982
3983
3988 F.UnfoldedOffset =
3989 Immediate::getFixed((uint64_t)F.UnfoldedOffset.getFixedValue() +
3991 if (IsScaledReg) {
3992 F.ScaledReg = nullptr;
3993 F.Scale = 0;
3994 } else
3995 F.BaseRegs.erase(F.BaseRegs.begin() + Idx);
3996 } else if (IsScaledReg)
3997 F.ScaledReg = InnerSum;
3998 else
3999 F.BaseRegs[Idx] = InnerSum;
4000
4001
4006 F.UnfoldedOffset =
4007 Immediate::getFixed((uint64_t)F.UnfoldedOffset.getFixedValue() +
4009 else
4010 F.BaseRegs.push_back(*J);
4011
4012
4013 F.canonicalize(*L);
4014
4015 if (InsertFormula(LU, LUIdx, F))
4016
4017
4018
4019
4020
4021
4022 GenerateReassociations(LU, LUIdx, LU.Formulae.back(),
4024 }
4025}
4026
4027
4028void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
4030 assert(Base.isCanonical(*L) && "Input must be in the canonical form");
4031
4033 return;
4034
4035 for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
4036 GenerateReassociationsImpl(LU, LUIdx, Base, Depth, i);
4037
4038 if (Base.Scale == 1)
4039 GenerateReassociationsImpl(LU, LUIdx, Base, Depth,
4040 -1, true);
4041}
4042
4043
4044
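// GenerateCombinations folds all loop-invariant base registers of a formula
// into a single combined register (and, when legal, folds the unfolded offset
// in as well), producing variants that need fewer registers.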
4045void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
4046 Formula Base) {
4047
4048 if (Base.BaseRegs.size() + (Base.Scale == 1) +
4049 (Base.UnfoldedOffset.isNonZero()) <=
4050 1)
4051 return;
4052
4053
4054
4055 Base.unscale();
4057 Formula NewBase = Base;
4058 NewBase.BaseRegs.clear();
4059 Type *CombinedIntegerType = nullptr;
4060 for (const SCEV *BaseReg : Base.BaseRegs) {
4063 if (!CombinedIntegerType)
4065 Ops.push_back(BaseReg);
4066 }
4067 else
4068 NewBase.BaseRegs.push_back(BaseReg);
4069 }
4070
4071
4072 if (Ops.size() == 0)
4073 return;
4074
4075
4076
4077 auto GenerateFormula = [&](const SCEV *Sum) {
4078 Formula F = NewBase;
4079
4080
4081
4082
4084 return;
4085
4086 F.BaseRegs.push_back(Sum);
4087 F.canonicalize(*L);
4088 (void)InsertFormula(LU, LUIdx, F);
4089 };
4090
4091
4092 if (Ops.size() > 1) {
4094 GenerateFormula(SE.getAddExpr(OpsCopy));
4095 }
4096
4097
4098
4099 if (NewBase.UnfoldedOffset.isNonZero() && NewBase.UnfoldedOffset.isFixed()) {
4100 assert(CombinedIntegerType && "Missing a type for the unfolded offset");
4102 NewBase.UnfoldedOffset.getFixedValue(), true));
4103 NewBase.UnfoldedOffset = Immediate::getFixed(0);
4105 }
4106}
4107
4108
4109void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
4110 const Formula &Base, size_t Idx,
4111 bool IsScaledReg) {
4112 const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
4114 if (G->isZero() || !GV)
4115 return;
4116  Formula F = Base;
4117  F.BaseGV = GV;
4118  if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
4119 return;
4120 if (IsScaledReg)
4122 else
4124 (void)InsertFormula(LU, LUIdx, F);
4125}
4126
4127
4128void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
4129 Formula Base) {
4130
4131 if (Base.BaseGV) return;
4132
4133 for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
4134 GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, i);
4135 if (Base.Scale == 1)
4136 GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, -1,
4137 true);
4138}
4139
4140
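// GenerateConstantOffsetsImpl tries shifting the constant part of the address
// between the base (or scaled) register and the immediate field, for each
// candidate offset in the worklist, keeping only the variants the target can
// legally fold.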
4141void LSRInstance::GenerateConstantOffsetsImpl(
4142 LSRUse &LU, unsigned LUIdx, const Formula &Base,
4143    const SmallVectorImpl<Immediate> &Worklist, size_t Idx, bool IsScaledReg) {
4144
4145 auto GenerateOffset = [&](const SCEV *G, Immediate Offset) {
4146    Formula F = Base;
4147    if (!Base.BaseOffset.isCompatibleImmediate(Offset))
4148 return;
4149 F.BaseOffset = Base.BaseOffset.subUnsigned(Offset);
4150
4151 if (isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) {
4152
4153 const SCEV *NewOffset = Offset.getSCEV(SE, G->getType());
4154 const SCEV *NewG = SE.getAddExpr(NewOffset, G);
4155
4156 if (NewG->isZero()) {
4157 if (IsScaledReg) {
4158 F.Scale = 0;
4159 F.ScaledReg = nullptr;
4160 } else
4161 F.deleteBaseReg(F.BaseRegs[Idx]);
4162 F.canonicalize(*L);
4163 } else if (IsScaledReg)
4164 F.ScaledReg = NewG;
4165 else
4166 F.BaseRegs[Idx] = NewG;
4167
4168 (void)InsertFormula(LU, LUIdx, F);
4169 }
4170 };
4171
4172 const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
4173
4174
4175
4176
4177
4178
4179
4180
4181
4183 const APInt *StepInt;
4187
4188 for (Immediate Offset : Worklist) {
4189 if (Offset.isFixed()) {
4190 Offset = Immediate::getFixed(Offset.getFixedValue() - Step);
4191 GenerateOffset(G, Offset);
4192 }
4193 }
4194 }
4195 }
4196 for (Immediate Offset : Worklist)
4197 GenerateOffset(G, Offset);
4198
4199  Immediate Imm = ExtractImmediate(G, SE);
4200  if (G->isZero() || Imm.isZero() ||
4201      !Base.BaseOffset.isCompatibleImmediate(Imm))
4202    return;
4203  Formula F = Base;
4204 F.BaseOffset = F.BaseOffset.addUnsigned(Imm);
4205  if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
4206 return;
4207 if (IsScaledReg) {
4209 } else {
4211
4212
4213 F.canonicalize(*L);
4214 }
4215 (void)InsertFormula(LU, LUIdx, F);
4216}
4217
4218
4219void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
4220 Formula Base) {
4221
4222
4224 Worklist.push_back(LU.MinOffset);
4225 if (LU.MaxOffset != LU.MinOffset)
4226 Worklist.push_back(LU.MaxOffset);
4227
4228 for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
4229 GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, i);
4230 if (Base.Scale == 1)
4231 GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, -1,
4232 true);
4233}
4234
4235
4236
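// GenerateICmpZeroScales: for ICmpZero uses, multiplying everything by a
// factor keeps the comparison against zero valid, so try each interesting
// factor, scaling offsets and registers, and keep only variants whose scaled
// immediates neither overflow nor become illegal for the target.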
4237void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
4238 Formula Base) {
4239 if (LU.Kind != LSRUse::ICmpZero) return;
4240
4241
4242 Type *IntTy = Base.getType();
4243 if (!IntTy) return;
4245
4246
4247 if (LU.MinOffset != LU.MaxOffset) return;
4248
4249
4250 if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
4251 return;
4252 for (const SCEV *BaseReg : Base.BaseRegs)
4253 if (BaseReg->getType()->isPointerTy())
4254 return;
4255  assert(!Base.BaseGV && "ICmpZero use is not legal!");
4256
4257
4258 for (int64_t Factor : Factors) {
4259
4261 continue;
4262
4263 if (Base.BaseOffset.isMin() && Factor == -1)
4264 continue;
4265
4266 if (Base.BaseOffset.isNonZero() && Base.BaseOffset.isScalable())
4267 continue;
4268 Immediate NewBaseOffset = Base.BaseOffset.mulUnsigned(Factor);
4269 assert(Factor != 0 && "Zero factor not expected!");
4270 if (NewBaseOffset.getFixedValue() / Factor !=
4271 Base.BaseOffset.getFixedValue())
4272 continue;
4273
4276 continue;
4277
4278
4279 Immediate Offset = LU.MinOffset;
4280 if (Offset.isMin() && Factor == -1)
4281 continue;
4283 if (Offset.getFixedValue() / Factor != LU.MinOffset.getFixedValue())
4284 continue;
4285
4288 continue;
4289
4291 F.BaseOffset = NewBaseOffset;
4292
4293
4295 continue;
4296
4297
4298 F.BaseOffset = F.BaseOffset.addUnsigned(Offset).subUnsigned(LU.MinOffset);
4299
4300 const SCEV *FactorS = SE.getConstant(IntTy, Factor);
4301
4302
4303 for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {
4304 F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS);
4305 if (getExactSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i])
4306 goto next;
4307 }
4308
4309
4310 if (F.ScaledReg) {
4311 F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS);
4313 continue;
4314 }
4315
4316
4317 if (F.UnfoldedOffset.isNonZero()) {
4318 if (F.UnfoldedOffset.isMin() && Factor == -1)
4319 continue;
4320 F.UnfoldedOffset = F.UnfoldedOffset.mulUnsigned(Factor);
4321 if (F.UnfoldedOffset.getFixedValue() / Factor !=
4322 Base.UnfoldedOffset.getFixedValue())
4323 continue;
4324
4326 IntTy, F.UnfoldedOffset.getFixedValue()))
4327 continue;
4328 }
4329
4330
4331 (void)InsertFormula(LU, LUIdx, F);
4332 next:;
4333 }
4334}
4335
4336
4337
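// GenerateScales tries turning one of the base registers into a scaled
// register for each interesting factor, provided the resulting addressing
// mode (or the Special use kind for out-of-loop Basic uses) stays legal.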
4338void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
4339
4340 Type *IntTy = Base.getType();
4341 if (!IntTy) return;
4342
4343
4344
4345  if (Base.Scale != 0 && !Base.unscale())
4346 return;
4347
4348 assert(Base.Scale == 0 && "unscale did not did its job!");
4349
4350
4351 for (int64_t Factor : Factors) {
4352 Base.Scale = Factor;
4353 Base.HasBaseReg = Base.BaseRegs.size() > 1;
4354
4355    if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
4356                    Base)) {
4357
4358
4359 if (LU.Kind == LSRUse::Basic &&
4360 isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
4361 LU.AccessTy, Base) &&
4362 LU.AllFixupsOutsideLoop)
4363 LU.Kind = LSRUse::Special;
4364 else
4365 continue;
4366 }
4367
4368
4369    if (LU.Kind == LSRUse::ICmpZero && !Base.HasBaseReg &&
4370        Base.BaseOffset.isZero() && !Base.BaseGV)
4371 continue;
4372
4373 for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
4375 if (AR && (AR->getLoop() == L || LU.AllFixupsOutsideLoop)) {
4376 const SCEV *FactorS = SE.getConstant(IntTy, Factor);
4377 if (FactorS->isZero())
4378 continue;
4379
4380
4381 if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true))
4382 if (!Quotient->isZero()) {
4383
4385 F.ScaledReg = Quotient;
4386 F.deleteBaseReg(F.BaseRegs[i]);
4387
4388
4389
4390 if (F.Scale == 1 && (F.BaseRegs.empty() ||
4391 (AR->getLoop() != L && LU.AllFixupsOutsideLoop)))
4392 continue;
4393
4394
4395 if (F.Scale == 1 && LU.AllFixupsOutsideLoop)
4396 F.canonicalize(*L);
4397 (void)InsertFormula(LU, LUIdx, F);
4398 }
4399 }
4400 }
4401 }
4402}
4403
4404
4405
4406
4407
4408
4409static const SCEV *
4411 const SCEV *Expr, Type *ToTy,
4413 const SCEV *Result = nullptr;
4414 for (auto &L : Loops) {
4418 if (!New || (Result && New != Result))
4419 return nullptr;
4420 Result = New;
4421 }
4422
4423 assert(Result && "failed to create expression");
4424 return Result;
4425}
4426
4427
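// GenerateTruncates produces reuse formulae that re-express the registers of
// a formula in another interesting integer type and truncate back, provided
// no register collapses to zero and the formula still shares registers with
// other uses.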
4428void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
4429
4430 if (Base.BaseGV) return;
4431
4432
4433 Type *DstTy = Base.getType();
4434 if (!DstTy) return;
4436 return;
4437
4438
4439
4440 if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
4441 return;
4443 [](const SCEV *S) { return S->getType()->isPointerTy(); }))
4444 return;
4445
4447 for (auto &LF : LU.Fixups)
4448 Loops.push_back(LF.PostIncLoops);
4449
4450 for (Type *SrcTy : Types) {
4453
4454
4455
4456
4457
4458 if (F.ScaledReg) {
4459 const SCEV *NewScaledReg =
4461 if (!NewScaledReg || NewScaledReg->isZero())
4462 continue;
4463 F.ScaledReg = NewScaledReg;
4464 }
4465 bool HasZeroBaseReg = false;
4466 for (const SCEV *&BaseReg : F.BaseRegs) {
4467 const SCEV *NewBaseReg =
4469 if (!NewBaseReg || NewBaseReg->isZero()) {
4470 HasZeroBaseReg = true;
4471 break;
4472 }
4474 }
4475 if (HasZeroBaseReg)
4476 continue;
4477
4478
4479
4480      if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
4481 continue;
4482
4483 F.canonicalize(*L);
4484 (void)InsertFormula(LU, LUIdx, F);
4485 }
4486 }
4487}
4488
4489namespace {
4490
4491
4492
4493
4494struct WorkItem {
4495 size_t LUIdx;
4496 Immediate Imm;
4497 const SCEV *OrigReg;
4498
4499 WorkItem(size_t LI, Immediate I, const SCEV *R)
4500 : LUIdx(LI), Imm(I), OrigReg(R) {}
4501
4502 void print(raw_ostream &OS) const;
4503 void dump() const;
4504};
4505
4506}
4507
4508#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4509void WorkItem::print(raw_ostream &OS) const {
4510 OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
4511 << " , add offset " << Imm;
4512}
4513
4516}
4517#endif
4518
4519
4520
4521void LSRInstance::GenerateCrossUseConstantOffsets() {
4522
4523 using ImmMapTy = std::map<Immediate, const SCEV *, KeyOrderTargetImmediate>;
4524
4525 DenseMap<const SCEV *, ImmMapTy> Map;
4526  DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
4527  SmallVector<const SCEV *, 8> Sequence;
4528  for (const SCEV *Use : RegUses) {
4529    const SCEV *Reg = Use; // Make a copy for ExtractImmediate to modify.
4530    Immediate Imm = ExtractImmediate(Reg, SE);
4531    auto Pair = Map.try_emplace(Reg);
4532    if (Pair.second)
4533      Sequence.push_back(Reg);
4534    Pair.first->second.insert(std::make_pair(Imm, Use));
4535 UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(Use);
4536 }
4537
4538
4539
4540
4542 SmallSet<std::pair<size_t, Immediate>, 32, KeyOrderSizeTAndImmediate>
4543 UniqueItems;
4544 for (const SCEV *Reg : Sequence) {
4545 const ImmMapTy &Imms = Map.find(Reg)->second;
4546
4547
4548 if (Imms.size() == 1)
4549 continue;
4550
4551 LLVM_DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
4552 for (const auto &Entry
4553 : Imms) dbgs()
4554 << ' ' << Entry.first;
4555 dbgs() << '\n');
4556
4557
4558 for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
4559 J != JE; ++J) {
4560 const SCEV *OrigReg = J->second;
4561
4562 Immediate JImm = J->first;
4563 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
4564
4566 UsedByIndicesMap[Reg].count() == 1) {
4567 LLVM_DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg
4568 << '\n');
4569 continue;
4570 }
4571
4572
4573
4574 Immediate First = Imms.begin()->first;
4575 Immediate Last = std::prev(Imms.end())->first;
4576      if (!First.isCompatibleImmediate(Last)) {
4577 LLVM_DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg
4578 << "\n");
4579 continue;
4580 }
4581
4582
4583 bool Scalable = First.isScalable() || Last.isScalable();
4584 int64_t FI = First.getKnownMinValue();
4585 int64_t LI = Last.getKnownMinValue();
4586
4587
4588 int64_t Avg = (FI & LI) + ((FI ^ LI) >> 1);
4589
4590
4591 Avg = Avg + ((FI ^ LI) & ((uint64_t)Avg >> 63));
4592 ImmMapTy::const_iterator OtherImms[] = {
4593 Imms.begin(), std::prev(Imms.end()),
4594 Imms.lower_bound(Immediate::get(Avg, Scalable))};
4595 for (const auto &M : OtherImms) {
4596 if (M == J || M == JE) continue;
4597 if (!JImm.isCompatibleImmediate(M->first))
4598 continue;
4599
4600
4601 Immediate Imm = JImm.subUnsigned(M->first);
4602 for (unsigned LUIdx : UsedByIndices.set_bits())
4603
4604 if (UniqueItems.insert(std::make_pair(LUIdx, Imm)).second)
4605 WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));
4606 }
4607 }
4608 }
4609
4610 Map.clear();
4612 UsedByIndicesMap.clear();
4613 UniqueItems.clear();
4614
4615
4616 for (const WorkItem &WI : WorkItems) {
4617 size_t LUIdx = WI.LUIdx;
4618 LSRUse &LU = Uses[LUIdx];
4619 Immediate Imm = WI.Imm;
4620 const SCEV *OrigReg = WI.OrigReg;
4621
4622    Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());
4623    const SCEV *NegImmS = Imm.getNegativeSCEV(SE, IntTy);
4624    unsigned BitWidth = SE.getTypeSizeInBits(IntTy);
4625
4626
4627 for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
4628 Formula F = LU.Formulae[L];
4629
4630
4631
4632
4633 F.unscale();
4634
4635 if (F.ScaledReg == OrigReg) {
4636        if (!F.BaseOffset.isCompatibleImmediate(Imm))
4637 continue;
4638 Immediate Offset = F.BaseOffset.addUnsigned(Imm.mulUnsigned(F.Scale));
4639
4640 const SCEV *S = Offset.getNegativeSCEV(SE, IntTy);
4641 if (F.referencesReg(S))
4642 continue;
4643 Formula NewF = F;
4644 NewF.BaseOffset = Offset;
4645        if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
4646 NewF))
4647 continue;
4648 NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);
4649
4650
4651
4652
4654
4655
4656
4657 if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
4658 continue;
4659 if (C->getValue()->isNegative() !=
4660 (NewF.BaseOffset.isLessThanZero()) &&
4661 (C->getAPInt().abs() * APInt(BitWidth, F.Scale))
4662 .ule(std::abs(NewF.BaseOffset.getFixedValue())))
4663 continue;
4664 }
4665
4666
4667 NewF.canonicalize(*this->L);
4668 (void)InsertFormula(LU, LUIdx, NewF);
4669 } else {
4670
4671 for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
4672 const SCEV *BaseReg = F.BaseRegs[N];
4673 if (BaseReg != OrigReg)
4674 continue;
4675 Formula NewF = F;
4676 if (!NewF.BaseOffset.isCompatibleImmediate(Imm) ||
4677 !NewF.UnfoldedOffset.isCompatibleImmediate(Imm) ||
4678 !NewF.BaseOffset.isCompatibleImmediate(NewF.UnfoldedOffset))
4679 continue;
4680 NewF.BaseOffset = NewF.BaseOffset.addUnsigned(Imm);
4681          if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset,
4682                          LU.Kind, LU.AccessTy, NewF)) {
4685 continue;
4686 Immediate NewUnfoldedOffset = NewF.UnfoldedOffset.addUnsigned(Imm);
4688 continue;
4689 NewF = F;
4690 NewF.UnfoldedOffset = NewUnfoldedOffset;
4691 }
4692 NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);
4693
4694
4695
4696
4697 for (const SCEV *NewReg : NewF.BaseRegs)
4699 if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
4700 goto skip_formula;
4701 if ((C->getAPInt() + NewF.BaseOffset.getFixedValue())
4702 .abs()
4703 .slt(std::abs(NewF.BaseOffset.getFixedValue())) &&
4704 (C->getAPInt() + NewF.BaseOffset.getFixedValue())
4705 .countr_zero() >=
4707 NewF.BaseOffset.getFixedValue()))
4708 goto skip_formula;
4709 }
4710
4711
4712 NewF.canonicalize(*this->L);
4713 (void)InsertFormula(LU, LUIdx, NewF);
4714 break;
4715 skip_formula:;
4716 }
4717 }
4718 }
4719 }
4720}
4721
4722
4723void
4724LSRInstance::GenerateAllReuseFormulae() {
4725
4726
4727 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4728 LSRUse &LU = Uses[LUIdx];
4729 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4730 GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
4731 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4732 GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
4733 }
4734 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4735 LSRUse &LU = Uses[LUIdx];
4736 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4737 GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
4738 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4739 GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
4740 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4741 GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
4742 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4743 GenerateScales(LU, LUIdx, LU.Formulae[i]);
4744 }
4745 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4746 LSRUse &LU = Uses[LUIdx];
4747 for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
4748 GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
4749 }
4750
4751 GenerateCrossUseConstantOffsets();
4752
4754 "After generating reuse formulae:\n";
4755 print_uses(dbgs()));
4756}
4757
4758
4759
4760void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
4761 DenseSet<const SCEV *> VisitedRegs;
4762 SmallPtrSet<const SCEV *, 16> Regs;
4763 SmallPtrSet<const SCEV *, 16> LoserRegs;
4764#ifndef NDEBUG
4765 bool ChangedFormulae = false;
4766#endif
4767
4768
4769
4770 using BestFormulaeTy = DenseMap<SmallVector<const SCEV *, 4>, size_t>;
4771
4772 BestFormulaeTy BestFormulae;
4773
4774 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4775 LSRUse &LU = Uses[LUIdx];
4777 dbgs() << '\n');
4778
4779 bool Any = false;
4780 for (size_t FIdx = 0, NumForms = LU.Formulae.size();
4781 FIdx != NumForms; ++FIdx) {
4782 Formula &F = LU.Formulae[FIdx];
4783
4784
4785
4786
4787
4788
4789
4790
4791 Cost CostF(L, SE, TTI, AMK);
4793 CostF.RateFormula(F, Regs, VisitedRegs, LU, HardwareLoopProfitable,
4794 &LoserRegs);
4795 if (CostF.isLoser()) {
4796
4797
4798
4799
4800
4801
4803 dbgs() << "\n");
4804 }
4805 else {
4807 for (const SCEV *Reg : F.BaseRegs) {
4808 if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
4810 }
4811 if (F.ScaledReg &&
4812 RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
4813 Key.push_back(F.ScaledReg);
4814
4815
4817
4818 std::pair<BestFormulaeTy::const_iterator, bool> P =
4819 BestFormulae.insert(std::make_pair(Key, FIdx));
4820 if (P.second)
4821 continue;
4822
4823 Formula &Best = LU.Formulae[P.first->second];
4824
4825 Cost CostBest(L, SE, TTI, AMK);
4827 CostBest.RateFormula(Best, Regs, VisitedRegs, LU,
4828 HardwareLoopProfitable);
4829 if (CostF.isLess(CostBest))
4832 dbgs() << "\n"
4833 " in favor of formula ";
4834 Best.print(dbgs()); dbgs() << '\n');
4835 }
4836#ifndef NDEBUG
4837 ChangedFormulae = true;
4838#endif
4839 LU.DeleteFormula(F);
4840 --FIdx;
4841 --NumForms;
4842 Any = true;
4843 }
4844
4845
4846 if (Any)
4847 LU.RecomputeRegs(LUIdx, RegUses);
4848
4849
4850 BestFormulae.clear();
4851 }
4852
4854 dbgs() << "\n"
4855 "After filtering out undesirable candidates:\n";
4856 print_uses(dbgs());
4857 });
4858}
4859
4860
4861
4862
4863size_t LSRInstance::EstimateSearchSpaceComplexity() const {
4864 size_t Power = 1;
4865 for (const LSRUse &LU : Uses) {
4866 size_t FSize = LU.Formulae.size();
4869 break;
4870 }
4871 Power *= FSize;
4873 break;
4874 }
4875 return Power;
4876}
4877
4878
4879
4880
4881void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
4882 if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
4883 LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
4884
4885 LLVM_DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
4886 "which use a superset of registers used by other "
4887 "formulae.\n");
4888
4889 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4890 LSRUse &LU = Uses[LUIdx];
4891 bool Any = false;
4892 for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
4893 Formula &F = LU.Formulae[i];
4894 if (F.BaseOffset.isNonZero() && F.BaseOffset.isScalable())
4895 continue;
4896
4897
4898
4900 I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
4902 Formula NewF = F;
4903
4904
4905 NewF.BaseOffset =
4906 Immediate::getFixed(NewF.BaseOffset.getFixedValue() +
4907 (uint64_t)C->getValue()->getSExtValue());
4908 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4909 (I - F.BaseRegs.begin()));
4910 if (LU.HasFormulaWithSameRegs(NewF)) {
4912 dbgs() << '\n');
4913 LU.DeleteFormula(F);
4914 --i;
4915 --e;
4916 Any = true;
4917 break;
4918 }
4921            if (!F.BaseGV) {
4922 Formula NewF = F;
4923 NewF.BaseGV = GV;
4924 NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
4925 (I - F.BaseRegs.begin()));
4926 if (LU.HasFormulaWithSameRegs(NewF)) {
4928 dbgs() << '\n');
4929 LU.DeleteFormula(F);
4930 --i;
4931 --e;
4932 Any = true;
4933 break;
4934 }
4935 }
4936 }
4937 }
4938 }
4939 if (Any)
4940 LU.RecomputeRegs(LUIdx, RegUses);
4941 }
4942
4944 }
4945}
4946
4947
4948
4949void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
4951 return;
4952
4954 dbgs() << "The search space is too complex.\n"
4955 "Narrowing the search space by assuming that uses separated "
4956 "by a constant offset will use the same registers.\n");
4957
4958
4959
4960 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
4961 LSRUse &LU = Uses[LUIdx];
4962 for (const Formula &F : LU.Formulae) {
4963 if (F.BaseOffset.isZero() || (F.Scale != 0 && F.Scale != 1))
4964 continue;
4965
4966 LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);
4967 if (!LUThatHas)
4968 continue;
4969
4970 if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, false,
4971 LU.Kind, LU.AccessTy))
4972 continue;
4973
4975
4976 LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
4977 LUThatHas->AllFixupsUnconditional &= LU.AllFixupsUnconditional;
4978
4979
4980 for (LSRFixup &Fixup : LU.Fixups) {
4981 Fixup.Offset += F.BaseOffset;
4982 LUThatHas->pushFixup(Fixup);
4984 }
4985
4986
4987 bool Any = false;
4988 for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
4989 Formula &F = LUThatHas->Formulae[i];
4990        if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
4991                        LUThatHas->Kind, LUThatHas->AccessTy, F)) {
4993 LUThatHas->DeleteFormula(F);
4994 --i;
4995 --e;
4996 Any = true;
4997 }
4998 }
4999
5000 if (Any)
5001 LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);
5002
5003
5004 DeleteUse(LU, LUIdx);
5005 --LUIdx;
5006 --NumUses;
5007 break;
5008 }
5009 }
5010
5012}
5013
5014
5015
5016
5017void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
5018 if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
5019 LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
5020
5021 LLVM_DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
5022 "undesirable dedicated registers.\n");
5023
5024 FilterOutUndesirableDedicatedRegisters();
5025
5027 }
5028}
5029
5030
5031
5032
5033
5034
5035
5036
5037
5038
5039void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
5041 return;
5042
5044 dbgs() << "The search space is too complex.\n"
5045 "Narrowing the search space by choosing the best Formula "
5046 "from the Formulae with the same Scale and ScaledReg.\n");
5047
5048
5049 using BestFormulaeTy = DenseMap<std::pair<const SCEV *, int64_t>, size_t>;
5050
5051 BestFormulaeTy BestFormulae;
5052#ifndef NDEBUG
5053 bool ChangedFormulae = false;
5054#endif
5055 DenseSet<const SCEV *> VisitedRegs;
5056 SmallPtrSet<const SCEV *, 16> Regs;
5057
5058 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
5059 LSRUse &LU = Uses[LUIdx];
5061 dbgs() << '\n');
5062
5063
5064 auto IsBetterThan = [&](Formula &FA, Formula &FB) {
5065
5066
5067
5068
5069 size_t FARegNum = 0;
5070 for (const SCEV *Reg : FA.BaseRegs) {
5071 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
5072 FARegNum += (NumUses - UsedByIndices.count() + 1);
5073 }
5074 size_t FBRegNum = 0;
5075 for (const SCEV *Reg : FB.BaseRegs) {
5076 const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
5077 FBRegNum += (NumUses - UsedByIndices.count() + 1);
5078 }
5079 if (FARegNum != FBRegNum)
5080 return FARegNum < FBRegNum;
5081
5082
5083
5084 Cost CostFA(L, SE, TTI, AMK);
5085 Cost CostFB(L, SE, TTI, AMK);
5087 CostFA.RateFormula(FA, Regs, VisitedRegs, LU, HardwareLoopProfitable);
5089 CostFB.RateFormula(FB, Regs, VisitedRegs, LU, HardwareLoopProfitable);
5090 return CostFA.isLess(CostFB);
5091 };
5092
5093 bool Any = false;
5094 for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
5095 ++FIdx) {
5096 Formula &F = LU.Formulae[FIdx];
5097      if (!F.ScaledReg)
5098 continue;
5099 auto P = BestFormulae.insert({{F.ScaledReg, F.Scale}, FIdx});
5100 if (P.second)
5101 continue;
5102
5103 Formula &Best = LU.Formulae[P.first->second];
5104 if (IsBetterThan(F, Best))
5107 dbgs() << "\n"
5108 " in favor of formula ";
5109 Best.print(dbgs()); dbgs() << '\n');
5110#ifndef NDEBUG
5111 ChangedFormulae = true;
5112#endif
5113 LU.DeleteFormula(F);
5114 --FIdx;
5115 --NumForms;
5116 Any = true;
5117 }
5118 if (Any)
5119 LU.RecomputeRegs(LUIdx, RegUses);
5120
5121
5122 BestFormulae.clear();
5123 }
5124
5126 dbgs() << "\n"
5127 "After filtering out undesirable candidates:\n";
5128 print_uses(dbgs());
5129 });
5130}
5131
5132
5133
5134void LSRInstance::NarrowSearchSpaceByFilterPostInc() {
5136 return;
5138 return;
5139
5140 LLVM_DEBUG(dbgs() << "The search space is too complex.\n"
5141 "Narrowing the search space by choosing the lowest "
5142 "register Formula for PostInc Uses.\n");
5143
5144 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
5145 LSRUse &LU = Uses[LUIdx];
5146
5147 if (LU.Kind != LSRUse::Address)
5148 continue;
5151 continue;
5152
5153 size_t MinRegs = std::numeric_limits<size_t>::max();
5154 for (const Formula &F : LU.Formulae)
5155 MinRegs = std::min(F.getNumRegs(), MinRegs);
5156
5157 bool Any = false;
5158 for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
5159 ++FIdx) {
5160 Formula &F = LU.Formulae[FIdx];
5161 if (F.getNumRegs() > MinRegs) {
5163 dbgs() << "\n");
5164 LU.DeleteFormula(F);
5165 --FIdx;
5166 --NumForms;
5167 Any = true;
5168 }
5169 }
5170 if (Any)
5171 LU.RecomputeRegs(LUIdx, RegUses);
5172
5174 break;
5175 }
5176
5178}
5179
5180
5181
5182
5183
5184
5185
5186
5187
5188
5189
5190
5191
5192
5193
5194
5195
5196
5197
5198
5199
5200
5201
5202
5203
5204
5205
5206
5207
5208
5209
5210
5211
5212
5213
5214
5215
5216
5217
5218
5219
5220
5221
5222void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
5224 return;
5225
5226
5227
5228
5229
5230
5231 SmallPtrSet<const SCEV *, 4> UniqRegs;
5232 LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
5233
5234
5235 DenseMap <const SCEV *, float> RegNumMap;
5236 for (const SCEV *Reg : RegUses) {
5238 continue;
5239 float PNotSel = 1;
5240 for (const LSRUse &LU : Uses) {
5241 if (!LU.Regs.count(Reg))
5242 continue;
5243 float P = LU.getNotSelectedProbability(Reg);
5244 if (P != 0.0)
5245 PNotSel *= P;
5246 else
5248 }
5249 RegNumMap.insert(std::make_pair(Reg, PNotSel));
5250 }
5251
5253 dbgs() << "Narrowing the search space by deleting costly formulas\n");
5254
5255
5256 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
5257 LSRUse &LU = Uses[LUIdx];
5258
5259 if (LU.Formulae.size() < 2)
5260 continue;
5261
5262
5263
5264 float FMinRegNum = LU.Formulae[0].getNumRegs();
5265 float FMinARegNum = LU.Formulae[0].getNumRegs();
5266 size_t MinIdx = 0;
5267 for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
5268 Formula &F = LU.Formulae[i];
5269 float FRegNum = 0;
5270 float FARegNum = 0;
5271 for (const SCEV *BaseReg : F.BaseRegs) {
5272 if (UniqRegs.count(BaseReg))
5273 continue;
5274 FRegNum += RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
5276 FARegNum +=
5277 RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
5278 }
5279 if (const SCEV *ScaledReg = F.ScaledReg) {
5280 if (!UniqRegs.count(ScaledReg)) {
5281 FRegNum +=
5282 RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
5284 FARegNum +=
5285 RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
5286 }
5287 }
5288 if (FMinRegNum > FRegNum ||
5289 (FMinRegNum == FRegNum && FMinARegNum > FARegNum)) {
5290 FMinRegNum = FRegNum;
5291 FMinARegNum = FARegNum;
5292 MinIdx = i;
5293 }
5294 }
5295 LLVM_DEBUG(dbgs() << " The formula "; LU.Formulae[MinIdx].print(dbgs());
5296 dbgs() << " with min reg num " << FMinRegNum << '\n');
5297 if (MinIdx != 0)
5298 std::swap(LU.Formulae[MinIdx], LU.Formulae[0]);
5299 while (LU.Formulae.size() != 1) {
5301 dbgs() << '\n');
5302 LU.Formulae.pop_back();
5303 }
5304 LU.RecomputeRegs(LUIdx, RegUses);
5305 assert(LU.Formulae.size() == 1 && "Should be exactly 1 min regs formula");
5306 Formula &F = LU.Formulae[0];
5308
5310 if (F.ScaledReg)
5311 UniqRegs.insert(F.ScaledReg);
5312 }
5314}
5315
5316
5317
5318
5322 MemAccessTy AccessType) {
5323 if (Best->getType() != Reg->getType() ||
5327 return false;
5329 if (!Diff)
5330 return false;
5331
5332 return TTI.isLegalAddressingMode(
5333 AccessType.MemTy, nullptr,
5334 Diff->getSExtValue(),
5335 true, 0, AccessType.AddrSpace) &&
5336         TTI.isLegalAddressingMode(
5337 AccessType.MemTy, nullptr,
5338 -Diff->getSExtValue(),
5339 true, 0, AccessType.AddrSpace);
5340}
5341
5342
5343
5344
5345void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
5346
5347
5348 SmallPtrSet<const SCEV *, 4> Taken;
5349 while (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
5350
5351
5352 LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
5353
5354
5355
5356 const SCEV *Best = nullptr;
5357 unsigned BestNum = 0;
5358 for (const SCEV *Reg : RegUses) {
5360 continue;
5361 if (!Best) {
5362 Best = Reg;
5363 BestNum = RegUses.getUsedByIndices(Reg).count();
5364 } else {
5365 unsigned Count = RegUses.getUsedByIndices(Reg).count();
5366 if (Count > BestNum) {
5367 Best = Reg;
5368 BestNum = Count;
5369 }
5370
5371
5372
5373
5374 if (Count == BestNum) {
5375 int LUIdx = RegUses.getUsedByIndices(Reg).find_first();
5376 if (LUIdx >= 0 && Uses[LUIdx].Kind == LSRUse::Address &&
5378 Uses[LUIdx].AccessTy)) {
5379 Best = Reg;
5380 BestNum = Count;
5381 }
5382 }
5383 }
5384 }
5385 assert(Best && "Failed to find best LSRUse candidate");
5386
5387 LLVM_DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
5388 << " will yield profitable reuse.\n");
5390
5391
5392
5393 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
5394 LSRUse &LU = Uses[LUIdx];
5395 if (!LU.Regs.count(Best)) continue;
5396
5397 bool Any = false;
5398 for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
5399 Formula &F = LU.Formulae[i];
5400        if (!F.referencesReg(Best)) {
5402 LU.DeleteFormula(F);
5403 --e;
5404 --i;
5405 Any = true;
5406 assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?");
5407 continue;
5408 }
5409 }
5410
5411 if (Any)
5412 LU.RecomputeRegs(LUIdx, RegUses);
5413 }
5414
5416 }
5417}
5418
5419
5420
5421
5422
5423void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
5424 NarrowSearchSpaceByDetectingSupersets();
5425 NarrowSearchSpaceByCollapsingUnrolledCode();
5426 NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
5428 NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
5429 NarrowSearchSpaceByFilterPostInc();
5431 NarrowSearchSpaceByDeletingCostlyFormulas();
5432 else
5433 NarrowSearchSpaceByPickingWinnerRegs();
5434}
5435
5436
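// SolveRecurse is the recursive worker for Solve(): it picks a formula for
// the next use, preferring formulae that reuse registers already required by
// earlier choices, and abandons any partial solution whose cost is no longer
// less than the best complete solution found so far.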
5437void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
5438 Cost &SolutionCost,
5439 SmallVectorImpl<const Formula *> &Workspace,
5440 const Cost &CurCost,
5441 const SmallPtrSet<const SCEV *, 16> &CurRegs,
5442 DenseSet<const SCEV *> &VisitedRegs) const {
5443
5444
5445
5446
5447
5448
5449
5450
5451
5452
5453 const LSRUse &LU = Uses[Workspace.size()];
5454
5455
5456
5457
5458
5459 SmallSetVector<const SCEV *, 4> ReqRegs;
5460 for (const SCEV *S : CurRegs)
5461 if (LU.Regs.count(S))
5463
5464 SmallPtrSet<const SCEV *, 16> NewRegs;
5465 Cost NewCost(L, SE, TTI, AMK);
5466 for (const Formula &F : LU.Formulae) {
5467
5468
5469
5470
5471
5472
5474 int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size());
5475 for (const SCEV *Reg : ReqRegs) {
5476 if ((F.ScaledReg && F.ScaledReg == Reg) ||
5478 --NumReqRegsToFind;
5479 if (NumReqRegsToFind == 0)
5480 break;
5481 }
5482 }
5483 if (NumReqRegsToFind != 0) {
5484
5485
5486 continue;
5487 }
5488 }
5489
5490
5491
5492 NewCost = CurCost;
5493 NewRegs = CurRegs;
5494 NewCost.RateFormula(F, NewRegs, VisitedRegs, LU, HardwareLoopProfitable);
5495 if (NewCost.isLess(SolutionCost)) {
5497 if (Workspace.size() != Uses.size()) {
5498 SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
5499 NewRegs, VisitedRegs);
5500 if (F.getNumRegs() == 1 && Workspace.size() == 1)
5501 VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
5502 } else {
5504 dbgs() << ".\nRegs:\n";
5505 for (const SCEV *S : NewRegs) dbgs()
5506 << "- " << *S << "\n";
5507 dbgs() << '\n');
5508
5509 SolutionCost = NewCost;
5510 Solution = Workspace;
5511 }
5513 }
5514 }
5515}
5516
5517
5518
5519void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
5521 Cost SolutionCost(L, SE, TTI, AMK);
5522 SolutionCost.Lose();
5523 Cost CurCost(L, SE, TTI, AMK);
5524 SmallPtrSet<const SCEV *, 16> CurRegs;
5525 DenseSet<const SCEV *> VisitedRegs;
5527
5528
5529 SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
5530 CurRegs, VisitedRegs);
5531 if (Solution.empty()) {
5532 LLVM_DEBUG(dbgs() << "\nNo Satisfactory Solution\n");
5533 return;
5534 }
5535
5536
5538 "The chosen solution requires ";
5539 SolutionCost.print(dbgs()); dbgs() << ":\n";
5540 for (size_t i = 0, e = Uses.size(); i != e; ++i) {
5541 dbgs() << " ";
5543 dbgs() << "\n"
5544 " ";
5545 Solution[i]->print(dbgs());
5546 dbgs() << '\n';
5547 });
5548
5549 assert(Solution.size() == Uses.size() && "Malformed solution!");
5550
5551 const bool EnableDropUnprofitableSolution = [&] {
5554 return true;
5556 return false;
5559 }
5561 }();
5562
5563 if (BaselineCost.isLess(SolutionCost)) {
5564 if (!EnableDropUnprofitableSolution)
5566 dbgs() << "Baseline is more profitable than chosen solution, "
5567 "add option 'lsr-drop-solution' to drop LSR solution.\n");
5568 else {
5569 LLVM_DEBUG(dbgs() << "Baseline is more profitable than chosen "
5570 "solution, dropping LSR solution.\n";);
5571 Solution.clear();
5572 }
5573 }
5574}
5575
5576
5577
5578
5581 const SmallVectorImpl<Instruction *> &Inputs)
5582 const {
5584 while (true) {
5585 bool AllDominate = true;
5587
5588
5590 return IP;
5591
5592 for (Instruction *Inst : Inputs) {
5593 if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
5594 AllDominate = false;
5595 break;
5596 }
5597
5598
5599 if (Tentative->getParent() == Inst->getParent() &&
5600 (!BetterPos || !DT.dominates(Inst, BetterPos)))
5602 }
5603 if (!AllDominate)
5604 break;
5605 if (BetterPos)
5607 else
5609
5610 const Loop *IPLoop = LI.getLoopFor(IP->getParent());
5611 unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;
5612
5615 if (!Rung) return IP;
5616 Rung = Rung->getIDom();
5617 if (!Rung) return IP;
5618 IDom = Rung->getBlock();
5619
5620
5621 const Loop *IDomLoop = LI.getLoopFor(IDom);
5622 unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
5623 if (IDomDepth <= IPLoopDepth &&
5624 (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
5625 break;
5626 }
5627
5629 }
5630
5631 return IP;
5632}
5633
5634
5635
5637 BasicBlock::iterator LowestIP, const LSRFixup &LF, const LSRUse &LU) const {
5638
5639
5640
5641 SmallVector<Instruction *, 4> Inputs;
5644 if (LU.Kind == LSRUse::ICmpZero)
5645 if (Instruction *I =
5648 if (LF.PostIncLoops.count(L)) {
5649 if (LF.isUseFullyOutsideLoop(L))
5650 Inputs.push_back(L->getLoopLatch()->getTerminator());
5651 else
5652 Inputs.push_back(IVIncInsertPos);
5653 }
5654
5655
5656 for (const Loop *PIL : LF.PostIncLoops) {
5657 if (PIL == L) continue;
5658
5659
5662 if (!ExitingBlocks.empty()) {
5664 for (unsigned i = 1, e = ExitingBlocks.size(); i != e; ++i)
5667 }
5668 }
5669
5671 "Insertion point must be a normal instruction");
5672
5673
5674
5676
5677
5679
5680
5681 while (IP->isEHPad()) ++IP;
5682
5683
5684
5685
5686 while (Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP)
5687 ++IP;
5688
5689 return IP;
5690}
5691
5692
5693
5694Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
5696 SmallVectorImpl &DeadInsts) const {
5697 if (LU.RigidFormula)
5698 return LF.OperandValToReplace;
5699
5700
5701
5702 IP = AdjustInsertPositionForExpand(IP, LF, LU);
5703 Rewriter.setInsertPoint(&*IP);
5704
5705
5706
5707 Rewriter.setPostInc(LF.PostIncLoops);
5708
5709
5710 Type *OpTy = LF.OperandValToReplace->getType();
5711
5713 if (!Ty)
5714
5715 Ty = OpTy;
5717
5718 Ty = OpTy;
5719
5721
5722
5724
5725
5726 for (const SCEV *Reg : F.BaseRegs) {
5727    assert(!Reg->isZero() && "Zero allocated in a base register!");
5728
5729
5732 }
5733
5734
5735 Value *ICmpScaledV = nullptr;
5736 if (F.Scale != 0) {
5737 const SCEV *ScaledS = F.ScaledReg;
5738
5739
5742
5743 if (LU.Kind == LSRUse::ICmpZero) {
5744
5745 if (F.Scale == 1)
5746 Ops.push_back(
5748 else {
5749
5750
5751
5753 "The only scale supported by ICmpZero uses is -1!");
5754 ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr);
5755 }
5756 } else {
5757
5758
5759
5760
5761
5762      if (!Ops.empty() && LU.Kind == LSRUse::Address &&
5765 Ops.clear();
5767 }
5769 if (F.Scale != 1)
5770 ScaledS =
5772 Ops.push_back(ScaledS);
5773 }
5774 }
5775
5776
5777 if (F.BaseGV) {
5778
5779    if (!Ops.empty()) {
5781 Ops.clear();
5783 }
5785 }
5786
5787
5788
5789  if (!Ops.empty()) {
5791 Ops.clear();
5793 }
5794
5795
5796
5797
5798 assert(F.BaseOffset.isCompatibleImmediate(LF.Offset) &&
5799 "Expanding mismatched offsets\n");
5800
5801 Immediate Offset = F.BaseOffset.addUnsigned(LF.Offset);
5802 if (Offset.isNonZero()) {
5803 if (LU.Kind == LSRUse::ICmpZero) {
5804
5805
5806 if (!ICmpScaledV)
5807 ICmpScaledV =
5809 else {
5811 ICmpScaledV = ConstantInt::get(IntTy, Offset.getFixedValue());
5812 }
5813 } else {
5814
5815
5816 Ops.push_back(Offset.getUnknownSCEV(SE, IntTy));
5817 }
5818 }
5819
5820
5821 Immediate UnfoldedOffset = F.UnfoldedOffset;
5822 if (UnfoldedOffset.isNonZero()) {
5823
5824 Ops.push_back(UnfoldedOffset.getUnknownSCEV(SE, IntTy));
5825 }
5826
5827
5828 const SCEV *FullS = Ops.empty() ?
5831 Value *FullV = Rewriter.expandCodeFor(FullS, Ty);
5832
5833
5835
5836
5837
5838
5839 if (LU.Kind == LSRUse::ICmpZero) {
5843    assert(!F.BaseGV && "ICmp does not support folding a global value and "
5844 "a scale at the same time!");
5845 if (F.Scale == -1) {
5846 if (ICmpScaledV->getType() != OpTy) {
5849 ICmpScaledV, OpTy, "tmp", CI->getIterator());
5850 ICmpScaledV = Cast;
5851 }
5853 } else {
5854
5855
5856 assert((F.Scale == 0 || F.Scale == 1) &&
5857 "ICmp does not support folding a global value and "
5858 "a scale at the same time!");
5860 -(uint64_t)Offset.getFixedValue());
5861 if (C->getType() != OpTy) {
5865 assert(C && "Cast of ConstantInt should have folded");
5866 }
5867
5869 }
5870 }
5871
5872 return FullV;
5873}
5874
5875
5876
5877
5878void LSRInstance::RewriteForPHI(PHINode *PN, const LSRUse &LU,
5879 const LSRFixup &LF, const Formula &F,
5880 SmallVectorImpl &DeadInsts) {
5881 DenseMap<BasicBlock *, Value *> Inserted;
5882
5885 bool needUpdateFixups = false;
5887
5888
5889
5890
5891
5896 Loop *PNLoop = LI.getLoopFor(Parent);
5897 if (!PNLoop || Parent != PNLoop->getHeader()) {
5898
5901 NewBB =
5903 CriticalEdgeSplittingOptions(&DT, &LI, MSSAU)
5904 .setMergeIdenticalEdges()
5905 .setKeepOneInputPHIs());
5906 } else {
5908 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
5910 NewBB = NewBBs[0];
5911 }
5912
5913
5914
5915 if (NewBB) {
5916
5917
5918
5919          if (L->contains(BB) && !L->contains(PN))
5921
5922
5924 BB = NewBB;
5926
5927 needUpdateFixups = true;
5928 }
5929 }
5930 }
5931
5932 std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
5934 if (!Pair.second)
5936 else {
5939
5940
5941 Type *OpTy = LF.OperandValToReplace->getType();
5942 if (FullV->getType() != OpTy)
5945 LF.OperandValToReplace->getType(), "tmp",
5947
5948
5949
5950
5952        if (L->contains(I) && !L->contains(BB))
5953 InsertedNonLCSSAInsts.insert(I);
5954
5956 Pair.first->second = FullV;
5957 }
5958
5959
5960
5961
5962
5963 if (needUpdateFixups) {
5964 for (LSRUse &LU : Uses)
5965 for (LSRFixup &Fixup : LU.Fixups)
5966
5967
5968
5969 if (Fixup.UserInst == PN) {
5970
5971
5972 bool foundInOriginalPHI = false;
5974 if (val == Fixup.OperandValToReplace) {
5975 foundInOriginalPHI = true;
5976 break;
5977 }
5978
5979
5980 if (foundInOriginalPHI)
5981 continue;
5982
5983
5984
5985
5988 ++I) {
5991 if (val == Fixup.OperandValToReplace)
5992 Fixup.UserInst = NewPN;
5993 }
5994 }
5995 }
5996 }
5997}
5998
5999
6000
6001
6002void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF,
6003 const Formula &F,
6004 SmallVectorImpl &DeadInsts) {
6005
6006
6008 RewriteForPHI(PN, LU, LF, F, DeadInsts);
6009 } else {
6010 Value *FullV = Expand(LU, LF, F, LF.UserInst->getIterator(), DeadInsts);
6011
6012
6013 Type *OpTy = LF.OperandValToReplace->getType();
6014 if (FullV->getType() != OpTy) {
6017 FullV, OpTy, "tmp", LF.UserInst->getIterator());
6018 FullV = Cast;
6019 }
6020
6021
6022
6023
6024
6025
6026 if (LU.Kind == LSRUse::ICmpZero)
6028 else
6030 }
6031
6034}
6035
6036
6037
6038
6039
6040
6042 const LSRFixup &Fixup, const LSRUse &LU,
6045
6046 if (LU.Kind != LSRUse::Address)
6047 return IVIncInsertPos;
6048
6049
6054 return IVIncInsertPos;
6055
6056
6057
6058 BasicBlock *HoistBlock = I->getParent();
6061 return IVIncInsertPos;
6062
6064}
6065
6066
6067
6068void LSRInstance::ImplementSolution(
6069 const SmallVectorImpl<const Formula *> &Solution) {
6070
6071
6073
6074
6075 for (const IVChain &Chain : IVChainVec) {
6078 }
6079
6080
6081 for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx)
6082 for (const LSRFixup &Fixup : Uses[LUIdx].Fixups) {
6085 Rewriter.setIVIncInsertPos(L, InsertPos);
6086 Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], DeadInsts);
6088 }
6089
6090 auto InsertedInsts = InsertedNonLCSSAInsts.takeVector();
6092
6093 for (const IVChain &Chain : IVChainVec) {
6094 GenerateIVChain(Chain, DeadInsts);
6096 }
6097
6098 for (const WeakVH &IV : Rewriter.getInsertedIVs())
6101
6102
6103
6105
6106  Changed |= RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts,
6107                                                                  &TLI, MSSAU);
6108
6109
6110
6111
6112
6113
6114
6115
6116 for (PHINode &PN : L->getHeader()->phis()) {
6117 BinaryOperator *BO = nullptr;
6118 Value *Start = nullptr, *Step = nullptr;
6120 continue;
6121
6123 case Instruction::Sub:
6125
6126 continue;
6127 break;
6128 case Instruction::Add:
6129 break;
6130 default:
6131 continue;
6132 };
6133
6135
6136
6137 continue;
6138
6140
6141 continue;
6142
6143
6145 [&](Use &U) {return DT.dominates(IVIncInsertPos, U);}))
6146 continue;
6149 }
6150
6151
6152}
6153
6154LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
6155 DominatorTree &DT, LoopInfo &LI,
6156 const TargetTransformInfo &TTI, AssumptionCache &AC,
6157 TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU)
6158 : IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI), TTI(TTI), L(L),
6161 : TTI.getPreferredAddressingMode(L, &SE)),
6163
6164 if (!L->isLoopSimplifyForm())
6165 return;
6166
6167
6169
6170
6171
6172 unsigned NumUsers = 0;
6175 (void)U;
6176 LLVM_DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << U
6177 << "\n");
6178 return;
6179 }
6180
6181
6182
6184 auto FirstNonPHI = PN->getParent()->getFirstNonPHIIt();
6189 return;
6190 }
6191 }
6192
6194 L->getHeader()->printAsOperand(dbgs(), false);
6195 dbgs() << ":\n");
6196
6197
6198
6200 HardwareLoopProfitable =
6201 TTI.isHardwareLoopProfitable(L, SE, AC, &TLI, HWLoopInfo);
6202
6203
6204
6205#if LLVM_ENABLE_ABI_BREAKING_CHECKS
6207#endif
6208 Rewriter.disableCanonicalMode();
6209 Rewriter.enableLSRMode();
6210
6211
6212 OptimizeShadowIV();
6213 OptimizeLoopTermCond();
6214
6215
6216 if (IU.empty()) return;
6217
6218
6219 if (!L->isInnermost()) {
6220 LLVM_DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n");
6221 return;
6222 }
6223
6224
6225
6226
6227
6228
6229
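// Start collecting data and preparing for the solver.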
6231 CollectChains();
6232 CollectInterestingTypesAndFactors();
6233 CollectFixupsAndInitialFormulae();
6234 CollectLoopInvariantFixupsAndFormulae();
6235
6236 if (Uses.empty())
6237 return;
6238
6240 print_uses(dbgs()));
6241 LLVM_DEBUG(dbgs() << "The baseline solution requires ";
6242 BaselineCost.print(dbgs()); dbgs() << "\n");
6243
6244
6245
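// Now use the reuse data to generate a bunch of interesting ways
// to formulate the values needed for the uses.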
6246 GenerateAllReuseFormulae();
6247
6248 FilterOutUndesirableDedicatedRegisters();
6249 NarrowSearchSpaceUsingHeuristics();
6250
6252 Solve(Solution);
6253
6254
6255 Factors.clear();
6256 Types.clear();
6257 RegUses.clear();
6258
6259 if (Solution.empty())
6260 return;
6261
6262#ifndef NDEBUG
6263
6264 for (const LSRUse &LU : Uses) {
6265 for (const Formula &F : LU.Formulae)
6266 assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
6267 F) && "Illegal formula generated!");
6268 };
6269#endif
6270
6271
6272 ImplementSolution(Solution);
6273}
6274
6275#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
6276void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
6277 if (Factors.empty() && Types.empty()) return;
6278
6279 OS << "LSR has identified the following interesting factors and types: ";
6280 bool First = true;
6281
6282 for (int64_t Factor : Factors) {
6283 if (!First) OS << ", ";
6284 First = false;
6285 OS << '*' << Factor;
6286 }
6287
6288 for (Type *Ty : Types) {
6289 if (!First) OS << ", ";
6290 First = false;
6291 OS << '(' << *Ty << ')';
6292 }
6293 OS << '\n';
6294}
6295
6296void LSRInstance::print_fixups(raw_ostream &OS) const {
6297 OS << "LSR is examining the following fixup sites:\n";
6298 for (const LSRUse &LU : Uses)
6299 for (const LSRFixup &LF : LU.Fixups) {
6300 dbgs() << " ";
6301 LF.print(OS);
6302 OS << '\n';
6303 }
6304}
6305
6306void LSRInstance::print_uses(raw_ostream &OS) const {
6307 OS << "LSR is examining the following uses:\n";
6308 for (const LSRUse &LU : Uses) {
6309 dbgs() << " ";
6310 LU.print(OS);
6311 OS << '\n';
6312 for (const Formula &F : LU.Formulae) {
6313 OS << " ";
6314 F.print(OS);
6315 OS << '\n';
6316 }
6317 }
6318}
6319
6320void LSRInstance::print(raw_ostream &OS) const {
6321 print_factors_and_types(OS);
6322 print_fixups(OS);
6323 print_uses(OS);
6324}
6325
6328}
6329#endif
6330
6331namespace {
6332
6333class LoopStrengthReduce : public LoopPass {
6334public:
6335 static char ID;
6336
6337 LoopStrengthReduce();
6338
6339private:
6340 bool runOnLoop(Loop *L, LPPassManager &LPM) override;
6341 void getAnalysisUsage(AnalysisUsage &AU) const override;
6342};
6343
6344}
6345
6346 LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) {
6347 initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
6348}
6349
6350void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
6351
6352
6354
6358 AU.addRequired();
6359 AU.addPreserved();
6360 AU.addRequired();
6361 AU.addPreserved();
6362 AU.addRequired();
6363 AU.addRequired();
6364
6365
6369 AU.addRequired();
6371}
6372
6373namespace {
6374
6375
6377ToDwarfOpIter(SmallVectorImpl<uint64_t> &Expr) {
6378 llvm::DIExpression::expr_op_iterator Begin =
6379 llvm::DIExpression::expr_op_iterator(Expr.begin());
6380 llvm::DIExpression::expr_op_iterator End =
6381 llvm::DIExpression::expr_op_iterator(Expr.end());
6382 return {Begin, End};
6383}
6384
6385struct SCEVDbgValueBuilder {
6386 SCEVDbgValueBuilder() = default;
6387 SCEVDbgValueBuilder(const SCEVDbgValueBuilder &Base) { clone(Base); }
6388
6389 void clone(const SCEVDbgValueBuilder &Base) {
6390 LocationOps = Base.LocationOps;
6391 Expr = Base.Expr;
6392 }
6393
6394 void clear() {
6395 LocationOps.clear();
6396 Expr.clear();
6397 }
6398
6399
6401
6402 SmallVector<Value *, 2> LocationOps;
6403
6404 void pushOperator(uint64_t Op) { Expr.push_back(Op); }
6405 void pushUInt(uint64_t Operand) { Expr.push_back(Operand); }
6406
6407
6408
6411 auto *It = llvm::find(LocationOps, V);
6412 unsigned ArgIndex = 0;
6413 if (It != LocationOps.end()) {
6414 ArgIndex = std::distance(LocationOps.begin(), It);
6415 } else {
6416 ArgIndex = LocationOps.size();
6418 }
6420 }
6421
6422 void pushValue(const SCEVUnknown *U) {
6424 pushLocation(V);
6425 }
6426
6427 bool pushConst(const SCEVConstant *C) {
6428 if (C->getAPInt().getSignificantBits() > 64)
6429 return false;
6430 Expr.push_back(llvm::dwarf::DW_OP_consts);
6431 Expr.push_back(C->getAPInt().getSExtValue());
6432 return true;
6433 }
6434
6435
6436
6438 return ToDwarfOpIter(Expr);
6439 }
6440
6441
6442
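// Emit a commutative SCEV (add or mul) as a postfix DWARF expression: push
// each operand, interleaving the DWARF operator after the first operand.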
6443 bool pushArithmeticExpr(const llvm::SCEVCommutativeExpr *CommExpr,
6444 uint64_t DwarfOp) {
6446 "Expected arithmetic SCEV type");
6448 unsigned EmitOperator = 0;
6449 for (const auto &Op : CommExpr->operands()) {
6451
6452 if (EmitOperator >= 1)
6453 pushOperator(DwarfOp);
6454 ++EmitOperator;
6455 }
6457 }
6458
6459
6460 bool pushCast(const llvm::SCEVCastExpr *C, bool IsSigned) {
6461 const llvm::SCEV *Inner = C->getOperand(0);
6462 const llvm::Type *Type = C->getType();
6463 uint64_t ToWidth = Type->getIntegerBitWidth();
6464 bool Success = pushSCEV(Inner);
6466 IsSigned ? llvm::dwarf::DW_ATE_signed
6467 : llvm::dwarf::DW_ATE_unsigned};
6468 for (const auto &Op : CastOps)
6469 pushOperator(Op);
6471 }
6472
6473
6474 bool pushSCEV(const llvm::SCEV *S) {
6477 Success &= pushConst(StartInt);
6478
6480 if (!U->getValue())
6481 return false;
6482 pushLocation(U->getValue());
6483
6485 Success &= pushArithmeticExpr(MulRec, llvm::dwarf::DW_OP_mul);
6486
6488 Success &= pushSCEV(UDiv->getLHS());
6489 Success &= pushSCEV(UDiv->getRHS());
6490 pushOperator(llvm::dwarf::DW_OP_div);
6491
6493
6496 "Unexpected cast type in SCEV.");
6498
6500 Success &= pushArithmeticExpr(AddExpr, llvm::dwarf::DW_OP_plus);
6501
6503
6504
6505 return false;
6506
6507 } else {
6508 return false;
6509 }
6511 }
6512
6513
6514
6515 bool isIdentityFunction(uint64_t Op, const SCEV *S) {
6517 if (C->getAPInt().getSignificantBits() > 64)
6518 return false;
6519 int64_t I = C->getAPInt().getSExtValue();
6520 switch (Op) {
6521 case llvm::dwarf::DW_OP_plus:
6522 case llvm::dwarf::DW_OP_minus:
6523 return I == 0;
6524 case llvm::dwarf::DW_OP_mul:
6525 case llvm::dwarf::DW_OP_div:
6526 return I == 1;
6527 }
6528 }
6529 return false;
6530 }
6531
6532
6533
6534
6535
6536
6537
6538 bool SCEVToValueExpr(const llvm::SCEVAddRecExpr &SAR, ScalarEvolution &SE) {
6542
6543
6544 if (!isIdentityFunction(llvm::dwarf::DW_OP_mul, Stride)) {
6545 if (!pushSCEV(Stride))
6546 return false;
6547 pushOperator(llvm::dwarf::DW_OP_mul);
6548 }
6549 if (!isIdentityFunction(llvm::dwarf::DW_OP_plus, Start)) {
6550 if (!pushSCEV(Start))
6551 return false;
6552 pushOperator(llvm::dwarf::DW_OP_plus);
6553 }
6554 return true;
6555 }
6556
6557
6558 void createOffsetExpr(int64_t Offset, Value *OffsetValue) {
6559 pushLocation(OffsetValue);
6562 dbgs() << "scev-salvage: Generated IV offset expression. Offset: "
6563 << std::to_string(Offset) << "\n");
6564 }
6565
6566
6567
6568
6569 bool createIterCountExpr(const SCEV *S,
6570 const SCEVDbgValueBuilder &IterationCount,
6571 ScalarEvolution &SE) {
6572
6573
6574
6575
6576
6578 return false;
6579
6580 LLVM_DEBUG(dbgs() << "scev-salvage: Location to salvage SCEV: " << *S
6581 << '\n');
6582
6584 if (!Rec->isAffine())
6585 return false;
6586
6588 return false;
6589
6590
6591
6592 clone(IterationCount);
6593 if (!SCEVToValueExpr(*Rec, SE))
6594 return false;
6595
6596 return true;
6597 }
6598
6599
6600
6601
6602
6603
6604 bool SCEVToIterCountExpr(const llvm::SCEVAddRecExpr &SAR,
6605 ScalarEvolution &SE) {
6609
6610
6611 if (!isIdentityFunction(llvm::dwarf::DW_OP_minus, Start)) {
6612 if (!pushSCEV(Start))
6613 return false;
6614 pushOperator(llvm::dwarf::DW_OP_minus);
6615 }
6616 if (!isIdentityFunction(llvm::dwarf::DW_OP_div, Stride)) {
6617 if (!pushSCEV(Stride))
6618 return false;
6619 pushOperator(llvm::dwarf::DW_OP_div);
6620 }
6621 return true;
6622 }
6623
6624
6625
6626
6627 void appendToVectors(SmallVectorImpl<uint64_t> &DestExpr,
6628 SmallVectorImpl<Value *> &DestLocations) {
6630 "Expected the locations vector to contain the IV");
6631
6632
6633
6635 "Expected the location ops to contain the IV.");
6636
6637
6639 for (const auto &Op : LocationOps) {
6640 auto It = find(DestLocations, Op);
6641 if (It != DestLocations.end()) {
6642
6643 DestIndexMap.push_back(std::distance(DestLocations.begin(), It));
6644 continue;
6645 }
6646
6649 }
6650
6651 for (const auto &Op : expr_ops()) {
6653 Op.appendToVector(DestExpr);
6654 continue;
6655 }
6656
6658
6659
6660 uint64_t NewIndex = DestIndexMap[Op.getArg(0)];
6662 }
6663 }
6664};
6665
6666
6667
6668struct DVIRecoveryRec {
6669 DVIRecoveryRec(DbgVariableRecord *DVR)
6670 : DbgRef(DVR), Expr(DVR->getExpression()), HadLocationArgList(false) {}
6671
6672 DbgVariableRecord *DbgRef;
6673 DIExpression *Expr;
6674 bool HadLocationArgList;
6678
6679 void clear() {
6680 for (auto &RE : RecoveryExprs)
6681 RE.reset();
6682 RecoveryExprs.clear();
6683 }
6684
6685 ~DVIRecoveryRec() { clear(); }
6686};
6687}
6688
6689
6690
6691
6693 auto expr_ops = ToDwarfOpIter(Expr);
6694 unsigned Count = 0;
6695 for (auto Op : expr_ops)
6699}
6700
6701
6702
6703
6704template
6708 "contain any DW_OP_llvm_arg operands.");
6712}
6713
6714
6715template
6720 "Expected expression that references DIArglist locations using "
6721 "DW_OP_llvm_arg operands.");
6723 for (Value *V : Locations)
6726 DbgVal.setRawLocation(llvm::DIArgList::get(DbgVal.getContext(), ValArrayRef));
6728}
6729
6730
6731
6732
6733
6734
6740 if (NumLLVMArgs == 0) {
6741
6744
6745
6747 "Lone LLVM_arg in a DIExpression should refer to location-op 0.");
6750 } else {
6751
6753 }
6754
6755
6756
6757
6760 SalvageExpr = DIExpression::append(SalvageExpr, {dwarf::DW_OP_stack_value});
6762 }
6763}
6764
6765
6766
6767
6768
6769
6773
6774
6777 LLVM_DEBUG(dbgs() << "scev-salvage: restore dbg.value to pre-LSR state\n"
6778 << "scev-salvage: post-LSR: " << *DbgVal << '\n');
6779 assert(DVIRec.Expr && "Expected an expression");
6781
6782
6783
6784 if (!DVIRec.HadLocationArgList) {
6785 assert(DVIRec.LocationOps.size() == 1 &&
6786 "Unexpected number of location ops.");
6787
6788
6789
6790 Value *CachedValue =
6793 } else {
6795 for (WeakVH VH : DVIRec.LocationOps) {
6798 }
6802 }
6803 LLVM_DEBUG(dbgs() << "scev-salvage: pre-LSR: " << *DbgVal << '\n');
6804}
6805
6807 llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec,
6808 const SCEV *SCEVInductionVar,
6809 SCEVDbgValueBuilder IterCountExpr) {
6810
6812 return false;
6813
6814
6815
6816
6817
6819
6820
6821
6823 LocationOpIndexMap.assign(DVIRec.LocationOps.size(), -1);
6825 NewLocationOps.push_back(LSRInductionVar);
6826
6827 for (unsigned i = 0; i < DVIRec.LocationOps.size(); i++) {
6828 WeakVH VH = DVIRec.LocationOps[i];
6829
6830
6831
6834 LocationOpIndexMap[i] = NewLocationOps.size() - 1;
6835 LLVM_DEBUG(dbgs() << "scev-salvage: Location index " << i
6836 << " now at index " << LocationOpIndexMap[i] << "\n");
6837 continue;
6838 }
6839
6840
6841
6844 LLVM_DEBUG(dbgs() << "scev-salvage: SCEV for location at index: " << i
6845 << " refers to a location that is now undef or erased. "
6846 "Salvage abandoned.\n");
6847 return false;
6848 }
6849
6850 LLVM_DEBUG(dbgs() << "scev-salvage: salvaging location at index " << i
6851 << " with SCEV: " << *DVIRec.SCEVs[i] << "\n");
6852
6853 DVIRec.RecoveryExprs[i] = std::make_unique();
6854 SCEVDbgValueBuilder *SalvageExpr = DVIRec.RecoveryExprs[i].get();
6855
6856
6857
6858 if (std::optional Offset =
6860 if (Offset->getSignificantBits() <= 64)
6861 SalvageExpr->createOffsetExpr(Offset->getSExtValue(), LSRInductionVar);
6862 else
6863 return false;
6864 } else if (!SalvageExpr->createIterCountExpr(DVIRec.SCEVs[i], IterCountExpr,
6865 SE))
6866 return false;
6867 }
6868
6869
6870
6873 assert(DVIRec.RecoveryExprs.size() == 1 &&
6874 "Expected only a single recovery expression for an empty "
6875 "DIExpression.");
6876 assert(DVIRec.RecoveryExprs[0] &&
6877 "Expected a SCEVDbgSalvageBuilder for location 0");
6878 SCEVDbgValueBuilder *B = DVIRec.RecoveryExprs[0].get();
6879 B->appendToVectors(NewExpr, NewLocationOps);
6880 }
6881 for (const auto &Op : DVIRec.Expr->expr_ops()) {
6882
6885 continue;
6886 }
6887
6888 uint64_t LocationArgIndex = Op.getArg(0);
6889 SCEVDbgValueBuilder *DbgBuilder =
6890 DVIRec.RecoveryExprs[LocationArgIndex].get();
6891
6892
6893
6894 if (!DbgBuilder) {
6896 assert(LocationOpIndexMap[Op.getArg(0)] != -1 &&
6897 "Expected a positive index for the location-op position.");
6898 NewExpr.push_back(LocationOpIndexMap[Op.getArg(0)]);
6899 continue;
6900 }
6901
6902 DbgBuilder->appendToVectors(NewExpr, NewLocationOps);
6903 }
6904
6906 LLVM_DEBUG(dbgs() << "scev-salvage: Updated DVI: " << *DVIRec.DbgRef << "\n");
6907 return true;
6908}
6909
6910
6911
6914 SmallVector<std::unique_ptr, 2> &DVIToUpdate) {
6915 if (DVIToUpdate.empty())
6916 return;
6917
6918 const llvm::SCEV *SCEVInductionVar = SE.getSCEV(LSRInductionVar);
6919 assert(SCEVInductionVar &&
6920 "Anticipated a SCEV for the post-LSR induction variable");
6921
6924 if (!IVAddRec->isAffine())
6925 return;
6926
6927
6929 return;
6930
6931
6932 SCEVDbgValueBuilder IterCountExpr;
6933 IterCountExpr.pushLocation(LSRInductionVar);
6934 if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE))
6935 return;
6936
6937 LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV: " << *SCEVInductionVar
6938 << '\n');
6939
6940 for (auto &DVIRec : DVIToUpdate) {
6941 SalvageDVI(L, SE, LSRInductionVar, *DVIRec, SCEVInductionVar,
6942 IterCountExpr);
6943 }
6944 }
6945}
6946
6947
6948
6949
6952 SmallVector<std::unique_ptr, 2> &SalvageableDVISCEVs) {
6953 for (const auto &B : L->getBlocks()) {
6956 if (!DbgVal.isDbgValue() && !DbgVal.isDbgAssign())
6957 continue;
6958
6959
6960
6961 if (DbgVal.isKillLocation())
6962 continue;
6963
6964
6965
6966 const auto &HasTranslatableLocationOps =
6968 for (const auto LocOp : DbgValToTranslate.location_ops()) {
6969 if (!LocOp)
6970 return false;
6971
6972 if (!SE.isSCEVable(LocOp->getType()))
6973 return false;
6974
6977 return false;
6978 }
6979 return true;
6980 };
6981
6982 if (!HasTranslatableLocationOps(DbgVal))
6983 continue;
6984
6985 std::unique_ptr NewRec =
6986 std::make_unique(&DbgVal);
6987
6988
6989
6990 NewRec->RecoveryExprs.resize(DbgVal.getNumVariableLocationOps());
6991 for (const auto LocOp : DbgVal.location_ops()) {
6992 NewRec->SCEVs.push_back(SE.getSCEV(LocOp));
6993 NewRec->LocationOps.push_back(LocOp);
6994 NewRec->HadLocationArgList = DbgVal.hasArgList();
6995 }
6996 SalvageableDVISCEVs.push_back(std::move(NewRec));
6997 }
6998 }
6999 }
7000}
7001
7002
7003
7004
7006 const LSRInstance &LSR) {
7007
7008 auto IsSuitableIV = [&](PHINode *P) {
7010 return false;
7013 return false;
7014 };
7015
7016
7017
7018
7019 for (const WeakVH &IV : LSR.getScalarEvolutionIVs()) {
7020 if (!IV)
7021 continue;
7022
7023
7025
7026 if (IsSuitableIV(P))
7027 return P;
7028 }
7029
7030 for (PHINode &P : L.getHeader()->phis()) {
7031 if (IsSuitableIV(&P))
7032 return &P;
7033 }
7034 return nullptr;
7035}
7036
7042
7043
7044
7047
7049 std::unique_ptr MSSAU;
7050 if (MSSA)
7051 MSSAU = std::make_unique(MSSA);
7052
7053
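// Run the main LSR transformation.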
7054 const LSRInstance &Reducer =
7055 LSRInstance(L, IU, SE, DT, LI, TTI, AC, TLI, MSSAU.get());
7056 Changed |= Reducer.getChanged();
7057
7058
7063#if LLVM_ENABLE_ABI_BREAKING_CHECKS
7065#endif
7066 unsigned numFolded = Rewriter.replaceCongruentIVs(L, &DT, DeadInsts, &TTI);
7067 Rewriter.clear();
7068 if (numFolded) {
7071 MSSAU.get());
7073 }
7074 }
7075
7076
7077
7078
7079
7080 if (L->isRecursivelyLCSSAForm(DT, LI) && L->getExitBlock()) {
7085 Rewriter.clear();
7086 if (Rewrites) {
7089 MSSAU.get());
7091 }
7092 }
7093
7094 if (SalvageableDVIRecords.empty())
7096
7097
7098
7099
7100 for (const auto &L : LI) {
7103 else {
7104 LLVM_DEBUG(dbgs() << "scev-salvage: SCEV salvaging not possible. An IV "
7105 "could not be identified.\n");
7106 }
7107 }
7108
7109 for (auto &Rec : SalvageableDVIRecords)
7110 Rec->clear();
7111 SalvageableDVIRecords.clear();
7113}
7114
7115bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & ) {
7116 if (skipLoop(L))
7117 return false;
7118
7119 auto &IU = getAnalysis<IVUsersWrapperPass>().getIU();
7120 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
7121 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
7122 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
7123 const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
7124 *L->getHeader()->getParent());
7125 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
7126 *L->getHeader()->getParent());
7127 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
7128 *L->getHeader()->getParent());
7129 auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
7130 MemorySSA *MSSA = nullptr;
7131 if (MSSAAnalysis)
7132 MSSA = &MSSAAnalysis->getMSSA();
7133 return ReduceLoopStrength(L, IU, SE, DT, LI, TTI, AC, TLI, MSSA);
7134}
7135
7142
7144 if (AR.MSSA)
7145 PA.preserve<MemorySSAAnalysis>();
7146 return PA;
7147}
7148
7149char LoopStrengthReduce::ID = 0;
7150
7152 "Loop Strength Reduction", false, false)
7161