LLVM: lib/Target/SystemZ/SystemZTargetTransformInfo.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
27
28using namespace llvm;
29
30#define DEBUG_TYPE "systemztti"
31
32
33
34
35
36
37
39 bool UsedAsMemCpySource = false;
44 continue;
45 }
47 if (Memcpy->getOperand(1) == V && !Memcpy->isVolatile()) {
48 UsedAsMemCpySource = true;
49 continue;
50 }
51 }
52 OtherUse = true;
53 }
54 return UsedAsMemCpySource;
55}
56
58 unsigned &NumLoads, const Function *F) {
60 return;
61 for (const User *U : Ptr->users())
63 if (User->getParent()->getParent() == F) {
65 if (SI->getPointerOperand() == Ptr && ->isVolatile())
66 NumStores++;
68 if (LI->getPointerOperand() == Ptr && !LI->isVolatile())
69 NumLoads++;
71 if (GEP->getPointerOperand() == Ptr)
73 }
74 }
75 }
76}
77
79 unsigned Bonus = 0;
82 if (!Callee)
83 return 0;
84
85
86
87 for (const Argument &Arg : Callee->args()) {
88 bool OtherUse = false;
90 Bonus = 1000;
91 break;
92 }
93 }
94
95
96
101 Ptr2NumUses.clear();
102 break;
103 }
105 if (->isVolatile())
107 Ptr2NumUses[GV]++;
109 if (!LI->isVolatile())
111 Ptr2NumUses[GV]++;
114 unsigned NumStores = 0, NumLoads = 0;
116 Ptr2NumUses[GV] += NumLoads + NumStores;
117 }
118 }
119 }
120
121 for (auto [Ptr, NumCalleeUses] : Ptr2NumUses)
122 if (NumCalleeUses > 10) {
123 unsigned CallerStores = 0, CallerLoads = 0;
125 if (CallerStores + CallerLoads > 10) {
126 Bonus = 1000;
127 break;
128 }
129 }
130
131
132 unsigned NumStores = 0;
133 unsigned NumLoads = 0;
134 for (unsigned OpIdx = 0; OpIdx != Callee->arg_size(); ++OpIdx) {
139 }
140 if (NumLoads > 10)
141 Bonus += NumLoads * 50;
142 if (NumStores > 10)
143 Bonus += NumStores * 50;
144 Bonus = std::min(Bonus, unsigned(1000));
145
147 dbgs() << "++ SZTTI Adding inlining bonus: " << Bonus << "\n";);
148 return Bonus;
149}
150
154 assert(Ty->isIntegerTy());
155
156 unsigned BitSize = Ty->getPrimitiveSizeInBits();
157
158
159 if (BitSize == 0)
161
162 if ((!ST->hasVector() && BitSize > 64) || BitSize > 128)
164
165 if (Imm == 0)
167
168 if (Imm.getBitWidth() <= 64) {
169
170 if (isInt<32>(Imm.getSExtValue()))
172
175
176 if ((Imm.getZExtValue() & 0xffffffff) == 0)
178
180 }
181
182
184}
185
190 assert(Ty->isIntegerTy());
191
192 unsigned BitSize = Ty->getPrimitiveSizeInBits();
193
194
195 if (BitSize == 0)
197
198 if (BitSize > 64)
200
201 switch (Opcode) {
202 default:
204 case Instruction::GetElementPtr:
205
206
207
208 if (Idx == 0)
211 case Instruction::Store:
212 if (Idx == 0 && Imm.getBitWidth() <= 64) {
213
214 if (BitSize == 8)
216
217 if (isInt<16>(Imm.getSExtValue()))
219 }
220 break;
221 case Instruction::ICmp:
222 if (Idx == 1 && Imm.getBitWidth() <= 64) {
223
224 if (isInt<32>(Imm.getSExtValue()))
226
229 }
230 break;
231 case Instruction::Add:
232 case Instruction::Sub:
233 if (Idx == 1 && Imm.getBitWidth() <= 64) {
234
237
240 }
241 break;
242 case Instruction::Mul:
243 if (Idx == 1 && Imm.getBitWidth() <= 64) {
244
245 if (isInt<32>(Imm.getSExtValue()))
247 }
248 break;
249 case Instruction::Or:
250 case Instruction::Xor:
251 if (Idx == 1 && Imm.getBitWidth() <= 64) {
252
255
256 if ((Imm.getZExtValue() & 0xffffffff) == 0)
258 }
259 break;
260 case Instruction::And:
261 if (Idx == 1 && Imm.getBitWidth() <= 64) {
262
263 if (BitSize <= 32)
265
268
269 if ((Imm.getZExtValue() & 0xffffffff) == 0xffffffff)
271
273 unsigned Start, End;
274 if (TII->isRxSBGMask(Imm.getZExtValue(), BitSize, Start, End))
276 }
277 break;
278 case Instruction::Shl:
279 case Instruction::LShr:
280 case Instruction::AShr:
281
282 if (Idx == 1)
284 break;
285 case Instruction::UDiv:
286 case Instruction::SDiv:
287 case Instruction::URem:
288 case Instruction::SRem:
289 case Instruction::Trunc:
290 case Instruction::ZExt:
291 case Instruction::SExt:
292 case Instruction::IntToPtr:
293 case Instruction::PtrToInt:
294 case Instruction::BitCast:
295 case Instruction::PHI:
296 case Instruction::Call:
297 case Instruction::Select:
298 case Instruction::Ret:
299 case Instruction::Load:
300 break;
301 }
302
304}
305
310 assert(Ty->isIntegerTy());
311
312 unsigned BitSize = Ty->getPrimitiveSizeInBits();
313
314
315 if (BitSize == 0)
317
318 if (BitSize > 64)
320
321 switch (IID) {
322 default:
324 case Intrinsic::sadd_with_overflow:
325 case Intrinsic::uadd_with_overflow:
326 case Intrinsic::ssub_with_overflow:
327 case Intrinsic::usub_with_overflow:
328
329 if (Idx == 1 && Imm.getBitWidth() <= 64) {
334 }
335 break;
336 case Intrinsic::smul_with_overflow:
337 case Intrinsic::umul_with_overflow:
338
339 if (Idx == 1 && Imm.getBitWidth() <= 64) {
340 if (isInt<32>(Imm.getSExtValue()))
342 }
343 break;
344 case Intrinsic::experimental_stackmap:
345 if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
347 break;
348 case Intrinsic::experimental_patchpoint_void:
349 case Intrinsic::experimental_patchpoint:
350 if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
352 break;
353 }
355}
356
360 if (ST->hasPopulationCount() && TyWidth <= 64)
363}
364
368
369
370 bool HasCall = false;
372 for (auto &BB : L->blocks())
373 for (auto &I : *BB) {
377 HasCall = true;
378 if (F->getIntrinsicID() == Intrinsic::memcpy ||
379 F->getIntrinsicID() == Intrinsic::memset)
380 NumStores++;
381 } else {
382 HasCall = true;
383 }
384 }
386 Type *MemAccessTy = I.getOperand(0)->getType();
389 }
390 }
391
392
393
394
395 unsigned const NumStoresVal = NumStores.getValue();
396 unsigned const Max = (NumStoresVal ? (12 / NumStoresVal) : UINT_MAX);
397
398 if (HasCall) {
399
402 return;
403 }
404
407 return;
408
409
411
414
415
417
419}
420
425
438
440 bool Vector = (ClassID == 1);
442
443
444 return 14;
445 if (ST->hasVector())
446 return 32;
447 return 0;
448}
449
452 switch (K) {
459 }
460
462}
463
465 unsigned NumStridedMemAccesses,
466 unsigned NumPrefetches,
467 bool HasCall) const {
468
469 if (NumPrefetches > 16)
470 return UINT_MAX;
471
472
473
474 if (NumStridedMemAccesses > 32 && !HasCall &&
475 (NumMemAccesses - NumStridedMemAccesses) * 32 <= NumStridedMemAccesses)
476 return 1;
477
478 return ST->hasMiscellaneousExtensions3() ? 8192 : 2048;
479}
480
482 EVT VT = TLI->getValueType(DL, DataType);
484}
485
490 }
491 return false;
492}
493
495 VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract,
500
501 if (Insert && Ty->isIntOrIntVectorTy(64)) {
502
503
505 "Type does not match the number of values.");
507 for (unsigned Idx = 0; Idx < NumElts; ++Idx) {
509 ++CurrVectorCost;
510 if (Idx % 2 == 1) {
512 CurrVectorCost = 0;
513 }
514 }
515 Insert = false;
516 }
517
521}
522
523
524
526 unsigned Size =
527 (Ty->isPtrOrPtrVectorTy() ? 64U : Ty->getScalarSizeInBits());
528 assert(Size > 0 && "Element must have non-zero size.");
530}
531
532
533
534
538 assert(WideBits > 0 && "Could not compute size of vector");
539 return ((WideBits % 128U) ? ((WideBits / 128U) + 1) : (WideBits / 128U));
540}
541
546
547
550 Op2Info, Args, CxtI);
551
552
553
554
555
556
557
558 unsigned ScalarBits = Ty->getScalarSizeInBits();
559
560
561
562
563
564 const unsigned DivInstrCost = 20;
565 const unsigned DivMulSeqCost = 10;
566 const unsigned SDivPow2Cost = 4;
567
568 bool SignedDivRem =
569 Opcode == Instruction::SDiv || Opcode == Instruction::SRem;
570 bool UnsignedDivRem =
571 Opcode == Instruction::UDiv || Opcode == Instruction::URem;
572
573
574 bool DivRemConst = false;
575 bool DivRemConstPow2 = false;
576 if ((SignedDivRem || UnsignedDivRem) && Args.size() == 2) {
579 (C->getType()->isVectorTy()
584 DivRemConstPow2 = true;
585 else
586 DivRemConst = true;
587 }
588 }
589
590 if (!Ty->isVectorTy()) {
591
592
593
594 if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub ||
595 Opcode == Instruction::FMul || Opcode == Instruction::FDiv)
596 return 1;
597
598
599 if (Opcode == Instruction::FRem)
600 return LIBCALL_COST;
601
602
603 if (Args.size() == 2) {
604 if (Opcode == Instruction::Xor) {
605 for (const Value *A : Args) {
607 if (I->hasOneUse() &&
608 (I->getOpcode() == Instruction::Or ||
609 I->getOpcode() == Instruction::And ||
610 I->getOpcode() == Instruction::Xor))
611 if ((ScalarBits <= 64 && ST->hasMiscellaneousExtensions3()) ||
612 (isInt128InVR(Ty) &&
613 (I->getOpcode() == Instruction::Or || ST->hasVectorEnhancements1())))
614 return 0;
615 }
616 }
617 else if (Opcode == Instruction::And || Opcode == Instruction::Or) {
618 for (const Value *A : Args) {
620 if ((I->hasOneUse() && I->getOpcode() == Instruction::Xor) &&
621 ((ScalarBits <= 64 && ST->hasMiscellaneousExtensions3()) ||
622 (isInt128InVR(Ty) &&
623 (Opcode == Instruction::And || ST->hasVectorEnhancements1()))))
624 return 0;
625 }
626 }
627 }
628
629
630 if (Opcode == Instruction::Or)
631 return 1;
632
633 if (Opcode == Instruction::Xor && ScalarBits == 1) {
634 if (ST->hasLoadStoreOnCond2())
635 return 5;
636 return 7;
637 }
638
639 if (DivRemConstPow2)
640 return (SignedDivRem ? SDivPow2Cost : 1);
641 if (DivRemConst)
642 return DivMulSeqCost;
643 if (SignedDivRem || UnsignedDivRem)
644 return DivInstrCost;
645 }
646 else if (ST->hasVector()) {
648 unsigned VF = VTy->getNumElements();
650
651
652
653 if (Opcode == Instruction::Shl || Opcode == Instruction::LShr ||
654 Opcode == Instruction::AShr) {
655 return NumVectors;
656 }
657
658 if (DivRemConstPow2)
659 return (NumVectors * (SignedDivRem ? SDivPow2Cost : 1));
660 if (DivRemConst) {
662 return VF * DivMulSeqCost +
664 }
665 if (SignedDivRem || UnsignedDivRem) {
666 if (ST->hasVectorEnhancements3() && ScalarBits >= 32)
667 return NumVectors * DivInstrCost;
668 else if (VF > 4)
669
670
671
672
673 return 1000;
674 }
675
676
677
678
679
680 if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub ||
681 Opcode == Instruction::FMul || Opcode == Instruction::FDiv) {
682 switch (ScalarBits) {
683 case 32: {
684
685 if (ST->hasVectorEnhancements1())
686 return NumVectors;
687
688
693 (VF * ScalarCost) +
695
696
697 if (VF == 2)
700 }
701 case 64:
702 case 128:
703 return NumVectors;
704 default:
705 break;
706 }
707 }
708
709
710 if (Opcode == Instruction::FRem) {
713 (VF * LIBCALL_COST) +
715
716 if (VF == 2 && ScalarBits == 32)
719 }
720 }
721
722
724 Args, CxtI);
725}
726
734 if (ST->hasVector()) {
736
737
738
739
740
741
742 if (SrcTy->getScalarType()->isFP128Ty())
744
745 switch (Kind) {
747
748
749
750 return (Index == 0 ? 0 : NumVectors);
751
753
754
755
756
757 return NumVectors - 1;
758
759 default:
760
761
762 return NumVectors;
763 }
764 }
765
767 SubTp);
768}
769
770
774
775 if (Bits1 > Bits0)
777
779}
780
781
784 assert(SrcTy->getPrimitiveSizeInBits().getFixedValue() >
786 "Packing must reduce size of vector type.");
789 "Packing should not change number of elements.");
790
791
792
794 if (NumParts <= 2)
795
796
797
798
799
800 return 1;
801
802 unsigned Cost = 0;
805 for (unsigned P = 0; P < Log2Diff; ++P) {
806 if (NumParts > 1)
807 NumParts /= 2;
808 Cost += NumParts;
809 }
810
811
812
813
814 if (VF == 8 && SrcTy->getScalarSizeInBits() == 64 &&
817
819}
820
821
822
824 Type *DstTy) const {
826 "Should only be called with vector types.");
827
828 unsigned PackCost = 0;
829 unsigned SrcScalarBits = SrcTy->getScalarSizeInBits();
832 if (SrcScalarBits > DstScalarBits)
833
835 else if (SrcScalarBits < DstScalarBits) {
837
838 PackCost = Log2Diff * DstNumParts;
839
840 PackCost += DstNumParts - 1;
841 }
842
843 return PackCost;
844}
845
846
847
849 Type *OpTy = nullptr;
851 OpTy = CI->getOperand(0)->getType();
853 if (LogicI->getNumOperands() == 2)
856 OpTy = CI0->getOperand(0)->getType();
857
858 if (OpTy != nullptr) {
859 if (VF == 1) {
861 return OpTy;
862 }
863
864
867 }
868
869 return nullptr;
870}
871
872
873
874unsigned
878 unsigned VF = DstVTy->getNumElements();
879 unsigned Cost = 0;
880
881
883 if (CmpOpTy != nullptr)
885 if (Opcode == Instruction::ZExt || Opcode == Instruction::UIToFP)
886
889}
890
896
899 return BaseCost == 0 ? BaseCost : 1;
900 }
901
902 unsigned DstScalarBits = Dst->getScalarSizeInBits();
903 unsigned SrcScalarBits = Src->getScalarSizeInBits();
904
905 if (!Src->isVectorTy()) {
906 if (Dst->isVectorTy())
908
909 if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP) {
910 if (Src->isIntegerTy(128))
911 return LIBCALL_COST;
912 if (SrcScalarBits >= 32 ||
914 return 1;
915 return SrcScalarBits > 1 ? 2 : 5 ;
916 }
917
918 if ((Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI) &&
919 Dst->isIntegerTy(128))
920 return LIBCALL_COST;
921
922 if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt)) {
923 if (Src->isIntegerTy(1)) {
924 if (DstScalarBits == 128) {
925 if (Opcode == Instruction::SExt && ST->hasVectorEnhancements3())
926 return 0;
927 return 5 ;
928 }
929
930 if (ST->hasLoadStoreOnCond2())
931 return 2;
932
933
934
935 unsigned Cost = 0;
936 if (Opcode == Instruction::SExt)
937 Cost = (DstScalarBits < 64 ? 3 : 4);
938 if (Opcode == Instruction::ZExt)
942
945 }
946 else if (isInt128InVR(Dst)) {
947
948
949 if (Opcode == Instruction::ZExt && I != nullptr)
951 if (Ld->hasOneUse())
952 return 1;
953 return 2;
954 }
955 }
956
957 if (Opcode == Instruction::Trunc && isInt128InVR(Src) && I != nullptr) {
959 if (Ld->hasOneUse())
960 return 0;
961 bool OnlyTruncatingStores = true;
962 for (const User *U : I->users())
964 OnlyTruncatingStores = false;
965 break;
966 }
967 if (OnlyTruncatingStores)
968 return 0;
969 return 2;
970 }
971 }
972 else if (ST->hasVector()) {
973
976 if (!DstVecTy) {
977
979 }
980 unsigned VF = SrcVecTy->getNumElements();
983
984 if (Opcode == Instruction::Trunc) {
985 if (Src->getScalarSizeInBits() == Dst->getScalarSizeInBits())
986 return 0;
988 }
989
990 if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt) {
991 if (SrcScalarBits >= 8) {
992
993 if (Opcode == Instruction::ZExt)
994 return NumDstVectors;
995
996
998
999
1000
1001 unsigned NumSrcVectorOps =
1002 (NumUnpacks > 1 ? (NumDstVectors - NumSrcVectors)
1003 : (NumDstVectors / 2));
1004
1005 return (NumUnpacks * NumDstVectors) + NumSrcVectorOps;
1006 }
1007 else if (SrcScalarBits == 1)
1009 }
1010
1011 if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP ||
1012 Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI) {
1013
1014
1015
1016
1017 if (DstScalarBits == 64 || ST->hasVectorEnhancements2()) {
1018 if (SrcScalarBits == DstScalarBits)
1019 return NumDstVectors;
1020
1021 if (SrcScalarBits == 1)
1023 }
1024
1025
1026
1027
1029 Opcode, Dst->getScalarType(), Src->getScalarType(), CCH, CostKind);
1031 bool NeedsInserts = true, NeedsExtracts = true;
1032
1033 if (DstScalarBits == 128 &&
1034 (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP))
1035 NeedsInserts = false;
1036 if (SrcScalarBits == 128 &&
1037 (Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI))
1038 NeedsExtracts = false;
1039
1043 false, CostKind);
1044
1045
1046 if (VF == 2 && SrcScalarBits == 32 && DstScalarBits == 32)
1047 TotCost *= 2;
1048
1049 return TotCost;
1050 }
1051
1052 if (Opcode == Instruction::FPTrunc) {
1053 if (SrcScalarBits == 128)
1054 return VF +
1056 false, CostKind);
1057 else
1058 return VF / 2 + std::max(1U, VF / 4 );
1059 }
1060
1061 if (Opcode == Instruction::FPExt) {
1062 if (SrcScalarBits == 32 && DstScalarBits == 64) {
1063
1064
1065
1066 return VF * 2;
1067 }
1068
1070 true, CostKind);
1071 }
1072 }
1073
1075}
1076
1077
1078
1080 unsigned ExtCost = 0;
1081 for (Value *Op : I->operands())
1082
1084 ExtCost++;
1085
1086 return ExtCost;
1087}
1088
1095 Op1Info, Op2Info);
1096
1097 if (!ValTy->isVectorTy()) {
1098 switch (Opcode) {
1099 case Instruction::ICmp: {
1100
1101
1102 unsigned ScalarBits = ValTy->getScalarSizeInBits();
1103 if (I != nullptr && (ScalarBits == 32 || ScalarBits == 64))
1106 if (!Ld->hasOneUse() && Ld->getParent() == I->getParent() &&
1107 C->isZero())
1108 return 0;
1109
1110 unsigned Cost = 1;
1111 if (ValTy->isIntegerTy() && ValTy->getScalarSizeInBits() <= 16)
1113 return Cost;
1114 }
1115 case Instruction::Select:
1116 if (ValTy->isFloatingPointTy())
1117 return 4;
1118
1119
1120
1121 if (I != nullptr)
1123 if (CI->getOperand(0)->getType()->isIntegerTy(128))
1124 return ST->hasVectorEnhancements3() ? 1 : 4;
1125
1126
1127 return !isInt128InVR(ValTy) ? 1 : 4;
1128 }
1129 }
1130 else if (ST->hasVector()) {
1132
1133
1134 if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) {
1135 unsigned PredicateExtraCost = 0;
1136 if (I != nullptr) {
1137
1144 PredicateExtraCost = 1;
1145 break;
1150 PredicateExtraCost = 2;
1151 break;
1152 default:
1153 break;
1154 }
1155 }
1156
1157
1158
1159 unsigned CmpCostPerVector = (ValTy->getScalarType()->isFloatTy() ? 10 : 1);
1161
1162 unsigned Cost = (NumVecs_cmp * (CmpCostPerVector + PredicateExtraCost));
1163 return Cost;
1164 }
1165 else {
1166 assert (Opcode == Instruction::Select);
1167
1168
1169
1170 unsigned PackCost = 0;
1172 if (CmpOpTy != nullptr)
1173 PackCost =
1175
1177 }
1178 }
1179
1181 Op1Info, Op2Info);
1182}
1183
1186 unsigned Index,
1187 const Value *Op0,
1188 const Value *Op1) const {
1189 if (Opcode == Instruction::InsertElement) {
1190
1192 return 0;
1193
1194
1195
1196
1198 return ((Index % 2 == 0) ? 1 : 0);
1199 }
1200
1201 if (Opcode == Instruction::ExtractElement) {
1203
1204
1207
1208 return Cost;
1209 }
1210
1212}
1213
1214
1216 const Instruction *&FoldedValue) const {
1218 return false;
1219 FoldedValue = Ld;
1222 unsigned TruncBits = 0;
1223 unsigned SExtBits = 0;
1224 unsigned ZExtBits = 0;
1228 TruncBits = UserBits;
1230 SExtBits = UserBits;
1232 ZExtBits = UserBits;
1233 }
1234 if (TruncBits || SExtBits || ZExtBits) {
1235 FoldedValue = UserI;
1237
1238 }
1239 if ((UserI->getOpcode() == Instruction::Sub ||
1240 UserI->getOpcode() == Instruction::SDiv ||
1241 UserI->getOpcode() == Instruction::UDiv) &&
1242 UserI->getOperand(1) != FoldedValue)
1243 return false;
1244
1245
1246 unsigned LoadOrTruncBits =
1247 ((SExtBits || ZExtBits) ? 0 : (TruncBits ? TruncBits : LoadedBits));
1249 case Instruction::Add:
1250 case Instruction::Sub:
1251 case Instruction::ICmp:
1252 if (LoadedBits == 32 && ZExtBits == 64)
1253 return true;
1254 [[fallthrough]];
1255 case Instruction::Mul:
1256 if (UserI->getOpcode() != Instruction::ICmp) {
1257 if (LoadedBits == 16 &&
1258 (SExtBits == 32 ||
1259 (SExtBits == 64 && ST->hasMiscellaneousExtensions2())))
1260 return true;
1261 if (LoadOrTruncBits == 16)
1262 return true;
1263 }
1264 [[fallthrough]];
1265 case Instruction::SDiv:
1266 if (LoadedBits == 32 && SExtBits == 64)
1267 return true;
1268 [[fallthrough]];
1269 case Instruction::UDiv:
1270 case Instruction::And:
1271 case Instruction::Or:
1272 case Instruction::Xor:
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284 if (UserI->getOpcode() == Instruction::ICmp)
1286 if (CI->getValue().isIntN(16))
1287 return true;
1288 return (LoadOrTruncBits == 32 || LoadOrTruncBits == 64);
1289 break;
1290 }
1291 return false;
1292}
1293
1297 if (auto *F = CI->getCalledFunction())
1298 if (F->getIntrinsicID() == Intrinsic::bswap)
1299 return true;
1300 return false;
1301}
1302
1304 Align Alignment,
1309 assert(!Src->isVoidTy() && "Invalid type");
1310
1311
1313 return 1;
1314
1315 if (!Src->isVectorTy() && Opcode == Instruction::Load && I != nullptr) {
1316
1317 const Instruction *FoldedValue = nullptr;
1321
1322
1323
1324 for (unsigned i = 0; i < 2; ++i) {
1325 if (UserI->getOperand(i) == FoldedValue)
1326 continue;
1327
1330 if (!OtherLoad &&
1334 if (OtherLoad && isFoldableLoad(OtherLoad, FoldedValue))
1335 return i == 0;
1336 }
1337 }
1338
1339 return 0;
1340 }
1341 }
1342
1343
1344 if (TLI->getValueType(DL, Src, true) == MVT::Other)
1347
1348
1349 if (Src->isFP128Ty() && !ST->hasVectorEnhancements1())
1350 return 2;
1351
1354
1355
1356 if (((!Src->isVectorTy() && NumOps == 1) || ST->hasVectorEnhancements2()) &&
1357 I != nullptr) {
1358 if (Opcode == Instruction::Load && I->hasOneUse()) {
1360
1363 return 0;
1364 }
1366 const Value *StoredVal = SI->getValueOperand();
1368 return 0;
1369 }
1370 }
1371
1373}
1374
1375
1376
1377
1378
1379
1383 bool UseMaskForCond, bool UseMaskForGaps) const {
1384 if (UseMaskForCond || UseMaskForGaps)
1387 UseMaskForCond, UseMaskForGaps);
1389 "Expect a vector type for interleaved memory op");
1390
1392 assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
1393 unsigned VF = NumElts / Factor;
1396 unsigned NumPermutes = 0;
1397
1398 if (Opcode == Instruction::Load) {
1399
1400
1401
1402 BitVector UsedInsts(NumVectorMemOps, false);
1403 std::vector ValueVecs(Factor, BitVector(NumVectorMemOps, false));
1404 for (unsigned Index : Indices)
1405 for (unsigned Elt = 0; Elt < VF; ++Elt) {
1406 unsigned Vec = (Index + Elt * Factor) / NumEltsPerVecReg;
1407 UsedInsts.set(Vec);
1408 ValueVecs[Index].set(Vec);
1409 }
1410 NumVectorMemOps = UsedInsts.count();
1411
1412 for (unsigned Index : Indices) {
1413
1414
1415
1416 unsigned NumSrcVecs = ValueVecs[Index].count();
1418 assert (NumSrcVecs >= NumDstVecs && "Expected at least as many sources");
1419 NumPermutes += std::max(1U, NumSrcVecs - NumDstVecs);
1420 }
1421 } else {
1422
1423
1424
1425 unsigned NumSrcVecs = std::min(NumEltsPerVecReg, Factor);
1426 unsigned NumDstVecs = NumVectorMemOps;
1427 NumPermutes += (NumDstVecs * NumSrcVecs) - NumDstVecs;
1428 }
1429
1430
1431 return NumVectorMemOps + NumPermutes;
1432}
1433
1436
1437 Cost += NumVec - 1;
1438
1439 Cost += (ScalarBits < 32) ? 3 : 2;
1440 return Cost;
1441}
1442
1444 unsigned ScalarBits) {
1447
1448 Cost += NumVec - 1;
1449
1450
1451 Cost += 2 * Log2_32_Ceil(std::min(NumElems, NumEltsPerVecReg));
1452 return Cost;
1453}
1454
1456 return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
1457 Opcode == Instruction::Add || Opcode == Instruction::Mul;
1458}
1459
1462 std::optional FMF,
1464 unsigned ScalarBits = Ty->getScalarSizeInBits();
1465
1466
1471 unsigned NumElems = ((FixedVectorType *)Ty)->getNumElements();
1472
1473 if (Opcode == Instruction::Add)
1475
1478
1479 if ((Opcode == Instruction::FAdd) || (Opcode == Instruction::FMul))
1481 return Cost;
1482 }
1483
1485}
1486
1491
1492 if (ST->hasVectorEnhancements1()) {
1494 unsigned NumElems = ((FixedVectorType *)Ty)->getNumElements();
1495 unsigned ScalarBits = Ty->getScalarSizeInBits();
1497
1498 Cost += NumVectors - 1;
1499
1500
1502 return Cost;
1503 }
1504
1506}
1507
1508static int
1511 if (RetTy->isVectorTy() && ID == Intrinsic::bswap)
1513
1514 return -1;
1515}
1516
1522 if (Cost != -1)
1523 return Cost;
1525}
1526
1528
1529 if (!ST->hasVector())
1530 return true;
1531
1532
1533 switch (II->getIntrinsicID()) {
1534 default:
1535 return true;
1536
1537 case Intrinsic::vector_reduce_add:
1539
1540
1541
1542 return VType->getScalarSizeInBits() >= 64 ||
1544 }
1545}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
const TargetInstrInfo & TII
Expand Atomic instructions
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static unsigned InstrCount
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
static const Function * getCalledFunction(const Value *V)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
static unsigned getNumElements(Type *Ty)
bool customCostReductions(unsigned Opcode)
Definition SystemZTargetTransformInfo.cpp:1455
static unsigned getElSizeLog2Diff(Type *Ty0, Type *Ty1)
Definition SystemZTargetTransformInfo.cpp:771
static bool isBswapIntrinsicCall(const Value *V)
Definition SystemZTargetTransformInfo.cpp:1294
InstructionCost getIntAddReductionCost(unsigned NumVec, unsigned ScalarBits)
Definition SystemZTargetTransformInfo.cpp:1434
static void countNumMemAccesses(const Value *Ptr, unsigned &NumStores, unsigned &NumLoads, const Function *F)
Definition SystemZTargetTransformInfo.cpp:57
static unsigned getOperandsExtensionCost(const Instruction *I)
Definition SystemZTargetTransformInfo.cpp:1079
static Type * getCmpOpsType(const Instruction *I, unsigned VF=1)
Definition SystemZTargetTransformInfo.cpp:848
static unsigned getScalarSizeInBits(Type *Ty)
Definition SystemZTargetTransformInfo.cpp:525
static bool isFreeEltLoad(const Value *Op)
Definition SystemZTargetTransformInfo.cpp:486
InstructionCost getFastReductionCost(unsigned NumVec, unsigned NumElems, unsigned ScalarBits)
Definition SystemZTargetTransformInfo.cpp:1443
static int getVectorIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, const SmallVectorImpl< Type * > &ParamTys)
Definition SystemZTargetTransformInfo.cpp:1509
static bool isUsedAsMemCpySource(const Value *V, bool &OtherUse)
Definition SystemZTargetTransformInfo.cpp:38
static unsigned getNumVectorRegs(Type *Ty)
Definition SystemZTargetTransformInfo.cpp:535
This file describes how to lower LLVM code to machine code.
This pass exposes codegen information to IR-level passes.
Class for arbitrary precision integers.
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
This class represents an incoming formal argument to a Function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override
unsigned getNumberOfParts(Type *Tp) const override
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
size_type count() const
count - Returns the number of bits which are set.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Value * getArgOperand(unsigned i) const
This class is the base class for the comparison instructions.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLE
signed less or equal
@ ICMP_UGE
unsigned greater or equal
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ ICMP_ULE
unsigned less or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This is an important base class in LLVM.
Convenience struct for specifying and reasoning about fast-math flags.
Class to represent fixed width SIMD vectors.
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
This instruction compares its operands according to the predicate given to the constructor.
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
An instruction for reading from memory.
Represents a single loop in the control flow graph.
This class wraps the llvm.memcpy intrinsic.
The main scalar evolution driver.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
bool isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue) const
Definition SystemZTargetTransformInfo.cpp:1215
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const override
Definition SystemZTargetTransformInfo.cpp:426
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
Try to calculate op costs for min/max reduction operations.
Definition SystemZTargetTransformInfo.cpp:1488
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override
Estimate the overhead of scalarizing an instruction.
Definition SystemZTargetTransformInfo.cpp:494
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
Definition SystemZTargetTransformInfo.cpp:728
unsigned getNumberOfRegisters(unsigned ClassID) const override
Definition SystemZTargetTransformInfo.cpp:439
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
Definition SystemZTargetTransformInfo.cpp:421
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
Get intrinsic cost based on arguments.
Definition SystemZTargetTransformInfo.cpp:1518
unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const override
Definition SystemZTargetTransformInfo.cpp:464
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
Definition SystemZTargetTransformInfo.cpp:542
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
Definition SystemZTargetTransformInfo.cpp:307
unsigned getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy) const
Definition SystemZTargetTransformInfo.cpp:823
unsigned getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst, const Instruction *I) const
Definition SystemZTargetTransformInfo.cpp:875
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override
Definition SystemZTargetTransformInfo.cpp:1184
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const override
Definition SystemZTargetTransformInfo.cpp:186
bool shouldExpandReduction(const IntrinsicInst *II) const override
Definition SystemZTargetTransformInfo.cpp:1527
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
Definition SystemZTargetTransformInfo.cpp:358
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
Definition SystemZTargetTransformInfo.cpp:1380
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
Definition SystemZTargetTransformInfo.cpp:1461
bool hasDivRemOp(Type *DataType, bool IsSigned) const override
Definition SystemZTargetTransformInfo.cpp:481
unsigned getVectorTruncCost(Type *SrcTy, Type *DstTy) const
Definition SystemZTargetTransformInfo.cpp:782
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
Definition SystemZTargetTransformInfo.cpp:365
unsigned adjustInliningThreshold(const CallBase *CB) const override
Definition SystemZTargetTransformInfo.cpp:78
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
Definition SystemZTargetTransformInfo.cpp:1303
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
Definition SystemZTargetTransformInfo.cpp:1089
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
Definition SystemZTargetTransformInfo.cpp:451
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
Definition SystemZTargetTransformInfo.cpp:152
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
Definition SystemZTargetTransformInfo.cpp:891
virtual bool isLoweredToCall(const Function *F) const
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
static bool requiresOrderedReduction(std::optional< FastMathFlags > FMF)
A helper function to determine the type of reduction algorithm used for a given Opcode and set of FastMathFlags FMF.
PopcntSupportKind
Flags indicating the kind of support for population count.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
ShuffleKind
The various kinds of shuffle patterns for vector queries.
@ SK_Broadcast
Broadcast element 0 to all other elements.
@ SK_ExtractSubvector
ExtractSubvector Index indicates start offset.
CastContextHint
Represents a hint about the context in which a cast is used.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
iterator_range< user_iterator > users()
Base class of all SIMD vector types.
constexpr ScalarTy getFixedValue() const
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
const unsigned VectorBits
This is an optimization pass for GlobalISel generic memory operations.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
auto dyn_cast_or_null(const Y &Val)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
bool isa(const From &Val)
isa - Return true if the parameter to the template is an instance of one of the template type arguments.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast - Return the argument parameter cast to the specified type.
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
unsigned Insns
TODO: Some of these could be merged.
Parameters that control the generic loop unrolling transformation.
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
unsigned DefaultUnrollRuntimeCount
Default unroll count for loops with run-time trip count.
unsigned FullUnrollMaxCount
Set the maximum unrolling factor for full unrolling.
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set to UINT_MAX to disable).
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number of loop iterations is not known at compile time).
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminate small constant-trip-count loops).
bool AllowExpensiveTripCount
Allow emitting expensive instructions (such as divisions) when computing the trip count of a loop for runtime unrolling.