LLVM: lib/Target/RISCV/RISCVTargetTransformInfo.cpp Source File
1//===-- RISCVTargetTransformInfo.cpp - RISC-V specific TTI ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
18#include "llvm/IR/IntrinsicsRISCV.h"
20#include <cmath>
21#include <optional>
22using namespace llvm;
24
25#define DEBUG_TYPE "riscvtti"
26
28 "riscv-v-register-bit-width-lmul",
30 "The LMUL to use for getRegisterBitWidth queries. Affects LMUL used "
31 "by autovectorized code. Fractional LMULs are not supported."),
33
35 "riscv-v-slp-max-vf",
37 "Overrides result used for getMaximumVF query which is used "
38 "exclusively by SLP vectorizer."),
40
43 cl::desc("Set the lower bound of a trip count to decide on "
44 "vectorization while tail-folding."),
46
50
53 size_t NumInstr = OpCodes.size();
55 return NumInstr;
58 return LMULCost * NumInstr;
60 for (auto Op : OpCodes) {
61 switch (Op) {
62 case RISCV::VRGATHER_VI:
64 break;
65 case RISCV::VRGATHER_VV:
67 break;
68 case RISCV::VSLIDEUP_VI:
69 case RISCV::VSLIDEDOWN_VI:
71 break;
72 case RISCV::VSLIDEUP_VX:
73 case RISCV::VSLIDEDOWN_VX:
75 break;
76 case RISCV::VREDMAX_VS:
77 case RISCV::VREDMIN_VS:
78 case RISCV::VREDMAXU_VS:
79 case RISCV::VREDMINU_VS:
80 case RISCV::VREDSUM_VS:
81 case RISCV::VREDAND_VS:
82 case RISCV::VREDOR_VS:
83 case RISCV::VREDXOR_VS:
84 case RISCV::VFREDMAX_VS:
85 case RISCV::VFREDMIN_VS:
86 case RISCV::VFREDUSUM_VS: {
91 break;
92 }
93 case RISCV::VFREDOSUM_VS: {
98 break;
99 }
100 case RISCV::VMV_X_S:
101 case RISCV::VMV_S_X:
102 case RISCV::VFMV_F_S:
103 case RISCV::VFMV_S_F:
104 case RISCV::VMOR_MM:
105 case RISCV::VMXOR_MM:
106 case RISCV::VMAND_MM:
107 case RISCV::VMANDN_MM:
108 case RISCV::VMNAND_MM:
109 case RISCV::VCPOP_M:
110 case RISCV::VFIRST_M:
112 break;
113 default:
114 Cost += LMULCost;
115 }
116 }
118}
119
124 bool FreeZeroes) {
125 assert(Ty->isIntegerTy() &&
126 "getIntImmCost can only estimate cost of materialising integers");
127
128
129 if (Imm == 0)
131
132
134 false, FreeZeroes);
135}
136
142
143
144
145
147 uint64_t Mask = Imm.getZExtValue();
148 auto *BO = dyn_cast<BinaryOperator>(Inst->getOperand(0));
149 if (!BO || !BO->hasOneUse())
150 return false;
151
152 if (BO->getOpcode() != Instruction::Shl)
153 return false;
154
156 return false;
157
158 unsigned ShAmt = cast<ConstantInt>(BO->getOperand(1))->getZExtValue();
159
160
163 if (ShAmt == Trailing)
164 return true;
165 }
166
167 return false;
168}
169
170
171
172
173
176 return false;
177
178
180 if (!Cmp || !Cmp->isEquality())
181 return false;
182
183
184 auto *C = dyn_cast<ConstantInt>(Cmp->getOperand(1));
185 if (!C)
186 return false;
187
188 uint64_t Mask = Imm.getZExtValue();
189
190
192 return false;
193
194
195 uint64_t CmpC = C->getZExtValue();
196 if ((CmpC & Mask) != CmpC)
197 return false;
198
199
200
203 return NewCmpC >= -2048 && NewCmpC <= 2048;
204}
205
210 assert(Ty->isIntegerTy() &&
211 "getIntImmCost can only estimate cost of materialising integers");
212
213
214 if (Imm == 0)
216
217
218
219 bool Takes12BitImm = false;
220 unsigned ImmArgIdx = ~0U;
221
222 switch (Opcode) {
223 case Instruction::GetElementPtr:
224
225
226
228 case Instruction::Store: {
229
230
231
232
233 if (Idx == 1 || !Inst)
235 true);
236
238 if (!getTLI()->allowsMemoryAccessForAlignment(
242
244 true);
245 }
246 case Instruction::Load:
247
249 case Instruction::And:
250
251 if (Imm == UINT64_C(0xffff) && ST->hasStdExtZbb())
253
254 if (Imm == UINT64_C(0xffffffff) &&
255 ((ST->hasStdExtZba() && ST->isRV64()) || ST->isRV32()))
257
258 if (ST->hasStdExtZbs() && (~Imm).isPowerOf2())
260 if (Inst && Idx == 1 && Imm.getBitWidth() <= ST->getXLen() &&
263 if (Inst && Idx == 1 && Imm.getBitWidth() == 64 &&
266 Takes12BitImm = true;
267 break;
268 case Instruction::Add:
269 Takes12BitImm = true;
270 break;
271 case Instruction::Or:
272 case Instruction::Xor:
273
274 if (ST->hasStdExtZbs() && Imm.isPowerOf2())
276 Takes12BitImm = true;
277 break;
278 case Instruction::Mul:
279
280 if (Imm.isPowerOf2() || Imm.isNegatedPowerOf2())
282
283 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2())
285
286 Takes12BitImm = true;
287 break;
288 case Instruction::Sub:
289 case Instruction::Shl:
290 case Instruction::LShr:
291 case Instruction::AShr:
292 Takes12BitImm = true;
293 ImmArgIdx = 1;
294 break;
295 default:
296 break;
297 }
298
299 if (Takes12BitImm) {
300
302
303 if (Imm.getSignificantBits() <= 64 &&
306 }
307 }
308
309
311 }
312
313
315}
316
324
326 return ST->hasVInstructions();
327}
328
334
336 unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
340
341
342
343 if (!ST->hasStdExtZvqdotq() || ST->getELen() < 64 ||
344 Opcode != Instruction::Add || !BinOp || *BinOp != Instruction::Mul ||
345 InputTypeA != InputTypeB || !InputTypeA->isIntegerTy(8) ||
348
351
352 return LT.first *
353 getRISCVInstructionCost(RISCV::VQDOT_VV, LT.second, CostKind);
354}
355
357
358
359
360 switch (II->getIntrinsicID()) {
361 default:
362 return false;
363
364 case Intrinsic::vector_reduce_mul:
365 case Intrinsic::vector_reduce_fmul:
366 return true;
367 }
368}
369
371 if (ST->hasVInstructions())
374}
375
377 if (ST->hasVInstructions())
378 if (unsigned MinVLen = ST->getRealMinVLen();
382}
383
386 unsigned LMUL =
388 switch (K) {
393 ST->useRVVForFixedLengthVectors() ? LMUL * ST->getRealMinVLen() : 0);
396 (ST->hasVInstructions() &&
399 : 0);
400 }
401
403}
404
406RISCVTTIImpl::getConstantPoolLoadCost(Type *Ty,
408
409
410
411 return 2 + getMemoryOpCost(Instruction::Load, Ty, DL.getABITypeAlign(Ty),
412 0, CostKind);
413}
414
416 unsigned Size = Mask.size();
418 return false;
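// Find the first index I at which the mask stops being the identity; that
// position must restart at 0 and every later element must equal J % I, i.e.
// the mask concatenates copies of the leading I-element subvector.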
419 for (unsigned I = 0; I != Size; ++I) {
420 if (static_cast<unsigned>(Mask[I]) == I)
421 continue;
422 if (Mask[I] != 0)
423 return false;
425 return false;
426 for (unsigned J = I + 1; J != Size; ++J)
427
428 if (static_cast<unsigned>(Mask[J]) != J % I)
429 return false;
430 SubVectorSize = I;
431 return true;
432 }
433
434 return false;
435}
436
446
447
448
449
450
456 "Expected fixed vector type and non-empty mask");
458
459 unsigned NumOfDests = divideCeil(Mask.size(), LegalNumElts);
460
461
462
463 if (NumOfDests <= 1 ||
465 Tp->getElementType()->getPrimitiveSizeInBits() ||
466 LegalNumElts >= Tp->getElementCount().getFixedValue())
468
469 unsigned VecTySize = TTI.getDataLayout().getTypeStoreSize(Tp);
470 unsigned LegalVTSize = LegalVT.getStoreSize();
471
472 unsigned NumOfSrcs = divideCeil(VecTySize, LegalVTSize);
473
475
476 unsigned NormalizedVF = LegalNumElts * std::max(NumOfSrcs, NumOfDests);
477 unsigned NumOfSrcRegs = NormalizedVF / LegalNumElts;
478 unsigned NumOfDestRegs = NormalizedVF / LegalNumElts;
480 assert(NormalizedVF >= Mask.size() &&
481 "Normalized mask expected to be not shorter than original mask.");
482 copy(Mask, NormalizedMask.begin());
486 NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
487 [&](ArrayRef RegMask, unsigned SrcReg, unsigned DestReg) {
489 return;
490 if (!ReusedSingleSrcShuffles.insert(std::make_pair(RegMask, SrcReg))
491 .second)
492 return;
493 Cost += TTI.getShuffleCost(
496 SingleOpTy, RegMask, CostKind, 0, nullptr);
497 },
498 [&](ArrayRef RegMask, unsigned Idx1, unsigned Idx2, bool NewReg) {
499 Cost += TTI.getShuffleCost(
502 SingleOpTy, RegMask, CostKind, 0, nullptr);
503 });
504 return Cost;
505}
506
507
508
509
510
511
512
513
514
515
516
517
518
519
522 std::optional VLen, VectorType *Tp,
525 if (!VLen || Mask.empty())
529 LegalVT = TTI.getTypeLegalizationCost(
531 .second;
532
535 if (NumOfDests <= 1 ||
537 Tp->getElementType()->getPrimitiveSizeInBits() ||
540
541 unsigned VecTySize = TTI.getDataLayout().getTypeStoreSize(Tp);
542 unsigned LegalVTSize = LegalVT.getStoreSize();
543
544 unsigned NumOfSrcs = divideCeil(VecTySize, LegalVTSize);
545
548
549 unsigned E = NumOfDests.getValue();
550 unsigned NormalizedVF =
555 assert(NormalizedVF >= Mask.size() &&
556 "Normalized mask expected to be not shorter than original mask.");
557 copy(Mask, NormalizedMask.begin());
559 int NumShuffles = 0;
562 NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
563 [&](ArrayRef RegMask, unsigned SrcReg, unsigned DestReg) {
565 return;
566 if (!ReusedSingleSrcShuffles.insert(std::make_pair(RegMask, SrcReg))
567 .second)
568 return;
569 ++NumShuffles;
571 SingleOpTy, RegMask, CostKind, 0, nullptr);
572 },
573 [&](ArrayRef RegMask, unsigned Idx1, unsigned Idx2, bool NewReg) {
575 SingleOpTy, RegMask, CostKind, 0, nullptr);
576 NumShuffles += 2;
577 });
578
579
580
581
582 if ((NumOfDestRegs > 2 && NumShuffles <= static_cast<int>(NumOfDestRegs)) ||
583 (NumOfDestRegs <= 2 && NumShuffles < 4))
584 return Cost;
586}
587
591
594
597
598 if (!LT.second.isFixedLengthVector())
600
601
602
603 if (LT.first != 1)
605
606 auto GetSlideOpcode = [&](int SlideAmt) {
607 assert(SlideAmt != 0);
608 bool IsVI = isUInt<5>(std::abs(SlideAmt));
609 if (SlideAmt < 0)
610 return IsVI ? RISCV::VSLIDEDOWN_VI : RISCV::VSLIDEDOWN_VX;
611 return IsVI ? RISCV::VSLIDEUP_VI : RISCV::VSLIDEUP_VX;
612 };
613
614 std::array<std::pair<int, int>, 2> SrcInfo;
617
618 if (SrcInfo[1].second == 0)
619 std::swap(SrcInfo[0], SrcInfo[1]);
620
622 if (SrcInfo[0].second != 0) {
623 unsigned Opcode = GetSlideOpcode(SrcInfo[0].second);
624 FirstSlideCost = getRISCVInstructionCost(Opcode, LT.second, CostKind);
625 }
626
627 if (SrcInfo[1].first == -1)
628 return FirstSlideCost;
629
631 if (SrcInfo[1].second != 0) {
632 unsigned Opcode = GetSlideOpcode(SrcInfo[1].second);
633 SecondSlideCost = getRISCVInstructionCost(Opcode, LT.second, CostKind);
634 } else {
635 SecondSlideCost =
636 getRISCVInstructionCost(RISCV::VMERGE_VVM, LT.second, CostKind);
637 }
638
643 return FirstSlideCost + SecondSlideCost + MaskCost;
644}
645
654 "Expected the Mask to match the return size if given");
656 "Expected the same scalar types");
657
660
661
662
663
665 FVTp && ST->hasVInstructions() && LT.second.isFixedLengthVector()) {
667 *this, LT.second, ST->getRealVLen(),
669 if (VRegSplittingCost.isValid())
670 return VRegSplittingCost;
671 switch (Kind) {
672 default:
673 break;
675 if (Mask.size() >= 2) {
676 MVT EltTp = LT.second.getVectorElementType();
677
678
679
681
682
683
684
685
687 return 2 * LT.first * TLI->getLMULCost(LT.second);
688
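// Picking every other element, starting from element 0 or 1, is a
// deinterleave of the two sources and maps to a single vnsrl.wi.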
689 if (Mask[0] == 0 || Mask[0] == 1) {
690 auto DeinterleaveMask = createStrideMask(Mask[0], 2, Mask.size());
691
692
693 if (equal(DeinterleaveMask, Mask))
694 return LT.first * getRISCVInstructionCost(RISCV::VNSRL_WI,
696 }
697 }
698 int SubVectorSize;
699 if (LT.second.getScalarSizeInBits() != 1 &&
702 unsigned NumSlides = Log2_32(Mask.size() / SubVectorSize);
703
704 for (unsigned I = 0; I != NumSlides; ++I) {
705 unsigned InsertIndex = SubVectorSize * (1 << I);
710 std::pair<InstructionCost, MVT> DestLT =
712
713
714
715 Cost += DestLT.first * TLI->getLMULCost(DestLT.second);
717 CostKind, InsertIndex, SubTp);
718 }
720 }
721 }
722
725 return SlideCost;
726
727
728
729 if (LT.first == 1 && (LT.second.getScalarSizeInBits() != 8 ||
730 LT.second.getVectorNumElements() <= 256)) {
734 return IndexCost +
735 getRISCVInstructionCost(RISCV::VRGATHER_VV, LT.second, CostKind);
736 }
737 break;
738 }
741
744 return SlideCost;
745
746
747
748
749 if (LT.first == 1 && (LT.second.getScalarSizeInBits() != 8 ||
750 LT.second.getVectorNumElements() <= 256)) {
751 auto &C = SrcTy->getContext();
752 auto EC = SrcTy->getElementCount();
757 return 2 * IndexCost +
758 getRISCVInstructionCost({RISCV::VRGATHER_VV, RISCV::VRGATHER_VV},
760 MaskCost;
761 }
762 break;
763 }
764 }
765
767 switch (Kind) {
768 default:
769 return false;
773 return true;
774 }
775 };
776
777 if (!Mask.empty() && LT.first.isValid() && LT.first != 1 &&
782 return SplitCost;
783 }
784 }
785
786
787 switch (Kind) {
788 default:
789
790
791
792 break;
794
795 if (Index == 0)
797
798
799
800
801
802
803
805 SubLT.second.isValid() && SubLT.second.isFixedLengthVector()) {
806 if (std::optional VLen = ST->getRealVLen();
807 VLen && SubLT.second.getScalarSizeInBits() * Index % *VLen == 0 &&
808 SubLT.second.getSizeInBits() <= *VLen)
810 }
811
812
813
814
815 return LT.first *
816 getRISCVInstructionCost(RISCV::VSLIDEDOWN_VI, LT.second, CostKind);
818
819
820
822 return LT.first *
823 getRISCVInstructionCost(RISCV::VSLIDEUP_VI, LT.second, CostKind);
825
826
827
828
829
830
831
832
833
834 return LT.first *
835 (1 + getRISCVInstructionCost({RISCV::VMV_S_X, RISCV::VMERGE_VVM},
837 }
840 Instruction::InsertElement);
841 if (LT.second.getScalarSizeInBits() == 1) {
842 if (HasScalar) {
843
844
845
846
847
848 return LT.first *
849 (1 + getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
851 }
852
853
854
855
856
857
858
859
860
861 return LT.first *
862 (1 + getRISCVInstructionCost({RISCV::VMV_V_I, RISCV::VMERGE_VIM,
863 RISCV::VMV_X_S, RISCV::VMV_V_X,
864 RISCV::VMSNE_VI},
866 }
867
868 if (HasScalar) {
869
870
871 return LT.first *
872 getRISCVInstructionCost(RISCV::VMV_V_X, LT.second, CostKind);
873 }
874
875
876
877 return LT.first *
878 getRISCVInstructionCost(RISCV::VRGATHER_VI, LT.second, CostKind);
879 }
881
882
883
884 unsigned Opcodes[2] = {RISCV::VSLIDEDOWN_VX, RISCV::VSLIDEUP_VX};
885 if (Index >= 0 && Index < 32)
886 Opcodes[0] = RISCV::VSLIDEDOWN_VI;
887 else if (Index < 0 && Index > -32)
888 Opcodes[1] = RISCV::VSLIDEUP_VI;
889 return LT.first * getRISCVInstructionCost(Opcodes, LT.second, CostKind);
890 }
892
893 if (!LT.second.isVector())
895
896
897
898
899 if (SrcTy->getElementType()->isIntegerTy(1)) {
906 nullptr) +
909 }
910
911 MVT ContainerVT = LT.second;
912 if (LT.second.isFixedLengthVector())
913 ContainerVT = TLI->getContainerForFixedLengthVector(LT.second);
915 if (ContainerVT.bitsLE(M1VT)) {
916
917
918
919
920
921
922
923
925 if (LT.second.isFixedLengthVector())
926
927 LenCost = isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1;
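// A reverse within one register group is an index vector (vid.v), an index
// flip (vrsub), and a gather (vrgather.vv). LenCost models producing the
// length-1 operand for the flip: free when it fits vrsub.vi's immediate,
// one instruction for a fixed-length li, more for scalable vectors.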
928 unsigned Opcodes[] = {RISCV::VID_V, RISCV::VRSUB_VX, RISCV::VRGATHER_VV};
929 if (LT.second.isFixedLengthVector() &&
930 isInt<5>(LT.second.getVectorNumElements() - 1))
931 Opcodes[1] = RISCV::VRSUB_VI;
933 getRISCVInstructionCost(Opcodes, LT.second, CostKind);
934 return LT.first * (LenCost + GatherCost);
935 }
936
937
938
939
940
941 unsigned M1Opcodes[] = {RISCV::VID_V, RISCV::VRSUB_VX};
943 getRISCVInstructionCost(M1Opcodes, M1VT, CostKind) + 3;
944 unsigned Ratio =
947 getRISCVInstructionCost({RISCV::VRGATHER_VV}, M1VT, CostKind) * Ratio;
948 InstructionCost SlideCost = !LT.second.isFixedLengthVector() ? 0 :
949 getRISCVInstructionCost({RISCV::VSLIDEDOWN_VX}, LT.second, CostKind);
950 return FixedCost + LT.first * (GatherCost + SlideCost);
951 }
952 }
954 SubTp);
955}
956
964
966 VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract,
971
972
973
974
976 return 1;
977 }
978
979
980
981
982
984 Ty, DemandedElts, Insert, Extract, CostKind);
986 if (Insert && !Extract && LT.first.isValid() && LT.second.isVector()) {
987 if (Ty->getScalarSizeInBits() == 1) {
988 auto *WideVecTy = cast<VectorType>(Ty->getWithNewBitWidth(8));
989
990
995 }
996
997 assert(LT.second.isFixedLengthVector());
998 MVT ContainerVT = TLI->getContainerForFixedLengthVector(LT.second);
1002 getRISCVInstructionCost(RISCV::VSLIDE1DOWN_VX, LT.second, CostKind);
1003 if (BV < Cost)
1005 }
1006 }
1007 return Cost;
1008}
1009
1015 switch (MICA.getID()) {
1016 case Intrinsic::vp_load_ff: {
1017 EVT DataTypeVT = TLI->getValueType(DL, DataTy);
1018 if (!TLI->isLegalFirstFaultLoad(DataTypeVT, Alignment))
1020
1024 }
1025 case Intrinsic::experimental_vp_strided_load:
1026 case Intrinsic::experimental_vp_strided_store:
1028 case Intrinsic::masked_compressstore:
1029 case Intrinsic::masked_expandload:
1031 case Intrinsic::vp_scatter:
1032 case Intrinsic::vp_gather:
1033 case Intrinsic::masked_scatter:
1034 case Intrinsic::masked_gather:
1036 case Intrinsic::vp_load:
1037 case Intrinsic::vp_store:
1038 case Intrinsic::masked_load:
1039 case Intrinsic::masked_store:
1041 }
1043}
1044
1048 unsigned Opcode = MICA.getID() == Intrinsic::masked_load ? Instruction::Load
1049 : Instruction::Store;
1053
1057
1059}
1060
1064 bool UseMaskForCond, bool UseMaskForGaps) const {
1065
1066
1067
1068
1069
1070 if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
1073
1074 if (LT.second.isVector()) {
1075 auto *SubVecTy =
1077 VTy->getElementCount().divideCoefficientBy(Factor));
1078 if (VTy->getElementCount().isKnownMultipleOf(Factor) &&
1079 TLI->isLegalInterleavedAccessType(SubVecTy, Factor, Alignment,
1081
1082
1083
1084 if (ST->hasOptimizedSegmentLoadStore(Factor)) {
1087 MVT SubVecVT = getTLI()->getValueType(DL, SubVecTy).getSimpleVT();
1088 Cost += Factor * TLI->getLMULCost(SubVecVT);
1089 return LT.first * Cost;
1090 }
1091
1092
1093
1095 getMemoryOpCost(Opcode, VTy->getElementType(), Alignment, 0,
1096 CostKind, {TTI::OK_AnyValue, TTI::OP_None});
1097 unsigned NumLoads = getEstimatedVLFor(VTy);
1098 return NumLoads * MemOpCost;
1099 }
1100 }
1101 }
1102
1103
1104
1107
1111 unsigned VF = FVTy->getNumElements() / Factor;
1112
1113
1114
1115
1116
1117
1118 if (Opcode == Instruction::Load) {
1120 for (unsigned Index : Indices) {
1124 Mask.resize(VF * Factor, -1);
1127 Mask, CostKind, 0, nullptr, {});
1128 Cost += ShuffleCost;
1129 }
1130 return Cost;
1131 }
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143 if (Factor != 2)
1146 UseMaskForCond, UseMaskForGaps);
1147
1148 assert(Opcode == Instruction::Store && "Opcode must be a store");
1149
1150
1155 return MemCost + ShuffleCost;
1156}
1157
1161
1162 bool IsLoad = MICA.getID() == Intrinsic::masked_gather ||
1163 MICA.getID() == Intrinsic::vp_gather;
1164 unsigned Opcode = IsLoad ? Instruction::Load : Instruction::Store;
1170
1171 if ((Opcode == Instruction::Load &&
1173 (Opcode == Instruction::Store &&
1176
1177
1178
1179
1183 {TTI::OK_AnyValue, TTI::OP_None}, I);
1184 unsigned NumLoads = getEstimatedVLFor(&VTy);
1185 return NumLoads * MemOpCost;
1186}
1187
1191 unsigned Opcode = MICA.getID() == Intrinsic::masked_expandload
1192 ? Instruction::Load
1193 : Instruction::Store;
1197 bool IsLegal = (Opcode == Instruction::Store &&
1199 (Opcode == Instruction::Load &&
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217 auto MemOpCost =
1221 if (VariableMask)
1222 Opcodes.push_back(RISCV::VCPOP_M);
1223 if (Opcode == Instruction::Store)
1224 Opcodes.append({RISCV::VCOMPRESS_VM});
1225 else
1226 Opcodes.append({RISCV::VSETIVLI, RISCV::VIOTA_M, RISCV::VRGATHER_VV});
1227 return MemOpCost +
1228 LT.first * getRISCVInstructionCost(Opcodes, LT.second, CostKind);
1229}
1230
1234
1235 unsigned Opcode = MICA.getID() == Intrinsic::experimental_vp_strided_load
1236 ? Instruction::Load
1237 : Instruction::Store;
1238
1242
1245
1248
1249
1250
1251
1255 {TTI::OK_AnyValue, TTI::OP_None}, I);
1256 unsigned NumLoads = getEstimatedVLFor(&VTy);
1257 return NumLoads * MemOpCost;
1258}
1259
1262
1263
1264
1267 for (auto *Ty : Tys) {
1268 if (!Ty->isVectorTy())
1269 continue;
1270 Align A = DL.getPrefTypeAlign(Ty);
1273 }
1274 return Cost;
1275}
1276
1277
1278
1279
1280
1282 {Intrinsic::floor, MVT::f32, 9},
1283 {Intrinsic::floor, MVT::f64, 9},
1284 {Intrinsic::ceil, MVT::f32, 9},
1285 {Intrinsic::ceil, MVT::f64, 9},
1286 {Intrinsic::trunc, MVT::f32, 7},
1287 {Intrinsic::trunc, MVT::f64, 7},
1288 {Intrinsic::round, MVT::f32, 9},
1289 {Intrinsic::round, MVT::f64, 9},
1290 {Intrinsic::roundeven, MVT::f32, 9},
1291 {Intrinsic::roundeven, MVT::f64, 9},
1292 {Intrinsic::rint, MVT::f32, 7},
1293 {Intrinsic::rint, MVT::f64, 7},
1294 {Intrinsic::nearbyint, MVT::f32, 9},
1295 {Intrinsic::nearbyint, MVT::f64, 9},
1296 {Intrinsic::bswap, MVT::i16, 3},
1297 {Intrinsic::bswap, MVT::i32, 12},
1298 {Intrinsic::bswap, MVT::i64, 31},
1299 {Intrinsic::vp_bswap, MVT::i16, 3},
1300 {Intrinsic::vp_bswap, MVT::i32, 12},
1301 {Intrinsic::vp_bswap, MVT::i64, 31},
1302 {Intrinsic::vp_fshl, MVT::i8, 7},
1303 {Intrinsic::vp_fshl, MVT::i16, 7},
1304 {Intrinsic::vp_fshl, MVT::i32, 7},
1305 {Intrinsic::vp_fshl, MVT::i64, 7},
1306 {Intrinsic::vp_fshr, MVT::i8, 7},
1307 {Intrinsic::vp_fshr, MVT::i16, 7},
1308 {Intrinsic::vp_fshr, MVT::i32, 7},
1309 {Intrinsic::vp_fshr, MVT::i64, 7},
1310 {Intrinsic::bitreverse, MVT::i8, 17},
1311 {Intrinsic::bitreverse, MVT::i16, 24},
1312 {Intrinsic::bitreverse, MVT::i32, 33},
1313 {Intrinsic::bitreverse, MVT::i64, 52},
1314 {Intrinsic::vp_bitreverse, MVT::i8, 17},
1315 {Intrinsic::vp_bitreverse, MVT::i16, 24},
1316 {Intrinsic::vp_bitreverse, MVT::i32, 33},
1317 {Intrinsic::vp_bitreverse, MVT::i64, 52},
1318 {Intrinsic::ctpop, MVT::i8, 12},
1319 {Intrinsic::ctpop, MVT::i16, 19},
1320 {Intrinsic::ctpop, MVT::i32, 20},
1321 {Intrinsic::ctpop, MVT::i64, 21},
1322 {Intrinsic::ctlz, MVT::i8, 19},
1323 {Intrinsic::ctlz, MVT::i16, 28},
1324 {Intrinsic::ctlz, MVT::i32, 31},
1325 {Intrinsic::ctlz, MVT::i64, 35},
1326 {Intrinsic::cttz, MVT::i8, 16},
1327 {Intrinsic::cttz, MVT::i16, 23},
1328 {Intrinsic::cttz, MVT::i32, 24},
1329 {Intrinsic::cttz, MVT::i64, 25},
1330 {Intrinsic::vp_ctpop, MVT::i8, 12},
1331 {Intrinsic::vp_ctpop, MVT::i16, 19},
1332 {Intrinsic::vp_ctpop, MVT::i32, 20},
1333 {Intrinsic::vp_ctpop, MVT::i64, 21},
1334 {Intrinsic::vp_ctlz, MVT::i8, 19},
1335 {Intrinsic::vp_ctlz, MVT::i16, 28},
1336 {Intrinsic::vp_ctlz, MVT::i32, 31},
1337 {Intrinsic::vp_ctlz, MVT::i64, 35},
1338 {Intrinsic::vp_cttz, MVT::i8, 16},
1339 {Intrinsic::vp_cttz, MVT::i16, 23},
1340 {Intrinsic::vp_cttz, MVT::i32, 24},
1341 {Intrinsic::vp_cttz, MVT::i64, 25},
1342};
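// The table above is consulted near the end of getIntrinsicInstrCost: once
// the return type has been legalized, the matching entry's cost is scaled by
// the number of legal registers the type splits into (LT.first * Entry->Cost).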
1343
1348 switch (ICA.getID()) {
1349 case Intrinsic::lrint:
1350 case Intrinsic::llrint:
1351 case Intrinsic::lround:
1352 case Intrinsic::llround: {
1356 if (ST->hasVInstructions() && LT.second.isVector()) {
1358 unsigned SrcEltSz = DL.getTypeSizeInBits(SrcTy->getScalarType());
1359 unsigned DstEltSz = DL.getTypeSizeInBits(RetTy->getScalarType());
1360 if (LT.second.getVectorElementType() == MVT::bf16) {
1361 if (!ST->hasVInstructionsBF16Minimal())
1363 if (DstEltSz == 32)
1364 Ops = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFCVT_X_F_V};
1365 else
1366 Ops = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFWCVT_X_F_V};
1367 } else if (LT.second.getVectorElementType() == MVT::f16 &&
1368 !ST->hasVInstructionsF16()) {
1369 if (!ST->hasVInstructionsF16Minimal())
1371 if (DstEltSz == 32)
1372 Ops = {RISCV::VFWCVT_F_F_V, RISCV::VFCVT_X_F_V};
1373 else
1374 Ops = {RISCV::VFWCVT_F_F_V, RISCV::VFWCVT_X_F_V};
1375
1376 } else if (SrcEltSz > DstEltSz) {
1377 Ops = {RISCV::VFNCVT_X_F_W};
1378 } else if (SrcEltSz < DstEltSz) {
1379 Ops = {RISCV::VFWCVT_X_F_V};
1380 } else {
1381 Ops = {RISCV::VFCVT_X_F_V};
1382 }
1383
1384
1385
1386 if (SrcEltSz > DstEltSz)
1387 return SrcLT.first *
1388 getRISCVInstructionCost(Ops, SrcLT.second, CostKind);
1389 return LT.first * getRISCVInstructionCost(Ops, LT.second, CostKind);
1390 }
1391 break;
1392 }
1393 case Intrinsic::ceil:
1394 case Intrinsic::floor:
1395 case Intrinsic::trunc:
1396 case Intrinsic::rint:
1397 case Intrinsic::round:
1398 case Intrinsic::roundeven: {
1399
1401 if (!LT.second.isVector() && TLI->isOperationCustom(ISD::FCEIL, LT.second))
1402 return LT.first * 8;
1403 break;
1404 }
1405 case Intrinsic::umin:
1406 case Intrinsic::umax:
1407 case Intrinsic::smin:
1408 case Intrinsic::smax: {
1410 if (LT.second.isScalarInteger() && ST->hasStdExtZbb())
1411 return LT.first;
1412
1413 if (ST->hasVInstructions() && LT.second.isVector()) {
1414 unsigned Op;
1415 switch (ICA.getID()) {
1416 case Intrinsic::umin:
1417 Op = RISCV::VMINU_VV;
1418 break;
1419 case Intrinsic::umax:
1420 Op = RISCV::VMAXU_VV;
1421 break;
1422 case Intrinsic::smin:
1423 Op = RISCV::VMIN_VV;
1424 break;
1425 case Intrinsic::smax:
1426 Op = RISCV::VMAX_VV;
1427 break;
1428 }
1429 return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
1430 }
1431 break;
1432 }
1433 case Intrinsic::sadd_sat:
1434 case Intrinsic::ssub_sat:
1435 case Intrinsic::uadd_sat:
1436 case Intrinsic::usub_sat: {
1438 if (ST->hasVInstructions() && LT.second.isVector()) {
1439 unsigned Op;
1440 switch (ICA.getID()) {
1441 case Intrinsic::sadd_sat:
1442 Op = RISCV::VSADD_VV;
1443 break;
1444 case Intrinsic::ssub_sat:
1445 Op = RISCV::VSSUBU_VV;
1446 break;
1447 case Intrinsic::uadd_sat:
1448 Op = RISCV::VSADDU_VV;
1449 break;
1450 case Intrinsic::usub_sat:
1451 Op = RISCV::VSSUBU_VV;
1452 break;
1453 }
1454 return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
1455 }
1456 break;
1457 }
1458 case Intrinsic::fma:
1459 case Intrinsic::fmuladd: {
1460
1462 if (ST->hasVInstructions() && LT.second.isVector())
1463 return LT.first *
1464 getRISCVInstructionCost(RISCV::VFMADD_VV, LT.second, CostKind);
1465 break;
1466 }
1467 case Intrinsic::fabs: {
1469 if (ST->hasVInstructions() && LT.second.isVector()) {
1470
1471
1472
1473
1474
1475 if (LT.second.getVectorElementType() == MVT::bf16 ||
1476 (LT.second.getVectorElementType() == MVT::f16 &&
1477 !ST->hasVInstructionsF16()))
1478 return LT.first * getRISCVInstructionCost(RISCV::VAND_VX, LT.second,
1480 2;
1481 else
1482 return LT.first *
1483 getRISCVInstructionCost(RISCV::VFSGNJX_VV, LT.second, CostKind);
1484 }
1485 break;
1486 }
1487 case Intrinsic::sqrt: {
1489 if (ST->hasVInstructions() && LT.second.isVector()) {
1492 MVT ConvType = LT.second;
1493 MVT FsqrtType = LT.second;
1494
1495
1496 if (LT.second.getVectorElementType() == MVT::bf16) {
1497 if (LT.second == MVT::nxv32bf16) {
1498 ConvOp = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFWCVTBF16_F_F_V,
1499 RISCV::VFNCVTBF16_F_F_W, RISCV::VFNCVTBF16_F_F_W};
1500 FsqrtOp = {RISCV::VFSQRT_V, RISCV::VFSQRT_V};
1501 ConvType = MVT::nxv16f16;
1502 FsqrtType = MVT::nxv16f32;
1503 } else {
1504 ConvOp = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFNCVTBF16_F_F_W};
1505 FsqrtOp = {RISCV::VFSQRT_V};
1506 FsqrtType = TLI->getTypeToPromoteTo(ISD::FSQRT, FsqrtType);
1507 }
1508 } else if (LT.second.getVectorElementType() == MVT::f16 &&
1509 !ST->hasVInstructionsF16()) {
1510 if (LT.second == MVT::nxv32f16) {
1511 ConvOp = {RISCV::VFWCVT_F_F_V, RISCV::VFWCVT_F_F_V,
1512 RISCV::VFNCVT_F_F_W, RISCV::VFNCVT_F_F_W};
1513 FsqrtOp = {RISCV::VFSQRT_V, RISCV::VFSQRT_V};
1514 ConvType = MVT::nxv16f16;
1515 FsqrtType = MVT::nxv16f32;
1516 } else {
1517 ConvOp = {RISCV::VFWCVT_F_F_V, RISCV::VFNCVT_F_F_W};
1518 FsqrtOp = {RISCV::VFSQRT_V};
1519 FsqrtType = TLI->getTypeToPromoteTo(ISD::FSQRT, FsqrtType);
1520 }
1521 } else {
1522 FsqrtOp = {RISCV::VFSQRT_V};
1523 }
1524
1525 return LT.first * (getRISCVInstructionCost(FsqrtOp, FsqrtType, CostKind) +
1526 getRISCVInstructionCost(ConvOp, ConvType, CostKind));
1527 }
1528 break;
1529 }
1530 case Intrinsic::cttz:
1531 case Intrinsic::ctlz:
1532 case Intrinsic::ctpop: {
1534 if (ST->hasStdExtZvbb() && LT.second.isVector()) {
1535 unsigned Op;
1536 switch (ICA.getID()) {
1537 case Intrinsic::cttz:
1538 Op = RISCV::VCTZ_V;
1539 break;
1540 case Intrinsic::ctlz:
1541 Op = RISCV::VCLZ_V;
1542 break;
1543 case Intrinsic::ctpop:
1544 Op = RISCV::VCPOP_V;
1545 break;
1546 }
1547 return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
1548 }
1549 break;
1550 }
1551 case Intrinsic::abs: {
1553 if (ST->hasVInstructions() && LT.second.isVector()) {
1554
1555
1556 return LT.first *
1557 getRISCVInstructionCost({RISCV::VRSUB_VI, RISCV::VMAX_VV},
1559 }
1560 break;
1561 }
1562 case Intrinsic::fshl:
1563 case Intrinsic::fshr: {
1565 break;
1566
1567
1568
1569
1570 if ((ST->hasStdExtZbb() || ST->hasStdExtZbkb()) && RetTy->isIntegerTy() &&
1572 (RetTy->getIntegerBitWidth() == 32 ||
1573 RetTy->getIntegerBitWidth() == 64) &&
1574 RetTy->getIntegerBitWidth() <= ST->getXLen()) {
1575 return 1;
1576 }
1577 break;
1578 }
1579 case Intrinsic::get_active_lane_mask: {
1580 if (ST->hasVInstructions()) {
1584
1585
1586
1587
1588 return LT.first *
1589 getRISCVInstructionCost({RISCV::VSADDU_VX, RISCV::VMSLTU_VX},
1591 }
1592 break;
1593 }
1594
1595 case Intrinsic::stepvector: {
1597
1598
1599 if (ST->hasVInstructions())
1600 return getRISCVInstructionCost(RISCV::VID_V, LT.second, CostKind) +
1601 (LT.first - 1) *
1602 getRISCVInstructionCost(RISCV::VADD_VX, LT.second, CostKind);
1603 return 1 + (LT.first - 1);
1604 }
1605 case Intrinsic::experimental_cttz_elts: {
1607 EVT ArgType = TLI->getValueType(DL, ArgTy, true);
1608 if (getTLI()->shouldExpandCttzElements(ArgType))
1609 break;
1612
1613
1614
1622
1623 return Cost;
1624 }
1625 case Intrinsic::experimental_vp_splice: {
1626
1627
1628
1632 }
1633 case Intrinsic::fptoui_sat:
1634 case Intrinsic::fptosi_sat: {
1636 bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;
1638
1641 if (!SrcTy->isVectorTy())
1642 break;
1643
1644 if (!SrcLT.first.isValid() || !DstLT.first.isValid())
1646
1648 getCastInstrCost(IsSigned ? Instruction::FPToSI : Instruction::FPToUI,
1650
1651
1652
1653
1659 return Cost;
1660 }
1661 }
1662
1663 if (ST->hasVInstructions() && RetTy->isVectorTy()) {
1665 LT.second.isVector()) {
1666 MVT EltTy = LT.second.getVectorElementType();
1668 ICA.getID(), EltTy))
1669 return LT.first * Entry->Cost;
1670 }
1671 }
1672
1674}
1675
1678 const SCEV *Ptr,
1680
1681
1682 if (ST->hasVInstructions() && PtrTy->isVectorTy())
1684
1686}
1687
1694 if (!IsVectorType)
1696
1697
1698
1699
1700 if (ST->enablePExtCodeGen() &&
1702 return 1;
1703 }
1704
1705
1706
1707
1708 if (!ST->hasVInstructions() || Src->getScalarSizeInBits() > ST->getELen() ||
1709 Dst->getScalarSizeInBits() > ST->getELen())
1711
1712 int ISD = TLI->InstructionOpcodeToISD(Opcode);
1713 assert(ISD && "Invalid opcode");
1716
1717
1718
1719
1720
1721
1722 switch (ISD) {
1723 default:
1724 break;
1727 if (Src->getScalarSizeInBits() == 1) {
1728
1729
1730
1731
1732 return getRISCVInstructionCost(RISCV::VMV_V_I, DstLT.second, CostKind) +
1733 DstLT.first * getRISCVInstructionCost(RISCV::VMERGE_VIM,
1735 DstLT.first - 1;
1736 }
1737 break;
1739 if (Dst->getScalarSizeInBits() == 1) {
1740
1741
1742
1743
1744
1745 return SrcLT.first *
1746 getRISCVInstructionCost({RISCV::VAND_VI, RISCV::VMSNE_VI},
1748 SrcLT.first - 1;
1749 }
1750 break;
1751 };
1752
1753
1754
1755
1756
1758 if (!SrcLT.second.isVector() || !DstLT.second.isVector() ||
1759 !SrcLT.first.isValid() || !DstLT.first.isValid() ||
1761 SrcLT.second.getSizeInBits()) ||
1763 DstLT.second.getSizeInBits()) ||
1764 SrcLT.first > 1 || DstLT.first > 1)
1766
1767
1768 assert((SrcLT.first == 1) && (DstLT.first == 1) && "Illegal type");
1769
1770 int PowDiff = (int)Log2_32(DstLT.second.getScalarSizeInBits()) -
1771 (int)Log2_32(SrcLT.second.getScalarSizeInBits());
1772 switch (ISD) {
1775 if ((PowDiff < 1) || (PowDiff > 3))
1777 unsigned SExtOp[] = {RISCV::VSEXT_VF2, RISCV::VSEXT_VF4, RISCV::VSEXT_VF8};
1778 unsigned ZExtOp[] = {RISCV::VZEXT_VF2, RISCV::VZEXT_VF4, RISCV::VZEXT_VF8};
1779 unsigned Op =
1781 return getRISCVInstructionCost(Op, DstLT.second, CostKind);
1782 }
1784 case ISD::FP_EXTEND:
1786
1787 unsigned SrcEltSize = SrcLT.second.getScalarSizeInBits();
1788 unsigned DstEltSize = DstLT.second.getScalarSizeInBits();
1789
1791 : (ISD == ISD::FP_EXTEND) ? RISCV::VFWCVT_F_F_V
1792 : RISCV::VFNCVT_F_F_W;
1794 for (; SrcEltSize != DstEltSize;) {
1798 MVT DstMVT = DstLT.second.changeVectorElementType(ElementMVT);
1799 DstEltSize =
1800 (DstEltSize > SrcEltSize) ? DstEltSize >> 1 : DstEltSize << 1;
1801 Cost += getRISCVInstructionCost(Op, DstMVT, CostKind);
1802 }
1803 return Cost;
1804 }
1808 unsigned FCVT = IsSigned ? RISCV::VFCVT_RTZ_X_F_V : RISCV::VFCVT_RTZ_XU_F_V;
1809 unsigned FWCVT =
1810 IsSigned ? RISCV::VFWCVT_RTZ_X_F_V : RISCV::VFWCVT_RTZ_XU_F_V;
1811 unsigned FNCVT =
1812 IsSigned ? RISCV::VFNCVT_RTZ_X_F_W : RISCV::VFNCVT_RTZ_XU_F_W;
1813 unsigned SrcEltSize = Src->getScalarSizeInBits();
1814 unsigned DstEltSize = Dst->getScalarSizeInBits();
1816 if ((SrcEltSize == 16) &&
1817 (!ST->hasVInstructionsF16() || ((DstEltSize / 2) > SrcEltSize))) {
1818
1819
1823 std::pair<InstructionCost, MVT> VecF32LT =
1826 VecF32LT.first * getRISCVInstructionCost(RISCV::VFWCVT_F_F_V,
1829 return Cost;
1830 }
1831 if (DstEltSize == SrcEltSize)
1832 Cost += getRISCVInstructionCost(FCVT, DstLT.second, CostKind);
1833 else if (DstEltSize > SrcEltSize)
1834 Cost += getRISCVInstructionCost(FWCVT, DstLT.second, CostKind);
1835 else {
1836
1837
1839 MVT VecVT = DstLT.second.changeVectorElementType(ElementVT);
1840 Cost += getRISCVInstructionCost(FNCVT, VecVT, CostKind);
1841 if ((SrcEltSize / 2) > DstEltSize) {
1845 }
1846 }
1847 return Cost;
1848 }
1852 unsigned FCVT = IsSigned ? RISCV::VFCVT_F_X_V : RISCV::VFCVT_F_XU_V;
1853 unsigned FWCVT = IsSigned ? RISCV::VFWCVT_F_X_V : RISCV::VFWCVT_F_XU_V;
1854 unsigned FNCVT = IsSigned ? RISCV::VFNCVT_F_X_W : RISCV::VFNCVT_F_XU_W;
1855 unsigned SrcEltSize = Src->getScalarSizeInBits();
1856 unsigned DstEltSize = Dst->getScalarSizeInBits();
1857
1859 if ((DstEltSize == 16) &&
1860 (!ST->hasVInstructionsF16() || ((SrcEltSize / 2) > DstEltSize))) {
1861
1862
1866 std::pair<InstructionCost, MVT> VecF32LT =
1869 Cost += VecF32LT.first * getRISCVInstructionCost(RISCV::VFNCVT_F_F_W,
1871 return Cost;
1872 }
1873
1874 if (DstEltSize == SrcEltSize)
1875 Cost += getRISCVInstructionCost(FCVT, DstLT.second, CostKind);
1876 else if (DstEltSize > SrcEltSize) {
1877 if ((DstEltSize / 2) > SrcEltSize) {
1881 unsigned Op = IsSigned ? Instruction::SExt : Instruction::ZExt;
1883 }
1884 Cost += getRISCVInstructionCost(FWCVT, DstLT.second, CostKind);
1885 } else
1886 Cost += getRISCVInstructionCost(FNCVT, DstLT.second, CostKind);
1887 return Cost;
1888 }
1889 }
1891}
1892
1893unsigned RISCVTTIImpl::getEstimatedVLFor(VectorType *Ty) const {
1895 const unsigned EltSize = DL.getTypeSizeInBits(Ty->getElementType());
1896 const unsigned MinSize = DL.getTypeSizeInBits(Ty).getKnownMinValue();
1899 }
1901}
1902
1909
1910
1911 if (Ty->getScalarSizeInBits() > ST->getELen())
1913
1915 if (Ty->getElementType()->isIntegerTy(1)) {
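// i1 min/max reductions are lowered as boolean reductions: smin/umax of a
// mask becomes a reduce-or, smax/umin becomes a reduce-and.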
1916
1917
1918
1919 if (IID == Intrinsic::umax || IID == Intrinsic::smin)
1921 else
1923 }
1924
1925 if (IID == Intrinsic::maximum || IID == Intrinsic::minimum) {
1928 switch (IID) {
1929 case Intrinsic::maximum:
1931 Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
1932 } else {
1933 Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMAX_VS,
1934 RISCV::VFMV_F_S};
1935
1936
1937
1941 ExtraCost = 1 +
1945 }
1946 break;
1947
1948 case Intrinsic::minimum:
1950 Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
1951 } else {
1952 Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMIN_VS,
1953 RISCV::VFMV_F_S};
1954
1955
1956
1958 const unsigned EltTyBits = DL.getTypeSizeInBits(DstTy);
1960 ExtraCost = 1 +
1964 }
1965 break;
1966 }
1967 return ExtraCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
1968 }
1969
1970
1971 unsigned SplitOp;
1973 switch (IID) {
1974 default:
1976 case Intrinsic::smax:
1977 SplitOp = RISCV::VMAX_VV;
1978 Opcodes = {RISCV::VREDMAX_VS, RISCV::VMV_X_S};
1979 break;
1980 case Intrinsic::smin:
1981 SplitOp = RISCV::VMIN_VV;
1982 Opcodes = {RISCV::VREDMIN_VS, RISCV::VMV_X_S};
1983 break;
1984 case Intrinsic::umax:
1985 SplitOp = RISCV::VMAXU_VV;
1986 Opcodes = {RISCV::VREDMAXU_VS, RISCV::VMV_X_S};
1987 break;
1988 case Intrinsic::umin:
1989 SplitOp = RISCV::VMINU_VV;
1990 Opcodes = {RISCV::VREDMINU_VS, RISCV::VMV_X_S};
1991 break;
1992 case Intrinsic::maxnum:
1993 SplitOp = RISCV::VFMAX_VV;
1994 Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
1995 break;
1996 case Intrinsic::minnum:
1997 SplitOp = RISCV::VFMIN_VV;
1998 Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
1999 break;
2000 }
2001
2003 (LT.first > 1) ? (LT.first - 1) *
2004 getRISCVInstructionCost(SplitOp, LT.second, CostKind)
2005 : 0;
2006 return SplitCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
2007}
2008
2011 std::optional FMF,
2015
2016
2017 if (Ty->getScalarSizeInBits() > ST->getELen())
2019
2020 int ISD = TLI->InstructionOpcodeToISD(Opcode);
2021 assert(ISD && "Invalid opcode");
2022
2026
2028 Type *ElementTy = Ty->getElementType();
2030
2031
2032
2033 if (LT.second == MVT::v1i1)
2034 return getRISCVInstructionCost(RISCV::VFIRST_M, LT.second, CostKind) +
2037
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052 return ((LT.first > 2) ? (LT.first - 2) : 0) *
2053 getRISCVInstructionCost(RISCV::VMAND_MM, LT.second, CostKind) +
2054 getRISCVInstructionCost(RISCV::VMNAND_MM, LT.second, CostKind) +
2055 getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) +
2059
2060
2061
2062
2063
2064 return (LT.first - 1) *
2065 getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second, CostKind) +
2066 getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) + 1;
2067 } else {
2069
2070
2071
2072
2073
2074 return (LT.first - 1) *
2075 getRISCVInstructionCost(RISCV::VMOR_MM, LT.second, CostKind) +
2076 getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) +
2079 }
2080 }
2081
2082
2083
2084
2085 unsigned SplitOp;
2087 switch (ISD) {
2089 SplitOp = RISCV::VADD_VV;
2090 Opcodes = {RISCV::VMV_S_X, RISCV::VREDSUM_VS, RISCV::VMV_X_S};
2091 break;
2093 SplitOp = RISCV::VOR_VV;
2094 Opcodes = {RISCV::VREDOR_VS, RISCV::VMV_X_S};
2095 break;
2097 SplitOp = RISCV::VXOR_VV;
2098 Opcodes = {RISCV::VMV_S_X, RISCV::VREDXOR_VS, RISCV::VMV_X_S};
2099 break;
2101 SplitOp = RISCV::VAND_VV;
2102 Opcodes = {RISCV::VREDAND_VS, RISCV::VMV_X_S};
2103 break;
2105
2106 if ((LT.second.getScalarType() == MVT::f16 && !ST->hasVInstructionsF16()) ||
2107 LT.second.getScalarType() == MVT::bf16)
2110 Opcodes.push_back(RISCV::VFMV_S_F);
2111 for (unsigned i = 0; i < LT.first.getValue(); i++)
2112 Opcodes.push_back(RISCV::VFREDOSUM_VS);
2113 Opcodes.push_back(RISCV::VFMV_F_S);
2114 return getRISCVInstructionCost(Opcodes, LT.second, CostKind);
2115 }
2116 SplitOp = RISCV::VFADD_VV;
2117 Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDUSUM_VS, RISCV::VFMV_F_S};
2118 break;
2119 }
2120
2122 (LT.first > 1) ? (LT.first - 1) *
2123 getRISCVInstructionCost(SplitOp, LT.second, CostKind)
2124 : 0;
2125 return SplitCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
2126}
2127
2129 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy,
2134
2135
2139
2140 if (Opcode != Instruction::Add && Opcode != Instruction::FAdd)
2143
2145
2146 if (IsUnsigned && Opcode == Instruction::Add &&
2147 LT.second.isFixedLengthVector() && LT.second.getScalarType() == MVT::i1) {
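// Summing a zero-extended i1 vector is just counting the set mask bits, so
// this lowers to a single vcpop.m.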
2148
2149
2150 return LT.first *
2151 getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind);
2152 }
2153
2154 if (ResTy->getScalarSizeInBits() != 2 * LT.second.getScalarSizeInBits())
2157
2158 return (LT.first - 1) +
2160}
2161
2165 assert(OpInfo.isConstant() && "non constant operand?");
2167
2168
2169
2170 return 0;
2171
2172 if (OpInfo.isUniform())
2173
2174
2175
2176 return 1;
2177
2178 return getConstantPoolLoadCost(Ty, CostKind);
2179}
2180
2182 Align Alignment,
2187 EVT VT = TLI->getValueType(DL, Src, true);
2188
2189 if (VT == MVT::Other)
2192
2194 if (Opcode == Instruction::Store && OpInfo.isConstant())
2196
2198
2202 return Cost;
2203
2204
2205
2206
2207
2209 if (Src->isVectorTy() && LT.second.isVector() &&
2211 LT.second.getSizeInBits()))
2212 return Cost;
2213
2216 }();
2217
2218
2219
2220
2221 if (ST->hasVInstructions() && LT.second.isVector() &&
2223 BaseCost *= TLI->getLMULCost(LT.second);
2224 return Cost + BaseCost;
2225}
2226
2233 Op1Info, Op2Info, I);
2234
2237 Op1Info, Op2Info, I);
2238
2239
2240 if (ValTy->isVectorTy() && ValTy->getScalarSizeInBits() > ST->getELen())
2242 Op1Info, Op2Info, I);
2243
2244 auto GetConstantMatCost =
2246 if (OpInfo.isUniform())
2247
2248
2249 return 0;
2250
2251 return getConstantPoolLoadCost(ValTy, CostKind);
2252 };
2253
2256 ConstantMatCost += GetConstantMatCost(Op1Info);
2258 ConstantMatCost += GetConstantMatCost(Op2Info);
2259
2261 if (Opcode == Instruction::Select && ValTy->isVectorTy()) {
2263 if (ValTy->getScalarSizeInBits() == 1) {
2264
2265
2266
2267 return ConstantMatCost +
2268 LT.first *
2269 getRISCVInstructionCost(
2270 {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},
2272 }
2273
2274 return ConstantMatCost +
2275 LT.first * getRISCVInstructionCost(RISCV::VMERGE_VVM, LT.second,
2277 }
2278
2279 if (ValTy->getScalarSizeInBits() == 1) {
2280
2281
2282
2283
2284
2285 MVT InterimVT = LT.second.changeVectorElementType(MVT::i8);
2286 return ConstantMatCost +
2287 LT.first *
2288 getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
2290 LT.first * getRISCVInstructionCost(
2291 {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},
2293 }
2294
2295
2296
2297
2298 return ConstantMatCost +
2299 LT.first * getRISCVInstructionCost(
2300 {RISCV::VMV_V_X, RISCV::VMSNE_VI, RISCV::VMERGE_VVM},
2302 }
2303
2304 if ((Opcode == Instruction::ICmp) && ValTy->isVectorTy() &&
2306
2307
2308 return ConstantMatCost + LT.first * getRISCVInstructionCost(RISCV::VMSLT_VV,
2309 LT.second,
2311 }
2312
2313 if ((Opcode == Instruction::FCmp) && ValTy->isVectorTy() &&
2315
2316
2318 return ConstantMatCost +
2319 getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second, CostKind);
2320
2321
2322
2323
2324
2325 if ((ValTy->getScalarSizeInBits() == 16 && !ST->hasVInstructionsF16()) ||
2326 (ValTy->getScalarSizeInBits() == 32 && !ST->hasVInstructionsF32()) ||
2327 (ValTy->getScalarSizeInBits() == 64 && !ST->hasVInstructionsF64()))
2329 Op1Info, Op2Info, I);
2330
2331
2332
2333 switch (VecPred) {
2338 return ConstantMatCost +
2339 LT.first * getRISCVInstructionCost(
2340 {RISCV::VMFLT_VV, RISCV::VMFLT_VV, RISCV::VMOR_MM},
2342
2347 return ConstantMatCost +
2348 LT.first *
2349 getRISCVInstructionCost({RISCV::VMFLT_VV, RISCV::VMNAND_MM},
2351
2358 return ConstantMatCost +
2359 LT.first *
2360 getRISCVInstructionCost(RISCV::VMFLT_VV, LT.second, CostKind);
2361 default:
2362 break;
2363 }
2364 }
2365
2366
2367
2368
2369
2370 if (ST->hasConditionalMoveFusion() && I && isa<ICmpInst>(I) &&
2371 ValTy->isIntegerTy() && !I->user_empty()) {
2372 if (all_of(I->users(), [&](const User *U) {
2373 return match(U, m_Select(m_Specific(I), m_Value(), m_Value())) &&
2374 U->getType()->isIntegerTy() &&
2375 !isa<ConstantData>(U->getOperand(1)) &&
2376 !isa<ConstantData>(U->getOperand(2));
2377 }))
2378 return 0;
2379 }
2380
2381
2382
2384 Op1Info, Op2Info, I);
2385}
2386
2391 return Opcode == Instruction::PHI ? 0 : 1;
2392
2393 return 0;
2394}
2395
2398 unsigned Index,
2399 const Value *Op0,
2400 const Value *Op1) const {
2402
2403
2404
2405
2407 return 1;
2408 }
2409
2410 if (Opcode != Instruction::ExtractElement &&
2411 Opcode != Instruction::InsertElement)
2413
2414
2416
2417
2418 if (!LT.second.isVector()) {
2420
2421 if (Index != -1U)
2422 return 0;
2423
2424
2425
2426
2427 Type *ElemTy = FixedVecTy->getElementType();
2428 auto NumElems = FixedVecTy->getNumElements();
2429 auto Align = DL.getPrefTypeAlign(ElemTy);
2434 return Opcode == Instruction::ExtractElement
2435 ? StoreCost * NumElems + LoadCost
2436 : (StoreCost + LoadCost) * NumElems + StoreCost;
2437 }
2438
2439
2440 if (LT.second.isScalableVector() && !LT.first.isValid())
2441 return LT.first;
2442
2443
2448 if (Opcode == Instruction::ExtractElement) {
2454 return ExtendCost + ExtractCost;
2455 }
2464 return ExtendCost + InsertCost + TruncCost;
2465 }
2466
2467
2468
2469
2470 unsigned BaseCost = 1;
2471
2472 unsigned SlideCost = Opcode == Instruction::InsertElement ? 2 : 1;
2473
2474 if (Index != -1U) {
2475
2476
2477 if (LT.second.isFixedLengthVector()) {
2478 unsigned Width = LT.second.getVectorNumElements();
2479 Index = Index % Width;
2480 }
2481
2482
2483
2484 if (auto VLEN = ST->getRealVLen()) {
2485 unsigned EltSize = LT.second.getScalarSizeInBits();
2486 unsigned M1Max = *VLEN / EltSize;
2487 Index = Index % M1Max;
2488 }
2489
2490 if (Index == 0)
2491
2492 SlideCost = 0;
2493 else if (ST->hasVendorXRivosVisni() && isUInt<5>(Index) &&
2495 SlideCost = 0;
2496 else if (Opcode == Instruction::InsertElement)
2497 SlideCost = 1;
2498 }
2499
2500
2501
2502
2503 if (LT.first > 1 &&
2504 ((Index == -1U) || (Index >= LT.second.getVectorMinNumElements() &&
2505 LT.second.isScalableVector()))) {
2507 Align VecAlign = DL.getPrefTypeAlign(Val);
2508 Align SclAlign = DL.getPrefTypeAlign(ScalarType);
2509
2511
2512
2513 if (Opcode == Instruction::ExtractElement)
2515 getMemoryOpCost(Instruction::Load, ScalarType, SclAlign, 0,
2517 IdxCost;
2518
2519
2520
2523 getMemoryOpCost(Instruction::Store, ScalarType, SclAlign, 0,
2525 IdxCost;
2526 }
2527
2528
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549 BaseCost = Opcode == Instruction::InsertElement ? 3 : 4;
2550 }
2551 return BaseCost + SlideCost;
2552}
2553
2557 unsigned Index) const {
2560 Index);
2561
2562
2563
2564
2566 assert(Index < EC.getKnownMinValue() && "Unexpected reverse index");
2568 EC.getKnownMinValue() - 1 - Index, nullptr,
2569 nullptr);
2570}
2571
2576
2577
2580 Args, CxtI);
2581
2584 Args, CxtI);
2585
2586
2587 if (isa(Ty) && Ty->getScalarSizeInBits() > ST->getELen())
2589 Args, CxtI);
2590
2591
2593
2594
2595 if (!LT.second.isVector())
2597 Args, CxtI);
2598
2599
2600
2601 unsigned ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
2603 if ((LT.second.getVectorElementType() == MVT::f16 ||
2604 LT.second.getVectorElementType() == MVT::bf16) &&
2605 TLI->getOperationAction(ISDOpcode, LT.second) ==
2607 MVT PromotedVT = TLI->getTypeToPromoteTo(ISDOpcode, LT.second);
2610
2611 CastCost += LT.first * Args.size() *
2614
2615 CastCost +=
2616 LT.first * getCastInstrCost(Instruction::FPTrunc, LegalTy, PromotedTy,
2618
2619 LT.second = PromotedVT;
2620 }
2621
2622 auto getConstantMatCost =
2624 if (OpInfo.isUniform() && canSplatOperand(Opcode, Operand))
2625
2626
2627
2628
2629
2630 return 0;
2631
2632 return getConstantPoolLoadCost(Ty, CostKind);
2633 };
2634
2635
2638 ConstantMatCost += getConstantMatCost(0, Op1Info);
2640 ConstantMatCost += getConstantMatCost(1, Op2Info);
2641
2642 unsigned Op;
2643 switch (ISDOpcode) {
2646 Op = RISCV::VADD_VV;
2647 break;
2651 Op = RISCV::VSLL_VV;
2652 break;
2656 Op = (Ty->getScalarSizeInBits() == 1) ? RISCV::VMAND_MM : RISCV::VAND_VV;
2657 break;
2661 Op = RISCV::VMUL_VV;
2662 break;
2665 Op = RISCV::VDIV_VV;
2666 break;
2669 Op = RISCV::VREM_VV;
2670 break;
2673 Op = RISCV::VFADD_VV;
2674 break;
2676 Op = RISCV::VFMUL_VV;
2677 break;
2679 Op = RISCV::VFDIV_VV;
2680 break;
2681 case ISD::FNEG:
2682 Op = RISCV::VFSGNJN_VV;
2683 break;
2684 default:
2685
2686
2687 return CastCost + ConstantMatCost +
2689 Args, CxtI);
2690 }
2691
2693
2694
2695
2696 if (Ty->isFPOrFPVectorTy())
2698 return CastCost + ConstantMatCost + LT.first * InstrCost;
2699}
2700
2701
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717 for (auto [I, V] : enumerate(Ptrs)) {
2718 const auto *GEP = dyn_cast<GetElementPtrInst>(V);
2719 if (!GEP)
2720 continue;
2721 if (Info.isSameBase() && V != Base) {
2722 if (GEP->hasAllConstantIndices())
2723 continue;
2724
2725
2726
2727
2728 unsigned Stride = DL.getTypeStoreSize(AccessTy);
2729 if (Info.isUnitStride() &&
2731 nullptr,
2732 Stride * I,
2733 true,
2734 0,
2735 GEP->getType()->getPointerAddressSpace()))
2736 continue;
2738 {TTI::OK_AnyValue, TTI::OP_None},
2739 {TTI::OK_AnyValue, TTI::OP_None}, {});
2740 } else {
2743 Indices, AccessTy, CostKind);
2744 }
2745 }
2746 return Cost;
2747}
2748
2752
2753
2754
2755
2756 if (ST->enableDefaultUnroll())
2758
2759
2760
2762
2763
2766 if (L->getHeader()->getParent()->hasOptSize())
2767 return;
2768
2770 L->getExitingBlocks(ExitingBlocks);
2772 << "Blocks: " << L->getNumBlocks() << "\n"
2773 << "Exit blocks: " << ExitingBlocks.size() << "\n");
2774
2775
2776
2777 if (ExitingBlocks.size() > 2)
2778 return;
2779
2780
2781
2782 if (L->getNumBlocks() > 4)
2783 return;
2784
2785
2786
2787
2790 for (auto *BB : L->getBlocks()) {
2791 for (auto &I : *BB) {
2792
2793
2794
2795 if (IsVectorized && (I.getType()->isVectorTy() ||
2797 return V->getType()->isVectorTy();
2798 })))
2799 return;
2800
2804 continue;
2805 }
2806 return;
2807 }
2808
2812 }
2813 }
2814
2816
2821
2822
2823
2824 if (Cost < 12)
2825 UP.Force = true;
2826}
2827
2832
2838 bool HasMask = false;
2839
2840 auto getSegNum = [](const IntrinsicInst *II, unsigned PtrOperandNo,
2841 bool IsWrite) -> int64_t {
2842 if (auto *TarExtTy =
2844 return TarExtTy->getIntParameter(0);
2845
2846 return 1;
2847 };
2848
2849 switch (IID) {
2850 case Intrinsic::riscv_vle_mask:
2851 case Intrinsic::riscv_vse_mask:
2852 case Intrinsic::riscv_vlseg2_mask:
2853 case Intrinsic::riscv_vlseg3_mask:
2854 case Intrinsic::riscv_vlseg4_mask:
2855 case Intrinsic::riscv_vlseg5_mask:
2856 case Intrinsic::riscv_vlseg6_mask:
2857 case Intrinsic::riscv_vlseg7_mask:
2858 case Intrinsic::riscv_vlseg8_mask:
2859 case Intrinsic::riscv_vsseg2_mask:
2860 case Intrinsic::riscv_vsseg3_mask:
2861 case Intrinsic::riscv_vsseg4_mask:
2862 case Intrinsic::riscv_vsseg5_mask:
2863 case Intrinsic::riscv_vsseg6_mask:
2864 case Intrinsic::riscv_vsseg7_mask:
2865 case Intrinsic::riscv_vsseg8_mask:
2866 HasMask = true;
2867 [[fallthrough]];
2868 case Intrinsic::riscv_vle:
2869 case Intrinsic::riscv_vse:
2870 case Intrinsic::riscv_vlseg2:
2871 case Intrinsic::riscv_vlseg3:
2872 case Intrinsic::riscv_vlseg4:
2873 case Intrinsic::riscv_vlseg5:
2874 case Intrinsic::riscv_vlseg6:
2875 case Intrinsic::riscv_vlseg7:
2876 case Intrinsic::riscv_vlseg8:
2877 case Intrinsic::riscv_vsseg2:
2878 case Intrinsic::riscv_vsseg3:
2879 case Intrinsic::riscv_vsseg4:
2880 case Intrinsic::riscv_vsseg5:
2881 case Intrinsic::riscv_vsseg6:
2882 case Intrinsic::riscv_vsseg7:
2883 case Intrinsic::riscv_vsseg8: {
2884
2885
2886
2887
2888
2889
2890
2891
2892
2895
2897 unsigned SEW =
2899 ->getZExtValue();
2900 Ty = TarExtTy->getTypeParameter(0U);
2904 }
2905 const auto *RVVIInfo = RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IID);
2906 unsigned VLIndex = RVVIInfo->VLOperand;
2907 unsigned PtrOperandNo = VLIndex - 1 - HasMask;
2912 if (HasMask)
2915 unsigned SegNum = getSegNum(Inst, PtrOperandNo, IsWrite);
2916
2917 if (SegNum > 1) {
2918 unsigned ElemSize = Ty->getScalarSizeInBits();
2921 }
2922 Info.InterestingOperands.emplace_back(Inst, PtrOperandNo, IsWrite, Ty,
2923 Alignment, Mask, EVL);
2924 return true;
2925 }
2926 case Intrinsic::riscv_vlse_mask:
2927 case Intrinsic::riscv_vsse_mask:
2928 case Intrinsic::riscv_vlsseg2_mask:
2929 case Intrinsic::riscv_vlsseg3_mask:
2930 case Intrinsic::riscv_vlsseg4_mask:
2931 case Intrinsic::riscv_vlsseg5_mask:
2932 case Intrinsic::riscv_vlsseg6_mask:
2933 case Intrinsic::riscv_vlsseg7_mask:
2934 case Intrinsic::riscv_vlsseg8_mask:
2935 case Intrinsic::riscv_vssseg2_mask:
2936 case Intrinsic::riscv_vssseg3_mask:
2937 case Intrinsic::riscv_vssseg4_mask:
2938 case Intrinsic::riscv_vssseg5_mask:
2939 case Intrinsic::riscv_vssseg6_mask:
2940 case Intrinsic::riscv_vssseg7_mask:
2941 case Intrinsic::riscv_vssseg8_mask:
2942 HasMask = true;
2943 [[fallthrough]];
2944 case Intrinsic::riscv_vlse:
2945 case Intrinsic::riscv_vsse:
2946 case Intrinsic::riscv_vlsseg2:
2947 case Intrinsic::riscv_vlsseg3:
2948 case Intrinsic::riscv_vlsseg4:
2949 case Intrinsic::riscv_vlsseg5:
2950 case Intrinsic::riscv_vlsseg6:
2951 case Intrinsic::riscv_vlsseg7:
2952 case Intrinsic::riscv_vlsseg8:
2953 case Intrinsic::riscv_vssseg2:
2954 case Intrinsic::riscv_vssseg3:
2955 case Intrinsic::riscv_vssseg4:
2956 case Intrinsic::riscv_vssseg5:
2957 case Intrinsic::riscv_vssseg6:
2958 case Intrinsic::riscv_vssseg7:
2959 case Intrinsic::riscv_vssseg8: {
2960
2961
2962
2963
2964
2965
2966
2967
2968
2971
2973 unsigned SEW =
2975 ->getZExtValue();
2976 Ty = TarExtTy->getTypeParameter(0U);
2980 }
2981 const auto *RVVIInfo = RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IID);
2982 unsigned VLIndex = RVVIInfo->VLOperand;
2983 unsigned PtrOperandNo = VLIndex - 2 - HasMask;
2986
2988
2989
2990
2991
2995 Alignment = Align(1);
2996
2999 if (HasMask)
3002 unsigned SegNum = getSegNum(Inst, PtrOperandNo, IsWrite);
3003
3004 if (SegNum > 1) {
3005 unsigned ElemSize = Ty->getScalarSizeInBits();
3008 }
3009 Info.InterestingOperands.emplace_back(Inst, PtrOperandNo, IsWrite, Ty,
3010 Alignment, Mask, EVL, Stride);
3011 return true;
3012 }
3013 case Intrinsic::riscv_vloxei_mask:
3014 case Intrinsic::riscv_vluxei_mask:
3015 case Intrinsic::riscv_vsoxei_mask:
3016 case Intrinsic::riscv_vsuxei_mask:
3017 case Intrinsic::riscv_vloxseg2_mask:
3018 case Intrinsic::riscv_vloxseg3_mask:
3019 case Intrinsic::riscv_vloxseg4_mask:
3020 case Intrinsic::riscv_vloxseg5_mask:
3021 case Intrinsic::riscv_vloxseg6_mask:
3022 case Intrinsic::riscv_vloxseg7_mask:
3023 case Intrinsic::riscv_vloxseg8_mask:
3024 case Intrinsic::riscv_vluxseg2_mask:
3025 case Intrinsic::riscv_vluxseg3_mask:
3026 case Intrinsic::riscv_vluxseg4_mask:
3027 case Intrinsic::riscv_vluxseg5_mask:
3028 case Intrinsic::riscv_vluxseg6_mask:
3029 case Intrinsic::riscv_vluxseg7_mask:
3030 case Intrinsic::riscv_vluxseg8_mask:
3031 case Intrinsic::riscv_vsoxseg2_mask:
3032 case Intrinsic::riscv_vsoxseg3_mask:
3033 case Intrinsic::riscv_vsoxseg4_mask:
3034 case Intrinsic::riscv_vsoxseg5_mask:
3035 case Intrinsic::riscv_vsoxseg6_mask:
3036 case Intrinsic::riscv_vsoxseg7_mask:
3037 case Intrinsic::riscv_vsoxseg8_mask:
3038 case Intrinsic::riscv_vsuxseg2_mask:
3039 case Intrinsic::riscv_vsuxseg3_mask:
3040 case Intrinsic::riscv_vsuxseg4_mask:
3041 case Intrinsic::riscv_vsuxseg5_mask:
3042 case Intrinsic::riscv_vsuxseg6_mask:
3043 case Intrinsic::riscv_vsuxseg7_mask:
3044 case Intrinsic::riscv_vsuxseg8_mask:
3045 HasMask = true;
3046 [[fallthrough]];
3047 case Intrinsic::riscv_vloxei:
3048 case Intrinsic::riscv_vluxei:
3049 case Intrinsic::riscv_vsoxei:
3050 case Intrinsic::riscv_vsuxei:
3051 case Intrinsic::riscv_vloxseg2:
3052 case Intrinsic::riscv_vloxseg3:
3053 case Intrinsic::riscv_vloxseg4:
3054 case Intrinsic::riscv_vloxseg5:
3055 case Intrinsic::riscv_vloxseg6:
3056 case Intrinsic::riscv_vloxseg7:
3057 case Intrinsic::riscv_vloxseg8:
3058 case Intrinsic::riscv_vluxseg2:
3059 case Intrinsic::riscv_vluxseg3:
3060 case Intrinsic::riscv_vluxseg4:
3061 case Intrinsic::riscv_vluxseg5:
3062 case Intrinsic::riscv_vluxseg6:
3063 case Intrinsic::riscv_vluxseg7:
3064 case Intrinsic::riscv_vluxseg8:
3065 case Intrinsic::riscv_vsoxseg2:
3066 case Intrinsic::riscv_vsoxseg3:
3067 case Intrinsic::riscv_vsoxseg4:
3068 case Intrinsic::riscv_vsoxseg5:
3069 case Intrinsic::riscv_vsoxseg6:
3070 case Intrinsic::riscv_vsoxseg7:
3071 case Intrinsic::riscv_vsoxseg8:
3072 case Intrinsic::riscv_vsuxseg2:
3073 case Intrinsic::riscv_vsuxseg3:
3074 case Intrinsic::riscv_vsuxseg4:
3075 case Intrinsic::riscv_vsuxseg5:
3076 case Intrinsic::riscv_vsuxseg6:
3077 case Intrinsic::riscv_vsuxseg7:
3078 case Intrinsic::riscv_vsuxseg8: {
3079
3080
3081
3082
3083
3084
3085
3086
3087
3090
3092 unsigned SEW =
3094 ->getZExtValue();
3095 Ty = TarExtTy->getTypeParameter(0U);
3099 }
3100 const auto *RVVIInfo = RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IID);
3101 unsigned VLIndex = RVVIInfo->VLOperand;
3102 unsigned PtrOperandNo = VLIndex - 2 - HasMask;
3104 if (HasMask) {
3106 } else {
3107
3108
3109
3110
3113 }
3115 unsigned SegNum = getSegNum(Inst, PtrOperandNo, IsWrite);
3116
3117 if (SegNum > 1) {
3118 unsigned ElemSize = Ty->getScalarSizeInBits();
3121 }
3123 Info.InterestingOperands.emplace_back(Inst, PtrOperandNo, IsWrite, Ty,
3124 Align(1), Mask, EVL,
3125 nullptr, OffsetOp);
3126 return true;
3127 }
3128 }
3129 return false;
3130}
3131
3133 if (Ty->isVectorTy()) {
3134
3136 if ((EltTy->isHalfTy() && !ST->hasVInstructionsF16()) ||
3140
3142 if (Size.isScalable() && ST->hasVInstructions())
3144
3145 if (ST->useRVVForFixedLengthVectors())
3147 }
3148
3150}
3151
3153 if (SLPMaxVF.getNumOccurrences())
3155
3156
3157
3158
3159
3160
3163
3164
3165 return std::max(1U, RegWidth.getFixedValue() / ElemWidth);
3166}
3167
3171
3173 return ST->enableUnalignedVectorMem();
3174}
3175
3179 if (ST->hasVendorXCVmem() && !ST->is64Bit())
3181
3183}
3184
3199
3201 Align Alignment) const {
3203 if (!VTy || VTy->isScalableTy())
3204 return false;
3205
3207 return false;
3208
3209
3210
3211 if (VTy->getElementType()->isIntegerTy(8))
3212 if (VTy->getElementCount().getFixedValue() > 256)
3213 return VTy->getPrimitiveSizeInBits() / ST->getRealMinVLen() <
3214 ST->getMaxLMULForFixedLengthVectors();
3215 return true;
3216}
3217
3219 Align Alignment) const {
3221 if (!VTy || VTy->isScalableTy())
3222 return false;
3223
3225 return false;
3226 return true;
3227}
3228
3229
3230
3231
3232
3233
3235 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
3236 bool Considerable = false;
3237 AllowPromotionWithoutCommonHeader = false;
3239 return false;
3240 Type *ConsideredSExtType =
3242 if (I.getType() != ConsideredSExtType)
3243 return false;
3244
3245
3246 for (const User *U : I.users()) {
3248 Considerable = true;
3249
3250
3251
3252 if (GEPInst->getNumOperands() > 2) {
3253 AllowPromotionWithoutCommonHeader = true;
3254 break;
3255 }
3256 }
3257 }
3258 return Considerable;
3259}
3260
3262 switch (Opcode) {
3263 case Instruction::Add:
3264 case Instruction::Sub:
3265 case Instruction::Mul:
3266 case Instruction::And:
3267 case Instruction::Or:
3268 case Instruction::Xor:
3269 case Instruction::FAdd:
3270 case Instruction::FSub:
3271 case Instruction::FMul:
3272 case Instruction::FDiv:
3273 case Instruction::ICmp:
3274 case Instruction::FCmp:
3275 return true;
3276 case Instruction::Shl:
3277 case Instruction::LShr:
3278 case Instruction::AShr:
3279 case Instruction::UDiv:
3280 case Instruction::SDiv:
3281 case Instruction::URem:
3282 case Instruction::SRem:
3283 case Instruction::Select:
3284 return Operand == 1;
3285 default:
3286 return false;
3287 }
3288}
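// The first group can take a scalar on either side (e.g. vadd.vx, vand.vx,
// vfsub.vf/vfrsub.vf), while shifts, divides, remainders and select only have
// scalar forms for the second operand (vsll.vx, vdivu.vx, vmerge.vxm, ...),
// hence Operand == 1.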
3289
3290 bool RISCVTTIImpl::canSplatOperand(Instruction *I, int Operand) const {
3291 if (!I->getType()->isVectorTy() || !ST->hasVInstructions())
3292 return false;
3293
3294 if (canSplatOperand(I->getOpcode(), Operand))
3295 return true;
3296
3297 auto *II = dyn_cast<IntrinsicInst>(I);
3298 if (!II)
3299 return false;
3300
3301 switch (II->getIntrinsicID()) {
3302 case Intrinsic::fma:
3303 case Intrinsic::vp_fma:
3304 case Intrinsic::fmuladd:
3305 case Intrinsic::vp_fmuladd:
3306 return Operand == 0 || Operand == 1;
3307 case Intrinsic::vp_shl:
3308 case Intrinsic::vp_lshr:
3309 case Intrinsic::vp_ashr:
3310 case Intrinsic::vp_udiv:
3311 case Intrinsic::vp_sdiv:
3312 case Intrinsic::vp_urem:
3313 case Intrinsic::vp_srem:
3314 case Intrinsic::ssub_sat:
3315 case Intrinsic::vp_ssub_sat:
3316 case Intrinsic::usub_sat:
3317 case Intrinsic::vp_usub_sat:
3318 case Intrinsic::vp_select:
3319 return Operand == 1;
3320
3321 case Intrinsic::vp_add:
3322 case Intrinsic::vp_mul:
3323 case Intrinsic::vp_and:
3324 case Intrinsic::vp_or:
3325 case Intrinsic::vp_xor:
3326 case Intrinsic::vp_fadd:
3327 case Intrinsic::vp_fmul:
3328 case Intrinsic::vp_icmp:
3329 case Intrinsic::vp_fcmp:
3330 case Intrinsic::smin:
3331 case Intrinsic::vp_smin:
3332 case Intrinsic::umin:
3333 case Intrinsic::vp_umin:
3334 case Intrinsic::smax:
3335 case Intrinsic::vp_smax:
3336 case Intrinsic::umax:
3337 case Intrinsic::vp_umax:
3338 case Intrinsic::sadd_sat:
3339 case Intrinsic::vp_sadd_sat:
3340 case Intrinsic::uadd_sat:
3341 case Intrinsic::vp_uadd_sat:
3342
3343 case Intrinsic::vp_sub:
3344 case Intrinsic::vp_fsub:
3345 case Intrinsic::vp_fdiv:
3346 return Operand == 0 || Operand == 1;
3347 default:
3348 return false;
3349 }
3350}
3351
3352
3353
3354
3355 bool RISCVTTIImpl::isProfitableToSinkOperands(
3356 Instruction *I, SmallVectorImpl<Use *> &Ops) const {
3358
3359 if (I->isBitwiseLogicOp()) {
3360 if (!I->getType()->isVectorTy()) {
3361 if (ST->hasStdExtZbb() || ST->hasStdExtZbkb()) {
3362 for (auto &Op : I->operands()) {
3363 // (and/or/xor X, (not Y)) -> (andn/orn/xnor X, Y) with Zbb/Zbkb.
3364 if (match(Op.get(), m_Not(m_Value()))) {
3365 Ops.push_back(&Op);
3366 return true;
3367 }
3368 }
3369 }
3370 } else if (I->getOpcode() == Instruction::And && ST->hasStdExtZvkb()) {
3371 for (auto &Op : I->operands()) {
3372 // (and X, (not Y)) -> (vandn.vv X, Y) with Zvkb.
3373 if (match(Op.get(), m_Not(m_Value()))) {
3374 Ops.push_back(&Op);
3375 return true;
3376 }
3377 // (and X, (splat (not Y))) -> (vandn.vx X, Y)
3378 if (match(Op.get(),
3379 m_Shuffle(m_InsertElt(m_Value(), m_Not(m_Value()), m_ZeroInt()),
3380 m_Value()))) {
3381 Use &InsertElt = cast<Instruction>(Op)->getOperandUse(0);
3382 Use &Not = cast<Instruction>(InsertElt)->getOperandUse(1);
3383 Ops.push_back(&Not);
3384 Ops.push_back(&InsertElt);
3385 Ops.push_back(&Op);
3386 return true;
3387 }
3388 }
3389 }
3390 }
3391
3392 if (!I->getType()->isVectorTy() || !ST->hasVInstructions())
3393 return false;
3394
3395
3396
3397
3398
3399
3400 if (!ST->sinkSplatOperands())
3401 return false;
3402
3403 for (auto OpIdx : enumerate(I->operands())) {
3404 if (!canSplatOperand(I, OpIdx.index()))
3405 continue;
3406
3407 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
3408 // Make sure we are not already sinking this operand.
3409 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
3410 continue;
3411
3412
3413 if (!match(Op, m_Shuffle(m_InsertElt(m_Value(), m_Value(), m_ZeroInt()),
3414 m_Value())))
3415 continue;
3416
3417
3418 if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))
3419 continue;
3420
3421
3422
3423 for (Use &U : Op->uses()) {
3424 Instruction *Insn = cast<Instruction>(U.getUser());
3425 if (!canSplatOperand(Insn, U.getOperandNo()))
3426 return false;
3427 }
3428
3429
3430 Use *InsertEltUse = &Op->getOperandUse(0);
3431 // Sink both the scalar being splatted and the insertelement feeding it.
3432 auto *InsertElt = cast<Instruction>(InsertEltUse->get());
3433 Ops.push_back(&InsertElt->getOperandUse(1));
3434 Ops.push_back(InsertEltUse);
3435 Ops.push_back(&OpIdx.value());
3436 }
3437 return true;
3438}
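// Illustrative splat shape this sinks next to its vector user so isel can
// select a .vx/.vf form instead of keeping the splat live across blocks:
//   %ins   = insertelement <vscale x 4 x i32> poison, i32 %x, i64 0
//   %splat = shufflevector <vscale x 4 x i32> %ins,
//                          <vscale x 4 x i32> poison,
//                          <vscale x 4 x i32> zeroinitializer
//   %r     = add <vscale x 4 x i32> %v, %splat   ; -> vadd.vx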
3439
3441 TTI::MemCmpExpansionOptions RISCVTTIImpl::enableMemCmpExpansion(
3442 bool OptSize, bool IsZeroCmp) const {
3443 TTI::MemCmpExpansionOptions Options;
3444 // Expansion only pays off when unaligned scalar accesses are cheap.
3445 if (!ST->enableUnalignedScalarMem())
3446 return Options;
3447
3448 if (!ST->hasStdExtZbb() && !ST->hasStdExtZbkb() && !IsZeroCmp)
3449 return Options;
3450
3451 Options.AllowOverlappingLoads = true;
3452 Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
3454 if (ST->is64Bit()) {
3455 Options.LoadSizes = {8, 4, 2, 1};
3456 Options.AllowedTailExpansions = {3, 5, 6};
3457 } else {
3458 Options.LoadSizes = {4, 2, 1};
3459 Options.AllowedTailExpansions = {3};
3460 }
3461
3462 if (IsZeroCmp && ST->hasVInstructions()) {
3463 unsigned VLenB = ST->getRealMinVLen() / 8;
3464
3465
3466 unsigned MinSize = ST->getXLen() / 8 + 1;
3467 unsigned MaxSize = VLenB * ST->getMaxLMULForFixedLengthVectors();
3468 for (unsigned Size = MinSize; Size <= MaxSize; Size++)
3469 Options.LoadSizes.insert(Options.LoadSizes.begin(), Size);
3470 }
3471 return Options;
3472}
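// Example (illustrative): on RV64 with Zbb and fast unaligned scalar access,
// a 16-byte memcmp may expand to two 8-byte loads per buffer; with V enabled,
// an equality-only compare (IsZeroCmp) may instead use a single vector load of
// up to VLenB * MaxLMUL bytes.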