LLVM: lib/Target/RISCV/RISCVTargetTransformInfo.cpp Source File
19#include
20#include
21using namespace llvm;
23
24#define DEBUG_TYPE "riscvtti"
25
27 "riscv-v-register-bit-width-lmul",
29 "The LMUL to use for getRegisterBitWidth queries. Affects LMUL used "
30 "by autovectorized code. Fractional LMULs are not supported."),
32
34 "riscv-v-slp-max-vf",
36 "Overrides result used for getMaximumVF query which is used "
37 "exclusively by SLP vectorizer."),
39
43
46 size_t NumInstr = OpCodes.size();
48 return NumInstr;
51 return LMULCost * NumInstr;
53 for (auto Op : OpCodes) {
54 switch (Op) {
55 case RISCV::VRGATHER_VI:
57 break;
58 case RISCV::VRGATHER_VV:
60 break;
61 case RISCV::VSLIDEUP_VI:
62 case RISCV::VSLIDEDOWN_VI:
64 break;
65 case RISCV::VSLIDEUP_VX:
66 case RISCV::VSLIDEDOWN_VX:
68 break;
69 case RISCV::VREDMAX_VS:
70 case RISCV::VREDMIN_VS:
71 case RISCV::VREDMAXU_VS:
72 case RISCV::VREDMINU_VS:
73 case RISCV::VREDSUM_VS:
74 case RISCV::VREDAND_VS:
75 case RISCV::VREDOR_VS:
76 case RISCV::VREDXOR_VS:
77 case RISCV::VFREDMAX_VS:
78 case RISCV::VFREDMIN_VS:
79 case RISCV::VFREDUSUM_VS: {
84 break;
85 }
86 case RISCV::VFREDOSUM_VS: {
91 break;
92 }
93 case RISCV::VMV_X_S:
94 case RISCV::VMV_S_X:
95 case RISCV::VFMV_F_S:
96 case RISCV::VFMV_S_F:
97 case RISCV::VMOR_MM:
98 case RISCV::VMXOR_MM:
99 case RISCV::VMAND_MM:
100 case RISCV::VMANDN_MM:
101 case RISCV::VMNAND_MM:
102 case RISCV::VCPOP_M:
103 case RISCV::VFIRST_M:
105 break;
106 default:
107 Cost += LMULCost;
108 }
109 }
111}
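// Illustrative standalone sketch (not part of the LLVM source): a simplified
// model of the cost accounting in getRISCVInstructionCost above. For a
// code-size query it simply counts instructions; otherwise every opcode that
// has no cheaper special case is charged roughly one LMUL's worth of work.
// The names below (ModelCostKind, modelInstructionListCost) are hypothetical.
#include <cstddef>
#include <vector>

enum class ModelCostKind { CodeSize, RecipThroughput };

static double modelInstructionListCost(const std::vector<int> &OpCodes,
                                       double LMULCost, ModelCostKind Kind) {
  if (Kind == ModelCostKind::CodeSize)
    return static_cast<double>(OpCodes.size()); // one unit per instruction
  double Cost = 0.0;
  for (std::size_t I = 0; I != OpCodes.size(); ++I)
    Cost += LMULCost; // default case; gathers, slides and reductions differ
  return Cost;
}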
112
117 bool FreeZeroes) {
119 "getIntImmCost can only estimate cost of materialising integers");
120
121
122 if (Imm == 0)
124
125
127 false, FreeZeroes);
128}
129
133}
134
135
136
137
139 uint64_t Mask = Imm.getZExtValue();
140 auto *BO = dyn_cast<BinaryOperator>(Inst->getOperand(0));
141 if (!BO || !BO->hasOneUse())
142 return false;
143
144 if (BO->getOpcode() != Instruction::Shl)
145 return false;
146
147 if (!isa<ConstantInt>(BO->getOperand(1)))
148 return false;
149
150 unsigned ShAmt = cast<ConstantInt>(BO->getOperand(1))->getZExtValue();
151
152
155 if (ShAmt == Trailing)
156 return true;
157 }
158
159 return false;
160}
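// Illustrative standalone sketch (not part of the LLVM source): a simplified
// model of the shift-pair check above. The idea is that (and (shl x, c2), c1)
// can be lowered as a pair of shifts, avoiding materialising c1, when c1 is a
// single contiguous run of ones whose trailing-zero count equals the shift
// amount. The exact condition in the real code may differ; this is an
// assumption based on the visible fragment.
#include <bit>
#include <cstdint>

static bool modelCanUseShiftPair(uint64_t Mask, unsigned ShAmt) {
  if (Mask == 0)
    return false;
  unsigned Trailing = static_cast<unsigned>(std::countr_zero(Mask));
  uint64_t Run = Mask >> Trailing;
  bool IsShiftedMask = (Run & (Run + 1)) == 0; // ones form one contiguous block
  return IsShiftedMask && ShAmt == Trailing;
}
// e.g. modelCanUseShiftPair(0x0000'00ff'0000'0000, 32) == true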
161
167 "getIntImmCost can only estimate cost of materialising integers");
168
169
170 if (Imm == 0)
172
173
174
175 bool Takes12BitImm = false;
176 unsigned ImmArgIdx = ~0U;
177
178 switch (Opcode) {
179 case Instruction::GetElementPtr:
180
181
182
184 case Instruction::Store: {
185
186
187
188
189 if (Idx == 1 || !Inst)
191 true);
192
193 StoreInst *ST = cast<StoreInst>(Inst);
194 if (!getTLI()->allowsMemoryAccessForAlignment(
196 ST->getPointerAddressSpace(), ST->getAlign()))
198
200 true);
201 }
202 case Instruction::Load:
203
205 case Instruction::And:
206
207 if (Imm == UINT64_C(0xffff) && ST->hasStdExtZbb())
209
210 if (Imm == UINT64_C(0xffffffff) && ST->hasStdExtZba())
212
213 if (ST->hasStdExtZbs() && (~Imm).isPowerOf2())
215 if (Inst && Idx == 1 && Imm.getBitWidth() <= ST->getXLen() &&
218 Takes12BitImm = true;
219 break;
220 case Instruction::Add:
221 Takes12BitImm = true;
222 break;
223 case Instruction::Or:
224 case Instruction::Xor:
225
226 if (ST->hasStdExtZbs() && Imm.isPowerOf2())
228 Takes12BitImm = true;
229 break;
230 case Instruction::Mul:
231
232 if (Imm.isPowerOf2() || Imm.isNegatedPowerOf2())
234
235 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2())
237
238 Takes12BitImm = true;
239 break;
240 case Instruction::Sub:
241 case Instruction::Shl:
242 case Instruction::LShr:
243 case Instruction::AShr:
244 Takes12BitImm = true;
245 ImmArgIdx = 1;
246 break;
247 default:
248 break;
249 }
250
251 if (Takes12BitImm) {
252
254
255 if (Imm.getSignificantBits() <= 64 &&
258 }
259 }
260
261
263 }
264
265
267}
268
273
275}
276
279}
280
284 return ST->hasStdExtZbb() || (ST->hasVendorXCVbitmanip() && !ST->is64Bit())
287}
288
290
291
292
293 switch (II->getIntrinsicID()) {
294 default:
295 return false;
296
297 case Intrinsic::vector_reduce_mul:
298 case Intrinsic::vector_reduce_fmul:
299 return true;
300 }
301}
302
307}
308
315}
316
319 unsigned LMUL =
321 switch (K) {
332 : 0);
333 }
334
336}
337
340
341
342
344 0, CostKind);
345}
346
348 unsigned Size = Mask.size();
350 return false;
351 for (unsigned I = 0; I != Size; ++I) {
352 if (static_cast<unsigned>(Mask[I]) == I)
353 continue;
354 if (Mask[I] != 0)
355 return false;
357 return false;
358 for (unsigned J = I + 1; J != Size; ++J)
359
360 if (static_cast<unsigned>(Mask[J]) != J % I)
361 return false;
362 SubVectorSize = I;
363 return true;
364 }
365
366 return false;
367}
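// Illustrative standalone sketch (not part of the LLVM source): a simplified,
// self-contained model of the repeated-concat check above. A shuffle mask such
// as {0,1,0,1,0,1,0,1} repeats its leading two-element sub-vector, so the
// result can be built with log2(8/2) doubling sub-vector inserts.
#include <vector>

static bool modelIsRepeatedConcatMask(const std::vector<int> &Mask,
                                      int &SubVectorSize) {
  const unsigned Size = static_cast<unsigned>(Mask.size());
  for (unsigned I = 1; I < Size; ++I) {
    if (Size % I != 0)
      continue;
    bool Repeats = true;
    for (unsigned J = 0; J != Size && Repeats; ++J)
      Repeats = static_cast<unsigned>(Mask[J]) == J % I;
    if (Repeats) {
      SubVectorSize = static_cast<int>(I);
      return true;
    }
  }
  return false; // e.g. an identity mask is not a repeated concatenation
}
// Usage: int Sub; modelIsRepeatedConcatMask({0,1,0,1,0,1,0,1}, Sub) returns
// true with Sub == 2.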
368
376 return cast<VectorType>(EVT(IndexVT).getTypeForEVT(C));
377}
378
386
388
389
390
391
392 if (isa<FixedVectorType>(Tp)) {
393 switch (Kind) {
394 default:
395 break;
397 if (Mask.size() >= 2 && LT.second.isFixedLengthVector()) {
398 MVT EltTp = LT.second.getVectorElementType();
399
400
401
403
404
405
406
407
409 return 2 * LT.first * TLI->getLMULCost(LT.second);
410
411 if (Mask[0] == 0 || Mask[0] == 1) {
412 auto DeinterleaveMask = createStrideMask(Mask[0], 2, Mask.size());
413
414
415 if (equal(DeinterleaveMask, Mask))
416 return LT.first * getRISCVInstructionCost(RISCV::VNSRL_WI,
418 }
419 }
420 int SubVectorSize;
421 if (LT.second.getScalarSizeInBits() != 1 &&
424 unsigned NumSlides = Log2_32(Mask.size() / SubVectorSize);
425
426 for (unsigned I = 0; I != NumSlides; ++I) {
427 unsigned InsertIndex = SubVectorSize * (1 << I);
432 std::pair<InstructionCost, MVT> DestLT =
434
435
436
439 CostKind, InsertIndex, SubTp);
440 }
442 }
443 }
444
445
446 if (LT.second.isFixedLengthVector() && LT.first == 1 &&
447 (LT.second.getScalarSizeInBits() != 8 ||
448 LT.second.getVectorNumElements() <= 256)) {
451 return IndexCost +
452 getRISCVInstructionCost(RISCV::VRGATHER_VV, LT.second, CostKind);
453 }
454 [[fallthrough]];
455 }
458
459
460
461 if (LT.second.isFixedLengthVector() && LT.first == 1 &&
462 (LT.second.getScalarSizeInBits() != 8 ||
463 LT.second.getVectorNumElements() <= 256)) {
470 return 2 * IndexCost +
471 getRISCVInstructionCost({RISCV::VRGATHER_VV, RISCV::VRGATHER_VV},
473 MaskCost;
474 }
475 [[fallthrough]];
476 }
478
479
480
481 if (!Mask.empty() && LT.first.isValid() && LT.first != 1 &&
482 LT.second.isFixedLengthVector() &&
483 LT.second.getVectorElementType().getSizeInBits() ==
485 LT.second.getVectorNumElements() <
486 cast<FixedVectorType>(Tp)->getNumElements() &&
488 cast<FixedVectorType>(Tp)->getNumElements()) ==
489 static_cast<unsigned>(*LT.first.getValue())) {
490 unsigned NumRegs = *LT.first.getValue();
491 unsigned VF = cast<FixedVectorType>(Tp)->getNumElements();
494
496 for (unsigned I = 0, NumSrcRegs = divideCeil(Mask.size(), SubVF);
498 bool IsSingleVector = true;
501 Mask.slice(I * SubVF,
502 I == NumSrcRegs - 1 ? Mask.size() % SubVF : SubVF),
503 SubMask.begin(), [&](int I) -> int {
504 if (I == PoisonMaskElem)
505 return PoisonMaskElem;
506 bool SingleSubVector = I / VF == 0;
507 IsSingleVector &= SingleSubVector;
508 return (SingleSubVector ? 0 : 1) * SubVF + (I % VF) % SubVF;
509 });
512 static_cast<unsigned>(P.value()) == P.index();
513 }))
514 continue;
517 SubVecTy, SubMask, CostKind, 0, nullptr);
518 }
520 }
521 break;
522 }
523 }
524 };
525
526
527 switch (Kind) {
528 default:
529
530
531
532 break;
534
535 if (Index == 0)
537
538
539
540
541
542
543
545 SubLT.second.isValid() && SubLT.second.isFixedLengthVector()) {
548 if (MinVLen == MaxVLen &&
549 SubLT.second.getScalarSizeInBits() * Index % MinVLen == 0 &&
550 SubLT.second.getSizeInBits() <= MinVLen)
552 }
553
554
555
556
557 return LT.first *
558 getRISCVInstructionCost(RISCV::VSLIDEDOWN_VI, LT.second, CostKind);
560
561
562
563 return LT.first *
564 getRISCVInstructionCost(RISCV::VSLIDEUP_VI, LT.second, CostKind);
566
567
568
569
570
571
572
573
574
575 return LT.first *
576 (1 + getRISCVInstructionCost({RISCV::VMV_S_X, RISCV::VMERGE_VVM},
578 }
581 Instruction::InsertElement);
582 if (LT.second.getScalarSizeInBits() == 1) {
583 if (HasScalar) {
584
585
586
587
588
589 return LT.first *
590 (1 + getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
592 }
593
594
595
596
597
598
599
600
601
602 return LT.first *
603 (1 + getRISCVInstructionCost({RISCV::VMV_V_I, RISCV::VMERGE_VIM,
604 RISCV::VMV_X_S, RISCV::VMV_V_X,
605 RISCV::VMSNE_VI},
607 }
608
609 if (HasScalar) {
610
611
612 return LT.first *
613 getRISCVInstructionCost(RISCV::VMV_V_X, LT.second, CostKind);
614 }
615
616
617
618 return LT.first *
619 getRISCVInstructionCost(RISCV::VRGATHER_VI, LT.second, CostKind);
620 }
622
623
624
625 unsigned Opcodes[2] = {RISCV::VSLIDEDOWN_VX, RISCV::VSLIDEUP_VX};
626 if (Index >= 0 && Index < 32)
627 Opcodes[0] = RISCV::VSLIDEDOWN_VI;
628 else if (Index < 0 && Index > -32)
629 Opcodes[1] = RISCV::VSLIDEUP_VI;
630 return LT.first * getRISCVInstructionCost(Opcodes, LT.second, CostKind);
631 }
633
634
635
636
637
638
639
640
641
642
643
644
645
646
648 if (LT.second.isFixedLengthVector())
649
650 LenCost = isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1;
651 unsigned Opcodes[] = {RISCV::VID_V, RISCV::VRSUB_VX, RISCV::VRGATHER_VV};
652 if (LT.second.isFixedLengthVector() &&
653 isInt<5>(LT.second.getVectorNumElements() - 1))
654 Opcodes[1] = RISCV::VRSUB_VI;
656 getRISCVInstructionCost(Opcodes, LT.second, CostKind);
657
659 return LT.first * (LenCost + GatherCost + ExtendCost);
660 }
661 }
663}
664
669}
670
672 VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract,
674 if (isa<ScalableVectorType>(Ty))
676
677
678
679
680
682 Ty, DemandedElts, Insert, Extract, CostKind);
684 if (Insert && !Extract && LT.first.isValid() && LT.second.isVector()) {
687
688
693 }
694
695 assert(LT.second.isFixedLengthVector());
699 cast<FixedVectorType>(Ty)->getNumElements() *
700 getRISCVInstructionCost(RISCV::VSLIDE1DOWN_VX, LT.second, CostKind);
701 if (BV < Cost)
703 }
704 }
706}
707
716
718}
719
723 bool UseMaskForCond, bool UseMaskForGaps) {
724
725
726
727
728 if (!UseMaskForCond && !UseMaskForGaps &&
729 Factor <= TLI->getMaxSupportedInterleaveFactor()) {
730 auto *VTy = cast<VectorType>(VecTy);
732
733 if (LT.second.isVector()) {
734 auto *SubVecTy =
736 VTy->getElementCount().divideCoefficientBy(Factor));
737 if (VTy->getElementCount().isKnownMultipleOf(Factor) &&
740
741
742
748 return LT.first * Cost;
749 }
750
751
752
754 getMemoryOpCost(Opcode, VTy->getElementType(), Alignment, 0,
755 CostKind, {TTI::OK_AnyValue, TTI::OP_None});
756 unsigned NumLoads = getEstimatedVLFor(VTy);
757 return NumLoads * MemOpCost;
758 }
759 }
760 }
761
762
763
764 if (isa<ScalableVectorType>(VecTy))
766
767 auto *FVTy = cast<FixedVectorType>(VecTy);
770 unsigned VF = FVTy->getNumElements() / Factor;
771
772
773
774
775
776
777 if (Opcode == Instruction::Load) {
779 for (unsigned Index : Indices) {
786 Cost += ShuffleCost;
787 }
789 }
790
791
792
793
794
795
796
797
798
799
800
801 if (Factor != 2)
804 UseMaskForCond, UseMaskForGaps);
805
806 assert(Opcode == Instruction::Store && "Opcode must be a store");
807
808
813 return MemCost + ShuffleCost;
814}
815
817 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
822
823 if ((Opcode == Instruction::Load &&
825 (Opcode == Instruction::Store &&
829
830
831
832
833 auto &VTy = *cast<VectorType>(DataTy);
836 {TTI::OK_AnyValue, TTI::OP_None}, I);
837 unsigned NumLoads = getEstimatedVLFor(&VTy);
838 return NumLoads * MemOpCost;
839}
840
842 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
844 if (((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
846 (Opcode != Instruction::Load && Opcode != Instruction::Store))
849
852
853
854
855
856 auto &VTy = *cast<VectorType>(DataTy);
859 {TTI::OK_AnyValue, TTI::OP_None}, I);
860 unsigned NumLoads = getEstimatedVLFor(&VTy);
861 return NumLoads * MemOpCost;
862}
863
866
867
868
871 for (auto *Ty : Tys) {
872 if (!Ty->isVectorTy())
873 continue;
877 }
879}
880
881
882
883
884
886 {Intrinsic::floor, MVT::f32, 9},
887 {Intrinsic::floor, MVT::f64, 9},
888 {Intrinsic::ceil, MVT::f32, 9},
889 {Intrinsic::ceil, MVT::f64, 9},
890 {Intrinsic::trunc, MVT::f32, 7},
891 {Intrinsic::trunc, MVT::f64, 7},
892 {Intrinsic::round, MVT::f32, 9},
893 {Intrinsic::round, MVT::f64, 9},
894 {Intrinsic::roundeven, MVT::f32, 9},
895 {Intrinsic::roundeven, MVT::f64, 9},
896 {Intrinsic::rint, MVT::f32, 7},
897 {Intrinsic::rint, MVT::f64, 7},
898 {Intrinsic::lrint, MVT::i32, 1},
899 {Intrinsic::lrint, MVT::i64, 1},
900 {Intrinsic::llrint, MVT::i64, 1},
901 {Intrinsic::nearbyint, MVT::f32, 9},
902 {Intrinsic::nearbyint, MVT::f64, 9},
903 {Intrinsic::bswap, MVT::i16, 3},
904 {Intrinsic::bswap, MVT::i32, 12},
905 {Intrinsic::bswap, MVT::i64, 31},
906 {Intrinsic::vp_bswap, MVT::i16, 3},
907 {Intrinsic::vp_bswap, MVT::i32, 12},
908 {Intrinsic::vp_bswap, MVT::i64, 31},
909 {Intrinsic::vp_fshl, MVT::i8, 7},
910 {Intrinsic::vp_fshl, MVT::i16, 7},
911 {Intrinsic::vp_fshl, MVT::i32, 7},
912 {Intrinsic::vp_fshl, MVT::i64, 7},
913 {Intrinsic::vp_fshr, MVT::i8, 7},
914 {Intrinsic::vp_fshr, MVT::i16, 7},
915 {Intrinsic::vp_fshr, MVT::i32, 7},
916 {Intrinsic::vp_fshr, MVT::i64, 7},
917 {Intrinsic::bitreverse, MVT::i8, 17},
918 {Intrinsic::bitreverse, MVT::i16, 24},
919 {Intrinsic::bitreverse, MVT::i32, 33},
920 {Intrinsic::bitreverse, MVT::i64, 52},
921 {Intrinsic::vp_bitreverse, MVT::i8, 17},
922 {Intrinsic::vp_bitreverse, MVT::i16, 24},
923 {Intrinsic::vp_bitreverse, MVT::i32, 33},
924 {Intrinsic::vp_bitreverse, MVT::i64, 52},
925 {Intrinsic::ctpop, MVT::i8, 12},
926 {Intrinsic::ctpop, MVT::i16, 19},
927 {Intrinsic::ctpop, MVT::i32, 20},
928 {Intrinsic::ctpop, MVT::i64, 21},
929 {Intrinsic::ctlz, MVT::i8, 19},
930 {Intrinsic::ctlz, MVT::i16, 28},
931 {Intrinsic::ctlz, MVT::i32, 31},
932 {Intrinsic::ctlz, MVT::i64, 35},
933 {Intrinsic::cttz, MVT::i8, 16},
934 {Intrinsic::cttz, MVT::i16, 23},
935 {Intrinsic::cttz, MVT::i32, 24},
936 {Intrinsic::cttz, MVT::i64, 25},
937 {Intrinsic::vp_ctpop, MVT::i8, 12},
938 {Intrinsic::vp_ctpop, MVT::i16, 19},
939 {Intrinsic::vp_ctpop, MVT::i32, 20},
940 {Intrinsic::vp_ctpop, MVT::i64, 21},
941 {Intrinsic::vp_ctlz, MVT::i8, 19},
942 {Intrinsic::vp_ctlz, MVT::i16, 28},
943 {Intrinsic::vp_ctlz, MVT::i32, 31},
944 {Intrinsic::vp_ctlz, MVT::i64, 35},
945 {Intrinsic::vp_cttz, MVT::i8, 16},
946 {Intrinsic::vp_cttz, MVT::i16, 23},
947 {Intrinsic::vp_cttz, MVT::i32, 24},
948 {Intrinsic::vp_cttz, MVT::i64, 25},
949};
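// Illustrative standalone sketch (not part of the LLVM source): the table
// above is consulted with a linear lookup keyed on (intrinsic ID, scalar
// element type); when an entry is found, its cost is scaled by the number of
// parts the legalised type splits into (LT.first in the code further below).
// The struct and function here are hypothetical stand-ins, not LLVM's
// CostTblEntry/CostTableLookup.
#include <optional>
#include <vector>

struct ModelCostEntry {
  int IntrinsicID;
  int ElementType;
  unsigned Cost;
};

static std::optional<unsigned>
modelCostTableLookup(const std::vector<ModelCostEntry> &Table, int ID,
                     int EltTy) {
  for (const ModelCostEntry &E : Table)
    if (E.IntrinsicID == ID && E.ElementType == EltTy)
      return E.Cost;
  return std::nullopt; // no entry: fall back to the generic cost model
}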
950
952 switch (ID) {
953#define HELPER_MAP_VPID_TO_VPSD(VPID, VPSD) \
954 case Intrinsic::VPID: \
955 return ISD::VPSD;
956#include "llvm/IR/VPIntrinsics.def"
957#undef HELPER_MAP_VPID_TO_VPSD
958 }
960}
961
966 switch (ICA.getID()) {
967 case Intrinsic::lrint:
968 case Intrinsic::llrint:
969
970 if (auto *VecTy = dyn_cast<VectorType>(ICA.getArgTypes()[0]);
971 VecTy && VecTy->getElementType()->is16bitFPTy())
973 [[fallthrough]];
974 case Intrinsic::ceil:
975 case Intrinsic::floor:
976 case Intrinsic::trunc:
977 case Intrinsic::rint:
978 case Intrinsic::round:
979 case Intrinsic::roundeven: {
980
983 return LT.first * 8;
984 break;
985 }
986 case Intrinsic::umin:
987 case Intrinsic::umax:
988 case Intrinsic::smin:
989 case Intrinsic::smax: {
991 if (LT.second.isScalarInteger() && ST->hasStdExtZbb())
992 return LT.first;
993
995 unsigned Op;
996 switch (ICA.getID()) {
997 case Intrinsic::umin:
998 Op = RISCV::VMINU_VV;
999 break;
1000 case Intrinsic::umax:
1001 Op = RISCV::VMAXU_VV;
1002 break;
1003 case Intrinsic::smin:
1004 Op = RISCV::VMIN_VV;
1005 break;
1006 case Intrinsic::smax:
1007 Op = RISCV::VMAX_VV;
1008 break;
1009 }
1010 return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
1011 }
1012 break;
1013 }
1014 case Intrinsic::sadd_sat:
1015 case Intrinsic::ssub_sat:
1016 case Intrinsic::uadd_sat:
1017 case Intrinsic::usub_sat: {
1020 unsigned Op;
1021 switch (ICA.getID()) {
1022 case Intrinsic::sadd_sat:
1023 Op = RISCV::VSADD_VV;
1024 break;
1025 case Intrinsic::ssub_sat:
1026 Op = RISCV::VSSUBU_VV;
1027 break;
1028 case Intrinsic::uadd_sat:
1029 Op = RISCV::VSADDU_VV;
1030 break;
1031 case Intrinsic::usub_sat:
1032 Op = RISCV::VSSUBU_VV;
1033 break;
1034 }
1035 return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
1036 }
1037 break;
1038 }
1039 case Intrinsic::fabs: {
1042
1043
1044
1045
1046
1047 if (LT.second.getVectorElementType() == MVT::bf16 ||
1048 (LT.second.getVectorElementType() == MVT::f16 &&
1050 return LT.first * getRISCVInstructionCost(RISCV::VAND_VX, LT.second,
1052 2;
1053 else
1054 return LT.first *
1055 getRISCVInstructionCost(RISCV::VFSGNJX_VV, LT.second, CostKind);
1056 }
1057 break;
1058 }
1059 case Intrinsic::sqrt: {
1064 MVT ConvType = LT.second;
1065 MVT FsqrtType = LT.second;
1066
1067
1068 if (LT.second.getVectorElementType() == MVT::bf16) {
1069 if (LT.second == MVT::nxv32bf16) {
1070 ConvOp = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFWCVTBF16_F_F_V,
1071 RISCV::VFNCVTBF16_F_F_W, RISCV::VFNCVTBF16_F_F_W};
1072 FsqrtOp = {RISCV::VFSQRT_V, RISCV::VFSQRT_V};
1073 ConvType = MVT::nxv16f16;
1074 FsqrtType = MVT::nxv16f32;
1075 } else {
1076 ConvOp = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFNCVTBF16_F_F_W};
1077 FsqrtOp = {RISCV::VFSQRT_V};
1079 }
1080 } else if (LT.second.getVectorElementType() == MVT::f16 &&
1082 if (LT.second == MVT::nxv32f16) {
1083 ConvOp = {RISCV::VFWCVT_F_F_V, RISCV::VFWCVT_F_F_V,
1084 RISCV::VFNCVT_F_F_W, RISCV::VFNCVT_F_F_W};
1085 FsqrtOp = {RISCV::VFSQRT_V, RISCV::VFSQRT_V};
1086 ConvType = MVT::nxv16f16;
1087 FsqrtType = MVT::nxv16f32;
1088 } else {
1089 ConvOp = {RISCV::VFWCVT_F_F_V, RISCV::VFNCVT_F_F_W};
1090 FsqrtOp = {RISCV::VFSQRT_V};
1092 }
1093 } else {
1094 FsqrtOp = {RISCV::VFSQRT_V};
1095 }
1096
1097 return LT.first * (getRISCVInstructionCost(FsqrtOp, FsqrtType, CostKind) +
1098 getRISCVInstructionCost(ConvOp, ConvType, CostKind));
1099 }
1100 break;
1101 }
1102 case Intrinsic::cttz:
1103 case Intrinsic::ctlz:
1104 case Intrinsic::ctpop: {
1106 if (ST->hasVInstructions() && ST->hasStdExtZvbb() && LT.second.isVector()) {
1107 unsigned Op;
1108 switch (ICA.getID()) {
1109 case Intrinsic::cttz:
1110 Op = RISCV::VCTZ_V;
1111 break;
1112 case Intrinsic::ctlz:
1113 Op = RISCV::VCLZ_V;
1114 break;
1115 case Intrinsic::ctpop:
1116 Op = RISCV::VCPOP_V;
1117 break;
1118 }
1119 return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
1120 }
1121 break;
1122 }
1123 case Intrinsic::abs: {
1126
1127
1128 return LT.first *
1129 getRISCVInstructionCost({RISCV::VRSUB_VI, RISCV::VMAX_VV},
1131 }
1132 break;
1133 }
1134 case Intrinsic::get_active_lane_mask: {
1137 ICA.getArgTypes()[0], cast<VectorType>(RetTy)->getElementCount());
1139
1140
1141
1142
1143 return LT.first *
1144 getRISCVInstructionCost({RISCV::VSADDU_VX, RISCV::VMSLTU_VX},
1146 }
1147 break;
1148 }
1149
1150 case Intrinsic::stepvector: {
1152
1153
1155 return getRISCVInstructionCost(RISCV::VID_V, LT.second, CostKind) +
1156 (LT.first - 1) *
1157 getRISCVInstructionCost(RISCV::VADD_VX, LT.second, CostKind);
1158 return 1 + (LT.first - 1);
1159 }
1160 case Intrinsic::experimental_cttz_elts: {
1164 break;
1167
1168
1169
1172 cast<ConstantInt>(ICA.getArgs()[1])->isZero())
1177
1178 return Cost;
1179 }
1180 case Intrinsic::vp_rint: {
1181
1182 unsigned Cost = 5;
1185 return Cost * LT.first;
1186 break;
1187 }
1188 case Intrinsic::vp_nearbyint: {
1189
1190 unsigned Cost = 7;
1193 return Cost * LT.first;
1194 break;
1195 }
1196 case Intrinsic::vp_ceil:
1197 case Intrinsic::vp_floor:
1198 case Intrinsic::vp_round:
1199 case Intrinsic::vp_roundeven:
1200 case Intrinsic::vp_roundtozero: {
1201
1202
1203 unsigned Cost = 7;
1207 return Cost * LT.first;
1208 break;
1209 }
1210 case Intrinsic::vp_fneg: {
1211 std::optional FOp =
1213 assert(FOp.has_value());
1215 break;
1216 }
1217 case Intrinsic::vp_select: {
1220 assert(FOp.has_value());
1223 }
1224 case Intrinsic::vp_merge:
1228 case Intrinsic::experimental_vp_splat: {
1230
1231 if (!ST->hasVInstructions() || LT.second.getScalarType() == MVT::i1)
1233 return LT.first * getRISCVInstructionCost(LT.second.isFloatingPoint()
1234 ? RISCV::VFMV_V_F
1235 : RISCV::VMV_V_X,
1237 }
1238 }
1239
1242 LT.second.isVector()) {
1243 MVT EltTy = LT.second.getVectorElementType();
1245 ICA.getID(), EltTy))
1246 return LT.first * Entry->Cost;
1247 }
1248 }
1249
1251}
1252
1258 bool IsVectorType = isa<VectorType>(Dst) && isa<VectorType>(Src);
1259 if (!IsVectorType)
1261
1262
1263
1264
1266 Dst->getScalarSizeInBits() > ST->getELen())
1268
1270 assert(ISD && "Invalid opcode");
1273
1274
1275
1276
1277
1278
1279 switch (ISD) {
1280 default:
1281 break;
1284 if (Src->getScalarSizeInBits() == 1) {
1285
1286
1287
1288
1289 return getRISCVInstructionCost(RISCV::VMV_V_I, DstLT.second, CostKind) +
1290 DstLT.first * getRISCVInstructionCost(RISCV::VMERGE_VIM,
1292 DstLT.first - 1;
1293 }
1294 break;
1296 if (Dst->getScalarSizeInBits() == 1) {
1297
1298
1299
1300
1301
1302 return SrcLT.first *
1303 getRISCVInstructionCost({RISCV::VAND_VI, RISCV::VMSNE_VI},
1305 SrcLT.first - 1;
1306 }
1307 break;
1308 };
1309
1310
1311
1312
1313
1315 if (!SrcLT.second.isVector() || !DstLT.second.isVector() ||
1317 SrcLT.second.getSizeInBits()) ||
1319 DstLT.second.getSizeInBits()))
1321
1322
1323 assert((SrcLT.first == 1) && (DstLT.first == 1) && "Illegal type");
1324
1325 int PowDiff = (int)Log2_32(DstLT.second.getScalarSizeInBits()) -
1326 (int)Log2_32(SrcLT.second.getScalarSizeInBits());
1327 switch (ISD) {
1330 if ((PowDiff < 1) || (PowDiff > 3))
1332 unsigned SExtOp[] = {RISCV::VSEXT_VF2, RISCV::VSEXT_VF4, RISCV::VSEXT_VF8};
1333 unsigned ZExtOp[] = {RISCV::VZEXT_VF2, RISCV::VZEXT_VF4, RISCV::VZEXT_VF8};
1334 unsigned Op =
1335 (ISD == ISD::SIGN_EXTEND) ? SExtOp[PowDiff - 1] : ZExtOp[PowDiff - 1];
1336 return getRISCVInstructionCost(Op, DstLT.second, CostKind);
1337 }
1341
1342 unsigned SrcEltSize = SrcLT.second.getScalarSizeInBits();
1343 unsigned DstEltSize = DstLT.second.getScalarSizeInBits();
1344
1347 : RISCV::VFNCVT_F_F_W;
1349 for (; SrcEltSize != DstEltSize;) {
1354 DstEltSize =
1355 (DstEltSize > SrcEltSize) ? DstEltSize >> 1 : DstEltSize << 1;
1356 Cost += getRISCVInstructionCost(Op, DstMVT, CostKind);
1357 }
1358 return Cost;
1359 }
1363 unsigned FCVT = IsSigned ? RISCV::VFCVT_RTZ_X_F_V : RISCV::VFCVT_RTZ_XU_F_V;
1364 unsigned FWCVT =
1365 IsSigned ? RISCV::VFWCVT_RTZ_X_F_V : RISCV::VFWCVT_RTZ_XU_F_V;
1366 unsigned FNCVT =
1367 IsSigned ? RISCV::VFNCVT_RTZ_X_F_W : RISCV::VFNCVT_RTZ_XU_F_W;
1368 unsigned SrcEltSize = Src->getScalarSizeInBits();
1369 unsigned DstEltSize = Dst->getScalarSizeInBits();
1371 if ((SrcEltSize == 16) &&
1373
1374
1377 cast<VectorType>(Dst)->getElementCount());
1378 std::pair<InstructionCost, MVT> VecF32LT =
1381 VecF32LT.first * getRISCVInstructionCost(RISCV::VFWCVT_F_F_V,
1384 return Cost;
1385 }
1386 if (DstEltSize == SrcEltSize)
1387 Cost += getRISCVInstructionCost(FCVT, DstLT.second, CostKind);
1388 else if (DstEltSize > SrcEltSize)
1389 Cost += getRISCVInstructionCost(FWCVT, DstLT.second, CostKind);
1390 else {
1391
1392
1395 Cost += getRISCVInstructionCost(FNCVT, VecVT, CostKind);
1396 if ((SrcEltSize / 2) > DstEltSize) {
1400 }
1401 }
1402 return Cost;
1403 }
1407 unsigned FCVT = IsSigned ? RISCV::VFCVT_F_X_V : RISCV::VFCVT_F_XU_V;
1408 unsigned FWCVT = IsSigned ? RISCV::VFWCVT_F_X_V : RISCV::VFWCVT_F_XU_V;
1409 unsigned FNCVT = IsSigned ? RISCV::VFNCVT_F_X_W : RISCV::VFNCVT_F_XU_W;
1410 unsigned SrcEltSize = Src->getScalarSizeInBits();
1411 unsigned DstEltSize = Dst->getScalarSizeInBits();
1412
1414 if ((DstEltSize == 16) &&
1416
1417
1420 cast<VectorType>(Dst)->getElementCount());
1421 std::pair<InstructionCost, MVT> VecF32LT =
1424 Cost += VecF32LT.first * getRISCVInstructionCost(RISCV::VFNCVT_F_F_W,
1426 return Cost;
1427 }
1428
1429 if (DstEltSize == SrcEltSize)
1430 Cost += getRISCVInstructionCost(FCVT, DstLT.second, CostKind);
1431 else if (DstEltSize > SrcEltSize) {
1432 if ((DstEltSize / 2) > SrcEltSize) {
1435 cast<VectorType>(Dst)->getElementCount());
1436 unsigned Op = IsSigned ? Instruction::SExt : Instruction::ZExt;
1438 }
1439 Cost += getRISCVInstructionCost(FWCVT, DstLT.second, CostKind);
1440 } else
1441 Cost += getRISCVInstructionCost(FNCVT, DstLT.second, CostKind);
1442 return Cost;
1443 }
1444 }
1446}
1447
1448unsigned RISCVTTIImpl::getEstimatedVLFor(VectorType *Ty) {
1449 if (isa<ScalableVectorType>(Ty)) {
1454 }
1455 return cast<FixedVectorType>(Ty)->getNumElements();
1456}
1457
1464
1465
1468
1471
1472
1473
1474 if (IID == Intrinsic::umax || IID == Intrinsic::smin)
1476 else
1478 }
1479
1480 if (IID == Intrinsic::maximum || IID == Intrinsic::minimum) {
1483 switch (IID) {
1484 case Intrinsic::maximum:
1486 Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
1487 } else {
1488 Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMAX_VS,
1489 RISCV::VFMV_F_S};
1490
1491
1492
1496 ExtraCost = 1 +
1500 }
1501 break;
1502
1503 case Intrinsic::minimum:
1505 Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
1506 } else {
1507 Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMIN_VS,
1508 RISCV::VFMV_F_S};
1509
1510
1511
1515 ExtraCost = 1 +
1519 }
1520 break;
1521 }
1522 return ExtraCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
1523 }
1524
1525
1526 unsigned SplitOp;
1528 switch (IID) {
1529 default:
1531 case Intrinsic::smax:
1532 SplitOp = RISCV::VMAX_VV;
1533 Opcodes = {RISCV::VREDMAX_VS, RISCV::VMV_X_S};
1534 break;
1535 case Intrinsic::smin:
1536 SplitOp = RISCV::VMIN_VV;
1537 Opcodes = {RISCV::VREDMIN_VS, RISCV::VMV_X_S};
1538 break;
1539 case Intrinsic::umax:
1540 SplitOp = RISCV::VMAXU_VV;
1541 Opcodes = {RISCV::VREDMAXU_VS, RISCV::VMV_X_S};
1542 break;
1543 case Intrinsic::umin:
1544 SplitOp = RISCV::VMINU_VV;
1545 Opcodes = {RISCV::VREDMINU_VS, RISCV::VMV_X_S};
1546 break;
1547 case Intrinsic::maxnum:
1548 SplitOp = RISCV::VFMAX_VV;
1549 Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
1550 break;
1551 case Intrinsic::minnum:
1552 SplitOp = RISCV::VFMIN_VV;
1553 Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
1554 break;
1555 }
1556
1558 (LT.first > 1) ? (LT.first - 1) *
1559 getRISCVInstructionCost(SplitOp, LT.second, CostKind)
1560 : 0;
1561 return SplitCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
1562}
1563
1566 std::optional FMF,
1570
1571
1574
1576 assert(ISD && "Invalid opcode");
1577
1581
1585
1586
1587
1588 if (LT.second == MVT::v1i1)
1589 return getRISCVInstructionCost(RISCV::VFIRST_M, LT.second, CostKind) +
1592
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607 return ((LT.first > 2) ? (LT.first - 2) : 0) *
1608 getRISCVInstructionCost(RISCV::VMAND_MM, LT.second, CostKind) +
1609 getRISCVInstructionCost(RISCV::VMNAND_MM, LT.second, CostKind) +
1610 getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) +
1614
1615
1616
1617
1618
1619 return (LT.first - 1) *
1620 getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second, CostKind) +
1621 getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) + 1;
1622 } else {
1624
1625
1626
1627
1628
1629 return (LT.first - 1) *
1630 getRISCVInstructionCost(RISCV::VMOR_MM, LT.second, CostKind) +
1631 getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) +
1634 }
1635 }
1636
1637
1638
1639
1640 unsigned SplitOp;
1642 switch (ISD) {
1644 SplitOp = RISCV::VADD_VV;
1645 Opcodes = {RISCV::VMV_S_X, RISCV::VREDSUM_VS, RISCV::VMV_X_S};
1646 break;
1648 SplitOp = RISCV::VOR_VV;
1649 Opcodes = {RISCV::VREDOR_VS, RISCV::VMV_X_S};
1650 break;
1652 SplitOp = RISCV::VXOR_VV;
1653 Opcodes = {RISCV::VMV_S_X, RISCV::VREDXOR_VS, RISCV::VMV_X_S};
1654 break;
1656 SplitOp = RISCV::VAND_VV;
1657 Opcodes = {RISCV::VREDAND_VS, RISCV::VMV_X_S};
1658 break;
1660
1661 if ((LT.second.getVectorElementType() == MVT::f16 &&
1663 LT.second.getVectorElementType() == MVT::bf16)
1666 Opcodes.push_back(RISCV::VFMV_S_F);
1667 for (unsigned i = 0; i < LT.first.getValue(); i++)
1668 Opcodes.push_back(RISCV::VFREDOSUM_VS);
1669 Opcodes.push_back(RISCV::VFMV_F_S);
1670 return getRISCVInstructionCost(Opcodes, LT.second, CostKind);
1671 }
1672 SplitOp = RISCV::VFADD_VV;
1673 Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDUSUM_VS, RISCV::VFMV_F_S};
1674 break;
1675 }
1676
1678 (LT.first > 1) ? (LT.first - 1) *
1679 getRISCVInstructionCost(SplitOp, LT.second, CostKind)
1680 : 0;
1681 return SplitCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
1682}
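// Illustrative standalone sketch (not part of the LLVM source): the reduction
// cost formula used above. A type that legalises to LTFirst vector registers
// first pays LTFirst - 1 element-wise "split" operations (e.g. vadd.vv) to
// combine the parts, then one reduction sequence (e.g. vmv.s.x + vredsum.vs +
// vmv.x.s) on a single register's worth of elements.
static double modelSplitReductionCost(unsigned LTFirst, double SplitOpCost,
                                      double ReductionSeqCost) {
  double SplitCost = LTFirst > 1 ? (LTFirst - 1) * SplitOpCost : 0.0;
  return SplitCost + ReductionSeqCost;
}
// e.g. a reduction over a value needing 4 registers: 3 split ops plus the
// final reduction sequence.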
1683
1685 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy,
1690
1691
1695
1696 if (Opcode != Instruction::Add && Opcode != Instruction::FAdd)
1699
1701
1702 if (IsUnsigned && Opcode == Instruction::Add &&
1703 LT.second.isFixedLengthVector() && LT.second.getScalarType() == MVT::i1) {
1704
1705
1706 return LT.first *
1707 getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind);
1708 }
1709
1710 if (ResTy->getScalarSizeInBits() != 2 * LT.second.getScalarSizeInBits())
1713
1714 return (LT.first - 1) +
1716}
1717
1722 if (!isa<VectorType>(Ty))
1723
1724
1725
1726 return 0;
1727
1729
1730
1731
1732 return 1;
1733
1734 return getConstantPoolLoadCost(Ty, CostKind);
1735}
1736
1737
1745
1746 if (VT == MVT::Other)
1749
1751 if (Opcode == Instruction::Store && OpInfo.isConstant())
1753
1755
1759 return Cost;
1760
1761
1762
1763
1764
1766 if (Src->isVectorTy() && LT.second.isVector() &&
1768 LT.second.getSizeInBits()))
1769 return Cost;
1770
1773 }();
1774
1775
1776
1777
1779 BaseCost *= TLI->getLMULCost(LT.second);
1780 return Cost + BaseCost;
1781
1782}
1783
1790 Op1Info, Op2Info, I);
1791
1794 Op1Info, Op2Info, I);
1795
1796
1799 Op1Info, Op2Info, I);
1800
1801 auto GetConstantMatCost =
1803 if (OpInfo.isUniform())
1804
1805
1806 return 0;
1807
1808 return getConstantPoolLoadCost(ValTy, CostKind);
1809 };
1810
1813 ConstantMatCost += GetConstantMatCost(Op1Info);
1815 ConstantMatCost += GetConstantMatCost(Op2Info);
1816
1818 if (Opcode == Instruction::Select && ValTy->isVectorTy()) {
1821
1822
1823
1824 return ConstantMatCost +
1825 LT.first *
1826 getRISCVInstructionCost(
1827 {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},
1829 }
1830
1831 return ConstantMatCost +
1832 LT.first * getRISCVInstructionCost(RISCV::VMERGE_VVM, LT.second,
1834 }
1835
1837
1838
1839
1840
1841
1842 MVT InterimVT = LT.second.changeVectorElementType(MVT::i8);
1843 return ConstantMatCost +
1844 LT.first *
1845 getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
1847 LT.first * getRISCVInstructionCost(
1848 {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},
1850 }
1851
1852
1853
1854
1855 return ConstantMatCost +
1856 LT.first * getRISCVInstructionCost(
1857 {RISCV::VMV_V_X, RISCV::VMSNE_VI, RISCV::VMERGE_VVM},
1859 }
1860
1861 if ((Opcode == Instruction::ICmp) && ValTy->isVectorTy() &&
1863
1864
1865 return ConstantMatCost + LT.first * getRISCVInstructionCost(RISCV::VMSLT_VV,
1866 LT.second,
1868 }
1869
1870 if ((Opcode == Instruction::FCmp) && ValTy->isVectorTy() &&
1872
1873
1875 return ConstantMatCost +
1876 getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second, CostKind);
1877
1878
1879
1880
1881
1886 Op1Info, Op2Info, I);
1887
1888
1889
1890 switch (VecPred) {
1895 return ConstantMatCost +
1896 LT.first * getRISCVInstructionCost(
1897 {RISCV::VMFLT_VV, RISCV::VMFLT_VV, RISCV::VMOR_MM},
1899
1904 return ConstantMatCost +
1905 LT.first *
1906 getRISCVInstructionCost({RISCV::VMFLT_VV, RISCV::VMNAND_MM},
1908
1915 return ConstantMatCost +
1916 LT.first *
1917 getRISCVInstructionCost(RISCV::VMFLT_VV, LT.second, CostKind);
1918 default:
1919 break;
1920 }
1921 }
1922
1923
1924
1925
1926
1929 if (all_of(I->users(), [&](const User *U) {
1930 return match(U, m_Select(m_Specific(I), m_Value(), m_Value())) &&
1931 U->getType()->isIntegerTy() &&
1932 !isa(U->getOperand(1)) &&
1933 !isa(U->getOperand(2));
1934 }))
1935 return 0;
1936 }
1937
1938
1939
1941 Op1Info, Op2Info, I);
1942}
1943
1948 return Opcode == Instruction::PHI ? 0 : 1;
1949
1950 return 0;
1951}
1952
1955 unsigned Index, Value *Op0,
1958
1959 if (Opcode != Instruction::ExtractElement &&
1960 Opcode != Instruction::InsertElement)
1962
1963
1965
1966
1967 if (!LT.second.isVector()) {
1968 auto *FixedVecTy = cast<FixedVectorType>(Val);
1969
1970 if (Index != -1U)
1971 return 0;
1972
1973
1974
1975
1976 Type *ElemTy = FixedVecTy->getElementType();
1977 auto NumElems = FixedVecTy->getNumElements();
1983 return Opcode == Instruction::ExtractElement
1984 ? StoreCost * NumElems + LoadCost
1985 : (StoreCost + LoadCost) * NumElems + StoreCost;
1986 }
1987
1988
1989 if (LT.second.isScalableVector() && !LT.first.isValid())
1990 return LT.first;
1991
1992
1996 cast<VectorType>(Val)->getElementCount());
1997 if (Opcode == Instruction::ExtractElement) {
2003 return ExtendCost + ExtractCost;
2004 }
2013 return ExtendCost + InsertCost + TruncCost;
2014 }
2015
2016
2017
2018
2019 unsigned BaseCost = 1;
2020
2021 unsigned SlideCost = Opcode == Instruction::InsertElement ? 2 : 1;
2022
2023 if (Index != -1U) {
2024
2025
2026 if (LT.second.isFixedLengthVector()) {
2027 unsigned Width = LT.second.getVectorNumElements();
2028 Index = Index % Width;
2029 }
2030
2031
2032
2034 unsigned EltSize = LT.second.getScalarSizeInBits();
2035 unsigned M1Max = *VLEN / EltSize;
2036 Index = Index % M1Max;
2037 }
2038
2039
2040 if (Index == 0)
2041 SlideCost = 0;
2042 else if (Opcode == Instruction::InsertElement)
2043 SlideCost = 1;
2044 }
2045
2046
2047
2048
2049 if (LT.first > 1 &&
2050 ((Index == -1U) || (Index >= LT.second.getVectorMinNumElements() &&
2051 LT.second.isScalableVector()))) {
2055
2057
2058
2059 if (Opcode == Instruction::ExtractElement)
2061 getMemoryOpCost(Instruction::Load, ScalarType, SclAlign, 0,
2063 IdxCost;
2064
2065
2066
2069 getMemoryOpCost(Instruction::Store, ScalarType, SclAlign, 0,
2071 IdxCost;
2072 }
2073
2074
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095 BaseCost = Opcode == Instruction::InsertElement ? 3 : 4;
2096 }
2097 return BaseCost + SlideCost;
2098}
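// Illustrative standalone sketch (not part of the LLVM source): a simplified
// model of the slide-cost component of getVectorInstrCost above. Lane 0 needs
// no slide; a known non-zero lane needs a single slide for an insert, while an
// unknown lane conservatively pays for both a slide up and a slide down
// (insert) or one slidedown (extract).
static unsigned modelSlideCost(bool IsInsert, bool IndexKnown, unsigned Index) {
  unsigned SlideCost = IsInsert ? 2 : 1; // unknown-index default
  if (IndexKnown) {
    if (Index == 0)
      SlideCost = 0;
    else if (IsInsert)
      SlideCost = 1;
  }
  return SlideCost;
}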
2099
2104
2105
2108 Args, CxtI);
2109
2112 Args, CxtI);
2113
2114
2117 Args, CxtI);
2118
2119
2121
2122
2123 if (!LT.second.isVector())
2125 Args, CxtI);
2126
2127
2128
2131 if ((LT.second.getVectorElementType() == MVT::f16 ||
2132 LT.second.getVectorElementType() == MVT::bf16) &&
2138
2139 CastCost += LT.first * Args.size() *
2142
2143 CastCost +=
2144 LT.first * getCastInstrCost(Instruction::FPTrunc, LegalTy, PromotedTy,
2146
2147 LT.second = PromotedVT;
2148 }
2149
2150 auto getConstantMatCost =
2152 if (OpInfo.isUniform() && canSplatOperand(Opcode, Operand))
2153
2154
2155
2156
2157
2158 return 0;
2159
2160 return getConstantPoolLoadCost(Ty, CostKind);
2161 };
2162
2163
2166 ConstantMatCost += getConstantMatCost(0, Op1Info);
2168 ConstantMatCost += getConstantMatCost(1, Op2Info);
2169
2170 unsigned Op;
2171 switch (ISDOpcode) {
2174 Op = RISCV::VADD_VV;
2175 break;
2179 Op = RISCV::VSLL_VV;
2180 break;
2185 break;
2189 Op = RISCV::VMUL_VV;
2190 break;
2193 Op = RISCV::VDIV_VV;
2194 break;
2197 Op = RISCV::VREM_VV;
2198 break;
2201 Op = RISCV::VFADD_VV;
2202 break;
2204 Op = RISCV::VFMUL_VV;
2205 break;
2207 Op = RISCV::VFDIV_VV;
2208 break;
2210 Op = RISCV::VFSGNJN_VV;
2211 break;
2212 default:
2213
2214
2215 return CastCost + ConstantMatCost +
2217 Args, CxtI);
2218 }
2219
2221
2222
2223
2226 return CastCost + ConstantMatCost + LT.first * InstrCost;
2227}
2228
2229
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245 for (auto [I, V] : enumerate(Ptrs)) {
2246 const auto *GEP = dyn_cast<GetElementPtrInst>(V);
2247 if (!GEP)
2248 continue;
2249 if (Info.isSameBase() && V != Base) {
2250 if (GEP->hasAllConstantIndices())
2251 continue;
2252
2253
2254
2255
2257 if (Info.isUnitStride() &&
2259 nullptr,
2260 Stride * I,
2261 true,
2262 0,
2263 GEP->getType()->getPointerAddressSpace()))
2264 continue;
2266 {TTI::OK_AnyValue, TTI::OP_None},
2267 {TTI::OK_AnyValue, TTI::OP_None}, {});
2268 } else {
2271 Indices, AccessTy, CostKind);
2272 }
2273 }
2274 return Cost;
2275}
2276
2280
2281
2282
2283
2284 if (ST->enableDefaultUnroll())
2286
2287
2288
2290
2291
2294 if (L->getHeader()->getParent()->hasOptSize())
2295 return;
2296
2298 L->getExitingBlocks(ExitingBlocks);
2300 << "Blocks: " << L->getNumBlocks() << "\n"
2301 << "Exit blocks: " << ExitingBlocks.size() << "\n");
2302
2303
2304
2305 if (ExitingBlocks.size() > 2)
2306 return;
2307
2308
2309
2310 if (L->getNumBlocks() > 4)
2311 return;
2312
2313
2315 return;
2316
2317
2318
2320 for (auto *BB : L->getBlocks()) {
2321 for (auto &I : *BB) {
2322
2323
2324 if (I.getType()->isVectorTy())
2325 return;
2326
2330 continue;
2331 }
2332 return;
2333 }
2334
2338 }
2339 }
2340
2342
2347
2348
2349
2350 if (Cost < 12)
2351 UP.Force = true;
2352}
2353
2357}
2358
2361
2362 Type *EltTy = cast<VectorType>(Ty)->getElementType();
2366 cast<VectorType>(Ty));
2367
2371
2374 }
2375
2377}
2378
2380 if (SLPMaxVF.getNumOccurrences())
2382
2383
2384
2385
2386
2387
2390
2391
2392 return std::max(1U, RegWidth.getFixedValue() / ElemWidth);
2393}
2394
2398 if (ST->hasVendorXCVmem() && !ST->is64Bit())
2400
2402}
2403
2406
2407
2408
2417}
2418
2420 auto *VTy = dyn_cast<VectorType>(DataTy);
2421 if (!VTy || VTy->isScalableTy())
2422 return false;
2423
2425 return false;
2426
2427
2428
2429 if (VTy->getElementType()->isIntegerTy(8))
2430 if (VTy->getElementCount().getFixedValue() > 256)
2431 return VTy->getPrimitiveSizeInBits() / ST->getRealMinVLen() <
2433 return true;
2434}
2435
2437 auto *VTy = dyn_cast<VectorType>(DataTy);
2438 if (!VTy || VTy->isScalableTy())
2439 return false;
2440
2442 return false;
2443 return true;
2444}
2445
2446
2447
2448
2449
2450
2452 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
2453 bool Considerable = false;
2454 AllowPromotionWithoutCommonHeader = false;
2455 if (!isa<SExtInst>(&I))
2456 return false;
2457 Type *ConsideredSExtType =
2459 if (I.getType() != ConsideredSExtType)
2460 return false;
2461
2462
2463 for (const User *U : I.users()) {
2464 if (const GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(U)) {
2465 Considerable = true;
2466
2467
2468
2469 if (GEPInst->getNumOperands() > 2) {
2470 AllowPromotionWithoutCommonHeader = true;
2471 break;
2472 }
2473 }
2474 }
2475 return Considerable;
2476}
2477
2479 switch (Opcode) {
2480 case Instruction::Add:
2481 case Instruction::Sub:
2482 case Instruction::Mul:
2483 case Instruction::And:
2484 case Instruction::Or:
2485 case Instruction::Xor:
2486 case Instruction::FAdd:
2487 case Instruction::FSub:
2488 case Instruction::FMul:
2489 case Instruction::FDiv:
2490 case Instruction::ICmp:
2491 case Instruction::FCmp:
2492 return true;
2493 case Instruction::Shl:
2494 case Instruction::LShr:
2495 case Instruction::AShr:
2496 case Instruction::UDiv:
2497 case Instruction::SDiv:
2498 case Instruction::URem:
2499 case Instruction::SRem:
2500 case Instruction::Select:
2501 return Operand == 1;
2502 default:
2503 return false;
2504 }
2505}
2506
2509 return false;
2510
2512 return true;
2513
2514 auto *II = dyn_cast<IntrinsicInst>(I);
2515 if (!II)
2516 return false;
2517
2518 switch (II->getIntrinsicID()) {
2519 case Intrinsic::fma:
2520 case Intrinsic::vp_fma:
2521 case Intrinsic::fmuladd:
2522 case Intrinsic::vp_fmuladd:
2523 return Operand == 0 || Operand == 1;
2524 case Intrinsic::vp_shl:
2525 case Intrinsic::vp_lshr:
2526 case Intrinsic::vp_ashr:
2527 case Intrinsic::vp_udiv:
2528 case Intrinsic::vp_sdiv:
2529 case Intrinsic::vp_urem:
2530 case Intrinsic::vp_srem:
2531 case Intrinsic::ssub_sat:
2532 case Intrinsic::vp_ssub_sat:
2533 case Intrinsic::usub_sat:
2534 case Intrinsic::vp_usub_sat:
2535 case Intrinsic::vp_select:
2536 return Operand == 1;
2537
2538 case Intrinsic::vp_add:
2539 case Intrinsic::vp_mul:
2540 case Intrinsic::vp_and:
2541 case Intrinsic::vp_or:
2542 case Intrinsic::vp_xor:
2543 case Intrinsic::vp_fadd:
2544 case Intrinsic::vp_fmul:
2545 case Intrinsic::vp_icmp:
2546 case Intrinsic::vp_fcmp:
2547 case Intrinsic::smin:
2548 case Intrinsic::vp_smin:
2549 case Intrinsic::umin:
2550 case Intrinsic::vp_umin:
2551 case Intrinsic::smax:
2552 case Intrinsic::vp_smax:
2553 case Intrinsic::umax:
2554 case Intrinsic::vp_umax:
2555 case Intrinsic::sadd_sat:
2556 case Intrinsic::vp_sadd_sat:
2557 case Intrinsic::uadd_sat:
2558 case Intrinsic::vp_uadd_sat:
2559
2560 case Intrinsic::vp_sub:
2561 case Intrinsic::vp_fsub:
2562 case Intrinsic::vp_fdiv:
2563 return Operand == 0 || Operand == 1;
2564 default:
2565 return false;
2566 }
2567}
2568
2569
2570
2571
2575
2577 return false;
2578
2579
2580
2581
2582
2583
2584 if (!ST->sinkSplatOperands())
2585 return false;
2586
2587 for (auto OpIdx : enumerate(I->operands())) {
2589 continue;
2590
2591 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
2592
2593 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
2594 continue;
2595
2596
2599 continue;
2600
2601
2602 if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))
2603 continue;
2604
2605
2606
2607 for (Use &U : Op->uses()) {
2610 return false;
2611 }
2612
2615 }
2616 return true;
2617}
2618
2622
2623
2624 if (!ST->enableUnalignedScalarMem())
2626
2627 if (!ST->hasStdExtZbb() && !ST->hasStdExtZbkb() && !IsZeroCmp)
2629
2630 Options.AllowOverlappingLoads = true;
2634 Options.LoadSizes = {8, 4, 2, 1};
2635 Options.AllowedTailExpansions = {3, 5, 6};
2636 } else {
2637 Options.LoadSizes = {4, 2, 1};
2638 Options.AllowedTailExpansions = {3};
2639 }
2641}
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Cost tables and simple lookup functions.
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static cl::opt< int > InstrCost("inline-instr-cost", cl::Hidden, cl::init(5), cl::desc("Cost of a single instruction when inlining"))
mir Rename Register Operands
static const Function * getCalledFunction(const Value *V)
uint64_t IntrinsicInst * II
static bool isRepeatedConcatMask(ArrayRef< int > Mask, int &SubVectorSize)
static unsigned isM1OrSmaller(MVT VT)
static cl::opt< unsigned > SLPMaxVF("riscv-v-slp-max-vf", cl::desc("Overrides result used for getMaximumVF query which is used " "exclusively by SLP vectorizer."), cl::Hidden)
static cl::opt< unsigned > RVVRegisterWidthLMUL("riscv-v-register-bit-width-lmul", cl::desc("The LMUL to use for getRegisterBitWidth queries. Affects LMUL used " "by autovectorized code. Fractional LMULs are not supported."), cl::init(2), cl::Hidden)
static InstructionCost getIntImmCostImpl(const DataLayout &DL, const RISCVSubtarget *ST, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, bool FreeZeroes)
static VectorType * getVRGatherIndexType(MVT DataVT, const RISCVSubtarget &ST, LLVMContext &C)
static const CostTblEntry VectorIntrinsicCostTable[]
static bool canUseShiftPair(Instruction *Inst, const APInt &Imm)
static unsigned getISDForVPIntrinsicID(Intrinsic::ID ID)
This file defines a TargetTransformInfo::Concept conforming object specific to the RISC-V target mach...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static Type * getValueType(Value *V)
Returns the type of the given value/instruction V.
This file describes how to lower LLVM code to machine code.
This pass exposes codegen information to IR-level passes.
Class for arbitrary precision integers.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on arguments.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
std::optional< unsigned > getVScaleForTuning() const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I)
std::optional< unsigned > getMaxVScale() const
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *Ty, int &Index, VectorType *&SubTy) const
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
unsigned getRegUsageForType(Type *Ty)
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
Try to calculate op costs for min/max reduction operations.
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind)
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
bool isLegalAddImmediate(int64_t imm)
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, ArrayRef< Value * > VL={})
Estimate the overhead of scalarizing an instruction.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
@ ICMP_SLT
signed less than
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
bool isFPPredicate() const
bool isIntPredicate() const
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeStoreSizeInBits(Type *Ty) const
Returns the maximum number of bits that may be overwritten by storing the specified type; always a mu...
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Convenience struct for specifying and reasoning about fast-math flags.
Class to represent fixed width SIMD vectors.
static FixedVectorType * getDoubleElementsVectorType(FixedVectorType *VTy)
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
static InstructionCost getInvalid(CostType Val=0)
bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
Represents a single loop in the control flow graph.
static MVT getFloatingPointVT(unsigned BitWidth)
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
unsigned getMaxLMULForFixedLengthVectors() const
bool hasVInstructionsF64() const
unsigned getRealMinVLen() const
bool useRVVForFixedLengthVectors() const
bool hasConditionalMoveFusion() const
bool hasVInstructionsF16() const
bool hasVInstructions() const
std::optional< unsigned > getRealVLen() const
bool hasOptimizedSegmentLoadStore(unsigned NF) const
unsigned getRealMaxVLen() const
bool hasVInstructionsF32() const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I)
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2)
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const
bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment)
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
TTI::AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
bool isLegalMaskedLoadStore(Type *DataType, Align Alignment)
bool canSplatOperand(Instruction *I, int Operand) const
Return true if the (vector) instruction I will be lowered to an instruction with a scalar splat opera...
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr)
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader)
See if I should be considered for address type promotion.
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I)
std::optional< unsigned > getVScaleForTuning() const
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, FastMathFlags FMF, TTI::TargetCostKind CostKind)
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
std::optional< unsigned > getMaxVScale() const
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, ArrayRef< Value * > VL={})
InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind)
TargetTransformInfo::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
bool shouldExpandReduction(const IntrinsicInst *II) const
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys)
InstructionCost getStoreImmCost(Type *VecTy, TTI::OperandValueInfo OpInfo, TTI::TargetCostKind CostKind)
Return the cost of materializing an immediate for a value operand of a store instruction.
bool isLegalMaskedCompressStore(Type *DataTy, Align Alignment)
bool isLegalStridedLoadStore(Type *DataType, Align Alignment)
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
unsigned getRegUsageForType(Type *Ty)
bool isLegalMaskedGather(Type *DataType, Align Alignment)
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpdInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
bool isLegalMaskedScatter(Type *DataType, Align Alignment)
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
bool hasActiveVectorLength(unsigned Opcode, Type *DataType, Align Alignment) const
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
bool shouldExpandCttzElements(EVT VT) const override
Return true if the @llvm.experimental.cttz.elts intrinsic should be expanded using generic code in SelectionDAGBuilder.
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
MVT getContainerForFixedLengthVector(MVT VT) const
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
static RISCVII::VLMUL getLMUL(MVT VT)
bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace, const DataLayout &) const
Returns whether or not generating an interleaved load/store intrinsic for this type will be legal.
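The RISC-V cost hooks listed above are normally reached through the generic TargetTransformInfo wrapper rather than called directly. A minimal sketch of such a query, assuming the public TTI wrapper signatures shown above; the helper name and the 2x budget are invented for illustration and are not part of this file:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

// Hypothetical helper: compare a plain vector load against a masked load of
// the same type under the reciprocal-throughput cost model.
static bool maskedLoadIsCheapEnough(const TargetTransformInfo &TTI, Type *VecTy,
                                    Align Alignment, unsigned AddrSpace) {
  const auto Kind = TargetTransformInfo::TCK_RecipThroughput;
  InstructionCost Plain =
      TTI.getMemoryOpCost(Instruction::Load, VecTy, Alignment, AddrSpace, Kind);
  InstructionCost Masked = TTI.getMaskedMemoryOpCost(
      Instruction::Load, VecTy, Alignment, AddrSpace, Kind);
  return Masked.isValid() && Masked <= Plain * 2; // arbitrary 2x budget
}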
The main scalar evolution driver.
static bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
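A brief usage sketch of SmallVector/SmallVectorImpl as referenced above; the element type and inline capacity are arbitrary examples:

#include "llvm/ADT/SmallVector.h"

using namespace llvm;

// Collect shuffle mask indices; small masks stay in inline storage.
static void buildIota(SmallVectorImpl<int> &Mask, unsigned NumElts) {
  Mask.clear();
  for (unsigned I = 0; I != NumElts; ++I)
    Mask.push_back(static_cast<int>(I));
}

// Callers typically pass a SmallVector with an inline capacity guess:
//   SmallVector<int, 16> Mask;
//   buildIota(Mask, 8);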
An instruction for storing to memory.
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned getMaxExpandSizeMemcmp(bool OptSize) const
Get maximum # of load operations permitted for memcmp.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger size, needs to be expanded to some other code sequence, or the target has a custom expander for it.
MVT getTypeToPromoteTo(unsigned Op, MVT VT) const
If the action for this operation is to promote, this method returns the ValueType to promote to.
const DataLayout & getDataLayout() const
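The TargetLoweringBase queries above are usually combined into an ISD-level legality check before a type-based cost is trusted. A hedged sketch under those signatures; the helper name is invented for illustration:

#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Type.h"

using namespace llvm;

// Hypothetical helper: translate an IR opcode and type to an ISD node and
// ask whether the target lowers that operation with custom code.
static bool isCustomLowered(const TargetLoweringBase &TLI,
                            const DataLayout &DL, unsigned Opcode, Type *Ty) {
  int ISD = TLI.InstructionOpcodeToISD(Opcode);
  EVT VT = TLI.getValueType(DL, Ty, /*AllowUnknown=*/true);
  if (ISD == 0 || !VT.isSimple())
    return false;
  return TLI.isOperationCustom(ISD, VT);
}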
TTI::AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const
bool isLoweredToCall(const Function *F) const
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind)
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCK_Latency
The latency of instruction.
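The cost kinds above select which axis a query optimizes for. A small sketch of choosing one; the optimize-for-size condition is an illustrative convention, not something this file prescribes:

#include "llvm/Analysis/TargetTransformInfo.h"

using namespace llvm;

// Pick a cost kind: code size when optimizing for size, reciprocal
// throughput otherwise (a common convention in the vectorizers).
static TargetTransformInfo::TargetCostKind pickCostKind(bool OptForSize) {
  return OptForSize ? TargetTransformInfo::TCK_CodeSize
                    : TargetTransformInfo::TCK_RecipThroughput;
}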
static bool requiresOrderedReduction(std::optional< FastMathFlags > FMF)
A helper function to determine the type of reduction algorithm used for a given Opcode and set of FastMathFlags FMF.
PopcntSupportKind
Flags indicating the kind of support for population count.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
ShuffleKind
The various kinds of shuffle patterns for vector queries.
@ SK_InsertSubvector
InsertSubvector. Index indicates start offset.
@ SK_Select
Selects elements from the corresponding lane of either source operand.
@ SK_PermuteSingleSrc
Shuffle elements of single source vector with any shuffle mask.
@ SK_Transpose
Transpose two vectors.
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
@ SK_Broadcast
Broadcast element 0 to all other elements.
@ SK_PermuteTwoSrc
Merge elements from two source vectors into one with any shuffle mask.
@ SK_Reverse
Reverse the order of the vector.
@ SK_ExtractSubvector
ExtractSubvector. Index indicates start offset.
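The shuffle kinds above correspond to recognizable mask shapes. A sketch that builds two of them by hand; the vector length is an arbitrary example:

#include "llvm/ADT/SmallVector.h"

using namespace llvm;

// For a 4-element vector:
//   SK_Broadcast mask: {0, 0, 0, 0}  (element 0 copied to every lane)
//   SK_Reverse   mask: {3, 2, 1, 0}  (lanes in reverse order)
static void buildExampleMasks(SmallVectorImpl<int> &Broadcast,
                              SmallVectorImpl<int> &Reverse, unsigned NumElts) {
  Broadcast.assign(NumElts, 0);
  Reverse.clear();
  for (unsigned I = 0; I != NumElts; ++I)
    Reverse.push_back(static_cast<int>(NumElts - 1 - I));
}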
CastContextHint
Represents a hint about the context in which a cast is used.
@ None
The cast is not used with a load/store of any kind.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
static IntegerType * getInt1Ty(LLVMContext &C)
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old number of lanes.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
static IntegerType * getInt64Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
static std::optional< unsigned > getFunctionalOpcodeForVP(Intrinsic::ID ID)
LLVM Value Representation.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector.
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Type * getElementType() const
constexpr ScalarTy getFixedValue() const
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
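The Type, VectorType, ElementCount, and TypeSize helpers above compose as follows. A minimal sketch; the element width and counts are arbitrary examples:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/TypeSize.h"

using namespace llvm;

// Build <4 x i32> and <vscale x 4 x i32> and compare their bit sizes.
static bool fixedFitsInScalableMin(LLVMContext &Ctx) {
  Type *I32 = Type::getIntNTy(Ctx, 32);
  auto *FixedVT = VectorType::get(I32, ElementCount::getFixed(4));
  auto *ScalableVT = VectorType::get(I32, ElementCount::getScalable(4));
  TypeSize FixedSize = FixedVT->getPrimitiveSizeInBits();       // 128 bits
  TypeSize ScalableSize = ScalableVT->getPrimitiveSizeInBits(); // vscale x 128
  // Compare the fixed size against the scalable type's known minimum size.
  return FixedSize.getFixedValue() <= ScalableSize.getKnownMinValue();
}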
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter) to floating point.
@ FADD
Simple binary floating point operators.
@ SIGN_EXTEND
Conversion operators.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of type iN, then return the top part.
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the destination VT.
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
bool match(Val *V, const Pattern &P)
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
auto m_Undef()
Match an arbitrary undef constant.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
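The PatternMatch helpers above are the usual way to recognize the canonical insertelement-plus-shufflevector splat idiom. A hedged sketch; the helper name is invented, and m_ZeroMask is an additional matcher from PatternMatch.h that is not listed above:

#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"

using namespace llvm;
using namespace llvm::PatternMatch;

// Returns the scalar being splatted if V is an insertelement into undef at
// index 0 followed by a zero-mask shuffle, otherwise nullptr.
static Value *matchSplatScalar(Value *V) {
  Value *Scalar = nullptr;
  if (match(V, m_Shuffle(m_InsertElt(m_Undef(), m_Value(Scalar), m_ZeroInt()),
                         m_Undef(), m_ZeroMask())))
    return Scalar;
  return nullptr;
}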
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
static constexpr unsigned RVVBitsPerBlock
initializer< Ty > init(const Ty &Val)
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
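CostTableLookup is typically paired with a static table keyed by ISD opcode and MVT. A hedged sketch; the table contents and fallback value are invented for illustration:

#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/ISDOpcodes.h"

using namespace llvm;

// Invented example table: per-type costs for an ISD::ADD-style query.
static const CostTblEntry ExampleCostTbl[] = {
    {ISD::ADD, MVT::v4i32, 1},
    {ISD::ADD, MVT::v8i32, 2},
};

static unsigned lookupExampleCost(int ISDOpc, MVT VT) {
  if (const auto *Entry = CostTableLookup(ExampleCostTbl, ISDOpc, VT))
    return Entry->Cost;
  return 4; // fallback guess when the table has no entry for this type
}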
bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B, C, ...), such that A is the 0-based index of the item in the sequence, and B, C, ... are the values from the original input ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit version).
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
constexpr int PoisonMaskElem
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
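The mask helpers above build the shuffle masks used when costing strided and interleaved accesses. A small usage sketch; the factor and VF are arbitrary, and llvm::equal (listed below) is used only to spell out the expected results:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/VectorUtils.h"

using namespace llvm;

// Returns true: for VF = 4 and two input vectors,
//   createInterleaveMask(4, 2) -> {0, 4, 1, 5, 2, 6, 3, 7}
//   createStrideMask(1, 2, 4)  -> {1, 3, 5, 7}  (every 2nd lane, from lane 1)
static bool checkExampleMasks() {
  SmallVector<int, 16> Interleave =
      createInterleaveMask(/*VF=*/4, /*NumVecs=*/2);
  SmallVector<int, 16> Stride =
      createStrideMask(/*Start=*/1, /*Stride=*/2, /*VF=*/4);
  return equal(Interleave, ArrayRef<int>{0, 4, 1, 5, 2, 6, 3, 7}) &&
         equal(Stride, ArrayRef<int>{1, 3, 5, 7});
}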
DWARFExpression::Operation Op
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
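A few worked calls for the bit-manipulation helpers above, plus a sketch of the common "round up, then take the log2" computation; the input values are arbitrary examples:

#include "llvm/Support/MathExtras.h"
#include <cstdint>

using namespace llvm;

// Examples of the helpers referenced above:
//   isPowerOf2_32(16)        -> true
//   Log2_32(16)              -> 4
//   Log2_32_Ceil(17)         -> 5
//   PowerOf2Ceil(17)         -> 32
//   countr_zero(0x40u)       -> 6
//   isShiftedMask_64(0x0ff0) -> true
//   divideCeil(10, 4)        -> 3
//   bit_floor(10u)           -> 8
static unsigned roundedLog2(uint32_t N) {
  // Round N up to a power of two, then take the exact log2 of the result.
  uint64_t Rounded = PowerOf2Ceil(N);
  return Log2_32(static_cast<uint32_t>(Rounded));
}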
This struct is a compact representation of a valid (non-zero power of two) alignment.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
unsigned Insns
TODO: Some of these could be merged.
Returns options for expansion of memcmp. IsZeroCmp is true if this is the expansion of memcmp(p1, p2, s) == 0.
Describe known properties for a set of pointers.
Parameters that control the generic loop unrolling transformation.
bool UpperBound
Allow using trip count upper bound to unroll loops.
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold, but used for partial/runtime unrolling (set to UINT_MAX to disable).
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number of loop iterations is not known at compile time).
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminate compares and branches).
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).
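getUnrollingPreferences fills in the UnrollingPreferences fields listed above. A hedged sketch of the general shape of such a hook; the specific settings are illustrative and are not the values this file chooses:

#include "llvm/Analysis/TargetTransformInfo.h"

using namespace llvm;

// Illustrative only: enable runtime/partial unrolling and disable unrolling
// when optimizing for size.
static void tunePreferences(TargetTransformInfo::UnrollingPreferences &UP) {
  UP.Runtime = true;
  UP.Partial = true;
  UP.UpperBound = true;
  UP.UnrollRemainder = true;
  UP.OptSizeThreshold = 0;        // do not unroll under -Os
  UP.PartialOptSizeThreshold = 0; // nor partially unroll under -Os
}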