LLVM: lib/Target/AArch64/AArch64TargetTransformInfo.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
23#include "llvm/IR/IntrinsicsAArch64.h"
30#include
31#include
32using namespace llvm;
34
35#define DEBUG_TYPE "aarch64tti"
36
39
41 "sve-prefer-fixed-over-scalable-if-equal", cl::Hidden);
42
45
48
51
55
59 "Penalty of calling a function that requires a change to PSTATE.SM"));
60
63 cl::desc("Penalty of inlining a call that requires a change to PSTATE.SM"));
64
67
70
71
74 cl::desc("The cost of a histcnt instruction"));
75
78 cl::desc("The number of instructions to search for a redundant dmb"));
79
82 cl::desc("Threshold for forced unrolling of small loops in AArch64"));
83
84namespace {
// Parser/holder for the value of the -sve-tail-folding= option. The option
// text has the form (disabled|all|default|simple)[+flag...]; this class keeps
// the initial setting separately from the bits that were explicitly enabled
// or disabled by the trailing flags and combines them on request.
// NOTE(review): several declaration lines (member fields and some function
// headers) are absent from this listing; comments below describe only the
// statements that are visible.
85class TailFoldingOption {
86
87
88
89
90
91
92
93
97
98
99
// When true, the final bit set starts from the caller-supplied default bits
// instead of InitialBits (see the computation further down).
100 bool NeedsDefault = true;
101
// Record the starting set of tail-folding bits chosen by the first token.
102 void setInitialBits(TailFoldingOpts Bits) { InitialBits = Bits; }
103
// Select whether the default bits (true) or InitialBits (false) are the base.
104 void setNeedsDefault(bool V) { NeedsDefault = V; }
105
// Mark Bit as explicitly enabled and drop any earlier request to disable it,
// so the most recent flag for a given bit wins.
107 EnableBits |= Bit;
108 DisableBits &= ~Bit;
109 }
110
// Mirror image of the above: mark Bit as explicitly disabled and drop any
// earlier request to enable it.
112 EnableBits &= ~Bit;
113 DisableBits |= Bit;
114 }
115
118
// InitialBits may only have been changed away from Disabled when the default
// was not requested; the two are mutually exclusive starting points.
119 assert((InitialBits == TailFoldingOpts::Disabled || !NeedsDefault) &&
120 "Initial bits should only include one of "
121 "(disabled|all|simple|default)");
// Start from the default or the explicit initial bits, then apply the
// explicit enables followed by the explicit disables.
122 Bits = NeedsDefault ? DefaultBits : InitialBits;
123 Bits |= EnableBits;
124 Bits &= ~DisableBits;
125
127 }
128
// Print a diagnostic naming the rejected option text and the accepted grammar
// to errs().
130 errs() << "invalid argument '" << Opt
131 << "' to -sve-tail-folding=; the option should be of the form\n"
132 " (disabled|all|default|simple)[+(reductions|recurrences"
133 "|reverse|noreductions|norecurrences|noreverse)]\n";
135 }
136
137public:
138
// Parse the option text Val and update the stored initial/enable/disable
// state accordingly.
139 void operator=(const std::string &Val) {
140
// Nothing to tokenise for an empty option value.
141 if (Val.empty()) {
143 return;
144 }
145
146
147
// An explicit value was given, so do not fall back to the default unless the
// first token asks for it ("default" below turns this back on).
148 setNeedsDefault(false);
149
// Break the value into '+'-separated tokens; the trailing `false` argument
// asks split() not to keep empty tokens.
// NOTE(review): a value made up only of '+' characters would presumably
// produce no tokens, making TailFoldTypes[0] below out of range - confirm.
151 StringRef(Val).split(TailFoldTypes, '+', -1, false);
152
// The first token normally names the initial mode, in which case flag
// processing starts at index 1.
153 unsigned StartIdx = 1;
154 if (TailFoldTypes[0] == "disabled")
155 setInitialBits(TailFoldingOpts::Disabled);
156 else if (TailFoldTypes[0] == "all")
157 setInitialBits(TailFoldingOpts::All);
158 else if (TailFoldTypes[0] == "default")
159 setNeedsDefault(true);
160 else if (TailFoldTypes[0] == "simple")
161 setInitialBits(TailFoldingOpts::Simple);
162 else {
// No recognised initial mode: treat the first token as a flag as well and
// start from the Disabled bit set.
163 StartIdx = 0;
164 setInitialBits(TailFoldingOpts::Disabled);
165 }
166
// Apply each remaining token as an enable ("x") or disable ("nox") flag.
167 for (unsigned I = StartIdx; I < TailFoldTypes.size(); I++) {
168 if (TailFoldTypes[I] == "reductions")
169 setEnableBit(TailFoldingOpts::Reductions);
170 else if (TailFoldTypes[I] == "recurrences")
171 setEnableBit(TailFoldingOpts::Recurrences);
172 else if (TailFoldTypes[I] == "reverse")
173 setEnableBit(TailFoldingOpts::Reverse);
174 else if (TailFoldTypes[I] == "noreductions")
175 setDisableBit(TailFoldingOpts::Reductions);
176 else if (TailFoldTypes[I] == "norecurrences")
177 setDisableBit(TailFoldingOpts::Recurrences);
178 else if (TailFoldTypes[I] == "noreverse")
179 setDisableBit(TailFoldingOpts::Reverse);
// NOTE(review): the statement handling an unrecognised flag is absent from
// this listing.
180 else
182 }
183 }
184
187 }
188};
189}
190
192
194 "sve-tail-folding",
196 "Control the use of vectorisation using tail-folding for SVE where the"
197 " option is specified in the form (Initial)[+(Flag1|Flag2|...)]:"
198 "\ndisabled (Initial) No loop types will vectorize using "
199 "tail-folding"
200 "\ndefault (Initial) Uses the default tail-folding settings for "
201 "the target CPU"
202 "\nall (Initial) All legal loop types will vectorize using "
203 "tail-folding"
204 "\nsimple (Initial) Use tail-folding for simple loops (not "
205 "reductions or recurrences)"
206 "\nreductions Use tail-folding for loops containing reductions"
207 "\nnoreductions Inverse of above"
208 "\nrecurrences Use tail-folding for loops containing fixed order "
209 "recurrences"
210 "\nnorecurrences Inverse of above"
211 "\nreverse Use tail-folding for loops requiring reversed "
212 "predicates"
213 "\nnoreverse Inverse of above"),
215
216
217
218
220 "enable-fixedwidth-autovec-in-streaming-mode", cl::init(false), cl::Hidden);
221
222
223
224
226 "enable-scalable-autovec-in-streaming-mode", cl::init(false), cl::Hidden);
227
234
235
236
237
242
243
244
245
249 return true;
250 }
251 }
252 return false;
253}
254
258 TTI->isMultiversionedFunction(F) ? "fmv-features" : "target-features";
259 StringRef FeatureStr = F.getFnAttribute(AttributeStr).getValueAsString();
260 FeatureStr.split(Features, ",");
261}
262
268
274
276 return F.hasFnAttribute("fmv-features");
277}
278
// Feature bits whose sense is inverted for the inlining compatibility check.
// That check XORs this set into both the caller's and the callee's feature
// bits before testing that the callee's bits are a subset of the caller's, so
// for the features listed here the requirement is reversed: a caller that has
// the feature requires the callee to have it too.
279const FeatureBitset AArch64TTIImpl::InlineInverseFeatures = {
280 AArch64::FeatureExecuteOnly,
281};
282
284 const Function *Callee) const {
286
287
288
289
292 return false;
293
294
295
299 }
300
302 return false;
303
308 return false;
309 }
310
311 const TargetMachine &TM = getTLI()->getTargetMachine();
316
317
318
319
320 FeatureBitset EffectiveCallerBits = CallerBits ^ InlineInverseFeatures;
321 FeatureBitset EffectiveCalleeBits = CalleeBits ^ InlineInverseFeatures;
322
323 return (EffectiveCallerBits & EffectiveCalleeBits) == EffectiveCalleeBits;
324}
325
330 return false;
331
332
333
334
335
336
337
338
339
340 if (ST->useSVEForFixedLengthVectors() && llvm::any_of(Types, [](Type *Ty) {
341 auto FVTy = dyn_cast(Ty);
342 return FVTy &&
343 FVTy->getScalarSizeInBits() * FVTy->getNumElements() > 128;
344 }))
345 return false;
346
347 return true;
348}
349
350unsigned
352 unsigned DefaultCallPenalty) const {
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
374 SMECallAttrs CallAttrs(Call, &getTLI()->getRuntimeLibcallsInfo());
375
377 if (F == Call.getCaller())
381 }
382
383 return DefaultCallPenalty;
384}
385
389
391 return true;
392
394 ST->isSVEorStreamingSVEAvailable() &&
395 !ST->disableMaximizeScalableBandwidth();
396}
397
398
399
400
402
404 return 0;
405
406 if (Val < 0)
407 Val = ~Val;
408
409
412 return Insn.size();
413}
414
415
419 assert(Ty->isIntegerTy());
420
421 unsigned BitSize = Ty->getPrimitiveSizeInBits();
422 if (BitSize == 0)
423 return ~0U;
424
425
426 APInt ImmVal = Imm;
427 if (BitSize & 0x3f)
428 ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
429
430
431
433 for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
437 }
438
439 return std::max(1, Cost);
440}
441
446 assert(Ty->isIntegerTy());
447
448 unsigned BitSize = Ty->getPrimitiveSizeInBits();
449
450
451 if (BitSize == 0)
453
454 unsigned ImmIdx = ~0U;
455 switch (Opcode) {
456 default:
458 case Instruction::GetElementPtr:
459
460 if (Idx == 0)
463 case Instruction::Store:
464 ImmIdx = 0;
465 break;
466 case Instruction::Add:
467 case Instruction::Sub:
468 case Instruction::Mul:
469 case Instruction::UDiv:
470 case Instruction::SDiv:
471 case Instruction::URem:
472 case Instruction::SRem:
473 case Instruction::And:
474 case Instruction::Or:
475 case Instruction::Xor:
476 case Instruction::ICmp:
477 ImmIdx = 1;
478 break;
479
480 case Instruction::Shl:
481 case Instruction::LShr:
482 case Instruction::AShr:
483 if (Idx == 1)
485 break;
486 case Instruction::Trunc:
487 case Instruction::ZExt:
488 case Instruction::SExt:
489 case Instruction::IntToPtr:
490 case Instruction::PtrToInt:
491 case Instruction::BitCast:
492 case Instruction::PHI:
493 case Instruction::Call:
494 case Instruction::Select:
495 case Instruction::Ret:
496 case Instruction::Load:
497 break;
498 }
499
500 if (Idx == ImmIdx) {
501 int NumConstants = (BitSize + 63) / 64;
506 }
508}
509
514 assert(Ty->isIntegerTy());
515
516 unsigned BitSize = Ty->getPrimitiveSizeInBits();
517
518
519 if (BitSize == 0)
521
522
523
524
525 if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)
527
528 switch (IID) {
529 default:
531 case Intrinsic::sadd_with_overflow:
532 case Intrinsic::uadd_with_overflow:
533 case Intrinsic::ssub_with_overflow:
534 case Intrinsic::usub_with_overflow:
535 case Intrinsic::smul_with_overflow:
536 case Intrinsic::umul_with_overflow:
537 if (Idx == 1) {
538 int NumConstants = (BitSize + 63) / 64;
543 }
544 break;
545 case Intrinsic::experimental_stackmap:
546 if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
548 break;
549 case Intrinsic::experimental_patchpoint_void:
550 case Intrinsic::experimental_patchpoint:
551 if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
553 break;
554 case Intrinsic::experimental_gc_statepoint:
555 if ((Idx < 5) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
557 break;
558 }
560}
561
565 if (TyWidth == 32 || TyWidth == 64)
567
569}
570
575
578
579
582
583
584 if (!ST->hasSVE2())
586
587 Type *BucketPtrsTy = ICA.getArgTypes()[0];
588 Type *EltTy = ICA.getArgTypes()[1];
589 unsigned TotalHistCnts = 1;
590
592
595
596
597
599 unsigned EC = VTy->getElementCount().getKnownMinValue();
602
603
604 unsigned LegalEltSize = EltSize <= 32 ? 32 : 64;
605
606 if (EC == 2 || (LegalEltSize == 32 && EC == 4))
608
610 TotalHistCnts = EC / NaturalVectorWidth;
611
613 }
614
616}
617
621
622
623
624
629
630 switch (ICA.getID()) {
631 case Intrinsic::experimental_vector_histogram_add: {
633
635 return HistCost;
636 break;
637 }
638 case Intrinsic::umin:
639 case Intrinsic::umax:
640 case Intrinsic::smin:
641 case Intrinsic::smax: {
642 static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
643 MVT::v8i16, MVT::v2i32, MVT::v4i32,
644 MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32,
645 MVT::nxv2i64};
647
648 if (LT.second == MVT::v2i64)
649 return LT.first * 2;
650 if (any_of(ValidMinMaxTys, [<](MVT M) { return M == LT.second; }))
651 return LT.first;
652 break;
653 }
654 case Intrinsic::sadd_sat:
655 case Intrinsic::ssub_sat:
656 case Intrinsic::uadd_sat:
657 case Intrinsic::usub_sat: {
658 static const auto ValidSatTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
659 MVT::v8i16, MVT::v2i32, MVT::v4i32,
660 MVT::v2i64};
662
663
664 unsigned Instrs =
665 LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits() ? 1 : 4;
666 if (any_of(ValidSatTys, [<](MVT M) { return M == LT.second; }))
667 return LT.first * Instrs;
668
671
672 if (ST->isSVEAvailable() && VectorSize >= 128 && isPowerOf2_64(VectorSize))
673 return LT.first * Instrs;
674
675 break;
676 }
677 case Intrinsic::abs: {
678 static const auto ValidAbsTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
679 MVT::v8i16, MVT::v2i32, MVT::v4i32,
680 MVT::v2i64};
682 if (any_of(ValidAbsTys, [<](MVT M) { return M == LT.second; }))
683 return LT.first;
684 break;
685 }
686 case Intrinsic::bswap: {
687 static const auto ValidAbsTys = {MVT::v4i16, MVT::v8i16, MVT::v2i32,
688 MVT::v4i32, MVT::v2i64};
690 if (any_of(ValidAbsTys, [<](MVT M) { return M == LT.second; }) &&
691 LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits())
692 return LT.first;
693 break;
694 }
695 case Intrinsic::fma:
696 case Intrinsic::fmuladd: {
697
698
701 (EltTy->isHalfTy() && ST->hasFullFP16()))
703 break;
704 }
705 case Intrinsic::stepvector: {
708
709
710 if (LT.first > 1) {
714 Cost += AddCost * (LT.first - 1);
715 }
717 }
718 case Intrinsic::vector_extract:
719 case Intrinsic::vector_insert: {
720
721
722
723
724
725
728 break;
729
731 EVT VecVT = getTLI()->getValueType(DL, ICA.getArgTypes()[0]);
732 bool IsExtract = ICA.getID() == Intrinsic::vector_extract;
733 EVT SubVecVT = IsExtract ? getTLI()->getValueType(DL, RetTy)
734 : getTLI()->getValueType(DL, ICA.getArgTypes()[1]);
735
736
738 break;
739
741 getTLI()->getTypeConversion(C, SubVecVT);
743 getTLI()->getTypeConversion(C, VecVT);
749 break;
750 }
751 case Intrinsic::bitreverse: {
752 static const CostTblEntry BitreverseTbl[] = {
753 {Intrinsic::bitreverse, MVT::i32, 1},
754 {Intrinsic::bitreverse, MVT::i64, 1},
755 {Intrinsic::bitreverse, MVT::v8i8, 1},
756 {Intrinsic::bitreverse, MVT::v16i8, 1},
757 {Intrinsic::bitreverse, MVT::v4i16, 2},
758 {Intrinsic::bitreverse, MVT::v8i16, 2},
759 {Intrinsic::bitreverse, MVT::v2i32, 2},
760 {Intrinsic::bitreverse, MVT::v4i32, 2},
761 {Intrinsic::bitreverse, MVT::v1i64, 2},
762 {Intrinsic::bitreverse, MVT::v2i64, 2},
763 };
765 const auto *Entry =
767 if (Entry) {
768
769
770 if (TLI->getValueType(DL, RetTy, true) == MVT::i8 ||
771 TLI->getValueType(DL, RetTy, true) == MVT::i16)
772 return LegalisationCost.first * Entry->Cost + 1;
773
774 return LegalisationCost.first * Entry->Cost;
775 }
776 break;
777 }
778 case Intrinsic::ctpop: {
779 if (!ST->hasNEON()) {
780
782 }
793 };
795 MVT MTy = LT.second;
797
798
800 RetTy->getScalarSizeInBits()
801 ? 1
802 : 0;
803 return LT.first * Entry->Cost + ExtraCost;
804 }
805 break;
806 }
807 case Intrinsic::sadd_with_overflow:
808 case Intrinsic::uadd_with_overflow:
809 case Intrinsic::ssub_with_overflow:
810 case Intrinsic::usub_with_overflow:
811 case Intrinsic::smul_with_overflow:
812 case Intrinsic::umul_with_overflow: {
813 static const CostTblEntry WithOverflowCostTbl[] = {
814 {Intrinsic::sadd_with_overflow, MVT::i8, 3},
815 {Intrinsic::uadd_with_overflow, MVT::i8, 3},
816 {Intrinsic::sadd_with_overflow, MVT::i16, 3},
817 {Intrinsic::uadd_with_overflow, MVT::i16, 3},
818 {Intrinsic::sadd_with_overflow, MVT::i32, 1},
819 {Intrinsic::uadd_with_overflow, MVT::i32, 1},
820 {Intrinsic::sadd_with_overflow, MVT::i64, 1},
821 {Intrinsic::uadd_with_overflow, MVT::i64, 1},
822 {Intrinsic::ssub_with_overflow, MVT::i8, 3},
823 {Intrinsic::usub_with_overflow, MVT::i8, 3},
824 {Intrinsic::ssub_with_overflow, MVT::i16, 3},
825 {Intrinsic::usub_with_overflow, MVT::i16, 3},
826 {Intrinsic::ssub_with_overflow, MVT::i32, 1},
827 {Intrinsic::usub_with_overflow, MVT::i32, 1},
828 {Intrinsic::ssub_with_overflow, MVT::i64, 1},
829 {Intrinsic::usub_with_overflow, MVT::i64, 1},
830 {Intrinsic::smul_with_overflow, MVT::i8, 5},
831 {Intrinsic::umul_with_overflow, MVT::i8, 4},
832 {Intrinsic::smul_with_overflow, MVT::i16, 5},
833 {Intrinsic::umul_with_overflow, MVT::i16, 4},
834 {Intrinsic::smul_with_overflow, MVT::i32, 2},
835 {Intrinsic::umul_with_overflow, MVT::i32, 2},
836 {Intrinsic::smul_with_overflow, MVT::i64, 3},
837 {Intrinsic::umul_with_overflow, MVT::i64, 3},
838 };
839 EVT MTy = TLI->getValueType(DL, RetTy->getContainedType(0), true);
843 return Entry->Cost;
844 break;
845 }
846 case Intrinsic::fptosi_sat:
847 case Intrinsic::fptoui_sat: {
849 break;
850 bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;
852 EVT MTy = TLI->getValueType(DL, RetTy);
853
854
855 if ((LT.second == MVT::f32 || LT.second == MVT::f64 ||
856 LT.second == MVT::v2f32 || LT.second == MVT::v4f32 ||
857 LT.second == MVT::v2f64)) {
859 (LT.second == MVT::f64 && MTy == MVT::i32) ||
860 (LT.second == MVT::f32 && MTy == MVT::i64)))
861 return LT.first;
862
863 if (LT.second.getScalarType() == MVT::f32 && MTy.isFixedLengthVector() &&
866 }
867
868
869 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
872 RetTy,
876 if ((LT.second == MVT::f16 && MTy == MVT::i32) ||
877 (LT.second == MVT::f16 && MTy == MVT::i64) ||
878 ((LT.second == MVT::v4f16 || LT.second == MVT::v8f16) &&
880 return LT.first;
881
882 if (LT.second.getScalarType() == MVT::f16 && MTy.isFixedLengthVector() &&
885
886
887 if (LT.second.getScalarType() == MVT::f16 && MTy.isFixedLengthVector() &&
890
891
892 if ((LT.second.getScalarType() == MVT::f32 ||
893 LT.second.getScalarType() == MVT::f64 ||
894 LT.second.getScalarType() == MVT::f16) &&
896 Type *LegalTy =
897 Type::getIntNTy(RetTy->getContext(), LT.second.getScalarSizeInBits());
898 if (LT.second.isVector())
899 LegalTy = VectorType::get(LegalTy, LT.second.getVectorElementCount());
902 LegalTy, {LegalTy, LegalTy});
905 LegalTy, {LegalTy, LegalTy});
907 return LT.first * Cost +
908 ((LT.second.getScalarType() != MVT::f16 || ST->hasFullFP16()) ? 0
909 : 1);
910 }
911
912
914 RetTy = RetTy->getScalarType();
915 if (LT.second.isVector()) {
916 FPTy = VectorType::get(FPTy, LT.second.getVectorElementCount());
917 RetTy = VectorType::get(RetTy, LT.second.getVectorElementCount());
918 }
924 getCastInstrCost(IsSigned ? Instruction::FPToSI : Instruction::FPToUI,
926 if (IsSigned) {
932 }
933 return LT.first * Cost;
934 }
935 case Intrinsic::fshl:
936 case Intrinsic::fshr: {
938 break;
939
941
942
943
944
945 if (RetTy->isIntegerTy() && ICA.getArgs()[0] == ICA.getArgs()[1] &&
946 (RetTy->getPrimitiveSizeInBits() == 32 ||
947 RetTy->getPrimitiveSizeInBits() == 64)) {
949 (ICA.getID() == Intrinsic::fshl && !OpInfoZ.isConstant()) ? 1 : 0;
950 return 1 + NegCost;
951 }
952
953
955 break;
956
960 {Intrinsic::fshl, MVT::v4i32, 2},
961 {Intrinsic::fshl, MVT::v2i64, 2}, {Intrinsic::fshl, MVT::v16i8, 2},
962 {Intrinsic::fshl, MVT::v8i16, 2}, {Intrinsic::fshl, MVT::v2i32, 2},
963 {Intrinsic::fshl, MVT::v8i8, 2}, {Intrinsic::fshl, MVT::v4i16, 2}};
964
965
966 const auto *Entry =
967 CostTableLookup(FshlTbl, Intrinsic::fshl, LegalisationCost.second);
968 if (Entry)
969 return LegalisationCost.first * Entry->Cost;
970 }
971
973 if (!RetTy->isIntegerTy())
974 break;
975
976
977
978 bool HigherCost = (RetTy->getScalarSizeInBits() != 32 &&
979 RetTy->getScalarSizeInBits() < 64) ||
980 (RetTy->getScalarSizeInBits() % 64 != 0);
981 unsigned ExtraCost = HigherCost ? 1 : 0;
982 if (RetTy->getScalarSizeInBits() == 32 ||
983 RetTy->getScalarSizeInBits() == 64)
984 ExtraCost = 0;
985
986 else if (HigherCost)
987 ExtraCost = 1;
988 else
989 break;
990 return TyL.first + ExtraCost;
991 }
992 case Intrinsic::get_active_lane_mask: {
994 EVT RetVT = getTLI()->getValueType(DL, RetTy);
996 if (getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT))
997 break;
998
999 if (RetTy->isScalableTy()) {
1000 if (TLI->getTypeAction(RetTy->getContext(), RetVT) !=
1002 break;
1003
1006
1007
1008
1009
1010 if (ST->hasSVE2p1() || ST->hasSME2()) {
1012 if (Cost == 1)
1013 return Cost;
1014 }
1015
1016
1017
1018
1025 return Cost + (SplitCost * (Cost - 1));
1026 } else if (!getTLI()->isTypeLegal(RetVT)) {
1027
1028
1029
1030
1031
1032
1033
1034
1035
1037 }
1038 break;
1039 }
1040 case Intrinsic::experimental_vector_match: {
1042 EVT SearchVT = getTLI()->getValueType(DL, ICA.getArgTypes()[0]);
1043 unsigned SearchSize = NeedleTy->getNumElements();
1044 if (!getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize)) {
1045
1046
1047
1048
1049
1053 return Cost;
1054 }
1055 break;
1056 }
1057 case Intrinsic::experimental_cttz_elts: {
1058 EVT ArgVT = getTLI()->getValueType(DL, ICA.getArgTypes()[0]);
1059 if (!getTLI()->shouldExpandCttzElements(ArgVT)) {
1060
1061
1062
1063 return 4;
1064 }
1065 break;
1066 }
1067 case Intrinsic::loop_dependence_raw_mask:
1068 case Intrinsic::loop_dependence_war_mask: {
1069
1070 if (ST->hasSVE2() || ST->hasSME()) {
1071 EVT VecVT = getTLI()->getValueType(DL, RetTy);
1072 unsigned EltSizeInBytes =
1074 if (is_contained({1u, 2u, 4u, 8u}, EltSizeInBytes) &&
1076 return 1;
1077 }
1078 break;
1079 }
1080 case Intrinsic::experimental_vector_extract_last_active:
1081 if (ST->isSVEorStreamingSVEAvailable()) {
1083
1084 return LegalCost;
1085 }
1086 break;
1087 default:
1088 break;
1089 }
1091}
1092
1093
1094
1098 auto RequiredType = II.getType();
1099
1101 assert(PN && "Expected Phi Node!");
1102
1103
1104 if (!PN->hasOneUse())
1105 return std::nullopt;
1106
1107 for (Value *IncValPhi : PN->incoming_values()) {
1109 if (!Reinterpret ||
1110 Reinterpret->getIntrinsicID() !=
1111 Intrinsic::aarch64_sve_convert_to_svbool ||
1112 RequiredType != Reinterpret->getArgOperand(0)->getType())
1113 return std::nullopt;
1114 }
1115
1116
1120
1121 for (unsigned I = 0; I < PN->getNumIncomingValues(); I++) {
1123 NPN->addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(I));
1124 Worklist.push_back(Reinterpret);
1125 }
1126
1127
1129}
1130
1131
1132
1134
1135
1136
1137
1138
1139
1147
1148
1155
1156
1162
1163
1169
1170
1171
1177
1178
1179
1186
1187
1188
1190
1191
1192
1193
1194
1196 return GoverningPredicateIdx != std::numeric_limits::max();
1197 }
1198
1201 return GoverningPredicateIdx;
1202 }
1203
1206 GoverningPredicateIdx = Index;
1207 return *this;
1208 }
1209
1210
1211
1212
1213
1214
1215
1216
1217
1221
1224 return UndefIntrinsic;
1225 }
1226
1229 UndefIntrinsic = IID;
1230 return *this;
1231 }
1232
1234
1237 return IROpcode;
1238 }
1239
1242 IROpcode = Opcode;
1243 return *this;
1244 }
1245
1246
1247
1248
1249
1251 return ResultLanes == InactiveLanesTakenFromOperand;
1252 }
1253
1256 return OperandIdxForInactiveLanes;
1257 }
1258
1260 assert(ResultLanes == Uninitialized && "Cannot set property twice!");
1261 ResultLanes = InactiveLanesTakenFromOperand;
1262 OperandIdxForInactiveLanes = Index;
1263 return *this;
1264 }
1265
1267 return ResultLanes == InactiveLanesAreNotDefined;
1268 }
1269
1271 assert(ResultLanes == Uninitialized && "Cannot set property twice!");
1272 ResultLanes = InactiveLanesAreNotDefined;
1273 return *this;
1274 }
1275
1277 return ResultLanes == InactiveLanesAreUnused;
1278 }
1279
1281 assert(ResultLanes == Uninitialized && "Cannot set property twice!");
1282 ResultLanes = InactiveLanesAreUnused;
1283 return *this;
1284 }
1285
1286
1287
1288
1290
1292 ResultIsZeroInitialized = true;
1293 return *this;
1294 }
1295
1296
1297
1298
1299
1300
1301
1303 return OperandIdxWithNoActiveLanes != std::numeric_limits::max();
1304 }
1305
1308 return OperandIdxWithNoActiveLanes;
1309 }
1310
1313 OperandIdxWithNoActiveLanes = Index;
1314 return *this;
1315 }
1316
1317private:
1318 unsigned GoverningPredicateIdx = std::numeric_limits::max();
1319
1321 unsigned IROpcode = 0;
1322
1323 enum PredicationStyle {
1325 InactiveLanesTakenFromOperand,
1326 InactiveLanesAreNotDefined,
1327 InactiveLanesAreUnused
1329
1330 bool ResultIsZeroInitialized = false;
1331 unsigned OperandIdxForInactiveLanes = std::numeric_limits::max();
1332 unsigned OperandIdxWithNoActiveLanes = std::numeric_limits::max();
1333};
1334
1336
1337
1340 return !isa(V->getType());
1341 }))
1343
1345 switch (IID) {
1346 default:
1347 break;
1348 case Intrinsic::aarch64_sve_fcvt_bf16f32_v2:
1349 case Intrinsic::aarch64_sve_fcvt_f16f32:
1350 case Intrinsic::aarch64_sve_fcvt_f16f64:
1351 case Intrinsic::aarch64_sve_fcvt_f32f16:
1352 case Intrinsic::aarch64_sve_fcvt_f32f64:
1353 case Intrinsic::aarch64_sve_fcvt_f64f16:
1354 case Intrinsic::aarch64_sve_fcvt_f64f32:
1355 case Intrinsic::aarch64_sve_fcvtlt_f32f16:
1356 case Intrinsic::aarch64_sve_fcvtlt_f64f32:
1357 case Intrinsic::aarch64_sve_fcvtx_f32f64:
1358 case Intrinsic::aarch64_sve_fcvtzs:
1359 case Intrinsic::aarch64_sve_fcvtzs_i32f16:
1360 case Intrinsic::aarch64_sve_fcvtzs_i32f64:
1361 case Intrinsic::aarch64_sve_fcvtzs_i64f16:
1362 case Intrinsic::aarch64_sve_fcvtzs_i64f32:
1363 case Intrinsic::aarch64_sve_fcvtzu:
1364 case Intrinsic::aarch64_sve_fcvtzu_i32f16:
1365 case Intrinsic::aarch64_sve_fcvtzu_i32f64:
1366 case Intrinsic::aarch64_sve_fcvtzu_i64f16:
1367 case Intrinsic::aarch64_sve_fcvtzu_i64f32:
1368 case Intrinsic::aarch64_sve_scvtf:
1369 case Intrinsic::aarch64_sve_scvtf_f16i32:
1370 case Intrinsic::aarch64_sve_scvtf_f16i64:
1371 case Intrinsic::aarch64_sve_scvtf_f32i64:
1372 case Intrinsic::aarch64_sve_scvtf_f64i32:
1373 case Intrinsic::aarch64_sve_ucvtf:
1374 case Intrinsic::aarch64_sve_ucvtf_f16i32:
1375 case Intrinsic::aarch64_sve_ucvtf_f16i64:
1376 case Intrinsic::aarch64_sve_ucvtf_f32i64:
1377 case Intrinsic::aarch64_sve_ucvtf_f64i32:
1379
1380 case Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2:
1381 case Intrinsic::aarch64_sve_fcvtnt_f16f32:
1382 case Intrinsic::aarch64_sve_fcvtnt_f32f64:
1383 case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
1385
1386 case Intrinsic::aarch64_sve_fabd:
1388 case Intrinsic::aarch64_sve_fadd:
1391 case Intrinsic::aarch64_sve_fdiv:
1394 case Intrinsic::aarch64_sve_fmax:
1396 case Intrinsic::aarch64_sve_fmaxnm:
1398 case Intrinsic::aarch64_sve_fmin:
1400 case Intrinsic::aarch64_sve_fminnm:
1402 case Intrinsic::aarch64_sve_fmla:
1404 case Intrinsic::aarch64_sve_fmls:
1406 case Intrinsic::aarch64_sve_fmul:
1409 case Intrinsic::aarch64_sve_fmulx:
1411 case Intrinsic::aarch64_sve_fnmla:
1413 case Intrinsic::aarch64_sve_fnmls:
1415 case Intrinsic::aarch64_sve_fsub:
1418 case Intrinsic::aarch64_sve_add:
1421 case Intrinsic::aarch64_sve_mla:
1423 case Intrinsic::aarch64_sve_mls:
1425 case Intrinsic::aarch64_sve_mul:
1428 case Intrinsic::aarch64_sve_sabd:
1430 case Intrinsic::aarch64_sve_sdiv:
1433 case Intrinsic::aarch64_sve_smax:
1435 case Intrinsic::aarch64_sve_smin:
1437 case Intrinsic::aarch64_sve_smulh:
1439 case Intrinsic::aarch64_sve_sub:
1442 case Intrinsic::aarch64_sve_uabd:
1444 case Intrinsic::aarch64_sve_udiv:
1447 case Intrinsic::aarch64_sve_umax:
1449 case Intrinsic::aarch64_sve_umin:
1451 case Intrinsic::aarch64_sve_umulh:
1453 case Intrinsic::aarch64_sve_asr:
1456 case Intrinsic::aarch64_sve_lsl:
1459 case Intrinsic::aarch64_sve_lsr:
1462 case Intrinsic::aarch64_sve_and:
1465 case Intrinsic::aarch64_sve_bic:
1467 case Intrinsic::aarch64_sve_eor:
1470 case Intrinsic::aarch64_sve_orr:
1473 case Intrinsic::aarch64_sve_shsub:
1475 case Intrinsic::aarch64_sve_shsubr:
1477 case Intrinsic::aarch64_sve_sqrshl:
1479 case Intrinsic::aarch64_sve_sqshl:
1481 case Intrinsic::aarch64_sve_sqsub:
1483 case Intrinsic::aarch64_sve_srshl:
1485 case Intrinsic::aarch64_sve_uhsub:
1487 case Intrinsic::aarch64_sve_uhsubr:
1489 case Intrinsic::aarch64_sve_uqrshl:
1491 case Intrinsic::aarch64_sve_uqshl:
1493 case Intrinsic::aarch64_sve_uqsub:
1495 case Intrinsic::aarch64_sve_urshl:
1497
1498 case Intrinsic::aarch64_sve_add_u:
1500 Instruction::Add);
1501 case Intrinsic::aarch64_sve_and_u:
1503 Instruction::And);
1504 case Intrinsic::aarch64_sve_asr_u:
1506 Instruction::AShr);
1507 case Intrinsic::aarch64_sve_eor_u:
1509 Instruction::Xor);
1510 case Intrinsic::aarch64_sve_fadd_u:
1512 Instruction::FAdd);
1513 case Intrinsic::aarch64_sve_fdiv_u:
1515 Instruction::FDiv);
1516 case Intrinsic::aarch64_sve_fmul_u:
1518 Instruction::FMul);
1519 case Intrinsic::aarch64_sve_fsub_u:
1521 Instruction::FSub);
1522 case Intrinsic::aarch64_sve_lsl_u:
1524 Instruction::Shl);
1525 case Intrinsic::aarch64_sve_lsr_u:
1527 Instruction::LShr);
1528 case Intrinsic::aarch64_sve_mul_u:
1530 Instruction::Mul);
1531 case Intrinsic::aarch64_sve_orr_u:
1533 Instruction::Or);
1534 case Intrinsic::aarch64_sve_sdiv_u:
1536 Instruction::SDiv);
1537 case Intrinsic::aarch64_sve_sub_u:
1539 Instruction::Sub);
1540 case Intrinsic::aarch64_sve_udiv_u:
1542 Instruction::UDiv);
1543
1544 case Intrinsic::aarch64_sve_addqv:
1545 case Intrinsic::aarch64_sve_and_z:
1546 case Intrinsic::aarch64_sve_bic_z:
1547 case Intrinsic::aarch64_sve_brka_z:
1548 case Intrinsic::aarch64_sve_brkb_z:
1549 case Intrinsic::aarch64_sve_brkn_z:
1550 case Intrinsic::aarch64_sve_brkpa_z:
1551 case Intrinsic::aarch64_sve_brkpb_z:
1552 case Intrinsic::aarch64_sve_cntp:
1553 case Intrinsic::aarch64_sve_compact:
1554 case Intrinsic::aarch64_sve_eor_z:
1555 case Intrinsic::aarch64_sve_eorv:
1556 case Intrinsic::aarch64_sve_eorqv:
1557 case Intrinsic::aarch64_sve_nand_z:
1558 case Intrinsic::aarch64_sve_nor_z:
1559 case Intrinsic::aarch64_sve_orn_z:
1560 case Intrinsic::aarch64_sve_orr_z:
1561 case Intrinsic::aarch64_sve_orv:
1562 case Intrinsic::aarch64_sve_orqv:
1563 case Intrinsic::aarch64_sve_pnext:
1564 case Intrinsic::aarch64_sve_rdffr_z:
1565 case Intrinsic::aarch64_sve_saddv:
1566 case Intrinsic::aarch64_sve_uaddv:
1567 case Intrinsic::aarch64_sve_umaxv:
1568 case Intrinsic::aarch64_sve_umaxqv:
1569 case Intrinsic::aarch64_sve_cmpeq:
1570 case Intrinsic::aarch64_sve_cmpeq_wide:
1571 case Intrinsic::aarch64_sve_cmpge:
1572 case Intrinsic::aarch64_sve_cmpge_wide:
1573 case Intrinsic::aarch64_sve_cmpgt:
1574 case Intrinsic::aarch64_sve_cmpgt_wide:
1575 case Intrinsic::aarch64_sve_cmphi:
1576 case Intrinsic::aarch64_sve_cmphi_wide:
1577 case Intrinsic::aarch64_sve_cmphs:
1578 case Intrinsic::aarch64_sve_cmphs_wide:
1579 case Intrinsic::aarch64_sve_cmple_wide:
1580 case Intrinsic::aarch64_sve_cmplo_wide:
1581 case Intrinsic::aarch64_sve_cmpls_wide:
1582 case Intrinsic::aarch64_sve_cmplt_wide:
1583 case Intrinsic::aarch64_sve_cmpne:
1584 case Intrinsic::aarch64_sve_cmpne_wide:
1585 case Intrinsic::aarch64_sve_facge:
1586 case Intrinsic::aarch64_sve_facgt:
1587 case Intrinsic::aarch64_sve_fcmpeq:
1588 case Intrinsic::aarch64_sve_fcmpge:
1589 case Intrinsic::aarch64_sve_fcmpgt:
1590 case Intrinsic::aarch64_sve_fcmpne:
1591 case Intrinsic::aarch64_sve_fcmpuo:
1592 case Intrinsic::aarch64_sve_ld1:
1593 case Intrinsic::aarch64_sve_ld1_gather:
1594 case Intrinsic::aarch64_sve_ld1_gather_index:
1595 case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
1596 case Intrinsic::aarch64_sve_ld1_gather_sxtw:
1597 case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
1598 case Intrinsic::aarch64_sve_ld1_gather_uxtw:
1599 case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
1600 case Intrinsic::aarch64_sve_ld1q_gather_index:
1601 case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:
1602 case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:
1603 case Intrinsic::aarch64_sve_ld1ro:
1604 case Intrinsic::aarch64_sve_ld1rq:
1605 case Intrinsic::aarch64_sve_ld1udq:
1606 case Intrinsic::aarch64_sve_ld1uwq:
1607 case Intrinsic::aarch64_sve_ld2_sret:
1608 case Intrinsic::aarch64_sve_ld2q_sret:
1609 case Intrinsic::aarch64_sve_ld3_sret:
1610 case Intrinsic::aarch64_sve_ld3q_sret:
1611 case Intrinsic::aarch64_sve_ld4_sret:
1612 case Intrinsic::aarch64_sve_ld4q_sret:
1613 case Intrinsic::aarch64_sve_ldff1:
1614 case Intrinsic::aarch64_sve_ldff1_gather:
1615 case Intrinsic::aarch64_sve_ldff1_gather_index:
1616 case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
1617 case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
1618 case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
1619 case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
1620 case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
1621 case Intrinsic::aarch64_sve_ldnf1:
1622 case Intrinsic::aarch64_sve_ldnt1:
1623 case Intrinsic::aarch64_sve_ldnt1_gather:
1624 case Intrinsic::aarch64_sve_ldnt1_gather_index:
1625 case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
1626 case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
1628
1629 case Intrinsic::aarch64_sve_prf:
1630 case Intrinsic::aarch64_sve_prfb_gather_index:
1631 case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
1632 case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
1633 case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
1634 case Intrinsic::aarch64_sve_prfd_gather_index:
1635 case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
1636 case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
1637 case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
1638 case Intrinsic::aarch64_sve_prfh_gather_index:
1639 case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
1640 case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
1641 case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
1642 case Intrinsic::aarch64_sve_prfw_gather_index:
1643 case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
1644 case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
1645 case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
1647
1648 case Intrinsic::aarch64_sve_st1_scatter:
1649 case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
1650 case Intrinsic::aarch64_sve_st1_scatter_sxtw:
1651 case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
1652 case Intrinsic::aarch64_sve_st1_scatter_uxtw:
1653 case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
1654 case Intrinsic::aarch64_sve_st1dq:
1655 case Intrinsic::aarch64_sve_st1q_scatter_index:
1656 case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset:
1657 case Intrinsic::aarch64_sve_st1q_scatter_vector_offset:
1658 case Intrinsic::aarch64_sve_st1wq:
1659 case Intrinsic::aarch64_sve_stnt1:
1660 case Intrinsic::aarch64_sve_stnt1_scatter:
1661 case Intrinsic::aarch64_sve_stnt1_scatter_index:
1662 case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
1663 case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
1665 case Intrinsic::aarch64_sve_st2:
1666 case Intrinsic::aarch64_sve_st2q:
1668 case Intrinsic::aarch64_sve_st3:
1669 case Intrinsic::aarch64_sve_st3q:
1671 case Intrinsic::aarch64_sve_st4:
1672 case Intrinsic::aarch64_sve_st4q:
1674 }
1675
1677}
1678
// Fragment of a predicate helper. It may replace `Pred` with the value captured
// as `UncastedPred` and finally reports whether `C` is non-null and all-ones.
// NOTE(review): the signature, both match patterns and the definition of `C`
// are missing from this listing (line numbers 1679, 1683, 1685, 1688, 1693,
// 1698 are absent) - restore them from the real source before relying on this.
1680 Value *UncastedPred;
1681
1682
1684 m_Value(UncastedPred)))) {
// First pattern matched: continue with the uncasted operand.
1686 Pred = UncastedPred;
1687
1689 m_Value(UncastedPred))))
1690
1691
// Second pattern: only adopt the uncasted operand when the element-count
// comparison below holds (right-hand operand of `<=` is partly missing).
1692 if (OrigPredTy->getMinNumElements() <=
1694 ->getMinNumElements())
1695 Pred = UncastedPred;
1696 }
1697
1699 return C && C->isAllOnesValue();
1700}
1701
1702
1703
// Fragment: checks whether `Dup` is an aarch64_sve_dup call whose operand 1 is
// `Pg`; if the guarded statement does not return, `V` is returned unchanged.
// NOTE(review): the signature, the definition of `Dup`, the template argument
// of `isa` and the guarded statement (listing 1708-1710) are missing here.
1706 if (Dup && Dup->getIntrinsicID() == Intrinsic::aarch64_sve_dup &&
1707 Dup->getOperand(1) == Pg && isa(Dup->getOperand(2)))
1711
1712 return V;
1713}
1714
// Fragment of a combine over a predicated three-operand intrinsic
// (operand 0 = predicate, operands 1 and 2 = the binary operands).
// Returns an optional Instruction*; std::nullopt means "no change".
// NOTE(review): the function name/parameters (listing 1716-1719) and many body
// lines are missing, including the definitions of Opc, FII, DL and Inactive.
1715static std::optional<Instruction *>
1720
1721 Value *Pg = II.getOperand(0);
1722 Value *Op1 = II.getOperand(1);
1723 Value *Op2 = II.getOperand(2);
1725
1726
// One early path hands back the (possibly modified) intrinsic itself.
1731 return &II;
1732 }
1733
1734
1737
1738 Value *SimpleII;
// When `FII` is available its fast-math flags are forwarded to simplifyBinOp;
// the `else` arm (listing 1742) is not visible.
1740 SimpleII = simplifyBinOp(Opc, Op1, Op2, FII->getFastMathFlags(), DL);
1741 else
1743
1744
1745
1746
1747
1749 return std::nullopt;
1750
1753
1755
1756
// Special-cases a simplification result that is identical to `Inactive`;
// the action taken (listing 1758) is not visible.
1757 if (SimpleII == Inactive)
1759
1760
1763}
1764
1765
1766
// Fragment of a combine returning an optional Instruction*.
// Visible behaviour: an early std::nullopt exit, one path that retargets the
// call in place via II.setCalledFunction(NewDecl) and returns &II, and a final
// std::nullopt fallback.
// NOTE(review): the signature and nearly all conditions are missing from this
// listing; do not infer the triggering conditions from this fragment.
1767static std::optional<Instruction *>
1771 return std::nullopt;
1772
1774
1775
1780
1784
1786 }
1787 }
1788
1789
1795 }
1796
// In-place rewrite: the intrinsic keeps its operands but calls `NewDecl`.
1800 II.setCalledFunction(NewDecl);
1801 return &II;
1802 }
1803 }
1804
1805
1809
1810 return std::nullopt;
1811}
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
// Tries to narrow a zeroing predicate logical operation that feeds `II`.
// The operation must be one of and/bic/eor/nand/nor/orn/orr (_z forms) and its
// governing predicate must come from aarch64_sve_convert_to_svbool whose source
// type equals II's type. Both data operands are then wrapped in
// aarch64_sve_convert_from_svbool to that narrower type.
// Returns std::nullopt whenever a precondition fails.
// NOTE(review): the signature, the definitions of BinOp/PredIntr/PredOpTy/
// NarrowedBinOpArgs and the final return are missing from this listing.
1822static std::optional<Instruction *>
1825 if (!BinOp)
1826 return std::nullopt;
1827
1828 auto IntrinsicID = BinOp->getIntrinsicID();
// Whitelist of predicate logical operations that may be narrowed.
1829 switch (IntrinsicID) {
1830 case Intrinsic::aarch64_sve_and_z:
1831 case Intrinsic::aarch64_sve_bic_z:
1832 case Intrinsic::aarch64_sve_eor_z:
1833 case Intrinsic::aarch64_sve_nand_z:
1834 case Intrinsic::aarch64_sve_nor_z:
1835 case Intrinsic::aarch64_sve_orn_z:
1836 case Intrinsic::aarch64_sve_orr_z:
1837 break;
1838 default:
1839 return std::nullopt;
1840 }
1841
1842 auto BinOpPred = BinOp->getOperand(0);
1843 auto BinOpOp1 = BinOp->getOperand(1);
1844 auto BinOpOp2 = BinOp->getOperand(2);
1845
1847 if (!PredIntr ||
1848 PredIntr->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)
1849 return std::nullopt;
1850
1851 auto PredOp = PredIntr->getOperand(0);
// The un-widened predicate must already have the type II produces.
1853 if (PredOpTy != II.getType())
1854 return std::nullopt;
1855
1858 Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp1});
1859 NarrowedBinOpArgs.push_back(NarrowBinOpOp1);
// Reuse the first narrowed operand when both operands are the same value,
// instead of emitting a second identical conversion.
1860 if (BinOpOp1 == BinOpOp2)
1861 NarrowedBinOpArgs.push_back(NarrowBinOpOp1);
1862 else
1864 Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp2}));
1865
1866 auto NarrowedBinOp =
1869}
1870
// Walks backwards through a chain of aarch64_sve_convert_to_svbool /
// aarch64_sve_convert_from_svbool calls starting at II's operand 0, looking
// for the earliest value in the chain that already has the type `IVTy`.
// Returns std::nullopt when no such value exists.
// NOTE(review): the signature, the definitions of IVTy, CandidatesForRemoval
// and IntrinsicCursor, the template argument of `cast`, and the final
// replacement/return statement are missing from this listing.
1871static std::optional<Instruction *>
1873
1876
// An earlier combine attempt (not visible) takes precedence when it succeeds.
1878 return BinOpCombine;
1879
1880
1883 return std::nullopt;
1884
1886 Value *Cursor = II.getOperand(0), *EarliestReplacement = nullptr;
1887
1889
1890
1891 while (Cursor) {
1892
1893
1894 const auto *CursorVTy = cast(Cursor->getType());
// Stop once the chain passes through a type with fewer (minimum) lanes
// than the target type.
1895 if (CursorVTy->getElementCount().getKnownMinValue() <
1896 IVTy->getElementCount().getKnownMinValue())
1897 break;
1898
1899
// Remember the furthest-back value whose type already matches.
1900 if (Cursor->getType() == IVTy)
1901 EarliestReplacement = Cursor;
1902
1904
1905
// Only the two svbool conversion intrinsics may be looked through.
1906 if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() ==
1907 Intrinsic::aarch64_sve_convert_to_svbool ||
1908 IntrinsicCursor->getIntrinsicID() ==
1909 Intrinsic::aarch64_sve_convert_from_svbool))
1910 break;
1911
// Inserted at the front, so the container ends up ordered from the
// earliest conversion to the latest.
1912 CandidatesForRemoval.insert(CandidatesForRemoval.begin(), IntrinsicCursor);
1913 Cursor = IntrinsicCursor->getOperand(0);
1914 }
1915
1916
1917
1918 if (!EarliestReplacement)
1919 return std::nullopt;
1920
1922}
1923
1926
// Fragment: only the read of the governing predicate (operand 0) survives.
// NOTE(review): the signature and the rest of the body (listing 1924-1925,
// 1928-1933) are missing from this listing.
1927 auto *OpPredicate = II.getOperand(0);
1930
1934}
1935
// Fragment of a combine on a three-argument intrinsic whose operand 1 is the
// predicate `Pg`. One path builds something from argument 2; another builds
// something from arguments 0 and 2 together with the constant index 0.
// NOTE(review): the signature, the guarding conditions and the calls that
// consume these arguments are missing from this listing.
1938 Value *Pg = II.getOperand(1);
1939
1940
1944 II.getArgOperand(2));
1946 }
1947
1950 return std::nullopt;
1951
1952
1954 II.getArgOperand(0), II.getArgOperand(2), uint64_t(0));
1956}
1957
// Fragment: only the tail of a call taking II's argument 0 is visible.
// NOTE(review): the signature and the surrounding statements are missing.
1960
1963 II.getArgOperand(0));
1966}
1967
// Fragment of a compare combine. Visible preconditions: a splat value that is
// zero, a source that is an aarch64_sve_dupq_lane call with lane index zero,
// fed by a vector_insert of a constant vector whose fixed element count equals
// the result's minimum element count. The constant is then summarised as a
// 16-bit predicate mask and, when regular, replaced by ptrue(all) routed
// through convert_to_svbool / convert_from_svbool.
// NOTE(review): the signature and most definitions (SplatValue, DupQLane,
// DupQLaneIdx, VecIns, ConstVec, VecTy, OutTy, Arg, PFalse, PredType, PTrue,
// ConvertToSVBool, Ctx) and the final return are missing from this listing.
1971
1973 return std::nullopt;
1974
1975
1976 auto *SplatValue =
1978 if (!SplatValue || !SplatValue->isZero())
1979 return std::nullopt;
1980
1981
1983 if (!DupQLane ||
1984 DupQLane->getIntrinsicID() != Intrinsic::aarch64_sve_dupq_lane)
1985 return std::nullopt;
1986
1987
1989 if (!DupQLaneIdx || !DupQLaneIdx->isZero())
1990 return std::nullopt;
1991
1993 if (!VecIns || VecIns->getIntrinsicID() != Intrinsic::vector_insert)
1994 return std::nullopt;
1995
1996
1997
1999 return std::nullopt;
2000
2002 return std::nullopt;
2003
2005 if (!ConstVec)
2006 return std::nullopt;
2007
2010 if (!VecTy || !OutTy || VecTy->getNumElements() != OutTy->getMinNumElements())
2011 return std::nullopt;
2012
2013 unsigned NumElts = VecTy->getNumElements();
2014 unsigned PredicateBits = 0;
2015
2016
// Element I owns bit I * (16 / NumElts); the bit is set when the element is
// non-zero.
2017 for (unsigned I = 0; I < NumElts; ++I) {
2019 if (!Arg)
2020 return std::nullopt;
2021 if (!Arg->isZero())
2022 PredicateBits |= 1 << (I * (16 / NumElts));
2023 }
2024
2025
// No element is non-zero: handled via `PFalse` (construction not visible).
2026 if (PredicateBits == 0) {
2028 PFalse->takeName(&II);
2030 }
2031
2032
// OR together (position % 8) of every set bit, seeded with 8 so the result is
// never zero; the lowest set bit of that value is taken as the stride below.
2033 unsigned Mask = 8;
2034 for (unsigned I = 0; I < 16; ++I)
2035 if ((PredicateBits & (1 << I)) != 0)
2036 Mask |= (I % 8);
2037
// Mask & -Mask isolates the lowest set bit, so PredSize is 1, 2, 4 or 8.
2038 unsigned PredSize = Mask & -Mask;
2041
2042
// Every PredSize-th bit must be set, otherwise the pattern is irregular.
2043 for (unsigned I = 0; I < 16; I += PredSize)
2044 if ((PredicateBits & (1 << I)) == 0)
2045 return std::nullopt;
2046
2047 auto *PTruePat =
2048 ConstantInt::get(Type::getInt32Ty(Ctx), AArch64SVEPredPattern::all);
2050 {PredType}, {PTruePat});
2052 Intrinsic::aarch64_sve_convert_to_svbool, {PredType}, {PTrue});
2053 auto *ConvertFromSVBool =
2055 {II.getType()}, {ConvertToSVBool});
2056
2059}
2060
// Fragment of a combine for the lasta/lastb intrinsics (argument 0 = predicate,
// argument 1 = vector). `IsAfter` is true for aarch64_sve_lasta.
// Visible paths: (1) rebuild a binary operator from new LHS/RHS values,
// (2) for lasta with a null-constant `C`, emit `Extract` before II,
// (3) for a ptrue predicate with a known element count, emit `Extract` at a
// computed index, bailing out if the index is not below the predicate type's
// minimum element count.
// NOTE(review): the signature and the definitions of OldBinOp, C, Extract,
// IntrPG, MinNumElts, PgVTy plus all return-with-replacement statements are
// missing from this listing.
2063 Value *Pg = II.getArgOperand(0);
2064 Value *Vec = II.getArgOperand(1);
2065 auto IntrinsicID = II.getIntrinsicID();
2066 bool IsAfter = IntrinsicID == Intrinsic::aarch64_sve_lasta;
2067
2068
2071
2072
2073
2078 auto OpC = OldBinOp->getOpcode();
2079 auto *NewLHS =
2081 auto *NewRHS =
2084 OpC, NewLHS, NewRHS, OldBinOp, OldBinOp->getName(), II.getIterator());
2086 }
2087 }
2088
2090 if (IsAfter && C && C->isNullValue()) {
2091
2094 Extract->insertBefore(II.getIterator());
2095 Extract->takeName(&II);
2097 }
2098
2100 if (!IntrPG)
2101 return std::nullopt;
2102
// Only a ptrue-generated predicate is analysed further.
2103 if (IntrPG->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
2104 return std::nullopt;
2105
2106 const auto PTruePattern =
2108
2109
// A zero element count means the pattern gives no usable fixed count.
2111 if (!MinNumElts)
2112 return std::nullopt;
2113
// Index of the last element covered by the pattern ...
2114 unsigned Idx = MinNumElts - 1;
2115
2116
// ... and lasta selects the element after it.
2117 if (IsAfter)
2118 ++Idx;
2119
2120
2121
2122
2124 if (Idx >= PgVTy->getMinNumElements())
2125 return std::nullopt;
2126
2127
2130 Extract->insertBefore(II.getIterator());
2131 Extract->takeName(&II);
2133}
2134
2137
2138
2139
2140
2141
2142
2143
// Fragment of a combine on a three-argument intrinsic (argument 0 = predicate,
// argument 2 = vector). It applies only when `Ty` is an integer type and a
// width selector is 16, 32 or 64; it then re-issues the same intrinsic ID on
// FPVec's type with operands {Pg, FPFallBack, FPVec}.
// NOTE(review): the signature, the definitions of Ty/FPVec/FPFallBack, the
// switch header and the per-case assignments are missing from this listing.
2144 Value *Pg = II.getArgOperand(0);
2146 Value *Vec = II.getArgOperand(2);
2148
2149 if (!Ty->isIntegerTy())
2150 return std::nullopt;
2151
2154 default:
2155 return std::nullopt;
2156 case 16:
2158 break;
2159 case 32:
2161 break;
2162 case 64:
2164 break;
2165 }
2166
2172 II.getIntrinsicID(), {FPVec->getType()}, {Pg, FPFallBack, FPVec});
2175}
2176
2180
2181
// Fragment: builds the i32 constant for the `all` predicate pattern and uses it
// (with II's type) to create a value that feeds `RDFFR`.
// NOTE(review): the signature, the creation calls and the return are missing.
2182 auto *AllPat =
2183 ConstantInt::get(Type::getInt32Ty(Ctx), AArch64SVEPredPattern::all);
2185 {II.getType()}, {AllPat});
2186 auto *RDFFR =
2190}
2191
// Fragment of a combine keyed on a predicate `Pattern`. The `all` pattern has
// its own path (body not visible). Otherwise, when `MinNumElts` is non-zero and
// `NumElts >= MinNumElts`, the result is a replacement of II by the constant
// MinNumElts of II's type; in every other case std::nullopt is returned.
// NOTE(review): the signature, the definitions of Pattern/NumElts/MinNumElts
// and the call wrapping the constant are missing from this listing.
2192static std::optional<Instruction *>
2195
2196 if (Pattern == AArch64SVEPredPattern::all) {
2201 }
2202
2204
2205 return MinNumElts && NumElts >= MinNumElts
2207 II, ConstantInt::get(II.getType(), MinNumElts)))
2208 : std::nullopt;
2209}
2210
// Fragment: bails out with std::nullopt unless the subtarget `ST` reports
// isStreaming().
// NOTE(review): the signature and the transformation applied in streaming
// mode (listing 2219-2222) are missing from this listing.
2211static std::optional<Instruction *>
2214 if (!ST->isStreaming())
2215 return std::nullopt;
2216
2217
2218
2223}
2224
// Fragment of a combine for the ptest intrinsics (argument 0 = governing
// predicate, argument 1 = tested predicate). Three visible cases:
//  1. ptest_first/ptest_last with identical operands: a new `PTest` is built
//     from {PgVal, OpVal}.
//  2. Both operands are convert_to_svbool calls (further conditions missing).
//  3. ptest_any where the predicate and the tested value are the same
//     instruction and that instruction is one of the listed flag-producing
//     intrinsics (brk*, rdffr_z, and/bic/eor/nand/nor/orn/orr _z).
// Falls back to std::nullopt.
// NOTE(review): the signature, the definitions of Pg/Op/OpIID, part of the
// null check on line 2248 and every replacement body are missing here.
2227 Value *PgVal = II.getArgOperand(0);
2228 Value *OpVal = II.getArgOperand(1);
2229
2230
2231
2232 if (PgVal == OpVal &&
2233 (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_first ||
2234 II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_last)) {
2235 Value *Ops[] = {PgVal, OpVal};
2237
2238 auto *PTest =
2241
2243 }
2244
2247
2248 if (!Pg || )
2249 return std::nullopt;
2250
2252
2253 if (Pg->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
2254 OpIID == Intrinsic::aarch64_sve_convert_to_svbool &&
2258
2260
2263 }
2264
2265
2266
2267
2268 if ((Pg == Op) && (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) &&
2269 ((OpIID == Intrinsic::aarch64_sve_brka_z) ||
2270 (OpIID == Intrinsic::aarch64_sve_brkb_z) ||
2271 (OpIID == Intrinsic::aarch64_sve_brkpa_z) ||
2272 (OpIID == Intrinsic::aarch64_sve_brkpb_z) ||
2273 (OpIID == Intrinsic::aarch64_sve_rdffr_z) ||
2274 (OpIID == Intrinsic::aarch64_sve_and_z) ||
2275 (OpIID == Intrinsic::aarch64_sve_bic_z) ||
2276 (OpIID == Intrinsic::aarch64_sve_eor_z) ||
2277 (OpIID == Intrinsic::aarch64_sve_nand_z) ||
2278 (OpIID == Intrinsic::aarch64_sve_nor_z) ||
2279 (OpIID == Intrinsic::aarch64_sve_orn_z) ||
2280 (OpIID == Intrinsic::aarch64_sve_orr_z))) {
2283
2286
2288 }
2289
2290 return std::nullopt;
2291}
2292
// Template combine parameterised by the multiply intrinsic to look for
// (MulOpc) and the fused intrinsic to emit (FuseOpc).
// `MergeIntoAddendOp` selects the operand roles: when true the addend is
// operand 1 (multiply expected in the other slot); when false the addend is
// operand 2. The fused call's operand order follows suit:
//   true  -> {P, AddendOp, MulOp0, MulOp1}
//   false -> {P, MulOp0, MulOp1, AddendOp}
// Floating-point results go through extra checks that can reject the fold.
// NOTE(review): the function name line, the multiply match, the object of the
// hasOneUse() test, the FP checks and the creation/return calls are missing
// from this listing.
2293template <Intrinsic::ID MulOpc, Intrinsic::ID FuseOpc>
2294static std::optional<Instruction *>
2296 bool MergeIntoAddendOp) {
2297 Value *P = II.getOperand(0);
2298 Value *MulOp0, *MulOp1, *AddendOp, *Mul;
2299 if (MergeIntoAddendOp) {
2300 AddendOp = II.getOperand(1);
2302 } else {
2303 AddendOp = II.getOperand(2);
2305 }
2306
2309 return std::nullopt;
2310
2311 if (->hasOneUse())
2312 return std::nullopt;
2313
2315 if (II.getType()->isFPOrFPVectorTy()) {
2317
2318
2320 return std::nullopt;
2322 return std::nullopt;
2324 }
2325
2327 if (MergeIntoAddendOp)
2329 {P, AddendOp, MulOp0, MulOp1}, FMFSource);
2330 else
2332 {P, MulOp0, MulOp1, AddendOp}, FMFSource);
2333
2335}
2336
// Fragment of a load combine (operand 0 = predicate, operand 1 = pointer; the
// loaded type is II's type). One path creates `Load` and copies II's metadata
// onto it.
// NOTE(review): the signature, the condition selecting that path, the second
// path (listing 2349-2353) and all returns are missing from this listing.
2337static std::optional<Instruction *>
2339 Value *Pred = II.getOperand(0);
2340 Value *PtrOp = II.getOperand(1);
2341 Type *VecTy = II.getType();
2342
2345 Load->copyMetadata(II);
2347 }
2348
2354}
2355
// Fragment of a store combine (operand 0 = stored vector, operand 1 =
// predicate, operand 2 = pointer). One path creates `Store` and copies II's
// metadata onto it.
// NOTE(review): the signature, the condition selecting that path, the second
// path (listing 2368-2371) and all returns are missing from this listing.
2356static std::optional<Instruction *>
2358 Value *VecOp = II.getOperand(0);
2359 Value *Pred = II.getOperand(1);
2360 Value *PtrOp = II.getOperand(2);
2361
2364 Store->copyMetadata(II);
2366 }
2367
2372}
2373
// Maps the "_u" floating-point SVE intrinsics onto IR binary opcodes:
// fmul_u -> FMul, fadd_u -> FAdd, fsub_u -> FSub. Any other intrinsic yields
// the sentinel Instruction::BinaryOpsEnd.
// NOTE(review): the signature and the switch header (listing 2374-2375) are
// missing from this listing.
2376 case Intrinsic::aarch64_sve_fmul_u:
2377 return Instruction::BinaryOps::FMul;
2378 case Intrinsic::aarch64_sve_fadd_u:
2379 return Instruction::BinaryOps::FAdd;
2380 case Intrinsic::aarch64_sve_fsub_u:
2381 return Instruction::BinaryOps::FSub;
2382 default:
2383 return Instruction::BinaryOpsEnd;
2384 }
2385}
2386
// Fragment: replaces a predicated FP intrinsic by a plain binary operator built
// from operands 1 and 2 with II's fast-math flags. Strict-FP calls are left
// alone, as are intrinsics for which `BinOpCode` is the BinaryOpsEnd sentinel.
// NOTE(review): the signature, the computation of BinOpCode, the second half of
// the bail-out condition (listing 2396) and the creation/return calls are
// missing from this listing.
2387static std::optional<Instruction *>
2389
2390 if (II.isStrictFP())
2391 return std::nullopt;
2392
2393 auto *OpPredicate = II.getOperand(0);
2395 if (BinOpCode == Instruction::BinaryOpsEnd ||
2397 return std::nullopt;
2399 BinOpCode, II.getOperand(1), II.getOperand(2), II.getFastMathFlags());
2401}
2402
// Fragment: tries two fused rewrites in order - one emitting aarch64_sve_mla
// (merge-into-addend = true), then one emitting aarch64_sve_mad (false) - and
// returns the first that succeeds, else std::nullopt.
// NOTE(review): the signature and the `if (auto ... =` heads naming the
// multiply intrinsic are missing from this listing.
2406 Intrinsic::aarch64_sve_mla>(
2407 IC, II, true))
2408 return MLA;
2410 Intrinsic::aarch64_sve_mad>(
2411 IC, II, false))
2412 return MAD;
2413 return std::nullopt;
2414}
2415
// Fragment: tries three fused rewrites in order - fmla (true), fmad (false),
// then fmla (true) again - returning the first that succeeds, else
// std::nullopt.
// NOTE(review): the lines naming the helper and its first template argument
// are missing, so what distinguishes the first and third attempts is not
// visible here.
2416static std::optional<Instruction *>
2418 if (auto FMLA =
2420 Intrinsic::aarch64_sve_fmla>(IC, II,
2421 true))
2422 return FMLA;
2423 if (auto FMAD =
2425 Intrinsic::aarch64_sve_fmad>(IC, II,
2426 false))
2427 return FMAD;
2428 if (auto FMLA =
2430 Intrinsic::aarch64_sve_fmla>(IC, II,
2431 true))
2432 return FMLA;
2433 return std::nullopt;
2434}
2435
// Fragment: tries fused rewrites emitting fmla (true), fmad (false) and
// fmla_u (true), returning the first that succeeds.
// NOTE(review): the helper name, the first template arguments and the final
// fallback statement (listing 2453) are missing from this listing.
2436static std::optional<Instruction *>
2438 if (auto FMLA =
2440 Intrinsic::aarch64_sve_fmla>(IC, II,
2441 true))
2442 return FMLA;
2443 if (auto FMAD =
2445 Intrinsic::aarch64_sve_fmad>(IC, II,
2446 false))
2447 return FMAD;
2448 if (auto FMLA_U =
2450 Intrinsic::aarch64_sve_fmla_u>(
2451 IC, II, true))
2452 return FMLA_U;
2454}
2455
// Fragment: tries three fused rewrites in order - fmls (true), fnmsb (false),
// then fmls (true) again - returning the first that succeeds, else
// std::nullopt.
// NOTE(review): the helper name and first template arguments are missing, so
// what distinguishes the first and third attempts is not visible here.
2456static std::optional<Instruction *>
2458 if (auto FMLS =
2460 Intrinsic::aarch64_sve_fmls>(IC, II,
2461 true))
2462 return FMLS;
2463 if (auto FMSB =
2465 Intrinsic::aarch64_sve_fnmsb>(
2466 IC, II, false))
2467 return FMSB;
2468 if (auto FMLS =
2470 Intrinsic::aarch64_sve_fmls>(IC, II,
2471 true))
2472 return FMLS;
2473 return std::nullopt;
2474}
2475
// Fragment: tries fused rewrites emitting fmls (true), fnmsb (false) and
// fmls_u (true), returning the first that succeeds.
// NOTE(review): the helper name, the first template arguments and the final
// fallback statement (listing 2493) are missing from this listing.
2476static std::optional<Instruction *>
2478 if (auto FMLS =
2480 Intrinsic::aarch64_sve_fmls>(IC, II,
2481 true))
2482 return FMLS;
2483 if (auto FMSB =
2485 Intrinsic::aarch64_sve_fnmsb>(
2486 IC, II, false))
2487 return FMSB;
2488 if (auto FMLS_U =
2490 Intrinsic::aarch64_sve_fmls_u>(
2491 IC, II, true))
2492 return FMLS_U;
2494}
2495
// Fragment: tries a single fused rewrite emitting aarch64_sve_mls
// (merge-into-addend = true); returns it on success, else std::nullopt.
// NOTE(review): the signature and the head of the `if` are missing.
2499 Intrinsic::aarch64_sve_mls>(
2500 IC, II, true))
2501 return MLS;
2502 return std::nullopt;
2503}
2504
// Fragment of a combine for the unpack intrinsics. `IsSigned` is true for
// sunpkhi/sunpklo. When the unpacked argument is a splat, its scalar is
// transformed (details not visible); otherwise std::nullopt is returned.
// NOTE(review): the signature, the expression assigned to ScalarArg and the
// replacement statements (listing 2516-2520) are missing from this listing.
2507 Value *UnpackArg = II.getArgOperand(0);
2509 bool IsSigned = II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpkhi ||
2510 II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpklo;
2511
2512
2513
2514 if (auto *ScalarArg = getSplatValue(UnpackArg)) {
2515 ScalarArg =
2521 }
2522
2523 return std::nullopt;
2524}
// Fragment of a table-lookup combine (operand 0 = data, operand 1 = indices).
// It proceeds only when `SplatValue` exists and is strictly below the vector
// type's known-minimum element count; it then builds `VectorSplat`.
// NOTE(review): the signature, the definitions of VTy/SplatValue, the
// expression for VectorSplat and the return are missing from this listing.
2527 auto *OpVal = II.getOperand(0);
2528 auto *OpIndices = II.getOperand(1);
2530
2531
2532
2534 if (!SplatValue ||
2535 SplatValue->getValue().uge(VTy->getElementCount().getKnownMinValue()))
2536 return std::nullopt;
2537
2538
2539
2541 auto *VectorSplat =
2543
2546}
2547
// Fragment of a combine that pattern-matches both arguments of II and, when
// `TyA` equals B's type (plus a further missing condition), builds a result
// using TyA's minimum element count. FromSVB/ToSVB are local aliases for the
// two svbool conversion intrinsic IDs. Falls back to std::nullopt.
// NOTE(review): the signature, the match patterns, the definitions of A/B/TyA
// and the replacement statements are missing from this listing.
2551 Type *RetTy = II.getType();
2552 constexpr Intrinsic::ID FromSVB = Intrinsic::aarch64_sve_convert_from_svbool;
2553 constexpr Intrinsic::ID ToSVB = Intrinsic::aarch64_sve_convert_to_svbool;
2554
2555
2556
2557 if ((match(II.getArgOperand(0),
2559 match(II.getArgOperand(1),
2564 if (TyA == B->getType() &&
2569 TyA->getMinNumElements());
2572 }
2573 }
2574
2575 return std::nullopt;
2576}
2577
2580
2581
// Fragment: when both arguments match a (missing) pattern, II is replaced by
// `A` for aarch64_sve_zip1 and by `B` otherwise; else std::nullopt.
// NOTE(review): the signature, the definitions of A/B and the match patterns
// are missing from this listing.
2583 if (match(II.getArgOperand(0),
2588 II, (II.getIntrinsicID() == Intrinsic::aarch64_sve_zip1 ? A : B));
2589
2590 return std::nullopt;
2591}
2592
// Fragment of a gather-load combine (operand 0 = mask, operand 1 = base
// pointer, operand 2 = index vector). When the index matches a (missing)
// pattern that yields `IndexBase`, an address is formed from BasePtr and
// IndexBase, using the base pointer's alignment from the data layout.
// Falls back to std::nullopt.
// NOTE(review): the signature, the index pattern and the created
// instructions/return are missing from this listing.
2593static std::optional<Instruction *>
2595 Value *Mask = II.getOperand(0);
2596 Value *BasePtr = II.getOperand(1);
2597 Value *Index = II.getOperand(2);
2600
2601
2602
2603
2604 Value *IndexBase;
2607 Align Alignment =
2608 BasePtr->getPointerAlignment(II.getDataLayout());
2609
2611 BasePtr, IndexBase);
2616 }
2617
2618 return std::nullopt;
2619}
2620
// Fragment of a scatter-store combine (operand 0 = value, operand 1 = mask,
// operand 2 = base pointer, operand 3 = index vector). When the index matches
// a (missing) pattern that yields `IndexBase`, an address is formed from
// BasePtr and IndexBase, using the base pointer's alignment from the data
// layout. Falls back to std::nullopt.
// NOTE(review): the signature, the index pattern and the created
// instructions/return are missing from this listing.
2621static std::optional<Instruction *>
2623 Value *Val = II.getOperand(0);
2624 Value *Mask = II.getOperand(1);
2625 Value *BasePtr = II.getOperand(2);
2626 Value *Index = II.getOperand(3);
2628
2629
2630
2631
2632 Value *IndexBase;
2635 Align Alignment =
2636 BasePtr->getPointerAlignment(II.getDataLayout());
2637
2639 BasePtr, IndexBase);
2641
2643 }
2644
2645 return std::nullopt;
2646}
2647
// Fragment of a signed-divide combine (operand 0 = predicate, operand 1 =
// dividend vector, operand 2 = divisor vector). It needs the divisor to be a
// splat constant integer. A divisor of -1 is never rewritten; a divisor of 1
// has a dedicated path. Two further paths emit aarch64_sve_asrd with
// `DivisorLog2`, and one of them additionally emits aarch64_sve_neg on the
// asrd result. Falls back to std::nullopt.
// NOTE(review): the signature, the definitions of SplatConstantInt/Divisor/
// DivisorLog2, the conditions choosing each path and the returns are missing.
2651 Value *Pred = II.getOperand(0);
2652 Value *Vec = II.getOperand(1);
2653 Value *DivVec = II.getOperand(2);
2654
2657 if (!SplatConstantInt)
2658 return std::nullopt;
2659
2661 const int64_t DivisorValue = Divisor.getSExtValue();
2662 if (DivisorValue == -1)
2663 return std::nullopt;
2664 if (DivisorValue == 1)
2666
2670 Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});
2672 }
2677 Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});
2679 Intrinsic::aarch64_sve_neg, {ASRD->getType()}, {ASRD, Pred, ASRD});
2681 }
2682
2683 return std::nullopt;
2684}
2685
// Fragment of a helper that tries to halve a sequence `Vec` of Value pointers
// by comparing its first half against its second half element by element.
// A single-element sequence is trivially accepted. Null entries are tolerated
// only when `AllowPoison` is set. On success the sequence is resized to half
// its length and true is returned.
// NOTE(review): the signature, the second early-exit condition (listing 2690),
// the loop bounds/increment, the equality test between *LHS and *RHS, the
// action for a null LHS (listing 2705) and the statement on listing 2709
// (presumably a recursive step - confirm) are missing from this listing.
2687 size_t VecSize = Vec.size();
2688 if (VecSize == 1)
2689 return true;
2691 return false;
2692 size_t HalfVecSize = VecSize / 2;
2693
// LHS walks the first half while RHS walks the second half.
2694 for (auto LHS = Vec.begin(), RHS = Vec.begin() + HalfVecSize;
2696 if (*LHS != nullptr && *RHS != nullptr) {
2698 continue;
2699 else
2700 return false;
2701 }
// At least one side is null here.
2702 if (!AllowPoison)
2703 return false;
2704 if (*LHS == nullptr && *RHS != nullptr)
2706 }
2707
2708 Vec.resize(HalfVecSize);
2710 return true;
2711}
2712
2713
2714
// Fragment of a combine that collects the scalars inserted by a chain of
// insert-element operations into `Elts` (indexed by the constant insert
// index), simplifies that sequence, rebuilds a shorter insert chain, and then
// re-expresses the result through a wider element type: bitcast to
// `WideScalableTy`, shuffle with `WideShuffleMask`, and bitcast back.
// NOTE(review): the signature and most definitions (Elts, Idx, InsertElt,
// IIScalableTy, InsertEltChain, WideScalableTy, WideShuffleMask), the matcher
// name on listing 2718 and the final return are missing from this listing.
2717 Value *CurrentInsertElt = nullptr, *Default = nullptr;
2718 if ((II.getOperand(0),
2722 return std::nullopt;
2724
2725
// Walk the insert chain: record the inserted scalar at its index, then
// step to the vector being inserted into.
2729 Elts[Idx->getValue().getZExtValue()] = InsertElt->getOperand(1);
2730 CurrentInsertElt = InsertElt->getOperand(0);
2731 }
2732
2733 bool AllowPoison =
2736 return std::nullopt;
2737
2738
// Null entries in Elts are skipped when rebuilding the chain.
2740 for (size_t I = 0; I < Elts.size(); I++) {
2741 if (Elts[I] == nullptr)
2742 continue;
2745 }
2746 if (InsertEltChain == nullptr)
2747 return std::nullopt;
2748
2749
2750
2751
2752
// PatternWidth: total bits of the (simplified) element sequence.
// PatternElementCount: how many such patterns fit in the minimum-size
// scalable vector.
2753 unsigned PatternWidth = IIScalableTy->getScalarSizeInBits() * Elts.size();
2754 unsigned PatternElementCount = IIScalableTy->getScalarSizeInBits() *
2755 IIScalableTy->getMinNumElements() /
2756 PatternWidth;
2757
2760 auto *WideShuffleMaskTy =
2762
2766 auto WideBitcast =
2770 WideBitcast, PoisonValue::get(WideScalableTy), WideShuffleMask);
2771 auto NarrowBitcast =
2773
2775}
2776
// Fragment: reads both arguments and otherwise returns std::nullopt.
// NOTE(review): the signature and the only transforming statements
// (listing 2781-2782) are missing from this listing.
2779 Value *A = II.getArgOperand(0);
2780 Value *B = II.getArgOperand(1);
2783
2784 return std::nullopt;
2785}
2786
// Fragment of a shift combine (operand 0 = predicate, operand 1 = vector,
// operand 2 = shift amount). After three bail-out checks it emits a
// replacement intrinsic of II's type with the same {Pred, Vec, Shift}
// operands.
// NOTE(review): the signature, all three bail-out conditions, the ID of the
// emitted intrinsic and the return are missing from this listing.
2789 Value *Pred = II.getOperand(0);
2790 Value *Vec = II.getOperand(1);
2791 Value *Shift = II.getOperand(2);
2792
2793
2794 Value *AbsPred, *MergedValue;
2799
2800 return std::nullopt;
2801
2802
2803
2804
2805
2808 return std::nullopt;
2809
2810
2811
2813 return std::nullopt;
2814
2816 {II.getType()}, {Pred, Vec, Shift});
2817
2819}
2820
// Fragment: reads operand 0 as the vector and otherwise returns std::nullopt.
// NOTE(review): the signature and the transforming statements
// (listing 2825-2826) are missing from this listing.
2823 Value *Vec = II.getOperand(0);
2824
2827
2828 return std::nullopt;
2829}
2830
// Fragment of a combine that scans forward from II, skipping instructions
// accepted by the `CanSkipOver` predicate for at most `LookaheadThreshold`
// steps, to see whether an identical intrinsic follows.
// The scan crosses into the next basic block only when the current block has
// a unique successor. Falls back to std::nullopt.
// NOTE(review): the signature, the head of the CanSkipOver lambda (including
// the negations/receiver on listing 2837), the definitions of
// LookaheadThreshold/NextII and the action on a match are missing.
2833
2834 auto *NI = II.getNextNode();
2837 return ->mayReadOrWriteMemory() &&
->mayHaveSideEffects();
2838 };
2839 while (LookaheadThreshold-- && CanSkipOver(NI)) {
2840 auto *NIBB = NI->getParent();
2841 NI = NI->getNextNode();
// End of block reached: continue in the unique successor, if any.
2842 if (!NI) {
2843 if (auto *SuccBB = NIBB->getUniqueSuccessor())
2844 NI = &*SuccBB->getFirstNonPHIOrDbgOrLifetime();
2845 else
2846 break;
2847 }
2848 }
2850 if (NextII && II.isIdenticalTo(NextII))
2852
2853 return std::nullopt;
2854}
2855
// Fragment: tail of a call that creates an intrinsic overloaded on II's result
// type and operand 0's type, passing operands 0 and 1 through unchanged.
// NOTE(review): the signature and the intrinsic being created are missing.
2861 {II.getType(), II.getOperand(0)->getType()},
2862 {II.getOperand(0), II.getOperand(1)}));
2863}
2864
// Fragment: only the std::nullopt fallback survives.
// NOTE(review): the signature and the transforming statements are missing.
2869 return std::nullopt;
2870}
2871
// Fragment of a combine taking a bit width `NumBits` (operand 0 = passthru,
// operand 1 = predicate). On its success path it builds a constant `Mask` of
// type `Ty` from `MaskValue` and emits an operation on {Pg, Op, Mask}.
// Falls back to std::nullopt.
// NOTE(review): the start of the signature, the guarding condition, the
// definitions of Op/Ty/MaskValue and the emitted intrinsic are missing.
2874 unsigned NumBits) {
2875 Value *Passthru = II.getOperand(0);
2876 Value *Pg = II.getOperand(1);
2878
2879
2883 auto *Mask = ConstantInt::get(Ty, MaskValue);
2885 {Pg, Op, Mask});
2887 }
2888
2889 return std::nullopt;
2890}
2891
// Fragment: derives SME attributes from the function containing II and
// otherwise returns std::nullopt.
// NOTE(review): the signature and the statements that use FnSMEAttrs
// (listing 2895-2898) are missing from this listing.
2892static std::optional<Instruction *>
2894 SMEAttrs FnSMEAttrs(*II.getFunction());
2899 return std::nullopt;
2900}
2901
// Dispatcher: after an initial attempt that may return `I` directly, switches
// on the intrinsic ID `IID` and forwards to a per-intrinsic combine.
// Intrinsics not listed (the `default` arm) and failed combines end at the
// final std::nullopt.
// NOTE(review): the function name line, the definition of IID and almost every
// `return <handler>(...)` line are missing from this listing, so consecutive
// `case` labels below do NOT necessarily share a handler - in the listing's
// numbering a gap between two case lines marks a dropped statement.
2902std::optional<Instruction *>
2907 return I;
2908
2910 switch (IID) {
2911 default:
2912 break;
2913 case Intrinsic::aarch64_dmb:
2915 case Intrinsic::aarch64_neon_fmaxnm:
2916 case Intrinsic::aarch64_neon_fminnm:
2918 case Intrinsic::aarch64_sve_convert_from_svbool:
2920 case Intrinsic::aarch64_sve_dup:
2922 case Intrinsic::aarch64_sve_dup_x:
2924 case Intrinsic::aarch64_sve_cmpne:
2925 case Intrinsic::aarch64_sve_cmpne_wide:
2927 case Intrinsic::aarch64_sve_rdffr:
2929 case Intrinsic::aarch64_sve_lasta:
2930 case Intrinsic::aarch64_sve_lastb:
2932 case Intrinsic::aarch64_sve_clasta_n:
2933 case Intrinsic::aarch64_sve_clastb_n:
2935 case Intrinsic::aarch64_sve_cntd:
2937 case Intrinsic::aarch64_sve_cntw:
2939 case Intrinsic::aarch64_sve_cnth:
2941 case Intrinsic::aarch64_sve_cntb:
2943 case Intrinsic::aarch64_sme_cntsd:
2945 case Intrinsic::aarch64_sve_ptest_any:
2946 case Intrinsic::aarch64_sve_ptest_first:
2947 case Intrinsic::aarch64_sve_ptest_last:
2949 case Intrinsic::aarch64_sve_fadd:
2951 case Intrinsic::aarch64_sve_fadd_u:
2953 case Intrinsic::aarch64_sve_fmul_u:
2955 case Intrinsic::aarch64_sve_fsub:
2957 case Intrinsic::aarch64_sve_fsub_u:
2959 case Intrinsic::aarch64_sve_add:
// add_u: fused rewrite emitting mla_u with merge-into-addend = true.
2961 case Intrinsic::aarch64_sve_add_u:
2963 Intrinsic::aarch64_sve_mla_u>(
2964 IC, II, true);
2965 case Intrinsic::aarch64_sve_sub:
// sub_u: fused rewrite emitting mls_u with merge-into-addend = true.
2967 case Intrinsic::aarch64_sve_sub_u:
2969 Intrinsic::aarch64_sve_mls_u>(
2970 IC, II, true);
2971 case Intrinsic::aarch64_sve_tbl:
2973 case Intrinsic::aarch64_sve_uunpkhi:
2974 case Intrinsic::aarch64_sve_uunpklo:
2975 case Intrinsic::aarch64_sve_sunpkhi:
2976 case Intrinsic::aarch64_sve_sunpklo:
2978 case Intrinsic::aarch64_sve_uzp1:
2980 case Intrinsic::aarch64_sve_zip1:
2981 case Intrinsic::aarch64_sve_zip2:
2983 case Intrinsic::aarch64_sve_ld1_gather_index:
2985 case Intrinsic::aarch64_sve_st1_scatter_index:
2987 case Intrinsic::aarch64_sve_ld1:
2989 case Intrinsic::aarch64_sve_st1:
2991 case Intrinsic::aarch64_sve_sdiv:
2993 case Intrinsic::aarch64_sve_sel:
2995 case Intrinsic::aarch64_sve_srshl:
2997 case Intrinsic::aarch64_sve_dupq_lane:
2999 case Intrinsic::aarch64_sve_insr:
3001 case Intrinsic::aarch64_sve_whilelo:
3003 case Intrinsic::aarch64_sve_ptrue:
3005 case Intrinsic::aarch64_sve_uxtb:
3007 case Intrinsic::aarch64_sve_uxth:
3009 case Intrinsic::aarch64_sve_uxtw:
3011 case Intrinsic::aarch64_sme_in_streaming_mode:
3013 }
3014
3015 return std::nullopt;
3016}
3017
// Demanded-elements hook. For the listed NEON narrowing/conversion intrinsics
// it forwards the caller's demanded-element mask unchanged to operand 0 through
// the `SimplifyAndSetOp` callback; it never produces a replacement value
// itself and always returns std::nullopt.
// NOTE(review): the leading lines of the signature (listing 3018-3021) are
// missing from this listing.
3022 SimplifyAndSetOp) const {
3023 switch (II.getIntrinsicID()) {
3024 default:
3025 break;
3026 case Intrinsic::aarch64_neon_fcvtxn:
3027 case Intrinsic::aarch64_neon_rshrn:
3028 case Intrinsic::aarch64_neon_sqrshrn:
3029 case Intrinsic::aarch64_neon_sqrshrun:
3030 case Intrinsic::aarch64_neon_sqshrn:
3031 case Intrinsic::aarch64_neon_sqshrun:
3032 case Intrinsic::aarch64_neon_sqxtn:
3033 case Intrinsic::aarch64_neon_sqxtun:
3034 case Intrinsic::aarch64_neon_uqrshrn:
3035 case Intrinsic::aarch64_neon_uqshrn:
3036 case Intrinsic::aarch64_neon_uqxtn:
3037 SimplifyAndSetOp(&II, 0, OrigDemandedElts, UndefElts);
3038 break;
3039 }
3040
3041 return std::nullopt;
3042}
3043
// Fragment: true when SVE is available, or when SVE-or-streaming-SVE is
// available and a further condition holds.
// NOTE(review): the signature and that further condition (listing 3046) are
// missing from this listing.
3045 return ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&
3047}
3048
// Fragment of a switch over a register kind `K`. One arm picks between an
// SVE-for-fixed-length result (at least 128 bits, from the subtarget's minimum
// SVE vector size), a NEON result, and a fallback; another arm depends on SVE
// (or streaming SVE) availability.
// NOTE(review): the signature, every case label and every returned value are
// missing from this listing.
3051 switch (K) {
3055 if (ST->useSVEForFixedLengthVectors() &&
3058 std::max(ST->getMinSVEVectorSizeInBits(), 128u));
3059 else if (ST->isNeonAvailable())
3061 else
3064 if (ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&
3067 else
3069 }
3071}
3072
// Decides whether an Add/Sub (and one further opcode whose label is missing)
// with exactly two operands qualifies as a widening instruction with one
// extended operand.
// Visible requirements:
//  - destination element size is 16, 32 or 64 bits and there are 2 operands;
//  - the source type comes from `SrcOverrideTy` when given, otherwise from the
//    matched operand (expression missing);
//  - both legalized types are vectors whose element size is unchanged by
//    legalization;
//  - the total element counts match and the destination element is exactly
//    twice the source element width.
// NOTE(review): the remaining parameters, the helper lambda at the top, the
// predicate applied to DstTy, the definitions of DstEltSize/DstTyL/SrcTyL/
// NumDstEls/NumSrcEls and the per-opcode operand checks are missing here.
3073bool AArch64TTIImpl::isSingleExtWideningInstruction(
3075 Type *SrcOverrideTy) const {
3076
3077
3081 };
3082
3083
3084
3085
3086
3087
3089 if ((DstTy) || Args.size() != 2 ||
3090 (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))
3091 return false;
3092
3093 Type *SrcTy = SrcOverrideTy;
3094 switch (Opcode) {
3095 case Instruction::Add:
3096 case Instruction::Sub: {
3097
3099 if (!SrcTy)
3100 SrcTy =
3102 break;
3103 }
3104
// Sub is rejected on this second path, i.e. it is Add-only.
3105 if (Opcode == Instruction::Sub)
3106 return false;
3107
3108
3110 if (!SrcTy)
3111 SrcTy =
3113 break;
3114 }
3115 return false;
3116 }
3117 default:
3118 return false;
3119 }
3120
3121
3122
// Legalization must keep a vector with the same element width.
3124 if (!DstTyL.second.isVector() || DstEltSize != DstTy->getScalarSizeInBits())
3125 return false;
3126
3127
3128
3129 assert(SrcTy && "Expected some SrcTy");
3131 unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
3132 if (!SrcTyL.second.isVector() || SrcElTySize != SrcTy->getScalarSizeInBits())
3133 return false;
3134
3135
// Element totals = number of legalized parts * minimum elements per part.
3137 DstTyL.first * DstTyL.second.getVectorMinNumElements();
3139 SrcTyL.first * SrcTyL.second.getVectorMinNumElements();
3140
3141
3142
3143 return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstEltSize;
3144}
3145
// Decides whether an Add, Sub or Mul with extended operands qualifies as a
// widening instruction with both operands extended, and if so returns the
// extended source type `ExtTy`; returns nullptr otherwise.
// Visible requirements:
//  - opcode is Add, Sub or Mul; destination element size is 16, 32 or 64;
//  - the operand element size (from SrcOverrideTy or from the operand's own
//    operand 0) is tracked as MaxEltSize, and 2 * MaxEltSize must not exceed
//    the destination element size;
//  - on the second visible path both operand sizes must be strictly below half
//    the destination width, and MaxEltSize is set to exactly half;
//  - on the Mul-specific path a known-bits check gates the result.
// NOTE(review): the remaining parameters, the matchers that select each path,
// the body of the SrcOverrideTy branch, the known-bits condition, the `isa`
// template argument and the construction/validation of ExtTy are missing.
3146Type *AArch64TTIImpl::isBinExtWideningInstruction(unsigned Opcode, Type *DstTy,
3148 Type *SrcOverrideTy) const {
3149 if (Opcode != Instruction::Add && Opcode != Instruction::Sub &&
3150 Opcode != Instruction::Mul)
3151 return nullptr;
3152
3153
3154
3155
3156
3157
3160 (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))
3161 return nullptr;
3162
// Element width of an operand's source, honouring the override type.
3163 auto getScalarSizeWithOverride = [&](const Value *V) {
3164 if (SrcOverrideTy)
3167 ->getOperand(0)
3168 ->getType()
3169 ->getScalarSizeInBits();
3170 };
3171
3172 unsigned MaxEltSize = 0;
3175 unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);
3176 unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);
3177 MaxEltSize = std::max(EltSize0, EltSize1);
3180 unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);
3181 unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);
3182
3183
3184 if (EltSize0 >= DstEltSize / 2 || EltSize1 >= DstEltSize / 2)
3185 return nullptr;
3186 MaxEltSize = DstEltSize / 2;
3187 } else if (Opcode == Instruction::Mul &&
3189
3190
3191
3192 KnownBits Known =
3197 return nullptr;
3198
3199 MaxEltSize =
3200 getScalarSizeWithOverride(isa(Args[0]) ? Args[0] : Args[1]);
3201 } else
3202 return nullptr;
3203
3204 if (MaxEltSize * 2 > DstEltSize)
3205 return nullptr;
3206
3209 return nullptr;
3210 return ExtTy;
3211}
3212
3213
3214
3215
3216
3217
3218
// Checks whether the user of an extend (`ExtUser`) is part of a specific
// add / logical-shift-right / truncate chain.
// Visible requirements:
//  - `Src` is a legal vector type, and scalable types additionally need SVE2;
//  - ExtUser is an Add with exactly one use;
//  - if that add's user is itself an Add, the outer add is used instead;
//  - the add feeds an LShr, which feeds a Trunc whose destination element
//    width equals Src's element width;
//  - further (missing) operand checks decide the final true/false.
// NOTE(review): the first signature line, the definitions of Add/Shr/Trunc,
// the `cast` template arguments and the last two conditions (listing
// 3248-3250, 3254-3255) are missing from this listing.
3220 Type *Src) const {
3221
3222 if (!Src->isVectorTy() || !TLI->isTypeLegal(TLI->getValueType(DL, Src)) ||
3223 (Src->isScalableTy() && !ST->hasSVE2()))
3224 return false;
3225
3226 if (ExtUser->getOpcode() != Instruction::Add || !ExtUser->hasOneUse())
3227 return false;
3228
3229
3231 auto *AddUser =
3233 if (AddUser && AddUser->getOpcode() == Instruction::Add)
3234 Add = AddUser;
3235
3237 if (!Shr || Shr->getOpcode() != Instruction::LShr)
3238 return false;
3239
3241 if (!Trunc || Trunc->getOpcode() != Instruction::Trunc ||
3242 Src->getScalarSizeInBits() !=
3243 cast(Trunc)->getDestTy()->getScalarSizeInBits())
3244 return false;
3245
3246
3247
3251 return false;
3252
3253
3256 return true;
3257
3258 return false;
3259}
3260
3266 int ISD = TLI->InstructionOpcodeToISD(Opcode);
3267 assert(ISD && "Invalid opcode");
3268
3269
3270 if (I && I->hasOneUser()) {
3273 if (Type *ExtTy = isBinExtWideningInstruction(
3274 SingleUser->getOpcode(), Dst, Operands,
3275 Src != I->getOperand(0)->getType() ? Src : nullptr)) {
3276
3277
3279 Type *DoubleSrcTy =
3283 }
3284
3285 return 0;
3286 }
3287
3288 if (isSingleExtWideningInstruction(
3289 SingleUser->getOpcode(), Dst, Operands,
3290 Src != I->getOperand(0)->getType() ? Src : nullptr)) {
3291
3292
3293
3294 if (SingleUser->getOpcode() == Instruction::Add) {
3295 if (I == SingleUser->getOperand(1) ||
3297 cast(SingleUser->getOperand(1))->getOpcode() == Opcode))
3298 return 0;
3299 } else {
3300
3301
3302 return 0;
3303 }
3304 }
3305
3306
3309 return 0;
3310 }
3311
3312
3315 return Cost == 0 ? 0 : 1;
3316 return Cost;
3317 };
3318
3319 EVT SrcTy = TLI->getValueType(DL, Src);
3320 EVT DstTy = TLI->getValueType(DL, Dst);
3321
3322 if (!SrcTy.isSimple() || !DstTy.isSimple())
3323 return AdjustCost(
3325
3326
3327
3328 if (!ST->hasSVE2() && !ST->isStreamingSVEAvailable() &&
3332
3334 {ISD::FP_ROUND, MVT::bf16, MVT::f32, 1},
3335 {ISD::FP_ROUND, MVT::bf16, MVT::f64, 1},
3336 {ISD::FP_ROUND, MVT::v4bf16, MVT::v4f32, 1},
3337 {ISD::FP_ROUND, MVT::v8bf16, MVT::v8f32, 2},
3338 {ISD::FP_ROUND, MVT::v2bf16, MVT::v2f64, 2},
3339 {ISD::FP_ROUND, MVT::v4bf16, MVT::v4f64, 3},
3340 {ISD::FP_ROUND, MVT::v8bf16, MVT::v8f64, 6},
3341 {ISD::FP_ROUND, MVT::nxv2bf16, MVT::nxv2f32, 1},
3342 {ISD::FP_ROUND, MVT::nxv4bf16, MVT::nxv4f32, 1},
3343 {ISD::FP_ROUND, MVT::nxv8bf16, MVT::nxv8f32, 3},
3344 {ISD::FP_ROUND, MVT::nxv2bf16, MVT::nxv2f64, 2},
3345 {ISD::FP_ROUND, MVT::nxv4bf16, MVT::nxv4f64, 5},
3346 {ISD::FP_ROUND, MVT::nxv8bf16, MVT::nxv8f64, 11},
3347 };
3348
3349 if (ST->hasBF16())
3351 BF16Tbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
3352 return AdjustCost(Entry->Cost);
3353
3354
3355
3356
3357
3358
3359
3360 const unsigned int SVE_EXT_COST = 1;
3361 const unsigned int SVE_FCVT_COST = 1;
3362 const unsigned int SVE_UNPACK_ONCE = 4;
3363 const unsigned int SVE_UNPACK_TWICE = 16;
3364
3366 {ISD::TRUNCATE, MVT::v2i8, MVT::v2i64, 1},
3367 {ISD::TRUNCATE, MVT::v2i16, MVT::v2i64, 1},
3368 {ISD::TRUNCATE, MVT::v2i32, MVT::v2i64, 1},
3369 {ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 1},
3370 {ISD::TRUNCATE, MVT::v4i8, MVT::v4i64, 3},
3371 {ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1},
3372 {ISD::TRUNCATE, MVT::v4i16, MVT::v4i64, 2},
3373 {ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1},
3374 {ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 1},
3375 {ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2},
3376 {ISD::TRUNCATE, MVT::v8i8, MVT::v8i64, 4},
3377 {ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1},
3378 {ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 3},
3379 {ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 2},
3380 {ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 1},
3381 {ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 3},
3382 {ISD::TRUNCATE, MVT::v16i8, MVT::v16i64, 7},
3383 {ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 2},
3384 {ISD::TRUNCATE, MVT::v16i16, MVT::v16i64, 6},
3385 {ISD::TRUNCATE, MVT::v16i32, MVT::v16i64, 4},
3386
3387
3418 {ISD::TRUNCATE, MVT::nxv16i8, MVT::nxv16i16, 1},
3419 {ISD::TRUNCATE, MVT::nxv16i8, MVT::nxv16i32, 3},
3420 {ISD::TRUNCATE, MVT::nxv16i8, MVT::nxv16i64, 7},
3421
3422
3439
3440
3441 {ISD::FP_EXTEND, MVT::f64, MVT::f32, 1},
3442 {ISD::FP_EXTEND, MVT::v2f64, MVT::v2f32, 1},
3443 {ISD::FP_EXTEND, MVT::v4f64, MVT::v4f32, 2},
3444
3445 {ISD::FP_EXTEND, MVT::f32, MVT::f16, 1},
3446 {ISD::FP_EXTEND, MVT::f64, MVT::f16, 1},
3447 {ISD::FP_EXTEND, MVT::v4f32, MVT::v4f16, 1},
3448 {ISD::FP_EXTEND, MVT::v8f32, MVT::v8f16, 2},
3449 {ISD::FP_EXTEND, MVT::v2f64, MVT::v2f16, 2},
3450 {ISD::FP_EXTEND, MVT::v4f64, MVT::v4f16, 3},
3451 {ISD::FP_EXTEND, MVT::v8f64, MVT::v8f16, 6},
3452
3453 {ISD::FP_EXTEND, MVT::f32, MVT::bf16, 1},
3454 {ISD::FP_EXTEND, MVT::f64, MVT::bf16, 2},
3455 {ISD::FP_EXTEND, MVT::v4f32, MVT::v4bf16, 1},
3456 {ISD::FP_EXTEND, MVT::v8f32, MVT::v8bf16, 2},
3457 {ISD::FP_EXTEND, MVT::v2f64, MVT::v2bf16, 2},
3458 {ISD::FP_EXTEND, MVT::v4f64, MVT::v4bf16, 3},
3459 {ISD::FP_EXTEND, MVT::v8f64, MVT::v8bf16, 6},
3460
3462 {ISD::FP_ROUND, MVT::v2f32, MVT::v2f64, 1},
3463 {ISD::FP_ROUND, MVT::v4f32, MVT::v4f64, 2},
3464
3467 {ISD::FP_ROUND, MVT::v4f16, MVT::v4f32, 1},
3468 {ISD::FP_ROUND, MVT::v8f16, MVT::v8f32, 2},
3469 {ISD::FP_ROUND, MVT::v2f16, MVT::v2f64, 2},
3470 {ISD::FP_ROUND, MVT::v4f16, MVT::v4f64, 3},
3471 {ISD::FP_ROUND, MVT::v8f16, MVT::v8f64, 6},
3472
3473 {ISD::FP_ROUND, MVT::bf16, MVT::f32, 8},
3474 {ISD::FP_ROUND, MVT::bf16, MVT::f64, 9},
3481
3482
3489
3490
3492 SVE_EXT_COST + SVE_FCVT_COST},
3493 {ISD::SINT_TO_FP, MVT::nxv2f16, MVT::nxv2i16, SVE_FCVT_COST},
3494 {ISD::SINT_TO_FP, MVT::nxv2f16, MVT::nxv2i32, SVE_FCVT_COST},
3495 {ISD::SINT_TO_FP, MVT::nxv2f16, MVT::nxv2i64, SVE_FCVT_COST},
3497 SVE_EXT_COST + SVE_FCVT_COST},
3498 {ISD::UINT_TO_FP, MVT::nxv2f16, MVT::nxv2i16, SVE_FCVT_COST},
3499 {ISD::UINT_TO_FP, MVT::nxv2f16, MVT::nxv2i32, SVE_FCVT_COST},
3500 {ISD::UINT_TO_FP, MVT::nxv2f16, MVT::nxv2i64, SVE_FCVT_COST},
3501
3502
3504 SVE_EXT_COST + SVE_FCVT_COST},
3505 {ISD::SINT_TO_FP, MVT::nxv4f16, MVT::nxv4i16, SVE_FCVT_COST},
3506 {ISD::SINT_TO_FP, MVT::nxv4f16, MVT::nxv4i32, SVE_FCVT_COST},
3508 SVE_EXT_COST + SVE_FCVT_COST},
3509 {ISD::UINT_TO_FP, MVT::nxv4f16, MVT::nxv4i16, SVE_FCVT_COST},
3510 {ISD::UINT_TO_FP, MVT::nxv4f16, MVT::nxv4i32, SVE_FCVT_COST},
3511
3512
3514 SVE_EXT_COST + SVE_FCVT_COST},
3515 {ISD::SINT_TO_FP, MVT::nxv8f16, MVT::nxv8i16, SVE_FCVT_COST},
3517 SVE_EXT_COST + SVE_FCVT_COST},
3518 {ISD::UINT_TO_FP, MVT::nxv8f16, MVT::nxv8i16, SVE_FCVT_COST},
3519
3520
3522 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3524 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3525
3526
3531
3532
3534 SVE_EXT_COST + SVE_FCVT_COST},
3535 {ISD::SINT_TO_FP, MVT::nxv2f32, MVT::nxv2i16, SVE_FCVT_COST},
3536 {ISD::SINT_TO_FP, MVT::nxv2f32, MVT::nxv2i32, SVE_FCVT_COST},
3537 {ISD::SINT_TO_FP, MVT::nxv2f32, MVT::nxv2i64, SVE_FCVT_COST},
3539 SVE_EXT_COST + SVE_FCVT_COST},
3540 {ISD::UINT_TO_FP, MVT::nxv2f32, MVT::nxv2i16, SVE_FCVT_COST},
3541 {ISD::UINT_TO_FP, MVT::nxv2f32, MVT::nxv2i32, SVE_FCVT_COST},
3542 {ISD::UINT_TO_FP, MVT::nxv2f32, MVT::nxv2i64, SVE_FCVT_COST},
3543
3544
3549
3550
3552 SVE_EXT_COST + SVE_FCVT_COST},
3553 {ISD::SINT_TO_FP, MVT::nxv4f32, MVT::nxv4i16, SVE_FCVT_COST},
3554 {ISD::SINT_TO_FP, MVT::nxv4f32, MVT::nxv4i32, SVE_FCVT_COST},
3556 SVE_EXT_COST + SVE_FCVT_COST},
3557 {ISD::UINT_TO_FP, MVT::nxv4f32, MVT::nxv4i16, SVE_FCVT_COST},
3558 {ISD::SINT_TO_FP, MVT::nxv4f32, MVT::nxv4i32, SVE_FCVT_COST},
3559
3560
3565
3566
3568 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3570 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3572 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3574 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3575
3576
3578 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3580 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3581
3582
3585
3586
3593
3594
3596 SVE_EXT_COST + SVE_FCVT_COST},
3597 {ISD::SINT_TO_FP, MVT::nxv2f64, MVT::nxv2i16, SVE_FCVT_COST},
3598 {ISD::SINT_TO_FP, MVT::nxv2f64, MVT::nxv2i32, SVE_FCVT_COST},
3599 {ISD::SINT_TO_FP, MVT::nxv2f64, MVT::nxv2i64, SVE_FCVT_COST},
3601 SVE_EXT_COST + SVE_FCVT_COST},
3602 {ISD::UINT_TO_FP, MVT::nxv2f64, MVT::nxv2i16, SVE_FCVT_COST},
3603 {ISD::UINT_TO_FP, MVT::nxv2f64, MVT::nxv2i32, SVE_FCVT_COST},
3604 {ISD::UINT_TO_FP, MVT::nxv2f64, MVT::nxv2i64, SVE_FCVT_COST},
3605
3606
3609
3610
3612 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3614 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3616 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3618 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3620 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3622 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3623
3624
3626 SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3628 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3630 SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3632 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3633
3634
3641
3642
3649
3650
3655
3656
3665
3666
3673
3674
3685
3686
3697
3698
3703
3704
3711
3712
3717
3718
3729
3730
3739
3740
3749
3750
3754
3755
3756 {ISD::FP_ROUND, MVT::nxv2bf16, MVT::nxv2f32, 8},
3757 {ISD::FP_ROUND, MVT::nxv4bf16, MVT::nxv4f32, 8},
3758 {ISD::FP_ROUND, MVT::nxv8bf16, MVT::nxv8f32, 17},
3759
3760
3764
3765
3766 {ISD::FP_ROUND, MVT::nxv2bf16, MVT::nxv2f64, 9},
3767 {ISD::FP_ROUND, MVT::nxv4bf16, MVT::nxv4f64, 19},
3768 {ISD::FP_ROUND, MVT::nxv8bf16, MVT::nxv8f64, 39},
3769
3770
3774
3775
3776 {ISD::FP_EXTEND, MVT::nxv2f32, MVT::nxv2f16, 1},
3777 {ISD::FP_EXTEND, MVT::nxv4f32, MVT::nxv4f16, 1},
3778 {ISD::FP_EXTEND, MVT::nxv8f32, MVT::nxv8f16, 2},
3779
3780
3781 {ISD::FP_EXTEND, MVT::nxv2f32, MVT::nxv2bf16, 1},
3782 {ISD::FP_EXTEND, MVT::nxv4f32, MVT::nxv4bf16, 1},
3783 {ISD::FP_EXTEND, MVT::nxv8f32, MVT::nxv8bf16, 4},
3784
3785
3786 {ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f16, 1},
3787 {ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f16, 2},
3788 {ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8f16, 4},
3789
3790
3791 {ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2bf16, 2},
3792 {ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4bf16, 6},
3793 {ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8bf16, 14},
3794
3795
3796 {ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f32, 1},
3797 {ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f32, 2},
3798 {ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8f32, 6},
3799
3800
3801 {ISD::BITCAST, MVT::nxv2f16, MVT::nxv2i16, 0},
3802 {ISD::BITCAST, MVT::nxv4f16, MVT::nxv4i16, 0},
3803 {ISD::BITCAST, MVT::nxv2f32, MVT::nxv2i32, 0},
3804
3805
3806 {ISD::BITCAST, MVT::nxv2i16, MVT::nxv2f16, 0},
3807 {ISD::BITCAST, MVT::nxv4i16, MVT::nxv4f16, 0},
3808 {ISD::BITCAST, MVT::nxv2i32, MVT::nxv2f32, 0},
3809
3810
3811
3812
3819
3826 };
3827
3828
3829
3830
3831 EVT WiderTy = SrcTy.bitsGT(DstTy) ? SrcTy : DstTy;
3834 ST->useSVEForFixedLengthVectors(WiderTy)) {
3835 std::pair<InstructionCost, MVT> LT =
3837 unsigned NumElements =
3839 return AdjustCost(
3840 LT.first *
3845 }
3846
3848 ConversionTbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
3849 return AdjustCost(Entry->Cost);
3850
3856 {ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f16, 2},
3858 {ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f16, 2},
3862 {ISD::FP_TO_SINT, MVT::v8i32, MVT::v8f16, 4},
3864 {ISD::FP_TO_SINT, MVT::v16i8, MVT::v16f16, 3},
3866 {ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f16, 2},
3868 {ISD::FP_TO_SINT, MVT::v16i32, MVT::v16f16, 8},
3870 {ISD::UINT_TO_FP, MVT::v8f16, MVT::v8i8, 2},
3871 {ISD::SINT_TO_FP, MVT::v8f16, MVT::v8i8, 2},
3872 {ISD::UINT_TO_FP, MVT::v16f16, MVT::v16i8, 4},
3873 {ISD::SINT_TO_FP, MVT::v16f16, MVT::v16i8, 4},
3874 };
3875
3876 if (ST->hasFullFP16())
3878 FP16Tbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
3879 return AdjustCost(Entry->Cost);
3880
3881
3882
3886 return AdjustCost(
3888 getCastInstrCost(Opcode, Dst->getScalarType(), Src->getScalarType(),
3894
3897 ST->isSVEorStreamingSVEAvailable() &&
3898 TLI->getTypeAction(Src->getContext(), SrcTy) ==
3900 TLI->getTypeAction(Dst->getContext(), DstTy) ==
3902
3903
3904
3905
3909 Opcode, LegalTy, Src, CCH, CostKind, I);
3912 return Part1 + Part2;
3913 }
3914
3915
3916
3919 ST->isSVEorStreamingSVEAvailable() && TLI->isTypeLegal(DstTy))
3921
3922 return AdjustCost(
3924}
3925
3930
3931
3932 assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
3933 "Invalid opcode");
3934
3935
3936
3938
3939
3941
3942
3943
3945 CostKind, Index, nullptr, nullptr);
3946
3947
3949 auto DstVT = TLI->getValueType(DL, Dst);
3950 auto SrcVT = TLI->getValueType(DL, Src);
3951
3952
3953
3954
3955 if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
3958
3959
3960
3961 if (DstVT.getFixedSizeInBits() < SrcVT.getFixedSizeInBits())
3964
3965 switch (Opcode) {
3966 default:
3968
3969
3970
3971 case Instruction::SExt:
3972 return Cost;
3973
3974
3975
3976 case Instruction::ZExt:
3977 if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
3978 return Cost;
3979 }
3980
3981
3984}
3985
3990 return Opcode == Instruction::PHI ? 0 : 1;
3992
3993 return 0;
3994}
3995
// Shared cost helper for vector insert/extract instructions. Call sites in this
// file pass (Opcode, Val, CostKind, Index) and optionally an instruction, a
// scalar value and ScalarUserAndIdx. Several parameter lines and statements are
// missing from this listing, so only the visible logic is described below.
// Returns 0 for the cases treated as free; the visible fallback cost is
// ST->getVectorInsertExtractBaseCost().
3996InstructionCost AArch64TTIImpl::getVectorInstrCostHelper(
3999 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) const {
4001
// Index-specific handling; skipped when Index equals -1U (presumably the
// "lane unknown" sentinel -- confirm against callers).
4002 if (Index != -1U) {
4003
4005
4006
// The legalized type (LT, computed on a line not visible here) is not a
// vector: report a cost of 0.
4007 if (!LT.second.isVector())
4008 return 0;
4009
4010
4011
// For fixed-length legalized types, wrap the index into the lane count of the
// legalized vector.
4012 if (LT.second.isFixedLengthVector()) {
4013 unsigned Width = LT.second.getVectorNumElements();
4014 Index = Index % Width;
4015 }
4016
4017
4018
4019
4020
4022 return 0;
4023
4024
4025
4026
4027
4030 ? 0
4032
4033
4034
4037 ? 2
4039
4040
4041
4042
4043
4044
4045 }
4046
4047
4048
4049
4050
4051
4052
4053
4054
4055
4056
4057
4058
4059
4060
4061
// Predicate deciding whether an extractelement is free because every user is a
// scalar fmul (see IsUserFMulScalarTy) and the lane checks below pass.
4062 auto ExtractCanFuseWithFmul = [&]() {
4063
// Lane 0 never qualifies via this path.
4064 if (Index == 0)
4065 return false;
4066
4067
4068
// Accepts float and double, and half only when the subtarget has full FP16.
4069 auto IsAllowedScalarTy = [&](const Type *T) {
4070 return T->isFloatTy() || T->isDoubleTy() ||
4071 (T->isHalfTy() && ST->hasFullFP16());
4072 };
4073
4074
// True when the user is a non-vector FMul binary operator (BO is initialized
// on a line not visible here).
4075 auto IsUserFMulScalarTy = [](const Value *EEUser) {
4076
4078 return BO && BO->getOpcode() == BinaryOperator::FMul &&
4079 !BO->getType()->isVectorTy();
4080 };
4081
4082
4083
// True when lane Idx is lane 0, or when its bit offset (Idx * EltSz) is a
// multiple of RegWidth. A RegWidth of 0 never matches.
4084 auto IsExtractLaneEquivalentToZero = [&](unsigned Idx, unsigned EltSz) {
4085 auto RegWidth =
4088 return Idx == 0 || (RegWidth != 0 && (Idx * EltSz) % RegWidth == 0);
4089 };
4090
4091
4092
4094 return false;
4095
// Scalar-value path: used when a Scalar value was supplied by the caller.
4096 if (Scalar) {
4097 DenseMap<User *, unsigned> UserToExtractIdx;
// Every user of Scalar must be a scalar fmul; otherwise give up.
4098 for (auto *U : Scalar->users()) {
4099 if (!IsUserFMulScalarTy(U))
4100 return false;
4101
4102
// operator[] default-inserts the key; only the presence of the entry is
// established here.
4103 UserToExtractIdx[U];
4104 }
4105 if (UserToExtractIdx.empty())
4106 return false;
// Walk the (scalar, user, lane) tuples. The inner loop variable U shadows the
// structured-binding U.
4107 for (auto &[S, U, L] : ScalarUserAndIdx) {
4108 for (auto *U : S->users()) {
4109 if (UserToExtractIdx.contains(U)) {
4111 auto *Op0 = FMul->getOperand(0);
4112 auto *Op1 = FMul->getOperand(1);
// NOTE(review): this condition holds for every operand pair (either both
// operands equal S, or at least one differs) -- verify the intended test.
4113 if ((Op0 == S && Op1 == S) || Op0 != S || Op1 != S) {
4115 break;
4116 }
4117 }
4118 }
4119 }
// Reject when the lane check fails for this Index together with a second
// condition whose line is not visible in this listing.
4120 for (auto &[U, L] : UserToExtractIdx) {
4121 if (!IsExtractLaneEquivalentToZero(Index, Val->getScalarSizeInBits()) &&
4123 return false;
4124 }
4125 } else {
4127
// Instruction path: IdxOp and EE are defined on lines not visible here; a null
// IdxOp disqualifies the extract.
4129 if (!IdxOp)
4130 return false;
4131
// Requires at least one user, and every user must be a scalar fmul.
4132 return !EE->users().empty() && all_of(EE->users(), [&](const User *U) {
4133 if (!IsUserFMulScalarTy(U))
4134 return false;
4135
4136
4137
// Look at the fmul's other operand; if it is also an extract (OtherEE), its
// index operand must pass the lane-equivalent-to-zero test.
4138 const auto *BO = cast(U);
4139 const auto *OtherEE = dyn_cast(
4140 BO->getOperand(0) == EE ? BO->getOperand(1) : BO->getOperand(0));
4141 if (OtherEE) {
4142 const auto *IdxOp = dyn_cast(OtherEE->getIndexOperand());
4143 if (!IdxOp)
4144 return false;
4145 return IsExtractLaneEquivalentToZero(
4146 cast(OtherEE->getIndexOperand())
4147 ->getValue()
4148 .getZExtValue(),
4149 OtherEE->getType()->getScalarSizeInBits());
4150 }
4151 return true;
4152 });
4153 }
4154 return true;
4155 };
4156
// An extractelement with an instruction or scalar context that satisfies the
// predicate above costs 0.
4157 if (Opcode == Instruction::ExtractElement && (I || Scalar) &&
4158 ExtractCanFuseWithFmul())
4159 return 0;
4160
4161
// Else-branch of the final cost expression (its condition line is not visible
// here): the subtarget's base insert/extract cost.
4163 : ST->getVectorInsertExtractBaseCost();
4164}
4165
4168 unsigned Index,
4169 const Value *Op0,
4170 const Value *Op1) const {
4171
4172
4173
4174 if (Opcode == Instruction::InsertElement && Index == 0 && Op0 &&
4176 return 0;
4177 return getVectorInstrCostHelper(Opcode, Val, CostKind, Index);
4178}
4179
4183 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) const {
4184 return getVectorInstrCostHelper(Opcode, Val, CostKind, Index, nullptr, Scalar,
4185 ScalarUserAndIdx);
4186}
4187
4191 unsigned Index) const {
4192 return getVectorInstrCostHelper(I.getOpcode(), Val, CostKind, Index, &I);
4193}
4194
4198 unsigned Index) const {
4201 Index);
4202
4203
4204
4205
4206
4207
4209 ? 2
4210 : ST->getVectorInsertExtractBaseCost() + 1;
4211}
4212
4214 VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract,
4219 if (Ty->getElementType()->isFloatingPointTy())
4222 unsigned VecInstCost =
4224 return DemandedElts.popcount() * (Insert + Extract) * VecInstCost;
4225}
4226
4231 if (!Ty->getScalarType()->isHalfTy() && !Ty->getScalarType()->isBFloatTy())
4232 return std::nullopt;
4233 if (Ty->getScalarType()->isHalfTy() && ST->hasFullFP16())
4234 return std::nullopt;
4235 if (CanUseSVE && Ty->isScalableTy() && ST->hasSVEB16B16() &&
4236 ST->isNonStreamingSVEorSME2Available())
4237 return std::nullopt;
4238
4244 Cost += InstCost(PromotedTy);
4245 if (IncludeTrunc)
4248 return Cost;
4249}
4250
4255
4256
4257
4258
4259
4263
4264
4267 Op2Info, Args, CxtI);
4268
4269
4271 int ISD = TLI->InstructionOpcodeToISD(Opcode);
4272
4273
4274
4278 Ty, CostKind, Op1Info, Op2Info, true,
4279
4281 [&](Type *PromotedTy) {
4283 Op1Info, Op2Info);
4284 }))
4285 return *PromotedCost;
4286
4287
4288
4289
4290
4291 if (Type *ExtTy = isBinExtWideningInstruction(Opcode, Ty, Args)) {
4292 if (ExtTy != Ty)
4296 return LT.first;
4297 }
4298
4299 switch (ISD) {
4300 default:
4302 Op2Info);
4305
4306
4307
4308
4309
4310
4311
4312
4313
4314
4315
4316
4317
4318
4319
4320
4321
4322
4323
4324
4325
4326
4327
4328
4329
4330
4331
4332
4333
4334
4335
4336
4337
4338
4339
4340
4341
4342
4343
4344
4345
4356
4357
4358 auto VT = TLI->getValueType(DL, Ty);
4359 if (VT.isScalarInteger() && VT.getSizeInBits() <= 64) {
4361
4362 return ISD == ISD::SDIV ? (3 * AddCost + AsrCost)
4363 : (3 * AsrCost + AddCost);
4364 } else {
4365 return MulCost + AsrCost + 2 * AddCost;
4366 }
4367 } else if (VT.isVector()) {
4370
4371
4372
4373
4374
4377 if (Ty->isScalableTy() && ST->hasSVE())
4378 Cost += 2 * AsrCost;
4379 else {
4381 UsraCost +
4383 ? (LT.second.getScalarType() == MVT::i64 ? 1 : 2) * AsrCost
4384 : 2 * AddCost);
4385 }
4386 return Cost;
4387 } else if (LT.second == MVT::v2i64) {
4388 return VT.getVectorNumElements() *
4392 } else {
4393
4394
4395 if (Ty->isScalableTy() && ST->hasSVE())
4396 return MulCost + 2 * AddCost + 2 * AsrCost;
4397 return 2 * MulCost + AddCost + AsrCost + UsraCost;
4398 }
4399 }
4400 }
4402 LT.second.isFixedLengthVector()) {
4403
4404
4405
4406
4407 auto ExtractCost = 2 * getVectorInstrCost(Instruction::ExtractElement, Ty,
4408 CostKind, -1, nullptr, nullptr);
4409 auto InsertCost = getVectorInstrCost(Instruction::InsertElement, Ty,
4410 CostKind, -1, nullptr, nullptr);
4412 return ExtractCost + InsertCost +
4416 }
4417 [[fallthrough]];
4420 auto VT = TLI->getValueType(DL, Ty);
4422
4431
4433
4434
4435
4436 bool HasMULH = VT == MVT::i64 || LT.second == MVT::nxv2i64 ||
4437 LT.second == MVT::nxv4i32 || LT.second == MVT::nxv8i16 ||
4438 LT.second == MVT::nxv16i8;
4439 bool Is128bit = LT.second.is128BitVector();
4440
4450 InstructionCost DivCost = MulCost * (Is128bit ? 2 : 1) +
4451 (HasMULH ? 0 : ShrCost) +
4452 AddCost * 2 + ShrCost;
4453 return DivCost + (ISD == ISD::UREM ? MulCost + AddCost : 0);
4454 }
4455 }
4456
4457
4458
4459
4460 if (!VT.isVector() && VT.getSizeInBits() > 64)
4462
4464 Opcode, Ty, CostKind, Op1Info, Op2Info);
4466 if (TLI->isOperationLegalOrCustom(ISD, LT.second) && ST->hasSVE()) {
4467
4468
4470 Ty->getPrimitiveSizeInBits().getFixedValue() < 128) {
4478
4480 if (nullptr != Entry)
4481 return Entry->Cost;
4482 }
4483
4484
4485 if (LT.second.getScalarType() == MVT::i8)
4487 else if (LT.second.getScalarType() == MVT::i16)
4489 return Cost;
4490 } else {
4491
4492
4493
4494
4499 Opcode, Ty->getScalarType(), CostKind, Op1Info, Op2Info);
4500 return (4 + DivCost) * VTy->getNumElements();
4501 }
4502 }
4503
4504
4506 -1, nullptr, nullptr);
4508 nullptr, nullptr);
4509 }
4510
4511
4512
4514 }
4515 return Cost;
4516 }
4518
4519
4520 if (LT.second == MVT::v2i64 && ST->hasSVE())
4521 return LT.first;
4522
4523
4524
4525
4526
4527
4528
4529
4530
4531
4532
4533 if (LT.second != MVT::v2i64)
4534 return LT.first;
4535 return cast(Ty)->getElementCount().getKnownMinValue() *
4538 nullptr, nullptr) *
4539 2 +
4541 nullptr, nullptr));
4549
4550
4551 return LT.first;
4552
4553 case ISD::FNEG:
4554
4555 if ((Ty->isFloatTy() || Ty->isDoubleTy() ||
4556 (Ty->isHalfTy() && ST->hasFullFP16())) &&
4557 CxtI &&
4561 return 0;
4562 [[fallthrough]];
4565 if (!Ty->getScalarType()->isFP128Ty())
4566 return LT.first;
4567 [[fallthrough]];
4570
4571
4572 if (!Ty->getScalarType()->isFP128Ty())
4573 return 2 * LT.first;
4574
4576 Op2Info);
4578
4579
4580 if (!Ty->isVectorTy())
4583 Op2Info);
4584 }
4585}
4586
4589 const SCEV *Ptr,
4591
4592
4593
4594
4596 int MaxMergeDistance = 64;
4597
4600 return NumVectorInstToHideOverhead;
4601
4602
4603
4604 return 1;
4605}
4606
4607
4608
4610 unsigned Opcode1, unsigned Opcode2) const {
4613 if (.hasInstrSchedModel())
4614 return false;
4615
4617 Sched.getSchedClassDesc(TII->get(Opcode1).getSchedClass());
4619 Sched.getSchedClassDesc(TII->get(Opcode2).getSchedClass());
4620
4621
4622
4623
4625 "Cannot handle variant scheduling classes without an MI");
4627 return false;
4628
4631}
4632
4637
4638
4640
4641 const int AmortizationCost = 20;
4642
4643
4644
4649 VecPred = CurrentPred;
4650 }
4651
4652
4657 static const auto ValidMinMaxTys = {
4658 MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
4659 MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32, MVT::v2f64};
4660 static const auto ValidFP16MinMaxTys = {MVT::v4f16, MVT::v8f16};
4661
4663 if (any_of(ValidMinMaxTys, [<](MVT M) { return M == LT.second; }) ||
4664 (ST->hasFullFP16() &&
4665 any_of(ValidFP16MinMaxTys, [<](MVT M) { return M == LT.second; })))
4666 return LT.first;
4667 }
4668
4670 {Instruction::Select, MVT::v2i1, MVT::v2f32, 2},
4671 {Instruction::Select, MVT::v2i1, MVT::v2f64, 2},
4672 {Instruction::Select, MVT::v4i1, MVT::v4f32, 2},
4673 {Instruction::Select, MVT::v4i1, MVT::v4f16, 2},
4674 {Instruction::Select, MVT::v8i1, MVT::v8f16, 2},
4675 {Instruction::Select, MVT::v16i1, MVT::v16i16, 16},
4676 {Instruction::Select, MVT::v8i1, MVT::v8i32, 8},
4677 {Instruction::Select, MVT::v16i1, MVT::v16i32, 16},
4678 {Instruction::Select, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost},
4679 {Instruction::Select, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost},
4680 {Instruction::Select, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost}};
4681
4682 EVT SelCondTy = TLI->getValueType(DL, CondTy);
4683 EVT SelValTy = TLI->getValueType(DL, ValTy);
4688 return Entry->Cost;
4689 }
4690 }
4691
4692 if (Opcode == Instruction::FCmp) {
4694 ValTy, CostKind, Op1Info, Op2Info, false,
4695
4696 false, [&](Type *PromotedTy) {
4699 CostKind, Op1Info, Op2Info);
4702 Instruction::Trunc,
4706 return Cost;
4707 }))
4708 return *PromotedCost;
4709
4711
4712 if (LT.second.getScalarType() != MVT::f64 &&
4713 LT.second.getScalarType() != MVT::f32 &&
4714 LT.second.getScalarType() != MVT::f16)
4715 return LT.first * getCallInstrCost( nullptr, ValTy,
4717
4718
4719 unsigned Factor = 1;
4722 Factor = 2;
4726 Factor = 3;
4729 Factor = 3;
4730
4734 AArch64::FCMEQv4f32))
4735 Factor *= 2;
4736
4738 }
4739
4740
4741
4742
4743
4746 TLI->isTypeLegal(TLI->getValueType(DL, ValTy)) &&
4749 return 0;
4750
4751
4754 return 0;
4755
4756
4759 return 0;
4760 }
4761
4762
4763
4765 Op1Info, Op2Info, I);
4766}
4767
4771 if (ST->requiresStrictAlign()) {
4772
4773
4775 }
4776 Options.AllowOverlappingLoads = true;
4777 Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
4779
4780
4781
4782 Options.LoadSizes = {8, 4, 2, 1};
4783 Options.AllowedTailExpansions = {3, 5, 6};
4785}
4786
4788 return ST->hasSVE();
4789}
4790
4794 switch (MICA.getID()) {
4795 case Intrinsic::masked_scatter:
4796 case Intrinsic::masked_gather:
4798 case Intrinsic::masked_load:
4799 case Intrinsic::masked_store:
4801 }
4803}
4804
4809
4813 if (!LT.first.isValid())
4815
4816
4818 if (VT->getElementType()->isIntegerTy(1))
4820
4821
4822
4823
4824
4827
4828 return LT.first;
4829}
4830
4831
4832
4835 assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
4836 "Should be called on only load or stores.");
4837 switch (Opcode) {
4838 case Instruction::Load:
4841 return ST->getGatherOverhead();
4842 break;
4843 case Instruction::Store:
4846 return ST->getScatterOverhead();
4847 break;
4848 default:
4850 }
4851}
4852
4856
4857 unsigned Opcode = (MICA.getID() == Intrinsic::masked_gather ||
4858 MICA.getID() == Intrinsic::vp_gather)
4859 ? Instruction::Load
4860 : Instruction::Store;
4861
4865
4870 if (!LT.first.isValid())
4872
4873
4874 if (!LT.second.isVector() ||
4876 VT->getElementType()->isIntegerTy(1))
4878
4879
4880
4881
4882
4885
4886 ElementCount LegalVF = LT.second.getVectorElementCount();
4889 {TTI::OK_AnyValue, TTI::OP_None}, I);
4890
4893}
4894
4898
4900 Align Alignment,
4905 EVT VT = TLI->getValueType(DL, Ty, true);
4906
4907 if (VT == MVT::Other)
4910
4912 if (!LT.first.isValid())
4914
4915
4916
4917
4918
4919
4922 (VTy->getElementType()->isIntegerTy(1) &&
4923 !VTy->getElementCount().isKnownMultipleOf(
4926
4927
4929 return LT.first;
4930
4932 return 1;
4933
4934 if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
4935 LT.second.is128BitVector() && Alignment < Align(16)) {
4936
4937
4938
4939
4940
4941 const int AmortizationCost = 6;
4942
4943 return LT.first * 2 * AmortizationCost;
4944 }
4945
4946
4947 if (Ty->isPtrOrPtrVectorTy())
4948 return LT.first;
4949
4951
4952 if (Ty->getScalarSizeInBits() != LT.second.getScalarSizeInBits()) {
4953
4954 if (VT == MVT::v4i8)
4955 return 2;
4956
4958 }
4961 if ((EltSize) || EltSize < 8 || EltSize > 64 ||
4963 return LT.first;
4964
4965
4967 return LT.first;
4968
4969
4970
4971
4976 while (!TypeWorklist.empty()) {
4981 continue;
4982 }
4983
4984 unsigned PrevPow2 = NextPowerOf2(CurrNumElements) / 2;
4988 }
4989 return Cost;
4990 }
4991
4992 return LT.first;
4993}
4994
4998 bool UseMaskForCond, bool UseMaskForGaps) const {
4999 assert(Factor >= 2 && "Invalid interleave factor");
5001
5004
5005
5006
5007
5008
5011
5012
5013
5014 if (!VecTy->isScalableTy() && (UseMaskForCond || UseMaskForGaps))
5016
5017 if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
5018 unsigned MinElts = VecVTy->getElementCount().getKnownMinValue();
5019 auto *SubVecTy =
5021 VecVTy->getElementCount().divideCoefficientBy(Factor));
5022
5023
5024
5025
5026 bool UseScalable;
5027 if (MinElts % Factor == 0 &&
5028 TLI->isLegalInterleavedAccessType(SubVecTy, DL, UseScalable))
5029 return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL, UseScalable);
5030 }
5031
5034 UseMaskForCond, UseMaskForGaps);
5035}
5036
5041 for (auto *I : Tys) {
5042 if (->isVectorTy())
5043 continue;
5045 128)
5048 }
5049 return Cost;
5050}
5051
5053 return ST->getMaxInterleaveFactor();
5054}
5055
5056
5057
5058
5059
// File-local helper that limits the unroll count (UP.MaxCount) according to the
// number of strided loads found in loop L; its debug output is tagged
// "falkor-hwpf". The line naming the function and its parameters is missing
// from this listing; the visible body reads L, SE and UP.
5060static void
// Upper bound used both for the early exit below and for deriving MaxCount.
5063 enum { MaxStridedLoads = 7 };
// The following lines are presumably the body of the countStridedLoads callable
// invoked further down (its header line is not visible here).
5065 int StridedLoads = 0;
5066
5067
5068 for (const auto BB : L->blocks()) {
5069 for (auto &I : *BB) {
// LMemI and PtrValue are initialized on lines not visible here; instructions
// with a null LMemI are ignored.
5071 if (!LMemI)
5072 continue;
5073
// A loop-invariant PtrValue is not counted.
5075 if (L->isLoopInvariant(PtrValue))
5076 continue;
5077
// Only pointers whose SCEV is an affine add-recurrence are counted.
5078 const SCEV *LSCEV = SE.getSCEV(PtrValue);
5080 if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
5081 continue;
5082
5083
5084
5085
5086 ++StridedLoads;
5087
5088
// Stop early once the count exceeds MaxStridedLoads / 2: from then on
// MaxStridedLoads / StridedLoads is 1, so MaxCount below is 1 regardless, and
// the returned count never exceeds MaxStridedLoads.
5089 if (StridedLoads > MaxStridedLoads / 2)
5090 return StridedLoads;
5091 }
5092 }
5093 return StridedLoads;
5094 };
5095
5096 int StridedLoads = countStridedLoads(L, SE);
5097 LLVM_DEBUG(dbgs() << "falkor-hwpf: detected " << StridedLoads
5098 << " strided loads\n");
5099
5100
// With at least one strided load, set MaxCount to 1 << Log2_32(quotient), i.e.
// a power of two derived from MaxStridedLoads / StridedLoads.
5101 if (StridedLoads) {
5102 UP.MaxCount = 1 << Log2_32(MaxStridedLoads / StridedLoads);
5103 LLVM_DEBUG(dbgs() << "falkor-hwpf: setting unroll MaxCount to "
5105 }
5106}
5107
5108
5109
5110
5111
5114 unsigned *FinalSize) {
5115
5117
5118 for (auto *BB : L->getBlocks()) {
5119 for (auto &I : *BB) {
5123
5124
5125 if (!Cost.isValid())
5126 return false;
5127
5128 LoopCost += Cost;
5129 if (LoopCost > Budget)
5130 return false;
5131 }
5132 }
5133
5134 if (FinalSize)
5135 *FinalSize = LoopCost.getValue();
5136 return true;
5137}
5138
5141
5142
5143
5146 return false;
5147
5148
5149
5151 if (MaxTC > 0 && MaxTC <= 32)
5152 return false;
5153
5154
5156 return false;
5157
5158
5159
5160
5162 if (Blocks.size() != 2)
5163 return false;
5164
5167 }))
5168 return false;
5169
5170 return true;
5171}
5172
5173
5174
// File-local helper that decides whether and how to runtime-unroll loop L.
// The lines naming the function and its parameters, and the statements that
// write the chosen settings, are missing from this listing; only the visible
// checks are described below.
5175static void
5179
5180
5181
5182
5183
// Only innermost loops with at most 8 blocks are considered.
5184 if (!L->isInnermost() || L->getNumBlocks() > 8)
5185 return;
5186
5187
// Give up when L->getExitBlock() returns null.
5188 if (!L->getExitBlock())
5189 return;
5190
5191
5192
5193
// True for a single-block loop with at least one header phi accepted by
// canParallelizeReductionWhenUnrolling (a further conjunct is on a line not
// visible here).
5194 bool HasParellelizableReductions =
5195 L->getNumBlocks() == 1 &&
5196 any_of(L->getHeader()->phis(),
5197 [&SE, L](PHINode &Phi) {
5198 return canParallelizeReductionWhenUnrolling(Phi, L, &SE);
5199 }) &&
5201 if (HasParellelizableReductions &&
5206 }
5207
5212 return;
5213
5215 return;
5216
5218 return;
5219
5220
5222
5223 if (HasParellelizableReductions) {
5227 }
5228
5229
5230
5231
5232 BasicBlock *Header = L->getHeader();
5233 BasicBlock *Latch = L->getLoopLatch();
// Case 1: the header is also the latch.
5234 if (Header == Latch) {
5235
// Size is filled in on a line not visible here (presumably via the FinalSize
// out-parameter of a size-budget check using Width -- confirm).
5236 unsigned Size;
5237 unsigned Width = 10;
5239 return;
5240
5241
5242
// Try unroll counts 1..8 while the unrolled size stays at or below 48. Prefer
// a count whose total size is a multiple of MaxInstsPerLine (16), otherwise the
// one leaving the largest remainder modulo 16.
5243 unsigned MaxInstsPerLine = 16;
5244 unsigned UC = 1;
5245 unsigned BestUC = 1;
5246 unsigned SizeWithBestUC = BestUC * Size;
5247 while (UC <= 8) {
5248 unsigned SizeWithUC = UC * Size;
5249 if (SizeWithUC > 48)
5250 break;
5251 if ((SizeWithUC % MaxInstsPerLine) == 0 ||
5252 (SizeWithBestUC % MaxInstsPerLine) < (SizeWithUC % MaxInstsPerLine)) {
5253 BestUC = UC;
5254 SizeWithBestUC = BestUC * Size;
5255 }
5256 UC++;
5257 }
5258
// No count better than 1 was found: leave the preferences unchanged.
5259 if (BestUC == 1)
5260 return;
5261
// Collect into LoadedValuesPlus the selected instructions and their users; the
// selection conditions and the else-branch are on lines not visible here.
5264 for (auto *BB : L->blocks()) {
5265 for (auto &I : *BB) {
5267 if (!Ptr)
5268 continue;
5271 continue;
5273 LoadedValuesPlus.insert(&I);
5274
5275 for (auto *U : I.users())
5277 LoadedValuesPlus.insert(U);
5278 } else
5280 }
5281 }
5282
// Bail out when the (not visible) quantified check over SI finds operand 0 in
// LoadedValuesPlus as required by that check.
5284 return LoadedValuesPlus.contains(SI->getOperand(0));
5285 }))
5286 return;
5287
5290 return;
5291 }
5292
5293
5294
// Case 2: requires a conditional terminator Term, more than one entry in Preds,
// and at least one predecessor inside the loop (Term and Preds are defined on
// lines not visible here).
5297 if (!Term || !Term->isConditional() || Preds.size() == 1 ||
5299 none_of(Preds, [L](BasicBlock *Pred) { return L->contains(Pred); }))
5300 return;
5301
// Recursive predicate over an instruction's operands, tracking recursion depth;
// its base cases are on lines not visible here.
5302 std::function<bool(Instruction *, unsigned)> DependsOnLoopLoad =
5305 return false;
5306
5308 return true;
5309
// Recurse into operands that are instructions. The inner I shadows the
// parameter I.
5310 return any_of(I->operands(), [&](Value *V) {
5311 auto *I = dyn_cast(V);
5312 return I && DependsOnLoopLoad(I, Depth + 1);
5313 });
5314 };
5319 DependsOnLoopLoad(I, 0)) {
5321 }
5322}
5323
5327
5329
5331
5332
5333
5334
5335 if (L->getLoopDepth() > 1)
5337
5338
5340
5341
5342
5343
5346 for (auto *BB : L->getBlocks()) {
5347 for (auto &I : *BB) {
5348
5349
5350
5351 if (IsVectorized && I.getType()->isVectorTy())
5352 return;
5357 continue;
5358 return;
5359 }
5360
5364 }
5365 }
5366
5367
5368 if (ST->isAppleMLike())
5370 else if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
5373
5374
5375
5379
5381
5382
5384 return;
5385 }
5386
5387
5388
5389
5390
5392 !ST->getSchedModel().isOutOfOrder()) {
5397
5400 }
5401
5402
5403
5405 UP.Force = true;
5406}
5407
5412
5414 Type *ExpectedType,
5415 bool CanCreate) const {
5417 default:
5418 return nullptr;
5419 case Intrinsic::aarch64_neon_st2:
5420 case Intrinsic::aarch64_neon_st3:
5421 case Intrinsic::aarch64_neon_st4: {
5422
5424 if (!CanCreate || !ST)
5425 return nullptr;
5426 unsigned NumElts = Inst->arg_size() - 1;
5427 if (ST->getNumElements() != NumElts)
5428 return nullptr;
5429 for (unsigned i = 0, e = NumElts; i != e; ++i) {
5431 return nullptr;
5432 }
5435 for (unsigned i = 0, e = NumElts; i != e; ++i) {
5437 Res = Builder.CreateInsertValue(Res, L, i);
5438 }
5439 return Res;
5440 }
5441 case Intrinsic::aarch64_neon_ld2:
5442 case Intrinsic::aarch64_neon_ld3:
5443 case Intrinsic::aarch64_neon_ld4:
5444 if (Inst->getType() == ExpectedType)
5445 return Inst;
5446 return nullptr;
5447 }
5448}
5449
5453 default:
5454 break;
5455 case Intrinsic::aarch64_neon_ld2:
5456 case Intrinsic::aarch64_neon_ld3:
5457 case Intrinsic::aarch64_neon_ld4:
5458 Info.ReadMem = true;
5459 Info.WriteMem = false;
5461 break;
5462 case Intrinsic::aarch64_neon_st2:
5463 case Intrinsic::aarch64_neon_st3:
5464 case Intrinsic::aarch64_neon_st4:
5465 Info.ReadMem = false;
5466 Info.WriteMem = true;
5468 break;
5469 }
5470
5472 default:
5473 return false;
5474 case Intrinsic::aarch64_neon_ld2:
5475 case Intrinsic::aarch64_neon_st2:
5476 Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
5477 break;
5478 case Intrinsic::aarch64_neon_ld3:
5479 case Intrinsic::aarch64_neon_st3:
5480 Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
5481 break;
5482 case Intrinsic::aarch64_neon_ld4:
5483 case Intrinsic::aarch64_neon_st4:
5484 Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
5485 break;
5486 }
5487 return true;
5488}
5489
5490
5491
5492
5493
5494
5496 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
5497 bool Considerable = false;
5498 AllowPromotionWithoutCommonHeader = false;
5500 return false;
5501 Type *ConsideredSExtType =
5503 if (I.getType() != ConsideredSExtType)
5504 return false;
5505
5506
5507 for (const User *U : I.users()) {
5509 Considerable = true;
5510
5511
5512
5513 if (GEPInst->getNumOperands() > 2) {
5514 AllowPromotionWithoutCommonHeader = true;
5515 break;
5516 }
5517 }
5518 }
5519 return Considerable;
5520}
5521
5525 return true;
5526
5529 return false;
5530
5547 return true;
5548 default:
5549 return false;
5550 }
5551}
5552
5557
5558
5559
5560
5564
5566
5567 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
5569
5571 if (LT.first > 1) {
5575 }
5576
5577 return LegalizationCost + 2;
5578}
5579
5584 if (LT.first > 1) {
5587 LegalizationCost *= LT.first - 1;
5588 }
5589
5590 int ISD = TLI->InstructionOpcodeToISD(Opcode);
5591 assert(ISD && "Invalid opcode");
5592
5593 switch (ISD) {
5599 return LegalizationCost + 2;
5600 default:
5602 }
5603}
5604
5607 std::optional FMF,
5609
5610
5611
5612
5616
5621
5622
5623 return BaseCost + FixedVTy->getNumElements();
5624 }
5625
5626 if (Opcode != Instruction::FAdd)
5628
5633 return Cost;
5634 }
5635
5638
5640 MVT MTy = LT.second;
5641 int ISD = TLI->InstructionOpcodeToISD(Opcode);
5642 assert(ISD && "Invalid opcode");
5643
5644
5645
5646
5647
5648
5649
5650
5651
5652 static const CostTblEntry CostTblNoPairwise[]{
5660 {ISD::OR, MVT::v8i8, 5},
5661 {ISD::OR, MVT::v16i8, 7},
5662 {ISD::OR, MVT::v4i16, 4},
5663 {ISD::OR, MVT::v8i16, 6},
5664 {ISD::OR, MVT::v2i32, 3},
5665 {ISD::OR, MVT::v4i32, 5},
5666 {ISD::OR, MVT::v2i64, 3},
5667 {ISD::XOR, MVT::v8i8, 5},
5674 {ISD::AND, MVT::v8i8, 5},
5681 };
5682 switch (ISD) {
5683 default:
5684 break;
5687
5688
5689 MTy.isVector() && (EltTy->isFloatTy() || EltTy->isDoubleTy() ||
5690 (EltTy->isHalfTy() && ST->hasFullFP16()))) {
5692 if (ValTy->getElementCount().getFixedValue() >= 2 && NElts >= 2 &&
5694
5695
5696
5697
5698
5699
5700
5701
5702 return (LT.first - 1) + Log2_32(NElts);
5703 }
5704 break;
5706 if (const auto *Entry = CostTableLookup(CostTblNoPairwise, ISD, MTy))
5707 return (LT.first - 1) + Entry->Cost;
5708 break;
5713 if (!Entry)
5714 break;
5719 if (LT.first != 1) {
5720
5721
5725 ExtraCost *= LT.first - 1;
5726 }
5727
5728 auto Cost = ValVTy->getElementType()->isIntegerTy(1) ? 2 : Entry->Cost;
5729 return Cost + ExtraCost;
5730 }
5731 break;
5732 }
5734}
5735
5737 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *VecTy,
5739 EVT VecVT = TLI->getValueType(DL, VecTy);
5740 EVT ResVT = TLI->getValueType(DL, ResTy);
5741
5742 if (Opcode == Instruction::Add && VecVT.isSimple() && ResVT.isSimple() &&
5745
5746
5747
5748
5750 if (((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
5751 RevVTSize <= 32) ||
5752 ((LT.second == MVT::v4i16 || LT.second == MVT::v8i16) &&
5753 RevVTSize <= 32) ||
5754 ((LT.second == MVT::v2i32 || LT.second == MVT::v4i32) &&
5755 RevVTSize <= 64))
5756 return (LT.first - 1) * 2 + 2;
5757 }
5758
5761}
5762
5767 EVT VecVT = TLI->getValueType(DL, VecTy);
5768 EVT ResVT = TLI->getValueType(DL, ResTy);
5769
5770 if (ST->hasDotProd() && VecVT.isSimple() && ResVT.isSimple() &&
5771 RedOpcode == Instruction::Add) {
5773
5774
5775
5776
5777 if ((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
5778 ResVT == MVT::i32)
5779 return LT.first + 2;
5780 }
5781
5784}
5785
5803 };
5804
5805
5806
5807
5808
5811
5814 EVT PromotedVT = LT.second.getScalarType() == MVT::i1
5815 ? TLI->getPromotedVTForPredicate(EVT(LT.second))
5816 : LT.second;
5819 if (Index < 0) {
5820 LegalizationCost =
5825 }
5826
5827
5828
5829 if (LT.second.getScalarType() == MVT::i1) {
5830 LegalizationCost +=
5835 }
5836 const auto *Entry =
5838 assert(Entry && "Illegal Type for Splice");
5839 LegalizationCost += Entry->Cost;
5840 return LegalizationCost * LT.first;
5841}
5842
5844 unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
5849
5852
5853 if (VF.isFixed() && !ST->isSVEorStreamingSVEAvailable() &&
5854 (!ST->isNeonAvailable() || !ST->hasDotProd()))
5856
5857 if ((Opcode != Instruction::Add && Opcode != Instruction::Sub) ||
5860
5862 (!BinOp || (OpBExtend != TTI::PR_None && InputTypeB)) &&
5863 "Unexpected values for OpBExtend or InputTypeB");
5864
5865
5866
5867 if (BinOp && (*BinOp != Instruction::Mul || InputTypeA != InputTypeB))
5869
5870 bool IsUSDot = OpBExtend != TTI::PR_None && OpAExtend != OpBExtend;
5871 if (IsUSDot && !ST->hasMatMulInt8())
5873
5874 unsigned Ratio =
5878
5882
5883 auto TC = TLI->getTypeConversion(AccumVectorType->getContext(),
5885 switch (TC.first) {
5886 default:
5891
5892 if (TLI->getTypeAction(AccumVectorType->getContext(), TC.second) !=
5895 break;
5896 }
5897
5898 std::pair<InstructionCost, MVT> AccumLT =
5900 std::pair<InstructionCost, MVT> InputLT =
5902
5904
5905
5908
5909
5910
5912
5913 if (ST->isSVEorStreamingSVEAvailable() && !IsUSDot) {
5914
5915 if (AccumLT.second.getScalarType() == MVT::i64 &&
5916 InputLT.second.getScalarType() == MVT::i16)
5917 return Cost;
5918
5919 if (AccumLT.second.getScalarType() == MVT::i64 &&
5920 InputLT.second.getScalarType() == MVT::i8)
5921
5922
5923
5924
5925
5926 return Cost;
5927 }
5928
5929
5930 if (ST->isSVEorStreamingSVEAvailable() ||
5931 (AccumLT.second.isFixedLengthVector() && ST->isNeonAvailable() &&
5932 ST->hasDotProd())) {
5933 if (AccumLT.second.getScalarType() == MVT::i32 &&
5934 InputLT.second.getScalarType() == MVT::i8)
5935 return Cost;
5936 }
5937
5938
5939 return Cost + 2;
5940}
5941
5950 "Expected the Mask to match the return size if given");
5952 "Expected the same scalar types");
5954
5955
5956
5958 LT.second.getScalarSizeInBits() * Mask.size() > 128 &&
5959 SrcTy->getScalarSizeInBits() == LT.second.getScalarSizeInBits() &&
5960 Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) {
5961
5962
5963
5964
5965 if (Args.size() >= 1 && isa(Args[0]) &&
5968 return std::max(1, LT.first / 4);
5969
5970
5971
5972
5973
5976 Mask, 4, SrcTy->getElementCount().getKnownMinValue() * 2) ||
5978 Mask, 3, SrcTy->getElementCount().getKnownMinValue() * 2)))
5979 return LT.first;
5980
5981 unsigned TpNumElts = Mask.size();
5982 unsigned LTNumElts = LT.second.getVectorNumElements();
5983 unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts;
5985 LT.second.getVectorElementCount());
5987 std::map<std::tuple<unsigned, unsigned, SmallVector>, InstructionCost>
5988 PreviousCosts;
5989 for (unsigned N = 0; N < NumVecs; N++) {
5991
5992
5993 unsigned Source1 = -1U, Source2 = -1U;
5994 unsigned NumSources = 0;
5995 for (unsigned E = 0; E < LTNumElts; E++) {
5996 int MaskElt = (N * LTNumElts + E < TpNumElts) ? Mask[N * LTNumElts + E]
5998 if (MaskElt < 0) {
6000 continue;
6001 }
6002
6003
6004
6005 unsigned Source = MaskElt / LTNumElts;
6006 if (NumSources == 0) {
6007 Source1 = Source;
6008 NumSources = 1;
6009 } else if (NumSources == 1 && Source != Source1) {
6010 Source2 = Source;
6011 NumSources = 2;
6012 } else if (NumSources >= 2 && Source != Source1 && Source != Source2) {
6013 NumSources++;
6014 }
6015
6016
6017
6018 if (Source == Source1)
6019 NMask.push_back(MaskElt % LTNumElts);
6020 else if (Source == Source2)
6021 NMask.push_back(MaskElt % LTNumElts + LTNumElts);
6022 else
6023 NMask.push_back(MaskElt % LTNumElts);
6024 }
6025
6026
6027
6028
6029 auto Result =
6030 PreviousCosts.insert({std::make_tuple(Source1, Source2, NMask), 0});
6031
6032 if (!Result.second)
6033 continue;
6034
6035
6036
6038 NumSources <= 2
6041 NTp, NTp, NMask, CostKind, 0, nullptr, Args,
6042 CxtI)
6043 : LTNumElts;
6044 Result.first->second = NCost;
6045 Cost += NCost;
6046 }
6047 return Cost;
6048 }
6049
6052
6053
6054
6055
6056
6057
6058 if (IsExtractSubvector && LT.second.isFixedLengthVector()) {
6059 if (LT.second.getFixedSizeInBits() >= 128 &&
6061 LT.second.getVectorNumElements() / 2) {
6062 if (Index == 0)
6063 return 0;
6064 if (Index == (int)LT.second.getVectorNumElements() / 2)
6065 return 1;
6066 }
6068 }
6069
6070
6073 SrcTy = DstTy;
6074 }
6075
6076
6077
6078 if (!Mask.empty() && LT.second.isFixedLengthVector() &&
6081 return M.value() < 0 || M.value() == (int)M.index();
6082 }))
6083 return 0;
6084
6085
6087 !Mask.empty() && SrcTy->getPrimitiveSizeInBits().isNonZero() &&
6088 SrcTy->getPrimitiveSizeInBits().isKnownMultipleOf(
6090
6092 unsigned Segments =
6094 unsigned SegmentElts = VTy->getNumElements() / Segments;
6095
6096
6097 if ((ST->hasSVE2p1() || ST->hasSME2p1()) &&
6098 ST->isSVEorStreamingSVEAvailable() &&
6099 isDUPQMask(Mask, Segments, SegmentElts))
6100 return LT.first;
6101
6102
6103 if (ST->isSVEorStreamingSVEAvailable() &&
6105 return LT.first;
6106 }
6107
6108
6109
6110
6111
6112
6113
6114
6116 bool IsLoad = !Args.empty() && isa(Args[0]);
6117 if (IsLoad && LT.second.isVector() &&
6119 LT.second.getVectorElementCount()))
6120 return 0;
6121 }
6122
6123
6124
6125 if (Mask.size() == 4 &&
6127 (SrcTy->getScalarSizeInBits() == 16 ||
6128 SrcTy->getScalarSizeInBits() == 32) &&
6129 all_of(Mask, [](int E) { return E < 8; }))
6131
6132
6133
6134 unsigned Unused;
6135 if (LT.second.isFixedLengthVector() &&
6136 LT.second.getVectorNumElements() == Mask.size() &&
6138 (isZIPMask(Mask, LT.second.getVectorNumElements(), Unused, Unused) ||
6139 isUZPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
6140 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6141 LT.second.getVectorNumElements(), 16) ||
6142 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6143 LT.second.getVectorNumElements(), 32) ||
6144 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6145 LT.second.getVectorNumElements(), 64) ||
6146
6148 [&Mask](int M) { return M < 0 || M == Mask[0]; })))
6149 return 1;
6150
6155
6170
6171
6186
6187
6189 {TTI::SK_Select, MVT::v4i32, 2},
6192 {TTI::SK_Select, MVT::v4f32, 2},
6194
6209
6224
6239
6257
6275 };
6276 if (const auto *Entry = CostTableLookup(ShuffleTbl, Kind, LT.second))
6277 return LT.first * Entry->Cost;
6278 }
6279
6282
6283
6284
6286 LT.second.getSizeInBits() <= 128 && SubTp) {
6288 if (SubLT.second.isVector()) {
6289 int NumElts = LT.second.getVectorNumElements();
6290 int NumSubElts = SubLT.second.getVectorNumElements();
6291 if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
6292 return SubLT.first;
6293 }
6294 }
6295
6296
6297 if (IsExtractSubvector)
6300 Args, CxtI);
6301}
6302
6308
6309
6314 if (getPtrStride(*PSE, AccessTy, Ptr, TheLoop, DT, Strides,
6315 true, false)
6316 .value_or(0) < 0)
6317 return true;
6318 }
6319 }
6320 }
6321 return false;
6322}
6323
6327
6328
6329
6330
6331 if (IsEpilogue)
6332 return true;
6333 return ST->useFixedOverScalableIfEqualCost();
6334}
6335
6337 return ST->getEpilogueVectorizationMinVF();
6338}
6339
6341 if (!ST->hasSVE())
6342 return false;
6343
6344
6345
6346
6348 return false;
6349
6355
6356
6357
6358
6365
6368 return false;
6369
6370
6371
6372 unsigned NumInsns = 0;
6374 NumInsns += BB->sizeWithoutDebug();
6375 }
6376
6377
6379}
6380
6383 StackOffset BaseOffset, bool HasBaseReg,
6384 int64_t Scale, unsigned AddrSpace) const {
6385
6386
6387
6388
6389
6390
6391
6396 AM.Scale = Scale;
6399
6400
6401 return AM.Scale != 0 && AM.Scale != 1;
6403}
6404
6408
6409
6410
6411
6412 if (I->getOpcode() == Instruction::Or &&
6415 return true;
6416
6417 if (I->getOpcode() == Instruction::Add ||
6418 I->getOpcode() == Instruction::Sub)
6419 return true;
6420 }
6422}
6423
6427
6428
6429
6430
6431
6437
6439}
6440
6443 return all_equal(Shuf->getShuffleMask());
6444 return false;
6445}
6446
6447
6448
6450 bool AllowSplat = false) {
6451
6453 return false;
6454
6455 auto areTypesHalfed = [](Value *FullV, Value *HalfV) {
6456 auto *FullTy = FullV->getType();
6457 auto *HalfTy = HalfV->getType();
6459 2 * HalfTy->getPrimitiveSizeInBits().getFixedValue();
6460 };
6461
6462 auto extractHalf = [](Value *FullV, Value *HalfV) {
6465 return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
6466 };
6467
6469 Value *S1Op1 = nullptr, *S2Op1 = nullptr;
6472 return false;
6473
6474
6475
6477 S1Op1 = nullptr;
6479 S2Op1 = nullptr;
6480
6481
6482
6483 if ((S1Op1 && (!areTypesHalfed(S1Op1, Op1) || !extractHalf(S1Op1, Op1))) ||
6484 (S2Op1 && (!areTypesHalfed(S2Op1, Op2) || !extractHalf(S2Op1, Op2))))
6485 return false;
6486
6487
6488
6489 int M1Start = 0;
6490 int M2Start = 0;
6492 if ((S1Op1 &&
6494 (S2Op1 &&
6496 return false;
6497
6498 if ((M1Start != 0 && M1Start != (NumElements / 2)) ||
6499 (M2Start != 0 && M2Start != (NumElements / 2)))
6500 return false;
6501 if (S1Op1 && S2Op1 && M1Start != M2Start)
6502 return false;
6503
6504 return true;
6505}
6506
6507
6508
6510 auto areExtDoubled = [](Instruction *Ext) {
6511 return Ext->getType()->getScalarSizeInBits() ==
6512 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
6513 };
6514
6519 return false;
6520
6521 return true;
6522}
6523
6524
6526 Value *VectorOperand = nullptr;
6530 ElementIndex->getValue() == 1 &&
6533}
6534
6535
6539
6541
6543 if ( || GEP->getNumOperands() != 2)
6544 return false;
6545
6547 Value *Offsets = GEP->getOperand(1);
6548
6549
6550 if (Base->getType()->isVectorTy() || !Offsets->getType()->isVectorTy())
6551 return false;
6552
6553
6556 if (OffsetsInst->getType()->getScalarSizeInBits() > 32 &&
6557 OffsetsInst->getOperand(0)->getType()->getScalarSizeInBits() <= 32)
6558 Ops.push_back(&GEP->getOperandUse(1));
6559 }
6560
6561
6562 return true;
6563}
6564
6565
6566
6567
6570 return true;
6574 return true;
6575 }
6581 return true;
6582 }
6583 return false;
6584}
6585
6586
6587
6588
6592 switch (II->getIntrinsicID()) {
6593 case Intrinsic::aarch64_neon_smull:
6594 case Intrinsic::aarch64_neon_umull:
6596 true)) {
6597 Ops.push_back(&II->getOperandUse(0));
6598 Ops.push_back(&II->getOperandUse(1));
6599 return true;
6600 }
6601 [[fallthrough]];
6602
6603 case Intrinsic::fma:
6604 case Intrinsic::fmuladd:
6607 !ST->hasFullFP16())
6608 return false;
6609 [[fallthrough]];
6610 case Intrinsic::aarch64_neon_sqdmull:
6611 case Intrinsic::aarch64_neon_sqdmulh:
6612 case Intrinsic::aarch64_neon_sqrdmulh:
6613
6615 Ops.push_back(&II->getOperandUse(0));
6617 Ops.push_back(&II->getOperandUse(1));
6618 return .empty();
6619 case Intrinsic::aarch64_neon_fmlal:
6620 case Intrinsic::aarch64_neon_fmlal2:
6621 case Intrinsic::aarch64_neon_fmlsl:
6622 case Intrinsic::aarch64_neon_fmlsl2:
6623
6625 Ops.push_back(&II->getOperandUse(1));
6627 Ops.push_back(&II->getOperandUse(2));
6628 return .empty();
6629 case Intrinsic::aarch64_sve_ptest_first:
6630 case Intrinsic::aarch64_sve_ptest_last:
6632 if (IIOp->getIntrinsicID() == Intrinsic::aarch64_sve_ptrue)
6633 Ops.push_back(&II->getOperandUse(0));
6634 return .empty();
6635 case Intrinsic::aarch64_sme_write_horiz:
6636 case Intrinsic::aarch64_sme_write_vert:
6637 case Intrinsic::aarch64_sme_writeq_horiz:
6638 case Intrinsic::aarch64_sme_writeq_vert: {
6640 if (!Idx || Idx->getOpcode() != Instruction::Add)
6641 return false;
6642 Ops.push_back(&II->getOperandUse(1));
6643 return true;
6644 }
6645 case Intrinsic::aarch64_sme_read_horiz:
6646 case Intrinsic::aarch64_sme_read_vert:
6647 case Intrinsic::aarch64_sme_readq_horiz:
6648 case Intrinsic::aarch64_sme_readq_vert:
6649 case Intrinsic::aarch64_sme_ld1b_vert:
6650 case Intrinsic::aarch64_sme_ld1h_vert:
6651 case Intrinsic::aarch64_sme_ld1w_vert:
6652 case Intrinsic::aarch64_sme_ld1d_vert:
6653 case Intrinsic::aarch64_sme_ld1q_vert:
6654 case Intrinsic::aarch64_sme_st1b_vert:
6655 case Intrinsic::aarch64_sme_st1h_vert:
6656 case Intrinsic::aarch64_sme_st1w_vert:
6657 case Intrinsic::aarch64_sme_st1d_vert:
6658 case Intrinsic::aarch64_sme_st1q_vert:
6659 case Intrinsic::aarch64_sme_ld1b_horiz:
6660 case Intrinsic::aarch64_sme_ld1h_horiz:
6661 case Intrinsic::aarch64_sme_ld1w_horiz:
6662 case Intrinsic::aarch64_sme_ld1d_horiz:
6663 case Intrinsic::aarch64_sme_ld1q_horiz:
6664 case Intrinsic::aarch64_sme_st1b_horiz:
6665 case Intrinsic::aarch64_sme_st1h_horiz:
6666 case Intrinsic::aarch64_sme_st1w_horiz:
6667 case Intrinsic::aarch64_sme_st1d_horiz:
6668 case Intrinsic::aarch64_sme_st1q_horiz: {
6670 if (!Idx || Idx->getOpcode() != Instruction::Add)
6671 return false;
6672 Ops.push_back(&II->getOperandUse(3));
6673 return true;
6674 }
6675 case Intrinsic::aarch64_neon_pmull:
6677 return false;
6678 Ops.push_back(&II->getOperandUse(0));
6679 Ops.push_back(&II->getOperandUse(1));
6680 return true;
6681 case Intrinsic::aarch64_neon_pmull64:
6683 II->getArgOperand(1)))
6684 return false;
6685 Ops.push_back(&II->getArgOperandUse(0));
6686 Ops.push_back(&II->getArgOperandUse(1));
6687 return true;
6688 case Intrinsic::masked_gather:
6690 return false;
6691 Ops.push_back(&II->getArgOperandUse(0));
6692 return true;
6693 case Intrinsic::masked_scatter:
6695 return false;
6696 Ops.push_back(&II->getArgOperandUse(1));
6697 return true;
6698 default:
6699 return false;
6700 }
6701 }
6702
6703 auto ShouldSinkCondition = [](Value *Cond,
6706 return false;
6708 if (II->getIntrinsicID() != Intrinsic::vector_reduce_or ||
6710 return false;
6712 Ops.push_back(&II->getOperandUse(0));
6713 return true;
6714 };
6715
6716 switch (I->getOpcode()) {
6717 case Instruction::GetElementPtr:
6718 case Instruction::Add:
6719 case Instruction::Sub:
6720
6721 for (unsigned Op = 0; Op < I->getNumOperands(); ++Op) {
6723 Ops.push_back(&I->getOperandUse(Op));
6724 return true;
6725 }
6726 }
6727 break;
6728 case Instruction::Select: {
6729 if (!ShouldSinkCondition(I->getOperand(0), Ops))
6730 return false;
6731
6732 Ops.push_back(&I->getOperandUse(0));
6733 return true;
6734 }
6735 case Instruction::Br: {
6737 return false;
6738
6740 return false;
6741
6742 Ops.push_back(&I->getOperandUse(0));
6743 return true;
6744 }
6745 default:
6746 break;
6747 }
6748
6749 if (->getType()->isVectorTy())
6750 return false;
6751
6752 switch (I->getOpcode()) {
6753 case Instruction::Sub:
6754 case Instruction::Add: {
6756 return false;
6757
6758
6759
6763 Ops.push_back(&Ext1->getOperandUse(0));
6764 Ops.push_back(&Ext2->getOperandUse(0));
6765 }
6766
6767 Ops.push_back(&I->getOperandUse(0));
6768 Ops.push_back(&I->getOperandUse(1));
6769
6770 return true;
6771 }
6772 case Instruction::Or: {
6773
6774
6775 if (ST->hasNEON()) {
6777 Value *MaskValue;
6778
6782 if (match(OtherAnd,
6784 Instruction *MainAnd = I->getOperand(0) == OtherAnd
6787
6788
6789 if (I->getParent() != MainAnd->getParent() ||
6790 I->getParent() != OtherAnd->getParent())
6791 return false;
6792
6793
6794 if (I->getParent() != IA->getParent() ||
6795 I->getParent() != IB->getParent())
6796 return false;
6797
6798 Ops.push_back(
6800 Ops.push_back(&I->getOperandUse(0));
6801 Ops.push_back(&I->getOperandUse(1));
6802
6803 return true;
6804 }
6805 }
6806 }
6807
6808 return false;
6809 }
6810 case Instruction::Mul: {
6811 auto ShouldSinkSplatForIndexedVariant = [](Value *V) {
6813
6814 if (Ty->isScalableTy())
6815 return false;
6816
6817
6818 return Ty->getScalarSizeInBits() == 16 || Ty->getScalarSizeInBits() == 32;
6819 };
6820
6821 int NumZExts = 0, NumSExts = 0;
6822 for (auto &Op : I->operands()) {
6823
6824 if (any_of(Ops, [&](Use *U) { return U->get() == Op; }))
6825 continue;
6826
6829 auto *ExtOp = Ext->getOperand(0);
6830 if (isSplatShuffle(ExtOp) && ShouldSinkSplatForIndexedVariant(ExtOp))
6831 Ops.push_back(&Ext->getOperandUse(0));
6833
6835 NumSExts++;
6836 } else {
6837 NumZExts++;
6838
6839 if (Ext->getOperand(0)->getType()->getScalarSizeInBits() * 2 <
6840 I->getType()->getScalarSizeInBits())
6841 NumSExts++;
6842 }
6843
6844 continue;
6845 }
6846
6848 if (!Shuffle)
6849 continue;
6850
6851
6852
6853
6859 NumSExts++;
6860 else
6861 NumZExts++;
6862 continue;
6863 }
6864
6867 if (!Insert)
6868 continue;
6869
6871 if (!OperandInstr)
6872 continue;
6873
6876
6877 if (!ElementConstant || !ElementConstant->isZero())
6878 continue;
6879
6880 unsigned Opcode = OperandInstr->getOpcode();
6881 if (Opcode == Instruction::SExt)
6882 NumSExts++;
6883 else if (Opcode == Instruction::ZExt)
6884 NumZExts++;
6885 else {
6886
6887
6888 unsigned Bitwidth = I->getType()->getScalarSizeInBits();
6891 continue;
6892 NumZExts++;
6893 }
6894
6895
6896
6898 Ops.push_back(&Insert->getOperandUse(1));
6901 }
6902
6903
6904 if (.empty() && (NumSExts == 2 || NumZExts == 2))
6905 return true;
6906
6907
6908 if (!ShouldSinkSplatForIndexedVariant(I))
6909 return false;
6910
6911 Ops.clear();
6913 Ops.push_back(&I->getOperandUse(0));
6915 Ops.push_back(&I->getOperandUse(1));
6916
6917 return .empty();
6918 }
6919 case Instruction::FMul: {
6920
6921 if (I->getType()->isScalableTy())
6922 return false;
6923
6924 if (cast(I->getType())->getElementType()->isHalfTy() &&
6925 !ST->hasFullFP16())
6926 return false;
6927
6928
6930 Ops.push_back(&I->getOperandUse(0));
6932 Ops.push_back(&I->getOperandUse(1));
6933 return .empty();
6934 }
6935 default:
6936 return false;
6937 }
6938 return false;
6939}
static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static std::optional< Instruction * > instCombinePTrue(InstCombiner &IC, IntrinsicInst &II)
Definition AArch64TargetTransformInfo.cpp:2865
TailFoldingOption TailFoldingOptionLoc
Definition AArch64TargetTransformInfo.cpp:191
static std::optional< Instruction * > instCombineSVEVectorFAdd(InstCombiner &IC, IntrinsicInst &II)
Definition AArch64TargetTransformInfo.cpp:2417
static std::optional< Instruction * > instCombineSVEVectorFuseMulAddSub(InstCombiner &IC, IntrinsicInst &II, bool MergeIntoAddendOp)
Definition AArch64TargetTransformInfo.cpp:2295
static void getFalkorUnrollingPreferences(Loop *L, ScalarEvolution &SE, TargetTransformInfo::UnrollingPreferences &UP)
Definition AArch64TargetTransformInfo.cpp:5061
bool SimplifyValuePattern(SmallVector< Value * > &Vec, bool AllowPoison)
Definition AArch64TargetTransformInfo.cpp:2686
static std::optional< Instruction * > instCombineSVESel(InstCombiner &IC, IntrinsicInst &II)
Definition AArch64TargetTransformInfo.cpp:1924
static bool hasPossibleIncompatibleOps(const Function *F, const AArch64TargetLowering &TLI)
Returns true if the function has explicit operations that can only be lowered using incompatible inst...
Definition AArch64TargetTransformInfo.cpp:238
static bool shouldSinkVScale(Value *Op, SmallVectorImpl< Use * > &Ops)
We want to sink the following cases: (add|sub|gep) A, ((mul|shl) vscale, imm); (add|sub|gep) A,...
Definition AArch64TargetTransformInfo.cpp:6568
static InstructionCost getHistogramCost(const AArch64Subtarget *ST, const IntrinsicCostAttributes &ICA)
Definition AArch64TargetTransformInfo.cpp:576
static std::optional< Instruction * > tryCombineFromSVBoolBinOp(InstCombiner &IC, IntrinsicInst &II)
Definition AArch64TargetTransformInfo.cpp:1823
static std::optional< Instruction * > instCombineSVEUnpack(InstCombiner &IC, IntrinsicInst &II)
Definition AArch64TargetTransformInfo.cpp:2505
static cl::opt< unsigned > SVETailFoldInsnThreshold("sve-tail-folding-insn-threshold", cl::init(15), cl::Hidden)
static cl::opt< bool > EnableFixedwidthAutovecInStreamingMode("enable-fixedwidth-autovec-in-streaming-mode", cl::init(false), cl::Hidden)
static void getAppleRuntimeUnrollPreferences(Loop *L, ScalarEvolution &SE, TargetTransformInfo::UnrollingPreferences &UP, const AArch64TTIImpl &TTI)
For Apple CPUs, we want to runtime-unroll loops to make better use of the OOO engine's wide instructi...
Definition AArch64TargetTransformInfo.cpp:5176
static std::optional< Instruction * > instCombineWhilelo(InstCombiner &IC, IntrinsicInst &II)
Definition AArch64TargetTransformInfo.cpp:2856
static std::optional< Instruction * > instCombineSVEVectorFAddU(InstCombiner &IC, IntrinsicInst &II)
Definition AArch64TargetTransformInfo.cpp:2437
static bool areExtractExts(Value *Ext1, Value *Ext2)
Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth of the vector elements.
Definition AArch64TargetTransformInfo.cpp:6509
static cl::opt< bool > EnableLSRCostOpt("enable-aarch64-lsr-cost-opt", cl::init(true), cl::Hidden)
static bool shouldSinkVectorOfPtrs(Value *Ptrs, SmallVectorImpl< Use * > &Ops)
Definition AArch64TargetTransformInfo.cpp:6540
static bool shouldUnrollMultiExitLoop(Loop *L, ScalarEvolution &SE, const AArch64TTIImpl &TTI)
Definition AArch64TargetTransformInfo.cpp:5139
static std::optional< Instruction * > simplifySVEIntrinsicBinOp(InstCombiner &IC, IntrinsicInst &II, const SVEIntrinsicInfo &IInfo)
Definition AArch64TargetTransformInfo.cpp:1716
static std::optional< Instruction * > instCombineSVEVectorSub(InstCombiner &IC, IntrinsicInst &II)
Definition AArch64TargetTransformInfo.cpp:2496
static bool isLoopSizeWithinBudget(Loop *L, const AArch64TTIImpl &TTI, InstructionCost Budget, unsigned *FinalSize)
Definition AArch64TargetTransformInfo.cpp:5112
static std::optional< Instruction * > instCombineLD1GatherIndex(InstCombiner &IC, IntrinsicInst &II)
Definition AArch64TargetTransformInfo.cpp:2594
static std::optional< Instruction * > instCombineSVEVectorFSub(InstCombiner &IC, IntrinsicInst &II)
Definition AArch64TargetTransformInfo.cpp:2457
static std::optional< Instruction * > processPhiNode(InstCombiner &IC, IntrinsicInst &II)
The function will remove redundant reinterprets casting in the presence of the control flow.
Definition AArch64TargetTransformInfo.cpp:1095
static std::optional< Instruction * > instCombineSVEInsr(InstCombiner &IC, IntrinsicInst &II)
Definition AArch64TargetTransformInfo.cpp:2821
static std::optional< Instruction * > instCombineSMECntsd(InstCombiner &IC, IntrinsicInst &II, const AArch64Subtarget *ST)
Definition AArch64TargetTransformInfo.cpp:2212
static void extractAttrFeatures(const Function &F, const AArch64TTIImpl *TTI, SmallVectorImpl< StringRef > &Features)
Definition AArch64TargetTransformInfo.cpp:255
static std::optional< Instruction * > instCombineST1ScatterIndex(InstCombiner &IC, IntrinsicInst &II)
Definition AArch64TargetTransformInfo.cpp:2622
static bool isSMEABIRoutineCall(const CallInst &CI, const AArch64TargetLowering &TLI)
Definition AArch64TargetTransformInfo.cpp:228
static std::optional< Instruction * > instCombineSVESDIV(InstCombiner &IC, IntrinsicInst &II)
Definition AArch64TargetTransformInfo.cpp:2648
static std::optional< Instruction * > instCombineSVEST1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL)
Definition AArch64TargetTransformInfo.cpp:2357
static Value * stripInactiveLanes(Value *V, const Value *Pg)
Definition AArch64TargetTransformInfo.cpp:1704
static cl::opt< bool > SVEPreferFixedOverScalableIfEqualCost("sve-prefer-fixed-over-scalable-if-equal", cl::Hidden)
static bool isUnpackedVectorVT(EVT VecVT)
Definition AArch64TargetTransformInfo.cpp:571
static std::optional< Instruction * > instCombineSVEDupX(InstCombiner &IC, IntrinsicInst &II)
Definition AArch64TargetTransformInfo.cpp:1958
static std::optional< Instruction * > instCombineSVECmpNE(InstCombiner &IC, IntrinsicInst &II)
Definition AArch64TargetTransformInfo.cpp:1968
static std::optional< Instruction * > instCombineDMB(InstCombiner &IC, IntrinsicInst &II)
Definition AArch64TargetTransformInfo.cpp:2831
static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II)
Definition AArch64TargetTransformInfo.cpp:1335
static std::optional< Instruction * > instCombineSVEVectorFSubU(InstCombiner &IC, IntrinsicInst &II)
Definition AArch64TargetTransformInfo.cpp:2477
static std::optional< Instruction * > instCombineRDFFR(InstCombiner &IC, IntrinsicInst &II)
Definition AArch64TargetTransformInfo.cpp:2177
static std::optional< Instruction * > instCombineMaxMinNM(InstCombiner &IC, IntrinsicInst &II)
Definition AArch64TargetTransformInfo.cpp:2777
static cl::opt< unsigned > SVEGatherOverhead("sve-gather-overhead", cl::init(10), cl::Hidden)
static std::optional< Instruction * > instCombineSVECondLast(InstCombiner &IC, IntrinsicInst &II)
Definition AArch64TargetTransformInfo.cpp:2135
static std::optional< Instruction * > instCombineSVEPTest(InstCombiner &IC, IntrinsicInst &II)
Definition AArch64TargetTransformInfo.cpp:2225
static std::optional< Instruction * > instCombineSVEZip(InstCombiner &IC, IntrinsicInst &II)
Definition AArch64TargetTransformInfo.cpp:2578
static cl::opt< int > Aarch64ForceUnrollThreshold("aarch64-force-unroll-threshold", cl::init(0), cl::Hidden, cl::desc("Threshold for forced unrolling of small loops in AArch64"))
static std::optional< Instruction * > instCombineSVEDup(InstCombiner &IC, IntrinsicInst &II)
Definition AArch64TargetTransformInfo.cpp:1936
static cl::opt< unsigned > BaseHistCntCost("aarch64-base-histcnt-cost", cl::init(8), cl::Hidden, cl::desc("The cost of a histcnt instruction"))
static std::optional< Instruction * > instCombineConvertFromSVBool(InstCombiner &IC, IntrinsicInst &II)
Definition AArch64TargetTransformInfo.cpp:1872
static cl::opt< unsigned > CallPenaltyChangeSM("call-penalty-sm-change", cl::init(5), cl::Hidden, cl::desc("Penalty of calling a function that requires a change to PSTATE.SM"))
static std::optional< Instruction * > instCombineSVEUzp1(InstCombiner &IC, IntrinsicInst &II)
Definition AArch64TargetTransformInfo.cpp:2548
static std::optional< Instruction * > instCombineSVEVectorBinOp(InstCombiner &IC, IntrinsicInst &II)
Definition AArch64TargetTransformInfo.cpp:2388
static cl::opt< bool > EnableScalableAutovecInStreamingMode("enable-scalable-autovec-in-streaming-mode", cl::init(false), cl::Hidden)
static std::optional< Instruction * > instCombineSVETBL(InstCombiner &IC, IntrinsicInst &II)
Definition AArch64TargetTransformInfo.cpp:2525
static bool areOperandsOfVmullHighP64(Value *Op1, Value *Op2)
Check if Op1 and Op2 could be used with vmull_high_p64 intrinsic.
Definition AArch64TargetTransformInfo.cpp:6536
static Instruction::BinaryOps intrinsicIDToBinOpCode(unsigned Intrinsic)
Definition AArch64TargetTransformInfo.cpp:2374
static bool containsDecreasingPointers(Loop *TheLoop, PredicatedScalarEvolution *PSE, const DominatorTree &DT)
Definition AArch64TargetTransformInfo.cpp:6303
static bool isSplatShuffle(Value *V)
Definition AArch64TargetTransformInfo.cpp:6441
static cl::opt< unsigned > InlineCallPenaltyChangeSM("inline-call-penalty-sm-change", cl::init(10), cl::Hidden, cl::desc("Penalty of inlining a call that requires a change to PSTATE.SM"))
static std::optional< Instruction * > instCombineSVELD1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL)
Definition AArch64TargetTransformInfo.cpp:2338
static std::optional< Instruction * > instCombineSVESrshl(InstCombiner &IC, IntrinsicInst &II)
Definition AArch64TargetTransformInfo.cpp:2787
static cl::opt< unsigned > DMBLookaheadThreshold("dmb-lookahead-threshold", cl::init(10), cl::Hidden, cl::desc("The number of instructions to search for a redundant dmb"))
static std::optional< Instruction * > simplifySVEIntrinsic(InstCombiner &IC, IntrinsicInst &II, const SVEIntrinsicInfo &IInfo)
Definition AArch64TargetTransformInfo.cpp:1768
static unsigned getSVEGatherScatterOverhead(unsigned Opcode, const AArch64Subtarget *ST)
Definition AArch64TargetTransformInfo.cpp:4833
static bool isOperandOfVmullHighP64(Value *Op)
Check if Op could be used with vmull_high_p64 intrinsic.
Definition AArch64TargetTransformInfo.cpp:6525
static std::optional< Instruction * > instCombineInStreamingMode(InstCombiner &IC, IntrinsicInst &II)
Definition AArch64TargetTransformInfo.cpp:2893
static std::optional< Instruction * > instCombineSVELast(InstCombiner &IC, IntrinsicInst &II)
Definition AArch64TargetTransformInfo.cpp:2061
static cl::opt< unsigned > NeonNonConstStrideOverhead("neon-nonconst-stride-overhead", cl::init(10), cl::Hidden)
static cl::opt< bool > EnableFalkorHWPFUnrollFix("enable-falkor-hwpf-unroll-fix", cl::init(true), cl::Hidden)
static std::optional< Instruction * > instCombineSVECntElts(InstCombiner &IC, IntrinsicInst &II, unsigned NumElts)
Definition AArch64TargetTransformInfo.cpp:2193
static std::optional< Instruction * > instCombineSVEUxt(InstCombiner &IC, IntrinsicInst &II, unsigned NumBits)
Definition AArch64TargetTransformInfo.cpp:2872
static cl::opt< TailFoldingOption, true, cl::parser< std::string > > SVETailFolding("sve-tail-folding", cl::desc("Control the use of vectorisation using tail-folding for SVE where the" " option is specified in the form (Initial)[+(Flag1|Flag2|...)]:" "\ndisabled (Initial) No loop types will vectorize using " "tail-folding" "\ndefault (Initial) Uses the default tail-folding settings for " "the target CPU" "\nall (Initial) All legal loop types will vectorize using " "tail-folding" "\nsimple (Initial) Use tail-folding for simple loops (not " "reductions or recurrences)" "\nreductions Use tail-folding for loops containing reductions" "\nnoreductions Inverse of above" "\nrecurrences Use tail-folding for loops containing fixed order " "recurrences" "\nnorecurrences Inverse of above" "\nreverse Use tail-folding for loops requiring reversed " "predicates" "\nnoreverse Inverse of above"), cl::location(TailFoldingOptionLoc))
static bool areExtractShuffleVectors(Value *Op1, Value *Op2, bool AllowSplat=false)
Check if both Op1 and Op2 are shufflevector extracts of either the lower or upper half of the vector ...
Definition AArch64TargetTransformInfo.cpp:6449
static std::optional< Instruction * > instCombineSVEVectorAdd(InstCombiner &IC, IntrinsicInst &II)
Definition AArch64TargetTransformInfo.cpp:2403
static cl::opt< bool > EnableOrLikeSelectOpt("enable-aarch64-or-like-select", cl::init(true), cl::Hidden)
static cl::opt< unsigned > SVEScatterOverhead("sve-scatter-overhead", cl::init(10), cl::Hidden)
static std::optional< Instruction * > instCombineSVEDupqLane(InstCombiner &IC, IntrinsicInst &II)
Definition AArch64TargetTransformInfo.cpp:2715
This file defines a TargetTransformInfoImplBase conforming object specific to the AArch64 target machine.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static Error reportError(StringRef Message)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
Cost tables and simple lookup functions.
This file defines the DenseMap class.
static Value * getCondition(Instruction *I)
const HexagonInstrInfo * TII
This file provides the interface for the instcombine pass implementation.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
This file defines the LoopVectorizationLegality class.
static const Function * getCalledFunction(const Value *V)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > & Cond
static uint64_t getBits(uint64_t Val, int Start, int End)
static unsigned getNumElements(Type *Ty)
static unsigned getScalarSizeInBits(Type *Ty)
static SymbolRef::Type getType(const Symbol *Sym)
This file describes how to lower LLVM code to machine code.
This pass exposes codegen information to IR-level passes.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
unsigned getVectorInsertExtractBaseCost() const
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind) const override
Definition AArch64TargetTransformInfo.cpp:5843
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
Definition AArch64TargetTransformInfo.cpp:5606
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
Definition AArch64TargetTransformInfo.cpp:4251
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const override
Definition AArch64TargetTransformInfo.cpp:5038
unsigned getMaxInterleaveFactor(ElementCount VF) const override
Definition AArch64TargetTransformInfo.cpp:5052
InstructionCost getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
Definition AArch64TargetTransformInfo.cpp:4806
InstructionCost getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
Definition AArch64TargetTransformInfo.cpp:4854
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const override
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr, TTI::TargetCostKind CostKind) const override
Definition AArch64TargetTransformInfo.cpp:4588
bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst, Type *Src) const
Definition AArch64TargetTransformInfo.cpp:3219
InstructionCost getIntImmCost(int64_t Val) const
Calculate the cost of materializing a 64-bit value.
Definition AArch64TargetTransformInfo.cpp:401
std::optional< InstructionCost > getFP16BF16PromoteCost(Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, bool IncludeTrunc, bool CanUseSVE, std::function< InstructionCost(Type *)> InstCost) const
FP16 and BF16 operations are lowered to fptrunc(op(fpext, fpext)) if the architecture features are not...
Definition AArch64TargetTransformInfo.cpp:4227
bool prefersVectorizedAddressing() const override
Definition AArch64TargetTransformInfo.cpp:4787
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
Definition AArch64TargetTransformInfo.cpp:4196
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
Definition AArch64TargetTransformInfo.cpp:619
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const override
Definition AArch64TargetTransformInfo.cpp:5764
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override
Definition AArch64TargetTransformInfo.cpp:4166
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const override
Definition AArch64TargetTransformInfo.cpp:442
bool isElementTypeLegalForScalableVector(Type *Ty) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
Definition AArch64TargetTransformInfo.cpp:5408
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
Definition AArch64TargetTransformInfo.cpp:3261
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
Definition AArch64TargetTransformInfo.cpp:5324
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const override
Definition AArch64TargetTransformInfo.cpp:5450
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
Definition AArch64TargetTransformInfo.cpp:5554
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
Definition AArch64TargetTransformInfo.cpp:4899
APInt getPriorityMask(const Function &F) const override
Definition AArch64TargetTransformInfo.cpp:269
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const override
Definition AArch64TargetTransformInfo.cpp:386
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const override
Definition AArch64TargetTransformInfo.cpp:6424
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override
Definition AArch64TargetTransformInfo.cpp:4213
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
Definition AArch64TargetTransformInfo.cpp:3986
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
Definition AArch64TargetTransformInfo.cpp:6589
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override
Definition AArch64TargetTransformInfo.cpp:3018
bool useNeonVector(const Type *Ty) const
Definition AArch64TargetTransformInfo.cpp:4895
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
Definition AArch64TargetTransformInfo.cpp:2903
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
Definition AArch64TargetTransformInfo.cpp:4633
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
Definition AArch64TargetTransformInfo.cpp:5943
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const override
Definition AArch64TargetTransformInfo.cpp:6340
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
Definition AArch64TargetTransformInfo.cpp:5736
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
Definition AArch64TargetTransformInfo.cpp:563
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index, TTI::TargetCostKind CostKind) const override
Definition AArch64TargetTransformInfo.cpp:3927
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const override
Definition AArch64TargetTransformInfo.cpp:351
bool areInlineCompatible(const Function *Caller, const Function *Callee) const override
Definition AArch64TargetTransformInfo.cpp:283
unsigned getMaxNumElements(ElementCount VF) const
Try to return an estimate cost factor that can be used as a multiplier when scalarizing an operation ...
bool shouldTreatInstructionLikeSelect(const Instruction *I) const override
Definition AArch64TargetTransformInfo.cpp:6405
bool isMultiversionedFunction(const Function &F) const override
Definition AArch64TargetTransformInfo.cpp:275
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
Definition AArch64TargetTransformInfo.cpp:3050
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const override
Definition AArch64TargetTransformInfo.cpp:5522
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override
Definition AArch64TargetTransformInfo.cpp:4769
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
Definition AArch64TargetTransformInfo.cpp:511
bool isLegalMaskedGatherScatter(Type *DataType) const
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const override
See if I should be considered for address type promotion.
Definition AArch64TargetTransformInfo.cpp:5495
APInt getFeatureMask(const Function &F) const override
Definition AArch64TargetTransformInfo.cpp:263
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
Definition AArch64TargetTransformInfo.cpp:4995
bool areTypesABICompatible(const Function *Caller, const Function *Callee, ArrayRef< Type * > Types) const override
Definition AArch64TargetTransformInfo.cpp:326
bool enableScalableVectorization() const override
Definition AArch64TargetTransformInfo.cpp:3044
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
Definition AArch64TargetTransformInfo.cpp:4792
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType, bool CanCreate=true) const override
Definition AArch64TargetTransformInfo.cpp:5413
bool hasKnownLowerThroughputFromSchedulingModel(unsigned Opcode1, unsigned Opcode2) const
Check whether Opcode1 has less throughput according to the scheduling model than Opcode2.
Definition AArch64TargetTransformInfo.cpp:4609
unsigned getEpilogueVectorizationMinVF() const override
Definition AArch64TargetTransformInfo.cpp:6336
InstructionCost getSpliceCost(VectorType *Tp, int Index, TTI::TargetCostKind CostKind) const
Definition AArch64TargetTransformInfo.cpp:5787
InstructionCost getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy, TTI::TargetCostKind CostKind) const
Definition AArch64TargetTransformInfo.cpp:5580
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const override
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
Definition AArch64TargetTransformInfo.cpp:6382
bool preferFixedOverScalableIfEqualCost(bool IsEpilogue) const override
Definition AArch64TargetTransformInfo.cpp:6324
Class for arbitrary precision integers.
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
unsigned popcount() const
Count the number of bits set.
unsigned countLeadingOnes() const
void negate()
Negate this APInt in place.
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
unsigned logBase2() const
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind) const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
bool isTypeLegal(Type *Ty) const override
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
static bool isIntPredicate(Predicate P)
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
const APInt & getValue() const
Return the constant as an APInt value reference.
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string, and methods for querying it.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
This provides a helper for copying FMF from an instruction or setting specified flags.
Convenience struct for specifying and reasoning about fast-math flags.
bool allowContract() const
Container class for subtarget features.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
CallInst * CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec, Value *Idx, const Twine &Name="")
Create a call to the vector.insert intrinsic.
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Type * getDoubleTy()
Fetch the type representing a 64-bit floating point value.
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcasted to NumElts elements.
LLVM_ABI CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Type * getHalfTy()
Fetch the type representing a 16-bit floating point value.
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateBinOpFMF(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
LLVM_ABI CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Type * getFloatTy()
Fetch the type representing a 32-bit floating point value.
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
LLVM_ABI Value * CreateElementCount(Type *Ty, ElementCount EC)
Create an expression which evaluates to the number of elements in EC at runtime.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
This instruction inserts a single (scalar) element into a VectorType value.
The core instruction combiner logic.
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
static InstructionCost getInvalid(CostType Val=0)
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
bool hasGroups() const
Returns true if we have any interleave groups.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
iterator_range< block_iterator > blocks() const
RecurrenceSet & getFixedOrderRecurrences()
Return the fixed-order recurrences found in the loop.
DominatorTree * getDominatorTree() const
PredicatedScalarEvolution * getPredicatedScalarEvolution() const
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
Represents a single loop in the control flow graph.
const FeatureBitset & getFeatureBits() const
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
Information for memory intrinsic cost model.
Align getAlignment() const
Type * getDataType() const
Intrinsic::ID getID() const
const Instruction * getInst() const
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Type * getRecurrenceType() const
Returns the type of the recurrence.
RecurKind getRecurrenceKind() const
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
This class represents an analyzed expression in the program.
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasNonStreamingInterfaceAndBody() const
bool hasStreamingCompatibleInterface() const
bool hasStreamingInterfaceOrBody() const
bool isSMEABIRoutine() const
bool hasStreamingBody() const
void set(unsigned M, bool Enable=true)
SMECallAttrs is a utility class to hold the SMEAttrs for a callsite.
bool requiresPreservingZT0() const
bool requiresSMChange() const
bool requiresLazySave() const
bool requiresPreservingAllZAState() const
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
static ScalableVectorType * getDoubleElementsVectorType(ScalableVectorType *VTy)
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value,...
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns the upper bound of the loop trip count as a normal unsigned value.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getSymbolicMaxBackedgeTakenCount(const Loop *L)
When successful, this returns a SCEV that is greater than or equal to (i.e.
This instruction constructs a fixed permutation of two input vectors.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static LLVM_ABI bool isExtractSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &Index)
Return true if this shuffle mask is an extract subvector mask.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Class to represent struct types.
TargetInstrInfo - Interface to description of machine instruction set.
std::pair< LegalizeTypeAction, EVT > LegalizeKind
LegalizeKind holds the legalization kind that needs to happen to EVT in order to type-legalize it.
const RTLIB::RuntimeLibcallsInfo & getRuntimeLibcallsInfo() const
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
virtual const DataLayout & getDataLayout() const
virtual bool shouldTreatInstructionLikeSelect(const Instruction *I) const
virtual bool isLoweredToCall(const Function *F) const
virtual bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const
bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr, int64_t MergeDistance) const
virtual bool areTypesABICompatible(const Function *Caller, const Function *Callee, ArrayRef< Type * > Types) const
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind) const override
static LLVM_ABI OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCK_Latency
The latency of instruction.
static bool requiresOrderedReduction(std::optional< FastMathFlags > FMF)
A helper function to determine the type of reduction algorithm used for a given Opcode and set of Fas...
PopcntSupportKind
Flags indicating the kind of support for population count.
PartialReductionExtendKind
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
ShuffleKind
The various kinds of shuffle patterns for vector queries.
@ SK_InsertSubvector
InsertSubvector. Index indicates start offset.
@ SK_Select
Selects elements from the corresponding lane of either source operand.
@ SK_PermuteSingleSrc
Shuffle elements of single source vector with any shuffle mask.
@ SK_Transpose
Transpose two vectors.
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
@ SK_Broadcast
Broadcast element 0 to all other elements.
@ SK_PermuteTwoSrc
Merge elements from two source vectors into one with any shuffle mask.
@ SK_Reverse
Reverse the order of the vector.
@ SK_ExtractSubvector
ExtractSubvector Index indicates start offset.
CastContextHint
Represents a hint about the context in which a cast is used.
@ Masked
The cast is used with a masked load/store.
@ None
The cast is not used with a load/store of any kind.
@ Normal
The cast is used with a normal load/store.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
Type * getElementType() const
constexpr ScalarTy getFixedValue() const
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
LLVM_ABI APInt getCpuSupportsMask(ArrayRef< StringRef > Features)
static constexpr unsigned SVEBitsPerBlock
LLVM_ABI APInt getFMVPriority(ArrayRef< StringRef > Features)
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
@ C
The default llvm calling convention, compatible with C.
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types a...
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ SIGN_EXTEND
Conversion operators.
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
cst_pred_ty< is_nonnegative > m_NonNegative()
Match an integer or vector of non-negative values.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
IntrinsicID_match m_VScale()
Matches a call to llvm.vscale().
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches an Add with LHS and RHS in either order.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
FunctionAddr VTableAddr Value
std::optional< unsigned > isDUPQMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPQMask - matches a splat of equivalent lanes within segments of a given number of elements.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
LLVM_ABI bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
bool isZIPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)
Return true for zip1 or zip2 masks of the form: <0, 8, 1, 9, 2, 10, 3, 11> (WhichResultOut = 0,...
TailFoldingOpts
An enum to describe what types of loops we should attempt to tail-fold: Disabled: None Reductions: Lo...
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
bool isDUPFirstSegmentMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPFirstSegmentMask - matches a splat of the first 128b segment.
TypeConversionCostTblEntryT< unsigned > TypeConversionCostTblEntry
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
LLVM_ABI std::optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for loop.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI bool MaskedValueIsZero(const Value *V, const APInt &Mask, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if 'V & Mask' is known to be zero.
unsigned M1(unsigned Val)
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
unsigned getPerfectShuffleCost(llvm::ArrayRef< int > M)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool isUZPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for uzp1 or uzp2 masks of the form: <0, 2, 4, 6, 8, 10, 12, 14> or <1,...
bool isREVMask(ArrayRef< int > M, unsigned EltSize, unsigned NumElts, unsigned BlockSize)
isREVMask - Check if a vector shuffle corresponds to a REV instruction with the specified blocksize.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ Or
Bitwise or logical OR of integers.
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ Xor
Bitwise or logical XOR of integers.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
DWARFExpression::Operation Op
CostTblEntryT< unsigned > CostTblEntry
decltype(auto) cast(const From &Val)
cast - Return the argument parameter cast to the specified type.
unsigned getNumElementsFromSVEPredPattern(unsigned Pattern)
Return the number of active elements for VL1 to VL256 predicate pattern, zero for all other patterns.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DominatorTree &DT, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
unsigned getMatchingIROpode() const
Definition AArch64TargetTransformInfo.cpp:1235
bool inactiveLanesAreUnused() const
Definition AArch64TargetTransformInfo.cpp:1276
bool inactiveLanesAreNotDefined() const
Definition AArch64TargetTransformInfo.cpp:1266
bool hasMatchingUndefIntrinsic() const
Definition AArch64TargetTransformInfo.cpp:1218
static SVEIntrinsicInfo defaultMergingUnaryNarrowingTopOp()
Definition AArch64TargetTransformInfo.cpp:1157
static SVEIntrinsicInfo defaultZeroingOp()
Definition AArch64TargetTransformInfo.cpp:1180
bool hasGoverningPredicate() const
Definition AArch64TargetTransformInfo.cpp:1195
SVEIntrinsicInfo & setOperandIdxInactiveLanesTakenFrom(unsigned Index)
Definition AArch64TargetTransformInfo.cpp:1259
static SVEIntrinsicInfo defaultMergingOp(Intrinsic::ID IID=Intrinsic::not_intrinsic)
Definition AArch64TargetTransformInfo.cpp:1141
SVEIntrinsicInfo & setOperandIdxWithNoActiveLanes(unsigned Index)
Definition AArch64TargetTransformInfo.cpp:1311
unsigned getOperandIdxWithNoActiveLanes() const
Definition AArch64TargetTransformInfo.cpp:1306
SVEIntrinsicInfo & setInactiveLanesAreUnused()
Definition AArch64TargetTransformInfo.cpp:1280
SVEIntrinsicInfo & setInactiveLanesAreNotDefined()
Definition AArch64TargetTransformInfo.cpp:1270
SVEIntrinsicInfo & setGoverningPredicateOperandIdx(unsigned Index)
Definition AArch64TargetTransformInfo.cpp:1204
bool inactiveLanesTakenFromOperand() const
Definition AArch64TargetTransformInfo.cpp:1250
static SVEIntrinsicInfo defaultUndefOp()
Definition AArch64TargetTransformInfo.cpp:1164
bool hasOperandWithNoActiveLanes() const
Definition AArch64TargetTransformInfo.cpp:1302
Intrinsic::ID getMatchingUndefIntrinsic() const
Definition AArch64TargetTransformInfo.cpp:1222
SVEIntrinsicInfo & setResultIsZeroInitialized()
Definition AArch64TargetTransformInfo.cpp:1291
static SVEIntrinsicInfo defaultMergingUnaryOp()
Definition AArch64TargetTransformInfo.cpp:1149
SVEIntrinsicInfo & setMatchingUndefIntrinsic(Intrinsic::ID IID)
Definition AArch64TargetTransformInfo.cpp:1227
unsigned getGoverningPredicateOperandIdx() const
Definition AArch64TargetTransformInfo.cpp:1199
bool hasMatchingIROpode() const
Definition AArch64TargetTransformInfo.cpp:1233
bool resultIsZeroInitialized() const
Definition AArch64TargetTransformInfo.cpp:1289
SVEIntrinsicInfo & setMatchingIROpcode(unsigned Opcode)
Definition AArch64TargetTransformInfo.cpp:1240
unsigned getOperandIdxInactiveLanesTakenFrom() const
Definition AArch64TargetTransformInfo.cpp:1254
static SVEIntrinsicInfo defaultVoidOp(unsigned GPIndex)
Definition AArch64TargetTransformInfo.cpp:1172
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
uint64_t getScalarSizeInBits() const
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isFixedLengthVector() const
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Machine model for scheduling, bundling, and heuristics.
static LLVM_ABI double getReciprocalThroughput(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)
Information about a load/store intrinsic defined by the target.
InterleavedAccessInfo * IAI
LoopVectorizationLegality * LVL
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
unsigned Insns
TODO: Some of these could be merged.
Returns options for expansion of memcmp. IsZeroCmp is true if this is the expansion of memcmp(p1, p2, s) == 0.
bool isNegatedPowerOf2() const
OperandValueInfo getNoProps() const
Parameters that control the generic loop unrolling transformation.
bool UpperBound
Allow using trip count upper bound to unroll loops.
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
unsigned DefaultUnrollRuntimeCount
Default unroll count for loops with run-time trip count.
bool RuntimeUnrollMultiExit
Allow runtime unrolling multi-exit loops.
unsigned SCEVExpansionBudget
Don't allow runtime unrolling if expanding the trip count takes more than SCEVExpansionBudget.
bool AddAdditionalAccumulators
Allow unrolling to add parallel reduction phis.
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...