LLVM: lib/Target/AArch64/AArch64TargetTransformInfo.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
23#include "llvm/IR/IntrinsicsAArch64.h"
30#include
31#include
32using namespace llvm;
34
35#define DEBUG_TYPE "aarch64tti"
36
39
41 "sve-prefer-fixed-over-scalable-if-equal", cl::Hidden);
42
45
48
51
55
59 "Penalty of calling a function that requires a change to PSTATE.SM"));
60
63 cl::desc("Penalty of inlining a call that requires a change to PSTATE.SM"));
64
67
70
71
74 cl::desc("The cost of a histcnt instruction"));
75
78 cl::desc("The number of instructions to search for a redundant dmb"));
79
82 cl::desc("Threshold for forced unrolling of small loops in AArch64"));
83
84namespace {
85class TailFoldingOption {
86
87
88
89
90
91
92
93
97
98
99
100 bool NeedsDefault = true;
101
102 void setInitialBits(TailFoldingOpts Bits) { InitialBits = Bits; }
103
104 void setNeedsDefault(bool V) { NeedsDefault = V; }
105
107 EnableBits |= Bit;
108 DisableBits &= ~Bit;
109 }
110
112 EnableBits &= ~Bit;
113 DisableBits |= Bit;
114 }
115
118
119 assert((InitialBits == TailFoldingOpts::Disabled || !NeedsDefault) &&
120 "Initial bits should only include one of "
121 "(disabled|all|simple|default)");
122 Bits = NeedsDefault ? DefaultBits : InitialBits;
123 Bits |= EnableBits;
124 Bits &= ~DisableBits;
125
127 }
128
130 errs() << "invalid argument '" << Opt
131 << "' to -sve-tail-folding=; the option should be of the form\n"
132 " (disabled|all|default|simple)[+(reductions|recurrences"
133 "|reverse|noreductions|norecurrences|noreverse)]\n";
135 }
136
137public:
138
139 void operator=(const std::string &Val) {
140
141 if (Val.empty()) {
143 return;
144 }
145
146
147
148 setNeedsDefault(false);
149
151 StringRef(Val).split(TailFoldTypes, '+', -1, false);
152
153 unsigned StartIdx = 1;
154 if (TailFoldTypes[0] == "disabled")
155 setInitialBits(TailFoldingOpts::Disabled);
156 else if (TailFoldTypes[0] == "all")
157 setInitialBits(TailFoldingOpts::All);
158 else if (TailFoldTypes[0] == "default")
159 setNeedsDefault(true);
160 else if (TailFoldTypes[0] == "simple")
161 setInitialBits(TailFoldingOpts::Simple);
162 else {
163 StartIdx = 0;
164 setInitialBits(TailFoldingOpts::Disabled);
165 }
166
167 for (unsigned I = StartIdx; I < TailFoldTypes.size(); I++) {
168 if (TailFoldTypes[I] == "reductions")
169 setEnableBit(TailFoldingOpts::Reductions);
170 else if (TailFoldTypes[I] == "recurrences")
171 setEnableBit(TailFoldingOpts::Recurrences);
172 else if (TailFoldTypes[I] == "reverse")
173 setEnableBit(TailFoldingOpts::Reverse);
174 else if (TailFoldTypes[I] == "noreductions")
175 setDisableBit(TailFoldingOpts::Reductions);
176 else if (TailFoldTypes[I] == "norecurrences")
177 setDisableBit(TailFoldingOpts::Recurrences);
178 else if (TailFoldTypes[I] == "noreverse")
179 setDisableBit(TailFoldingOpts::Reverse);
180 else
182 }
183 }
184
187 }
188};
189}
190
192
194 "sve-tail-folding",
196 "Control the use of vectorisation using tail-folding for SVE where the"
197 " option is specified in the form (Initial)[+(Flag1|Flag2|...)]:"
198 "\ndisabled (Initial) No loop types will vectorize using "
199 "tail-folding"
200 "\ndefault (Initial) Uses the default tail-folding settings for "
201 "the target CPU"
202 "\nall (Initial) All legal loop types will vectorize using "
203 "tail-folding"
204 "\nsimple (Initial) Use tail-folding for simple loops (not "
205 "reductions or recurrences)"
206 "\nreductions Use tail-folding for loops containing reductions"
207 "\nnoreductions Inverse of above"
208 "\nrecurrences Use tail-folding for loops containing fixed order "
209 "recurrences"
210 "\nnorecurrences Inverse of above"
211 "\nreverse Use tail-folding for loops requiring reversed "
212 "predicates"
213 "\nnoreverse Inverse of above"),
215
216
217
218
220 "enable-fixedwidth-autovec-in-streaming-mode", cl::init(false), cl::Hidden);
221
222
223
224
226 "enable-scalable-autovec-in-streaming-mode", cl::init(false), cl::Hidden);
227
234
235
236
237
242
243
244
245
249 return true;
250 }
251 }
252 return false;
253}
254
258 StringRef FeatureStr = F.getFnAttribute(AttributeStr).getValueAsString();
260 FeatureStr.split(Features, ",");
262}
263
265 return F.hasFnAttribute("fmv-features");
266}
267
268const FeatureBitset AArch64TTIImpl::InlineInverseFeatures = {
269 AArch64::FeatureExecuteOnly,
270};
271
273 const Function *Callee) const {
275
276
277
278
281 return false;
282
283
284
288 }
289
291 return false;
292
297 return false;
298 }
299
300 const TargetMachine &TM = getTLI()->getTargetMachine();
305
306
307
308
309 FeatureBitset EffectiveCallerBits = CallerBits ^ InlineInverseFeatures;
310 FeatureBitset EffectiveCalleeBits = CalleeBits ^ InlineInverseFeatures;
311
312 return (EffectiveCallerBits & EffectiveCalleeBits) == EffectiveCalleeBits;
313}
314
319 return false;
320
321
322
323
324
325
326
327
328
329 if (ST->useSVEForFixedLengthVectors() && llvm::any_of(Types, [](Type *Ty) {
330 auto FVTy = dyn_cast(Ty);
331 return FVTy &&
332 FVTy->getScalarSizeInBits() * FVTy->getNumElements() > 128;
333 }))
334 return false;
335
336 return true;
337}
338
339unsigned
341 unsigned DefaultCallPenalty) const {
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
363 SMECallAttrs CallAttrs(Call, &getTLI()->getRuntimeLibcallsInfo());
364
366 if (F == Call.getCaller())
370 }
371
372 return DefaultCallPenalty;
373}
374
378
380 return true;
381
383 ST->isSVEorStreamingSVEAvailable() &&
384 !ST->disableMaximizeScalableBandwidth();
385}
386
387
388
389
391
393 return 0;
394
395 if (Val < 0)
396 Val = ~Val;
397
398
401 return Insn.size();
402}
403
404
408 assert(Ty->isIntegerTy());
409
410 unsigned BitSize = Ty->getPrimitiveSizeInBits();
411 if (BitSize == 0)
412 return ~0U;
413
414
415 APInt ImmVal = Imm;
416 if (BitSize & 0x3f)
417 ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
418
419
420
422 for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
426 }
427
428 return std::max(1, Cost);
429}
430
435 assert(Ty->isIntegerTy());
436
437 unsigned BitSize = Ty->getPrimitiveSizeInBits();
438
439
440 if (BitSize == 0)
442
443 unsigned ImmIdx = ~0U;
444 switch (Opcode) {
445 default:
447 case Instruction::GetElementPtr:
448
449 if (Idx == 0)
452 case Instruction::Store:
453 ImmIdx = 0;
454 break;
455 case Instruction::Add:
456 case Instruction::Sub:
457 case Instruction::Mul:
458 case Instruction::UDiv:
459 case Instruction::SDiv:
460 case Instruction::URem:
461 case Instruction::SRem:
462 case Instruction::And:
463 case Instruction::Or:
464 case Instruction::Xor:
465 case Instruction::ICmp:
466 ImmIdx = 1;
467 break;
468
469 case Instruction::Shl:
470 case Instruction::LShr:
471 case Instruction::AShr:
472 if (Idx == 1)
474 break;
475 case Instruction::Trunc:
476 case Instruction::ZExt:
477 case Instruction::SExt:
478 case Instruction::IntToPtr:
479 case Instruction::PtrToInt:
480 case Instruction::BitCast:
481 case Instruction::PHI:
482 case Instruction::Call:
483 case Instruction::Select:
484 case Instruction::Ret:
485 case Instruction::Load:
486 break;
487 }
488
489 if (Idx == ImmIdx) {
490 int NumConstants = (BitSize + 63) / 64;
495 }
497}
498
503 assert(Ty->isIntegerTy());
504
505 unsigned BitSize = Ty->getPrimitiveSizeInBits();
506
507
508 if (BitSize == 0)
510
511
512
513
514 if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)
516
517 switch (IID) {
518 default:
520 case Intrinsic::sadd_with_overflow:
521 case Intrinsic::uadd_with_overflow:
522 case Intrinsic::ssub_with_overflow:
523 case Intrinsic::usub_with_overflow:
524 case Intrinsic::smul_with_overflow:
525 case Intrinsic::umul_with_overflow:
526 if (Idx == 1) {
527 int NumConstants = (BitSize + 63) / 64;
532 }
533 break;
534 case Intrinsic::experimental_stackmap:
535 if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
537 break;
538 case Intrinsic::experimental_patchpoint_void:
539 case Intrinsic::experimental_patchpoint:
540 if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
542 break;
543 case Intrinsic::experimental_gc_statepoint:
544 if ((Idx < 5) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
546 break;
547 }
549}
550
554 if (TyWidth == 32 || TyWidth == 64)
556
558}
559
564
567
568
571
572
573 if (!ST->hasSVE2())
575
576 Type *BucketPtrsTy = ICA.getArgTypes()[0];
577 Type *EltTy = ICA.getArgTypes()[1];
578 unsigned TotalHistCnts = 1;
579
581
584
585
586
588 unsigned EC = VTy->getElementCount().getKnownMinValue();
591
592
593 unsigned LegalEltSize = EltSize <= 32 ? 32 : 64;
594
595 if (EC == 2 || (LegalEltSize == 32 && EC == 4))
597
599 TotalHistCnts = EC / NaturalVectorWidth;
600
602 }
603
605}
606
610
611
612
613
618
619 switch (ICA.getID()) {
620 case Intrinsic::experimental_vector_histogram_add: {
622
624 return HistCost;
625 break;
626 }
627 case Intrinsic::umin:
628 case Intrinsic::umax:
629 case Intrinsic::smin:
630 case Intrinsic::smax: {
631 static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
632 MVT::v8i16, MVT::v2i32, MVT::v4i32,
633 MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32,
634 MVT::nxv2i64};
636
637 if (LT.second == MVT::v2i64)
638 return LT.first * 2;
639 if (any_of(ValidMinMaxTys, [<](MVT M) { return M == LT.second; }))
640 return LT.first;
641 break;
642 }
643 case Intrinsic::sadd_sat:
644 case Intrinsic::ssub_sat:
645 case Intrinsic::uadd_sat:
646 case Intrinsic::usub_sat: {
647 static const auto ValidSatTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
648 MVT::v8i16, MVT::v2i32, MVT::v4i32,
649 MVT::v2i64};
651
652
653 unsigned Instrs =
654 LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits() ? 1 : 4;
655 if (any_of(ValidSatTys, [<](MVT M) { return M == LT.second; }))
656 return LT.first * Instrs;
657
660
661 if (ST->isSVEAvailable() && VectorSize >= 128 && isPowerOf2_64(VectorSize))
662 return LT.first * Instrs;
663
664 break;
665 }
666 case Intrinsic::abs: {
667 static const auto ValidAbsTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
668 MVT::v8i16, MVT::v2i32, MVT::v4i32,
669 MVT::v2i64};
671 if (any_of(ValidAbsTys, [<](MVT M) { return M == LT.second; }))
672 return LT.first;
673 break;
674 }
675 case Intrinsic::bswap: {
676 static const auto ValidAbsTys = {MVT::v4i16, MVT::v8i16, MVT::v2i32,
677 MVT::v4i32, MVT::v2i64};
679 if (any_of(ValidAbsTys, [<](MVT M) { return M == LT.second; }) &&
680 LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits())
681 return LT.first;
682 break;
683 }
684 case Intrinsic::fma:
685 case Intrinsic::fmuladd: {
686
687
690 (EltTy->isHalfTy() && ST->hasFullFP16()))
692 break;
693 }
694 case Intrinsic::stepvector: {
697
698
699 if (LT.first > 1) {
703 Cost += AddCost * (LT.first - 1);
704 }
706 }
707 case Intrinsic::vector_extract:
708 case Intrinsic::vector_insert: {
709
710
711
712
713
714
717 break;
718
720 EVT VecVT = getTLI()->getValueType(DL, ICA.getArgTypes()[0]);
721 bool IsExtract = ICA.getID() == Intrinsic::vector_extract;
722 EVT SubVecVT = IsExtract ? getTLI()->getValueType(DL, RetTy)
723 : getTLI()->getValueType(DL, ICA.getArgTypes()[1]);
724
725
727 break;
728
730 getTLI()->getTypeConversion(C, SubVecVT);
732 getTLI()->getTypeConversion(C, VecVT);
738 break;
739 }
740 case Intrinsic::bitreverse: {
741 static const CostTblEntry BitreverseTbl[] = {
742 {Intrinsic::bitreverse, MVT::i32, 1},
743 {Intrinsic::bitreverse, MVT::i64, 1},
744 {Intrinsic::bitreverse, MVT::v8i8, 1},
745 {Intrinsic::bitreverse, MVT::v16i8, 1},
746 {Intrinsic::bitreverse, MVT::v4i16, 2},
747 {Intrinsic::bitreverse, MVT::v8i16, 2},
748 {Intrinsic::bitreverse, MVT::v2i32, 2},
749 {Intrinsic::bitreverse, MVT::v4i32, 2},
750 {Intrinsic::bitreverse, MVT::v1i64, 2},
751 {Intrinsic::bitreverse, MVT::v2i64, 2},
752 };
754 const auto *Entry =
756 if (Entry) {
757
758
759 if (TLI->getValueType(DL, RetTy, true) == MVT::i8 ||
760 TLI->getValueType(DL, RetTy, true) == MVT::i16)
761 return LegalisationCost.first * Entry->Cost + 1;
762
763 return LegalisationCost.first * Entry->Cost;
764 }
765 break;
766 }
767 case Intrinsic::ctpop: {
768 if (!ST->hasNEON()) {
769
771 }
782 };
784 MVT MTy = LT.second;
786
787
789 RetTy->getScalarSizeInBits()
790 ? 1
791 : 0;
792 return LT.first * Entry->Cost + ExtraCost;
793 }
794 break;
795 }
796 case Intrinsic::sadd_with_overflow:
797 case Intrinsic::uadd_with_overflow:
798 case Intrinsic::ssub_with_overflow:
799 case Intrinsic::usub_with_overflow:
800 case Intrinsic::smul_with_overflow:
801 case Intrinsic::umul_with_overflow: {
802 static const CostTblEntry WithOverflowCostTbl[] = {
803 {Intrinsic::sadd_with_overflow, MVT::i8, 3},
804 {Intrinsic::uadd_with_overflow, MVT::i8, 3},
805 {Intrinsic::sadd_with_overflow, MVT::i16, 3},
806 {Intrinsic::uadd_with_overflow, MVT::i16, 3},
807 {Intrinsic::sadd_with_overflow, MVT::i32, 1},
808 {Intrinsic::uadd_with_overflow, MVT::i32, 1},
809 {Intrinsic::sadd_with_overflow, MVT::i64, 1},
810 {Intrinsic::uadd_with_overflow, MVT::i64, 1},
811 {Intrinsic::ssub_with_overflow, MVT::i8, 3},
812 {Intrinsic::usub_with_overflow, MVT::i8, 3},
813 {Intrinsic::ssub_with_overflow, MVT::i16, 3},
814 {Intrinsic::usub_with_overflow, MVT::i16, 3},
815 {Intrinsic::ssub_with_overflow, MVT::i32, 1},
816 {Intrinsic::usub_with_overflow, MVT::i32, 1},
817 {Intrinsic::ssub_with_overflow, MVT::i64, 1},
818 {Intrinsic::usub_with_overflow, MVT::i64, 1},
819 {Intrinsic::smul_with_overflow, MVT::i8, 5},
820 {Intrinsic::umul_with_overflow, MVT::i8, 4},
821 {Intrinsic::smul_with_overflow, MVT::i16, 5},
822 {Intrinsic::umul_with_overflow, MVT::i16, 4},
823 {Intrinsic::smul_with_overflow, MVT::i32, 2},
824 {Intrinsic::umul_with_overflow, MVT::i32, 2},
825 {Intrinsic::smul_with_overflow, MVT::i64, 3},
826 {Intrinsic::umul_with_overflow, MVT::i64, 3},
827 };
828 EVT MTy = TLI->getValueType(DL, RetTy->getContainedType(0), true);
832 return Entry->Cost;
833 break;
834 }
835 case Intrinsic::fptosi_sat:
836 case Intrinsic::fptoui_sat: {
838 break;
839 bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;
841 EVT MTy = TLI->getValueType(DL, RetTy);
842
843
844 if ((LT.second == MVT::f32 || LT.second == MVT::f64 ||
845 LT.second == MVT::v2f32 || LT.second == MVT::v4f32 ||
846 LT.second == MVT::v2f64)) {
848 (LT.second == MVT::f64 && MTy == MVT::i32) ||
849 (LT.second == MVT::f32 && MTy == MVT::i64)))
850 return LT.first;
851
852 if (LT.second.getScalarType() == MVT::f32 && MTy.isFixedLengthVector() &&
855 }
856
857
858 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
861 RetTy,
865 if ((LT.second == MVT::f16 && MTy == MVT::i32) ||
866 (LT.second == MVT::f16 && MTy == MVT::i64) ||
867 ((LT.second == MVT::v4f16 || LT.second == MVT::v8f16) &&
869 return LT.first;
870
871 if (LT.second.getScalarType() == MVT::f16 && MTy.isFixedLengthVector() &&
874
875
876 if (LT.second.getScalarType() == MVT::f16 && MTy.isFixedLengthVector() &&
879
880
881 if ((LT.second.getScalarType() == MVT::f32 ||
882 LT.second.getScalarType() == MVT::f64 ||
883 LT.second.getScalarType() == MVT::f16) &&
885 Type *LegalTy =
886 Type::getIntNTy(RetTy->getContext(), LT.second.getScalarSizeInBits());
887 if (LT.second.isVector())
888 LegalTy = VectorType::get(LegalTy, LT.second.getVectorElementCount());
891 LegalTy, {LegalTy, LegalTy});
894 LegalTy, {LegalTy, LegalTy});
896 return LT.first * Cost +
897 ((LT.second.getScalarType() != MVT::f16 || ST->hasFullFP16()) ? 0
898 : 1);
899 }
900
901
903 RetTy = RetTy->getScalarType();
904 if (LT.second.isVector()) {
905 FPTy = VectorType::get(FPTy, LT.second.getVectorElementCount());
906 RetTy = VectorType::get(RetTy, LT.second.getVectorElementCount());
907 }
913 getCastInstrCost(IsSigned ? Instruction::FPToSI : Instruction::FPToUI,
915 if (IsSigned) {
921 }
922 return LT.first * Cost;
923 }
924 case Intrinsic::fshl:
925 case Intrinsic::fshr: {
927 break;
928
930
931
932
933
934 if (RetTy->isIntegerTy() && ICA.getArgs()[0] == ICA.getArgs()[1] &&
935 (RetTy->getPrimitiveSizeInBits() == 32 ||
936 RetTy->getPrimitiveSizeInBits() == 64)) {
938 (ICA.getID() == Intrinsic::fshl && !OpInfoZ.isConstant()) ? 1 : 0;
939 return 1 + NegCost;
940 }
941
942
944 break;
945
949 {Intrinsic::fshl, MVT::v4i32, 2},
950 {Intrinsic::fshl, MVT::v2i64, 2}, {Intrinsic::fshl, MVT::v16i8, 2},
951 {Intrinsic::fshl, MVT::v8i16, 2}, {Intrinsic::fshl, MVT::v2i32, 2},
952 {Intrinsic::fshl, MVT::v8i8, 2}, {Intrinsic::fshl, MVT::v4i16, 2}};
953
954
955 const auto *Entry =
956 CostTableLookup(FshlTbl, Intrinsic::fshl, LegalisationCost.second);
957 if (Entry)
958 return LegalisationCost.first * Entry->Cost;
959 }
960
962 if (!RetTy->isIntegerTy())
963 break;
964
965
966
967 bool HigherCost = (RetTy->getScalarSizeInBits() != 32 &&
968 RetTy->getScalarSizeInBits() < 64) ||
969 (RetTy->getScalarSizeInBits() % 64 != 0);
970 unsigned ExtraCost = HigherCost ? 1 : 0;
971 if (RetTy->getScalarSizeInBits() == 32 ||
972 RetTy->getScalarSizeInBits() == 64)
973 ExtraCost = 0;
974
975 else if (HigherCost)
976 ExtraCost = 1;
977 else
978 break;
979 return TyL.first + ExtraCost;
980 }
981 case Intrinsic::get_active_lane_mask: {
983 EVT RetVT = getTLI()->getValueType(DL, RetTy);
985 if (getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT))
986 break;
987
988 if (RetTy->isScalableTy()) {
989 if (TLI->getTypeAction(RetTy->getContext(), RetVT) !=
991 break;
992
995
996
997
998
999 if (ST->hasSVE2p1() || ST->hasSME2()) {
1001 if (Cost == 1)
1002 return Cost;
1003 }
1004
1005
1006
1007
1014 return Cost + (SplitCost * (Cost - 1));
1015 } else if (!getTLI()->isTypeLegal(RetVT)) {
1016
1017
1018
1019
1020
1021
1022
1023
1024
1026 }
1027 break;
1028 }
1029 case Intrinsic::experimental_vector_match: {
1031 EVT SearchVT = getTLI()->getValueType(DL, ICA.getArgTypes()[0]);
1032 unsigned SearchSize = NeedleTy->getNumElements();
1033 if (!getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize)) {
1034
1035
1036
1037
1038
1042 return Cost;
1043 }
1044 break;
1045 }
1046 case Intrinsic::experimental_cttz_elts: {
1047 EVT ArgVT = getTLI()->getValueType(DL, ICA.getArgTypes()[0]);
1048 if (!getTLI()->shouldExpandCttzElements(ArgVT)) {
1049
1050
1051
1052 return 4;
1053 }
1054 break;
1055 }
1056 case Intrinsic::experimental_vector_extract_last_active:
1057 if (ST->isSVEorStreamingSVEAvailable()) {
1059
1060 return LegalCost;
1061 }
1062 break;
1063 default:
1064 break;
1065 }
1067}
1068
1069
1070
1074 auto RequiredType = II.getType();
1075
1077 assert(PN && "Expected Phi Node!");
1078
1079
1080 if (!PN->hasOneUse())
1081 return std::nullopt;
1082
1083 for (Value *IncValPhi : PN->incoming_values()) {
1085 if (!Reinterpret ||
1086 Reinterpret->getIntrinsicID() !=
1087 Intrinsic::aarch64_sve_convert_to_svbool ||
1088 RequiredType != Reinterpret->getArgOperand(0)->getType())
1089 return std::nullopt;
1090 }
1091
1092
1096
1097 for (unsigned I = 0; I < PN->getNumIncomingValues(); I++) {
1099 NPN->addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(I));
1100 Worklist.push_back(Reinterpret);
1101 }
1102
1103
1105}
1106
1107
1108
1110
1111
1112
1113
1114
1115
1123
1124
1131
1132
1138
1139
1145
1146
1147
1153
1154
1155
1162
1163
1164
1166
1167
1168
1169
1170
1172 return GoverningPredicateIdx != std::numeric_limits::max();
1173 }
1174
1177 return GoverningPredicateIdx;
1178 }
1179
1182 GoverningPredicateIdx = Index;
1183 return *this;
1184 }
1185
1186
1187
1188
1189
1190
1191
1192
1193
1197
1200 return UndefIntrinsic;
1201 }
1202
1205 UndefIntrinsic = IID;
1206 return *this;
1207 }
1208
1210
1213 return IROpcode;
1214 }
1215
1218 IROpcode = Opcode;
1219 return *this;
1220 }
1221
1222
1223
1224
1225
1227 return ResultLanes == InactiveLanesTakenFromOperand;
1228 }
1229
1232 return OperandIdxForInactiveLanes;
1233 }
1234
1236 assert(ResultLanes == Uninitialized && "Cannot set property twice!");
1237 ResultLanes = InactiveLanesTakenFromOperand;
1238 OperandIdxForInactiveLanes = Index;
1239 return *this;
1240 }
1241
1243 return ResultLanes == InactiveLanesAreNotDefined;
1244 }
1245
1247 assert(ResultLanes == Uninitialized && "Cannot set property twice!");
1248 ResultLanes = InactiveLanesAreNotDefined;
1249 return *this;
1250 }
1251
1253 return ResultLanes == InactiveLanesAreUnused;
1254 }
1255
1257 assert(ResultLanes == Uninitialized && "Cannot set property twice!");
1258 ResultLanes = InactiveLanesAreUnused;
1259 return *this;
1260 }
1261
1262
1263
1264
1266
1268 ResultIsZeroInitialized = true;
1269 return *this;
1270 }
1271
1272
1273
1274
1275
1276
1277
1279 return OperandIdxWithNoActiveLanes != std::numeric_limits::max();
1280 }
1281
1284 return OperandIdxWithNoActiveLanes;
1285 }
1286
1289 OperandIdxWithNoActiveLanes = Index;
1290 return *this;
1291 }
1292
1293private:
1294 unsigned GoverningPredicateIdx = std::numeric_limits::max();
1295
1297 unsigned IROpcode = 0;
1298
1299 enum PredicationStyle {
1301 InactiveLanesTakenFromOperand,
1302 InactiveLanesAreNotDefined,
1303 InactiveLanesAreUnused
1305
1306 bool ResultIsZeroInitialized = false;
1307 unsigned OperandIdxForInactiveLanes = std::numeric_limits::max();
1308 unsigned OperandIdxWithNoActiveLanes = std::numeric_limits::max();
1309};
1310
1312
1313
1316 return !isa(V->getType());
1317 }))
1319
1321 switch (IID) {
1322 default:
1323 break;
1324 case Intrinsic::aarch64_sve_fcvt_bf16f32_v2:
1325 case Intrinsic::aarch64_sve_fcvt_f16f32:
1326 case Intrinsic::aarch64_sve_fcvt_f16f64:
1327 case Intrinsic::aarch64_sve_fcvt_f32f16:
1328 case Intrinsic::aarch64_sve_fcvt_f32f64:
1329 case Intrinsic::aarch64_sve_fcvt_f64f16:
1330 case Intrinsic::aarch64_sve_fcvt_f64f32:
1331 case Intrinsic::aarch64_sve_fcvtlt_f32f16:
1332 case Intrinsic::aarch64_sve_fcvtlt_f64f32:
1333 case Intrinsic::aarch64_sve_fcvtx_f32f64:
1334 case Intrinsic::aarch64_sve_fcvtzs:
1335 case Intrinsic::aarch64_sve_fcvtzs_i32f16:
1336 case Intrinsic::aarch64_sve_fcvtzs_i32f64:
1337 case Intrinsic::aarch64_sve_fcvtzs_i64f16:
1338 case Intrinsic::aarch64_sve_fcvtzs_i64f32:
1339 case Intrinsic::aarch64_sve_fcvtzu:
1340 case Intrinsic::aarch64_sve_fcvtzu_i32f16:
1341 case Intrinsic::aarch64_sve_fcvtzu_i32f64:
1342 case Intrinsic::aarch64_sve_fcvtzu_i64f16:
1343 case Intrinsic::aarch64_sve_fcvtzu_i64f32:
1344 case Intrinsic::aarch64_sve_scvtf:
1345 case Intrinsic::aarch64_sve_scvtf_f16i32:
1346 case Intrinsic::aarch64_sve_scvtf_f16i64:
1347 case Intrinsic::aarch64_sve_scvtf_f32i64:
1348 case Intrinsic::aarch64_sve_scvtf_f64i32:
1349 case Intrinsic::aarch64_sve_ucvtf:
1350 case Intrinsic::aarch64_sve_ucvtf_f16i32:
1351 case Intrinsic::aarch64_sve_ucvtf_f16i64:
1352 case Intrinsic::aarch64_sve_ucvtf_f32i64:
1353 case Intrinsic::aarch64_sve_ucvtf_f64i32:
1355
1356 case Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2:
1357 case Intrinsic::aarch64_sve_fcvtnt_f16f32:
1358 case Intrinsic::aarch64_sve_fcvtnt_f32f64:
1359 case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
1361
1362 case Intrinsic::aarch64_sve_fabd:
1364 case Intrinsic::aarch64_sve_fadd:
1367 case Intrinsic::aarch64_sve_fdiv:
1370 case Intrinsic::aarch64_sve_fmax:
1372 case Intrinsic::aarch64_sve_fmaxnm:
1374 case Intrinsic::aarch64_sve_fmin:
1376 case Intrinsic::aarch64_sve_fminnm:
1378 case Intrinsic::aarch64_sve_fmla:
1380 case Intrinsic::aarch64_sve_fmls:
1382 case Intrinsic::aarch64_sve_fmul:
1385 case Intrinsic::aarch64_sve_fmulx:
1387 case Intrinsic::aarch64_sve_fnmla:
1389 case Intrinsic::aarch64_sve_fnmls:
1391 case Intrinsic::aarch64_sve_fsub:
1394 case Intrinsic::aarch64_sve_add:
1397 case Intrinsic::aarch64_sve_mla:
1399 case Intrinsic::aarch64_sve_mls:
1401 case Intrinsic::aarch64_sve_mul:
1404 case Intrinsic::aarch64_sve_sabd:
1406 case Intrinsic::aarch64_sve_sdiv:
1409 case Intrinsic::aarch64_sve_smax:
1411 case Intrinsic::aarch64_sve_smin:
1413 case Intrinsic::aarch64_sve_smulh:
1415 case Intrinsic::aarch64_sve_sub:
1418 case Intrinsic::aarch64_sve_uabd:
1420 case Intrinsic::aarch64_sve_udiv:
1423 case Intrinsic::aarch64_sve_umax:
1425 case Intrinsic::aarch64_sve_umin:
1427 case Intrinsic::aarch64_sve_umulh:
1429 case Intrinsic::aarch64_sve_asr:
1432 case Intrinsic::aarch64_sve_lsl:
1435 case Intrinsic::aarch64_sve_lsr:
1438 case Intrinsic::aarch64_sve_and:
1441 case Intrinsic::aarch64_sve_bic:
1443 case Intrinsic::aarch64_sve_eor:
1446 case Intrinsic::aarch64_sve_orr:
1449 case Intrinsic::aarch64_sve_sqrshl:
1451 case Intrinsic::aarch64_sve_sqshl:
1453 case Intrinsic::aarch64_sve_sqsub:
1455 case Intrinsic::aarch64_sve_srshl:
1457 case Intrinsic::aarch64_sve_uqrshl:
1459 case Intrinsic::aarch64_sve_uqshl:
1461 case Intrinsic::aarch64_sve_uqsub:
1463 case Intrinsic::aarch64_sve_urshl:
1465
1466 case Intrinsic::aarch64_sve_add_u:
1468 Instruction::Add);
1469 case Intrinsic::aarch64_sve_and_u:
1471 Instruction::And);
1472 case Intrinsic::aarch64_sve_asr_u:
1474 Instruction::AShr);
1475 case Intrinsic::aarch64_sve_eor_u:
1477 Instruction::Xor);
1478 case Intrinsic::aarch64_sve_fadd_u:
1480 Instruction::FAdd);
1481 case Intrinsic::aarch64_sve_fdiv_u:
1483 Instruction::FDiv);
1484 case Intrinsic::aarch64_sve_fmul_u:
1486 Instruction::FMul);
1487 case Intrinsic::aarch64_sve_fsub_u:
1489 Instruction::FSub);
1490 case Intrinsic::aarch64_sve_lsl_u:
1492 Instruction::Shl);
1493 case Intrinsic::aarch64_sve_lsr_u:
1495 Instruction::LShr);
1496 case Intrinsic::aarch64_sve_mul_u:
1498 Instruction::Mul);
1499 case Intrinsic::aarch64_sve_orr_u:
1501 Instruction::Or);
1502 case Intrinsic::aarch64_sve_sdiv_u:
1504 Instruction::SDiv);
1505 case Intrinsic::aarch64_sve_sub_u:
1507 Instruction::Sub);
1508 case Intrinsic::aarch64_sve_udiv_u:
1510 Instruction::UDiv);
1511
1512 case Intrinsic::aarch64_sve_addqv:
1513 case Intrinsic::aarch64_sve_and_z:
1514 case Intrinsic::aarch64_sve_bic_z:
1515 case Intrinsic::aarch64_sve_brka_z:
1516 case Intrinsic::aarch64_sve_brkb_z:
1517 case Intrinsic::aarch64_sve_brkn_z:
1518 case Intrinsic::aarch64_sve_brkpa_z:
1519 case Intrinsic::aarch64_sve_brkpb_z:
1520 case Intrinsic::aarch64_sve_cntp:
1521 case Intrinsic::aarch64_sve_compact:
1522 case Intrinsic::aarch64_sve_eor_z:
1523 case Intrinsic::aarch64_sve_eorv:
1524 case Intrinsic::aarch64_sve_eorqv:
1525 case Intrinsic::aarch64_sve_nand_z:
1526 case Intrinsic::aarch64_sve_nor_z:
1527 case Intrinsic::aarch64_sve_orn_z:
1528 case Intrinsic::aarch64_sve_orr_z:
1529 case Intrinsic::aarch64_sve_orv:
1530 case Intrinsic::aarch64_sve_orqv:
1531 case Intrinsic::aarch64_sve_pnext:
1532 case Intrinsic::aarch64_sve_rdffr_z:
1533 case Intrinsic::aarch64_sve_saddv:
1534 case Intrinsic::aarch64_sve_uaddv:
1535 case Intrinsic::aarch64_sve_umaxv:
1536 case Intrinsic::aarch64_sve_umaxqv:
1537 case Intrinsic::aarch64_sve_cmpeq:
1538 case Intrinsic::aarch64_sve_cmpeq_wide:
1539 case Intrinsic::aarch64_sve_cmpge:
1540 case Intrinsic::aarch64_sve_cmpge_wide:
1541 case Intrinsic::aarch64_sve_cmpgt:
1542 case Intrinsic::aarch64_sve_cmpgt_wide:
1543 case Intrinsic::aarch64_sve_cmphi:
1544 case Intrinsic::aarch64_sve_cmphi_wide:
1545 case Intrinsic::aarch64_sve_cmphs:
1546 case Intrinsic::aarch64_sve_cmphs_wide:
1547 case Intrinsic::aarch64_sve_cmple_wide:
1548 case Intrinsic::aarch64_sve_cmplo_wide:
1549 case Intrinsic::aarch64_sve_cmpls_wide:
1550 case Intrinsic::aarch64_sve_cmplt_wide:
1551 case Intrinsic::aarch64_sve_cmpne:
1552 case Intrinsic::aarch64_sve_cmpne_wide:
1553 case Intrinsic::aarch64_sve_facge:
1554 case Intrinsic::aarch64_sve_facgt:
1555 case Intrinsic::aarch64_sve_fcmpeq:
1556 case Intrinsic::aarch64_sve_fcmpge:
1557 case Intrinsic::aarch64_sve_fcmpgt:
1558 case Intrinsic::aarch64_sve_fcmpne:
1559 case Intrinsic::aarch64_sve_fcmpuo:
1560 case Intrinsic::aarch64_sve_ld1:
1561 case Intrinsic::aarch64_sve_ld1_gather:
1562 case Intrinsic::aarch64_sve_ld1_gather_index:
1563 case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
1564 case Intrinsic::aarch64_sve_ld1_gather_sxtw:
1565 case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
1566 case Intrinsic::aarch64_sve_ld1_gather_uxtw:
1567 case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
1568 case Intrinsic::aarch64_sve_ld1q_gather_index:
1569 case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:
1570 case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:
1571 case Intrinsic::aarch64_sve_ld1ro:
1572 case Intrinsic::aarch64_sve_ld1rq:
1573 case Intrinsic::aarch64_sve_ld1udq:
1574 case Intrinsic::aarch64_sve_ld1uwq:
1575 case Intrinsic::aarch64_sve_ld2_sret:
1576 case Intrinsic::aarch64_sve_ld2q_sret:
1577 case Intrinsic::aarch64_sve_ld3_sret:
1578 case Intrinsic::aarch64_sve_ld3q_sret:
1579 case Intrinsic::aarch64_sve_ld4_sret:
1580 case Intrinsic::aarch64_sve_ld4q_sret:
1581 case Intrinsic::aarch64_sve_ldff1:
1582 case Intrinsic::aarch64_sve_ldff1_gather:
1583 case Intrinsic::aarch64_sve_ldff1_gather_index:
1584 case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
1585 case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
1586 case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
1587 case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
1588 case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
1589 case Intrinsic::aarch64_sve_ldnf1:
1590 case Intrinsic::aarch64_sve_ldnt1:
1591 case Intrinsic::aarch64_sve_ldnt1_gather:
1592 case Intrinsic::aarch64_sve_ldnt1_gather_index:
1593 case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
1594 case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
1596
1597 case Intrinsic::aarch64_sve_prf:
1598 case Intrinsic::aarch64_sve_prfb_gather_index:
1599 case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
1600 case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
1601 case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
1602 case Intrinsic::aarch64_sve_prfd_gather_index:
1603 case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
1604 case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
1605 case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
1606 case Intrinsic::aarch64_sve_prfh_gather_index:
1607 case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
1608 case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
1609 case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
1610 case Intrinsic::aarch64_sve_prfw_gather_index:
1611 case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
1612 case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
1613 case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
1615
1616 case Intrinsic::aarch64_sve_st1_scatter:
1617 case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
1618 case Intrinsic::aarch64_sve_st1_scatter_sxtw:
1619 case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
1620 case Intrinsic::aarch64_sve_st1_scatter_uxtw:
1621 case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
1622 case Intrinsic::aarch64_sve_st1dq:
1623 case Intrinsic::aarch64_sve_st1q_scatter_index:
1624 case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset:
1625 case Intrinsic::aarch64_sve_st1q_scatter_vector_offset:
1626 case Intrinsic::aarch64_sve_st1wq:
1627 case Intrinsic::aarch64_sve_stnt1:
1628 case Intrinsic::aarch64_sve_stnt1_scatter:
1629 case Intrinsic::aarch64_sve_stnt1_scatter_index:
1630 case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
1631 case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
1633 case Intrinsic::aarch64_sve_st2:
1634 case Intrinsic::aarch64_sve_st2q:
1636 case Intrinsic::aarch64_sve_st3:
1637 case Intrinsic::aarch64_sve_st3q:
1639 case Intrinsic::aarch64_sve_st4:
1640 case Intrinsic::aarch64_sve_st4q:
1642 }
1643
1645}
1646
1648 Value *UncastedPred;
1649
1650
1652 m_Value(UncastedPred)))) {
1654 Pred = UncastedPred;
1655
1657 m_Value(UncastedPred))))
1658
1659
1660 if (OrigPredTy->getMinNumElements() <=
1662 ->getMinNumElements())
1663 Pred = UncastedPred;
1664 }
1665
1667 return C && C->isAllOnesValue();
1668}
1669
1670
1671
1674 if (Dup && Dup->getIntrinsicID() == Intrinsic::aarch64_sve_dup &&
1675 Dup->getOperand(1) == Pg && isa(Dup->getOperand(2)))
1679
1680 return V;
1681}
1682
1683static std::optional<Instruction *>
1688
1689 Value *Pg = II.getOperand(0);
1690 Value *Op1 = II.getOperand(1);
1691 Value *Op2 = II.getOperand(2);
1693
1694
1699 return &II;
1700 }
1701
1702
1705
1706 Value *SimpleII;
1708 SimpleII = simplifyBinOp(Opc, Op1, Op2, FII->getFastMathFlags(), DL);
1709 else
1711
1712
1713
1714
1715
1717 return std::nullopt;
1718
1721
1723
1724
1725 if (SimpleII == Inactive)
1727
1728
1731}
1732
1733
1734
1735static std::optional<Instruction *>
1739 return std::nullopt;
1740
1742
1743
1748
1752
1754 }
1755 }
1756
1757
1763 }
1764
1768 II.setCalledFunction(NewDecl);
1769 return &II;
1770 }
1771 }
1772
1773
1777
1778 return std::nullopt;
1779}
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790static std::optional<Instruction *>
1793 if (!BinOp)
1794 return std::nullopt;
1795
1796 auto IntrinsicID = BinOp->getIntrinsicID();
1797 switch (IntrinsicID) {
1798 case Intrinsic::aarch64_sve_and_z:
1799 case Intrinsic::aarch64_sve_bic_z:
1800 case Intrinsic::aarch64_sve_eor_z:
1801 case Intrinsic::aarch64_sve_nand_z:
1802 case Intrinsic::aarch64_sve_nor_z:
1803 case Intrinsic::aarch64_sve_orn_z:
1804 case Intrinsic::aarch64_sve_orr_z:
1805 break;
1806 default:
1807 return std::nullopt;
1808 }
1809
1810 auto BinOpPred = BinOp->getOperand(0);
1811 auto BinOpOp1 = BinOp->getOperand(1);
1812 auto BinOpOp2 = BinOp->getOperand(2);
1813
1815 if (!PredIntr ||
1816 PredIntr->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)
1817 return std::nullopt;
1818
1819 auto PredOp = PredIntr->getOperand(0);
1821 if (PredOpTy != II.getType())
1822 return std::nullopt;
1823
1826 Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp1});
1827 NarrowedBinOpArgs.push_back(NarrowBinOpOp1);
1828 if (BinOpOp1 == BinOpOp2)
1829 NarrowedBinOpArgs.push_back(NarrowBinOpOp1);
1830 else
1832 Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp2}));
1833
1834 auto NarrowedBinOp =
1837}
1838
1839static std::optional<Instruction *>
1841
1844
1846 return BinOpCombine;
1847
1848
1851 return std::nullopt;
1852
1854 Value *Cursor = II.getOperand(0), *EarliestReplacement = nullptr;
1855
1857
1858
1859 while (Cursor) {
1860
1861
1862 const auto *CursorVTy = cast(Cursor->getType());
1863 if (CursorVTy->getElementCount().getKnownMinValue() <
1864 IVTy->getElementCount().getKnownMinValue())
1865 break;
1866
1867
1868 if (Cursor->getType() == IVTy)
1869 EarliestReplacement = Cursor;
1870
1872
1873
1874 if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() ==
1875 Intrinsic::aarch64_sve_convert_to_svbool ||
1876 IntrinsicCursor->getIntrinsicID() ==
1877 Intrinsic::aarch64_sve_convert_from_svbool))
1878 break;
1879
1880 CandidatesForRemoval.insert(CandidatesForRemoval.begin(), IntrinsicCursor);
1881 Cursor = IntrinsicCursor->getOperand(0);
1882 }
1883
1884
1885
1886 if (!EarliestReplacement)
1887 return std::nullopt;
1888
1890}
1891
1894
1895 auto *OpPredicate = II.getOperand(0);
1898
1902}
1903
1906 Value *Pg = II.getOperand(1);
1907
1908
1912 II.getArgOperand(2));
1914 }
1915
1918 return std::nullopt;
1919
1920
1922 II.getArgOperand(0), II.getArgOperand(2), uint64_t(0));
1924}
1925
1928
1931 II.getArgOperand(0));
1934}
1935
1939
1941 return std::nullopt;
1942
1943
1944 auto *SplatValue =
1946 if (!SplatValue || !SplatValue->isZero())
1947 return std::nullopt;
1948
1949
1951 if (!DupQLane ||
1952 DupQLane->getIntrinsicID() != Intrinsic::aarch64_sve_dupq_lane)
1953 return std::nullopt;
1954
1955
1957 if (!DupQLaneIdx || !DupQLaneIdx->isZero())
1958 return std::nullopt;
1959
1961 if (!VecIns || VecIns->getIntrinsicID() != Intrinsic::vector_insert)
1962 return std::nullopt;
1963
1964
1965
1967 return std::nullopt;
1968
1970 return std::nullopt;
1971
1973 if (!ConstVec)
1974 return std::nullopt;
1975
1978 if (!VecTy || !OutTy || VecTy->getNumElements() != OutTy->getMinNumElements())
1979 return std::nullopt;
1980
1981 unsigned NumElts = VecTy->getNumElements();
1982 unsigned PredicateBits = 0;
1983
1984
1985 for (unsigned I = 0; I < NumElts; ++I) {
1987 if (!Arg)
1988 return std::nullopt;
1989 if (!Arg->isZero())
1990 PredicateBits |= 1 << (I * (16 / NumElts));
1991 }
1992
1993
1994 if (PredicateBits == 0) {
1996 PFalse->takeName(&II);
1998 }
1999
2000
2001 unsigned Mask = 8;
2002 for (unsigned I = 0; I < 16; ++I)
2003 if ((PredicateBits & (1 << I)) != 0)
2004 Mask |= (I % 8);
2005
2006 unsigned PredSize = Mask & -Mask;
2009
2010
2011 for (unsigned I = 0; I < 16; I += PredSize)
2012 if ((PredicateBits & (1 << I)) == 0)
2013 return std::nullopt;
2014
2015 auto *PTruePat =
2016 ConstantInt::get(Type::getInt32Ty(Ctx), AArch64SVEPredPattern::all);
2018 {PredType}, {PTruePat});
2020 Intrinsic::aarch64_sve_convert_to_svbool, {PredType}, {PTrue});
2021 auto *ConvertFromSVBool =
2023 {II.getType()}, {ConvertToSVBool});
2024
2027}
2028
2031 Value *Pg = II.getArgOperand(0);
2032 Value *Vec = II.getArgOperand(1);
2033 auto IntrinsicID = II.getIntrinsicID();
2034 bool IsAfter = IntrinsicID == Intrinsic::aarch64_sve_lasta;
2035
2036
2039
2040
2041
2046 auto OpC = OldBinOp->getOpcode();
2047 auto *NewLHS =
2049 auto *NewRHS =
2052 OpC, NewLHS, NewRHS, OldBinOp, OldBinOp->getName(), II.getIterator());
2054 }
2055 }
2056
2058 if (IsAfter && C && C->isNullValue()) {
2059
2062 Extract->insertBefore(II.getIterator());
2063 Extract->takeName(&II);
2065 }
2066
2068 if (!IntrPG)
2069 return std::nullopt;
2070
2071 if (IntrPG->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
2072 return std::nullopt;
2073
2074 const auto PTruePattern =
2076
2077
2079 if (!MinNumElts)
2080 return std::nullopt;
2081
2082 unsigned Idx = MinNumElts - 1;
2083
2084
2085 if (IsAfter)
2086 ++Idx;
2087
2088
2089
2090
2092 if (Idx >= PgVTy->getMinNumElements())
2093 return std::nullopt;
2094
2095
2098 Extract->insertBefore(II.getIterator());
2099 Extract->takeName(&II);
2101}
2102
2105
2106
2107
2108
2109
2110
2111
2112 Value *Pg = II.getArgOperand(0);
2114 Value *Vec = II.getArgOperand(2);
2116
2117 if (!Ty->isIntegerTy())
2118 return std::nullopt;
2119
2122 default:
2123 return std::nullopt;
2124 case 16:
2126 break;
2127 case 32:
2129 break;
2130 case 64:
2132 break;
2133 }
2134
2140 II.getIntrinsicID(), {FPVec->getType()}, {Pg, FPFallBack, FPVec});
2143}
2144
2148
2149
2150 auto *AllPat =
2151 ConstantInt::get(Type::getInt32Ty(Ctx), AArch64SVEPredPattern::all);
2153 {II.getType()}, {AllPat});
2154 auto *RDFFR =
2158}
2159
2160static std::optional<Instruction *>
2163
2164 if (Pattern == AArch64SVEPredPattern::all) {
2169 }
2170
2172
2173 return MinNumElts && NumElts >= MinNumElts
2175 II, ConstantInt::get(II.getType(), MinNumElts)))
2176 : std::nullopt;
2177}
2178
2179static std::optional<Instruction *>
2182 if (!ST->isStreaming())
2183 return std::nullopt;
2184
2185
2186
2191}
2192
2195 Value *PgVal = II.getArgOperand(0);
2196 Value *OpVal = II.getArgOperand(1);
2197
2198
2199
2200 if (PgVal == OpVal &&
2201 (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_first ||
2202 II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_last)) {
2203 Value *Ops[] = {PgVal, OpVal};
2205
2206 auto *PTest =
2209
2211 }
2212
2215
2216 if (!Pg || )
2217 return std::nullopt;
2218
2220
2221 if (Pg->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
2222 OpIID == Intrinsic::aarch64_sve_convert_to_svbool &&
2226
2228
2231 }
2232
2233
2234
2235
2236 if ((Pg == Op) && (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) &&
2237 ((OpIID == Intrinsic::aarch64_sve_brka_z) ||
2238 (OpIID == Intrinsic::aarch64_sve_brkb_z) ||
2239 (OpIID == Intrinsic::aarch64_sve_brkpa_z) ||
2240 (OpIID == Intrinsic::aarch64_sve_brkpb_z) ||
2241 (OpIID == Intrinsic::aarch64_sve_rdffr_z) ||
2242 (OpIID == Intrinsic::aarch64_sve_and_z) ||
2243 (OpIID == Intrinsic::aarch64_sve_bic_z) ||
2244 (OpIID == Intrinsic::aarch64_sve_eor_z) ||
2245 (OpIID == Intrinsic::aarch64_sve_nand_z) ||
2246 (OpIID == Intrinsic::aarch64_sve_nor_z) ||
2247 (OpIID == Intrinsic::aarch64_sve_orn_z) ||
2248 (OpIID == Intrinsic::aarch64_sve_orr_z))) {
2251
2254
2256 }
2257
2258 return std::nullopt;
2259}
2260
2261template <Intrinsic::ID MulOpc, Intrinsic::ID FuseOpc>
2262static std::optional<Instruction *>
2264 bool MergeIntoAddendOp) {
2265 Value *P = II.getOperand(0);
2266 Value *MulOp0, *MulOp1, *AddendOp, *Mul;
2267 if (MergeIntoAddendOp) {
2268 AddendOp = II.getOperand(1);
2270 } else {
2271 AddendOp = II.getOperand(2);
2273 }
2274
2277 return std::nullopt;
2278
2279 if (->hasOneUse())
2280 return std::nullopt;
2281
2283 if (II.getType()->isFPOrFPVectorTy()) {
2285
2286
2288 return std::nullopt;
2290 return std::nullopt;
2292 }
2293
2295 if (MergeIntoAddendOp)
2297 {P, AddendOp, MulOp0, MulOp1}, FMFSource);
2298 else
2300 {P, MulOp0, MulOp1, AddendOp}, FMFSource);
2301
2303}
2304
2305static std::optional<Instruction *>
2307 Value *Pred = II.getOperand(0);
2308 Value *PtrOp = II.getOperand(1);
2309 Type *VecTy = II.getType();
2310
2313 Load->copyMetadata(II);
2315 }
2316
2322}
2323
2324static std::optional<Instruction *>
2326 Value *VecOp = II.getOperand(0);
2327 Value *Pred = II.getOperand(1);
2328 Value *PtrOp = II.getOperand(2);
2329
2332 Store->copyMetadata(II);
2334 }
2335
2340}
2341
2344 case Intrinsic::aarch64_sve_fmul_u:
2345 return Instruction::BinaryOps::FMul;
2346 case Intrinsic::aarch64_sve_fadd_u:
2347 return Instruction::BinaryOps::FAdd;
2348 case Intrinsic::aarch64_sve_fsub_u:
2349 return Instruction::BinaryOps::FSub;
2350 default:
2351 return Instruction::BinaryOpsEnd;
2352 }
2353}
2354
2355static std::optional<Instruction *>
2357
2358 if (II.isStrictFP())
2359 return std::nullopt;
2360
2361 auto *OpPredicate = II.getOperand(0);
2363 if (BinOpCode == Instruction::BinaryOpsEnd ||
2365 return std::nullopt;
2367 BinOpCode, II.getOperand(1), II.getOperand(2), II.getFastMathFlags());
2369}
2370
2374 Intrinsic::aarch64_sve_mla>(
2375 IC, II, true))
2376 return MLA;
2378 Intrinsic::aarch64_sve_mad>(
2379 IC, II, false))
2380 return MAD;
2381 return std::nullopt;
2382}
2383
2384static std::optional<Instruction *>
2386 if (auto FMLA =
2388 Intrinsic::aarch64_sve_fmla>(IC, II,
2389 true))
2390 return FMLA;
2391 if (auto FMAD =
2393 Intrinsic::aarch64_sve_fmad>(IC, II,
2394 false))
2395 return FMAD;
2396 if (auto FMLA =
2398 Intrinsic::aarch64_sve_fmla>(IC, II,
2399 true))
2400 return FMLA;
2401 return std::nullopt;
2402}
2403
2404static std::optional<Instruction *>
2406 if (auto FMLA =
2408 Intrinsic::aarch64_sve_fmla>(IC, II,
2409 true))
2410 return FMLA;
2411 if (auto FMAD =
2413 Intrinsic::aarch64_sve_fmad>(IC, II,
2414 false))
2415 return FMAD;
2416 if (auto FMLA_U =
2418 Intrinsic::aarch64_sve_fmla_u>(
2419 IC, II, true))
2420 return FMLA_U;
2422}
2423
2424static std::optional<Instruction *>
2426 if (auto FMLS =
2428 Intrinsic::aarch64_sve_fmls>(IC, II,
2429 true))
2430 return FMLS;
2431 if (auto FMSB =
2433 Intrinsic::aarch64_sve_fnmsb>(
2434 IC, II, false))
2435 return FMSB;
2436 if (auto FMLS =
2438 Intrinsic::aarch64_sve_fmls>(IC, II,
2439 true))
2440 return FMLS;
2441 return std::nullopt;
2442}
2443
2444static std::optional<Instruction *>
2446 if (auto FMLS =
2448 Intrinsic::aarch64_sve_fmls>(IC, II,
2449 true))
2450 return FMLS;
2451 if (auto FMSB =
2453 Intrinsic::aarch64_sve_fnmsb>(
2454 IC, II, false))
2455 return FMSB;
2456 if (auto FMLS_U =
2458 Intrinsic::aarch64_sve_fmls_u>(
2459 IC, II, true))
2460 return FMLS_U;
2462}
2463
2467 Intrinsic::aarch64_sve_mls>(
2468 IC, II, true))
2469 return MLS;
2470 return std::nullopt;
2471}
2472
2475 Value *UnpackArg = II.getArgOperand(0);
2477 bool IsSigned = II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpkhi ||
2478 II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpklo;
2479
2480
2481
2482 if (auto *ScalarArg = getSplatValue(UnpackArg)) {
2483 ScalarArg =
2489 }
2490
2491 return std::nullopt;
2492}
2495 auto *OpVal = II.getOperand(0);
2496 auto *OpIndices = II.getOperand(1);
2498
2499
2500
2502 if (!SplatValue ||
2503 SplatValue->getValue().uge(VTy->getElementCount().getKnownMinValue()))
2504 return std::nullopt;
2505
2506
2507
2509 auto *VectorSplat =
2511
2514}
2515
2519 Type *RetTy = II.getType();
2520 constexpr Intrinsic::ID FromSVB = Intrinsic::aarch64_sve_convert_from_svbool;
2521 constexpr Intrinsic::ID ToSVB = Intrinsic::aarch64_sve_convert_to_svbool;
2522
2523
2524
2525 if ((match(II.getArgOperand(0),
2527 match(II.getArgOperand(1),
2532 if (TyA == B->getType() &&
2537 TyA->getMinNumElements());
2540 }
2541 }
2542
2543 return std::nullopt;
2544}
2545
2548
2549
2551 if (match(II.getArgOperand(0),
2556 II, (II.getIntrinsicID() == Intrinsic::aarch64_sve_zip1 ? A : B));
2557
2558 return std::nullopt;
2559}
2560
2561static std::optional<Instruction *>
2563 Value *Mask = II.getOperand(0);
2564 Value *BasePtr = II.getOperand(1);
2565 Value *Index = II.getOperand(2);
2568
2569
2570
2571
2572 Value *IndexBase;
2575 Align Alignment =
2576 BasePtr->getPointerAlignment(II.getDataLayout());
2577
2579 BasePtr, IndexBase);
2584 }
2585
2586 return std::nullopt;
2587}
2588
2589static std::optional<Instruction *>
2591 Value *Val = II.getOperand(0);
2592 Value *Mask = II.getOperand(1);
2593 Value *BasePtr = II.getOperand(2);
2594 Value *Index = II.getOperand(3);
2596
2597
2598
2599
2600 Value *IndexBase;
2603 Align Alignment =
2604 BasePtr->getPointerAlignment(II.getDataLayout());
2605
2607 BasePtr, IndexBase);
2609
2611 }
2612
2613 return std::nullopt;
2614}
2615
2619 Value *Pred = II.getOperand(0);
2620 Value *Vec = II.getOperand(1);
2621 Value *DivVec = II.getOperand(2);
2622
2625 if (!SplatConstantInt)
2626 return std::nullopt;
2627
2629 const int64_t DivisorValue = Divisor.getSExtValue();
2630 if (DivisorValue == -1)
2631 return std::nullopt;
2632 if (DivisorValue == 1)
2634
2638 Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});
2640 }
2645 Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});
2647 Intrinsic::aarch64_sve_neg, {ASRD->getType()}, {ASRD, Pred, ASRD});
2649 }
2650
2651 return std::nullopt;
2652}
2653
2655 size_t VecSize = Vec.size();
2656 if (VecSize == 1)
2657 return true;
2659 return false;
2660 size_t HalfVecSize = VecSize / 2;
2661
2662 for (auto LHS = Vec.begin(), RHS = Vec.begin() + HalfVecSize;
2664 if (*LHS != nullptr && *RHS != nullptr) {
2666 continue;
2667 else
2668 return false;
2669 }
2670 if (!AllowPoison)
2671 return false;
2672 if (*LHS == nullptr && *RHS != nullptr)
2674 }
2675
2676 Vec.resize(HalfVecSize);
2678 return true;
2679}
2680
2681
2682
2685 Value *CurrentInsertElt = nullptr, *Default = nullptr;
2686 if ((II.getOperand(0),
2690 return std::nullopt;
2692
2693
2697 Elts[Idx->getValue().getZExtValue()] = InsertElt->getOperand(1);
2698 CurrentInsertElt = InsertElt->getOperand(0);
2699 }
2700
2701 bool AllowPoison =
2704 return std::nullopt;
2705
2706
2708 for (size_t I = 0; I < Elts.size(); I++) {
2709 if (Elts[I] == nullptr)
2710 continue;
2713 }
2714 if (InsertEltChain == nullptr)
2715 return std::nullopt;
2716
2717
2718
2719
2720
2721 unsigned PatternWidth = IIScalableTy->getScalarSizeInBits() * Elts.size();
2722 unsigned PatternElementCount = IIScalableTy->getScalarSizeInBits() *
2723 IIScalableTy->getMinNumElements() /
2724 PatternWidth;
2725
2728 auto *WideShuffleMaskTy =
2730
2734 auto WideBitcast =
2738 WideBitcast, PoisonValue::get(WideScalableTy), WideShuffleMask);
2739 auto NarrowBitcast =
2741
2743}
2744
2747 Value *A = II.getArgOperand(0);
2748 Value *B = II.getArgOperand(1);
2751
2752 return std::nullopt;
2753}
2754
2757 Value *Pred = II.getOperand(0);
2758 Value *Vec = II.getOperand(1);
2759 Value *Shift = II.getOperand(2);
2760
2761
2762 Value *AbsPred, *MergedValue;
2767
2768 return std::nullopt;
2769
2770
2771
2772
2773
2776 return std::nullopt;
2777
2778
2779
2781 return std::nullopt;
2782
2784 {II.getType()}, {Pred, Vec, Shift});
2785
2787}
2788
2791 Value *Vec = II.getOperand(0);
2792
2795
2796 return std::nullopt;
2797}
2798
2801
2802 auto *NI = II.getNextNode();
2805 return ->mayReadOrWriteMemory() &&
->mayHaveSideEffects();
2806 };
2807 while (LookaheadThreshold-- && CanSkipOver(NI)) {
2808 auto *NIBB = NI->getParent();
2809 NI = NI->getNextNode();
2810 if (!NI) {
2811 if (auto *SuccBB = NIBB->getUniqueSuccessor())
2812 NI = &*SuccBB->getFirstNonPHIOrDbgOrLifetime();
2813 else
2814 break;
2815 }
2816 }
2818 if (NextII && II.isIdenticalTo(NextII))
2820
2821 return std::nullopt;
2822}
2823
2829 {II.getType(), II.getOperand(0)->getType()},
2830 {II.getOperand(0), II.getOperand(1)}));
2831}
2832
2837 return std::nullopt;
2838}
2839
2842 unsigned NumBits) {
2843 Value *Passthru = II.getOperand(0);
2844 Value *Pg = II.getOperand(1);
2846
2847
2851 auto *Mask = ConstantInt::get(Ty, MaskValue);
2853 {Pg, Op, Mask});
2855 }
2856
2857 return std::nullopt;
2858}
2859
2860static std::optional<Instruction *>
2862 SMEAttrs FnSMEAttrs(*II.getFunction());
2867 return std::nullopt;
2868}
2869
2870std::optional<Instruction *>
2875 return I;
2876
2878 switch (IID) {
2879 default:
2880 break;
2881 case Intrinsic::aarch64_dmb:
2883 case Intrinsic::aarch64_neon_fmaxnm:
2884 case Intrinsic::aarch64_neon_fminnm:
2886 case Intrinsic::aarch64_sve_convert_from_svbool:
2888 case Intrinsic::aarch64_sve_dup:
2890 case Intrinsic::aarch64_sve_dup_x:
2892 case Intrinsic::aarch64_sve_cmpne:
2893 case Intrinsic::aarch64_sve_cmpne_wide:
2895 case Intrinsic::aarch64_sve_rdffr:
2897 case Intrinsic::aarch64_sve_lasta:
2898 case Intrinsic::aarch64_sve_lastb:
2900 case Intrinsic::aarch64_sve_clasta_n:
2901 case Intrinsic::aarch64_sve_clastb_n:
2903 case Intrinsic::aarch64_sve_cntd:
2905 case Intrinsic::aarch64_sve_cntw:
2907 case Intrinsic::aarch64_sve_cnth:
2909 case Intrinsic::aarch64_sve_cntb:
2911 case Intrinsic::aarch64_sme_cntsd:
2913 case Intrinsic::aarch64_sve_ptest_any:
2914 case Intrinsic::aarch64_sve_ptest_first:
2915 case Intrinsic::aarch64_sve_ptest_last:
2917 case Intrinsic::aarch64_sve_fadd:
2919 case Intrinsic::aarch64_sve_fadd_u:
2921 case Intrinsic::aarch64_sve_fmul_u:
2923 case Intrinsic::aarch64_sve_fsub:
2925 case Intrinsic::aarch64_sve_fsub_u:
2927 case Intrinsic::aarch64_sve_add:
2929 case Intrinsic::aarch64_sve_add_u:
2931 Intrinsic::aarch64_sve_mla_u>(
2932 IC, II, true);
2933 case Intrinsic::aarch64_sve_sub:
2935 case Intrinsic::aarch64_sve_sub_u:
2937 Intrinsic::aarch64_sve_mls_u>(
2938 IC, II, true);
2939 case Intrinsic::aarch64_sve_tbl:
2941 case Intrinsic::aarch64_sve_uunpkhi:
2942 case Intrinsic::aarch64_sve_uunpklo:
2943 case Intrinsic::aarch64_sve_sunpkhi:
2944 case Intrinsic::aarch64_sve_sunpklo:
2946 case Intrinsic::aarch64_sve_uzp1:
2948 case Intrinsic::aarch64_sve_zip1:
2949 case Intrinsic::aarch64_sve_zip2:
2951 case Intrinsic::aarch64_sve_ld1_gather_index:
2953 case Intrinsic::aarch64_sve_st1_scatter_index:
2955 case Intrinsic::aarch64_sve_ld1:
2957 case Intrinsic::aarch64_sve_st1:
2959 case Intrinsic::aarch64_sve_sdiv:
2961 case Intrinsic::aarch64_sve_sel:
2963 case Intrinsic::aarch64_sve_srshl:
2965 case Intrinsic::aarch64_sve_dupq_lane:
2967 case Intrinsic::aarch64_sve_insr:
2969 case Intrinsic::aarch64_sve_whilelo:
2971 case Intrinsic::aarch64_sve_ptrue:
2973 case Intrinsic::aarch64_sve_uxtb:
2975 case Intrinsic::aarch64_sve_uxth:
2977 case Intrinsic::aarch64_sve_uxtw:
2979 case Intrinsic::aarch64_sme_in_streaming_mode:
2981 }
2982
2983 return std::nullopt;
2984}
2985
2990 SimplifyAndSetOp) const {
2991 switch (II.getIntrinsicID()) {
2992 default:
2993 break;
2994 case Intrinsic::aarch64_neon_fcvtxn:
2995 case Intrinsic::aarch64_neon_rshrn:
2996 case Intrinsic::aarch64_neon_sqrshrn:
2997 case Intrinsic::aarch64_neon_sqrshrun:
2998 case Intrinsic::aarch64_neon_sqshrn:
2999 case Intrinsic::aarch64_neon_sqshrun:
3000 case Intrinsic::aarch64_neon_sqxtn:
3001 case Intrinsic::aarch64_neon_sqxtun:
3002 case Intrinsic::aarch64_neon_uqrshrn:
3003 case Intrinsic::aarch64_neon_uqshrn:
3004 case Intrinsic::aarch64_neon_uqxtn:
3005 SimplifyAndSetOp(&II, 0, OrigDemandedElts, UndefElts);
3006 break;
3007 }
3008
3009 return std::nullopt;
3010}
3011
3013 return ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&
3015}
3016
3019 switch (K) {
3023 if (ST->useSVEForFixedLengthVectors() &&
3026 std::max(ST->getMinSVEVectorSizeInBits(), 128u));
3027 else if (ST->isNeonAvailable())
3029 else
3032 if (ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&
3035 else
3037 }
3039}
3040
3041bool AArch64TTIImpl::isSingleExtWideningInstruction(
3043 Type *SrcOverrideTy) const {
3044
3045
3049 };
3050
3051
3052
3053
3054
3055
3057 if ((DstTy) || Args.size() != 2 ||
3058 (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))
3059 return false;
3060
3061 Type *SrcTy = SrcOverrideTy;
3062 switch (Opcode) {
3063 case Instruction::Add:
3064 case Instruction::Sub: {
3065
3067 if (!SrcTy)
3068 SrcTy =
3070 break;
3071 }
3072
3073 if (Opcode == Instruction::Sub)
3074 return false;
3075
3076
3078 if (!SrcTy)
3079 SrcTy =
3081 break;
3082 }
3083 return false;
3084 }
3085 default:
3086 return false;
3087 }
3088
3089
3090
3092 if (!DstTyL.second.isVector() || DstEltSize != DstTy->getScalarSizeInBits())
3093 return false;
3094
3095
3096
3097 assert(SrcTy && "Expected some SrcTy");
3099 unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
3100 if (!SrcTyL.second.isVector() || SrcElTySize != SrcTy->getScalarSizeInBits())
3101 return false;
3102
3103
3105 DstTyL.first * DstTyL.second.getVectorMinNumElements();
3107 SrcTyL.first * SrcTyL.second.getVectorMinNumElements();
3108
3109
3110
3111 return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstEltSize;
3112}
3113
3114Type *AArch64TTIImpl::isBinExtWideningInstruction(unsigned Opcode, Type *DstTy,
3116 Type *SrcOverrideTy) const {
3117 if (Opcode != Instruction::Add && Opcode != Instruction::Sub &&
3118 Opcode != Instruction::Mul)
3119 return nullptr;
3120
3121
3122
3123
3124
3125
3128 (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))
3129 return nullptr;
3130
3131 auto getScalarSizeWithOverride = [&](const Value *V) {
3132 if (SrcOverrideTy)
3135 ->getOperand(0)
3136 ->getType()
3137 ->getScalarSizeInBits();
3138 };
3139
3140 unsigned MaxEltSize = 0;
3143 unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);
3144 unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);
3145 MaxEltSize = std::max(EltSize0, EltSize1);
3148 unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);
3149 unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);
3150
3151
3152 if (EltSize0 >= DstEltSize / 2 || EltSize1 >= DstEltSize / 2)
3153 return nullptr;
3154 MaxEltSize = DstEltSize / 2;
3155 } else if (Opcode == Instruction::Mul &&
3157
3158
3159
3160 KnownBits Known =
3165 return nullptr;
3166
3167 MaxEltSize =
3168 getScalarSizeWithOverride(isa(Args[0]) ? Args[0] : Args[1]);
3169 } else
3170 return nullptr;
3171
3172 if (MaxEltSize * 2 > DstEltSize)
3173 return nullptr;
3174
3177 return nullptr;
3178 return ExtTy;
3179}
3180
3181
3182
3183
3184
3185
3186
3188 Type *Src) const {
3189
3190 if (!Src->isVectorTy() || !TLI->isTypeLegal(TLI->getValueType(DL, Src)) ||
3191 (Src->isScalableTy() && !ST->hasSVE2()))
3192 return false;
3193
3194 if (ExtUser->getOpcode() != Instruction::Add || !ExtUser->hasOneUse())
3195 return false;
3196
3197
3199 auto *AddUser =
3201 if (AddUser && AddUser->getOpcode() == Instruction::Add)
3202 Add = AddUser;
3203
3205 if (!Shr || Shr->getOpcode() != Instruction::LShr)
3206 return false;
3207
3209 if (!Trunc || Trunc->getOpcode() != Instruction::Trunc ||
3210 Src->getScalarSizeInBits() !=
3211 cast(Trunc)->getDestTy()->getScalarSizeInBits())
3212 return false;
3213
3214
3215
3219 return false;
3220
3221
3224 return true;
3225
3226 return false;
3227}
3228
3234 int ISD = TLI->InstructionOpcodeToISD(Opcode);
3235 assert(ISD && "Invalid opcode");
3236
3237
3238 if (I && I->hasOneUser()) {
3241 if (Type *ExtTy = isBinExtWideningInstruction(
3242 SingleUser->getOpcode(), Dst, Operands,
3243 Src != I->getOperand(0)->getType() ? Src : nullptr)) {
3244
3245
3247 Type *DoubleSrcTy =
3251 }
3252
3253 return 0;
3254 }
3255
3256 if (isSingleExtWideningInstruction(
3257 SingleUser->getOpcode(), Dst, Operands,
3258 Src != I->getOperand(0)->getType() ? Src : nullptr)) {
3259
3260
3261
3262 if (SingleUser->getOpcode() == Instruction::Add) {
3263 if (I == SingleUser->getOperand(1) ||
3265 cast(SingleUser->getOperand(1))->getOpcode() == Opcode))
3266 return 0;
3267 } else {
3268
3269
3270 return 0;
3271 }
3272 }
3273
3274
3277 return 0;
3278 }
3279
3280
3283 return Cost == 0 ? 0 : 1;
3284 return Cost;
3285 };
3286
3287 EVT SrcTy = TLI->getValueType(DL, Src);
3288 EVT DstTy = TLI->getValueType(DL, Dst);
3289
3290 if (!SrcTy.isSimple() || !DstTy.isSimple())
3291 return AdjustCost(
3293
3294
3295
3296 if (!ST->hasSVE2() && !ST->isStreamingSVEAvailable() &&
3300
3302 {ISD::FP_ROUND, MVT::bf16, MVT::f32, 1},
3303 {ISD::FP_ROUND, MVT::bf16, MVT::f64, 1},
3304 {ISD::FP_ROUND, MVT::v4bf16, MVT::v4f32, 1},
3305 {ISD::FP_ROUND, MVT::v8bf16, MVT::v8f32, 2},
3306 {ISD::FP_ROUND, MVT::v2bf16, MVT::v2f64, 2},
3307 {ISD::FP_ROUND, MVT::v4bf16, MVT::v4f64, 3},
3308 {ISD::FP_ROUND, MVT::v8bf16, MVT::v8f64, 6},
3309 {ISD::FP_ROUND, MVT::nxv2bf16, MVT::nxv2f32, 1},
3310 {ISD::FP_ROUND, MVT::nxv4bf16, MVT::nxv4f32, 1},
3311 {ISD::FP_ROUND, MVT::nxv8bf16, MVT::nxv8f32, 3},
3312 {ISD::FP_ROUND, MVT::nxv2bf16, MVT::nxv2f64, 2},
3313 {ISD::FP_ROUND, MVT::nxv4bf16, MVT::nxv4f64, 5},
3314 {ISD::FP_ROUND, MVT::nxv8bf16, MVT::nxv8f64, 11},
3315 };
3316
3317 if (ST->hasBF16())
3318 if (const auto *Entry = ConvertCostTableLookup(
3319 BF16Tbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
3320 return AdjustCost(Entry->Cost);
3321
3322
3323
3324
3325
3326
3327
3328 const unsigned int SVE_EXT_COST = 1;
3329 const unsigned int SVE_FCVT_COST = 1;
3330 const unsigned int SVE_UNPACK_ONCE = 4;
3331 const unsigned int SVE_UNPACK_TWICE = 16;
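// How these constants compose (illustrative): an SVE int-to-fp conversion
// that must extend once and unpack twice is costed below as
// SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST = 1 + 16 + 4 = 21,
// i.e. one extend, a two-level unpack tree, and one FCVT per unpacked part.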
3332
3334 {ISD::TRUNCATE, MVT::v2i8, MVT::v2i64, 1},
3335 {ISD::TRUNCATE, MVT::v2i16, MVT::v2i64, 1},
3336 {ISD::TRUNCATE, MVT::v2i32, MVT::v2i64, 1},
3337 {ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 1},
3338 {ISD::TRUNCATE, MVT::v4i8, MVT::v4i64, 3},
3339 {ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1},
3340 {ISD::TRUNCATE, MVT::v4i16, MVT::v4i64, 2},
3341 {ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1},
3342 {ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 1},
3343 {ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2},
3344 {ISD::TRUNCATE, MVT::v8i8, MVT::v8i64, 4},
3345 {ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1},
3346 {ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 3},
3347 {ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 2},
3348 {ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 1},
3349 {ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 3},
3350 {ISD::TRUNCATE, MVT::v16i8, MVT::v16i64, 7},
3351 {ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 2},
3352 {ISD::TRUNCATE, MVT::v16i16, MVT::v16i64, 6},
3353 {ISD::TRUNCATE, MVT::v16i32, MVT::v16i64, 4},
3354
3355
3386 {ISD::TRUNCATE, MVT::nxv16i8, MVT::nxv16i16, 1},
3387 {ISD::TRUNCATE, MVT::nxv16i8, MVT::nxv16i32, 3},
3388 {ISD::TRUNCATE, MVT::nxv16i8, MVT::nxv16i64, 7},
3389
3390
3407
3408
3409 {ISD::FP_EXTEND, MVT::f64, MVT::f32, 1},
3410 {ISD::FP_EXTEND, MVT::v2f64, MVT::v2f32, 1},
3411 {ISD::FP_EXTEND, MVT::v4f64, MVT::v4f32, 2},
3412
3413 {ISD::FP_EXTEND, MVT::f32, MVT::f16, 1},
3414 {ISD::FP_EXTEND, MVT::f64, MVT::f16, 1},
3415 {ISD::FP_EXTEND, MVT::v4f32, MVT::v4f16, 1},
3416 {ISD::FP_EXTEND, MVT::v8f32, MVT::v8f16, 2},
3417 {ISD::FP_EXTEND, MVT::v2f64, MVT::v2f16, 2},
3418 {ISD::FP_EXTEND, MVT::v4f64, MVT::v4f16, 3},
3419 {ISD::FP_EXTEND, MVT::v8f64, MVT::v8f16, 6},
3420
3421 {ISD::FP_EXTEND, MVT::f32, MVT::bf16, 1},
3422 {ISD::FP_EXTEND, MVT::f64, MVT::bf16, 2},
3423 {ISD::FP_EXTEND, MVT::v4f32, MVT::v4bf16, 1},
3424 {ISD::FP_EXTEND, MVT::v8f32, MVT::v8bf16, 2},
3425 {ISD::FP_EXTEND, MVT::v2f64, MVT::v2bf16, 2},
3426 {ISD::FP_EXTEND, MVT::v4f64, MVT::v4bf16, 3},
3427 {ISD::FP_EXTEND, MVT::v8f64, MVT::v8bf16, 6},
3428
3430 {ISD::FP_ROUND, MVT::v2f32, MVT::v2f64, 1},
3431 {ISD::FP_ROUND, MVT::v4f32, MVT::v4f64, 2},
3432
3435 {ISD::FP_ROUND, MVT::v4f16, MVT::v4f32, 1},
3436 {ISD::FP_ROUND, MVT::v8f16, MVT::v8f32, 2},
3437 {ISD::FP_ROUND, MVT::v2f16, MVT::v2f64, 2},
3438 {ISD::FP_ROUND, MVT::v4f16, MVT::v4f64, 3},
3439 {ISD::FP_ROUND, MVT::v8f16, MVT::v8f64, 6},
3440
3441 {ISD::FP_ROUND, MVT::bf16, MVT::f32, 8},
3442 {ISD::FP_ROUND, MVT::bf16, MVT::f64, 9},
3449
3450
3457
3458
3460 SVE_EXT_COST + SVE_FCVT_COST},
3461 {ISD::SINT_TO_FP, MVT::nxv2f16, MVT::nxv2i16, SVE_FCVT_COST},
3462 {ISD::SINT_TO_FP, MVT::nxv2f16, MVT::nxv2i32, SVE_FCVT_COST},
3463 {ISD::SINT_TO_FP, MVT::nxv2f16, MVT::nxv2i64, SVE_FCVT_COST},
3465 SVE_EXT_COST + SVE_FCVT_COST},
3466 {ISD::UINT_TO_FP, MVT::nxv2f16, MVT::nxv2i16, SVE_FCVT_COST},
3467 {ISD::UINT_TO_FP, MVT::nxv2f16, MVT::nxv2i32, SVE_FCVT_COST},
3468 {ISD::UINT_TO_FP, MVT::nxv2f16, MVT::nxv2i64, SVE_FCVT_COST},
3469
3470
3472 SVE_EXT_COST + SVE_FCVT_COST},
3473 {ISD::SINT_TO_FP, MVT::nxv4f16, MVT::nxv4i16, SVE_FCVT_COST},
3474 {ISD::SINT_TO_FP, MVT::nxv4f16, MVT::nxv4i32, SVE_FCVT_COST},
3476 SVE_EXT_COST + SVE_FCVT_COST},
3477 {ISD::UINT_TO_FP, MVT::nxv4f16, MVT::nxv4i16, SVE_FCVT_COST},
3478 {ISD::UINT_TO_FP, MVT::nxv4f16, MVT::nxv4i32, SVE_FCVT_COST},
3479
3480
3482 SVE_EXT_COST + SVE_FCVT_COST},
3483 {ISD::SINT_TO_FP, MVT::nxv8f16, MVT::nxv8i16, SVE_FCVT_COST},
3485 SVE_EXT_COST + SVE_FCVT_COST},
3486 {ISD::UINT_TO_FP, MVT::nxv8f16, MVT::nxv8i16, SVE_FCVT_COST},
3487
3488
3490 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3492 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3493
3494
3499
3500
3502 SVE_EXT_COST + SVE_FCVT_COST},
3503 {ISD::SINT_TO_FP, MVT::nxv2f32, MVT::nxv2i16, SVE_FCVT_COST},
3504 {ISD::SINT_TO_FP, MVT::nxv2f32, MVT::nxv2i32, SVE_FCVT_COST},
3505 {ISD::SINT_TO_FP, MVT::nxv2f32, MVT::nxv2i64, SVE_FCVT_COST},
3507 SVE_EXT_COST + SVE_FCVT_COST},
3508 {ISD::UINT_TO_FP, MVT::nxv2f32, MVT::nxv2i16, SVE_FCVT_COST},
3509 {ISD::UINT_TO_FP, MVT::nxv2f32, MVT::nxv2i32, SVE_FCVT_COST},
3510 {ISD::UINT_TO_FP, MVT::nxv2f32, MVT::nxv2i64, SVE_FCVT_COST},
3511
3512
3517
3518
3520 SVE_EXT_COST + SVE_FCVT_COST},
3521 {ISD::SINT_TO_FP, MVT::nxv4f32, MVT::nxv4i16, SVE_FCVT_COST},
3522 {ISD::SINT_TO_FP, MVT::nxv4f32, MVT::nxv4i32, SVE_FCVT_COST},
3524 SVE_EXT_COST + SVE_FCVT_COST},
3525 {ISD::UINT_TO_FP, MVT::nxv4f32, MVT::nxv4i16, SVE_FCVT_COST},
3526 {ISD::UINT_TO_FP, MVT::nxv4f32, MVT::nxv4i32, SVE_FCVT_COST},
3527
3528
3533
3534
3536 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3538 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3540 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3542 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3543
3544
3546 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3548 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3549
3550
3553
3554
3561
3562
3564 SVE_EXT_COST + SVE_FCVT_COST},
3565 {ISD::SINT_TO_FP, MVT::nxv2f64, MVT::nxv2i16, SVE_FCVT_COST},
3566 {ISD::SINT_TO_FP, MVT::nxv2f64, MVT::nxv2i32, SVE_FCVT_COST},
3567 {ISD::SINT_TO_FP, MVT::nxv2f64, MVT::nxv2i64, SVE_FCVT_COST},
3569 SVE_EXT_COST + SVE_FCVT_COST},
3570 {ISD::UINT_TO_FP, MVT::nxv2f64, MVT::nxv2i16, SVE_FCVT_COST},
3571 {ISD::UINT_TO_FP, MVT::nxv2f64, MVT::nxv2i32, SVE_FCVT_COST},
3572 {ISD::UINT_TO_FP, MVT::nxv2f64, MVT::nxv2i64, SVE_FCVT_COST},
3573
3574
3577
3578
3580 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3582 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3584 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3586 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3588 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3590 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3591
3592
3594 SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3596 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3598 SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3600 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3601
3602
3609
3610
3617
3618
3623
3624
3633
3634
3641
3642
3653
3654
3665
3666
3671
3672
3679
3680
3685
3686
3697
3698
3707
3708
3717
3718
3722
3723
3724 {ISD::FP_ROUND, MVT::nxv2bf16, MVT::nxv2f32, 8},
3725 {ISD::FP_ROUND, MVT::nxv4bf16, MVT::nxv4f32, 8},
3726 {ISD::FP_ROUND, MVT::nxv8bf16, MVT::nxv8f32, 17},
3727
3728
3732
3733
3734 {ISD::FP_ROUND, MVT::nxv2bf16, MVT::nxv2f64, 9},
3735 {ISD::FP_ROUND, MVT::nxv4bf16, MVT::nxv4f64, 19},
3736 {ISD::FP_ROUND, MVT::nxv8bf16, MVT::nxv8f64, 39},
3737
3738
3742
3743
3744 {ISD::FP_EXTEND, MVT::nxv2f32, MVT::nxv2f16, 1},
3745 {ISD::FP_EXTEND, MVT::nxv4f32, MVT::nxv4f16, 1},
3746 {ISD::FP_EXTEND, MVT::nxv8f32, MVT::nxv8f16, 2},
3747
3748
3749 {ISD::FP_EXTEND, MVT::nxv2f32, MVT::nxv2bf16, 1},
3750 {ISD::FP_EXTEND, MVT::nxv4f32, MVT::nxv4bf16, 1},
3751 {ISD::FP_EXTEND, MVT::nxv8f32, MVT::nxv8bf16, 4},
3752
3753
3754 {ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f16, 1},
3755 {ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f16, 2},
3756 {ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8f16, 4},
3757
3758
3759 {ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2bf16, 2},
3760 {ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4bf16, 6},
3761 {ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8bf16, 14},
3762
3763
3764 {ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f32, 1},
3765 {ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f32, 2},
3766 {ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8f32, 6},
3767
3768
3769 {ISD::BITCAST, MVT::nxv2f16, MVT::nxv2i16, 0},
3770 {ISD::BITCAST, MVT::nxv4f16, MVT::nxv4i16, 0},
3771 {ISD::BITCAST, MVT::nxv2f32, MVT::nxv2i32, 0},
3772
3773
3774 {ISD::BITCAST, MVT::nxv2i16, MVT::nxv2f16, 0},
3775 {ISD::BITCAST, MVT::nxv4i16, MVT::nxv4f16, 0},
3776 {ISD::BITCAST, MVT::nxv2i32, MVT::nxv2f32, 0},
3777
3778
3779
3780
3787
3794 };
3795
3796
3797
3798
3799 EVT WiderTy = SrcTy.bitsGT(DstTy) ? SrcTy : DstTy;
3800 if ((ISD == ISD::ZERO_EXTEND || ISD == ISD::SIGN_EXTEND ||
3801 ISD == ISD::TRUNCATE) &&
3802 ST->useSVEForFixedLengthVectors(WiderTy)) {
3803 std::pair<InstructionCost, MVT> LT =
3804 getTypeLegalizationCost(WiderTy.getTypeForEVT(Dst->getContext()));
3805 unsigned NumElements =
3806 AArch64::SVEBitsPerBlock / LT.second.getScalarSizeInBits();
3807 return AdjustCost(
3808 LT.first *
3809 getCastInstrCost(
3810 Opcode, ScalableVectorType::get(Dst->getScalarType(), NumElements),
3811 ScalableVectorType::get(Src->getScalarType(), NumElements),
3812 CCH, CostKind, I));
3813 }
3814
3815 if (const auto *Entry = ConvertCostTableLookup(
3816 ConversionTbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
3817 return AdjustCost(Entry->Cost);
3818
3824 {ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f16, 2},
3826 {ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f16, 2},
3830 {ISD::FP_TO_SINT, MVT::v8i32, MVT::v8f16, 4},
3832 {ISD::FP_TO_SINT, MVT::v16i8, MVT::v16f16, 3},
3834 {ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f16, 2},
3836 {ISD::FP_TO_SINT, MVT::v16i32, MVT::v16f16, 8},
3838 {ISD::UINT_TO_FP, MVT::v8f16, MVT::v8i8, 2},
3839 {ISD::SINT_TO_FP, MVT::v8f16, MVT::v8i8, 2},
3840 {ISD::UINT_TO_FP, MVT::v16f16, MVT::v16i8, 4},
3841 {ISD::SINT_TO_FP, MVT::v16f16, MVT::v16i8, 4},
3842 };
3843
3844 if (ST->hasFullFP16())
3845 if (const auto *Entry = ConvertCostTableLookup(
3846 FP16Tbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
3847 return AdjustCost(Entry->Cost);
3848
3849
3850
3854 return AdjustCost(
3856 getCastInstrCost(Opcode, Dst->getScalarType(), Src->getScalarType(),
3862
3865 ST->isSVEorStreamingSVEAvailable() &&
3866 TLI->getTypeAction(Src->getContext(), SrcTy) ==
3868 TLI->getTypeAction(Dst->getContext(), DstTy) ==
3870
3871
3872
3873
3877 Opcode, LegalTy, Src, CCH, CostKind, I);
3880 return Part1 + Part2;
3881 }
3882
3883
3884
3887 ST->isSVEorStreamingSVEAvailable() && TLI->isTypeLegal(DstTy))
3889
3890 return AdjustCost(
3891 BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));
3892}
3893
3898
3899
3900 assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
3901 "Invalid opcode");
3902
3903
3904
3906
3907
3909
3910
3911
3913 CostKind, Index, nullptr, nullptr);
3914
3915
3917 auto DstVT = TLI->getValueType(DL, Dst);
3918 auto SrcVT = TLI->getValueType(DL, Src);
3919
3920
3921
3922
3923 if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
3926
3927
3928
3929 if (DstVT.getFixedSizeInBits() < SrcVT.getFixedSizeInBits())
3932
3933 switch (Opcode) {
3934 default:
3936
3937
3938
3939 case Instruction::SExt:
3940 return Cost;
3941
3942
3943
3944 case Instruction::ZExt:
3945 if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
3946 return Cost;
3947 }
3948
3949
3952}
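// Net effect of the switch above: a sign-extending extract is free because
// SMOV performs the extend as part of the lane move; a zero-extending
// extract is likewise free via UMOV unless the destination is i64 with a
// sub-32-bit source lane, in which case the extra cast cost is added.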
3953
3958 return Opcode == Instruction::PHI ? 0 : 1;
3960
3961 return 0;
3962}
3963
3964InstructionCost AArch64TTIImpl::getVectorInstrCostHelper(
3967 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) const {
3969
3970 if (Index != -1U) {
3971
3973
3974
3975 if (!LT.second.isVector())
3976 return 0;
3977
3978
3979
3980 if (LT.second.isFixedLengthVector()) {
3981 unsigned Width = LT.second.getVectorNumElements();
3982 Index = Index % Width;
3983 }
3984
3985
3986
3987
3988
3990 return 0;
3991
3992
3993
3994
3995
3998 ? 0
4000
4001
4002
4005 ? 2
4007
4008
4009
4010
4011
4012
4013 }
4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
4028
4029
4030 auto ExtractCanFuseWithFmul = [&]() {
4031
4032 if (Index == 0)
4033 return false;
4034
4035
4036
4037 auto IsAllowedScalarTy = [&](const Type *T) {
4038 return T->isFloatTy() || T->isDoubleTy() ||
4039 (T->isHalfTy() && ST->hasFullFP16());
4040 };
4041
4042
4043 auto IsUserFMulScalarTy = [](const Value *EEUser) {
4044 // Check if the user is a scalar fmul.
4045 const auto *BO = dyn_cast<BinaryOperator>(EEUser);
4046 return BO && BO->getOpcode() == BinaryOperator::FMul &&
4047 !BO->getType()->isVectorTy();
4048 };
4049
4050
4051
4052 auto IsExtractLaneEquivalentToZero = [&](unsigned Idx, unsigned EltSz) {
4053 auto RegWidth =
4054 getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector)
4055 .getFixedValue();
4056 return Idx == 0 || (RegWidth != 0 && (Idx * EltSz) % RegWidth == 0);
4057 };
4058
4059
4060
4061 if (!IsAllowedScalarTy(Val->getScalarType()))
4062 return false;
4063
4064 if (Scalar) {
4065 DenseMap<User *, unsigned> UserToExtractIdx;
4066 for (auto *U : Scalar->users()) {
4067 if (!IsUserFMulScalarTy(U))
4068 return false;
4069
4070
4071 UserToExtractIdx[U];
4072 }
4073 if (UserToExtractIdx.empty())
4074 return false;
4075 for (auto &[S, U, L] : ScalarUserAndIdx) {
4076 for (auto *U : S->users()) {
4077 if (UserToExtractIdx.contains(U)) {
4078 auto *FMul = cast<BinaryOperator>(U);
4079 auto *Op0 = FMul->getOperand(0);
4080 auto *Op1 = FMul->getOperand(1);
4081 if ((Op0 == S && Op1 == S) || Op0 != S || Op1 != S) {
4082 UserToExtractIdx[U] = L;
4083 break;
4084 }
4085 }
4086 }
4087 }
4088 for (auto &[U, L] : UserToExtractIdx) {
4089 if (!IsExtractLaneEquivalentToZero(Index, Val->getScalarSizeInBits()) &&
4091 return false;
4092 }
4093 } else {
4094 const auto *EE = cast<ExtractElementInst>(I);
4095
4096 const auto *IdxOp = dyn_cast<ConstantInt>(EE->getIndexOperand());
4097 if (!IdxOp)
4098 return false;
4099
4100 return !EE->users().empty() && all_of(EE->users(), [&](const User *U) {
4101 if (!IsUserFMulScalarTy(U))
4102 return false;
4103
4104
4105
4106 const auto *BO = cast<BinaryOperator>(U);
4107 const auto *OtherEE = dyn_cast<ExtractElementInst>(
4108 BO->getOperand(0) == EE ? BO->getOperand(1) : BO->getOperand(0));
4109 if (OtherEE) {
4110 const auto *IdxOp = dyn_cast<ConstantInt>(OtherEE->getIndexOperand());
4111 if (!IdxOp)
4112 return false;
4113 return IsExtractLaneEquivalentToZero(
4114 cast<ConstantInt>(OtherEE->getIndexOperand())
4115 ->getValue()
4116 .getZExtValue(),
4117 OtherEE->getType()->getScalarSizeInBits());
4118 }
4119 return true;
4120 });
4121 }
4122 return true;
4123 };
4124
4125 if (Opcode == Instruction::ExtractElement && (I || Scalar) &&
4126 ExtractCanFuseWithFmul())
4127 return 0;
4128
4129
4131 : ST->getVectorInsertExtractBaseCost();
4132}
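// Illustrative case (not from the upstream file): for
//   %e = extractelement <2 x double> %v, i32 1
//   %m = fmul double %e, %s
// the lane read folds into an indexed FMUL (fmul d0, d1, v0.d[1]), which is
// why ExtractCanFuseWithFmul reports a zero cost when every user of the
// extracted scalar is a scalar fmul of an allowed FP type.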
4133
4136 unsigned Index,
4137 const Value *Op0,
4138 const Value *Op1) const {
4139
4140
4141
4142 if (Opcode == Instruction::InsertElement && Index == 0 && Op0 &&
4144 return 0;
4145 return getVectorInstrCostHelper(Opcode, Val, CostKind, Index);
4146}
4147
4151 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) const {
4152 return getVectorInstrCostHelper(Opcode, Val, CostKind, Index, nullptr, Scalar,
4153 ScalarUserAndIdx);
4154}
4155
4159 unsigned Index) const {
4160 return getVectorInstrCostHelper(I.getOpcode(), Val, CostKind, Index, &I);
4161}
4162
4166 unsigned Index) const {
4169 Index);
4170
4171
4172
4173
4174
4175
4177 ? 2
4178 : ST->getVectorInsertExtractBaseCost() + 1;
4179}
4180
4182 VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract,
4187 if (Ty->getElementType()->isFloatingPointTy())
4190 unsigned VecInstCost =
4191 CostKind == TTI::TCK_CodeSize ? 1 : ST->getVectorInsertExtractBaseCost();
4192 return DemandedElts.popcount() * (Insert + Extract) * VecInstCost;
4193}
4194
4199 if (!Ty->getScalarType()->isHalfTy() && !Ty->getScalarType()->isBFloatTy())
4200 return std::nullopt;
4201 if (Ty->getScalarType()->isHalfTy() && ST->hasFullFP16())
4202 return std::nullopt;
4203 if (CanUseSVE && Ty->isScalableTy() && ST->hasSVEB16B16() &&
4204 ST->isNonStreamingSVEorSME2Available())
4205 return std::nullopt;
4206
4212 Cost += InstCost(PromotedTy);
4213 if (IncludeTrunc)
4216 return Cost;
4217}
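// Example of the promotion costing above: without +fullfp16, an operation
// on <4 x half> is priced as extend-to-f32 + the f32 operation (+ a
// truncate back when IncludeTrunc is set), mirroring the fcvtl/op/fcvtn
// sequence the backend would emit.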
4218
4223
4224
4225
4226
4227
4231
4232
4235 Op2Info, Args, CxtI);
4236
4237
4239 int ISD = TLI->InstructionOpcodeToISD(Opcode);
4240
4241
4242
4246 Ty, CostKind, Op1Info, Op2Info, true,
4247
4249 [&](Type *PromotedTy) {
4251 Op1Info, Op2Info);
4252 }))
4253 return *PromotedCost;
4254
4255
4256
4257
4258
4259 if (Type *ExtTy = isBinExtWideningInstruction(Opcode, Ty, Args)) {
4260 if (ExtTy != Ty)
4264 return LT.first;
4265 }
4266
4267 switch (ISD) {
4268 default:
4270 Op2Info);
4273
4274
4275
4276
4277
4278
4279
4280
4281
4282
4283
4284
4285
4286
4287
4288
4289
4290
4291
4292
4293
4294
4295
4296
4297
4298
4299
4300
4301
4302
4303
4304
4305
4306
4307
4308
4309
4310
4311
4312
4313
4324
4325
4326 auto VT = TLI->getValueType(DL, Ty);
4327 if (VT.isScalarInteger() && VT.getSizeInBits() <= 64) {
4329
4330 return ISD == ISD::SDIV ? (3 * AddCost + AsrCost)
4331 : (3 * AsrCost + AddCost);
4332 } else {
4333 return MulCost + AsrCost + 2 * AddCost;
4334 }
4335 } else if (VT.isVector()) {
4338
4339
4340
4341
4342
4345 if (Ty->isScalableTy() && ST->hasSVE())
4346 Cost += 2 * AsrCost;
4347 else {
4349 UsraCost +
4351 ? (LT.second.getScalarType() == MVT::i64 ? 1 : 2) * AsrCost
4352 : 2 * AddCost);
4353 }
4354 return Cost;
4355 } else if (LT.second == MVT::v2i64) {
4356 return VT.getVectorNumElements() *
4360 } else {
4361
4362
4363 if (Ty->isScalableTy() && ST->hasSVE())
4364 return MulCost + 2 * AddCost + 2 * AsrCost;
4365 return 2 * MulCost + AddCost + AsrCost + UsraCost;
4366 }
4367 }
4368 }
4370 LT.second.isFixedLengthVector()) {
4371
4372
4373
4374
4375 auto ExtractCost = 2 * getVectorInstrCost(Instruction::ExtractElement, Ty,
4376 CostKind, -1, nullptr, nullptr);
4377 auto InsertCost = getVectorInstrCost(Instruction::InsertElement, Ty,
4378 CostKind, -1, nullptr, nullptr);
4380 return ExtractCost + InsertCost +
4384 }
4385 [[fallthrough]];
4388 auto VT = TLI->getValueType(DL, Ty);
4390
4399
4401
4402
4403
4404 bool HasMULH = VT == MVT::i64 || LT.second == MVT::nxv2i64 ||
4405 LT.second == MVT::nxv4i32 || LT.second == MVT::nxv8i16 ||
4406 LT.second == MVT::nxv16i8;
4407 bool Is128bit = LT.second.is128BitVector();
4408
4418 InstructionCost DivCost = MulCost * (Is128bit ? 2 : 1) +
4419 (HasMULH ? 0 : ShrCost) +
4420 AddCost * 2 + ShrCost;
4421 return DivCost + (ISD == ISD::UREM ? MulCost + AddCost : 0);
4422 }
4423 }
4424
4425
4426
4427
4428 if (!VT.isVector() && VT.getSizeInBits() > 64)
4430
4432 Opcode, Ty, CostKind, Op1Info, Op2Info);
4434 if (TLI->isOperationLegalOrCustom(ISD, LT.second) && ST->hasSVE()) {
4435
4436
4438 Ty->getPrimitiveSizeInBits().getFixedValue() < 128) {
4446
4448 if (nullptr != Entry)
4449 return Entry->Cost;
4450 }
4451
4452
4453 if (LT.second.getScalarType() == MVT::i8)
4455 else if (LT.second.getScalarType() == MVT::i16)
4457 return Cost;
4458 } else {
4459
4460
4461
4462
4467 Opcode, Ty->getScalarType(), CostKind, Op1Info, Op2Info);
4468 return (4 + DivCost) * VTy->getNumElements();
4469 }
4470 }
4471
4472
4474 -1, nullptr, nullptr);
4476 nullptr, nullptr);
4477 }
4478
4479
4480
4482 }
4483 return Cost;
4484 }
4486
4487
4488 if (LT.second == MVT::v2i64 && ST->hasSVE())
4489 return LT.first;
4490
4491
4492
4493
4494
4495
4496
4497
4498
4499
4500
4501 if (LT.second != MVT::v2i64)
4502 return LT.first;
4503 return cast(Ty)->getElementCount().getKnownMinValue() *
4506 nullptr, nullptr) *
4507 2 +
4509 nullptr, nullptr));
4517
4518
4519 return LT.first;
4520
4521 case ISD::FNEG:
4522
4523 if ((Ty->isFloatTy() || Ty->isDoubleTy() ||
4524 (Ty->isHalfTy() && ST->hasFullFP16())) &&
4525 CxtI &&
4529 return 0;
4530 [[fallthrough]];
4533 if (!Ty->getScalarType()->isFP128Ty())
4534 return LT.first;
4535 [[fallthrough]];
4538
4539
4540 if (!Ty->getScalarType()->isFP128Ty())
4541 return 2 * LT.first;
4542
4544 Op2Info);
4546
4547
4548 if (!Ty->isVectorTy())
4551 Op2Info);
4552 }
4553}
4554
4557 const SCEV *Ptr,
4559
4560
4561
4562
4564 int MaxMergeDistance = 64;
4565
4568 return NumVectorInstToHideOverhead;
4569
4570
4571
4572 return 1;
4573}
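// Rationale: when the stride is complex enough that it cannot be folded
// into the addressing mode, the large cost asks the caller to keep enough
// vector work in flight to hide the overhead; all other address
// computations are assumed to fold away, hence the nominal cost of 1.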
4574
4575
4576
4578 unsigned Opcode1, unsigned Opcode2) const {
4579 const TargetInstrInfo *TII = ST->getInstrInfo();
4580 const MCSchedModel &Sched = ST->getSchedModel();
4581 if (!Sched.hasInstrSchedModel())
4582 return false;
4583
4585 Sched.getSchedClassDesc(TII->get(Opcode1).getSchedClass());
4587 Sched.getSchedClassDesc(TII->get(Opcode2).getSchedClass());
4588
4589
4590
4591
4593 "Cannot handle variant scheduling classes without an MI");
4595 return false;
4596
4599}
4600
4605
4606
4608
4609 const int AmortizationCost = 20;
4610
4611
4612
4617 VecPred = CurrentPred;
4618 }
4619
4620
4625 static const auto ValidMinMaxTys = {
4626 MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
4627 MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32, MVT::v2f64};
4628 static const auto ValidFP16MinMaxTys = {MVT::v4f16, MVT::v8f16};
4629
4631 if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }) ||
4632 (ST->hasFullFP16() &&
4633 any_of(ValidFP16MinMaxTys, [&LT](MVT M) { return M == LT.second; })))
4634 return LT.first;
4635 }
4636
4638 {Instruction::Select, MVT::v2i1, MVT::v2f32, 2},
4639 {Instruction::Select, MVT::v2i1, MVT::v2f64, 2},
4640 {Instruction::Select, MVT::v4i1, MVT::v4f32, 2},
4641 {Instruction::Select, MVT::v4i1, MVT::v4f16, 2},
4642 {Instruction::Select, MVT::v8i1, MVT::v8f16, 2},
4643 {Instruction::Select, MVT::v16i1, MVT::v16i16, 16},
4644 {Instruction::Select, MVT::v8i1, MVT::v8i32, 8},
4645 {Instruction::Select, MVT::v16i1, MVT::v16i32, 16},
4646 {Instruction::Select, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost},
4647 {Instruction::Select, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost},
4648 {Instruction::Select, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost}};
4649
4650 EVT SelCondTy = TLI->getValueType(DL, CondTy);
4651 EVT SelValTy = TLI->getValueType(DL, ValTy);
4656 return Entry->Cost;
4657 }
4658 }
4659
4660 if (Opcode == Instruction::FCmp) {
4662 ValTy, CostKind, Op1Info, Op2Info, false,
4663
4664 false, [&](Type *PromotedTy) {
4667 CostKind, Op1Info, Op2Info);
4670 Instruction::Trunc,
4674 return Cost;
4675 }))
4676 return *PromotedCost;
4677
4679
4680 if (LT.second.getScalarType() != MVT::f64 &&
4681 LT.second.getScalarType() != MVT::f32 &&
4682 LT.second.getScalarType() != MVT::f16)
4683 return LT.first * getCallInstrCost(/*Function=*/nullptr, ValTy,
4685
4686
4687 unsigned Factor = 1;
4690 Factor = 2;
4694 Factor = 3;
4697 Factor = 3;
4698
4702 AArch64::FCMEQv4f32))
4703 Factor *= 2;
4704
4706 }
4707
4708
4709
4710
4711
4714 TLI->isTypeLegal(TLI->getValueType(DL, ValTy)) &&
4717 return 0;
4718
4719
4722 return 0;
4723
4724
4727 return 0;
4728 }
4729
4730
4731
4733 Op1Info, Op2Info, I);
4734}
4735
4739 if (ST->requiresStrictAlign()) {
4740
4741
4743 }
4744 Options.AllowOverlappingLoads = true;
4745 Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
4747
4748
4749
4750 Options.LoadSizes = {8, 4, 2, 1};
4751 Options.AllowedTailExpansions = {3, 5, 6};
4753}
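// Effect of these options (illustrative): a 15-byte memcmp can expand to
// two 8-byte loads per operand, the second pair overlapping at offset 7,
// since overlapping loads are allowed and 8 is the largest load size; odd
// tails of 3, 5 or 6 bytes may also be expanded inline.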
4754
4756 return ST->hasSVE();
4757}
4758
4762 switch (MICA.getID()) {
4763 case Intrinsic::masked_scatter:
4764 case Intrinsic::masked_gather:
4766 case Intrinsic::masked_load:
4767 case Intrinsic::masked_store:
4769 }
4771}
4772
4777
4781 if (!LT.first.isValid())
4783
4784
4786 if (VT->getElementType()->isIntegerTy(1))
4788
4789
4790
4791
4792
4795
4796 return LT.first;
4797}
4798
4799
4800
4803 assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
4804 "Should be called on only load or stores.");
4805 switch (Opcode) {
4806 case Instruction::Load:
4809 return ST->getGatherOverhead();
4810 break;
4811 case Instruction::Store:
4814 return ST->getScatterOverhead();
4815 break;
4816 default:
4818 }
4819}
4820
4824
4825 unsigned Opcode = (MICA.getID() == Intrinsic::masked_gather ||
4826 MICA.getID() == Intrinsic::vp_gather)
4827 ? Instruction::Load
4828 : Instruction::Store;
4829
4833
4838 if (!LT.first.isValid())
4840
4841
4842 if (!LT.second.isVector() ||
4844 VT->getElementType()->isIntegerTy(1))
4846
4847
4848
4849
4850
4853
4854 ElementCount LegalVF = LT.second.getVectorElementCount();
4857 {TTI::OK_AnyValue, TTI::OP_None}, I);
4858
4861}
4862
4866
4868 Align Alignment,
4873 EVT VT = TLI->getValueType(DL, Ty, true);
4874
4875 if (VT == MVT::Other)
4878
4880 if (!LT.first.isValid())
4882
4883
4884
4885
4886
4887
4890 (VTy->getElementType()->isIntegerTy(1) &&
4891 !VTy->getElementCount().isKnownMultipleOf(
4894
4895
4897 return LT.first;
4898
4900 return 1;
4901
4902 if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
4903 LT.second.is128BitVector() && Alignment < Align(16)) {
4904
4905
4906
4907
4908
4909 const int AmortizationCost = 6;
4910
4911 return LT.first * 2 * AmortizationCost;
4912 }
4913
4914
4915 if (Ty->isPtrOrPtrVectorTy())
4916 return LT.first;
4917
4919
4920 if (Ty->getScalarSizeInBits() != LT.second.getScalarSizeInBits()) {
4921
4922 if (VT == MVT::v4i8)
4923 return 2;
4924
4926 }
4929 if (!isPowerOf2_32(EltSize) || EltSize < 8 || EltSize > 64 ||
4931 return LT.first;
4932
4933
4935 return LT.first;
4936
4937
4938
4939
4944 while (!TypeWorklist.empty()) {
4949 continue;
4950 }
4951
4952 unsigned PrevPow2 = NextPowerOf2(CurrNumElements) / 2;
4956 }
4957 return Cost;
4958 }
4959
4960 return LT.first;
4961}
4962
4966 bool UseMaskForCond, bool UseMaskForGaps) const {
4967 assert(Factor >= 2 && "Invalid interleave factor");
4969
4972
4973
4974
4975
4976
4979
4980
4981
4982 if (!VecTy->isScalableTy() && (UseMaskForCond || UseMaskForGaps))
4984
4985 if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
4986 unsigned MinElts = VecVTy->getElementCount().getKnownMinValue();
4987 auto *SubVecTy =
4989 VecVTy->getElementCount().divideCoefficientBy(Factor));
4990
4991
4992
4993
4994 bool UseScalable;
4995 if (MinElts % Factor == 0 &&
4996 TLI->isLegalInterleavedAccessType(SubVecTy, DL, UseScalable))
4997 return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL, UseScalable);
4998 }
4999
5002 UseMaskForCond, UseMaskForGaps);
5003}
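// Example: a factor-2 interleaved load of <16 x i8> uses <8 x i8>
// subvectors; that is a legal interleaved access type, so the cost is
// Factor * getNumInterleavedAccesses(SubVecTy, DL, UseScalable) = 2 * 1,
// i.e. a single LD2.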
5004
5009 for (auto *I : Tys) {
5010 if (!I->isVectorTy())
5011 continue;
5013 128)
5016 }
5017 return Cost;
5018}
5019
5021 return ST->getMaxInterleaveFactor();
5022}
5023
5024
5025
5026
5027
5028static void
5031 enum { MaxStridedLoads = 7 };
5032 auto countStridedLoads = [](Loop *L, ScalarEvolution &SE) {
5033 int StridedLoads = 0;
5034
5035
5036 for (const auto BB : L->blocks()) {
5037 for (auto &I : *BB) {
5038 LoadInst *LMemI = dyn_cast<LoadInst>(&I);
5039 if (!LMemI)
5040 continue;
5041
5042 Value *PtrValue = LMemI->getPointerOperand();
5043 if (L->isLoopInvariant(PtrValue))
5044 continue;
5045
5046 const SCEV *LSCEV = SE.getSCEV(PtrValue);
5047 const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
5048 if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
5049 continue;
5050
5051
5052
5053
5054 ++StridedLoads;
5055
5056
5057 if (StridedLoads > MaxStridedLoads / 2)
5058 return StridedLoads;
5059 }
5060 }
5061 return StridedLoads;
5062 };
5063
5064 int StridedLoads = countStridedLoads(L, SE);
5065 LLVM_DEBUG(dbgs() << "falkor-hwpf: detected " << StridedLoads
5066 << " strided loads\n");
5067
5068
5069 if (StridedLoads) {
5070 UP.MaxCount = 1 << Log2_32(MaxStridedLoads / StridedLoads);
5071 LLVM_DEBUG(dbgs() << "falkor-hwpf: setting unroll MaxCount to "
5073 }
5074}
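// Worked example: with MaxStridedLoads = 7, a loop containing 3 strided
// loads gets UP.MaxCount = 1 << Log2_32(7 / 3) = 2, keeping the unrolled
// body within the Falkor hardware prefetcher's capacity.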
5075
5076
5077
5078
5079
5082 unsigned *FinalSize) {
5083
5085
5086 for (auto *BB : L->getBlocks()) {
5087 for (auto &I : *BB) {
5091
5092
5093 if (!Cost.isValid())
5094 return false;
5095
5096 LoopCost += Cost;
5097 if (LoopCost > Budget)
5098 return false;
5099 }
5100 }
5101
5102 if (FinalSize)
5103 *FinalSize = LoopCost.getValue();
5104 return true;
5105}
5106
5109
5110
5111
5114 return false;
5115
5116
5117
5119 if (MaxTC > 0 && MaxTC <= 32)
5120 return false;
5121
5122
5124 return false;
5125
5126
5127
5128
5130 if (Blocks.size() != 2)
5131 return false;
5132
5135 }))
5136 return false;
5137
5138 return true;
5139}
5140
5141
5142
5143static void
5147
5148
5149
5150
5151
5152 if (!L->isInnermost() || L->getNumBlocks() > 8)
5153 return;
5154
5155
5156 if (!L->getExitBlock())
5157 return;
5158
5159
5160
5161
5162 bool HasParallelizableReductions =
5163 L->getNumBlocks() == 1 &&
5164 any_of(L->getHeader()->phis(),
5165 [&SE, L](PHINode &Phi) {
5166 return canParallelizeReductionWhenUnrolling(Phi, L, &SE);
5167 }) &&
5169 if (HasParallelizableReductions &&
5174 }
5175
5180 return;
5181
5183 return;
5184
5186 return;
5187
5188
5190
5191 if (HasParallelizableReductions) {
5195 }
5196
5197
5198
5199
5200 BasicBlock *Header = L->getHeader();
5201 BasicBlock *Latch = L->getLoopLatch();
5202 if (Header == Latch) {
5203
5204 unsigned Size;
5205 unsigned Width = 10;
5207 return;
5208
5209
5210
5211 unsigned MaxInstsPerLine = 16;
5212 unsigned UC = 1;
5213 unsigned BestUC = 1;
5214 unsigned SizeWithBestUC = BestUC * Size;
5215 while (UC <= 8) {
5216 unsigned SizeWithUC = UC * Size;
5217 if (SizeWithUC > 48)
5218 break;
5219 if ((SizeWithUC % MaxInstsPerLine) == 0 ||
5220 (SizeWithBestUC % MaxInstsPerLine) < (SizeWithUC % MaxInstsPerLine)) {
5221 BestUC = UC;
5222 SizeWithBestUC = BestUC * Size;
5223 }
5224 UC++;
5225 }
5226
5227 if (BestUC == 1)
5228 return;
5229
5232 for (auto *BB : L->blocks()) {
5233 for (auto &I : *BB) {
5235 if (!Ptr)
5236 continue;
5239 continue;
5241 LoadedValuesPlus.insert(&I);
5242
5243 for (auto *U : I.users())
5245 LoadedValuesPlus.insert(U);
5246 } else
5247 Stores.push_back(cast<StoreInst>(&I));
5248 }
5249 }
5250
5252 return LoadedValuesPlus.contains(SI->getOperand(0));
5253 }))
5254 return;
5255
5258 return;
5259 }
5260
5261
5262
5265 if (!Term || !Term->isConditional() || Preds.size() == 1 ||
5267 none_of(Preds, [L](BasicBlock *Pred) { return L->contains(Pred); }))
5268 return;
5269
5270 std::function<bool(Instruction *, unsigned)> DependsOnLoopLoad =
5273 return false;
5274
5276 return true;
5277
5278 return any_of(I->operands(), [&](Value *V) {
5279 auto *I = dyn_cast<Instruction>(V);
5280 return I && DependsOnLoopLoad(I, Depth + 1);
5281 });
5282 };
5287 DependsOnLoopLoad(I, 0)) {
5289 }
5290}
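// Summary of the single-block path above: candidate unroll counts run up
// to 8, any count whose unrolled size exceeds 48 instructions is rejected,
// and counts whose size is a multiple of (or best fills) a 16-instruction
// fetch line are preferred; the chosen count is applied only when every
// store forwards a value loaded (or derived from a load) inside the loop.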
5291
5295
5297
5299
5300
5301
5302
5303 if (L->getLoopDepth() > 1)
5305
5306
5308
5309
5310
5311
5314 for (auto *BB : L->getBlocks()) {
5315 for (auto &I : *BB) {
5316
5317
5318
5319 if (IsVectorized && I.getType()->isVectorTy())
5320 return;
5325 continue;
5326 return;
5327 }
5328
5332 }
5333 }
5334
5335
5336 if (ST->isAppleMLike())
5338 else if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
5341
5342
5343
5347
5349
5350
5352 return;
5353 }
5354
5355
5356
5357
5358
5360 !ST->getSchedModel().isOutOfOrder()) {
5365
5368 }
5369
5370
5371
5373 UP.Force = true;
5374}
5375
5380
5382 Type *ExpectedType,
5383 bool CanCreate) const {
5385 default:
5386 return nullptr;
5387 case Intrinsic::aarch64_neon_st2:
5388 case Intrinsic::aarch64_neon_st3:
5389 case Intrinsic::aarch64_neon_st4: {
5390
5391 StructType *ST = dyn_cast<StructType>(ExpectedType);
5392 if (!CanCreate || !ST)
5393 return nullptr;
5394 unsigned NumElts = Inst->arg_size() - 1;
5395 if (ST->getNumElements() != NumElts)
5396 return nullptr;
5397 for (unsigned i = 0, e = NumElts; i != e; ++i) {
5398 if (Inst->getArgOperand(i)->getType() != ST->getElementType(i))
5399 return nullptr;
5400 }
5401 Value *Res = PoisonValue::get(ExpectedType);
5402 IRBuilder<> Builder(Inst);
5403 for (unsigned i = 0, e = NumElts; i != e; ++i) {
5404 Value *L = Inst->getArgOperand(i);
5405 Res = Builder.CreateInsertValue(Res, L, i);
5406 }
5407 return Res;
5408 }
5409 case Intrinsic::aarch64_neon_ld2:
5410 case Intrinsic::aarch64_neon_ld3:
5411 case Intrinsic::aarch64_neon_ld4:
5412 if (Inst->getType() == ExpectedType)
5413 return Inst;
5414 return nullptr;
5415 }
5416}
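// Usage sketch (hypothetical values): for a call such as
//   aarch64.neon.st3(%a, %b, %c, %ptr)
// with an expected struct type of three matching vectors, the code above
// rebuilds { %a, %b, %c } via insertvalue so the caller (e.g. EarlyCSE) can
// reuse the stored aggregate; ld2/ld3/ld4 simply return the instruction
// itself when its type already matches.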
5417
5421 default:
5422 break;
5423 case Intrinsic::aarch64_neon_ld2:
5424 case Intrinsic::aarch64_neon_ld3:
5425 case Intrinsic::aarch64_neon_ld4:
5426 Info.ReadMem = true;
5427 Info.WriteMem = false;
5429 break;
5430 case Intrinsic::aarch64_neon_st2:
5431 case Intrinsic::aarch64_neon_st3:
5432 case Intrinsic::aarch64_neon_st4:
5433 Info.ReadMem = false;
5434 Info.WriteMem = true;
5436 break;
5437 }
5438
5440 default:
5441 return false;
5442 case Intrinsic::aarch64_neon_ld2:
5443 case Intrinsic::aarch64_neon_st2:
5444 Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
5445 break;
5446 case Intrinsic::aarch64_neon_ld3:
5447 case Intrinsic::aarch64_neon_st3:
5448 Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
5449 break;
5450 case Intrinsic::aarch64_neon_ld4:
5451 case Intrinsic::aarch64_neon_st4:
5452 Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
5453 break;
5454 }
5455 return true;
5456}
5457
5458
5459
5460
5461
5462
5464 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
5465 bool Considerable = false;
5466 AllowPromotionWithoutCommonHeader = false;
5468 return false;
5470 Type *ConsideredSExtType = Type::getInt64Ty(I.getContext());
5471 if (I.getType() != ConsideredSExtType)
5472 return false;
5473
5474
5475 for (const User *U : I.users()) {
5476 if (const auto *GEPInst = dyn_cast<GetElementPtrInst>(U)) {
5477 Considerable = true;
5478
5479
5480
5481 if (GEPInst->getNumOperands() > 2) {
5482 AllowPromotionWithoutCommonHeader = true;
5483 break;
5484 }
5485 }
5486 }
5487 return Considerable;
5488}
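// Example (illustrative): in
//   %i = sext i32 %n to i64
//   %p = getelementptr inbounds %struct.S, ptr %b, i64 %i, i32 1
// the sext feeds a GEP with more than two operands, so promotion is deemed
// worthwhile even without a common dominating header.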
5489
5493 return true;
5494
5497 return false;
5498
5515 return true;
5516 default:
5517 return false;
5518 }
5519}
5520
5525
5526
5527
5528
5532
5534
5535 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
5537
5539 if (LT.first > 1) {
5543 }
5544
5545 return LegalizationCost + 2;
5546}
5547
5552 if (LT.first > 1) {
5555 LegalizationCost *= LT.first - 1;
5556 }
5557
5558 int ISD = TLI->InstructionOpcodeToISD(Opcode);
5559 assert(ISD && "Invalid opcode");
5560
5561 switch (ISD) {
5567 return LegalizationCost + 2;
5568 default:
5570 }
5571}
5572
5575 std::optional FMF,
5577
5578
5579
5580
5584
5589
5590
5591 return BaseCost + FixedVTy->getNumElements();
5592 }
5593
5594 if (Opcode != Instruction::FAdd)
5596
5601 return Cost;
5602 }
5603
5606
5608 MVT MTy = LT.second;
5609 int ISD = TLI->InstructionOpcodeToISD(Opcode);
5610 assert(ISD && "Invalid opcode");
5611
5612
5613
5614
5615
5616
5617
5618
5619
5620 static const CostTblEntry CostTblNoPairwise[]{
5628 {ISD::OR, MVT::v8i8, 5},
5629 {ISD::OR, MVT::v16i8, 7},
5630 {ISD::OR, MVT::v4i16, 4},
5631 {ISD::OR, MVT::v8i16, 6},
5632 {ISD::OR, MVT::v2i32, 3},
5633 {ISD::OR, MVT::v4i32, 5},
5634 {ISD::OR, MVT::v2i64, 3},
5635 {ISD::XOR, MVT::v8i8, 5},
5642 {ISD::AND, MVT::v8i8, 5},
5649 };
5650 switch (ISD) {
5651 default:
5652 break;
5655
5656
5657 MTy.isVector() && (EltTy->isFloatTy() || EltTy->isDoubleTy() ||
5658 (EltTy->isHalfTy() && ST->hasFullFP16()))) {
5660 if (ValTy->getElementCount().getFixedValue() >= 2 && NElts >= 2 &&
5662
5663
5664
5665
5666
5667
5668
5669
5670 return (LT.first - 1) + Log2_32(NElts);
5671 }
5672 break;
5674 if (const auto *Entry = CostTableLookup(CostTblNoPairwise, ISD, MTy))
5675 return (LT.first - 1) + Entry->Cost;
5676 break;
5681 if (!Entry)
5682 break;
5687 if (LT.first != 1) {
5688
5689
5693 ExtraCost *= LT.first - 1;
5694 }
5695
5696 auto Cost = ValVTy->getElementType()->isIntegerTy(1) ? 2 : Entry->Cost;
5697 return Cost + ExtraCost;
5698 }
5699 break;
5700 }
5702}
5703
5705 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *VecTy,
5707 EVT VecVT = TLI->getValueType(DL, VecTy);
5708 EVT ResVT = TLI->getValueType(DL, ResTy);
5709
5710 if (Opcode == Instruction::Add && VecVT.isSimple() && ResVT.isSimple() &&
5713
5714
5715
5716
5718 if (((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
5719 RevVTSize <= 32) ||
5720 ((LT.second == MVT::v4i16 || LT.second == MVT::v8i16) &&
5721 RevVTSize <= 32) ||
5722 ((LT.second == MVT::v2i32 || LT.second == MVT::v4i32) &&
5723 RevVTSize <= 64))
5724 return (LT.first - 1) * 2 + 2;
5725 }
5726
5729}
5730
5735 EVT VecVT = TLI->getValueType(DL, VecTy);
5736 EVT ResVT = TLI->getValueType(DL, ResTy);
5737
5738 if (ST->hasDotProd() && VecVT.isSimple() && ResVT.isSimple() &&
5739 RedOpcode == Instruction::Add) {
5741
5742
5743
5744
5745 if ((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
5746 ResVT == MVT::i32)
5747 return LT.first + 2;
5748 }
5749
5752}
5753
5771 };
5772
5773
5774
5775
5776
5779
5782 EVT PromotedVT = LT.second.getScalarType() == MVT::i1
5783 ? TLI->getPromotedVTForPredicate(EVT(LT.second))
5784 : LT.second;
5787 if (Index < 0) {
5788 LegalizationCost =
5793 }
5794
5795
5796
5797 if (LT.second.getScalarType() == MVT::i1) {
5798 LegalizationCost +=
5803 }
5804 const auto *Entry =
5806 assert(Entry && "Illegal Type for Splice");
5807 LegalizationCost += Entry->Cost;
5808 return LegalizationCost * LT.first;
5809}
5810
5812 unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
5817
5820
5821 if (VF.isFixed() && !ST->isSVEorStreamingSVEAvailable() &&
5822 (!ST->isNeonAvailable() || !ST->hasDotProd()))
5824
5825 if ((Opcode != Instruction::Add && Opcode != Instruction::Sub) ||
5828
5830 (!BinOp || (OpBExtend != TTI::PR_None && InputTypeB)) &&
5831 "Unexpected values for OpBExtend or InputTypeB");
5832
5833
5834
5835 if (BinOp && (*BinOp != Instruction::Mul || InputTypeA != InputTypeB))
5837
5838 bool IsUSDot = OpBExtend != TTI::PR_None && OpAExtend != OpBExtend;
5839 if (IsUSDot && !ST->hasMatMulInt8())
5841
5842 unsigned Ratio =
5846
5850
5851 auto TC = TLI->getTypeConversion(AccumVectorType->getContext(),
5853 switch (TC.first) {
5854 default:
5859
5860 if (TLI->getTypeAction(AccumVectorType->getContext(), TC.second) !=
5863 break;
5864 }
5865
5866 std::pair<InstructionCost, MVT> AccumLT =
5868 std::pair<InstructionCost, MVT> InputLT =
5870
5872
5873
5876
5877
5878
5880
5881 if (ST->isSVEorStreamingSVEAvailable() && !IsUSDot) {
5882
5883 if (AccumLT.second.getScalarType() == MVT::i64 &&
5884 InputLT.second.getScalarType() == MVT::i16)
5885 return Cost;
5886
5887 if (AccumLT.second.getScalarType() == MVT::i64 &&
5888 InputLT.second.getScalarType() == MVT::i8)
5889
5890
5891
5892
5893
5894 return Cost;
5895 }
5896
5897
5898 if (ST->isSVEorStreamingSVEAvailable() ||
5899 (AccumLT.second.isFixedLengthVector() && ST->isNeonAvailable() &&
5900 ST->hasDotProd())) {
5901 if (AccumLT.second.getScalarType() == MVT::i32 &&
5902 InputLT.second.getScalarType() == MVT::i8)
5903 return Cost;
5904 }
5905
5906
5907 return Cost + 2;
5908}
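// Example: accumulating i8 * i8 products into an i32 vector accumulator
// (four input elements per result element) matches the [us]dot pattern
// checked above, so only the type-legalization cost is returned; shapes
// without a dot-product or SVE mapping pay the additional +2 fallback.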
5909
5918 "Expected the Mask to match the return size if given");
5920 "Expected the same scalar types");
5922
5923
5924
5926 LT.second.getScalarSizeInBits() * Mask.size() > 128 &&
5927 SrcTy->getScalarSizeInBits() == LT.second.getScalarSizeInBits() &&
5928 Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) {
5929
5930
5931
5932
5933 if (Args.size() >= 1 && isa<LoadInst>(Args[0]) &&
5936 return std::max(1, LT.first / 4);
5937
5938
5939
5940
5941
5944 Mask, 4, SrcTy->getElementCount().getKnownMinValue() * 2) ||
5946 Mask, 3, SrcTy->getElementCount().getKnownMinValue() * 2)))
5947 return LT.first;
5948
5949 unsigned TpNumElts = Mask.size();
5950 unsigned LTNumElts = LT.second.getVectorNumElements();
5951 unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts;
5953 LT.second.getVectorElementCount());
5955 std::map<std::tuple<unsigned, unsigned, SmallVector>, InstructionCost>
5956 PreviousCosts;
5957 for (unsigned N = 0; N < NumVecs; N++) {
5959
5960
5961 unsigned Source1 = -1U, Source2 = -1U;
5962 unsigned NumSources = 0;
5963 for (unsigned E = 0; E < LTNumElts; E++) {
5964 int MaskElt = (N * LTNumElts + E < TpNumElts) ? Mask[N * LTNumElts + E]
5966 if (MaskElt < 0) {
5968 continue;
5969 }
5970
5971
5972
5973 unsigned Source = MaskElt / LTNumElts;
5974 if (NumSources == 0) {
5975 Source1 = Source;
5976 NumSources = 1;
5977 } else if (NumSources == 1 && Source != Source1) {
5978 Source2 = Source;
5979 NumSources = 2;
5980 } else if (NumSources >= 2 && Source != Source1 && Source != Source2) {
5981 NumSources++;
5982 }
5983
5984
5985
5986 if (Source == Source1)
5987 NMask.push_back(MaskElt % LTNumElts);
5988 else if (Source == Source2)
5989 NMask.push_back(MaskElt % LTNumElts + LTNumElts);
5990 else
5991 NMask.push_back(MaskElt % LTNumElts);
5992 }
5993
5994
5995
5996
5997 auto Result =
5998 PreviousCosts.insert({std::make_tuple(Source1, Source2, NMask), 0});
5999
6000 if (!Result.second)
6001 continue;
6002
6003
6004
6006 NumSources <= 2
6009 NTp, NTp, NMask, CostKind, 0, nullptr, Args,
6010 CxtI)
6011 : LTNumElts;
6012 Result.first->second = NCost;
6013 Cost += NCost;
6014 }
6015 return Cost;
6016 }
6017
6020
6021
6022
6023
6024
6025
6026 if (IsExtractSubvector && LT.second.isFixedLengthVector()) {
6027 if (LT.second.getFixedSizeInBits() >= 128 &&
6029 LT.second.getVectorNumElements() / 2) {
6030 if (Index == 0)
6031 return 0;
6032 if (Index == (int)LT.second.getVectorNumElements() / 2)
6033 return 1;
6034 }
6036 }
6037
6038
6041 SrcTy = DstTy;
6042 }
6043
6044
6045
6046 if (!Mask.empty() && LT.second.isFixedLengthVector() &&
6049 return M.value() < 0 || M.value() == (int)M.index();
6050 }))
6051 return 0;
6052
6053
6055 !Mask.empty() && SrcTy->getPrimitiveSizeInBits().isNonZero() &&
6056 SrcTy->getPrimitiveSizeInBits().isKnownMultipleOf(
6058
6060 unsigned Segments =
6062 unsigned SegmentElts = VTy->getNumElements() / Segments;
6063
6064
6065 if ((ST->hasSVE2p1() || ST->hasSME2p1()) &&
6066 ST->isSVEorStreamingSVEAvailable() &&
6067 isDUPQMask(Mask, Segments, SegmentElts))
6068 return LT.first;
6069
6070
6071 if (ST->isSVEorStreamingSVEAvailable() &&
6073 return LT.first;
6074 }
6075
6076
6077
6078
6079
6080
6081
6082
6084 bool IsLoad = !Args.empty() && isa<LoadInst>(Args[0]);
6085 if (IsLoad && LT.second.isVector() &&
6087 LT.second.getVectorElementCount()))
6088 return 0;
6089 }
6090
6091
6092
6093 if (Mask.size() == 4 &&
6095 (SrcTy->getScalarSizeInBits() == 16 ||
6096 SrcTy->getScalarSizeInBits() == 32) &&
6097 all_of(Mask, [](int E) { return E < 8; }))
6099
6100
6101
6102 unsigned Unused;
6103 if (LT.second.isFixedLengthVector() &&
6104 LT.second.getVectorNumElements() == Mask.size() &&
6106 (isZIPMask(Mask, LT.second.getVectorNumElements(), Unused, Unused) ||
6107 isUZPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
6108 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6109 LT.second.getVectorNumElements(), 16) ||
6110 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6111 LT.second.getVectorNumElements(), 32) ||
6112 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6113 LT.second.getVectorNumElements(), 64) ||
6114
6116 [&Mask](int M) { return M < 0 || M == Mask[0]; })))
6117 return 1;
6118
6123
6138
6139
6154
6155
6157 {TTI::SK_Select, MVT::v4i32, 2},
6160 {TTI::SK_Select, MVT::v4f32, 2},
6162
6177
6192
6207
6225
6243 };
6244 if (const auto *Entry = CostTableLookup(ShuffleTbl, Kind, LT.second))
6245 return LT.first * Entry->Cost;
6246 }
6247
6250
6251
6252
6254 LT.second.getSizeInBits() <= 128 && SubTp) {
6256 if (SubLT.second.isVector()) {
6257 int NumElts = LT.second.getVectorNumElements();
6258 int NumSubElts = SubLT.second.getVectorNumElements();
6259 if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
6260 return SubLT.first;
6261 }
6262 }
6263
6264
6265 if (IsExtractSubvector)
6268 Args, CxtI);
6269}
6270
6276
6277
6282 if (getPtrStride(*PSE, AccessTy, Ptr, TheLoop, DT, Strides,
6283 true, false)
6284 .value_or(0) < 0)
6285 return true;
6286 }
6287 }
6288 }
6289 return false;
6290}
6291
6295
6296
6297
6298
6299 if (IsEpilogue)
6300 return true;
6301 return ST->useFixedOverScalableIfEqualCost();
6302}
6303
6305 return ST->getEpilogueVectorizationMinVF();
6306}
6307
6309 if (!ST->hasSVE())
6310 return false;
6311
6312
6313
6314
6316 return false;
6317
6323
6324
6325
6326
6333
6336 return false;
6337
6338
6339
6340 unsigned NumInsns = 0;
6342 NumInsns += BB->sizeWithoutDebug();
6343 }
6344
6345
6347}
6348
6351 StackOffset BaseOffset, bool HasBaseReg,
6352 int64_t Scale, unsigned AddrSpace) const {
6353
6354
6355
6356
6357
6358
6359
6364 AM.Scale = Scale;
6367
6368
6369 return AM.Scale != 0 && AM.Scale != 1;
6371}
6372
6376
6377
6378
6379
6380 if (I->getOpcode() == Instruction::Or &&
6383 return true;
6384
6385 if (I->getOpcode() == Instruction::Add ||
6386 I->getOpcode() == Instruction::Sub)
6387 return true;
6388 }
6390}
6391
6395
6396
6397
6398
6399
6405
6407}
6408
6410 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V))
6411 return all_equal(Shuf->getShuffleMask());
6412 return false;
6413}
6414
6415
6416
6418 bool AllowSplat = false) {
6419
6421 return false;
6422
6423 auto areTypesHalfed = [](Value *FullV, Value *HalfV) {
6424 auto *FullTy = FullV->getType();
6425 auto *HalfTy = HalfV->getType();
6427 2 * HalfTy->getPrimitiveSizeInBits().getFixedValue();
6428 };
6429
6430 auto extractHalf = [](Value *FullV, Value *HalfV) {
6433 return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
6434 };
6435
6437 Value *S1Op1 = nullptr, *S2Op1 = nullptr;
6440 return false;
6441
6442
6443
6445 S1Op1 = nullptr;
6447 S2Op1 = nullptr;
6448
6449
6450
6451 if ((S1Op1 && (!areTypesHalfed(S1Op1, Op1) || !extractHalf(S1Op1, Op1))) ||
6452 (S2Op1 && (!areTypesHalfed(S2Op1, Op2) || !extractHalf(S2Op1, Op2))))
6453 return false;
6454
6455
6456
6457 int M1Start = 0;
6458 int M2Start = 0;
6460 if ((S1Op1 &&
6462 (S2Op1 &&
6464 return false;
6465
6466 if ((M1Start != 0 && M1Start != (NumElements / 2)) ||
6467 (M2Start != 0 && M2Start != (NumElements / 2)))
6468 return false;
6469 if (S1Op1 && S2Op1 && M1Start != M2Start)
6470 return false;
6471
6472 return true;
6473}
6474
6475
6476
6478 auto areExtDoubled = [](Instruction *Ext) {
6479 return Ext->getType()->getScalarSizeInBits() ==
6480 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
6481 };
6482
6487 return false;
6488
6489 return true;
6490}
6491
6492
6494 Value *VectorOperand = nullptr;
6498 ElementIndex->getValue() == 1 &&
6501}
6502
6503
6507
6509
6510 auto *GEP = dyn_cast<GetElementPtrInst>(Ptrs);
6511 if (!GEP || GEP->getNumOperands() != 2)
6512 return false;
6513
6515 Value *Offsets = GEP->getOperand(1);
6516
6517
6518 if (Base->getType()->isVectorTy() || !Offsets->getType()->isVectorTy())
6519 return false;
6520
6521
6524 if (OffsetsInst->getType()->getScalarSizeInBits() > 32 &&
6525 OffsetsInst->getOperand(0)->getType()->getScalarSizeInBits() <= 32)
6526 Ops.push_back(&GEP->getOperandUse(1));
6527 }
6528
6529
6530 return true;
6531}
6532
6533
6534
6535
6538 return true;
6542 return true;
6543 }
6549 return true;
6550 }
6551 return false;
6552}
6553
6554
6555
6556
6560 switch (II->getIntrinsicID()) {
6561 case Intrinsic::aarch64_neon_smull:
6562 case Intrinsic::aarch64_neon_umull:
6564 true)) {
6565 Ops.push_back(&II->getOperandUse(0));
6566 Ops.push_back(&II->getOperandUse(1));
6567 return true;
6568 }
6569 [[fallthrough]];
6570
6571 case Intrinsic::fma:
6572 case Intrinsic::fmuladd:
6575 !ST->hasFullFP16())
6576 return false;
6577 [[fallthrough]];
6578 case Intrinsic::aarch64_neon_sqdmull:
6579 case Intrinsic::aarch64_neon_sqdmulh:
6580 case Intrinsic::aarch64_neon_sqrdmulh:
6581
6583 Ops.push_back(&II->getOperandUse(0));
6585 Ops.push_back(&II->getOperandUse(1));
6586 return !Ops.empty();
6587 case Intrinsic::aarch64_neon_fmlal:
6588 case Intrinsic::aarch64_neon_fmlal2:
6589 case Intrinsic::aarch64_neon_fmlsl:
6590 case Intrinsic::aarch64_neon_fmlsl2:
6591
6593 Ops.push_back(&II->getOperandUse(1));
6595 Ops.push_back(&II->getOperandUse(2));
6596 return !Ops.empty();
6597 case Intrinsic::aarch64_sve_ptest_first:
6598 case Intrinsic::aarch64_sve_ptest_last:
6600 if (IIOp->getIntrinsicID() == Intrinsic::aarch64_sve_ptrue)
6601 Ops.push_back(&II->getOperandUse(0));
6602 return !Ops.empty();
6603 case Intrinsic::aarch64_sme_write_horiz:
6604 case Intrinsic::aarch64_sme_write_vert:
6605 case Intrinsic::aarch64_sme_writeq_horiz:
6606 case Intrinsic::aarch64_sme_writeq_vert: {
6607 auto *Idx = dyn_cast<Instruction>(II->getOperand(1));
6608 if (!Idx || Idx->getOpcode() != Instruction::Add)
6609 return false;
6610 Ops.push_back(&II->getOperandUse(1));
6611 return true;
6612 }
6613 case Intrinsic::aarch64_sme_read_horiz:
6614 case Intrinsic::aarch64_sme_read_vert:
6615 case Intrinsic::aarch64_sme_readq_horiz:
6616 case Intrinsic::aarch64_sme_readq_vert:
6617 case Intrinsic::aarch64_sme_ld1b_vert:
6618 case Intrinsic::aarch64_sme_ld1h_vert:
6619 case Intrinsic::aarch64_sme_ld1w_vert:
6620 case Intrinsic::aarch64_sme_ld1d_vert:
6621 case Intrinsic::aarch64_sme_ld1q_vert:
6622 case Intrinsic::aarch64_sme_st1b_vert:
6623 case Intrinsic::aarch64_sme_st1h_vert:
6624 case Intrinsic::aarch64_sme_st1w_vert:
6625 case Intrinsic::aarch64_sme_st1d_vert:
6626 case Intrinsic::aarch64_sme_st1q_vert:
6627 case Intrinsic::aarch64_sme_ld1b_horiz:
6628 case Intrinsic::aarch64_sme_ld1h_horiz:
6629 case Intrinsic::aarch64_sme_ld1w_horiz:
6630 case Intrinsic::aarch64_sme_ld1d_horiz:
6631 case Intrinsic::aarch64_sme_ld1q_horiz:
6632 case Intrinsic::aarch64_sme_st1b_horiz:
6633 case Intrinsic::aarch64_sme_st1h_horiz:
6634 case Intrinsic::aarch64_sme_st1w_horiz:
6635 case Intrinsic::aarch64_sme_st1d_horiz:
6636 case Intrinsic::aarch64_sme_st1q_horiz: {
6637 auto *Idx = dyn_cast<Instruction>(II->getOperand(3));
6638 if (!Idx || Idx->getOpcode() != Instruction::Add)
6639 return false;
6640 Ops.push_back(&II->getOperandUse(3));
6641 return true;
6642 }
6643 case Intrinsic::aarch64_neon_pmull:
6645 return false;
6646 Ops.push_back(&II->getOperandUse(0));
6647 Ops.push_back(&II->getOperandUse(1));
6648 return true;
6649 case Intrinsic::aarch64_neon_pmull64:
6651 II->getArgOperand(1)))
6652 return false;
6653 Ops.push_back(&II->getArgOperandUse(0));
6654 Ops.push_back(&II->getArgOperandUse(1));
6655 return true;
6656 case Intrinsic::masked_gather:
6658 return false;
6659 Ops.push_back(&II->getArgOperandUse(0));
6660 return true;
6661 case Intrinsic::masked_scatter:
6663 return false;
6664 Ops.push_back(&II->getArgOperandUse(1));
6665 return true;
6666 default:
6667 return false;
6668 }
6669 }
6670
6671 auto ShouldSinkCondition = [](Value *Cond,
6674 return false;
6676 if (II->getIntrinsicID() != Intrinsic::vector_reduce_or ||
6678 return false;
6680 Ops.push_back(&II->getOperandUse(0));
6681 return true;
6682 };
6683
6684 switch (I->getOpcode()) {
6685 case Instruction::GetElementPtr:
6686 case Instruction::Add:
6687 case Instruction::Sub:
6688
6689 for (unsigned Op = 0; Op < I->getNumOperands(); ++Op) {
6691 Ops.push_back(&I->getOperandUse(Op));
6692 return true;
6693 }
6694 }
6695 break;
6696 case Instruction::Select: {
6697 if (!ShouldSinkCondition(I->getOperand(0), Ops))
6698 return false;
6699
6700 Ops.push_back(&I->getOperandUse(0));
6701 return true;
6702 }
6703 case Instruction::Br: {
6705 return false;
6706
6708 return false;
6709
6710 Ops.push_back(&I->getOperandUse(0));
6711 return true;
6712 }
6713 default:
6714 break;
6715 }
6716
6717 if (!I->getType()->isVectorTy())
6718 return false;
6719
6720 switch (I->getOpcode()) {
6721 case Instruction::Sub:
6722 case Instruction::Add: {
6724 return false;
6725
6726
6727
6731 Ops.push_back(&Ext1->getOperandUse(0));
6732 Ops.push_back(&Ext2->getOperandUse(0));
6733 }
6734
6735 Ops.push_back(&I->getOperandUse(0));
6736 Ops.push_back(&I->getOperandUse(1));
6737
6738 return true;
6739 }
6740 case Instruction::Or: {
6741
6742
6743 if (ST->hasNEON()) {
6745 Value *MaskValue;
6746
6750 if (match(OtherAnd,
6752 Instruction *MainAnd = I->getOperand(0) == OtherAnd
6753 ? cast<Instruction>(I->getOperand(1))
6754 : cast<Instruction>(I->getOperand(0));
6755
6756
6757 if (I->getParent() != MainAnd->getParent() ||
6758 I->getParent() != OtherAnd->getParent())
6759 return false;
6760
6761
6762 if (I->getParent() != IA->getParent() ||
6763 I->getParent() != IB->getParent())
6764 return false;
6765
6766 Ops.push_back(
6767 &MainAnd->getOperandUse(MainAnd->getOperand(0) == IA ? 1 : 0));
6768 Ops.push_back(&I->getOperandUse(0));
6769 Ops.push_back(&I->getOperandUse(1));
6770
6771 return true;
6772 }
6773 }
6774 }
6775
6776 return false;
6777 }
6778 case Instruction::Mul: {
6779 auto ShouldSinkSplatForIndexedVariant = [](Value *V) {
6781
6782 if (Ty->isScalableTy())
6783 return false;
6784
6785
6786 return Ty->getScalarSizeInBits() == 16 || Ty->getScalarSizeInBits() == 32;
6787 };
6788
6789 int NumZExts = 0, NumSExts = 0;
6790 for (auto &Op : I->operands()) {
6791
6792 if (any_of(Ops, [&](Use *U) { return U->get() == Op; }))
6793 continue;
6794
6797 auto *ExtOp = Ext->getOperand(0);
6798 if (isSplatShuffle(ExtOp) && ShouldSinkSplatForIndexedVariant(ExtOp))
6799 Ops.push_back(&Ext->getOperandUse(0));
6801
6803 NumSExts++;
6804 } else {
6805 NumZExts++;
6806
6807 if (Ext->getOperand(0)->getType()->getScalarSizeInBits() * 2 <
6808 I->getType()->getScalarSizeInBits())
6809 NumSExts++;
6810 }
6811
6812 continue;
6813 }
6814
6816 if (!Shuffle)
6817 continue;
6818
6819
6820
6821
6827 NumSExts++;
6828 else
6829 NumZExts++;
6830 continue;
6831 }
6832
6835 if (!Insert)
6836 continue;
6837
6839 if (!OperandInstr)
6840 continue;
6841
6844
6845 if (!ElementConstant || !ElementConstant->isZero())
6846 continue;
6847
6848 unsigned Opcode = OperandInstr->getOpcode();
6849 if (Opcode == Instruction::SExt)
6850 NumSExts++;
6851 else if (Opcode == Instruction::ZExt)
6852 NumZExts++;
6853 else {
6854
6855
6856 unsigned Bitwidth = I->getType()->getScalarSizeInBits();
6859 continue;
6860 NumZExts++;
6861 }
6862
6863
6864
6866 Ops.push_back(&Insert->getOperandUse(1));
6869 }
6870
6871
6872 if (!Ops.empty() && (NumSExts == 2 || NumZExts == 2))
6873 return true;
6874
6875
6876 if (!ShouldSinkSplatForIndexedVariant(I))
6877 return false;
6878
6879 Ops.clear();
6881 Ops.push_back(&I->getOperandUse(0));
6883 Ops.push_back(&I->getOperandUse(1));
6884
6885 return !Ops.empty();
6886 }
6887 case Instruction::FMul: {
6888
6889 if (I->getType()->isScalableTy())
6890 return false;
6891
6892 if (cast<VectorType>(I->getType())->getElementType()->isHalfTy() &&
6893 !ST->hasFullFP16())
6894 return false;
6895
6896
6898 Ops.push_back(&I->getOperandUse(0));
6900 Ops.push_back(&I->getOperandUse(1));
6901 return !Ops.empty();
6902 }
6903 default:
6904 return false;
6905 }
6906 return false;
6907}
static cl::opt< unsigned > InlineCallPenaltyChangeSM("inline-call-penalty-sm-change", cl::init(10), cl::Hidden, cl::desc("Penalty of inlining a call that requires a change to PSTATE.SM"))
static std::optional< Instruction * > instCombineSVELD1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL)
Definition AArch64TargetTransformInfo.cpp:2306
static std::optional< Instruction * > instCombineSVESrshl(InstCombiner &IC, IntrinsicInst &II)
Definition AArch64TargetTransformInfo.cpp:2755
static cl::opt< unsigned > DMBLookaheadThreshold("dmb-lookahead-threshold", cl::init(10), cl::Hidden, cl::desc("The number of instructions to search for a redundant dmb"))
static std::optional< Instruction * > simplifySVEIntrinsic(InstCombiner &IC, IntrinsicInst &II, const SVEIntrinsicInfo &IInfo)
Definition AArch64TargetTransformInfo.cpp:1736
static unsigned getSVEGatherScatterOverhead(unsigned Opcode, const AArch64Subtarget *ST)
Definition AArch64TargetTransformInfo.cpp:4801
static bool isOperandOfVmullHighP64(Value *Op)
Check if Op could be used with vmull_high_p64 intrinsic.
Definition AArch64TargetTransformInfo.cpp:6493
static std::optional< Instruction * > instCombineInStreamingMode(InstCombiner &IC, IntrinsicInst &II)
Definition AArch64TargetTransformInfo.cpp:2861
static std::optional< Instruction * > instCombineSVELast(InstCombiner &IC, IntrinsicInst &II)
Definition AArch64TargetTransformInfo.cpp:2029
static cl::opt< unsigned > NeonNonConstStrideOverhead("neon-nonconst-stride-overhead", cl::init(10), cl::Hidden)
static cl::opt< bool > EnableFalkorHWPFUnrollFix("enable-falkor-hwpf-unroll-fix", cl::init(true), cl::Hidden)
static std::optional< Instruction * > instCombineSVECntElts(InstCombiner &IC, IntrinsicInst &II, unsigned NumElts)
Definition AArch64TargetTransformInfo.cpp:2161
static std::optional< Instruction * > instCombineSVEUxt(InstCombiner &IC, IntrinsicInst &II, unsigned NumBits)
Definition AArch64TargetTransformInfo.cpp:2840
static cl::opt< TailFoldingOption, true, cl::parser< std::string > > SVETailFolding("sve-tail-folding", cl::desc("Control the use of vectorisation using tail-folding for SVE where the" " option is specified in the form (Initial)[+(Flag1|Flag2|...)]:" "\ndisabled (Initial) No loop types will vectorize using " "tail-folding" "\ndefault (Initial) Uses the default tail-folding settings for " "the target CPU" "\nall (Initial) All legal loop types will vectorize using " "tail-folding" "\nsimple (Initial) Use tail-folding for simple loops (not " "reductions or recurrences)" "\nreductions Use tail-folding for loops containing reductions" "\nnoreductions Inverse of above" "\nrecurrences Use tail-folding for loops containing fixed order " "recurrences" "\nnorecurrences Inverse of above" "\nreverse Use tail-folding for loops requiring reversed " "predicates" "\nnoreverse Inverse of above"), cl::location(TailFoldingOptionLoc))
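For illustration only (not part of this file): because sve-tail-folding is a hidden cl::opt, it can be passed directly to LLVM tools such as opt or llc, or forwarded from clang via -mllvm. The invocations below are a sketch built from the grammar documented above; the input file names are placeholders.
  opt -passes=loop-vectorize -sve-tail-folding=simple+reductions -S input.ll
  clang -O3 --target=aarch64-linux-gnu -march=armv8-a+sve -mllvm -sve-tail-folding=default+noreverse input.c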
static bool areExtractShuffleVectors(Value *Op1, Value *Op2, bool AllowSplat=false)
Check if both Op1 and Op2 are shufflevector extracts of either the lower or upper half of the vector ...
Definition AArch64TargetTransformInfo.cpp:6417
static std::optional< Instruction * > instCombineSVEVectorAdd(InstCombiner &IC, IntrinsicInst &II)
Definition AArch64TargetTransformInfo.cpp:2371
static cl::opt< bool > EnableOrLikeSelectOpt("enable-aarch64-or-like-select", cl::init(true), cl::Hidden)
static cl::opt< unsigned > SVEScatterOverhead("sve-scatter-overhead", cl::init(10), cl::Hidden)
static std::optional< Instruction * > instCombineSVEDupqLane(InstCombiner &IC, IntrinsicInst &II)
Definition AArch64TargetTransformInfo.cpp:2683
This file defines a TargetTransformInfoImplBase conforming object specific to the AArch64 target machine.
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static Error reportError(StringRef Message)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
Cost tables and simple lookup functions.
This file defines the DenseMap class.
static Value * getCondition(Instruction *I)
This file provides the interface for the instcombine pass implementation.
This file defines the LoopVectorizationLegality class.
static const Function * getCalledFunction(const Value *V)
static uint64_t getBits(uint64_t Val, int Start, int End)
static unsigned getNumElements(Type *Ty)
static unsigned getScalarSizeInBits(Type *Ty)
static SymbolRef::Type getType(const Symbol *Sym)
This file describes how to lower LLVM code to machine code.
This pass exposes codegen information to IR-level passes.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
unsigned getVectorInsertExtractBaseCost() const
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind) const override
Definition AArch64TargetTransformInfo.cpp:5811
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
Definition AArch64TargetTransformInfo.cpp:5574
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
Definition AArch64TargetTransformInfo.cpp:4219
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const override
Definition AArch64TargetTransformInfo.cpp:5006
unsigned getMaxInterleaveFactor(ElementCount VF) const override
Definition AArch64TargetTransformInfo.cpp:5020
InstructionCost getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
Definition AArch64TargetTransformInfo.cpp:4774
InstructionCost getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
Definition AArch64TargetTransformInfo.cpp:4822
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const override
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr, TTI::TargetCostKind CostKind) const override
Definition AArch64TargetTransformInfo.cpp:4556
bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst, Type *Src) const
Definition AArch64TargetTransformInfo.cpp:3187
InstructionCost getIntImmCost(int64_t Val) const
Calculate the cost of materializing a 64-bit value.
Definition AArch64TargetTransformInfo.cpp:390
std::optional< InstructionCost > getFP16BF16PromoteCost(Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, bool IncludeTrunc, bool CanUseSVE, std::function< InstructionCost(Type *)> InstCost) const
FP16 and BF16 operations are lowered to fptrunc(op(fpext, fpext) if the architecture features are not...
Definition AArch64TargetTransformInfo.cpp:4195
bool prefersVectorizedAddressing() const override
Definition AArch64TargetTransformInfo.cpp:4755
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
Definition AArch64TargetTransformInfo.cpp:4164
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
Definition AArch64TargetTransformInfo.cpp:608
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const override
Definition AArch64TargetTransformInfo.cpp:5732
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override
Definition AArch64TargetTransformInfo.cpp:4134
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const override
Definition AArch64TargetTransformInfo.cpp:431
bool isElementTypeLegalForScalableVector(Type *Ty) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
Definition AArch64TargetTransformInfo.cpp:5376
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
Definition AArch64TargetTransformInfo.cpp:3229
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
Definition AArch64TargetTransformInfo.cpp:5292
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const override
Definition AArch64TargetTransformInfo.cpp:5418
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
Definition AArch64TargetTransformInfo.cpp:5522
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
Definition AArch64TargetTransformInfo.cpp:4867
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const override
Definition AArch64TargetTransformInfo.cpp:375
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const override
Definition AArch64TargetTransformInfo.cpp:6392
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override
Definition AArch64TargetTransformInfo.cpp:4181
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
Definition AArch64TargetTransformInfo.cpp:3954
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
Definition AArch64TargetTransformInfo.cpp:6557
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override
Definition AArch64TargetTransformInfo.cpp:2986
bool useNeonVector(const Type *Ty) const
Definition AArch64TargetTransformInfo.cpp:4863
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
Definition AArch64TargetTransformInfo.cpp:2871
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
Definition AArch64TargetTransformInfo.cpp:4601
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
Definition AArch64TargetTransformInfo.cpp:5911
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const override
Definition AArch64TargetTransformInfo.cpp:6308
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
Definition AArch64TargetTransformInfo.cpp:5704
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
Definition AArch64TargetTransformInfo.cpp:552
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index, TTI::TargetCostKind CostKind) const override
Definition AArch64TargetTransformInfo.cpp:3895
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const override
Definition AArch64TargetTransformInfo.cpp:340
bool areInlineCompatible(const Function *Caller, const Function *Callee) const override
Definition AArch64TargetTransformInfo.cpp:272
unsigned getMaxNumElements(ElementCount VF) const
Try to return an estimated cost factor that can be used as a multiplier when scalarizing an operation ...
bool shouldTreatInstructionLikeSelect(const Instruction *I) const override
Definition AArch64TargetTransformInfo.cpp:6373
bool isMultiversionedFunction(const Function &F) const override
Definition AArch64TargetTransformInfo.cpp:264
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
Definition AArch64TargetTransformInfo.cpp:3018
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const override
Definition AArch64TargetTransformInfo.cpp:5490
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override
Definition AArch64TargetTransformInfo.cpp:4737
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
Definition AArch64TargetTransformInfo.cpp:500
bool isLegalMaskedGatherScatter(Type *DataType) const
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const override
See if I should be considered for address type promotion.
Definition AArch64TargetTransformInfo.cpp:5463
APInt getFeatureMask(const Function &F) const override
Definition AArch64TargetTransformInfo.cpp:255
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
Definition AArch64TargetTransformInfo.cpp:4963
bool areTypesABICompatible(const Function *Caller, const Function *Callee, ArrayRef< Type * > Types) const override
Definition AArch64TargetTransformInfo.cpp:315
bool enableScalableVectorization() const override
Definition AArch64TargetTransformInfo.cpp:3012
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
Definition AArch64TargetTransformInfo.cpp:4760
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType, bool CanCreate=true) const override
Definition AArch64TargetTransformInfo.cpp:5381
bool hasKnownLowerThroughputFromSchedulingModel(unsigned Opcode1, unsigned Opcode2) const
Check whether Opcode1 has lower throughput than Opcode2 according to the scheduling model.
Definition AArch64TargetTransformInfo.cpp:4577
unsigned getEpilogueVectorizationMinVF() const override
Definition AArch64TargetTransformInfo.cpp:6304
InstructionCost getSpliceCost(VectorType *Tp, int Index, TTI::TargetCostKind CostKind) const
Definition AArch64TargetTransformInfo.cpp:5755
InstructionCost getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy, TTI::TargetCostKind CostKind) const
Definition AArch64TargetTransformInfo.cpp:5548
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const override
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
Definition AArch64TargetTransformInfo.cpp:6350
bool preferFixedOverScalableIfEqualCost(bool IsEpilogue) const override
Definition AArch64TargetTransformInfo.cpp:6292
Class for arbitrary precision integers.
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
unsigned popcount() const
Count the number of bits set.
unsigned countLeadingOnes() const
void negate()
Negate this APInt in place.
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
unsigned logBase2() const
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
int64_t getSExtValue() const
Get sign extended value.
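The APInt helpers above compose as ordinary value operations; below is a minimal, self-contained sketch assuming only LLVM's ADT headers (the function name apintDemo is illustrative).
  #include "llvm/ADT/APInt.h"
  #include <cassert>

  void apintDemo() {
    llvm::APInt V(32, 16);                                 // 32-bit value 16
    assert(V.isPowerOf2() && V.logBase2() == 4);           // 16 == 2^4
    llvm::APInt Low = llvm::APInt::getLowBitsSet(32, 8);   // 0x000000FF
    llvm::APInt High = llvm::APInt::getHighBitsSet(32, 8); // 0xFF000000
    assert((Low | High).popcount() == 16);                 // 8 low + 8 high bits set
    llvm::APInt Neg(32, -16, /*isSigned=*/true);
    assert(Neg.isNegatedPowerOf2());                       // -(-16) == 16 is a power of 2
    assert(Neg.sextOrTrunc(64).getSExtValue() == -16);     // sign extension preserves the value
  }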
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind) const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
bool isTypeLegal(Type *Ty) const override
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
static bool isIntPredicate(Predicate P)
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
const APInt & getValue() const
Return the constant as an APInt value reference.
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string, and methods for querying it.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
This provides a helper for copying FMF from an instruction or setting specified flags.
Convenience struct for specifying and reasoning about fast-math flags.
bool allowContract() const
Container class for subtarget features.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
CallInst * CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec, Value *Idx, const Twine &Name="")
Create a call to the vector.insert intrinsic.
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Type * getDoubleTy()
Fetch the type representing a 64-bit floating point value.
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcast to NumElts elements.
LLVM_ABI CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Type * getHalfTy()
Fetch the type representing a 16-bit floating point value.
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateBinOpFMF(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
LLVM_ABI CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Type * getFloatTy()
Fetch the type representing a 32-bit floating point value.
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
LLVM_ABI Value * CreateElementCount(Type *Ty, ElementCount EC)
Create an expression which evaluates to the number of elements in EC at runtime.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
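A minimal sketch of a few of the IRBuilder calls listed above, assuming LLVM's IR headers; the helper names are illustrative and the builder is presumed to already have an insertion point.
  #include "llvm/IR/IRBuilder.h"
  #include "llvm/Support/TypeSize.h"

  // Broadcast Scalar into a fixed 4-lane vector, then read lane 0 back.
  llvm::Value *splatAndReadLane0(llvm::IRBuilder<> &B, llvm::Value *Scalar) {
    llvm::Value *Splat = B.CreateVectorSplat(4, Scalar, "splat");
    return B.CreateExtractElement(Splat, B.getInt64(0), "lane0");
  }

  // Materialize the runtime element count of a scalable type (vscale x 4).
  llvm::Value *scalableLength(llvm::IRBuilder<> &B) {
    return B.CreateElementCount(B.getInt64Ty(), llvm::ElementCount::getScalable(4));
  }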
This instruction inserts a single (scalar) element into a VectorType value.
The core instruction combiner logic.
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
static InstructionCost getInvalid(CostType Val=0)
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
bool hasGroups() const
Returns true if we have any interleave groups.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
iterator_range< block_iterator > blocks() const
RecurrenceSet & getFixedOrderRecurrences()
Return the fixed-order recurrences found in the loop.
DominatorTree * getDominatorTree() const
PredicatedScalarEvolution * getPredicatedScalarEvolution() const
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
Represents a single loop in the control flow graph.
const FeatureBitset & getFeatureBits() const
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
Information for memory intrinsic cost model.
Align getAlignment() const
Type * getDataType() const
Intrinsic::ID getID() const
const Instruction * getInst() const
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Type * getRecurrenceType() const
Returns the type of the recurrence.
RecurKind getRecurrenceKind() const
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
This class represents an analyzed expression in the program.
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasNonStreamingInterfaceAndBody() const
bool hasStreamingCompatibleInterface() const
bool hasStreamingInterfaceOrBody() const
bool isSMEABIRoutine() const
bool hasStreamingBody() const
void set(unsigned M, bool Enable=true)
SMECallAttrs is a utility class to hold the SMEAttrs for a callsite.
bool requiresPreservingZT0() const
bool requiresSMChange() const
bool requiresLazySave() const
bool requiresPreservingAllZAState() const
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
static ScalableVectorType * getDoubleElementsVectorType(ScalableVectorType *VTy)
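The two ScalableVectorType factories above pair naturally when widening SVE element counts; a short sketch assuming LLVM's IR headers:
  #include "llvm/IR/DerivedTypes.h"

  llvm::ScalableVectorType *doubleElements(llvm::LLVMContext &Ctx) {
    auto *VTy = llvm::ScalableVectorType::get(llvm::Type::getInt32Ty(Ctx), 4); // <vscale x 4 x i32>
    return llvm::ScalableVectorType::getDoubleElementsVectorType(VTy);         // <vscale x 8 x i32>
  }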
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value,...
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns the upper bound of the loop trip count as a normal unsigned value.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getSymbolicMaxBackedgeTakenCount(const Loop *L)
When successful, this returns a SCEV that is greater than or equal to (i.e.
This instruction constructs a fixed permutation of two input vectors.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static LLVM_ABI bool isExtractSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &Index)
Return true if this shuffle mask is an extract subvector mask.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
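StringRef::split is the natural tool for '+'-separated option strings like the tail-folding grammar shown earlier in this index; a tiny sketch assuming LLVM's ADT headers (names are illustrative):
  #include "llvm/ADT/StringRef.h"
  #include <cassert>

  void splitDemo() {
    llvm::StringRef S("default+noreverse");
    auto [Head, Tail] = S.split('+'); // splits at the first '+'
    assert(Head == "default" && Tail == "noreverse");
  }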
Class to represent struct types.
TargetInstrInfo - Interface to description of machine instruction set.
std::pair< LegalizeTypeAction, EVT > LegalizeKind
LegalizeKind holds the legalization kind that needs to happen to EVT in order to type-legalize it.
const RTLIB::RuntimeLibcallsInfo & getRuntimeLibcallsInfo() const
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
virtual const DataLayout & getDataLayout() const
virtual bool shouldTreatInstructionLikeSelect(const Instruction *I) const
virtual bool isLoweredToCall(const Function *F) const
virtual bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const
bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr, int64_t MergeDistance) const
virtual bool areTypesABICompatible(const Function *Caller, const Function *Callee, ArrayRef< Type * > Types) const
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind) const override
static LLVM_ABI OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCK_Latency
The latency of instruction.
static bool requiresOrderedReduction(std::optional< FastMathFlags > FMF)
A helper function to determine the type of reduction algorithm used for a given Opcode and set of Fas...
PopcntSupportKind
Flags indicating the kind of support for population count.
PartialReductionExtendKind
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
ShuffleKind
The various kinds of shuffle patterns for vector queries.
@ SK_InsertSubvector
InsertSubvector. Index indicates start offset.
@ SK_Select
Selects elements from the corresponding lane of either source operand.
@ SK_PermuteSingleSrc
Shuffle elements of single source vector with any shuffle mask.
@ SK_Transpose
Transpose two vectors.
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
@ SK_Broadcast
Broadcast element 0 to all other elements.
@ SK_PermuteTwoSrc
Merge elements from two source vectors into one with any shuffle mask.
@ SK_Reverse
Reverse the order of the vector.
@ SK_ExtractSubvector
ExtractSubvector Index indicates start offset.
CastContextHint
Represents a hint about the context in which a cast is used.
@ Masked
The cast is used with a masked load/store.
@ None
The cast is not used with a load/store of any kind.
@ Normal
The cast is used with a normal load/store.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Type * getElementType() const
constexpr ScalarTy getFixedValue() const
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
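ElementCount and the other FixedOrScalableQuantity helpers above encode arithmetic that is only "known" up to the runtime vscale; a sketch assuming LLVM's Support headers. Note that a fixed count of 4 is not known to be less than vscale x 4, because vscale may be 1.
  #include "llvm/Support/TypeSize.h"
  #include <cassert>

  void quantityDemo() {
    llvm::ElementCount Fixed4 = llvm::ElementCount::getFixed(4);
    llvm::ElementCount Scal4 = llvm::ElementCount::getScalable(4); // vscale x 4
    assert(!llvm::ElementCount::isKnownLT(Fixed4, Scal4));         // equal when vscale == 1
    assert(Scal4.isScalable() && Scal4.getKnownMinValue() == 4);
    llvm::TypeSize Bits = llvm::TypeSize::getScalable(128);
    assert(Bits.divideCoefficientBy(4) == llvm::TypeSize::getScalable(32));
  }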
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
static constexpr unsigned SVEBitsPerBlock
LLVM_ABI APInt getFMVPriority(ArrayRef< StringRef > Features)
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
@ C
The default llvm calling convention, compatible with C.
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types a...
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ SIGN_EXTEND
Conversion operators.
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
cst_pred_ty< is_nonnegative > m_NonNegative()
Match an integer or vector of non-negative values.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_IntrinsicIntrinsic::fabs(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
IntrinsicID_match m_VScale()
Matches a call to llvm.vscale().
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
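The m_* matchers above compose into declarative patterns over IR; a minimal sketch assuming LLVM's IR headers (the function name is illustrative).
  #include "llvm/IR/PatternMatch.h"

  // Return the non-constant operand of "add X, C" (in either order),
  // or nullptr if V is not such an add.
  llvm::Value *matchAddWithConstant(llvm::Value *V) {
    using namespace llvm::PatternMatch;
    llvm::Value *X;
    if (match(V, m_c_Add(m_Value(X), m_ConstantInt())))
      return X;
    return nullptr;
  }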
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
std::optional< unsigned > isDUPQMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPQMask - matches a splat of equivalent lanes within segments of a given number of elements.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
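CostTableLookup drives the per-opcode cost tables used throughout this file; a self-contained sketch, assuming a recent LLVM tree for the MachineValueType header path (the table contents are made up for illustration).
  #include "llvm/CodeGen/CostTable.h"
  #include "llvm/CodeGen/ISDOpcodes.h"
  #include "llvm/CodeGenTypes/MachineValueType.h"
  #include "llvm/Support/InstructionCost.h"

  static const llvm::CostTblEntry DemoTbl[] = {
      {llvm::ISD::ADD, llvm::MVT::v4i32, 1}, // illustrative costs only
      {llvm::ISD::MUL, llvm::MVT::v4i32, 4},
  };

  llvm::InstructionCost lookupDemo(int ISDOpc, llvm::MVT VT) {
    if (const auto *Entry = llvm::CostTableLookup(DemoTbl, ISDOpc, VT))
      return Entry->Cost;
    return llvm::InstructionCost::getInvalid();
  }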
LLVM_ABI bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
bool isZIPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)
Return true for zip1 or zip2 masks of the form: <0, 8, 1, 9, 2, 10, 3, 11> (WhichResultOut = 0,...
TailFoldingOpts
An enum to describe what types of loops we should attempt to tail-fold: Disabled: None Reductions: Lo...
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
bool isDUPFirstSegmentMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPFirstSegmentMask - matches a splat of the first 128b segment.
TypeConversionCostTblEntryT< unsigned > TypeConversionCostTblEntry
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
LLVM_ABI std::optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for loop.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI bool MaskedValueIsZero(const Value *V, const APInt &Mask, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if 'V & Mask' is known to be zero.
unsigned M1(unsigned Val)
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
unsigned getPerfectShuffleCost(llvm::ArrayRef< int > M)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
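The MathExtras predicates above are cheap bit tricks; a sketch assuming LLVM's Support headers:
  #include "llvm/Support/MathExtras.h"
  #include <cassert>

  void mathDemo() {
    assert(llvm::isPowerOf2_32(64u));
    assert(llvm::Log2_32(64u) == 6u);        // floor(log2(64)) == 6
    assert(llvm::isPowerOf2_64(1ull << 40));
    assert(llvm::NextPowerOf2(64u) == 128u); // strictly greater than the input
  }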
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool isUZPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for uzp1 or uzp2 masks of the form: <0, 2, 4, 6, 8, 10, 12, 14> or <1,...
bool isREVMask(ArrayRef< int > M, unsigned EltSize, unsigned NumElts, unsigned BlockSize)
isREVMask - Check if a vector shuffle corresponds to a REV instruction with the specified blocksize.
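The ZIP/UZP/REV mask predicates above come from the AArch64 backend's internal headers (assumed here to be AArch64PerfectShuffle.h), so the sketch below only builds inside the backend; the mask is the zip1 example from the documentation above.
  #include "AArch64PerfectShuffle.h"
  #include <cassert>

  void zipMaskDemo() {
    const int Mask[] = {0, 8, 1, 9, 2, 10, 3, 11}; // zip1 of two 8-element vectors
    unsigned WhichResult, OperandOrder;
    bool IsZip = llvm::isZIPMask(Mask, /*NumElts=*/8, WhichResult, OperandOrder);
    assert(IsZip && WhichResult == 0);
    (void)IsZip;
    (void)OperandOrder;
  }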
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ Or
Bitwise or logical OR of integers.
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ Xor
Bitwise or logical XOR of integers.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
DWARFExpression::Operation Op
CostTblEntryT< unsigned > CostTblEntry
decltype(auto) cast(const From &Val)
cast - Return the argument parameter cast to the specified type.
unsigned getNumElementsFromSVEPredPattern(unsigned Pattern)
Return the number of active elements for VL1 to VL256 predicate pattern, zero for all other patterns.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer list are equal or the list is empty.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DominatorTree &DT, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
unsigned getMatchingIROpode() const
Definition AArch64TargetTransformInfo.cpp:1211
bool inactiveLanesAreUnused() const
Definition AArch64TargetTransformInfo.cpp:1252
bool inactiveLanesAreNotDefined() const
Definition AArch64TargetTransformInfo.cpp:1242
bool hasMatchingUndefIntrinsic() const
Definition AArch64TargetTransformInfo.cpp:1194
static SVEIntrinsicInfo defaultMergingUnaryNarrowingTopOp()
Definition AArch64TargetTransformInfo.cpp:1133
static SVEIntrinsicInfo defaultZeroingOp()
Definition AArch64TargetTransformInfo.cpp:1156
bool hasGoverningPredicate() const
Definition AArch64TargetTransformInfo.cpp:1171
SVEIntrinsicInfo & setOperandIdxInactiveLanesTakenFrom(unsigned Index)
Definition AArch64TargetTransformInfo.cpp:1235
static SVEIntrinsicInfo defaultMergingOp(Intrinsic::ID IID=Intrinsic::not_intrinsic)
Definition AArch64TargetTransformInfo.cpp:1117
SVEIntrinsicInfo & setOperandIdxWithNoActiveLanes(unsigned Index)
Definition AArch64TargetTransformInfo.cpp:1287
unsigned getOperandIdxWithNoActiveLanes() const
Definition AArch64TargetTransformInfo.cpp:1282
SVEIntrinsicInfo & setInactiveLanesAreUnused()
Definition AArch64TargetTransformInfo.cpp:1256
SVEIntrinsicInfo & setInactiveLanesAreNotDefined()
Definition AArch64TargetTransformInfo.cpp:1246
SVEIntrinsicInfo & setGoverningPredicateOperandIdx(unsigned Index)
Definition AArch64TargetTransformInfo.cpp:1180
bool inactiveLanesTakenFromOperand() const
Definition AArch64TargetTransformInfo.cpp:1226
static SVEIntrinsicInfo defaultUndefOp()
Definition AArch64TargetTransformInfo.cpp:1140
bool hasOperandWithNoActiveLanes() const
Definition AArch64TargetTransformInfo.cpp:1278
Intrinsic::ID getMatchingUndefIntrinsic() const
Definition AArch64TargetTransformInfo.cpp:1198
SVEIntrinsicInfo & setResultIsZeroInitialized()
Definition AArch64TargetTransformInfo.cpp:1267
static SVEIntrinsicInfo defaultMergingUnaryOp()
Definition AArch64TargetTransformInfo.cpp:1125
SVEIntrinsicInfo & setMatchingUndefIntrinsic(Intrinsic::ID IID)
Definition AArch64TargetTransformInfo.cpp:1203
unsigned getGoverningPredicateOperandIdx() const
Definition AArch64TargetTransformInfo.cpp:1175
bool hasMatchingIROpode() const
Definition AArch64TargetTransformInfo.cpp:1209
bool resultIsZeroInitialized() const
Definition AArch64TargetTransformInfo.cpp:1265
SVEIntrinsicInfo & setMatchingIROpcode(unsigned Opcode)
Definition AArch64TargetTransformInfo.cpp:1216
unsigned getOperandIdxInactiveLanesTakenFrom() const
Definition AArch64TargetTransformInfo.cpp:1230
static SVEIntrinsicInfo defaultVoidOp(unsigned GPIndex)
Definition AArch64TargetTransformInfo.cpp:1148
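A hedged sketch of how the factories and chained setters above combine; the choice of intrinsic and opcode is illustrative, following the defaultMergingOp shape:

SVEIntrinsicInfo Info =
    SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fadd_u)
        .setMatchingIROpcode(Instruction::FAdd);
if (Info.hasGoverningPredicate()) {
  unsigned GPIdx = Info.getGoverningPredicateOperandIdx(); // predicate operand
  (void)GPIdx;
}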
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
uint64_t getScalarSizeInBits() const
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isFixedLengthVector() const
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
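A sketch of a typical query sequence through the EVT layer; Ty and Ctx are assumed to be a Type* and LLVMContext from the caller:

EVT VT = EVT::getEVT(Ty);
if (VT.isSimple() && VT.isFixedLengthVector()) {
  MVT SimpleVT = VT.getSimpleVT();
  EVT EltVT = VT.getVectorElementType();
  unsigned NumElts = VT.getVectorNumElements();
  bool WiderThanElt = VT.bitsGT(EltVT); // total size vs. element size
  Type *Back = VT.getTypeForEVT(Ctx);   // round-trip to an IR type
}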
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Machine model for scheduling, bundling, and heuristics.
static LLVM_ABI double getReciprocalThroughput(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)
Information about a load/store intrinsic defined by the target.
InterleavedAccessInfo * IAI
LoopVectorizationLegality * LVL
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*vscale.
unsigned Insns
TODO: Some of these could be merged.
Returns options for expansion of memcmp. IsZeroCmp is true if this is the expansion of memcmp(p1, p2, s) == 0.
bool isNegatedPowerOf2() const
OperandValueInfo getNoProps() const
Parameters that control the generic loop unrolling transformation.
bool UpperBound
Allow using trip count upper bound to unroll loops.
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold, but used for partial/runtime unrolling (set to UINT_MAX to disable).
unsigned DefaultUnrollRuntimeCount
Default unroll count for loops with run-time trip count.
bool RuntimeUnrollMultiExit
Allow runtime unrolling multi-exit loops.
unsigned SCEVExpansionBudget
Don't allow runtime unrolling if expanding the trip count takes more than SCEVExpansionBudget.
bool AddAdditionalAccumulators
Allow unrolling to add parallel reduction phis.
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set to UINT_MAX to disable).
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number of loop iterations is not known at compile time).
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminate compile-time constant trip counts).
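A hedged sketch of a target hook filling in these knobs; MyTTIImpl and the specific values are assumptions, not AArch64's actual tuning:

void MyTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                        TTI::UnrollingPreferences &UP,
                                        OptimizationRemarkEmitter *ORE) {
  UP.Partial = true;                // permit partial unrolling
  UP.Runtime = true;                // permit runtime unrolling
  UP.PartialThreshold = 300;        // cost budget for partial unrolls
  UP.DefaultUnrollRuntimeCount = 4; // runtime unroll factor to try first
  UP.UpperBound = true;             // may unroll using the trip-count upper bound
  UP.UnrollRemainder = false;       // keep the runtime remainder loop rolled
}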