LLVM: lib/Target/ARM/ARMTargetTransformInfo.cpp Source File
#include "llvm/IR/IntrinsicsARM.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <optional>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "armtti"
46
static cl::opt<bool> EnableMaskedLoadStores(
    "enable-arm-maskedldst", cl::Hidden, cl::init(true),
    cl::desc("Enable the generation of masked loads and stores"));

static cl::opt<bool> DisableLowOverheadLoops(
    "disable-arm-loloops", cl::Hidden, cl::init(false),
    cl::desc("Disable the generation of low-overhead loops"));

static cl::opt<bool> AllowWLSLoops(
    "allow-arm-wlsloops", cl::Hidden, cl::init(true),
    cl::desc("Enable the generation of WLS loops"));

static cl::opt<bool> UseWidenGlobalArrays(
    "widen-global-strings", cl::Hidden, cl::init(true),
    cl::desc("Enable the widening of global strings to alignment boundaries"));
extern cl::opt<TailPredication::Mode> EnableTailPredication;

extern cl::opt<bool> EnableMaskedGatherScatters;

extern cl::opt<unsigned> MVEMaxSupportedInterleaveFactor;

/// Convert a vector load intrinsic into a simple llvm load instruction.
static Value *simplifyNeonVld1(const IntrinsicInst &II, unsigned MemAlign,
                               InstCombiner::BuilderTy &Builder) {
  auto *IntrAlign = dyn_cast<ConstantInt>(II.getArgOperand(1));

  if (!IntrAlign)
    return nullptr;

  unsigned Alignment = IntrAlign->getLimitedValue() < MemAlign
                           ? MemAlign
                           : IntrAlign->getLimitedValue();

  if (!isPowerOf2_32(Alignment))
    return nullptr;

  return Builder.CreateAlignedLoad(II.getType(), II.getArgOperand(0),
                                   Align(Alignment));
}
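
// Illustrative sketch (not upstream code; helper name is hypothetical): the
// alignment chosen by simplifyNeonVld1 is simply the larger of the intrinsic's
// alignment argument and the alignment known from the surrounding context.
static unsigned examplePickVld1Alignment(unsigned IntrAlign, unsigned MemAlign) {
  return IntrAlign < MemAlign ? MemAlign : IntrAlign; // e.g. (8, 16) -> 16
}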
89
bool ARMTTIImpl::areInlineCompatible(const Function *Caller,
                                     const Function *Callee) const {
  const TargetMachine &TM = getTLI()->getTargetMachine();

  const FeatureBitset &CallerBits =
      TM.getSubtargetImpl(*Caller)->getFeatureBits();
  const FeatureBitset &CalleeBits =
      TM.getSubtargetImpl(*Callee)->getFeatureBits();

  // Features outside the allowed list must match exactly.
  bool MatchExact = (CallerBits & ~InlineFeaturesAllowed) ==
                    (CalleeBits & ~InlineFeaturesAllowed);

  // For features in the allowed list, the callee's features must be a subset
  // of the caller's.
  bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeaturesAllowed) ==
                     (CalleeBits & InlineFeaturesAllowed);
  return MatchExact && MatchSubset;
}
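
// Illustrative sketch (hypothetical helper using plain integers instead of
// FeatureBitset): inlining is allowed when the features outside the allowed
// set match exactly and, within the allowed set, the callee's features are a
// subset of the caller's.
static bool exampleInlineCompatible(uint64_t CallerBits, uint64_t CalleeBits,
                                    uint64_t AllowedMask) {
  bool MatchExact = (CallerBits & ~AllowedMask) == (CalleeBits & ~AllowedMask);
  bool MatchSubset =
      ((CallerBits & CalleeBits) & AllowedMask) == (CalleeBits & AllowedMask);
  return MatchExact && MatchSubset;
}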

TTI::AddressingModeKind
ARMTTIImpl::getPreferredAddressingMode(const Loop *L,
                                       ScalarEvolution *SE) const {
  if (ST->hasMVEIntegerOps())
    return TTI::AMK_PostIndexed;

  if (L->getHeader()->getParent()->hasOptSize())
    return TTI::AMK_None;

  if (ST->isMClass() && ST->isThumb2() &&
      L->getNumBlocks() == 1)
    return TTI::AMK_PreIndexed;

  return TTI::AMK_None;
}
123
std::optional<Instruction *>
ARMTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
  using namespace PatternMatch;
  Intrinsic::ID IID = II.getIntrinsicID();
  switch (IID) {
  default:
    break;
  case Intrinsic::arm_neon_vld1: {
    Align MemAlign =
        getKnownAlignment(II.getArgOperand(0), IC.getDataLayout(), &II,
                          &IC.getAssumptionCache(), &IC.getDominatorTree());
    if (Value *V = simplifyNeonVld1(II, MemAlign.value(), IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  }
140
141 case Intrinsic::arm_neon_vld2:
142 case Intrinsic::arm_neon_vld3:
143 case Intrinsic::arm_neon_vld4:
144 case Intrinsic::arm_neon_vld2lane:
145 case Intrinsic::arm_neon_vld3lane:
146 case Intrinsic::arm_neon_vld4lane:
147 case Intrinsic::arm_neon_vst1:
148 case Intrinsic::arm_neon_vst2:
149 case Intrinsic::arm_neon_vst3:
150 case Intrinsic::arm_neon_vst4:
151 case Intrinsic::arm_neon_vst2lane:
152 case Intrinsic::arm_neon_vst3lane:
153 case Intrinsic::arm_neon_vst4lane: {
    unsigned AlignArg = II.arg_size() - 1;
    Value *AlignArgOp = II.getArgOperand(AlignArg);
    MaybeAlign Align = cast<ConstantInt>(AlignArgOp)->getMaybeAlignValue();
162 II, AlignArg,
164 false));
165 }
166 break;
167 }
168
169 case Intrinsic::arm_neon_vld1x2:
170 case Intrinsic::arm_neon_vld1x3:
171 case Intrinsic::arm_neon_vld1x4:
172 case Intrinsic::arm_neon_vst1x2:
173 case Intrinsic::arm_neon_vst1x3:
174 case Intrinsic::arm_neon_vst1x4: {
178 Align OldAlign = II.getParamAlign(0).valueOrOne();
179 if (NewAlign > OldAlign)
180 II.addParamAttr(0,
182 break;
183 }
184
185 case Intrinsic::arm_mve_pred_i2v: {
    Value *Arg = II.getArgOperand(0);
    Value *ArgArg;
    if (match(Arg, PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_v2i>(
                       PatternMatch::m_Value(ArgArg))) &&
        II.getType() == ArgArg->getType()) {
      return IC.replaceInstUsesWith(II, ArgArg);
    }
    Value *XorMask;
    if (match(Arg, m_Xor(PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_v2i>(
                             PatternMatch::m_Value(ArgArg)),
                         PatternMatch::m_Value(XorMask))) &&
        II.getType() == ArgArg->getType()) {
      if (auto *CI = dyn_cast<ConstantInt>(XorMask)) {
        if (CI->getValue().trunc(16).isAllOnes()) {
          auto TrueVector = IC.Builder.CreateVectorSplat(
              cast<FixedVectorType>(II.getType())->getNumElements(),
              IC.Builder.getTrue());
          return BinaryOperator::Create(Instruction::Xor, ArgArg, TrueVector);
204 }
205 }
206 }
209 ScalarKnown)) {
210 return &II;
211 }
212 break;
213 }
214 case Intrinsic::arm_mve_pred_v2i: {
215 Value *Arg = II.getArgOperand(0);
    if (match(Arg, PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_i2v>(
220 }
221
222 if (II.getMetadata(LLVMContext::MD_range))
223 break;
224
226
227 if (auto CurrentRange = II.getRange()) {
229 if (Range == CurrentRange)
230 break;
231 }
232
233 II.addRangeRetAttr(Range);
234 II.addRetAttr(Attribute::NoUndef);
235 return &II;
236 }
237 case Intrinsic::arm_mve_vadc:
238 case Intrinsic::arm_mve_vadc_predicated: {
239 unsigned CarryOp =
240 (II.getIntrinsicID() == Intrinsic::arm_mve_vadc_predicated) ? 3 : 2;
241 assert(II.getArgOperand(CarryOp)->getType()->getScalarSizeInBits() == 32 &&
242 "Bad type for intrinsic!");
243
246 CarryKnown)) {
247 return &II;
248 }
249 break;
250 }
251 case Intrinsic::arm_mve_vmldava: {
253 if (I->hasOneUse()) {
254 auto *User = cast(*I->user_begin());
258 Value *OpX = I->getOperand(4);
259 Value *OpY = I->getOperand(5);
261
265 {I->getOperand(0), I->getOperand(1),
266 I->getOperand(2), OpZ, OpX, OpY});
267
270 }
271 }
272 return std::nullopt;
273 }
274 }
275 return std::nullopt;
276}
277
282 SimplifyAndSetOp) const {
283
284
285
286
287 auto SimplifyNarrowInstrTopBottom =[&](unsigned TopOpc) {
288 unsigned NumElts = cast(II.getType())->getNumElements();
289 unsigned IsTop = cast(II.getOperand(TopOpc))->getZExtValue();
290
291
292
293 APInt DemandedElts =
296 SimplifyAndSetOp(&II, 0, OrigDemandedElts & DemandedElts, UndefElts);
297
300 return std::nullopt;
301 };
302
303 switch (II.getIntrinsicID()) {
304 default:
305 break;
306 case Intrinsic::arm_mve_vcvt_narrow:
307 SimplifyNarrowInstrTopBottom(2);
308 break;
309 case Intrinsic::arm_mve_vqmovn:
310 SimplifyNarrowInstrTopBottom(4);
311 break;
312 case Intrinsic::arm_mve_vshrn:
313 SimplifyNarrowInstrTopBottom(7);
314 break;
315 }
316
317 return std::nullopt;
318}
319
323
325 if (Bits == 0 || Imm.getActiveBits() >= 64)
326 return 4;
327
328 int64_t SImmVal = Imm.getSExtValue();
329 uint64_t ZImmVal = Imm.getZExtValue();
330 if (!ST->isThumb()) {
331 if ((SImmVal >= 0 && SImmVal < 65536) ||
334 return 1;
335 return ST->hasV6T2Ops() ? 2 : 3;
336 }
338 if ((SImmVal >= 0 && SImmVal < 65536) ||
341 return 1;
342 return ST->hasV6T2Ops() ? 2 : 3;
343 }
344
345 if (Bits == 8 || (SImmVal >= 0 && SImmVal < 256))
346 return 1;
348 return 2;
349
350 return 3;
351}
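
// Illustrative sketch (a simplification of the cost logic above, not the real
// API): an immediate that fits in 16 bits is one MOV/MOVW; otherwise assume a
// MOVW+MOVT pair on v6T2+ targets and a longer sequence before that.
static int exampleScalarImmCost(int64_t SImm, bool HasV6T2) {
  if (SImm >= 0 && SImm < 65536)
    return 1;
  return HasV6T2 ? 2 : 3;
}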
352
353
354
357 if (Imm.isNonNegative() && Imm.getLimitedValue() < 256)
358 return 0;
359
360 return 1;
361}
362
363
364
365
370
373 C->getValue() == Imm && Imm.isNegative() && Imm.isNegatedPowerOf2()) {
374
375 auto isSSatMin = [&](Value *MinInst) {
376 if (isa(MinInst)) {
377 Value *MinLHS, *MinRHS;
383 MinC->getValue() == ((-Imm) - 1))
384 return true;
385 }
386 return false;
387 };
388
390 return cast(Inst->getOperand(1))->getOperand(1);
394 }
395 return nullptr;
396}
397
398
399
401 if (Imm.getBitWidth() != 64 ||
403 return false;
405 if ( && isa(Inst) && Inst->hasOneUse())
407 if ()
408 return false;
409 return isa(FP);
410}
411
416
417
418
419
420 if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv ||
421 Opcode == Instruction::SRem || Opcode == Instruction::URem) &&
422 Idx == 1)
423 return 0;
424
425
426
427 if (Opcode == Instruction::GetElementPtr && Idx != 0)
428 return 0;
429
430 if (Opcode == Instruction::And) {
431
432 if (Imm == 255 || Imm == 65535)
433 return 0;
434
437 }
438
439 if (Opcode == Instruction::Add)
440
443
444 if (Opcode == Instruction::ICmp && Imm.isNegative() &&
446 int64_t NegImm = -Imm.getSExtValue();
447 if (ST->isThumb2() && NegImm < 1<<12)
448
449 return 0;
450 if (ST->isThumb() && NegImm < 1<<8)
451
452 return 0;
453 }
454
455
456 if (Opcode == Instruction::Xor && Imm.isAllOnes())
457 return 0;
458
459
460
461 if (Inst && ((ST->hasV6Ops() && !ST->isThumb()) || ST->isThumb2()) &&
464 (isa(Inst) && Inst->hasOneUse() &&
466 return 0;
467 }
468
470 return 0;
471
472
473 if (Inst && Opcode == Instruction::ICmp && Idx == 1 && Imm.isAllOnes()) {
478 }
479
481}
482
487 (ST->hasNEON() || ST->hasMVEIntegerOps())) {
488
489
490
491
492 return 0;
493 }
495}
496
503 assert(ISD && "Invalid opcode");
504
505
508 return Cost == 0 ? 0 : 1;
510 };
511 auto IsLegalFPType = [this](EVT VT) {
513 return (EltVT == MVT::f32 && ST->hasVFP2Base()) ||
514 (EltVT == MVT::f64 && ST->hasFP64()) ||
515 (EltVT == MVT::f16 && ST->hasFullFP16());
516 };
517
520
522 return AdjustCost(
524
525
526
527
528 if ((ST->hasMVEIntegerOps() &&
529 (Opcode == Instruction::Trunc || Opcode == Instruction::ZExt ||
530 Opcode == Instruction::SExt)) ||
531 (ST->hasMVEFloatOps() &&
532 (Opcode == Instruction::FPExt || Opcode == Instruction::FPTrunc) &&
533 IsLegalFPType(SrcTy) && IsLegalFPType(DstTy)))
537
538
554 };
557 return AdjustCost(Entry->Cost);
558
566
567
568
575 };
576 if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
577 if (const auto *Entry =
581 }
582
584
587 };
588 if (SrcTy.isVector() && ST->hasMVEFloatOps()) {
589 if (const auto *Entry =
593 }
594
595
604 };
605 if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
606 if (const auto *Entry =
610 }
611
615 };
616 if (SrcTy.isVector() && ST->hasMVEFloatOps()) {
617 if (const auto *Entry =
621 }
622 }
623
624
626 I && I->hasOneUse() && ST->hasNEON() && SrcTy.isVector()) {
628
629 { ISD::ADD, MVT::v4i32, MVT::v4i16, 0 },
630 { ISD::ADD, MVT::v8i16, MVT::v8i8, 0 },
631
632 { ISD::SUB, MVT::v4i32, MVT::v4i16, 0 },
633 { ISD::SUB, MVT::v8i16, MVT::v8i8, 0 },
634
635 { ISD::MUL, MVT::v4i32, MVT::v4i16, 0 },
636 { ISD::MUL, MVT::v8i16, MVT::v8i8, 0 },
637
638 { ISD::SHL, MVT::v4i32, MVT::v4i16, 0 },
639 { ISD::SHL, MVT::v8i16, MVT::v8i8, 0 },
640 };
641
642 auto *User = cast(*I->user_begin());
647 return AdjustCost(Entry->Cost);
648 }
649 }
650
651
652 if (Src->isVectorTy() && ST->hasNEON() &&
657 static const CostTblEntry NEONFltDblTbl[] = {
658
662
664 if (const auto *Entry = CostTableLookup(NEONFltDblTbl, ISD, LT.second))
665 return AdjustCost(LT.first * Entry->Cost);
666 }
667
668
669
670
678
679
698
699
702
703
706
727
734
735
738
745
752 };
753
754 if (SrcTy.isVector() && ST->hasNEON()) {
758 return AdjustCost(Entry->Cost);
759 }
760
761
783 };
788 return AdjustCost(Entry->Cost);
789 }
790
791
813 };
814
815 if (SrcTy.isInteger() && ST->hasNEON()) {
819 return AdjustCost(Entry->Cost);
820 }
821
822
823
824
838 };
839
840 if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
845 }
846
848
849
850
853 int Lanes = 1;
856
857 if (IsLegalFPType(SrcTy) && IsLegalFPType(DstTy))
858 return Lanes;
859 else
860 return Lanes * CallCost;
861 }
862
863 if (ISD == ISD::TRUNCATE && ST->hasMVEIntegerOps() &&
865
866
873 }
874
875
877
879
880
885 };
886
891 return AdjustCost(Entry->Cost);
892 }
893
894 int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()
896 : 1;
897 return AdjustCost(
899}
900
903 unsigned Index, Value *Op0,
905
906
907 if (ST->hasSlowLoadDSubregister() && Opcode == Instruction::InsertElement &&
909 return 3;
910
911 if (ST->hasNEON() && (Opcode == Instruction::InsertElement ||
912 Opcode == Instruction::ExtractElement)) {
913
914
915 if (cast(ValTy)->getElementType()->isIntegerTy())
916 return 3;
917
918
919
922 return std::max(
924 2U);
925 }
926
927 if (ST->hasMVEIntegerOps() && (Opcode == Instruction::InsertElement ||
928 Opcode == Instruction::ExtractElement)) {
929
930
931
932 std::pair<InstructionCost, MVT> LT =
935 }
936
938}
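
// Illustrative sketch (hypothetical helper; the constants mirror the paths
// above): integer insert/extract on NEON is priced at 3 because the lane has
// to be moved between the core and NEON register files, while other cases are
// assumed to stay cheap.
static int exampleLaneMoveCost(bool IsIntegerElt, bool HasNEON) {
  if (HasNEON && IsIntegerElt)
    return 3; // cross-register-file VMOV
  return 1;   // assumed default within one register file
}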
939
945
946
948 ST->isThumb() && !ValTy->isVectorTy()) {
949
950 if (TLI->getValueType(DL, ValTy, true) == MVT::Other)
952
953
954
955
956
958
959
961
962
963
966
968 }
969
970
971
972
974 if ((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) && Sel &&
976 Sel = cast(Sel->user_back());
981 unsigned IID = 0;
982 switch (SPF) {
984 IID = Intrinsic::abs;
985 break;
987 IID = Intrinsic::smin;
988 break;
990 IID = Intrinsic::smax;
991 break;
993 IID = Intrinsic::umin;
994 break;
996 IID = Intrinsic::umax;
997 break;
999 IID = Intrinsic::minnum;
1000 break;
1002 IID = Intrinsic::maxnum;
1003 break;
1004 default:
1005 break;
1006 }
1007 if (IID) {
1008
1009 if (Sel != I)
1010 return 0;
1013 }
1014 }
1015
1016
1018
1020 { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 },
1021 { ISD::SELECT, MVT::v8i1, MVT::v8i64, 50 },
1022 { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 }
1023 };
1024
1031 return Entry->Cost;
1032 }
1033
1035 return LT.first;
1036 }
1037
1038 if (ST->hasMVEIntegerOps() && ValTy->isVectorTy() &&
1039 (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
1040 cast(ValTy)->getNumElements() > 1) {
1041 FixedVectorType *VecValTy = cast(ValTy);
1042 FixedVectorType *VecCondTy = dyn_cast_or_null(CondTy);
1043 if (!VecCondTy)
1045
1046
1047 if (Opcode == Instruction::FCmp && !ST->hasMVEFloatOps()) {
1048
1049
1051 true, CostKind) +
1053 false, CostKind) +
1058 }
1059
1062
1063
1064
1065
1066
1067 if (LT.second.isVector() && LT.second.getVectorNumElements() > 2) {
1068 if (LT.first > 1)
1069 return LT.first * BaseCost +
1071 false, CostKind);
1072 return BaseCost;
1073 }
1074 }
1075
1076
1077
1078 int BaseCost = 1;
1079 if (ST->hasMVEIntegerOps() && ValTy->isVectorTy())
1081
1084}
1085
1089
1090
1091
1092
1093 unsigned NumVectorInstToHideOverhead = 10;
1094 int MaxMergeDistance = 64;
1095
1096 if (ST->hasNEON()) {
1099 return NumVectorInstToHideOverhead;
1100
1101
1102
1103 return 1;
1104 }
1106}
1107
1110
1111
1112 switch (II->getIntrinsicID()) {
1113 case Intrinsic::arm_mve_vctp8:
1114 case Intrinsic::arm_mve_vctp16:
1115 case Intrinsic::arm_mve_vctp32:
1116 case Intrinsic::arm_mve_vctp64:
1117 return true;
1118 default:
1119 break;
1120 }
1121 }
1122 return false;
1123}
1124
1127 return false;
1128
1129 if (auto *VecTy = dyn_cast(DataTy)) {
1130
1131 if (VecTy->getNumElements() == 2)
1132 return false;
1133
1134
1136 if (VecWidth != 128 && VecTy->getElementType()->isFloatingPointTy())
1137 return false;
1138 }
1139
1141 return (EltWidth == 32 && Alignment >= 4) ||
1142 (EltWidth == 16 && Alignment >= 2) || (EltWidth == 8);
1143}
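
// Illustrative sketch (mirrors the legality rule above; helper name is
// hypothetical): masked loads/stores are allowed for 8-bit elements, and for
// 16/32-bit elements only when the access is naturally aligned.
static bool exampleMaskedAccessLegal(unsigned EltWidthBits, unsigned AlignBytes) {
  return (EltWidthBits == 32 && AlignBytes >= 4) ||
         (EltWidthBits == 16 && AlignBytes >= 2) || EltWidthBits == 8;
}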
1144
1147 return false;
1148
1150 return ((EltWidth == 32 && Alignment >= 4) ||
1151 (EltWidth == 16 && Alignment >= 2) || EltWidth == 8);
1152}
1153
1154
1155
1156
1159 unsigned DstAddrSpace = ~0u;
1160 unsigned SrcAddrSpace = ~0u;
1161 const Function *F = I->getParent()->getParent();
1162
  if (const auto *MC = dyn_cast<MemTransferInst>(I)) {
    ConstantInt *C = dyn_cast<ConstantInt>(MC->getLength());

    // If 'size' is not a constant, a library call will be generated.
    if (!C)
      return -1;
1168
1169 const unsigned Size = C->getValue().getZExtValue();
1170 const Align DstAlign = *MC->getDestAlign();
1171 const Align SrcAlign = *MC->getSourceAlign();
1172
1173 MOp = MemOp::Copy(Size, false, DstAlign, SrcAlign,
1174 false);
1175 DstAddrSpace = MC->getDestAddressSpace();
1176 SrcAddrSpace = MC->getSourceAddressSpace();
1177 }
  else if (const auto *MS = dyn_cast<MemSetInst>(I)) {
    ConstantInt *C = dyn_cast<ConstantInt>(MS->getLength());

    // If 'size' is not a constant, a library call will be generated.
    if (!C)
      return -1;
1183
1184 const unsigned Size = C->getValue().getZExtValue();
1185 const Align DstAlign = *MS->getDestAlign();
1186
1187 MOp = MemOp::Set(Size, false, DstAlign,
1188 false, false);
1189 DstAddrSpace = MS->getDestAddressSpace();
1190 }
1191 else
1193
1194 unsigned Limit, Factor = 2;
1195 switch(I->getIntrinsicID()) {
1196 case Intrinsic::memcpy:
1198 break;
1199 case Intrinsic::memmove:
1201 break;
1202 case Intrinsic::memset:
1204 Factor = 1;
1205 break;
1206 default:
1208 }
1209
1210
1211
1212
1213 std::vector MemOps;
1214 if (getTLI()->findOptimalMemOpLowering(
1215 MemOps, Limit, MOp, DstAddrSpace,
1216 SrcAddrSpace, F->getAttributes()))
1217 return MemOps.size() * Factor;
1218
1219
1220 return -1;
1221}
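
// Worked example (illustrative only): with Factor == 2 for memcpy/memmove and
// Factor == 1 for memset, the estimate above is simply MemOps.size() * Factor.
// A 16-byte, word-aligned memcpy that lowers to four 4-byte chunks would thus
// report 4 * 2 == 8 operations (four loads plus four stores).
static int exampleMemOpEstimate(unsigned NumChunks, bool IsMemset) {
  return static_cast<int>(NumChunks) * (IsMemset ? 1 : 2);
}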
1222
  int NumOps = getNumMemOps(cast<IntrinsicInst>(I));
1225
1226
1227
1228 if (NumOps == -1)
1229 return 4;
1230 return NumOps;
1231}
1232
1240
1242 if (IsExtractSubvector)
1244 if (ST->hasNEON()) {
1247
1254
1259
1261 if (const auto *Entry =
1263 return LT.first * Entry->Cost;
1264 }
1266 static const CostTblEntry NEONShuffleTbl[] = {
1267
1268
1275
1280
1282 if (const auto *Entry =
1284 return LT.first * Entry->Cost;
1285 }
1287 static const CostTblEntry NEONSelShuffleTbl[] = {
1288
1289
1290
1291
1296
1300
1302
1304
1306 if (const auto *Entry = CostTableLookup(NEONSelShuffleTbl,
1308 return LT.first * Entry->Cost;
1309 }
1310 }
1311 if (ST->hasMVEIntegerOps()) {
1314
1320
1323 LT.second))
1324 return LT.first * Entry->Cost *
1326 }
1327
1328 if (!Mask.empty()) {
1330 if (LT.second.isVector() &&
1331 Mask.size() <= LT.second.getVectorNumElements() &&
1335 }
1336 }
1337
1338
1339 if (IsExtractSubvector)
1341 int BaseCost = ST->hasMVEIntegerOps() && Tp->isVectorTy()
1343 : 1;
1344 return BaseCost *
1346}
1347
1355
1356
1357
1358 switch (ISDOpcode) {
1359 default:
1360 break;
1363 return 2;
1365 return 3;
1366 }
1367 }
1368
1370
1371 if (ST->hasNEON()) {
1372 const unsigned FunctionCallDivCost = 20;
1373 const unsigned ReciprocalDivCost = 10;
1375
1376
1377
1378
1379 { ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost},
1380 { ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost},
1381 { ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost},
1382 { ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost},
1383 { ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost},
1384 { ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost},
1385 { ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost},
1386 { ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost},
1387 { ISD::SDIV, MVT::v4i16, ReciprocalDivCost},
1388 { ISD::UDIV, MVT::v4i16, ReciprocalDivCost},
1389 { ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost},
1390 { ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost},
1391 { ISD::SDIV, MVT::v8i8, ReciprocalDivCost},
1392 { ISD::UDIV, MVT::v8i8, ReciprocalDivCost},
1393 { ISD::SREM, MVT::v8i8, 8 * FunctionCallDivCost},
1394 { ISD::UREM, MVT::v8i8, 8 * FunctionCallDivCost},
1395
1396 { ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost},
1397 { ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost},
1398 { ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost},
1399 { ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost},
1400 { ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost},
1401 { ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost},
1402 { ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost},
1403 { ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost},
1404 { ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost},
1405 { ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost},
1406 { ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost},
1407 { ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost},
1408 { ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost},
1409 { ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost},
1410 { ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost},
1411 { ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost},
1412
1413 };
1414
1415 if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second))
1416 return LT.first * Entry->Cost;
1417
1419 Opcode, Ty, CostKind, Op1Info, Op2Info);
1420
1421
1422
1423
1424
1425
1426
1427
1428 if (LT.second == MVT::v2i64 && Op2Info.isUniform() && Op2Info.isConstant())
1430
1431 return Cost;
1432 }
1433
1434
1435
1436 auto LooksLikeAFreeShift = [&]() {
1438 return false;
1439
1441 return false;
1443 return false;
1444
1445
1446 switch (cast(CxtI->user_back())->getOpcode()) {
1447 case Instruction::Add:
1448 case Instruction::Sub:
1449 case Instruction::And:
1450 case Instruction::Xor:
1451 case Instruction::Or:
1452 case Instruction::ICmp:
1453 return true;
1454 default:
1455 return false;
1456 }
1457 };
1458 if (LooksLikeAFreeShift())
1459 return 0;
1460
1461
1462
1463 int BaseCost = 1;
1464 if (ST->hasMVEIntegerOps() && Ty->isVectorTy())
1466
1467
1468
1469
1470
1472 return LT.first * BaseCost;
1473
1474
1475 if (auto *VTy = dyn_cast(Ty)) {
1476 unsigned Num = VTy->getNumElements();
1479
1480
1484 }
1485
1486 return BaseCost;
1487}
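
// Worked example (illustrative, assuming a single legalization step so that
// LT.first == 1): a <4 x i32> SDIV matches the 4 * FunctionCallDivCost entry
// above, so the reported cost is 4 * 20 == 80.
static int exampleVectorDivCost(unsigned NumLanes) {
  const unsigned FunctionCallDivCost = 20; // same constant as above
  return static_cast<int>(NumLanes * FunctionCallDivCost);
}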
1488
1495
1497 return 1;
1498
1499
1503
1504 if (ST->hasNEON() && Src->isVectorTy() &&
1505 (Alignment && *Alignment != Align(16)) &&
1506 cast(Src)->getElementType()->isDoubleTy()) {
1507
1508
1510 return LT.first * 4;
1511 }
1512
1513
1514
1515 if (ST->hasMVEFloatOps() && isa(Src) && I &&
1516 ((Opcode == Instruction::Load && I->hasOneUse() &&
1517 isa(*I->user_begin())) ||
1518 (Opcode == Instruction::Store && isa(I->getOperand(0))))) {
1520 Type *DstTy =
1521 Opcode == Instruction::Load
1522 ? (*I->user_begin())->getType()
1523 : cast(I->getOperand(0))->getOperand(0)->getType();
1527 }
1528
1529 int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()
1531 : 1;
1534}
1535
1540 if (ST->hasMVEIntegerOps()) {
1541 if (Opcode == Instruction::Load && isLegalMaskedLoad(Src, Alignment))
1543 if (Opcode == Instruction::Store && isLegalMaskedStore(Src, Alignment))
1545 }
1546 if (!isa(Src))
1549
1550
1551 return cast(Src)->getNumElements() * 8;
1552}
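
// Illustrative sketch (helper name is hypothetical): when a masked memory
// operation is not legal it gets scalarized, and the fallback above charges a
// flat 8 per vector element, e.g. 4 elements -> 32.
static int exampleScalarizedMaskedOpCost(unsigned NumElements) {
  return static_cast<int>(NumElements) * 8;
}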
1553
1557 bool UseMaskForCond, bool UseMaskForGaps) {
1558 assert(Factor >= 2 && "Invalid interleave factor");
1559 assert(isa(VecTy) && "Expect a vector type");
1560
1561
1563
1564 if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits &&
1565 !UseMaskForCond && !UseMaskForGaps) {
1566 unsigned NumElts = cast(VecTy)->getNumElements();
1567 auto *SubVecTy =
1569
1570
1571
1572
1573 int BaseCost =
1575 if (NumElts % Factor == 0 &&
1578
1579
1580
1581
1582
1583
1584 if (ST->hasMVEIntegerOps() && Factor == 2 && NumElts / Factor > 2 &&
1587 return 2 * BaseCost;
1588 }
1589
1592 UseMaskForCond, UseMaskForGaps);
1593}
1594
1596 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
1598 using namespace PatternMatch;
1602
1603 assert(DataTy->isVectorTy() && "Can't do gather/scatters on scalar!");
1604 auto *VTy = cast(DataTy);
1605
1606
1607
1608 unsigned NumElems = VTy->getNumElements();
1609 unsigned EltSize = VTy->getScalarSizeInBits();
1611
1612
1613
1614
1615
1616
1619
1620
1621
1622
1624 NumElems * LT.first + (VariableMask ? NumElems * 5 : 0) +
1629
1630 if (EltSize < 8 || Alignment < EltSize / 8)
1631 return ScalarCost;
1632
1633 unsigned ExtSize = EltSize;
1634
1635 if (I != nullptr) {
1636
1637
1638
1639 if ((I->getOpcode() == Instruction::Load ||
         match(I, m_Intrinsic<Intrinsic::masked_gather>())) &&
1641 I->hasOneUse()) {
1642 const User *Us = *I->users().begin();
1643 if (isa(Us) || isa(Us)) {
1644
1646 cast(Us)->getType()->getScalarSizeInBits();
1647 if (((TypeSize == 32 && (EltSize == 8 || EltSize == 16)) ||
1648 (TypeSize == 16 && EltSize == 8)) &&
1649 TypeSize * NumElems == 128) {
1651 }
1652 }
1653 }
1654
1656 if ((I->getOpcode() == Instruction::Store ||
         match(I, m_Intrinsic<Intrinsic::masked_scatter>())) &&
        (T = dyn_cast<TruncInst>(I->getOperand(0)))) {
1659
1660 unsigned TypeSize = T->getOperand(0)->getType()->getScalarSizeInBits();
1661 if (((EltSize == 16 && TypeSize == 32) ||
1665 }
1666 }
1667
1668 if (ExtSize * NumElems != 128 || NumElems < 4)
1669 return ScalarCost;
1670
1671
1672 if (ExtSize == 32)
1673 return VectorCost;
1674
1675
1676
1677 if (ExtSize != 8 && ExtSize != 16)
1678 return ScalarCost;
1679
1680 if (const auto *BC = dyn_cast(Ptr))
1681 Ptr = BC->getOperand(0);
1682 if (const auto *GEP = dyn_cast(Ptr)) {
1683 if (GEP->getNumOperands() != 2)
1684 return ScalarCost;
1686
1687 if (Scale != 1 && Scale * 8 != ExtSize)
1688 return ScalarCost;
1689
1690 if (const auto *ZExt = dyn_cast(GEP->getOperand(1))) {
1691 if (ZExt->getOperand(0)->getType()->getScalarSizeInBits() <= ExtSize)
1692 return VectorCost;
1693 }
1694 return ScalarCost;
1695 }
1696 return ScalarCost;
1697}
1698
1701 std::optional FMF,
1703
1707
1708
1709
1710
1712 ((EltSize == 32 && ST->hasVFP2Base()) ||
1713 (EltSize == 64 && ST->hasFP64()) ||
1714 (EltSize == 16 && ST->hasFullFP16()))) {
1715 unsigned NumElts = cast(ValTy)->getNumElements();
1716 unsigned VecLimit = ST->hasMVEFloatOps() ? 128 : (ST->hasNEON() ? 64 : -1);
1719 NumElts * EltSize > VecLimit) {
1722 NumElts /= 2;
1723 }
1724
1725
1726
1731 NumElts /= 2;
1733 ExtractCost = NumElts / 2;
1734
1735 return VecCost + ExtractCost +
1736 NumElts *
1738 }
1739
1741 (EltSize == 64 || EltSize == 32 || EltSize == 16 || EltSize == 8)) {
1742 unsigned NumElts = cast(ValTy)->getNumElements();
1743 unsigned VecLimit =
1744 ST->hasMVEIntegerOps() ? 128 : (ST->hasNEON() ? 64 : -1);
1746 while (isPowerOf2_32(NumElts) && NumElts * EltSize > VecLimit) {
1749 NumElts /= 2;
1750 }
1751
1752
1754 NumElts * EltSize == 64) {
1758 NumElts /= 2;
1759 }
1760
1761
1763 return VecCost + ExtractCost +
1766 }
1767
1768 if (!ST->hasMVEIntegerOps() || !ValVT.isSimple() || ISD != ISD::ADD ||
1771
1773
1778 };
1779 if (const auto *Entry = CostTableLookup(CostTblAdd, ISD, LT.second))
1781
1783}
1784
1786 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy,
1790
1792
1793 switch (ISD) {
1795 if (ST->hasMVEIntegerOps() && ValVT.isSimple() && ResVT.isSimple()) {
1797
1798
1799
1800
1801
1802
1803
1806 ((LT.second == MVT::v16i8 && RevVTSize <= 32) ||
1807 (LT.second == MVT::v8i16 && RevVTSize <= 32) ||
1808 (LT.second == MVT::v4i32 && RevVTSize <= 64)))
1810 }
1811 break;
1812 default:
1813 break;
1814 }
1817}
1818
1825
1826 if (ST->hasMVEIntegerOps() && ValVT.isSimple() && ResVT.isSimple()) {
1828
1829
1830
1831
1832
1833
1834
1837 ((LT.second == MVT::v16i8 && RevVTSize <= 32) ||
1838 (LT.second == MVT::v8i16 && RevVTSize <= 64) ||
1839 (LT.second == MVT::v4i32 && RevVTSize <= 64)))
1841 }
1842
1844}
1845
1851
1852
1853
1854
1855 if ((IID == Intrinsic::minnum || IID == Intrinsic::maxnum) &&
1859 unsigned NumElts = cast(Ty)->getNumElements();
1861 unsigned VecLimit = ST->hasMVEFloatOps() ? 128 : (ST->hasNEON() ? 64 : -1);
1863 while (isPowerOf2_32(NumElts) && NumElts * EltSize > VecLimit) {
1867 NumElts /= 2;
1868 }
1869
1870
1871
1874 NumElts == 8) {
1876 NumElts /= 2;
1878 ExtractCost = cast(Ty)->getNumElements() / 2;
1879
1881 {Ty->getElementType(), Ty->getElementType()},
1882 FMF);
1883 return VecCost + ExtractCost +
1885 }
1886
1887 if (IID == Intrinsic::smin || IID == Intrinsic::smax ||
1888 IID == Intrinsic::umin || IID == Intrinsic::umax) {
1890
1891
1892
1893
1898 };
1901 }
1902
1904}
1905
1909 unsigned Opc = ICA.getID();
1910 switch (Opc) {
1911 case Intrinsic::get_active_lane_mask:
1912
1913
1914
1915
1916
1917
1918
1919 if (ST->hasMVEIntegerOps())
1920 return 0;
1921 break;
1922 case Intrinsic::sadd_sat:
1923 case Intrinsic::ssub_sat:
1924 case Intrinsic::uadd_sat:
1925 case Intrinsic::usub_sat: {
1926 bool IsAdd = (Opc == Intrinsic::sadd_sat || Opc == Intrinsic::ssub_sat);
1927 bool IsSigned = (Opc == Intrinsic::sadd_sat || Opc == Intrinsic::ssub_sat);
1929
1930 if (auto *ITy = dyn_cast(RetTy)) {
1931 if (IsSigned && ST->hasDSP() && ITy->getBitWidth() == 32)
1932 return 1;
1933 if (ST->hasDSP() && (ITy->getBitWidth() == 8 || ITy->getBitWidth() == 16))
1934 return 2;
1935
1936
1938 Type *CondTy = RetTy->getWithNewBitWidth(1);
1945 }
1946
1947 if (!ST->hasMVEIntegerOps())
1948 break;
1949
1951 if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
1952 LT.second == MVT::v16i8) {
1953
1954
1955 unsigned Instrs =
1956 LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits() ? 1
1957 : 4;
1959 }
1960 break;
1961 }
1962 case Intrinsic::abs:
1963 case Intrinsic::smin:
1964 case Intrinsic::smax:
1965 case Intrinsic::umin:
1966 case Intrinsic::umax: {
1967 if (!ST->hasMVEIntegerOps())
1968 break;
1970
1972 if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
1973 LT.second == MVT::v16i8)
1975 break;
1976 }
1977 case Intrinsic::minnum:
1978 case Intrinsic::maxnum: {
1979 if (!ST->hasMVEFloatOps())
1980 break;
1983 if (LT.second == MVT::v4f32 || LT.second == MVT::v8f16)
1985 break;
1986 }
1987 case Intrinsic::fptosi_sat:
1988 case Intrinsic::fptoui_sat: {
1990 break;
1991 bool IsSigned = Opc == Intrinsic::fptosi_sat;
1994
1995 if ((ST->hasVFP2Base() && LT.second == MVT::f32 && MTy == MVT::i32) ||
1996 (ST->hasFP64() && LT.second == MVT::f64 && MTy == MVT::i32) ||
1997 (ST->hasFullFP16() && LT.second == MVT::f16 && MTy == MVT::i32))
1998 return LT.first;
1999
2000
2001 if (ST->hasMVEFloatOps() &&
2002 (LT.second == MVT::v4f32 || LT.second == MVT::v8f16) &&
2005
2006
2007 if (((ST->hasVFP2Base() && LT.second == MVT::f32) ||
2008 (ST->hasFP64() && LT.second == MVT::f64) ||
2009 (ST->hasFullFP16() && LT.second == MVT::f16) ||
2010 (ST->hasMVEFloatOps() &&
2011 (LT.second == MVT::v4f32 || LT.second == MVT::v8f16))) &&
2014 LT.second.getScalarSizeInBits());
2018 : Intrinsic::umin,
2019 LegalTy, {LegalTy, LegalTy});
2022 : Intrinsic::umax,
2023 LegalTy, {LegalTy, LegalTy});
2025 return LT.first * Cost;
2026 }
2027
2028
2036 getCastInstrCost(IsSigned ? Instruction::FPToSI : Instruction::FPToUI,
2038 if (IsSigned) {
2039 Type *CondTy = RetTy->getWithNewBitWidth(1);
2044 }
2045 return Cost;
2046 }
2047 }
2048
2050}
2051
  if (!F->isIntrinsic())
    return BaseT::isLoweredToCall(F);
2056
2057 if (F->getName().starts_with("llvm.arm"))
2058 return false;
2059
2060 switch (F->getIntrinsicID()) {
2061 default: break;
2062 case Intrinsic::powi:
2063 case Intrinsic::sin:
2064 case Intrinsic::cos:
2065 case Intrinsic::sincos:
2066 case Intrinsic::pow:
  case Intrinsic::log:
2068 case Intrinsic::log10:
2069 case Intrinsic::log2:
2070 case Intrinsic::exp:
2071 case Intrinsic::exp2:
2072 return true;
2073 case Intrinsic::sqrt:
2074 case Intrinsic::fabs:
2075 case Intrinsic::copysign:
  case Intrinsic::floor:
2077 case Intrinsic::ceil:
2078 case Intrinsic::trunc:
2079 case Intrinsic::rint:
2080 case Intrinsic::nearbyint:
2081 case Intrinsic::round:
2082 case Intrinsic::canonicalize:
2083 case Intrinsic::lround:
2084 case Intrinsic::llround:
2085 case Intrinsic::lrint:
2086 case Intrinsic::llrint:
2087 if (F->getReturnType()->isDoubleTy() && !ST->hasFP64())
2088 return true;
2089 if (F->getReturnType()->isHalfTy() && !ST->hasFullFP16())
2090 return true;
2091
2092
2093
2095 case Intrinsic::masked_store:
2096 case Intrinsic::masked_load:
2097 case Intrinsic::masked_gather:
2098 case Intrinsic::masked_scatter:
2099 return !ST->hasMVEIntegerOps();
2100 case Intrinsic::sadd_with_overflow:
2101 case Intrinsic::uadd_with_overflow:
2102 case Intrinsic::ssub_with_overflow:
2103 case Intrinsic::usub_with_overflow:
2104 case Intrinsic::sadd_sat:
2105 case Intrinsic::uadd_sat:
2106 case Intrinsic::ssub_sat:
2107 case Intrinsic::usub_sat:
2108 return false;
2109 }
2110
2112}
2113
2118 return true;
2119
2120
2121
2122 if (auto *Call = dyn_cast(&I)) {
2123 if (auto *II = dyn_cast(Call)) {
2124 switch(II->getIntrinsicID()) {
2125 case Intrinsic::memcpy:
2126 case Intrinsic::memset:
2127 case Intrinsic::memmove:
2129 default:
2130 if (const Function *F = Call->getCalledFunction())
2132 }
2133 }
2134 return true;
2135 }
2136
2137
2138
2139 switch (I.getOpcode()) {
2140 default:
2141 break;
2142 case Instruction::FPToSI:
2143 case Instruction::FPToUI:
2144 case Instruction::SIToFP:
2145 case Instruction::UIToFP:
2146 case Instruction::FPTrunc:
2147 case Instruction::FPExt:
2149 }
2150
2151
2152
2153
2154
2155
2156
2158 switch (ISD) {
2159 default:
2160 break;
2167 return true;
2168 }
2169 }
2170
2171
2173 return false;
2174
2175
2177 switch (I.getOpcode()) {
2178 default:
2179 return true;
2180 case Instruction::Alloca:
2181 case Instruction::Load:
2182 case Instruction::Store:
2183 case Instruction::Select:
2184 case Instruction::PHI:
2185 return false;
2186 }
2187 }
2188
2189
2190
2191 if (I.getType()->isDoubleTy() && !ST->hasFP64())
2192 return true;
2193
2194
2195 if (I.getType()->isHalfTy() && !ST->hasFullFP16())
2196 return true;
2197
2198 return false;
2199}
2200
2205
2206
2209 return false;
2210 }
2211
2214 return false;
2215 }
2216
2218 if (isa(BackedgeTakenCount)) {
2219 LLVM_DEBUG(dbgs() << "ARMHWLoops: Uncomputable BETC\n");
2220 return false;
2221 }
2222
2223 const SCEV *TripCountSCEV =
2226
2227
2229 LLVM_DEBUG(dbgs() << "ARMHWLoops: Trip count does not fit into 32bits\n");
2230 return false;
2231 }
2232
2233
2234
2235
2236 auto IsHardwareLoopIntrinsic = [](Instruction &I) {
2237 if (auto *Call = dyn_cast(&I)) {
2238 switch (Call->getIntrinsicID()) {
2239 default:
2240 break;
2241 case Intrinsic::start_loop_iterations:
2242 case Intrinsic::test_start_loop_iterations:
2243 case Intrinsic::loop_decrement:
2244 case Intrinsic::loop_decrement_reg:
2245 return true;
2246 }
2247 }
2248 return false;
2249 };
2250
2251
2252
2253
2254 bool IsTailPredLoop = false;
2255 auto ScanLoop = [&](Loop *L) {
2256 for (auto *BB : L->getBlocks()) {
2257 for (auto &I : *BB) {
2259 isa(I)) {
2260 LLVM_DEBUG(dbgs() << "ARMHWLoops: Bad instruction: " << I << "\n");
2261 return false;
2262 }
2263 if (auto *II = dyn_cast(&I))
2264 IsTailPredLoop |=
2265 II->getIntrinsicID() == Intrinsic::get_active_lane_mask ||
2266 II->getIntrinsicID() == Intrinsic::arm_mve_vctp8 ||
2267 II->getIntrinsicID() == Intrinsic::arm_mve_vctp16 ||
2268 II->getIntrinsicID() == Intrinsic::arm_mve_vctp32 ||
2269 II->getIntrinsicID() == Intrinsic::arm_mve_vctp64;
2270 }
2271 }
2272 return true;
2273 };
2274
2275
2276 for (auto *Inner : *L)
2277 if (!ScanLoop(Inner))
2278 return false;
2279
2280 if (!ScanLoop(L))
2281 return false;
2282
2283
2284
2285
2286
2287 LLVMContext &C = L->getHeader()->getContext();
2293 return true;
2294}
2295
2297
2298
2299 if (isa(&I) && ++ICmpCount > 1)
2300 return false;
2301
2302
2303
2304
2305
2306
2307
2308 if (auto *II = dyn_cast(&I))
2309 if ((II->getIntrinsicID() == Intrinsic::smin ||
2310 II->getIntrinsicID() == Intrinsic::smax ||
2311 II->getIntrinsicID() == Intrinsic::umin ||
2312 II->getIntrinsicID() == Intrinsic::umax) &&
2313 ++ICmpCount > 1)
2314 return false;
2315
2316 if (isa(&I))
2317 return false;
2318
2319
2320
2322 return false;
2323
  // Extends have to be extending-loads.
  if (isa<SExtInst>(&I) || isa<ZExtInst>(&I))
    if (!I.getOperand(0)->hasOneUse() || !isa<LoadInst>(I.getOperand(0)))
      return false;

  // Truncs have to be narrowing-stores.
  if (isa<TruncInst>(&I))
    if (!I.hasOneUse() || !isa<StoreInst>(*I.user_begin()))
      return false;
2333
2334 return true;
2335}
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2350 LLVM_DEBUG(dbgs() << "Tail-predication: checking allowed instructions\n");
2351
2352
2353
2354
2355
2356
2357
2358
2361 bool ReductionsDisabled =
2364
2365 for (auto *I : LiveOuts) {
    if (!I->getType()->isIntegerTy() && !I->getType()->isFloatTy() &&
        !I->getType()->isHalfTy()) {
2368 LLVM_DEBUG(dbgs() << "Don't tail-predicate loop with non-integer/float "
2369 "live-out value\n");
2370 return false;
2371 }
2372 if (ReductionsDisabled) {
2374 return false;
2375 }
2376 }
2377
2378
2381 int ICmpCount = 0;
2382
2383 for (BasicBlock *BB : L->blocks()) {
2384 for (Instruction &I : BB->instructionsWithoutDebug()) {
2385 if (isa(&I))
2386 continue;
2388 LLVM_DEBUG(dbgs() << "Instruction not allowed: "; I.dump());
2389 return false;
2390 }
2391
2393 if (T->getScalarSizeInBits() > 32) {
2395 return false;
2396 }
2400 int64_t NextStride = getPtrStride(PSE, AccessTy, Ptr, L).value_or(0);
2401 if (NextStride == 1) {
2402
2403
2404
2405 continue;
2406 } else if (NextStride == -1 ||
2410 << "Consecutive strides of 2 found, vld2/vstr2 can't "
2411 "be tail-predicated\n.");
2412 return false;
2413
2415
2416
2417
2418
2420 if (auto AR = dyn_cast(PtrScev)) {
2421 const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());
2423 continue;
2424 }
2425 }
2427 "tail-predicate\n.");
2428 return false;
2429 }
2430 }
2431 }
2432
2433 LLVM_DEBUG(dbgs() << "tail-predication: all instructions allowed!\n");
2434 return true;
2435}
2436
2439 LLVM_DEBUG(dbgs() << "Tail-predication not enabled.\n");
2440 return false;
2441 }
2442
2443
2444
2445
2446 if (!ST->hasMVEIntegerOps())
2447 return false;
2448
2451
2452
2453 if (L->getNumBlocks() > 1) {
2454 LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: not a single block "
2455 "loop.\n");
2456 return false;
2457 }
2458
2459 assert(L->isInnermost() && "preferPredicateOverEpilogue: inner-loop expected");
2460
2464 LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
2465 "analyzable.\n");
2466 return false;
2467 }
2468
2471
2472
2473
2475 LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
2476 "profitable.\n");
2477 return false;
2478 }
2479
2482 LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
2483 "a candidate.\n");
2484 return false;
2485 }
2486
2488}
2489
2494
2495
2496
2497
2498
2500}
2504
2505
2506
  UP.UpperBound =
      !ST->hasMVEIntegerOps() || !any_of(*L->getHeader(), [](Instruction &I) {
        return isa<IntrinsicInst>(I) &&
               cast<IntrinsicInst>(I).getIntrinsicID() ==
                   Intrinsic::get_active_lane_mask;
      });
2513
2514
2517
2518
2521 if (L->getHeader()->getParent()->hasOptSize())
2522 return;
2523
2525 L->getExitingBlocks(ExitingBlocks);
2527 << "Blocks: " << L->getNumBlocks() << "\n"
2528 << "Exit blocks: " << ExitingBlocks.size() << "\n");
2529
2530
2531
2532 if (ExitingBlocks.size() > 2)
2533 return;
2534
2535
2536
2537 if (ST->hasBranchPredictor() && L->getNumBlocks() > 4)
2538 return;
2539
2540
2542 return;
2543
2544
2545
2547 for (auto *BB : L->getBlocks()) {
2548 for (auto &I : *BB) {
2549
2550
2551 if (I.getType()->isVectorTy())
2552 return;
2553
2557 continue;
2558 }
2559 return;
2560 }
2561
2565 }
2566 }
2567
2568
2569
2570
2571
2572
2573
2576 unsigned ExitingValues = 0;
2578 L->getExitBlocks(ExitBlocks);
2579 for (auto *Exit : ExitBlocks) {
2580
2581
2582 unsigned LiveOuts = count_if(Exit->phis(), [](auto &PH) {
2583 return PH.getNumOperands() != 1 ||
2584 !isa(PH.getOperand(0));
2585 });
2586 ExitingValues = ExitingValues < LiveOuts ? LiveOuts : ExitingValues;
2587 }
2588 if (ExitingValues)
2591 return;
2592 }
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2604 if (ST->hasLOB()) {
2607 auto *Outer = L->getOutermostLoop();
2608 if ((L != Outer && Outer != L->getParentLoop()) ||
2609 (L != Outer && BETC && !SE.isLoopInvariant(BETC, Outer))) {
2611 }
2612 }
2613 }
2614
2617
2624
2625
2626
2627 if (Cost < 12)
2628 UP.Force = true;
2629}
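
// Illustrative sketch (restating the final heuristic above, no new behaviour):
// only small loop bodies are force-unrolled.
static bool exampleForceUnroll(int EstimatedBodyCost) {
  return EstimatedBodyCost < 12; // same threshold as the Cost check above
}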
2630
2634}
2635
2638 if (!ST->hasMVEIntegerOps())
2639 return false;
2640
2642 switch (Opcode) {
2643 case Instruction::Add:
2644 return ScalarBits <= 64;
2645 default:
2646 return false;
2647 }
2648}
2649
2652 if (!ST->hasMVEIntegerOps())
2653 return false;
2654 return true;
2655}
2656
2659 bool HasBaseReg, int64_t Scale,
2660 unsigned AddrSpace) const {
2665 AM.Scale = Scale;
2668 if (ST->hasFPAO())
2669 return AM.Scale < 0 ? 1 : 0;
2670 return 0;
2671 }
2672 return -1;
2673}
2674
2676 if (Thumb) {
2677
2678
2679
2680 return ST->isThumb2() || ST->hasV8MBaselineOps();
2681 } else {
2682
2683
2685 }
2686}
2687
2688
2689
2691 using namespace PatternMatch;
2692
2693 auto areExtDoubled = [](Instruction *Ext) {
2694 return Ext->getType()->getScalarSizeInBits() ==
2695 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
2696 };
2697
2700 !areExtDoubled(cast(Ext1)) ||
2701 !areExtDoubled(cast(Ext2)))
2702 return false;
2703
2704 return true;
2705}
2706
2707
2708
2709
2712 using namespace PatternMatch;
2713
  if (!I->getType()->isVectorTy())
    return false;
2716
2717 if (ST->hasNEON()) {
2718 switch (I->getOpcode()) {
2719 case Instruction::Sub:
2720 case Instruction::Add: {
2722 return false;
2723 Ops.push_back(&I->getOperandUse(0));
2724 Ops.push_back(&I->getOperandUse(1));
2725 return true;
2726 }
2727 default:
2728 return false;
2729 }
2730 }
2731
2732 if (!ST->hasMVEIntegerOps())
2733 return false;
2734
    if (!I->hasOneUse())
      return false;
    auto *Sub = cast<Instruction>(*I->users().begin());
    return Sub->getOpcode() == Instruction::FSub && Sub->getOperand(1) == I;
2740 };
2744 return true;
2745 return false;
2746 };
2747
2748 auto IsSinker = [&](Instruction *I, int Operand) {
2749 switch (I->getOpcode()) {
2750 case Instruction::Add:
2751 case Instruction::Mul:
2752 case Instruction::FAdd:
2753 case Instruction::ICmp:
2754 case Instruction::FCmp:
2755 return true;
2756 case Instruction::FMul:
2757 return !IsFMSMul(I);
2758 case Instruction::Sub:
2759 case Instruction::FSub:
2760 case Instruction::Shl:
2761 case Instruction::LShr:
2762 case Instruction::AShr:
2763 return Operand == 1;
2764 case Instruction::Call:
2765 if (auto *II = dyn_cast(I)) {
2766 switch (II->getIntrinsicID()) {
2767 case Intrinsic::fma:
2768 return !IsFMS(I);
2769 case Intrinsic::sadd_sat:
2770 case Intrinsic::uadd_sat:
2771 case Intrinsic::arm_mve_add_predicated:
2772 case Intrinsic::arm_mve_mul_predicated:
2773 case Intrinsic::arm_mve_qadd_predicated:
2774 case Intrinsic::arm_mve_vhadd:
2775 case Intrinsic::arm_mve_hadd_predicated:
2776 case Intrinsic::arm_mve_vqdmull:
2777 case Intrinsic::arm_mve_vqdmull_predicated:
2778 case Intrinsic::arm_mve_vqdmulh:
2779 case Intrinsic::arm_mve_qdmulh_predicated:
2780 case Intrinsic::arm_mve_vqrdmulh:
2781 case Intrinsic::arm_mve_qrdmulh_predicated:
2782 case Intrinsic::arm_mve_fma_predicated:
2783 return true;
2784 case Intrinsic::ssub_sat:
2785 case Intrinsic::usub_sat:
2786 case Intrinsic::arm_mve_sub_predicated:
2787 case Intrinsic::arm_mve_qsub_predicated:
2788 case Intrinsic::arm_mve_hsub_predicated:
2789 case Intrinsic::arm_mve_vhsub:
2790 return Operand == 1;
2791 default:
2792 return false;
2793 }
2794 }
2795 return false;
2796 default:
2797 return false;
2798 }
2799 };
2800
2801 for (auto OpIdx : enumerate(I->operands())) {
    Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
    // Make sure we are not already sinking this operand.
    if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
      continue;
2806
2808 if (Shuffle->getOpcode() == Instruction::BitCast)
2809 Shuffle = dyn_cast(Shuffle->getOperand(0));
2810
2814 continue;
2815 if (!IsSinker(I, OpIdx.index()))
2816 continue;
2817
2818
2819
2820 for (Use &U : Op->uses()) {
2822 if (!IsSinker(Insn, U.getOperandNo()))
2823 return false;
2824 }
2825
2827 if (Shuffle != Op)
2830 }
2831 return true;
2832}
2833
2837 LLVM_DEBUG(dbgs() << "Padding global arrays disabled\n");
2838 return false;
2839 }
2840
2841
2844 return 0;
2845
2846
2847 if (Size % 4 == 0)
2848 return 0;
2849
2850 unsigned NumBytesToPad = 4 - (Size % 4);
2851 unsigned NewSize = Size + NumBytesToPad;
2852
2853
2854
2856
2857 if (NewSize > MaxMemIntrinsicSize)
2858 return 0;
2859
2860 return NumBytesToPad;
2861}
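
// Worked example (illustrative helper using the same arithmetic as above): pad
// the global out to the next 4-byte boundary, e.g. a 6-byte string receives 2
// bytes of padding, while an 8-byte one needs none.
static unsigned examplePadToWordBoundary(unsigned Size) {
  return Size % 4 == 0 ? 0 : 4 - (Size % 4);
}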
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
static bool areExtractExts(Value *Ext1, Value *Ext2)
Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth of the vector elements.
This file implements a class to represent arbitrary precision integral constant values and operations...
cl::opt< unsigned > MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden, cl::desc("Maximum interleave factor for MVE VLDn to generate."), cl::init(2))
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static Value * isSSATMinMaxPattern(Instruction *Inst, const APInt &Imm)
static cl::opt< bool > AllowWLSLoops("allow-arm-wlsloops", cl::Hidden, cl::init(true), cl::desc("Enable the generation of WLS loops"))
static Value * simplifyNeonVld1(const IntrinsicInst &II, unsigned MemAlign, InstCombiner::BuilderTy &Builder)
Convert a vector load intrinsic into a simple llvm load instruction.
static bool isFPSatMinMaxPattern(Instruction *Inst, const APInt &Imm)
static cl::opt< bool > UseWidenGlobalArrays("widen-global-strings", cl::Hidden, cl::init(true), cl::desc("Enable the widening of global strings to alignment boundaries"))
cl::opt< bool > EnableMaskedGatherScatters
static bool canTailPredicateInstruction(Instruction &I, int &ICmpCount)
cl::opt< unsigned > MVEMaxSupportedInterleaveFactor
cl::opt< TailPredication::Mode > EnableTailPredication
static cl::opt< bool > DisableLowOverheadLoops("disable-arm-loloops", cl::Hidden, cl::init(false), cl::desc("Disable the generation of low-overhead loops"))
static bool canTailPredicateLoop(Loop *L, LoopInfo *LI, ScalarEvolution &SE, const DataLayout &DL, const LoopAccessInfo *LAI)
static cl::opt< bool > EnableMaskedLoadStores("enable-arm-maskedldst", cl::Hidden, cl::init(true), cl::desc("Enable the generation of masked loads and stores"))
This file a TargetTransformInfo::Concept conforming object specific to the ARM target machine.
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Cost tables and simple lookup functions.
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file provides the interface for the instcombine pass implementation.
static cl::opt< unsigned > UnrollCount("unroll-count", cl::Hidden, cl::desc("Use this unroll count for all loops including those with " "unroll_count pragma values, for testing purposes"))
This file defines the LoopVectorizationLegality class.
mir Rename Register Operands
static const Function * getCalledFunction(const Value *V)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
static SymbolRef::Type getType(const Symbol *Sym)
Class for arbitrary precision integers.
unsigned getBitWidth() const
Return the number of bits in the APInt.
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
bool isThumb1Only() const
bool hasFPARMv8Base() const
unsigned getMVEVectorCostFactor(TargetTransformInfo::TargetCostKind CostKind) const
bool preferInLoopReduction(unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const
InstructionCost getAddressComputationCost(Type *Val, ScalarEvolution *SE, const SCEV *Ptr)
bool maybeLoweredToCall(Instruction &I)
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow=true) const
bool isLegalMaskedStore(Type *DataTy, Align Alignment)
bool isLegalMaskedLoad(Type *DataTy, Align Alignment)
InstructionCost getMemcpyCost(const Instruction *I)
unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
bool isLoweredToCall(const Function *F)
InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *ValTy, TTI::TargetCostKind CostKind)
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo)
int getNumMemOps(const IntrinsicInst *I) const
Given a memcpy/memset/memmove instruction, return the number of memory operations performed,...
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const
getScalingFactorCost - Return the cost of the scaling used in addressing mode represented by AM.
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
bool hasArmWideBranch(bool Thumb) const
uint64_t getMaxMemIntrinsicInlineSizeThreshold() const
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr)
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
bool isLegalMaskedGather(Type *Ty, Align Alignment)
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
TTI::AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const
InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty)
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, FastMathFlags FMF, TTI::TargetCostKind CostKind)
bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
bool isProfitableLSRChainElement(Instruction *I)
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL) const
Returns the number of interleaved accesses that will be generated when lowering accesses of the given...
bool isLegalInterleavedAccessType(unsigned Factor, FixedVectorType *VecTy, Align Alignment, const DataLayout &DL) const
Returns true if VecTy is a legal interleaved access type.
bool useSoftFloat() const override
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Class to represent array types.
A cache of @llvm.assume calls within a function.
static Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
LLVM Basic Block Representation.
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on arguments.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *Ty, int &Index, VectorType *&SubTy) const
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
Try to calculate op costs for min/max reduction operations.
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *, const SCEV *)
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind)
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind)
Compute a cost of the given call instruction.
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, ArrayRef< Value * > VL={})
Estimate the overhead of scalarizing an instruction.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0)
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLE
signed less or equal
@ ICMP_SGT
signed greater than
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This class represents a range of values.
ConstantRange intersectWith(const ConstantRange &CR, PreferredRangeType Type=Smallest) const
Return the range that results from the intersection of this range with another range.
This is an important base class in LLVM.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Convenience struct for specifying and reasoning about fast-math flags.
Container class for subtarget features.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
ConstantInt * getTrue()
Get the constant value for i1 true.
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
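The IRBuilder entries above (SetInsertPoint, CreateAlignedLoad, CreateVectorSplat, CreateIntrinsic, getTrue) are the primitives used to emit replacement IR in intrinsic combines. A minimal sketch, assuming a caller-supplied basic block BB and an i32 pointer Ptr; loadAndSplat is a hypothetical name:

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Hypothetical helper: load an i32 with 4-byte alignment and broadcast it to a
// <4 x i32> vector.
Value *loadAndSplat(BasicBlock *BB, Value *Ptr) {
  IRBuilder<> Builder(BB->getContext());
  Builder.SetInsertPoint(BB);                        // append to the end of BB
  Value *Ld = Builder.CreateAlignedLoad(Builder.getInt32Ty(), Ptr,
                                        MaybeAlign(4), "ld");
  return Builder.CreateVectorSplat(4, Ld, "splat");  // splat of the loaded value
}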
The core instruction combiner logic.
const DataLayout & getDataLayout() const
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
DominatorTree & getDominatorTree() const
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
virtual bool SimplifyDemandedBits(Instruction *I, unsigned OpNo, const APInt &DemandedMask, KnownBits &Known, unsigned Depth, const SimplifyQuery &Q)=0
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
AssumptionCache & getAssumptionCache() const
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
Drive the analysis of memory accesses in the loop.
const PredicatedScalarEvolution & getPSE() const
Used to add runtime SCEV checks.
LoopVectorizationLegality checks if it is legal to vectorize a loop, and to what vectorization factor...
LoopInfo * getLoopInfo() const
DominatorTree * getDominatorTree() const
AssumptionCache * getAssumptionCache() const
const LoopAccessInfo * getLAI() const
ScalarEvolution * getScalarEvolution() const
Represents a single loop in the control flow graph.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
This class represents an analyzed expression in the program.
Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
bool hasLoopInvariantBackedgeTakenCount(const Loop *L)
Return true if the specified loop has an analyzable loop-invariant backedge-taken count.
APInt getUnsignedRangeMax(const SCEV *S)
Determine the max of the unsigned range for a particular SCEV.
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
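The ScalarEvolution queries above are typically composed to reason about trip counts. A minimal sketch, assuming the loop comes from the surrounding pass; getTripCount is a hypothetical helper that returns the backedge-taken count plus one:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;

// Hypothetical helper: trip count = backedge-taken count + 1, or nullptr when
// the backedge-taken count is not analyzable and loop-invariant.
const SCEV *getTripCount(ScalarEvolution &SE, const Loop *L) {
  if (!SE.hasLoopInvariantBackedgeTakenCount(L))
    return nullptr;
  const SCEV *BTC = SE.getBackedgeTakenCount(L);
  SmallVector<const SCEV *, 2> Ops;
  Ops.push_back(BTC);
  Ops.push_back(SE.getOne(BTC->getType()));
  return SE.getAddExpr(Ops);                         // BTC + 1
}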
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
Provides information about what library functions are available for the current target.
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
const TargetMachine & getTargetMachine() const
unsigned getMaxStoresPerMemcpy(bool OptSize) const
Get maximum # of store operations permitted for llvm.memcpy.
unsigned getMaxStoresPerMemmove(bool OptSize) const
Get maximum # of store operations permitted for llvm.memmove.
unsigned getMaxStoresPerMemset(bool OptSize) const
Get maximum # of store operations permitted for llvm.memset.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
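The TargetLoweringBase queries above let a cost model ask how an IR operation would be lowered for a given type. A minimal sketch; mulIsSupported is a hypothetical helper, not part of this file:

#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Hypothetical helper: is an integer multiply of type Ty legal, or can it be
// made legal by custom lowering or promotion, on this target?
bool mulIsSupported(const TargetLoweringBase &TLI, const DataLayout &DL,
                    Type *Ty) {
  int ISDOpc = TLI.InstructionOpcodeToISD(Instruction::Mul);
  EVT VT = TLI.getValueType(DL, Ty);
  return VT.isSimple() && TLI.isOperationLegalOrCustomOrPromote(ISDOpc, VT);
}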
Primary interface to the complete machine description for the target machine.
bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr, int64_t MergeDistance) const
bool isLoweredToCall(const Function *F) const
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind)
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
static bool requiresOrderedReduction(std::optional< FastMathFlags > FMF)
A helper function to determine the type of reduction algorithm used for a given Opcode and set of Fas...
@ TCC_Expensive
The cost of a 'div' instruction on x86.
ShuffleKind
The various kinds of shuffle patterns for vector queries.
@ SK_Select
Selects elements from the corresponding lane of either source operand.
@ SK_PermuteSingleSrc
Shuffle elements of single source vector with any shuffle mask.
@ SK_Broadcast
Broadcast element 0 to all other elements.
@ SK_Reverse
Reverse the order of the vector.
@ SK_ExtractSubvector
ExtractSubvector Index indicates start offset.
CastContextHint
Represents a hint about the context in which a cast is used.
@ Masked
The cast is used with a masked load/store.
@ None
The cast is not used with a load/store of any kind.
@ Normal
The cast is used with a normal load/store.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
bool isVectorTy() const
True if this is an instance of VectorType.
bool isArrayTy() const
True if this is an instance of ArrayType.
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Type * getArrayElementType() const
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
Base class of all SIMD vector types.
Type * getElementType() const
constexpr ScalarTy getFixedValue() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
int getSOImmVal(unsigned Arg)
getSOImmVal - Given a 32-bit immediate, if it is something that can fit into an shifter_operand immed...
bool isThumbImmShiftedVal(unsigned V)
isThumbImmShiftedVal - Return true if the specified value can be obtained by left shifting a 8-bit im...
int getT2SOImmVal(unsigned Arg)
getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit into a Thumb-2 shifter_oper...
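Both ARM_AM helpers above return -1 when the immediate cannot be encoded in a single shifter operand. A minimal sketch; isCheapImmediate is a hypothetical helper, and the include path is the in-tree one (an assumption):

#include "MCTargetDesc/ARMAddressingModes.h"   // in-tree include path (assumed)
using namespace llvm;

// Hypothetical helper: an immediate is "cheap" if it fits a single ARM or
// Thumb-2 shifter operand.
bool isCheapImmediate(unsigned Imm, bool IsThumb2) {
  return IsThumb2 ? ARM_AM::getT2SOImmVal(Imm) != -1
                  : ARM_AM::getSOImmVal(Imm) != -1;
}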
@ C
The default llvm calling convention, compatible with C.
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
@ SIGN_EXTEND
Conversion operators.
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
@ SHL
Shift and rotation operations.
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
auto m_Undef()
Match an arbitrary undef constant.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
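The PatternMatch helpers above are combined into declarative match() calls against IR values. A minimal sketch; matchXorWithConstant is a hypothetical helper, not part of this file:

#include "llvm/IR/Constants.h"
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Hypothetical helper: recognise "X xor C" for a constant integer C, capturing
// both operands on success.
bool matchXorWithConstant(Value *V, Value *&X, ConstantInt *&C) {
  return match(V, m_Xor(m_Value(X), m_ConstantInt(C)));
}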
@ ForceEnabledNoReductions
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
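CostTableLookup scans a static table keyed by ISD opcode and MVT and returns the matching entry, if any. A minimal sketch with an illustrative table; the entries and costs are made up, not ARM's:

#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/Support/InstructionCost.h"
using namespace llvm;

// Hypothetical cost table: entries are illustrative only.
static const CostTblEntry ExampleCostTbl[] = {
    {ISD::ADD, MVT::v4i32, 1},
    {ISD::MUL, MVT::v4i32, 2},
};

InstructionCost lookupExampleCost(int ISDOpc, MVT Ty) {
  if (const auto *Entry = CostTableLookup(ExampleCostTbl, ISDOpc, Ty))
    return Entry->Cost;
  return InstructionCost::getInvalid();              // no entry for this (op, type)
}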
bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
@ Runtime
Detect stack use after return if not disabled runtime with (ASAN_OPTIONS=detect_stack_use_after_retur...
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
Align getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
SmallVector< Instruction *, 8 > findDefsUsedOutsideOfLoop(Loop *L)
Returns the instructions that use values defined in the loop.
SelectPatternFlavor
Specific patterns of select instructions we can match.
@ SPF_ABS
Absolute value.
@ SPF_FMAXNUM
Floating point maxnum.
@ SPF_UMIN
Unsigned minimum.
@ SPF_UMAX
Unsigned maximum.
@ SPF_SMAX
Signed maximum.
@ SPF_FMINNUM
Floating point minnum.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp=nullptr, unsigned Depth=0)
Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind and providing the out param...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
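getPtrStride, together with the load/store helpers listed on this page, is the usual way to test for consecutive accesses. A minimal sketch, assuming PSE and L come from the surrounding analysis; isUnitStrideLoad is a hypothetical name:

#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Instructions.h"
#include <optional>
using namespace llvm;

// Hypothetical helper: true if LI's pointer advances by exactly one element of
// the accessed type per iteration of L.
bool isUnitStrideLoad(LoadInst *LI, PredicatedScalarEvolution &PSE,
                      const Loop *L) {
  Value *Ptr = getLoadStorePointerOperand(LI);   // pointer operand helper
  Type *AccessTy = getLoadStoreType(LI);         // accessed type helper
  std::optional<int64_t> Stride = getPtrStride(PSE, AccessTy, Ptr, L);
  return Stride && *Stride == 1;                 // consecutive forward access
}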
DWARFExpression::Operation Op
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool isVREVMask(ArrayRef< int > M, EVT VT, unsigned BlockSize)
isVREVMask - Check if a vector shuffle corresponds to a VREV instruction with the specified blocksize...
@ DataWithoutLaneMask
Same as Data, but avoids using the get.active.lane.mask intrinsic to calculate the mask and instead i...
@ Data
Use predicate only to mask operations on data in the loop.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
uint64_t getScalarSizeInBits() const
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isFixedLengthVector() const
bool isVector() const
Return true if this is a vector value type.
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
bool isInteger() const
Return true if this is an integer or a vector integer type.
Attributes of a target dependent hardware loop.
bool canAnalyze(LoopInfo &LI)
bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop=false, bool ForceHardwareLoopPHI=false)
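The two HardwareLoopInfo checks above are normally applied in sequence. A minimal sketch; mayFormHardwareLoop is a hypothetical helper, not part of this file:

#include "llvm/Analysis/TargetTransformInfo.h"
using namespace llvm;

// Hypothetical helper: canAnalyze rejects loops whose structure cannot be
// reasoned about at all; isHardwareLoopCandidate additionally requires a
// usable, analyzable trip count.
bool mayFormHardwareLoop(HardwareLoopInfo &HWLoopInfo, ScalarEvolution &SE,
                         LoopInfo &LI, DominatorTree &DT) {
  return HWLoopInfo.canAnalyze(LI) &&
         HWLoopInfo.isHardwareLoopCandidate(SE, LI, DT);
}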
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)
SelectPatternFlavor Flavor
LoopVectorizationLegality * LVL
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Flags describing the kind of vector reduction.
Parameters that control the generic loop unrolling transformation.
bool UpperBound
Allow using trip count upper bound to unroll loops.
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
unsigned DefaultUnrollRuntimeCount
Default unroll count for loops with run-time trip count.
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).
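A target's getUnrollingPreferences override fills these fields to steer the generic unroller. A minimal sketch with illustrative values, not the ones ARMTTIImpl actually sets; setExampleUnrollPrefs is a hypothetical name:

#include "llvm/Analysis/TargetTransformInfo.h"
using namespace llvm;

// Hypothetical helper: example settings only.
void setExampleUnrollPrefs(TargetTransformInfo::UnrollingPreferences &UP) {
  UP.Partial = true;                 // allow partial unrolling
  UP.Runtime = true;                 // allow runtime unrolling
  UP.UpperBound = true;              // may use the trip-count upper bound
  UP.UnrollRemainder = true;         // unroll the runtime remainder loop
  UP.DefaultUnrollRuntimeCount = 4;  // default count for runtime trip counts
  UP.OptSizeThreshold = 0;           // disallow unrolling when optimizing for size
}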
Type Conversion Cost Table.