LLVM: lib/Target/ARM/ARMTargetTransformInfo.cpp Source File
//===- ARMTargetTransformInfo.cpp - ARM specific TTI ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/IR/IntrinsicsARM.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <optional>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "armtti"

static cl::opt<bool> EnableMaskedLoadStores(
    "enable-arm-maskedldst", cl::Hidden, cl::init(true),
    cl::desc("Enable the generation of masked loads and stores"));

static cl::opt<bool> DisableLowOverheadLoops(
    "disable-arm-loloops", cl::Hidden, cl::init(false),
    cl::desc("Disable the generation of low-overhead loops"));

static cl::opt<bool>
    AllowWLSLoops("allow-arm-wlsloops", cl::Hidden, cl::init(true),
                  cl::desc("Enable the generation of WLS loops"));

static cl::opt<bool> UseWidenGlobalArrays(
    "widen-global-strings", cl::Hidden, cl::init(true),
    cl::desc("Enable the widening of global strings to alignment boundaries"));

extern cl::opt<TailPredication::Mode> EnableTailPredication;
extern cl::opt<bool> EnableMaskedGatherScatters;
extern cl::opt<unsigned> MVEMaxSupportedInterleaveFactor;

static cl::opt<int> ArmForceUnrollThreshold(
    "arm-force-unroll-threshold", cl::init(12), cl::Hidden,
    cl::desc(
        "Threshold for forced unrolling of small loops in Arm architecture"));

/// Convert a vector load intrinsic into a simple llvm load instruction.
static Value *simplifyNeonVld1(const IntrinsicInst &II, unsigned MemAlign,
                               InstCombiner::BuilderTy &Builder) {
  auto *IntrAlign = dyn_cast<ConstantInt>(II.getArgOperand(1));
  if (!IntrAlign)
    return nullptr;

  unsigned Alignment = IntrAlign->getLimitedValue() < MemAlign
                           ? MemAlign
                           : IntrAlign->getLimitedValue();

  if (!isPowerOf2_32(Alignment))
    return nullptr;

  return Builder.CreateAlignedLoad(II.getType(), II.getArgOperand(0),
                                   Align(Alignment));
}
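// Illustrative IR sketch of the rewrite above (assuming a v4i32 vld1 with a
// sufficiently aligned pointer):
//   %v = call <4 x i32> @llvm.arm.neon.vld1.v4i32.p0(ptr %p, i32 1)
// becomes
//   %v = load <4 x i32>, ptr %p, align 4
// The helper returns nullptr (no rewrite) when the alignment operand is not a
// constant or the chosen alignment is not a power of two.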
94
bool ARMTTIImpl::areInlineCompatible(const Function *Caller,
                                     const Function *Callee) const {
  const TargetMachine &TM = getTLI()->getTargetMachine();

  const FeatureBitset &CallerBits =
      TM.getSubtargetImpl(*Caller)->getFeatureBits();
  const FeatureBitset &CalleeBits =
      TM.getSubtargetImpl(*Callee)->getFeatureBits();

  // To inline a callee, all features not in the allowed list must match
  // exactly.
  bool MatchExact = (CallerBits & ~InlineFeaturesAllowed) ==
                    (CalleeBits & ~InlineFeaturesAllowed);
  // For features in the allowed list, the callee's features must be a subset
  // of the callers'.
  bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeaturesAllowed) ==
                     (CalleeBits & InlineFeaturesAllowed);
  return MatchExact && MatchSubset;
}
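// In other words: feature bits outside InlineFeaturesAllowed must match
// exactly between caller and callee, while for bits inside the allowed set
// the callee only needs a subset of the caller's features, so a callee built
// with fewer of those optional extensions can still be inlined into a more
// capable caller.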
112
TTI::AddressingModeKind
ARMTTIImpl::getPreferredAddressingMode(const Loop *L,
                                       ScalarEvolution *SE) const {
  if (ST->hasMVEIntegerOps())
    return TTI::AMK_PostIndexed;

  if (L->getHeader()->getParent()->hasOptSize())
    return TTI::AMK_None;

  if (ST->isMClass() && ST->isThumb2() && L->getNumBlocks() == 1)
    return TTI::AMK_PreIndexed;

  return TTI::AMK_None;
}
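// Rationale (roughly): MVE provides cheap post-incrementing loads and stores,
// so post-indexed addressing is preferred there; when optimising for size no
// preference is expressed; and small single-block Thumb2 M-class loops tend
// to benefit from pre-indexed forms instead.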
128
129std::optional<Instruction *>
133 switch (IID) {
134 default:
135 break;
136 case Intrinsic::arm_neon_vld1: {
142 }
143 break;
144 }
145
146 case Intrinsic::arm_neon_vld2:
147 case Intrinsic::arm_neon_vld3:
148 case Intrinsic::arm_neon_vld4:
149 case Intrinsic::arm_neon_vld2lane:
150 case Intrinsic::arm_neon_vld3lane:
151 case Intrinsic::arm_neon_vld4lane:
152 case Intrinsic::arm_neon_vst1:
153 case Intrinsic::arm_neon_vst2:
154 case Intrinsic::arm_neon_vst3:
155 case Intrinsic::arm_neon_vst4:
156 case Intrinsic::arm_neon_vst2lane:
157 case Intrinsic::arm_neon_vst3lane:
158 case Intrinsic::arm_neon_vst4lane: {
162 unsigned AlignArg = II.arg_size() - 1;
163 Value *AlignArgOp = II.getArgOperand(AlignArg);
167 II, AlignArg,
169 false));
170 }
171 break;
172 }
173
174 case Intrinsic::arm_neon_vld1x2:
175 case Intrinsic::arm_neon_vld1x3:
176 case Intrinsic::arm_neon_vld1x4:
177 case Intrinsic::arm_neon_vst1x2:
178 case Intrinsic::arm_neon_vst1x3:
179 case Intrinsic::arm_neon_vst1x4: {
183 Align OldAlign = II.getParamAlign(0).valueOrOne();
184 if (NewAlign > OldAlign)
185 II.addParamAttr(0,
187 break;
188 }
189
190 case Intrinsic::arm_mve_pred_i2v: {
191 Value *Arg = II.getArgOperand(0);
195 II.getType() == ArgArg->getType()) {
197 }
202 II.getType() == ArgArg->getType()) {
204 if (CI->getValue().trunc(16).isAllOnes()) {
209 }
210 }
211 }
214 ScalarKnown)) {
215 return &II;
216 }
217 break;
218 }
219 case Intrinsic::arm_mve_pred_v2i: {
220 Value *Arg = II.getArgOperand(0);
225 }
226
227 if (II.getMetadata(LLVMContext::MD_range))
228 break;
229
231
232 if (auto CurrentRange = II.getRange()) {
233 Range = Range.intersectWith(*CurrentRange);
234 if (Range == CurrentRange)
235 break;
236 }
237
238 II.addRangeRetAttr(Range);
239 II.addRetAttr(Attribute::NoUndef);
240 return &II;
241 }
242 case Intrinsic::arm_mve_vadc:
243 case Intrinsic::arm_mve_vadc_predicated: {
244 unsigned CarryOp =
245 (II.getIntrinsicID() == Intrinsic::arm_mve_vadc_predicated) ? 3 : 2;
246 assert(II.getArgOperand(CarryOp)->getType()->getScalarSizeInBits() == 32 &&
247 "Bad type for intrinsic!");
248
251 CarryKnown)) {
252 return &II;
253 }
254 break;
255 }
256 case Intrinsic::arm_mve_vmldava: {
258 if (I->hasOneUse()) {
263 Value *OpX = I->getOperand(4);
264 Value *OpY = I->getOperand(5);
266
270 {I->getOperand(0), I->getOperand(1),
271 I->getOperand(2), OpZ, OpX, OpY});
272
275 }
276 }
277 return std::nullopt;
278 }
279 }
280 return std::nullopt;
281}
282
287 SimplifyAndSetOp) const {
288
289
290
291
  auto SimplifyNarrowInstrTopBottom = [&](unsigned TopOpc) {
    unsigned IsTop = cast<ConstantInt>(II.getOperand(TopOpc))->getZExtValue();
295
296
297
298 APInt DemandedElts =
301 SimplifyAndSetOp(&II, 0, OrigDemandedElts & DemandedElts, UndefElts);
302
305 return std::nullopt;
306 };
307
308 switch (II.getIntrinsicID()) {
309 default:
310 break;
311 case Intrinsic::arm_mve_vcvt_narrow:
312 SimplifyNarrowInstrTopBottom(2);
313 break;
314 case Intrinsic::arm_mve_vqmovn:
315 SimplifyNarrowInstrTopBottom(4);
316 break;
317 case Intrinsic::arm_mve_vshrn:
318 SimplifyNarrowInstrTopBottom(7);
319 break;
320 }
321
322 return std::nullopt;
323}
324
327 assert(Ty->isIntegerTy());
328
329 unsigned Bits = Ty->getPrimitiveSizeInBits();
330 if (Bits == 0 || Imm.getActiveBits() >= 64)
331 return 4;
332
333 int64_t SImmVal = Imm.getSExtValue();
334 uint64_t ZImmVal = Imm.getZExtValue();
335 if (!ST->isThumb()) {
336 if ((SImmVal >= 0 && SImmVal < 65536) ||
339 return 1;
340 return ST->hasV6T2Ops() ? 2 : 3;
341 }
342 if (ST->isThumb2()) {
343 if ((SImmVal >= 0 && SImmVal < 65536) ||
346 return 1;
347 return ST->hasV6T2Ops() ? 2 : 3;
348 }
349
350 if (Bits == 8 || (SImmVal >= 0 && SImmVal < 256))
351 return 1;
353 return 2;
354
355 return 3;
356}
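// Rough shape of the tiers above: cost 1 for immediates encodable in a single
// mov/mvn/movw (or an 8-bit Thumb1 constant), cost 2 when a movw+movt pair is
// available (v6T2 and later), and cost 3 when the constant has to come from a
// literal pool or a longer synthesis sequence.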
357
358
359
361 const APInt &Imm,
362 Type *Ty) const {
363 if (Imm.isNonNegative() && Imm.getLimitedValue() < 256)
364 return 0;
365
366 return 1;
367}
368
369
370
371
376
379 C->getValue() == Imm && Imm.isNegative() && Imm.isNegatedPowerOf2()) {
380
381 auto isSSatMin = [&](Value *MinInst) {
383 Value *MinLHS, *MinRHS;
389 MinC->getValue() == ((-Imm) - 1))
390 return true;
391 }
392 return false;
393 };
394
400 }
401 return nullptr;
402}
403
404
405
407 if (Imm.getBitWidth() != 64 ||
409 return false;
413 if ()
414 return false;
416}
417
422
423
424
425
426 if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv ||
427 Opcode == Instruction::SRem || Opcode == Instruction::URem) &&
428 Idx == 1)
429 return 0;
430
431
432
433 if (Opcode == Instruction::GetElementPtr && Idx != 0)
434 return 0;
435
436 if (Opcode == Instruction::And) {
437
438 if (Imm == 255 || Imm == 65535)
439 return 0;
440
443 }
444
445 if (Opcode == Instruction::Add)
446
449
450 if (Opcode == Instruction::ICmp && Imm.isNegative() &&
451 Ty->getIntegerBitWidth() == 32) {
452 int64_t NegImm = -Imm.getSExtValue();
453 if (ST->isThumb2() && NegImm < 1<<12)
454
455 return 0;
456 if (ST->isThumb() && NegImm < 1<<8)
457
458 return 0;
459 }
460
461
462 if (Opcode == Instruction::Xor && Imm.isAllOnes())
463 return 0;
464
465
466
467 if (Inst && ((ST->hasV6Ops() && !ST->isThumb()) || ST->isThumb2()) &&
468 Ty->getIntegerBitWidth() <= 32) {
472 return 0;
473 }
474
476 return 0;
477
478
479 if (Inst && Opcode == Instruction::ICmp && Idx == 1 && Imm.isAllOnes()) {
484 }
485
487}
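// Net effect of the special cases above: immediates that fold directly into
// the using instruction (division/remainder second operands, GEP indices,
// and-with-0xff/0xffff, xor-with-all-ones, small negative icmp constants and
// the SSAT/USAT-style patterns) are reported as free, which stops constant
// hoisting from pulling them out into separate materialisations.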
488
493 (ST->hasNEON() || ST->hasMVEIntegerOps())) {
494
495
496
497
498 return 0;
499 }
501}
502
508 int ISD = TLI->InstructionOpcodeToISD(Opcode);
510
511
514 return Cost == 0 ? 0 : 1;
516 };
517 auto IsLegalFPType = [this](EVT VT) {
519 return (EltVT == MVT::f32 && ST->hasVFP2Base()) ||
520 (EltVT == MVT::f64 && ST->hasFP64()) ||
521 (EltVT == MVT::f16 && ST->hasFullFP16());
522 };
523
524 EVT SrcTy = TLI->getValueType(DL, Src);
525 EVT DstTy = TLI->getValueType(DL, Dst);
526
527 if (!SrcTy.isSimple() || !DstTy.isSimple())
528 return AdjustCost(
530
531
532
533
534 if ((ST->hasMVEIntegerOps() &&
535 (Opcode == Instruction::Trunc || Opcode == Instruction::ZExt ||
536 Opcode == Instruction::SExt)) ||
537 (ST->hasMVEFloatOps() &&
538 (Opcode == Instruction::FPExt || Opcode == Instruction::FPTrunc) &&
539 IsLegalFPType(SrcTy) && IsLegalFPType(DstTy)))
542 ST->getMVEVectorCostFactor(CostKind);
543
544
560 };
562 LoadConversionTbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
563 return AdjustCost(Entry->Cost);
564
572
573
574
581 };
582 if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
583 if (const auto *Entry =
585 DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
586 return Entry->Cost * ST->getMVEVectorCostFactor(CostKind);
587 }
588
590
591 {ISD::FP_EXTEND, MVT::v4f32, MVT::v4f16, 1},
592 {ISD::FP_EXTEND, MVT::v8f32, MVT::v8f16, 3},
593 };
594 if (SrcTy.isVector() && ST->hasMVEFloatOps()) {
595 if (const auto *Entry =
597 DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
598 return Entry->Cost * ST->getMVEVectorCostFactor(CostKind);
599 }
600
601
610 };
611 if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
612 if (const auto *Entry =
614 SrcTy.getSimpleVT(), DstTy.getSimpleVT()))
615 return Entry->Cost * ST->getMVEVectorCostFactor(CostKind);
616 }
617
621 };
622 if (SrcTy.isVector() && ST->hasMVEFloatOps()) {
623 if (const auto *Entry =
625 SrcTy.getSimpleVT(), DstTy.getSimpleVT()))
626 return Entry->Cost * ST->getMVEVectorCostFactor(CostKind);
627 }
628 }
629
630
632 I && I->hasOneUse() && ST->hasNEON() && SrcTy.isVector()) {
634
635 { ISD::ADD, MVT::v4i32, MVT::v4i16, 0 },
636 { ISD::ADD, MVT::v8i16, MVT::v8i8, 0 },
637
638 { ISD::SUB, MVT::v4i32, MVT::v4i16, 0 },
639 { ISD::SUB, MVT::v8i16, MVT::v8i8, 0 },
640
641 { ISD::MUL, MVT::v4i32, MVT::v4i16, 0 },
642 { ISD::MUL, MVT::v8i16, MVT::v8i8, 0 },
643
644 { ISD::SHL, MVT::v4i32, MVT::v4i16, 0 },
645 { ISD::SHL, MVT::v8i16, MVT::v8i8, 0 },
646 };
647
649 int UserISD = TLI->InstructionOpcodeToISD(User->getOpcode());
652 SrcTy.getSimpleVT())) {
653 return AdjustCost(Entry->Cost);
654 }
655 }
656
657
658 if (Src->isVectorTy() && ST->hasNEON() &&
659 ((ISD == ISD::FP_ROUND && SrcTy.getScalarType() == MVT::f64 &&
661 (ISD == ISD::FP_EXTEND && SrcTy.getScalarType() == MVT::f32 &&
663 static const CostTblEntry NEONFltDblTbl[] = {
664
666 {ISD::FP_EXTEND, MVT::v2f32, 2},
667 {ISD::FP_EXTEND, MVT::v4f32, 4}};
668
670 if (const auto *Entry = CostTableLookup(NEONFltDblTbl, ISD, LT.second))
671 return AdjustCost(LT.first * Entry->Cost);
672 }
673
674
675
676
684
685
704
705
708
709
712
733
740
741
744
751
758 };
759
760 if (SrcTy.isVector() && ST->hasNEON()) {
763 SrcTy.getSimpleVT()))
764 return AdjustCost(Entry->Cost);
765 }
766
767
789 };
790 if (SrcTy.isFloatingPoint() && ST->hasNEON()) {
793 SrcTy.getSimpleVT()))
794 return AdjustCost(Entry->Cost);
795 }
796
797
819 };
820
821 if (SrcTy.isInteger() && ST->hasNEON()) {
824 SrcTy.getSimpleVT()))
825 return AdjustCost(Entry->Cost);
826 }
827
828
829
830
844 };
845
846 if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
849 SrcTy.getSimpleVT()))
850 return Entry->Cost * ST->getMVEVectorCostFactor(CostKind);
851 }
852
854
855
856
859 int Lanes = 1;
860 if (SrcTy.isFixedLengthVector())
861 Lanes = SrcTy.getVectorNumElements();
862
863 if (IsLegalFPType(SrcTy) && IsLegalFPType(DstTy))
864 return Lanes;
865 else
866 return Lanes * CallCost;
867 }
868
870 SrcTy.isFixedLengthVector()) {
871
872
873 if ((SrcTy.getScalarType() == MVT::i8 ||
874 SrcTy.getScalarType() == MVT::i16 ||
875 SrcTy.getScalarType() == MVT::i32) &&
876 SrcTy.getSizeInBits() > 128 &&
878 return SrcTy.getVectorNumElements() * 2;
879 }
880
881
883
885
886
891 };
892
893 if (SrcTy.isInteger()) {
896 SrcTy.getSimpleVT()))
897 return AdjustCost(Entry->Cost);
898 }
899
900 int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()
901 ? ST->getMVEVectorCostFactor(CostKind)
902 : 1;
903 return AdjustCost(
905}
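// Note on the scaling used above: table entries that apply to MVE subtargets
// are multiplied by ST->getMVEVectorCostFactor(CostKind), reflecting the
// multi-beat execution of MVE vector instructions on M-profile cores, while
// NEON and generic entries are returned through AdjustCost() unchanged.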
906
909 unsigned Index, const Value *Op0,
910 const Value *Op1) const {
911
912
913 if (ST->hasSlowLoadDSubregister() && Opcode == Instruction::InsertElement &&
914 ValTy->isVectorTy() && ValTy->getScalarSizeInBits() <= 32)
915 return 3;
916
917 if (ST->hasNEON() && (Opcode == Instruction::InsertElement ||
918 Opcode == Instruction::ExtractElement)) {
919
920
921 if (cast(ValTy)->getElementType()->isIntegerTy())
922 return 3;
923
924
925
926 if (ValTy->isVectorTy() &&
927 ValTy->getScalarSizeInBits() <= 32)
928 return std::max(
930 2U);
931 }
932
933 if (ST->hasMVEIntegerOps() && (Opcode == Instruction::InsertElement ||
934 Opcode == Instruction::ExtractElement)) {
935
936
937
938 std::pair<InstructionCost, MVT> LT =
940 return LT.first * (ValTy->getScalarType()->isIntegerTy() ? 4 : 1);
941 }
942
944}
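// The constants above encode a few known pain points: cross register-bank
// moves for NEON integer lane accesses are given cost 3, cores with a slow
// VLD1-to-D-subregister path are penalised for element inserts, and MVE
// integer lane accesses are costed at four times the legalised type count.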
945
950 int ISD = TLI->InstructionOpcodeToISD(Opcode);
951
952
954 ST->isThumb() && !ValTy->isVectorTy()) {
955
956 if (TLI->getValueType(DL, ValTy, true) == MVT::Other)
958
959
960
961
962
964
965
967
968
969
970 if (ValTy->isIntegerTy(1))
972
974 }
975
976
977
978
980 if ((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) && Sel &&
983 if (Sel && ValTy->isVectorTy() &&
984 (ValTy->isIntOrIntVectorTy() || ValTy->isFPOrFPVectorTy())) {
985 const Value *LHS, *RHS;
987 unsigned IID = 0;
988 switch (SPF) {
990 IID = Intrinsic::abs;
991 break;
993 IID = Intrinsic::smin;
994 break;
996 IID = Intrinsic::smax;
997 break;
999 IID = Intrinsic::umin;
1000 break;
1002 IID = Intrinsic::umax;
1003 break;
1005 IID = Intrinsic::minnum;
1006 break;
1008 IID = Intrinsic::maxnum;
1009 break;
1010 default:
1011 break;
1012 }
1013 if (IID) {
1014
1015 if (Sel != I)
1016 return 0;
1019 }
1020 }
1021
1022
1023 if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT && CondTy) {
1024
1026 { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 },
1027 { ISD::SELECT, MVT::v8i1, MVT::v8i64, 50 },
1028 { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 }
1029 };
1030
1031 EVT SelCondTy = TLI->getValueType(DL, CondTy);
1032 EVT SelValTy = TLI->getValueType(DL, ValTy);
1037 return Entry->Cost;
1038 }
1039
1041 return LT.first;
1042 }
1043
1044 if (ST->hasMVEIntegerOps() && ValTy->isVectorTy() &&
1045 (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
1049 if (!VecCondTy)
1051
1052
1053 if (Opcode == Instruction::FCmp && !ST->hasMVEFloatOps()) {
1054
1055
1057 true, CostKind) +
1059 false, CostKind) +
1064 }
1065
1067 int BaseCost = ST->getMVEVectorCostFactor(CostKind);
1068
1069
1070
1071
1072
1073 if (LT.second.isVector() && LT.second.getVectorNumElements() > 2) {
1074 if (LT.first > 1)
1075 return LT.first * BaseCost +
1077 false, CostKind);
1078 return BaseCost;
1079 }
1080 }
1081
1082
1083
1084 int BaseCost = 1;
1085 if (ST->hasMVEIntegerOps() && ValTy->isVectorTy())
1086 BaseCost = ST->getMVEVectorCostFactor(CostKind);
1087
1090}
1091
1094 const SCEV *Ptr,
1096
1097
1098
1099
1100 unsigned NumVectorInstToHideOverhead = 10;
1101 int MaxMergeDistance = 64;
1102
1103 if (ST->hasNEON()) {
1106 return NumVectorInstToHideOverhead;
1107
1108
1109
1110 return 1;
1111 }
1113}
1114
1117
1118
1119 switch (II->getIntrinsicID()) {
1120 case Intrinsic::arm_mve_vctp8:
1121 case Intrinsic::arm_mve_vctp16:
1122 case Intrinsic::arm_mve_vctp32:
1123 case Intrinsic::arm_mve_vctp64:
1124 return true;
1125 default:
1126 break;
1127 }
1128 }
1129 return false;
1130}
1131
1133 unsigned ,
1136 return false;
1137
1139
1140 if (VecTy->getNumElements() == 2)
1141 return false;
1142
1143
1145 if (VecWidth != 128 && VecTy->getElementType()->isFloatingPointTy())
1146 return false;
1147 }
1148
1150 return (EltWidth == 32 && Alignment >= 4) ||
1151 (EltWidth == 16 && Alignment >= 2) || (EltWidth == 8);
1152}
1153
1156 return false;
1157
1158 unsigned EltWidth = Ty->getScalarSizeInBits();
1159 return ((EltWidth == 32 && Alignment >= 4) ||
1160 (EltWidth == 16 && Alignment >= 2) || EltWidth == 8);
1161}
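// Both legality checks above reduce to natural alignment of the element type:
// 32-bit elements need at least 4-byte alignment, 16-bit elements need 2-byte
// alignment, and 8-bit elements are always fine, since the masked and
// gather/scatter forms operate lane-by-lane at element granularity.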
1162
1163
1164
1165
1168 unsigned DstAddrSpace = ~0u;
1169 unsigned SrcAddrSpace = ~0u;
1170 const Function *F = I->getParent()->getParent();
1171
1174
    if (!C)
1176 return -1;
1177
1178 const unsigned Size = C->getValue().getZExtValue();
1179 const Align DstAlign = MC->getDestAlign().valueOrOne();
1180 const Align SrcAlign = MC->getSourceAlign().valueOrOne();
1181
1182 MOp = MemOp::Copy(Size, false, DstAlign, SrcAlign,
1183 false);
1184 DstAddrSpace = MC->getDestAddressSpace();
1185 SrcAddrSpace = MC->getSourceAddressSpace();
1186 }
1189
    if (!C)
1191 return -1;
1192
1193 const unsigned Size = C->getValue().getZExtValue();
1194 const Align DstAlign = MS->getDestAlign().valueOrOne();
1195
1196 MOp = MemOp::Set(Size, false, DstAlign,
1197 false, false);
1198 DstAddrSpace = MS->getDestAddressSpace();
1199 }
1200 else
1202
1203 unsigned Limit, Factor = 2;
1204 switch(I->getIntrinsicID()) {
1205 case Intrinsic::memcpy:
1206 Limit = TLI->getMaxStoresPerMemcpy(F->hasMinSize());
1207 break;
1208 case Intrinsic::memmove:
1209 Limit = TLI->getMaxStoresPerMemmove(F->hasMinSize());
1210 break;
1211 case Intrinsic::memset:
1212 Limit = TLI->getMaxStoresPerMemset(F->hasMinSize());
1213 Factor = 1;
1214 break;
1215 default:
1217 }
1218
1219
1220
1221
1222 std::vector MemOps;
1224 if (getTLI()->findOptimalMemOpLowering(C, MemOps, Limit, MOp, DstAddrSpace,
1225 SrcAddrSpace, F->getAttributes()))
1226 return MemOps.size() * Factor;
1227
1228
1229 return -1;
1230}
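// Usage note: the value returned here feeds getMemcpyCost() below. For a
// memcpy the Factor of 2 counts one load plus one store per lowered memory
// operation, memset needs only the store (Factor 1), and -1 signals that the
// size is unknown or too large, so the intrinsic stays a library call.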
1231
1234
1235
1236
1238 return 4;
1240}
1241
1251 "Expected the Mask to match the return size if given");
1253 "Expected the same scalar types");
1254
1256
1258 if (IsExtractSubvector)
1260 if (ST->hasNEON()) {
1263
1270
1275
1277 if (const auto *Entry =
1279 return LT.first * Entry->Cost;
1280 }
1282 static const CostTblEntry NEONShuffleTbl[] = {
1283
1284
1291
1296
1298 if (const auto *Entry =
1300 return LT.first * Entry->Cost;
1301 }
1303 static const CostTblEntry NEONSelShuffleTbl[] = {
1304
1305
1306
1307
1312
1316
1318
1320
1322 if (const auto *Entry = CostTableLookup(NEONSelShuffleTbl,
1324 return LT.first * Entry->Cost;
1325 }
1326 }
1327 if (ST->hasMVEIntegerOps()) {
1330
1336
1339 LT.second))
1340 return LT.first * Entry->Cost * ST->getMVEVectorCostFactor(CostKind);
1341 }
1342
1343 if (!Mask.empty()) {
1345
1346
1347
1348
1349 if (Args.size() >= 1 && isa(Args[0]) &&
1350 (LT.second.getScalarSizeInBits() == 8 ||
1351 LT.second.getScalarSizeInBits() == 16 ||
1352 LT.second.getScalarSizeInBits() == 32) &&
1353 LT.second.getSizeInBits() == 128 &&
1354 ((TLI->getMaxSupportedInterleaveFactor() >= 2 &&
1356 (TLI->getMaxSupportedInterleaveFactor() == 4 &&
1358 return ST->getMVEVectorCostFactor(CostKind) *
1359 std::max(1, LT.first / 4);
1360
1361
1362
1363
1364
1366 (LT.second.getScalarSizeInBits() == 8 ||
1367 LT.second.getScalarSizeInBits() == 16 ||
1368 LT.second.getScalarSizeInBits() == 32) &&
1369 LT.second.getSizeInBits() == 128 &&
1370 ((TLI->getMaxSupportedInterleaveFactor() >= 2 &&
1372 Mask, 2, SrcTy->getElementCount().getKnownMinValue() * 2)) ||
1373 (TLI->getMaxSupportedInterleaveFactor() == 4 &&
1375 Mask, 4, SrcTy->getElementCount().getKnownMinValue() * 2))))
1376 return ST->getMVEVectorCostFactor(CostKind) * LT.first;
1377
1378 if (LT.second.isVector() &&
1379 Mask.size() <= LT.second.getVectorNumElements() &&
1382 return ST->getMVEVectorCostFactor(CostKind) * LT.first;
1383 }
1384 }
1385
1386
1387 if (IsExtractSubvector)
1389 int BaseCost = ST->hasMVEIntegerOps() && SrcTy->isVectorTy()
1390 ? ST->getMVEVectorCostFactor(CostKind)
1391 : 1;
1393 Index, SubTp);
1394}
1395
1400 int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
1402
1403
1404
1405 switch (ISDOpcode) {
1406 default:
1407 break;
1410 return 2;
1412 return 3;
1413 }
1414 }
1415
1417
1418 if (ST->hasNEON()) {
1419 const unsigned FunctionCallDivCost = 20;
1420 const unsigned ReciprocalDivCost = 10;
1422
1423
1424
1425
1426 { ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost},
1427 { ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost},
1428 { ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost},
1429 { ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost},
1430 { ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost},
1431 { ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost},
1432 { ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost},
1433 { ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost},
1434 { ISD::SDIV, MVT::v4i16, ReciprocalDivCost},
1435 { ISD::UDIV, MVT::v4i16, ReciprocalDivCost},
1436 { ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost},
1437 { ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost},
1438 { ISD::SDIV, MVT::v8i8, ReciprocalDivCost},
1439 { ISD::UDIV, MVT::v8i8, ReciprocalDivCost},
1440 { ISD::SREM, MVT::v8i8, 8 * FunctionCallDivCost},
1441 { ISD::UREM, MVT::v8i8, 8 * FunctionCallDivCost},
1442
1443 { ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost},
1444 { ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost},
1445 { ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost},
1446 { ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost},
1447 { ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost},
1448 { ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost},
1449 { ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost},
1450 { ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost},
1451 { ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost},
1452 { ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost},
1453 { ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost},
1454 { ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost},
1455 { ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost},
1456 { ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost},
1457 { ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost},
1458 { ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost},
1459
1460 };
1461
1462 if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second))
1463 return LT.first * Entry->Cost;
1464
1466 Opcode, Ty, CostKind, Op1Info, Op2Info);
1467
1468
1469
1470
1471
1472
1473
1474
1475 if (LT.second == MVT::v2i64 && Op2Info.isUniform() && Op2Info.isConstant())
1477
1478 return Cost;
1479 }
1480
1481
1482
1483 auto LooksLikeAFreeShift = [&]() {
1484 if (ST->isThumb1Only() || Ty->isVectorTy())
1485 return false;
1486
1488 return false;
1490 return false;
1491
1492
1494 case Instruction::Add:
1495 case Instruction::Sub:
1496 case Instruction::And:
1497 case Instruction::Xor:
1498 case Instruction::Or:
1499 case Instruction::ICmp:
1500 return true;
1501 default:
1502 return false;
1503 }
1504 };
1505 if (LooksLikeAFreeShift())
1506 return 0;
1507
1508
1509
1510
1511
1512
1513
1514
1515 auto MulInDSPMLALPattern = [&](const Instruction *I, unsigned Opcode,
1516 Type *Ty) -> bool {
1517 if (!ST->hasDSP())
1518 return false;
1519
    if (!I)
1521 return false;
1522
1523 if (Opcode != Instruction::Mul)
1524 return false;
1525
1526 if (Ty->isVectorTy())
1527 return false;
1528
1529 auto ValueOpcodesEqual = [](const Value *LHS, const Value *RHS) -> bool {
1532 };
1533 auto IsExtInst = [](const Value *V) -> bool {
1535 };
1536 auto IsExtensionFromHalf = [](const Value *V) -> bool {
1537 return cast(V)->getOperand(0)->getType()->isIntegerTy(16);
1538 };
1539
1540
1542 if (!BinOp)
1543 return false;
1544 Value *Op0 = BinOp->getOperand(0);
1545 Value *Op1 = BinOp->getOperand(1);
1546 if (IsExtInst(Op0) && IsExtInst(Op1) && ValueOpcodesEqual(Op0, Op1)) {
1547
      if (!I->getType()->isIntegerTy(32) || !IsExtensionFromHalf(Op0) ||
1549 !IsExtensionFromHalf(Op1))
1550 return false;
1551
1552
1553 for (auto *U : I->users())
1554 if (!IsExtInst(U))
1555 return false;
1556 return true;
1557 }
1558
1559 return false;
1560 };
1561
1562 if (MulInDSPMLALPattern(CxtI, Opcode, Ty))
1563 return 0;
1564
1565
1566
1567 int BaseCost = 1;
1568 if (ST->hasMVEIntegerOps() && Ty->isVectorTy())
1569 BaseCost = ST->getMVEVectorCostFactor(CostKind);
1570
1571
1572
1573
1574
1575 if (TLI->isOperationLegalOrCustomOrPromote(ISDOpcode, LT.second))
1576 return LT.first * BaseCost;
1577
1578
1580 unsigned Num = VTy->getNumElements();
1583
1584
1588 }
1589
1590 return BaseCost;
1591}
1592
1594 Align Alignment,
1599
1601 return 1;
1602
1603
1604 if (TLI->getValueType(DL, Src, true) == MVT::Other)
1607
1608 if (ST->hasNEON() && Src->isVectorTy() && Alignment != Align(16) &&
1610
1611
1613 return LT.first * 4;
1614 }
1615
1616
1617
1619 ((Opcode == Instruction::Load && I->hasOneUse() &&
1621 (Opcode == Instruction::Store && isa(I->getOperand(0))))) {
1623 Type *DstTy =
1624 Opcode == Instruction::Load
1625 ? (*I->user_begin())->getType()
1629 return ST->getMVEVectorCostFactor(CostKind);
1630 }
1631
1632 int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()
1633 ? ST->getMVEVectorCostFactor(CostKind)
1634 : 1;
1637}
1638
1642 switch (MICA.getID()) {
1643 case Intrinsic::masked_scatter:
1644 case Intrinsic::masked_gather:
1646 case Intrinsic::masked_load:
1647 case Intrinsic::masked_store:
1649 }
1651}
1652
1656 unsigned IID = MICA.getID();
1660 if (ST->hasMVEIntegerOps()) {
1661 if (IID == Intrinsic::masked_load &&
1663 return ST->getMVEVectorCostFactor(CostKind);
1664 if (IID == Intrinsic::masked_store &&
1666 return ST->getMVEVectorCostFactor(CostKind);
1667 }
1670
1671
1673}
1674
1678 bool UseMaskForCond, bool UseMaskForGaps) const {
1679 assert(Factor >= 2 && "Invalid interleave factor");
1681
1682
1683 bool EltIs64Bits = DL.getTypeSizeInBits(VecTy->getScalarType()) == 64;
1684
1685 if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits &&
1686 !UseMaskForCond && !UseMaskForGaps) {
1688 auto *SubVecTy =
1690
1691
1692
1693
1694 int BaseCost =
1695 ST->hasMVEIntegerOps() ? ST->getMVEVectorCostFactor(CostKind) : 1;
1696 if (NumElts % Factor == 0 &&
1697 TLI->isLegalInterleavedAccessType(Factor, SubVecTy, Alignment, DL))
1698 return Factor * BaseCost * TLI->getNumInterleavedAccesses(SubVecTy, DL);
1699
1700
1701
1702
1703
1704
1705 if (ST->hasMVEIntegerOps() && Factor == 2 && NumElts / Factor > 2 &&
1707 DL.getTypeSizeInBits(SubVecTy).getFixedValue() <= 64)
1708 return 2 * BaseCost;
1709 }
1710
1713 UseMaskForCond, UseMaskForGaps);
1714}
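// Shape of the fast path above: when the interleave factor is supported by
// the target's VLDn/VSTn instructions, the element type is not 64 bits wide
// and no masking is required, the cost is simply
//   Factor * BaseCost * getNumInterleavedAccesses(SubVecTy, DL)
// rather than the scalarised fallback; MVE additionally costs a factor-2
// access of a small (<= 64-bit) subvector as just 2 * BaseCost.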
1715
1719
1725
1729
1730 assert(DataTy->isVectorTy() && "Can't do gather/scatters on scalar!");
1732
1733
1734
1735 unsigned NumElems = VTy->getNumElements();
1736 unsigned EltSize = VTy->getScalarSizeInBits();
1738
1739
1740
1741
1742
1743
1745 NumElems * LT.first * ST->getMVEVectorCostFactor(CostKind);
1746
1747
1748
1749
1751 NumElems * LT.first + (VariableMask ? NumElems * 5 : 0) +
1756
1757 if (EltSize < 8 || Alignment < EltSize / 8)
1758 return ScalarCost;
1759
1760 unsigned ExtSize = EltSize;
1761
1762 if (I != nullptr) {
1763
1764
1765
1766 if ((I->getOpcode() == Instruction::Load ||
1768 I->hasOneUse()) {
1769 const User *Us = *I->users().begin();
1771
1774 if (((TypeSize == 32 && (EltSize == 8 || EltSize == 16)) ||
1775 (TypeSize == 16 && EltSize == 8)) &&
1776 TypeSize * NumElems == 128) {
1778 }
1779 }
1780 }
1781
1783 if ((I->getOpcode() == Instruction::Store ||
1786
1787 unsigned TypeSize = T->getOperand(0)->getType()->getScalarSizeInBits();
1788 if (((EltSize == 16 && TypeSize == 32) ||
1792 }
1793 }
1794
1795 if (ExtSize * NumElems != 128 || NumElems < 4)
1796 return ScalarCost;
1797
1798
1799 if (ExtSize == 32)
1800 return VectorCost;
1801
1802
1803
1804 if (ExtSize != 8 && ExtSize != 16)
1805 return ScalarCost;
1806
1808 Ptr = BC->getOperand(0);
1810 if (GEP->getNumOperands() != 2)
1811 return ScalarCost;
1812 unsigned Scale = DL.getTypeAllocSize(GEP->getResultElementType());
1813
1814 if (Scale != 1 && Scale * 8 != ExtSize)
1815 return ScalarCost;
1816
1818 if (ZExt->getOperand(0)->getType()->getScalarSizeInBits() <= ExtSize)
1819 return VectorCost;
1820 }
1821 return ScalarCost;
1822 }
1823 return ScalarCost;
1824}
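// Summary of the heuristic above: the vector gather/scatter cost is only used
// when the (possibly extended) element size times the lane count fills a
// 128-bit MVE register and the address computation fits the scaled-offset
// addressing the instructions provide; otherwise the scalarised estimate,
// which also charges for unpacking a variable mask, is returned.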
1825
1828 std::optional FMF,
1830
1831 EVT ValVT = TLI->getValueType(DL, ValTy);
1832 int ISD = TLI->InstructionOpcodeToISD(Opcode);
1834
1835
1836
1837
1839 ((EltSize == 32 && ST->hasVFP2Base()) ||
1840 (EltSize == 64 && ST->hasFP64()) ||
1841 (EltSize == 16 && ST->hasFullFP16()))) {
1843 unsigned VecLimit = ST->hasMVEFloatOps() ? 128 : (ST->hasNEON() ? 64 : -1);
1846 NumElts * EltSize > VecLimit) {
1849 NumElts /= 2;
1850 }
1851
1852
1853
1857 VecCost += ST->getMVEVectorCostFactor(CostKind) * 2;
1858 NumElts /= 2;
1860 ExtractCost = NumElts / 2;
1861
1862 return VecCost + ExtractCost +
1863 NumElts *
1865 }
1866
1868 (EltSize == 64 || EltSize == 32 || EltSize == 16 || EltSize == 8)) {
1870 unsigned VecLimit =
1871 ST->hasMVEIntegerOps() ? 128 : (ST->hasNEON() ? 64 : -1);
1873 while (isPowerOf2_32(NumElts) && NumElts * EltSize > VecLimit) {
1876 NumElts /= 2;
1877 }
1878
1879
1881 NumElts * EltSize == 64) {
1883 VecCost += ST->getMVEVectorCostFactor(CostKind) +
1885 NumElts /= 2;
1886 }
1887
1888
1890 return VecCost + ExtractCost +
1892 Opcode, ValTy->getElementType(), CostKind);
1893 }
1894
1898
1900
1905 };
1906 if (const auto *Entry = CostTableLookup(CostTblAdd, ISD, LT.second))
1907 return Entry->Cost * ST->getMVEVectorCostFactor(CostKind) * LT.first;
1908
1910}
1911
1913 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy,
1915 EVT ValVT = TLI->getValueType(DL, ValTy);
1916 EVT ResVT = TLI->getValueType(DL, ResTy);
1917
1918 int ISD = TLI->InstructionOpcodeToISD(Opcode);
1919
1920 switch (ISD) {
1922 if (ST->hasMVEIntegerOps() && ValVT.isSimple() && ResVT.isSimple()) {
1924
1925
1926
1927
1928
1929
1930
1933 ((LT.second == MVT::v16i8 && RevVTSize <= 32) ||
1934 (LT.second == MVT::v8i16 && RevVTSize <= 32) ||
1935 (LT.second == MVT::v4i32 && RevVTSize <= 64)))
1936 return ST->getMVEVectorCostFactor(CostKind) * LT.first;
1937 }
1938 break;
1939 default:
1940 break;
1941 }
1944}
1945
1950 if (RedOpcode != Instruction::Add)
1952 EVT ValVT = TLI->getValueType(DL, ValTy);
1953 EVT ResVT = TLI->getValueType(DL, ResTy);
1954
1955 if (ST->hasMVEIntegerOps() && ValVT.isSimple() && ResVT.isSimple()) {
1957
1958
1959
1960
1961
1962
1963
1966 ((LT.second == MVT::v16i8 && RevVTSize <= 32) ||
1967 (LT.second == MVT::v8i16 && RevVTSize <= 64) ||
1968 (LT.second == MVT::v4i32 && RevVTSize <= 64)))
1969 return ST->getMVEVectorCostFactor(CostKind) * LT.first;
1970 }
1971
1974}
1975
1980 EVT ValVT = TLI->getValueType(DL, Ty);
1981
1982
1983
1984
1985 if ((IID == Intrinsic::minnum || IID == Intrinsic::maxnum) &&
1991 unsigned VecLimit = ST->hasMVEFloatOps() ? 128 : (ST->hasNEON() ? 64 : -1);
1993 while (isPowerOf2_32(NumElts) && NumElts * EltSize > VecLimit) {
1997 NumElts /= 2;
1998 }
1999
2000
2001
2004 NumElts == 8) {
2005 VecCost += ST->getMVEVectorCostFactor(CostKind) * 2;
2006 NumElts /= 2;
2009
2011 {Ty->getElementType(), Ty->getElementType()},
2012 FMF);
2013 return VecCost + ExtractCost +
2015 }
2016
2017 if (IID == Intrinsic::smin || IID == Intrinsic::smax ||
2018 IID == Intrinsic::umin || IID == Intrinsic::umax) {
2020
2021
2022
2023
2028 };
2030 return Entry->Cost * ST->getMVEVectorCostFactor(CostKind) * LT.first;
2031 }
2032
2034}
2035
2040 switch (Opc) {
2041 case Intrinsic::get_active_lane_mask:
2042
2043
2044
2045
2046
2047
2048
2049 if (ST->hasMVEIntegerOps())
2050 return 0;
2051 break;
2052 case Intrinsic::sadd_sat:
2053 case Intrinsic::ssub_sat:
2054 case Intrinsic::uadd_sat:
2055 case Intrinsic::usub_sat: {
2056 bool IsAdd = (Opc == Intrinsic::sadd_sat || Opc == Intrinsic::ssub_sat);
2057 bool IsSigned = (Opc == Intrinsic::sadd_sat || Opc == Intrinsic::ssub_sat);
2059
2061 if (IsSigned && ST->hasDSP() && ITy->getBitWidth() == 32)
2062 return 1;
2063 if (ST->hasDSP() && (ITy->getBitWidth() == 8 || ITy->getBitWidth() == 16))
2064 return 2;
2065
2066
2075 }
2076
2077 if (!ST->hasMVEIntegerOps())
2078 break;
2079
2081 if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
2082 LT.second == MVT::v16i8) {
2083
2084
2085 unsigned Instrs =
2087 : 4;
2088 return LT.first * ST->getMVEVectorCostFactor(CostKind) * Instrs;
2089 }
2090 break;
2091 }
2092 case Intrinsic::abs:
2093 case Intrinsic::smin:
2094 case Intrinsic::smax:
2095 case Intrinsic::umin:
2096 case Intrinsic::umax: {
2097 if (!ST->hasMVEIntegerOps())
2098 break;
2100
2102 if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
2103 LT.second == MVT::v16i8)
2104 return LT.first * ST->getMVEVectorCostFactor(CostKind);
2105 break;
2106 }
2107 case Intrinsic::minnum:
2108 case Intrinsic::maxnum: {
2109 if (!ST->hasMVEFloatOps())
2110 break;
2113 if (LT.second == MVT::v4f32 || LT.second == MVT::v8f16)
2114 return LT.first * ST->getMVEVectorCostFactor(CostKind);
2115 break;
2116 }
2117 case Intrinsic::fptosi_sat:
2118 case Intrinsic::fptoui_sat: {
2120 break;
2121 bool IsSigned = Opc == Intrinsic::fptosi_sat;
2124
2125 if ((ST->hasVFP2Base() && LT.second == MVT::f32 && MTy == MVT::i32) ||
2126 (ST->hasFP64() && LT.second == MVT::f64 && MTy == MVT::i32) ||
2127 (ST->hasFullFP16() && LT.second == MVT::f16 && MTy == MVT::i32))
2128 return LT.first;
2129
2130
2131 if (ST->hasMVEFloatOps() &&
2132 (LT.second == MVT::v4f32 || LT.second == MVT::v8f16) &&
2134 return LT.first * ST->getMVEVectorCostFactor(CostKind);
2135
2136
2137 if (((ST->hasVFP2Base() && LT.second == MVT::f32) ||
2138 (ST->hasFP64() && LT.second == MVT::f64) ||
2139 (ST->hasFullFP16() && LT.second == MVT::f16) ||
2140 (ST->hasMVEFloatOps() &&
2141 (LT.second == MVT::v4f32 || LT.second == MVT::v8f16))) &&
2144 LT.second.getScalarSizeInBits());
2146 LT.second.isVector() ? ST->getMVEVectorCostFactor(CostKind) : 1;
2148 : Intrinsic::umin,
2149 LegalTy, {LegalTy, LegalTy});
2152 : Intrinsic::umax,
2153 LegalTy, {LegalTy, LegalTy});
2155 return LT.first * Cost;
2156 }
2157
2158
2166 getCastInstrCost(IsSigned ? Instruction::FPToSI : Instruction::FPToUI,
2168 if (IsSigned) {
2174 }
2175 return Cost;
2176 }
2177 }
2178
2180}
2181
  if (!F->isIntrinsic())
    return BaseT::isLoweredToCall(F);
2186
2187 if (F->getName().starts_with("llvm.arm"))
2188 return false;
2189
2190 switch (F->getIntrinsicID()) {
2191 default: break;
2192 case Intrinsic::powi:
2193 case Intrinsic::sin:
2194 case Intrinsic::cos:
2195 case Intrinsic::sincos:
2196 case Intrinsic::pow:
  case Intrinsic::log:
2198 case Intrinsic::log10:
2199 case Intrinsic::log2:
2200 case Intrinsic::exp:
2201 case Intrinsic::exp2:
2202 return true;
2203 case Intrinsic::sqrt:
2204 case Intrinsic::fabs:
2205 case Intrinsic::copysign:
  case Intrinsic::floor:
2207 case Intrinsic::ceil:
2208 case Intrinsic::trunc:
2209 case Intrinsic::rint:
2210 case Intrinsic::nearbyint:
2211 case Intrinsic::round:
2212 case Intrinsic::canonicalize:
2213 case Intrinsic::lround:
2214 case Intrinsic::llround:
2215 case Intrinsic::lrint:
2216 case Intrinsic::llrint:
2217 if (F->getReturnType()->isDoubleTy() && !ST->hasFP64())
2218 return true;
2219 if (F->getReturnType()->isHalfTy() && !ST->hasFullFP16())
2220 return true;
2221
2222
2223
2224 return !ST->hasFPARMv8Base() && !ST->hasVFP2Base();
2225 case Intrinsic::masked_store:
2226 case Intrinsic::masked_load:
2227 case Intrinsic::masked_gather:
2228 case Intrinsic::masked_scatter:
2229 return !ST->hasMVEIntegerOps();
2230 case Intrinsic::sadd_with_overflow:
2231 case Intrinsic::uadd_with_overflow:
2232 case Intrinsic::ssub_with_overflow:
2233 case Intrinsic::usub_with_overflow:
2234 case Intrinsic::sadd_sat:
2235 case Intrinsic::uadd_sat:
2236 case Intrinsic::ssub_sat:
2237 case Intrinsic::usub_sat:
2238 return false;
2239 }
2240
2242}
2243
2245 unsigned ISD = TLI->InstructionOpcodeToISD(I.getOpcode());
2246 EVT VT = TLI->getValueType(DL, I.getType(), true);
2248 return true;
2249
2250
2251
2254 switch(II->getIntrinsicID()) {
2255 case Intrinsic::memcpy:
2256 case Intrinsic::memset:
2257 case Intrinsic::memmove:
2259 default:
2260 if (const Function *F = Call->getCalledFunction())
2262 }
2263 }
2264 return true;
2265 }
2266
2267
2268
2269 switch (I.getOpcode()) {
2270 default:
2271 break;
2272 case Instruction::FPToSI:
2273 case Instruction::FPToUI:
2274 case Instruction::SIToFP:
2275 case Instruction::UIToFP:
2276 case Instruction::FPTrunc:
2277 case Instruction::FPExt:
2278 return !ST->hasFPARMv8Base();
2279 }
2280
2281
2282
2283
2284
2285
2286
2288 switch (ISD) {
2289 default:
2290 break;
2297 return true;
2298 }
2299 }
2300
2301
2303 return false;
2304
2305
2306 if (TLI->useSoftFloat()) {
2307 switch (I.getOpcode()) {
2308 default:
2309 return true;
2310 case Instruction::Alloca:
2311 case Instruction::Load:
2312 case Instruction::Store:
2313 case Instruction::Select:
2314 case Instruction::PHI:
2315 return false;
2316 }
2317 }
2318
2319
2320
2321 if (I.getType()->isDoubleTy() && !ST->hasFP64())
2322 return true;
2323
2324
2325 if (I.getType()->isHalfTy() && !ST->hasFullFP16())
2326 return true;
2327
2328 return false;
2329}
2330
2335
2336
2339 return false;
2340 }
2341
2344 return false;
2345 }
2346
2349 LLVM_DEBUG(dbgs() << "ARMHWLoops: Uncomputable BETC\n");
2350 return false;
2351 }
2352
2353 const SCEV *TripCountSCEV =
2356
2357
2359 LLVM_DEBUG(dbgs() << "ARMHWLoops: Trip count does not fit into 32bits\n");
2360 return false;
2361 }
2362
2363
2364
2365
2366 auto IsHardwareLoopIntrinsic = [](Instruction &I) {
2368 switch (Call->getIntrinsicID()) {
2369 default:
2370 break;
2371 case Intrinsic::start_loop_iterations:
2372 case Intrinsic::test_start_loop_iterations:
2373 case Intrinsic::loop_decrement:
2374 case Intrinsic::loop_decrement_reg:
2375 return true;
2376 }
2377 }
2378 return false;
2379 };
2380
2381
2382
2383
2384 bool IsTailPredLoop = false;
2385 auto ScanLoop = [&](Loop *L) {
2386 for (auto *BB : L->getBlocks()) {
2387 for (auto &I : *BB) {
2390 LLVM_DEBUG(dbgs() << "ARMHWLoops: Bad instruction: " << I << "\n");
2391 return false;
2392 }
2394 IsTailPredLoop |=
2395 II->getIntrinsicID() == Intrinsic::get_active_lane_mask ||
2396 II->getIntrinsicID() == Intrinsic::arm_mve_vctp8 ||
2397 II->getIntrinsicID() == Intrinsic::arm_mve_vctp16 ||
2398 II->getIntrinsicID() == Intrinsic::arm_mve_vctp32 ||
2399 II->getIntrinsicID() == Intrinsic::arm_mve_vctp64;
2400 }
2401 }
2402 return true;
2403 };
2404
2405
2406 for (auto *Inner : *L)
2407 if (!ScanLoop(Inner))
2408 return false;
2409
2410 if (!ScanLoop(L))
2411 return false;
2412
2413
2414
2415
2416
2417 LLVMContext &C = L->getHeader()->getContext();
2423 return true;
2424}
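// Taken together, a loop is only turned into a low-overhead hardware loop
// when the backedge count is computable and fits in 32 bits, nothing in the
// body may be lowered to a call (calls clobber LR, which the loop-end
// instruction uses), and no hardware-loop intrinsics have been inserted
// already; the presence of VCTP/get.active.lane.mask is merely recorded so
// the counter can later be kept in a form suitable for tail-predication.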
2425
2427
2428
2430 return false;
2431
2432
2433
2434
2435
2436
2437
2439 if ((II->getIntrinsicID() == Intrinsic::smin ||
2440 II->getIntrinsicID() == Intrinsic::smax ||
2441 II->getIntrinsicID() == Intrinsic::umin ||
2442 II->getIntrinsicID() == Intrinsic::umax) &&
2443 ++ICmpCount > 1)
2444 return false;
2445
2447 return false;
2448
2449
2450
2452 return false;
2453
2454
    if (!I.getOperand(0)->hasOneUse() || !isa<LoadInst>(I.getOperand(0)))
      return false;
2458
2459
2462 return false;
2463
2464 return true;
2465}
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2481 LLVM_DEBUG(dbgs() << "Tail-predication: checking allowed instructions\n");
2482
2483
2484
2485
2486
2487
2488
2489
2492 bool ReductionsDisabled =
2495
2496 for (auto *I : LiveOuts) {
    if (!I->getType()->isIntegerTy() && !I->getType()->isFloatTy() &&
        !I->getType()->isHalfTy()) {
2499 LLVM_DEBUG(dbgs() << "Don't tail-predicate loop with non-integer/float "
2500 "live-out value\n");
2501 return false;
2502 }
2503 if (ReductionsDisabled) {
2505 return false;
2506 }
2507 }
2508
2509
2511 int ICmpCount = 0;
2512
2513 for (BasicBlock *BB : L->blocks()) {
2514 for (Instruction &I : BB->instructionsWithoutDebug()) {
2516 continue;
2518 LLVM_DEBUG(dbgs() << "Instruction not allowed: "; I.dump());
2519 return false;
2520 }
2521
2523 if (T->getScalarSizeInBits() > 32) {
2525 return false;
2526 }
2530 int64_t NextStride =
2531 getPtrStride(PSE, AccessTy, Ptr, L, DT).value_or(0);
2532 if (NextStride == 1) {
2533
2534
2535
2536 continue;
2537 } else if (NextStride == -1 ||
2541 << "Consecutive strides of 2 found, vld2/vstr2 can't "
2542 "be tail-predicated\n.");
2543 return false;
2544
2546
2547
2548
2549
2552 const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());
2554 continue;
2555 }
2556 }
2558 "tail-predicate\n.");
2559 return false;
2560 }
2561 }
2562 }
2563
2564 LLVM_DEBUG(dbgs() << "tail-predication: all instructions allowed!\n");
2565 return true;
2566}
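// So a loop qualifies for tail-predication only if its live-outs are integer,
// float or half values (reductions can be disabled separately), no element
// type is wider than 32 bits, and every memory access is either contiguous
// (stride 1) or affine with a loop-invariant stride that the gather/scatter
// forms can handle; strides of -1 and interleaving strides beyond what the
// MVE VLDn/VSTn support can cover are rejected.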
2567
2570 LLVM_DEBUG(dbgs() << "Tail-predication not enabled.\n");
2571 return false;
2572 }
2573
2574
2575
2576
2577 if (!ST->hasMVEIntegerOps())
2578 return false;
2579
2582
2583
2584 if (L->getNumBlocks() > 1) {
2585 LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: not a single block "
2586 "loop.\n");
2587 return false;
2588 }
2589
2590 assert(L->isInnermost() && "preferPredicateOverEpilogue: inner-loop expected");
2591
2595 LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
2596 "analyzable.\n");
2597 return false;
2598 }
2599
2602
2603
2604
2606 LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
2607 "profitable.\n");
2608 return false;
2609 }
2610
2613 LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
2614 "a candidate.\n");
2615 return false;
2616 }
2617
2620}
2621
2626
2627
2628
2629
2630
2632}
2636
2637
2638
      !ST->hasMVEIntegerOps() || !any_of(*L->getHeader(), [](Instruction &I) {
        return isa<IntrinsicInst>(I) &&
               cast<IntrinsicInst>(I).getIntrinsicID() ==
                   Intrinsic::get_active_lane_mask;
      });
2645
2646
2647 if (!ST->isMClass())
2649
2650
2653 if (L->getHeader()->getParent()->hasOptSize())
2654 return;
2655
2657 L->getExitingBlocks(ExitingBlocks);
2659 << "Blocks: " << L->getNumBlocks() << "\n"
2660 << "Exit blocks: " << ExitingBlocks.size() << "\n");
2661
2662
2663
2664 if (ExitingBlocks.size() > 2)
2665 return;
2666
2667
2668
2669 if (ST->hasBranchPredictor() && L->getNumBlocks() > 4)
2670 return;
2671
2672
2674 return;
2675
2676
2677
2679 for (auto *BB : L->getBlocks()) {
2680 for (auto &I : *BB) {
2681
2682
2683 if (I.getType()->isVectorTy())
2684 return;
2685
2689 continue;
2690 }
2691 return;
2692 }
2693
2697 }
2698 }
2699
2700
2701
2702
2703
2704
2705
2707 if (ST->isThumb1Only()) {
2708 unsigned ExitingValues = 0;
2710 L->getExitBlocks(ExitBlocks);
2711 for (auto *Exit : ExitBlocks) {
2712
2713
2714 unsigned LiveOuts = count_if(Exit->phis(), [](auto &PH) {
2715 return PH.getNumOperands() != 1 ||
2716 !isa(PH.getOperand(0));
2717 });
2718 ExitingValues = ExitingValues < LiveOuts ? LiveOuts : ExitingValues;
2719 }
2720 if (ExitingValues)
2723 return;
2724 }
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2736 if (ST->hasLOB()) {
2739 auto *Outer = L->getOutermostLoop();
2740 if ((L != Outer && Outer != L->getParentLoop()) ||
2741 (L != Outer && BETC && !SE.isLoopInvariant(BETC, Outer))) {
2743 }
2744 }
2745 }
2746
2749
2756
2757
2758
2760 UP.Force = true;
2761}
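// Overall unrolling policy encoded above: never unroll vectorized or
// call-containing bodies, skip optsize functions, keep Thumb1 loops with
// live-out values intact, be careful not to break low-overhead-loop (LOB)
// formation for nested loops, and force-unroll the remaining small loops
// whose estimated cost stays under the Arm threshold.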
2762
2767
2769 if (!ST->hasMVEIntegerOps())
2770 return false;
2771
2772 unsigned ScalarBits = Ty->getScalarSizeInBits();
2773 switch (Kind) {
2775 return ScalarBits <= 64;
2776 default:
2777 return false;
2778 }
2779}
2780
2782 if (!ST->hasMVEIntegerOps())
2783 return false;
2784 return true;
2785}
2786
2789 bool HasBaseReg, int64_t Scale,
2790 unsigned AddrSpace) const {
2795 AM.Scale = Scale;
2798 if (ST->hasFPAO())
2799 return AM.Scale < 0 ? 1 : 0;
2800 return 0;
2801 }
2803}
2804
2806 if (Thumb) {
2807
2808
2809
2810 return ST->isThumb2() || ST->hasV8MBaselineOps();
2811 } else {
2812
2813
2814 return ST->hasARMOps();
2815 }
2816}
2817
2818
2819
2822
2823 auto areExtDoubled = [](Instruction *Ext) {
2824 return Ext->getType()->getScalarSizeInBits() ==
2825 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
2826 };
2827
2832 return false;
2833
2834 return true;
2835}
2836
2837
2838
2839
2843
  if (!I->getType()->isVectorTy())
2845 return false;
2846
2847 if (ST->hasNEON()) {
2848 switch (I->getOpcode()) {
2849 case Instruction::Sub:
2850 case Instruction::Add: {
2852 return false;
2853 Ops.push_back(&I->getOperandUse(0));
2854 Ops.push_back(&I->getOperandUse(1));
2855 return true;
2856 }
2857 default:
2858 return false;
2859 }
2860 }
2861
2862 if (!ST->hasMVEIntegerOps())
2863 return false;
2864
    if (!I->hasOneUse())
2867 return false;
2869 return Sub->getOpcode() == Instruction::FSub && Sub->getOperand(1) == I;
2870 };
2874 return true;
2875 return false;
2876 };
2877
2878 auto IsSinker = [&](Instruction *I, int Operand) {
2879 switch (I->getOpcode()) {
2880 case Instruction::Add:
2881 case Instruction::Mul:
2882 case Instruction::FAdd:
2883 case Instruction::ICmp:
2884 case Instruction::FCmp:
2885 return true;
2886 case Instruction::FMul:
2887 return !IsFMSMul(I);
2888 case Instruction::Sub:
2889 case Instruction::FSub:
2890 case Instruction::Shl:
2891 case Instruction::LShr:
2892 case Instruction::AShr:
2893 return Operand == 1;
2894 case Instruction::Call:
2896 switch (II->getIntrinsicID()) {
2897 case Intrinsic::fma:
2898 return !IsFMS(I);
2899 case Intrinsic::sadd_sat:
2900 case Intrinsic::uadd_sat:
2901 case Intrinsic::arm_mve_add_predicated:
2902 case Intrinsic::arm_mve_mul_predicated:
2903 case Intrinsic::arm_mve_qadd_predicated:
2904 case Intrinsic::arm_mve_vhadd:
2905 case Intrinsic::arm_mve_hadd_predicated:
2906 case Intrinsic::arm_mve_vqdmull:
2907 case Intrinsic::arm_mve_vqdmull_predicated:
2908 case Intrinsic::arm_mve_vqdmulh:
2909 case Intrinsic::arm_mve_qdmulh_predicated:
2910 case Intrinsic::arm_mve_vqrdmulh:
2911 case Intrinsic::arm_mve_qrdmulh_predicated:
2912 case Intrinsic::arm_mve_fma_predicated:
2913 return true;
2914 case Intrinsic::ssub_sat:
2915 case Intrinsic::usub_sat:
2916 case Intrinsic::arm_mve_sub_predicated:
2917 case Intrinsic::arm_mve_qsub_predicated:
2918 case Intrinsic::arm_mve_hsub_predicated:
2919 case Intrinsic::arm_mve_vhsub:
2920 return Operand == 1;
2921 default:
2922 return false;
2923 }
2924 }
2925 return false;
2926 default:
2927 return false;
2928 }
2929 };
2930
2933
    if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
2935 continue;
2936
2938 if (Shuffle->getOpcode() == Instruction::BitCast)
2940
2944 continue;
2945 if (!IsSinker(I, OpIdx.index()))
2946 continue;
2947
2948
2949
2950 for (Use &U : Op->uses()) {
2952 if (!IsSinker(Insn, U.getOperandNo()))
2953 return false;
2954 }
2955
2957 if (Shuffle != Op)
2958 Ops.push_back(&Op->getOperandUse(0));
2959 Ops.push_back(&OpIdx.value());
2960 }
2961 return true;
2962}
2963
2967 LLVM_DEBUG(dbgs() << "Padding global arrays disabled\n");
2968 return false;
2969 }
2970
2971
2974 return 0;
2975
2976
2977 if (Size % 4 == 0)
2978 return 0;
2979
2980 unsigned NumBytesToPad = 4 - (Size % 4);
2981 unsigned NewSize = Size + NumBytesToPad;
2982
2983
2984
2986
2987 if (NewSize > MaxMemIntrinsicSize)
2988 return 0;
2989
2990 return NumBytesToPad;
2991}
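// Worked example for the rule above: a 6-byte constant string gets
// NumBytesToPad = 4 - (6 % 4) = 2, so the widened 8-byte object can be copied
// with word-sized accesses; arrays that are already a multiple of 4 bytes, or
// whose padded size would exceed the inline-memcpy threshold, are left
// unpadded (return 0).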
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *, const SCEV *, TTI::TargetCostKind) const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLE
signed less or equal
@ ICMP_SGT
signed greater than
@ FCMP_UNO
True if unordered: isnan(X) | isnan(Y) (U L G E bits = 1 0 0 0)
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This class represents a range of values.
This is an important base class in LLVM.
A parsed version of the target data layout string in and methods for querying it.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Convenience struct for specifying and reasoning about fast-math flags.
Container class for subtarget features.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcasted to NumElts elements.
ConstantInt * getTrue()
Get the constant value for i1 true.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
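A minimal sketch, not taken from this file, of how these IRBuilder helpers combine when a combine hook builds replacement IR; the function name and the chosen insertion point are assumptions for illustration.

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Hypothetical helper: splat an all-true i1 across NumElts lanes, inserting
// the new instructions immediately before InsertPt.
static Value *splatTrueBefore(Instruction *InsertPt, unsigned NumElts) {
  IRBuilder<> B(InsertPt);                          // insert before InsertPt
  return B.CreateVectorSplat(NumElts, B.getTrue()); // <NumElts x i1> all-true
}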
The core instruction combiner logic.
const DataLayout & getDataLayout() const
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
IRBuilder< TargetFolder, IRBuilderCallbackInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
DominatorTree & getDominatorTree() const
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
virtual bool SimplifyDemandedBits(Instruction *I, unsigned OpNo, const APInt &DemandedMask, KnownBits &Known, const SimplifyQuery &Q, unsigned Depth=0)=0
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
AssumptionCache & getAssumptionCache() const
static InstructionCost getInvalid(CostType Val=0)
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
Drive the analysis of memory accesses in the loop.
const PredicatedScalarEvolution & getPSE() const
Used to add runtime SCEV checks.
LoopVectorizationLegality checks if it is legal to vectorize a loop, and to what vectorization factor...
LoopInfo * getLoopInfo() const
DominatorTree * getDominatorTree() const
AssumptionCache * getAssumptionCache() const
const LoopAccessInfo * getLAI() const
ScalarEvolution * getScalarEvolution() const
Represents a single loop in the control flow graph.
const FeatureBitset & getFeatureBits() const
Information for memory intrinsic cost model.
Align getAlignment() const
unsigned getAddressSpace() const
Type * getDataType() const
const Value * getPointer() const
bool getVariableMask() const
Intrinsic::ID getID() const
const Instruction * getInst() const
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool hasLoopInvariantBackedgeTakenCount(const Loop *L)
Return true if the specified loop has an analyzable loop-invariant backedge-taken count.
APInt getUnsignedRangeMax(const SCEV *S)
Determine the max of the unsigned range for a particular SCEV.
LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
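A hedged sketch of how these ScalarEvolution queries fit together when a target decides whether a counted hardware loop is feasible; getTripCountSCEV is an invented helper name, not an API from this file.

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;

// Hypothetical helper: trip count = backedge-taken count + 1, or nullptr if
// the count is not analyzable / not loop-invariant.
static const SCEV *getTripCountSCEV(Loop *L, ScalarEvolution &SE) {
  if (!SE.hasLoopInvariantBackedgeTakenCount(L))
    return nullptr;
  const SCEV *BTC = SE.getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(BTC))
    return nullptr;
  return SE.getAddExpr(BTC, SE.getOne(BTC->getType()));
}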
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
Provides information about what library functions are available for the current target.
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
virtual bool isLoweredToCall(const Function *F) const
bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr, int64_t MergeDistance) const
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind) const override
MaskKind
Some targets only support masked load/store with a constant mask.
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
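For illustration only, a hedged sketch of querying the cost model under each cost kind; queryVectorAddCosts is an invented name and the choice of v4i32 add is arbitrary.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Hypothetical query: cost of a v4i32 add under each cost kind.
static void queryVectorAddCosts(const TargetTransformInfo &TTI,
                                LLVMContext &Ctx) {
  auto *VTy = FixedVectorType::get(Type::getInt32Ty(Ctx), 4);
  for (TargetTransformInfo::TargetCostKind Kind :
       {TargetTransformInfo::TCK_RecipThroughput,
        TargetTransformInfo::TCK_CodeSize,
        TargetTransformInfo::TCK_SizeAndLatency}) {
    InstructionCost C = TTI.getArithmeticInstrCost(Instruction::Add, VTy, Kind);
    (void)C; // would feed a profitability decision in real code
  }
}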
static bool requiresOrderedReduction(std::optional< FastMathFlags > FMF)
A helper function to determine the type of reduction algorithm used for a given Opcode and set of Fas...
@ TCC_Expensive
The cost of a 'div' instruction on x86.
AddressingModeKind
Which addressing mode Loop Strength Reduction will try to generate.
@ AMK_PostIndexed
Prefer post-indexed addressing mode.
@ AMK_PreIndexed
Prefer pre-indexed addressing mode.
@ AMK_None
Don't prefer any addressing mode.
ShuffleKind
The various kinds of shuffle patterns for vector queries.
@ SK_Select
Selects elements from the corresponding lane of either source operand.
@ SK_PermuteSingleSrc
Shuffle elements of single source vector with any shuffle mask.
@ SK_Broadcast
Broadcast element 0 to all other elements.
@ SK_Reverse
Reverse the order of the vector.
@ SK_ExtractSubvector
ExtractSubvector Index indicates start offset.
CastContextHint
Represents a hint about the context in which a cast is used.
@ Masked
The cast is used with a masked load/store.
@ None
The cast is not used with a load/store of any kind.
@ Normal
The cast is used with a normal load/store.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
bool isArrayTy() const
True if this is an instance of ArrayType.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Type * getArrayElementType() const
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
int getSOImmVal(unsigned Arg)
getSOImmVal - Given a 32-bit immediate, if it is something that can fit into an shifter_operand immed...
bool isThumbImmShiftedVal(unsigned V)
isThumbImmShiftedVal - Return true if the specified value can be obtained by left shifting a 8-bit im...
int getT2SOImmVal(unsigned Arg)
getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit into a Thumb-2 shifter_oper...
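A hedged sketch of the immediate-encodability check these helpers enable; isCheapARMImmediate is an invented name, and the include is the in-tree relative path used inside lib/Target/ARM.

#include "MCTargetDesc/ARMAddressingModes.h" // in-tree include within lib/Target/ARM
#include <cstdint>
using namespace llvm;

// Hypothetical helper: can Imm be materialized for free as a shifter-operand
// style immediate on the selected subtarget?
static bool isCheapARMImmediate(uint32_t Imm, bool IsThumb2) {
  if (IsThumb2)
    return ARM_AM::getT2SOImmVal(Imm) != -1; // Thumb-2 modified immediate
  return ARM_AM::getSOImmVal(Imm) != -1;     // ARM shifter-operand immediate
}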
@ C
The default llvm calling convention, compatible with C.
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types a...
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
@ SIGN_EXTEND
Conversion operators.
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
@ SHL
Shift and rotation operations.
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
auto m_Undef()
Match an arbitrary undef constant.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
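A small sketch, not from this file, of how these PatternMatch matchers compose; both helper names are invented for illustration.

#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Matches a call to llvm.fabs and captures its operand.
static bool isFabsOf(Value *V, Value *&X) {
  return match(V, m_Intrinsic<Intrinsic::fabs>(m_Value(X)));
}

// Matches "fneg X", whether written as the fneg instruction or as "fsub -0.0, X".
static bool isNegationOf(Value *V, Value *&X) {
  return match(V, m_FNeg(m_Value(X)));
}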
@ ForceEnabledNoReductions
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
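To show the lookup idiom, a hedged sketch with an invented cost table and fallback value; real backends key many such tables by ISD opcode and legalized MVT.

#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/ISDOpcodes.h"
using namespace llvm;

// Hypothetical per-type multiply costs; values are illustrative only.
static unsigned lookupVectorMulCost(MVT VT) {
  static const CostTblEntry Tbl[] = {
      {ISD::MUL, MVT::v8i16, 1},
      {ISD::MUL, MVT::v4i32, 2},
  };
  if (const auto *Entry = CostTableLookup(Tbl, ISD::MUL, VT))
    return Entry->Cost;
  return 4; // assumed fallback for types not in the table
}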
LLVM_ABI bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
TypeConversionCostTblEntryT< unsigned > TypeConversionCostTblEntry
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
@ Runtime
Detect stack use after return if not disabled runtime with (ASAN_OPTIONS=detect_stack_use_after_retur...
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
auto dyn_cast_or_null(const Y &Val)
Align getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI SmallVector< Instruction *, 8 > findDefsUsedOutsideOfLoop(Loop *L)
Returns the instructions that use values defined in the loop.
SelectPatternFlavor
Specific patterns of select instructions we can match.
@ SPF_ABS
Absolute value.
@ SPF_FMAXNUM
Floating point maxnum.
@ SPF_UMIN
Unsigned minimum.
@ SPF_UMAX
Unsigned maximum.
@ SPF_SMAX
Signed maximum.
@ SPF_FMINNUM
Floating point minnum.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp=nullptr, unsigned Depth=0)
Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind and providing the out param...
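A hedged sketch of how matchSelectPattern and SelectPatternFlavor are typically consumed; isSignedMinMax is an invented helper name.

#include "llvm/Analysis/ValueTracking.h"
using namespace llvm;

// Hypothetical helper: does V compute a signed min or signed max idiom?
static bool isSignedMinMax(Value *V) {
  Value *LHS, *RHS;
  SelectPatternFlavor SPF = matchSelectPattern(V, LHS, RHS).Flavor;
  return SPF == SPF_SMIN || SPF == SPF_SMAX;
}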
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isa(const From &Val)
isa - Return true if the parameter to the template is an instance of one of the template type argu...
RecurKind
These are the kinds of recurrences that we support.
@ Sub
Subtraction of integers.
DWARFExpression::Operation Op
CostTblEntryT< unsigned > CostTblEntry
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
decltype(auto) cast(const From &Val)
cast - Return the argument parameter cast to the specified type.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool isVREVMask(ArrayRef< int > M, EVT VT, unsigned BlockSize)
isVREVMask - Check if a vector shuffle corresponds to a VREV instruction with the specified blocksize...
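For concreteness (an assumption about lane numbering, not text from this page): the shuffle mask that a VREV64.16 of a v8i16 performs, which is the kind of pattern isVREVMask with BlockSize == 64 is meant to recognise.

// Lanes reversed within each 64-bit block of a v8i16 vector.
static const int VREV64MaskV8i16[] = {3, 2, 1, 0, 7, 6, 5, 4};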
@ DataWithoutLaneMask
Same as Data, but avoids using the get.active.lane.mask intrinsic to calculate the mask and instead i...
@ Data
Use predicate only to mask operations on data in the loop.
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DominatorTree &DT, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
uint64_t getScalarSizeInBits() const
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
bool isInteger() const
Return true if this is an integer or a vector integer type.
Attributes of a target dependent hardware loop.
LLVM_ABI bool canAnalyze(LoopInfo &LI)
LLVM_ABI bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop=false, bool ForceHardwareLoopPHI=false)
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)
SelectPatternFlavor Flavor
LoopVectorizationLegality * LVL
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Parameters that control the generic loop unrolling transformation.
bool UpperBound
Allow using trip count upper bound to unroll loops.
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
unsigned DefaultUnrollRuntimeCount
Default unroll count for loops with run-time trip count.
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).
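To show how these knobs are consumed, a hedged sketch of a getUnrollingPreferences-style override; the particular values are invented for illustration and are not the ARM backend's actual tuning.

#include "llvm/Analysis/TargetTransformInfo.h"
using namespace llvm;

// Hypothetical tuning; every value below is an assumption.
static void tuneUnrolling(TargetTransformInfo::UnrollingPreferences &UP) {
  UP.Partial = true;                // allow partial unrolling
  UP.Runtime = true;                // allow runtime unrolling
  UP.UpperBound = true;             // may unroll using the trip-count upper bound
  UP.UnrollRemainder = true;        // unroll the runtime remainder loop too
  UP.DefaultUnrollRuntimeCount = 4; // default factor for runtime trip counts
  UP.OptSizeThreshold = 0;          // effectively disable when optimizing for size
}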