LLVM: lib/Target/ARM/ARMTargetTransformInfo.cpp Source File
#include "llvm/IR/IntrinsicsARM.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <optional>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "armtti"
46
static cl::opt<bool> EnableMaskedLoadStores(
    "enable-arm-maskedldst", cl::Hidden, cl::init(true),
    cl::desc("Enable the generation of masked loads and stores"));

static cl::opt<bool> DisableLowOverheadLoops(
    "disable-arm-loloops", cl::Hidden, cl::init(false),
    cl::desc("Disable the generation of low-overhead loops"));

static cl::opt<bool> AllowWLSLoops(
    "allow-arm-wlsloops", cl::Hidden, cl::init(true),
    cl::desc("Enable the generation of WLS loops"));

static cl::opt<bool> UseWidenGlobalArrays(
    "widen-global-strings", cl::Hidden, cl::init(true),
    cl::desc("Enable the widening of global strings to alignment boundaries"));
extern cl::opt<TailPredication::Mode> EnableTailPredication;

extern cl::opt<bool> EnableMaskedGatherScatters;

extern cl::opt<unsigned> MVEMaxSupportedInterleaveFactor;

/// Convert a vector load intrinsic into a simple llvm load instruction.
static Value *simplifyNeonVld1(const IntrinsicInst &II, unsigned MemAlign,
                               InstCombiner::BuilderTy &Builder) {
  auto *IntrAlign = dyn_cast<ConstantInt>(II.getArgOperand(1));

  if (!IntrAlign)
    return nullptr;

  unsigned Alignment = IntrAlign->getLimitedValue() < MemAlign
                           ? MemAlign
                           : IntrAlign->getLimitedValue();

  if (!isPowerOf2_32(Alignment))
    return nullptr;

  return Builder.CreateAlignedLoad(II.getType(), II.getArgOperand(0),
                                   Align(Alignment));
}
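
// Illustrative sketch (not upstream code; helper name is hypothetical): the
// alignment chosen by simplifyNeonVld1 is simply the larger of the intrinsic's
// alignment argument and the alignment known from the surrounding context.
static unsigned examplePickVld1Alignment(unsigned IntrAlign, unsigned MemAlign) {
  return IntrAlign < MemAlign ? MemAlign : IntrAlign; // e.g. (8, 16) -> 16
}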
89
bool ARMTTIImpl::areInlineCompatible(const Function *Caller,
                                     const Function *Callee) const {
  const TargetMachine &TM = getTLI()->getTargetMachine();

  const FeatureBitset &CallerBits =
      TM.getSubtargetImpl(*Caller)->getFeatureBits();
  const FeatureBitset &CalleeBits =
      TM.getSubtargetImpl(*Callee)->getFeatureBits();

  // Features outside the allowed list must match exactly.
  bool MatchExact = (CallerBits & ~InlineFeaturesAllowed) ==
                    (CalleeBits & ~InlineFeaturesAllowed);

  // For features in the allowed list, the callee's features must be a subset
  // of the caller's.
  bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeaturesAllowed) ==
                     (CalleeBits & InlineFeaturesAllowed);
  return MatchExact && MatchSubset;
}
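
// Illustrative sketch (hypothetical helper using plain integers instead of
// FeatureBitset): inlining is allowed when the features outside the allowed
// set match exactly and, within the allowed set, the callee's features are a
// subset of the caller's.
static bool exampleInlineCompatible(uint64_t CallerBits, uint64_t CalleeBits,
                                    uint64_t AllowedMask) {
  bool MatchExact = (CallerBits & ~AllowedMask) == (CalleeBits & ~AllowedMask);
  bool MatchSubset =
      ((CallerBits & CalleeBits) & AllowedMask) == (CalleeBits & AllowedMask);
  return MatchExact && MatchSubset;
}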

TTI::AddressingModeKind
ARMTTIImpl::getPreferredAddressingMode(const Loop *L,
                                       ScalarEvolution *SE) const {
  if (ST->hasMVEIntegerOps())
    return TTI::AMK_PostIndexed;

  if (L->getHeader()->getParent()->hasOptSize())
    return TTI::AMK_None;

  if (ST->isMClass() && ST->isThumb2() &&
      L->getNumBlocks() == 1)
    return TTI::AMK_PreIndexed;

  return TTI::AMK_None;
}
123
std::optional<Instruction *>
ARMTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
  using namespace PatternMatch;
  Intrinsic::ID IID = II.getIntrinsicID();
  switch (IID) {
  default:
    break;
  case Intrinsic::arm_neon_vld1: {
    Align MemAlign =
        getKnownAlignment(II.getArgOperand(0), IC.getDataLayout(), &II,
                          &IC.getAssumptionCache(), &IC.getDominatorTree());
    if (Value *V = simplifyNeonVld1(II, MemAlign.value(), IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  }
140
141 case Intrinsic::arm_neon_vld2:
142 case Intrinsic::arm_neon_vld3:
143 case Intrinsic::arm_neon_vld4:
144 case Intrinsic::arm_neon_vld2lane:
145 case Intrinsic::arm_neon_vld3lane:
146 case Intrinsic::arm_neon_vld4lane:
147 case Intrinsic::arm_neon_vst1:
148 case Intrinsic::arm_neon_vst2:
149 case Intrinsic::arm_neon_vst3:
150 case Intrinsic::arm_neon_vst4:
151 case Intrinsic::arm_neon_vst2lane:
152 case Intrinsic::arm_neon_vst3lane:
153 case Intrinsic::arm_neon_vst4lane: {
    unsigned AlignArg = II.arg_size() - 1;
    Value *AlignArgOp = II.getArgOperand(AlignArg);
    MaybeAlign Align = cast<ConstantInt>(AlignArgOp)->getMaybeAlignValue();
162 II, AlignArg,
164 false));
165 }
166 break;
167 }
168
169 case Intrinsic::arm_neon_vld1x2:
170 case Intrinsic::arm_neon_vld1x3:
171 case Intrinsic::arm_neon_vld1x4:
172 case Intrinsic::arm_neon_vst1x2:
173 case Intrinsic::arm_neon_vst1x3:
174 case Intrinsic::arm_neon_vst1x4: {
178 Align OldAlign = II.getParamAlign(0).valueOrOne();
179 if (NewAlign > OldAlign)
180 II.addParamAttr(0,
182 break;
183 }
184
185 case Intrinsic::arm_mve_pred_i2v: {
    Value *Arg = II.getArgOperand(0);
    Value *ArgArg;
    if (match(Arg, PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_v2i>(
                       PatternMatch::m_Value(ArgArg))) &&
        II.getType() == ArgArg->getType()) {
      return IC.replaceInstUsesWith(II, ArgArg);
    }
    Value *XorMask;
    if (match(Arg, m_Xor(PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_v2i>(
                             PatternMatch::m_Value(ArgArg)),
                         PatternMatch::m_Value(XorMask))) &&
        II.getType() == ArgArg->getType()) {
      if (auto *CI = dyn_cast<ConstantInt>(XorMask)) {
        if (CI->getValue().trunc(16).isAllOnes()) {
          auto TrueVector = IC.Builder.CreateVectorSplat(
              cast<FixedVectorType>(II.getType())->getNumElements(),
              IC.Builder.getTrue());
          return BinaryOperator::Create(Instruction::Xor, ArgArg, TrueVector);
204 }
205 }
206 }
209 ScalarKnown)) {
210 return &II;
211 }
212 break;
213 }
214 case Intrinsic::arm_mve_pred_v2i: {
215 Value *Arg = II.getArgOperand(0);
    if (match(Arg, PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_i2v>(
220 }
221
222 if (II.getMetadata(LLVMContext::MD_range))
223 break;
224
226
227 if (auto CurrentRange = II.getRange()) {
229 if (Range == CurrentRange)
230 break;
231 }
232
233 II.addRangeRetAttr(Range);
234 II.addRetAttr(Attribute::NoUndef);
235 return &II;
236 }
237 case Intrinsic::arm_mve_vadc:
238 case Intrinsic::arm_mve_vadc_predicated: {
239 unsigned CarryOp =
240 (II.getIntrinsicID() == Intrinsic::arm_mve_vadc_predicated) ? 3 : 2;
241 assert(II.getArgOperand(CarryOp)->getType()->getScalarSizeInBits() == 32 &&
242 "Bad type for intrinsic!");
243
246 CarryKnown)) {
247 return &II;
248 }
249 break;
250 }
251 case Intrinsic::arm_mve_vmldava: {
253 if (I->hasOneUse()) {
254 auto *User = cast(*I->user_begin());
258 Value *OpX = I->getOperand(4);
259 Value *OpY = I->getOperand(5);
261
265 {I->getOperand(0), I->getOperand(1),
266 I->getOperand(2), OpZ, OpX, OpY});
267
270 }
271 }
272 return std::nullopt;
273 }
274 }
275 return std::nullopt;
276}
277
282 SimplifyAndSetOp) const {
283
284
285
286
287 auto SimplifyNarrowInstrTopBottom =[&](unsigned TopOpc) {
288 unsigned NumElts = cast(II.getType())->getNumElements();
289 unsigned IsTop = cast(II.getOperand(TopOpc))->getZExtValue();
290
291
292
293 APInt DemandedElts =
296 SimplifyAndSetOp(&II, 0, OrigDemandedElts & DemandedElts, UndefElts);
297
300 return std::nullopt;
301 };
302
303 switch (II.getIntrinsicID()) {
304 default:
305 break;
306 case Intrinsic::arm_mve_vcvt_narrow:
307 SimplifyNarrowInstrTopBottom(2);
308 break;
309 case Intrinsic::arm_mve_vqmovn:
310 SimplifyNarrowInstrTopBottom(4);
311 break;
312 case Intrinsic::arm_mve_vshrn:
313 SimplifyNarrowInstrTopBottom(7);
314 break;
315 }
316
317 return std::nullopt;
318}
319
323
325 if (Bits == 0 || Imm.getActiveBits() >= 64)
326 return 4;
327
328 int64_t SImmVal = Imm.getSExtValue();
329 uint64_t ZImmVal = Imm.getZExtValue();
330 if (!ST->isThumb()) {
331 if ((SImmVal >= 0 && SImmVal < 65536) ||
334 return 1;
335 return ST->hasV6T2Ops() ? 2 : 3;
336 }
338 if ((SImmVal >= 0 && SImmVal < 65536) ||
341 return 1;
342 return ST->hasV6T2Ops() ? 2 : 3;
343 }
344
345 if (Bits == 8 || (SImmVal >= 0 && SImmVal < 256))
346 return 1;
348 return 2;
349
350 return 3;
351}
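
// Illustrative sketch (a simplification of the cost logic above, not the real
// API): an immediate that fits in 16 bits is one MOV/MOVW; otherwise assume a
// MOVW+MOVT pair on v6T2+ targets and a longer sequence before that.
static int exampleScalarImmCost(int64_t SImm, bool HasV6T2) {
  if (SImm >= 0 && SImm < 65536)
    return 1;
  return HasV6T2 ? 2 : 3;
}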
352
353
354
357 if (Imm.isNonNegative() && Imm.getLimitedValue() < 256)
358 return 0;
359
360 return 1;
361}
362
363
364
365
370
373 C->getValue() == Imm && Imm.isNegative() && Imm.isNegatedPowerOf2()) {
374
375 auto isSSatMin = [&](Value *MinInst) {
376 if (isa(MinInst)) {
377 Value *MinLHS, *MinRHS;
383 MinC->getValue() == ((-Imm) - 1))
384 return true;
385 }
386 return false;
387 };
388
390 return cast(Inst->getOperand(1))->getOperand(1);
394 }
395 return nullptr;
396}
397
398
399
401 if (Imm.getBitWidth() != 64 ||
403 return false;
405 if ( && isa(Inst) && Inst->hasOneUse())
407 if ()
408 return false;
409 return isa(FP);
410}
411
416
417
418
419
420 if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv ||
421 Opcode == Instruction::SRem || Opcode == Instruction::URem) &&
422 Idx == 1)
423 return 0;
424
425
426
427 if (Opcode == Instruction::GetElementPtr && Idx != 0)
428 return 0;
429
430 if (Opcode == Instruction::And) {
431
432 if (Imm == 255 || Imm == 65535)
433 return 0;
434
437 }
438
439 if (Opcode == Instruction::Add)
440
443
444 if (Opcode == Instruction::ICmp && Imm.isNegative() &&
446 int64_t NegImm = -Imm.getSExtValue();
447 if (ST->isThumb2() && NegImm < 1<<12)
448
449 return 0;
450 if (ST->isThumb() && NegImm < 1<<8)
451
452 return 0;
453 }
454
455
456 if (Opcode == Instruction::Xor && Imm.isAllOnes())
457 return 0;
458
459
460
461 if (Inst && ((ST->hasV6Ops() && !ST->isThumb()) || ST->isThumb2()) &&
464 (isa(Inst) && Inst->hasOneUse() &&
466 return 0;
467 }
468
470 return 0;
471
472
473 if (Inst && Opcode == Instruction::ICmp && Idx == 1 && Imm.isAllOnes()) {
478 }
479
481}
482
487 (ST->hasNEON() || ST->hasMVEIntegerOps())) {
488
489
490
491
492 return 0;
493 }
495}
496
503 assert(ISD && "Invalid opcode");
504
505
508 return Cost == 0 ? 0 : 1;
510 };
511 auto IsLegalFPType = [this](EVT VT) {
513 return (EltVT == MVT::f32 && ST->hasVFP2Base()) ||
514 (EltVT == MVT::f64 && ST->hasFP64()) ||
515 (EltVT == MVT::f16 && ST->hasFullFP16());
516 };
517
520
522 return AdjustCost(
524
525
526
527
528 if ((ST->hasMVEIntegerOps() &&
529 (Opcode == Instruction::Trunc || Opcode == Instruction::ZExt ||
530 Opcode == Instruction::SExt)) ||
531 (ST->hasMVEFloatOps() &&
532 (Opcode == Instruction::FPExt || Opcode == Instruction::FPTrunc) &&
533 IsLegalFPType(SrcTy) && IsLegalFPType(DstTy)))
537
538
554 };
557 return AdjustCost(Entry->Cost);
558
566
567
568
575 };
576 if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
577 if (const auto *Entry =
581 }
582
584
587 };
588 if (SrcTy.isVector() && ST->hasMVEFloatOps()) {
589 if (const auto *Entry =
593 }
594
595
604 };
605 if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
606 if (const auto *Entry =
610 }
611
615 };
616 if (SrcTy.isVector() && ST->hasMVEFloatOps()) {
617 if (const auto *Entry =
621 }
622 }
623
624
626 I && I->hasOneUse() && ST->hasNEON() && SrcTy.isVector()) {
628
629 { ISD::ADD, MVT::v4i32, MVT::v4i16, 0 },
630 { ISD::ADD, MVT::v8i16, MVT::v8i8, 0 },
631
632 { ISD::SUB, MVT::v4i32, MVT::v4i16, 0 },
633 { ISD::SUB, MVT::v8i16, MVT::v8i8, 0 },
634
635 { ISD::MUL, MVT::v4i32, MVT::v4i16, 0 },
636 { ISD::MUL, MVT::v8i16, MVT::v8i8, 0 },
637
638 { ISD::SHL, MVT::v4i32, MVT::v4i16, 0 },
639 { ISD::SHL, MVT::v8i16, MVT::v8i8, 0 },
640 };
641
642 auto *User = cast(*I->user_begin());
647 return AdjustCost(Entry->Cost);
648 }
649 }
650
651
652 if (Src->isVectorTy() && ST->hasNEON() &&
657 static const CostTblEntry NEONFltDblTbl[] = {
658
662
664 if (const auto *Entry = CostTableLookup(NEONFltDblTbl, ISD, LT.second))
665 return AdjustCost(LT.first * Entry->Cost);
666 }
667
668
669
670
678
679
698
699
702
703
706
727
734
735
738
745
752 };
753
754 if (SrcTy.isVector() && ST->hasNEON()) {
758 return AdjustCost(Entry->Cost);
759 }
760
761
783 };
788 return AdjustCost(Entry->Cost);
789 }
790
791
813 };
814
815 if (SrcTy.isInteger() && ST->hasNEON()) {
819 return AdjustCost(Entry->Cost);
820 }
821
822
823
824
838 };
839
840 if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
845 }
846
848
849
850
853 int Lanes = 1;
856
857 if (IsLegalFPType(SrcTy) && IsLegalFPType(DstTy))
858 return Lanes;
859 else
860 return Lanes * CallCost;
861 }
862
863 if (ISD == ISD::TRUNCATE && ST->hasMVEIntegerOps() &&
865
866
873 }
874
875
877
879
880
885 };
886
891 return AdjustCost(Entry->Cost);
892 }
893
894 int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()
896 : 1;
897 return AdjustCost(
899}
900
903 unsigned Index, Value *Op0,
905
906
907 if (ST->hasSlowLoadDSubregister() && Opcode == Instruction::InsertElement &&
909 return 3;
910
911 if (ST->hasNEON() && (Opcode == Instruction::InsertElement ||
912 Opcode == Instruction::ExtractElement)) {
913
914
915 if (cast(ValTy)->getElementType()->isIntegerTy())
916 return 3;
917
918
919
922 return std::max(
924 2U);
925 }
926
927 if (ST->hasMVEIntegerOps() && (Opcode == Instruction::InsertElement ||
928 Opcode == Instruction::ExtractElement)) {
929
930
931
932 std::pair<InstructionCost, MVT> LT =
935 }
936
938}
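
// Illustrative sketch (hypothetical helper; the constants mirror the paths
// above): integer insert/extract on NEON is priced at 3 because the lane has
// to be moved between the core and NEON register files, while other cases are
// assumed to stay cheap.
static int exampleLaneMoveCost(bool IsIntegerElt, bool HasNEON) {
  if (HasNEON && IsIntegerElt)
    return 3; // cross-register-file VMOV
  return 1;   // assumed default within one register file
}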
939
945
946
948 ST->isThumb() && !ValTy->isVectorTy()) {
949
950 if (TLI->getValueType(DL, ValTy, true) == MVT::Other)
952
953
954
955
956
958
959
961
962
963
966
968 }
969
970
971
972
974 if ((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) && Sel &&
976 Sel = cast(Sel->user_back());
981 unsigned IID = 0;
982 switch (SPF) {
984 IID = Intrinsic::abs;
985 break;
987 IID = Intrinsic::smin;
988 break;
990 IID = Intrinsic::smax;
991 break;
993 IID = Intrinsic::umin;
994 break;
996 IID = Intrinsic::umax;
997 break;
999 IID = Intrinsic::minnum;
1000 break;
1002 IID = Intrinsic::maxnum;
1003 break;
1004 default:
1005 break;
1006 }
1007 if (IID) {
1008
1009 if (Sel != I)
1010 return 0;
1013 }
1014 }
1015
1016
1018
1020 { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 },
1021 { ISD::SELECT, MVT::v8i1, MVT::v8i64, 50 },
1022 { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 }
1023 };
1024
1031 return Entry->Cost;
1032 }
1033
1035 return LT.first;
1036 }
1037
1038 if (ST->hasMVEIntegerOps() && ValTy->isVectorTy() &&
1039 (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
1040 cast(ValTy)->getNumElements() > 1) {
1041 FixedVectorType *VecValTy = cast(ValTy);
1042 FixedVectorType *VecCondTy = dyn_cast_or_null(CondTy);
1043 if (!VecCondTy)
1045
1046
1047 if (Opcode == Instruction::FCmp && !ST->hasMVEFloatOps()) {
1048
1049
1051 true, CostKind) +
1053 false, CostKind) +
1058 }
1059
1062
1063
1064
1065
1066
1067 if (LT.second.isVector() && LT.second.getVectorNumElements() > 2) {
1068 if (LT.first > 1)
1069 return LT.first * BaseCost +
1071 false, CostKind);
1072 return BaseCost;
1073 }
1074 }
1075
1076
1077
1078 int BaseCost = 1;
1079 if (ST->hasMVEIntegerOps() && ValTy->isVectorTy())
1081
1084}
1085
1089
1090
1091
1092
1093 unsigned NumVectorInstToHideOverhead = 10;
1094 int MaxMergeDistance = 64;
1095
1096 if (ST->hasNEON()) {
1099 return NumVectorInstToHideOverhead;
1100
1101
1102
1103 return 1;
1104 }
1106}
1107
1110
1111
1112 switch (II->getIntrinsicID()) {
1113 case Intrinsic::arm_mve_vctp8:
1114 case Intrinsic::arm_mve_vctp16:
1115 case Intrinsic::arm_mve_vctp32:
1116 case Intrinsic::arm_mve_vctp64:
1117 return true;
1118 default:
1119 break;
1120 }
1121 }
1122 return false;
1123}
1124
1127 return false;
1128
1129 if (auto *VecTy = dyn_cast(DataTy)) {
1130
1131 if (VecTy->getNumElements() == 2)
1132 return false;
1133
1134
1136 if (VecWidth != 128 && VecTy->getElementType()->isFloatingPointTy())
1137 return false;
1138 }
1139
1141 return (EltWidth == 32 && Alignment >= 4) ||
1142 (EltWidth == 16 && Alignment >= 2) || (EltWidth == 8);
1143}
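
// Illustrative sketch (mirrors the legality rule above; helper name is
// hypothetical): masked loads/stores are allowed for 8-bit elements, and for
// 16/32-bit elements only when the access is naturally aligned.
static bool exampleMaskedAccessLegal(unsigned EltWidthBits, unsigned AlignBytes) {
  return (EltWidthBits == 32 && AlignBytes >= 4) ||
         (EltWidthBits == 16 && AlignBytes >= 2) || EltWidthBits == 8;
}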
1144
1147 return false;
1148
1150 return ((EltWidth == 32 && Alignment >= 4) ||
1151 (EltWidth == 16 && Alignment >= 2) || EltWidth == 8);
1152}
1153
1154
1155
1156
1159 unsigned DstAddrSpace = ~0u;
1160 unsigned SrcAddrSpace = ~0u;
1161 const Function *F = I->getParent()->getParent();
1162
  if (const auto *MC = dyn_cast<MemTransferInst>(I)) {
    ConstantInt *C = dyn_cast<ConstantInt>(MC->getLength());

    // If 'size' is not a constant, a library call will be generated.
    if (!C)
      return -1;
1168
1169 const unsigned Size = C->getValue().getZExtValue();
1170 const Align DstAlign = *MC->getDestAlign();
1171 const Align SrcAlign = *MC->getSourceAlign();
1172
1173 MOp = MemOp::Copy(Size, false, DstAlign, SrcAlign,
1174 false);
1175 DstAddrSpace = MC->getDestAddressSpace();
1176 SrcAddrSpace = MC->getSourceAddressSpace();
1177 }
  else if (const auto *MS = dyn_cast<MemSetInst>(I)) {
    ConstantInt *C = dyn_cast<ConstantInt>(MS->getLength());

    // If 'size' is not a constant, a library call will be generated.
    if (!C)
      return -1;
1183
1184 const unsigned Size = C->getValue().getZExtValue();
1185 const Align DstAlign = *MS->getDestAlign();
1186
1187 MOp = MemOp::Set(Size, false, DstAlign,
1188 false, false);
1189 DstAddrSpace = MS->getDestAddressSpace();
1190 }
1191 else
1193
1194 unsigned Limit, Factor = 2;
1195 switch(I->getIntrinsicID()) {
1196 case Intrinsic::memcpy:
1198 break;
1199 case Intrinsic::memmove:
1201 break;
1202 case Intrinsic::memset:
1204 Factor = 1;
1205 break;
1206 default:
1208 }
1209
1210
1211
1212
1213 std::vector MemOps;
1214 if (getTLI()->findOptimalMemOpLowering(
1215 MemOps, Limit, MOp, DstAddrSpace,
1216 SrcAddrSpace, F->getAttributes()))
1217 return MemOps.size() * Factor;
1218
1219
1220 return -1;
1221}
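
// Worked example (illustrative only): with Factor == 2 for memcpy/memmove and
// Factor == 1 for memset, the estimate above is simply MemOps.size() * Factor.
// A 16-byte, word-aligned memcpy that lowers to four 4-byte chunks would thus
// report 4 * 2 == 8 operations (four loads plus four stores).
static int exampleMemOpEstimate(unsigned NumChunks, bool IsMemset) {
  return static_cast<int>(NumChunks) * (IsMemset ? 1 : 2);
}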
1222
  int NumOps = getNumMemOps(cast<IntrinsicInst>(I));
1225
1226
1227
1228 if (NumOps == -1)
1229 return 4;
1230 return NumOps;
1231}
1232
1240
1242 if (IsExtractSubvector)
1244 if (ST->hasNEON()) {
1247
1254
1259
1261 if (const auto *Entry =
1263 return LT.first * Entry->Cost;
1264 }
1266 static const CostTblEntry NEONShuffleTbl[] = {
1267
1268
1275
1280
1282 if (const auto *Entry =
1284 return LT.first * Entry->Cost;
1285 }
1287 static const CostTblEntry NEONSelShuffleTbl[] = {
1288
1289
1290
1291
1296
1300
1302
1304
1306 if (const auto *Entry = CostTableLookup(NEONSelShuffleTbl,
1308 return LT.first * Entry->Cost;
1309 }
1310 }
1311 if (ST->hasMVEIntegerOps()) {
1314
1320
1323 LT.second))
1324 return LT.first * Entry->Cost *
1326 }
1327
1328 if (!Mask.empty()) {
1330 if (LT.second.isVector() &&
1331 Mask.size() <= LT.second.getVectorNumElements() &&
1335 }
1336 }
1337
1338
1339 if (IsExtractSubvector)
1341 int BaseCost = ST->hasMVEIntegerOps() && Tp->isVectorTy()
1343 : 1;
1344 return BaseCost *
1346}
1347
1355
1356
1357
1358 switch (ISDOpcode) {
1359 default:
1360 break;
1363 return 2;
1365 return 3;
1366 }
1367 }
1368
1370
1371 if (ST->hasNEON()) {
1372 const unsigned FunctionCallDivCost = 20;
1373 const unsigned ReciprocalDivCost = 10;
1375
1376
1377
1378
1379 { ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost},
1380 { ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost},
1381 { ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost},
1382 { ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost},
1383 { ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost},
1384 { ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost},
1385 { ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost},
1386 { ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost},
1387 { ISD::SDIV, MVT::v4i16, ReciprocalDivCost},
1388 { ISD::UDIV, MVT::v4i16, ReciprocalDivCost},
1389 { ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost},
1390 { ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost},
1391 { ISD::SDIV, MVT::v8i8, ReciprocalDivCost},
1392 { ISD::UDIV, MVT::v8i8, ReciprocalDivCost},
1393 { ISD::SREM, MVT::v8i8, 8 * FunctionCallDivCost},
1394 { ISD::UREM, MVT::v8i8, 8 * FunctionCallDivCost},
1395
1396 { ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost},
1397 { ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost},
1398 { ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost},
1399 { ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost},
1400 { ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost},
1401 { ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost},
1402 { ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost},
1403 { ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost},
1404 { ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost},
1405 { ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost},
1406 { ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost},
1407 { ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost},
1408 { ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost},
1409 { ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost},
1410 { ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost},
1411 { ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost},
1412
1413 };
1414
1415 if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second))
1416 return LT.first * Entry->Cost;
1417
1419 Opcode, Ty, CostKind, Op1Info, Op2Info);
1420
1421
1422
1423
1424
1425
1426
1427
1428 if (LT.second == MVT::v2i64 && Op2Info.isUniform() && Op2Info.isConstant())
1430
1431 return Cost;
1432 }
1433
1434
1435
1436 auto LooksLikeAFreeShift = [&]() {
1438 return false;
1439
1441 return false;
1443 return false;
1444
1445
1446 switch (cast(CxtI->user_back())->getOpcode()) {
1447 case Instruction::Add:
1448 case Instruction::Sub:
1449 case Instruction::And:
1450 case Instruction::Xor:
1451 case Instruction::Or:
1452 case Instruction::ICmp:
1453 return true;
1454 default:
1455 return false;
1456 }
1457 };
1458 if (LooksLikeAFreeShift())
1459 return 0;
1460
1461
1462
1463 int BaseCost = 1;
1464 if (ST->hasMVEIntegerOps() && Ty->isVectorTy())
1466
1467
1468
1469
1470
1472 return LT.first * BaseCost;
1473
1474
1475 if (auto *VTy = dyn_cast(Ty)) {
1476 unsigned Num = VTy->getNumElements();
1479
1480
1484 }
1485
1486 return BaseCost;
1487}
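
// Worked example (illustrative, assuming a single legalization step so that
// LT.first == 1): a <4 x i32> SDIV matches the 4 * FunctionCallDivCost entry
// above, so the reported cost is 4 * 20 == 80.
static int exampleVectorDivCost(unsigned NumLanes) {
  const unsigned FunctionCallDivCost = 20; // same constant as above
  return static_cast<int>(NumLanes * FunctionCallDivCost);
}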
1488
1495
1497 return 1;
1498
1499
1503
1504 if (ST->hasNEON() && Src->isVectorTy() &&
1505 (Alignment && *Alignment != Align(16)) &&
1506 cast(Src)->getElementType()->isDoubleTy()) {
1507
1508
1510 return LT.first * 4;
1511 }
1512
1513
1514
1515 if (ST->hasMVEFloatOps() && isa(Src) && I &&
1516 ((Opcode == Instruction::Load && I->hasOneUse() &&
1517 isa(*I->user_begin())) ||
1518 (Opcode == Instruction::Store && isa(I->getOperand(0))))) {
1520 Type *DstTy =
1521 Opcode == Instruction::Load
1522 ? (*I->user_begin())->getType()
1523 : cast(I->getOperand(0))->getOperand(0)->getType();
1527 }
1528
1529 int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()
1531 : 1;
1534}
1535
1540 if (ST->hasMVEIntegerOps()) {
1541 if (Opcode == Instruction::Load && isLegalMaskedLoad(Src, Alignment))
1543 if (Opcode == Instruction::Store && isLegalMaskedStore(Src, Alignment))
1545 }
1546 if (!isa(Src))
1549
1550
1551 return cast(Src)->getNumElements() * 8;
1552}
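
// Illustrative sketch (helper name is hypothetical): when a masked memory
// operation is not legal it gets scalarized, and the fallback above charges a
// flat 8 per vector element, e.g. 4 elements -> 32.
static int exampleScalarizedMaskedOpCost(unsigned NumElements) {
  return static_cast<int>(NumElements) * 8;
}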
1553
1557 bool UseMaskForCond, bool UseMaskForGaps) {
1558 assert(Factor >= 2 && "Invalid interleave factor");
1559 assert(isa(VecTy) && "Expect a vector type");
1560
1561
1563
1564 if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits &&
1565 !UseMaskForCond && !UseMaskForGaps) {
1566 unsigned NumElts = cast(VecTy)->getNumElements();
1567 auto *SubVecTy =
1569
1570
1571
1572
1573 int BaseCost =
1575 if (NumElts % Factor == 0 &&
1578
1579
1580
1581
1582
1583
1584 if (ST->hasMVEIntegerOps() && Factor == 2 && NumElts / Factor > 2 &&
1587 return 2 * BaseCost;
1588 }
1589
1592 UseMaskForCond, UseMaskForGaps);
1593}
1594
1596 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
1598 using namespace PatternMatch;
1602
1603 assert(DataTy->isVectorTy() && "Can't do gather/scatters on scalar!");
1604 auto *VTy = cast(DataTy);
1605
1606
1607
1608 unsigned NumElems = VTy->getNumElements();
1609 unsigned EltSize = VTy->getScalarSizeInBits();
1611
1612
1613
1614
1615
1616
1619
1620
1621
1622
1624 NumElems * LT.first + (VariableMask ? NumElems * 5 : 0) +
1629
1630 if (EltSize < 8 || Alignment < EltSize / 8)
1631 return ScalarCost;
1632
1633 unsigned ExtSize = EltSize;
1634
1635 if (I != nullptr) {
1636
1637
1638
1639 if ((I->getOpcode() == Instruction::Load ||
         match(I, m_Intrinsic<Intrinsic::masked_gather>())) &&
1641 I->hasOneUse()) {
1642 const User *Us = *I->users().begin();
1643 if (isa(Us) || isa(Us)) {
1644
1646 cast(Us)->getType()->getScalarSizeInBits();
1647 if (((TypeSize == 32 && (EltSize == 8 || EltSize == 16)) ||
1648 (TypeSize == 16 && EltSize == 8)) &&
1649 TypeSize * NumElems == 128) {
1651 }
1652 }
1653 }
1654
1656 if ((I->getOpcode() == Instruction::Store ||
         match(I, m_Intrinsic<Intrinsic::masked_scatter>())) &&
        (T = dyn_cast<TruncInst>(I->getOperand(0)))) {
1659
1660 unsigned TypeSize = T->getOperand(0)->getType()->getScalarSizeInBits();
1661 if (((EltSize == 16 && TypeSize == 32) ||
1665 }
1666 }
1667
1668 if (ExtSize * NumElems != 128 || NumElems < 4)
1669 return ScalarCost;
1670
1671
1672 if (ExtSize == 32)
1673 return VectorCost;
1674
1675
1676
1677 if (ExtSize != 8 && ExtSize != 16)
1678 return ScalarCost;
1679
1680 if (const auto *BC = dyn_cast(Ptr))
1681 Ptr = BC->getOperand(0);
1682 if (const auto *GEP = dyn_cast(Ptr)) {
1683 if (GEP->getNumOperands() != 2)
1684 return ScalarCost;
1686
1687 if (Scale != 1 && Scale * 8 != ExtSize)
1688 return ScalarCost;
1689
1690 if (const auto *ZExt = dyn_cast(GEP->getOperand(1))) {
1691 if (ZExt->getOperand(0)->getType()->getScalarSizeInBits() <= ExtSize)
1692 return VectorCost;
1693 }
1694 return ScalarCost;
1695 }
1696 return ScalarCost;
1697}
1698
1701 std::optional FMF,
1703
1707
1708
1709
1710
1712 ((EltSize == 32 && ST->hasVFP2Base()) ||
1713 (EltSize == 64 && ST->hasFP64()) ||
1714 (EltSize == 16 && ST->hasFullFP16()))) {
1715 unsigned NumElts = cast(ValTy)->getNumElements();
1716 unsigned VecLimit = ST->hasMVEFloatOps() ? 128 : (ST->hasNEON() ? 64 : -1);
1719 NumElts * EltSize > VecLimit) {
1722 NumElts /= 2;
1723 }
1724
1725
1726
1731 NumElts /= 2;
1733 ExtractCost = NumElts / 2;
1734
1735 return VecCost + ExtractCost +
1736 NumElts *
1738 }
1739
1741 (EltSize == 64 || EltSize == 32 || EltSize == 16 || EltSize == 8)) {
1742 unsigned NumElts = cast(ValTy)->getNumElements();
1743 unsigned VecLimit =
1744 ST->hasMVEIntegerOps() ? 128 : (ST->hasNEON() ? 64 : -1);
1746 while (isPowerOf2_32(NumElts) && NumElts * EltSize > VecLimit) {
1749 NumElts /= 2;
1750 }
1751
1752
1754 NumElts * EltSize == 64) {
1758 NumElts /= 2;
1759 }
1760
1761
1763 return VecCost + ExtractCost +
1766 }
1767
1768 if (!ST->hasMVEIntegerOps() || !ValVT.isSimple() || ISD != ISD::ADD ||
1771
1773
1778 };
1779 if (const auto *Entry = CostTableLookup(CostTblAdd, ISD, LT.second))
1781
1783}
1784
1786 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy,
1790
1792
1793 switch (ISD) {
1795 if (ST->hasMVEIntegerOps() && ValVT.isSimple() && ResVT.isSimple()) {
1797
1798
1799
1800
1801
1802
1803
1806 ((LT.second == MVT::v16i8 && RevVTSize <= 32) ||
1807 (LT.second == MVT::v8i16 && RevVTSize <= 32) ||
1808 (LT.second == MVT::v4i32 && RevVTSize <= 64)))
1810 }
1811 break;
1812 default:
1813 break;
1814 }
1817}
1818
1825
1826 if (ST->hasMVEIntegerOps() && ValVT.isSimple() && ResVT.isSimple()) {
1828
1829
1830
1831
1832
1833
1834
1837 ((LT.second == MVT::v16i8 && RevVTSize <= 32) ||
1838 (LT.second == MVT::v8i16 && RevVTSize <= 64) ||
1839 (LT.second == MVT::v4i32 && RevVTSize <= 64)))
1841 }
1842
1844}
1845
1851
1852
1853
1854
1855 if ((IID == Intrinsic::minnum || IID == Intrinsic::maxnum) &&
1859 unsigned NumElts = cast(Ty)->getNumElements();
1861 unsigned VecLimit = ST->hasMVEFloatOps() ? 128 : (ST->hasNEON() ? 64 : -1);
1863 while (isPowerOf2_32(NumElts) && NumElts * EltSize > VecLimit) {
1867 NumElts /= 2;
1868 }
1869
1870
1871
1874 NumElts == 8) {
1876 NumElts /= 2;
1878 ExtractCost = cast(Ty)->getNumElements() / 2;
1879
1881 {Ty->getElementType(), Ty->getElementType()},
1882 FMF);
1883 return VecCost + ExtractCost +
1885 }
1886
1887 if (IID == Intrinsic::smin || IID == Intrinsic::smax ||
1888 IID == Intrinsic::umin || IID == Intrinsic::umax) {
1890
1891
1892
1893
1898 };
1901 }
1902
1904}
1905
1909 unsigned Opc = ICA.getID();
1910 switch (Opc) {
1911 case Intrinsic::get_active_lane_mask:
1912
1913
1914
1915
1916
1917
1918
1919 if (ST->hasMVEIntegerOps())
1920 return 0;
1921 break;
1922 case Intrinsic::sadd_sat:
1923 case Intrinsic::ssub_sat:
1924 case Intrinsic::uadd_sat:
1925 case Intrinsic::usub_sat: {
1926 bool IsAdd = (Opc == Intrinsic::sadd_sat || Opc == Intrinsic::ssub_sat);
1927 bool IsSigned = (Opc == Intrinsic::sadd_sat || Opc == Intrinsic::ssub_sat);
1929
1930 if (auto *ITy = dyn_cast(RetTy)) {
1931 if (IsSigned && ST->hasDSP() && ITy->getBitWidth() == 32)
1932 return 1;
1933 if (ST->hasDSP() && (ITy->getBitWidth() == 8 || ITy->getBitWidth() == 16))
1934 return 2;
1935
1936
1938 Type *CondTy = RetTy->getWithNewBitWidth(1);
1945 }
1946
1947 if (!ST->hasMVEIntegerOps())
1948 break;
1949
1951 if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
1952 LT.second == MVT::v16i8) {
1953
1954
1955 unsigned Instrs =
1956 LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits() ? 1
1957 : 4;
1959 }
1960 break;
1961 }
1962 case Intrinsic::abs:
1963 case Intrinsic::smin:
1964 case Intrinsic::smax:
1965 case Intrinsic::umin:
1966 case Intrinsic::umax: {
1967 if (!ST->hasMVEIntegerOps())
1968 break;
1970
1972 if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
1973 LT.second == MVT::v16i8)
1975 break;
1976 }
1977 case Intrinsic::minnum:
1978 case Intrinsic::maxnum: {
1979 if (!ST->hasMVEFloatOps())
1980 break;
1983 if (LT.second == MVT::v4f32 || LT.second == MVT::v8f16)
1985 break;
1986 }
1987 case Intrinsic::fptosi_sat:
1988 case Intrinsic::fptoui_sat: {
1990 break;
1991 bool IsSigned = Opc == Intrinsic::fptosi_sat;
1994
1995 if ((ST->hasVFP2Base() && LT.second == MVT::f32 && MTy == MVT::i32) ||
1996 (ST->hasFP64() && LT.second == MVT::f64 && MTy == MVT::i32) ||
1997 (ST->hasFullFP16() && LT.second == MVT::f16 && MTy == MVT::i32))
1998 return LT.first;
1999
2000
2001 if (ST->hasMVEFloatOps() &&
2002 (LT.second == MVT::v4f32 || LT.second == MVT::v8f16) &&
2005
2006
2007 if (((ST->hasVFP2Base() && LT.second == MVT::f32) ||
2008 (ST->hasFP64() && LT.second == MVT::f64) ||
2009 (ST->hasFullFP16() && LT.second == MVT::f16) ||
2010 (ST->hasMVEFloatOps() &&
2011 (LT.second == MVT::v4f32 || LT.second == MVT::v8f16))) &&
2014 LT.second.getScalarSizeInBits());
2018 : Intrinsic::umin,
2019 LegalTy, {LegalTy, LegalTy});
2022 : Intrinsic::umax,
2023 LegalTy, {LegalTy, LegalTy});
2025 return LT.first * Cost;
2026 }
2027
2028
2036 getCastInstrCost(IsSigned ? Instruction::FPToSI : Instruction::FPToUI,
2038 if (IsSigned) {
2039 Type *CondTy = RetTy->getWithNewBitWidth(1);
2044 }
2045 return Cost;
2046 }
2047 }
2048
2050}
2051
  if (!F->isIntrinsic())
    return BaseT::isLoweredToCall(F);
2056
2057 if (F->getName().starts_with("llvm.arm"))
2058 return false;
2059
2060 switch (F->getIntrinsicID()) {
2061 default: break;
2062 case Intrinsic::powi:
2063 case Intrinsic::sin:
2064 case Intrinsic::cos:
2065 case Intrinsic::sincos:
2066 case Intrinsic::pow:
  case Intrinsic::log:
2068 case Intrinsic::log10:
2069 case Intrinsic::log2:
2070 case Intrinsic::exp:
2071 case Intrinsic::exp2:
2072 return true;
2073 case Intrinsic::sqrt:
2074 case Intrinsic::fabs:
2075 case Intrinsic::copysign:
  case Intrinsic::floor:
2077 case Intrinsic::ceil:
2078 case Intrinsic::trunc:
2079 case Intrinsic::rint:
2080 case Intrinsic::nearbyint:
2081 case Intrinsic::round:
2082 case Intrinsic::canonicalize:
2083 case Intrinsic::lround:
2084 case Intrinsic::llround:
2085 case Intrinsic::lrint:
2086 case Intrinsic::llrint:
2087 if (F->getReturnType()->isDoubleTy() && !ST->hasFP64())
2088 return true;
2089 if (F->getReturnType()->isHalfTy() && !ST->hasFullFP16())
2090 return true;
2091
2092
2093
2095 case Intrinsic::masked_store:
2096 case Intrinsic::masked_load:
2097 case Intrinsic::masked_gather:
2098 case Intrinsic::masked_scatter:
2099 return !ST->hasMVEIntegerOps();
2100 case Intrinsic::sadd_with_overflow:
2101 case Intrinsic::uadd_with_overflow:
2102 case Intrinsic::ssub_with_overflow:
2103 case Intrinsic::usub_with_overflow:
2104 case Intrinsic::sadd_sat:
2105 case Intrinsic::uadd_sat:
2106 case Intrinsic::ssub_sat:
2107 case Intrinsic::usub_sat:
2108 return false;
2109 }
2110
2112}
2113
2118 return true;
2119
2120
2121
2122 if (auto *Call = dyn_cast(&I)) {
2123 if (auto *II = dyn_cast(Call)) {
2124 switch(II->getIntrinsicID()) {
2125 case Intrinsic::memcpy:
2126 case Intrinsic::memset:
2127 case Intrinsic::memmove:
2129 default:
2130 if (const Function *F = Call->getCalledFunction())
2132 }
2133 }
2134 return true;
2135 }
2136
2137
2138
2139 switch (I.getOpcode()) {
2140 default:
2141 break;
2142 case Instruction::FPToSI:
2143 case Instruction::FPToUI:
2144 case Instruction::SIToFP:
2145 case Instruction::UIToFP:
2146 case Instruction::FPTrunc:
2147 case Instruction::FPExt:
2149 }
2150
2151
2152
2153
2154
2155
2156
2158 switch (ISD) {
2159 default:
2160 break;
2167 return true;
2168 }
2169 }
2170
2171
2173 return false;
2174
2175
2177 switch (I.getOpcode()) {
2178 default:
2179 return true;
2180 case Instruction::Alloca:
2181 case Instruction::Load:
2182 case Instruction::Store:
2183 case Instruction::Select:
2184 case Instruction::PHI:
2185 return false;
2186 }
2187 }
2188
2189
2190
2191 if (I.getType()->isDoubleTy() && !ST->hasFP64())
2192 return true;
2193
2194
2195 if (I.getType()->isHalfTy() && !ST->hasFullFP16())
2196 return true;
2197
2198 return false;
2199}
2200
2205
2206
2209 return false;
2210 }
2211
2214 return false;
2215 }
2216
2218 if (isa(BackedgeTakenCount)) {
2219 LLVM_DEBUG(dbgs() << "ARMHWLoops: Uncomputable BETC\n");
2220 return false;
2221 }
2222
2223 const SCEV *TripCountSCEV =
2226
2227
2229 LLVM_DEBUG(dbgs() << "ARMHWLoops: Trip count does not fit into 32bits\n");
2230 return false;
2231 }
2232
2233
2234
2235
2236 auto IsHardwareLoopIntrinsic = [](Instruction &I) {
2237 if (auto *Call = dyn_cast(&I)) {
2238 switch (Call->getIntrinsicID()) {
2239 default:
2240 break;
2241 case Intrinsic::start_loop_iterations:
2242 case Intrinsic::test_start_loop_iterations:
2243 case Intrinsic::loop_decrement:
2244 case Intrinsic::loop_decrement_reg:
2245 return true;
2246 }
2247 }
2248 return false;
2249 };
2250
2251
2252
2253
2254 bool IsTailPredLoop = false;
2255 auto ScanLoop = [&](Loop *L) {
2256 for (auto *BB : L->getBlocks()) {
2257 for (auto &I : *BB) {
2259 isa(I)) {
2260 LLVM_DEBUG(dbgs() << "ARMHWLoops: Bad instruction: " << I << "\n");
2261 return false;
2262 }
2263 if (auto *II = dyn_cast(&I))
2264 IsTailPredLoop |=
2265 II->getIntrinsicID() == Intrinsic::get_active_lane_mask ||
2266 II->getIntrinsicID() == Intrinsic::arm_mve_vctp8 ||
2267 II->getIntrinsicID() == Intrinsic::arm_mve_vctp16 ||
2268 II->getIntrinsicID() == Intrinsic::arm_mve_vctp32 ||
2269 II->getIntrinsicID() == Intrinsic::arm_mve_vctp64;
2270 }
2271 }
2272 return true;
2273 };
2274
2275
2276 for (auto *Inner : *L)
2277 if (!ScanLoop(Inner))
2278 return false;
2279
2280 if (!ScanLoop(L))
2281 return false;
2282
2283
2284
2285
2286
2287 LLVMContext &C = L->getHeader()->getContext();
2293 return true;
2294}
2295
2297
2298
2299 if (isa(&I) && ++ICmpCount > 1)
2300 return false;
2301
2302
2303
2304
2305
2306
2307
2308 if (auto *II = dyn_cast(&I))
2309 if ((II->getIntrinsicID() == Intrinsic::smin ||
2310 II->getIntrinsicID() == Intrinsic::smax ||
2311 II->getIntrinsicID() == Intrinsic::umin ||
2312 II->getIntrinsicID() == Intrinsic::umax) &&
2313 ++ICmpCount > 1)
2314 return false;
2315
2316 if (isa(&I))
2317 return false;
2318
2319
2320
2322 return false;
2323
  // Extends have to be extending-loads.
  if (isa<SExtInst>(&I) || isa<ZExtInst>(&I))
    if (!I.getOperand(0)->hasOneUse() || !isa<LoadInst>(I.getOperand(0)))
      return false;

  // Truncs have to be narrowing-stores.
  if (isa<TruncInst>(&I))
    if (!I.hasOneUse() || !isa<StoreInst>(*I.user_begin()))
      return false;
2333
2334 return true;
2335}
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2350 LLVM_DEBUG(dbgs() << "Tail-predication: checking allowed instructions\n");
2351
2352
2353
2354
2355
2356
2357
2358
2361 bool ReductionsDisabled =
2364
2365 for (auto *I : LiveOuts) {
    if (!I->getType()->isIntegerTy() && !I->getType()->isFloatTy() &&
        !I->getType()->isHalfTy()) {
2368 LLVM_DEBUG(dbgs() << "Don't tail-predicate loop with non-integer/float "
2369 "live-out value\n");
2370 return false;
2371 }
2372 if (ReductionsDisabled) {
2374 return false;
2375 }
2376 }
2377
2378
2381 int ICmpCount = 0;
2382
2383 for (BasicBlock *BB : L->blocks()) {
2384 for (Instruction &I : BB->instructionsWithoutDebug()) {
2385 if (isa(&I))
2386 continue;
2388 LLVM_DEBUG(dbgs() << "Instruction not allowed: "; I.dump());
2389 return false;
2390 }
2391
2393 if (T->getScalarSizeInBits() > 32) {
2395 return false;
2396 }
2400 int64_t NextStride = getPtrStride(PSE, AccessTy, Ptr, L).value_or(0);
2401 if (NextStride == 1) {
2402
2403
2404
2405 continue;
2406 } else if (NextStride == -1 ||
2410 << "Consecutive strides of 2 found, vld2/vstr2 can't "
2411 "be tail-predicated\n.");
2412 return false;
2413
2415
2416
2417
2418
2420 if (auto AR = dyn_cast(PtrScev)) {
2421 const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());
2423 continue;
2424 }
2425 }
2427 "tail-predicate\n.");
2428 return false;
2429 }
2430 }
2431 }
2432
2433 LLVM_DEBUG(dbgs() << "tail-predication: all instructions allowed!\n");
2434 return true;
2435}
2436
2439 LLVM_DEBUG(dbgs() << "Tail-predication not enabled.\n");
2440 return false;
2441 }
2442
2443
2444
2445
2446 if (!ST->hasMVEIntegerOps())
2447 return false;
2448
2451
2452
2453 if (L->getNumBlocks() > 1) {
2454 LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: not a single block "
2455 "loop.\n");
2456 return false;
2457 }
2458
2459 assert(L->isInnermost() && "preferPredicateOverEpilogue: inner-loop expected");
2460
2464 LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
2465 "analyzable.\n");
2466 return false;
2467 }
2468
2471
2472
2473
2475 LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
2476 "profitable.\n");
2477 return false;
2478 }
2479
2482 LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
2483 "a candidate.\n");
2484 return false;
2485 }
2486
2488}
2489
2494
2495
2496
2497
2498
2500}
2504
2505
2506
  UP.UpperBound =
      !ST->hasMVEIntegerOps() || !any_of(*L->getHeader(), [](Instruction &I) {
        return isa<IntrinsicInst>(I) &&
               cast<IntrinsicInst>(I).getIntrinsicID() ==
                   Intrinsic::get_active_lane_mask;
      });
2513
2514
2517
2518
2521 if (L->getHeader()->getParent()->hasOptSize())
2522 return;
2523
2525 L->getExitingBlocks(ExitingBlocks);
2527 << "Blocks: " << L->getNumBlocks() << "\n"
2528 << "Exit blocks: " << ExitingBlocks.size() << "\n");
2529
2530
2531
2532 if (ExitingBlocks.size() > 2)
2533 return;
2534
2535
2536
2537 if (ST->hasBranchPredictor() && L->getNumBlocks() > 4)
2538 return;
2539
2540
2542 return;
2543
2544
2545
2547 for (auto *BB : L->getBlocks()) {
2548 for (auto &I : *BB) {
2549
2550
2551 if (I.getType()->isVectorTy())
2552 return;
2553
2557 continue;
2558 }
2559 return;
2560 }
2561
2565 }
2566 }
2567
2568
2569
2570
2571
2572
2573
2576 unsigned ExitingValues = 0;
2578 L->getExitBlocks(ExitBlocks);
2579 for (auto *Exit : ExitBlocks) {
2580
2581
2582 unsigned LiveOuts = count_if(Exit->phis(), [](auto &PH) {
2583 return PH.getNumOperands() != 1 ||
2584 !isa(PH.getOperand(0));
2585 });
2586 ExitingValues = ExitingValues < LiveOuts ? LiveOuts : ExitingValues;
2587 }
2588 if (ExitingValues)
2591 return;
2592 }
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2604 if (ST->hasLOB()) {
2607 auto *Outer = L->getOutermostLoop();
2608 if ((L != Outer && Outer != L->getParentLoop()) ||
2609 (L != Outer && BETC && !SE.isLoopInvariant(BETC, Outer))) {
2611 }
2612 }
2613 }
2614
2617
2624
2625
2626
2627 if (Cost < 12)
2628 UP.Force = true;
2629}
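
// Illustrative sketch (restating the final heuristic above, no new behaviour):
// only small loop bodies are force-unrolled.
static bool exampleForceUnroll(int EstimatedBodyCost) {
  return EstimatedBodyCost < 12; // same threshold as the Cost check above
}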
2630
2634}
2635
2638 if (!ST->hasMVEIntegerOps())
2639 return false;
2640
2642 switch (Opcode) {
2643 case Instruction::Add:
2644 return ScalarBits <= 64;
2645 default:
2646 return false;
2647 }
2648}
2649
2652 if (!ST->hasMVEIntegerOps())
2653 return false;
2654 return true;
2655}
2656
2659 bool HasBaseReg, int64_t Scale,
2660 unsigned AddrSpace) const {
2665 AM.Scale = Scale;
2668 if (ST->hasFPAO())
2669 return AM.Scale < 0 ? 1 : 0;
2670 return 0;
2671 }
2672 return -1;
2673}
2674
2676 if (Thumb) {
2677
2678
2679
2680 return ST->isThumb2() || ST->hasV8MBaselineOps();
2681 } else {
2682
2683
2685 }
2686}
2687
2688
2689
2691 using namespace PatternMatch;
2692
2693 auto areExtDoubled = [](Instruction *Ext) {
2694 return Ext->getType()->getScalarSizeInBits() ==
2695 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
2696 };
2697
2700 !areExtDoubled(cast(Ext1)) ||
2701 !areExtDoubled(cast(Ext2)))
2702 return false;
2703
2704 return true;
2705}
2706
2707
2708
2709
2712 using namespace PatternMatch;
2713
  if (!I->getType()->isVectorTy())
    return false;
2716
2717 if (ST->hasNEON()) {
2718 switch (I->getOpcode()) {
2719 case Instruction::Sub:
2720 case Instruction::Add: {
2722 return false;
2723 Ops.push_back(&I->getOperandUse(0));
2724 Ops.push_back(&I->getOperandUse(1));
2725 return true;
2726 }
2727 default:
2728 return false;
2729 }
2730 }
2731
2732 if (!ST->hasMVEIntegerOps())
2733 return false;
2734
    if (!I->hasOneUse())
      return false;
    auto *Sub = cast<Instruction>(*I->users().begin());
    return Sub->getOpcode() == Instruction::FSub && Sub->getOperand(1) == I;
2740 };
2744 return true;
2745 return false;
2746 };
2747
2748 auto IsSinker = [&](Instruction *I, int Operand) {
2749 switch (I->getOpcode()) {
2750 case Instruction::Add:
2751 case Instruction::Mul:
2752 case Instruction::FAdd:
2753 case Instruction::ICmp:
2754 case Instruction::FCmp:
2755 return true;
2756 case Instruction::FMul:
2757 return !IsFMSMul(I);
2758 case Instruction::Sub:
2759 case Instruction::FSub:
2760 case Instruction::Shl:
2761 case Instruction::LShr:
2762 case Instruction::AShr:
2763 return Operand == 1;
2764 case Instruction::Call:
2765 if (auto *II = dyn_cast(I)) {
2766 switch (II->getIntrinsicID()) {
2767 case Intrinsic::fma:
2768 return !IsFMS(I);
2769 case Intrinsic::sadd_sat:
2770 case Intrinsic::uadd_sat:
2771 case Intrinsic::arm_mve_add_predicated:
2772 case Intrinsic::arm_mve_mul_predicated:
2773 case Intrinsic::arm_mve_qadd_predicated:
2774 case Intrinsic::arm_mve_vhadd:
2775 case Intrinsic::arm_mve_hadd_predicated:
2776 case Intrinsic::arm_mve_vqdmull:
2777 case Intrinsic::arm_mve_vqdmull_predicated:
2778 case Intrinsic::arm_mve_vqdmulh:
2779 case Intrinsic::arm_mve_qdmulh_predicated:
2780 case Intrinsic::arm_mve_vqrdmulh:
2781 case Intrinsic::arm_mve_qrdmulh_predicated:
2782 case Intrinsic::arm_mve_fma_predicated:
2783 return true;
2784 case Intrinsic::ssub_sat:
2785 case Intrinsic::usub_sat:
2786 case Intrinsic::arm_mve_sub_predicated:
2787 case Intrinsic::arm_mve_qsub_predicated:
2788 case Intrinsic::arm_mve_hsub_predicated:
2789 case Intrinsic::arm_mve_vhsub:
2790 return Operand == 1;
2791 default:
2792 return false;
2793 }
2794 }
2795 return false;
2796 default:
2797 return false;
2798 }
2799 };
2800
2801 for (auto OpIdx : enumerate(I->operands())) {
    Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
    // Make sure we are not already sinking this operand.
    if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
      continue;
2806
2808 if (Shuffle->getOpcode() == Instruction::BitCast)
2809 Shuffle = dyn_cast(Shuffle->getOperand(0));
2810
2814 continue;
2815 if (!IsSinker(I, OpIdx.index()))
2816 continue;
2817
2818
2819
2820 for (Use &U : Op->uses()) {
2822 if (!IsSinker(Insn, U.getOperandNo()))
2823 return false;
2824 }
2825
2827 if (Shuffle != Op)
2830 }
2831 return true;
2832}
2833
2837 LLVM_DEBUG(dbgs() << "Padding global arrays disabled\n");
2838 return false;
2839 }
2840
2841
2844 return 0;
2845
2846
2847 if (Size % 4 == 0)
2848 return 0;
2849
2850 unsigned NumBytesToPad = 4 - (Size % 4);
2851 unsigned NewSize = Size + NumBytesToPad;
2852
2853
2854
2856
2857 if (NewSize > MaxMemIntrinsicSize)
2858 return 0;
2859
2860 return NumBytesToPad;
2861}
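
// Worked example (illustrative helper using the same arithmetic as above): pad
// the global out to the next 4-byte boundary, e.g. a 6-byte string receives 2
// bytes of padding, while an 8-byte one needs none.
static unsigned examplePadToWordBoundary(unsigned Size) {
  return Size % 4 == 0 ? 0 : 4 - (Size % 4);
}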
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
static bool areExtractExts(Value *Ext1, Value *Ext2)
Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth of the vector elements.
This file implements a class to represent arbitrary precision integral constant values and operations...
cl::opt< unsigned > MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden, cl::desc("Maximum interleave factor for MVE VLDn to generate."), cl::init(2))
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static Value * isSSATMinMaxPattern(Instruction *Inst, const APInt &Imm)
static cl::opt< bool > AllowWLSLoops("allow-arm-wlsloops", cl::Hidden, cl::init(true), cl::desc("Enable the generation of WLS loops"))
static Value * simplifyNeonVld1(const IntrinsicInst &II, unsigned MemAlign, InstCombiner::BuilderTy &Builder)
Convert a vector load intrinsic into a simple llvm load instruction.
static bool isFPSatMinMaxPattern(Instruction *Inst, const APInt &Imm)
static cl::opt< bool > UseWidenGlobalArrays("widen-global-strings", cl::Hidden, cl::init(true), cl::desc("Enable the widening of global strings to alignment boundaries"))
cl::opt< bool > EnableMaskedGatherScatters
static bool canTailPredicateInstruction(Instruction &I, int &ICmpCount)
cl::opt< unsigned > MVEMaxSupportedInterleaveFactor
cl::opt< TailPredication::Mode > EnableTailPredication
static cl::opt< bool > DisableLowOverheadLoops("disable-arm-loloops", cl::Hidden, cl::init(false), cl::desc("Disable the generation of low-overhead loops"))
static bool canTailPredicateLoop(Loop *L, LoopInfo *LI, ScalarEvolution &SE, const DataLayout &DL, const LoopAccessInfo *LAI)
static cl::opt< bool > EnableMaskedLoadStores("enable-arm-maskedldst", cl::Hidden, cl::init(true), cl::desc("Enable the generation of masked loads and stores"))
This file a TargetTransformInfo::Concept conforming object specific to the ARM target machine.
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Cost tables and simple lookup functions.
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file provides the interface for the instcombine pass implementation.
static cl::opt< unsigned > UnrollCount("unroll-count", cl::Hidden, cl::desc("Use this unroll count for all loops including those with " "unroll_count pragma values, for testing purposes"))
This file defines the LoopVectorizationLegality class.
mir Rename Register Operands
static const Function * getCalledFunction(const Value *V)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
static SymbolRef::Type getType(const Symbol *Sym)
Class for arbitrary precision integers.
unsigned getBitWidth() const
Return the number of bits in the APInt.
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
bool isThumb1Only() const
bool hasFPARMv8Base() const
unsigned getMVEVectorCostFactor(TargetTransformInfo::TargetCostKind CostKind) const
bool preferInLoopReduction(unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const
InstructionCost getAddressComputationCost(Type *Val, ScalarEvolution *SE, const SCEV *Ptr)
bool maybeLoweredToCall(Instruction &I)
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow=true) const
bool isLegalMaskedStore(Type *DataTy, Align Alignment)
bool isLegalMaskedLoad(Type *DataTy, Align Alignment)
InstructionCost getMemcpyCost(const Instruction *I)
unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
bool isLoweredToCall(const Function *F)
InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *ValTy, TTI::TargetCostKind CostKind)
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo)
int getNumMemOps(const IntrinsicInst *I) const
Given a memcpy/memset/memmove instruction, return the number of memory operations performed,...
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const
getScalingFactorCost - Return the cost of the scaling used in addressing mode represented by AM.
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
bool hasArmWideBranch(bool Thumb) const
uint64_t getMaxMemIntrinsicInlineSizeThreshold() const
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr)
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
bool isLegalMaskedGather(Type *Ty, Align Alignment)
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
TTI::AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const
InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty)
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, FastMathFlags FMF, TTI::TargetCostKind CostKind)
bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
bool isProfitableLSRChainElement(Instruction *I)
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL) const
Returns the number of interleaved accesses that will be generated when lowering accesses of the given...
bool isLegalInterleavedAccessType(unsigned Factor, FixedVectorType *VecTy, Align Alignment, const DataLayout &DL) const
Returns true if VecTy is a legal interleaved access type.
bool useSoftFloat() const override
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Class to represent array types.
A cache of @llvm.assume calls within a function.
static Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
LLVM Basic Block Representation.
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on arguments.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *Ty, int &Index, VectorType *&SubTy) const
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
Try to calculate op costs for min/max reduction operations.
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *, const SCEV *)
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind)
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind)
Compute a cost of the given call instruction.
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, ArrayRef< Value * > VL={})
Estimate the overhead of scalarizing an instruction.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0)
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLE
signed less or equal
@ ICMP_SGT
signed greater than
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This class represents a range of values.
ConstantRange intersectWith(const ConstantRange &CR, PreferredRangeType Type=Smallest) const
Return the range that results from the intersection of this range with another range.
This is an important base class in LLVM.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Convenience struct for specifying and reasoning about fast-math flags.
Container class for subtarget features.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
ConstantInt * getTrue()
Get the constant value for i1 true.
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
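The IRBuilder entries above (SetInsertPoint, CreateAlignedLoad, CreateVectorSplat, CreateIntrinsic, getTrue) are the primitives used to emit replacement IR in intrinsic combines. A minimal sketch, assuming a caller-supplied basic block BB and an i32 pointer Ptr; loadAndSplat is a hypothetical name:

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Hypothetical helper: load an i32 with 4-byte alignment and broadcast it to a
// <4 x i32> vector.
Value *loadAndSplat(BasicBlock *BB, Value *Ptr) {
  IRBuilder<> Builder(BB->getContext());
  Builder.SetInsertPoint(BB);                        // append to the end of BB
  Value *Ld = Builder.CreateAlignedLoad(Builder.getInt32Ty(), Ptr,
                                        MaybeAlign(4), "ld");
  return Builder.CreateVectorSplat(4, Ld, "splat");  // splat of the loaded value
}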
The core instruction combiner logic.
const DataLayout & getDataLayout() const
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
DominatorTree & getDominatorTree() const
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
virtual bool SimplifyDemandedBits(Instruction *I, unsigned OpNo, const APInt &DemandedMask, KnownBits &Known, unsigned Depth, const SimplifyQuery &Q)=0
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
AssumptionCache & getAssumptionCache() const
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
Drive the analysis of memory accesses in the loop.
const PredicatedScalarEvolution & getPSE() const
Used to add runtime SCEV checks.
LoopVectorizationLegality checks if it is legal to vectorize a loop, and to what vectorization factor...
LoopInfo * getLoopInfo() const
DominatorTree * getDominatorTree() const
AssumptionCache * getAssumptionCache() const
const LoopAccessInfo * getLAI() const
ScalarEvolution * getScalarEvolution() const
Represents a single loop in the control flow graph.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
This class represents an analyzed expression in the program.
Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
bool hasLoopInvariantBackedgeTakenCount(const Loop *L)
Return true if the specified loop has an analyzable loop-invariant backedge-taken count.
APInt getUnsignedRangeMax(const SCEV *S)
Determine the max of the unsigned range for a particular SCEV.
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
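The ScalarEvolution queries above are typically composed to reason about trip counts. A minimal sketch, assuming the loop comes from the surrounding pass; getTripCount is a hypothetical helper that returns the backedge-taken count plus one:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;

// Hypothetical helper: trip count = backedge-taken count + 1, or nullptr when
// the backedge-taken count is not analyzable and loop-invariant.
const SCEV *getTripCount(ScalarEvolution &SE, const Loop *L) {
  if (!SE.hasLoopInvariantBackedgeTakenCount(L))
    return nullptr;
  const SCEV *BTC = SE.getBackedgeTakenCount(L);
  SmallVector<const SCEV *, 2> Ops;
  Ops.push_back(BTC);
  Ops.push_back(SE.getOne(BTC->getType()));
  return SE.getAddExpr(Ops);                         // BTC + 1
}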
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
Provides information about what library functions are available for the current target.
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
const TargetMachine & getTargetMachine() const
unsigned getMaxStoresPerMemcpy(bool OptSize) const
Get maximum # of store operations permitted for llvm.memcpy.
unsigned getMaxStoresPerMemmove(bool OptSize) const
Get maximum # of store operations permitted for llvm.memmove.
unsigned getMaxStoresPerMemset(bool OptSize) const
Get maximum # of store operations permitted for llvm.memset.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
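The TargetLoweringBase queries above let a cost model ask how an IR operation would be lowered for a given type. A minimal sketch; mulIsSupported is a hypothetical helper, not part of this file:

#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Hypothetical helper: is an integer multiply of type Ty legal, or can it be
// made legal by custom lowering or promotion, on this target?
bool mulIsSupported(const TargetLoweringBase &TLI, const DataLayout &DL,
                    Type *Ty) {
  int ISDOpc = TLI.InstructionOpcodeToISD(Instruction::Mul);
  EVT VT = TLI.getValueType(DL, Ty);
  return VT.isSimple() && TLI.isOperationLegalOrCustomOrPromote(ISDOpc, VT);
}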
Primary interface to the complete machine description for the target machine.
bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr, int64_t MergeDistance) const
bool isLoweredToCall(const Function *F) const
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind)
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
static bool requiresOrderedReduction(std::optional< FastMathFlags > FMF)
A helper function to determine the type of reduction algorithm used for a given Opcode and set of Fas...
@ TCC_Expensive
The cost of a 'div' instruction on x86.
ShuffleKind
The various kinds of shuffle patterns for vector queries.
@ SK_Select
Selects elements from the corresponding lane of either source operand.
@ SK_PermuteSingleSrc
Shuffle elements of single source vector with any shuffle mask.
@ SK_Broadcast
Broadcast element 0 to all other elements.
@ SK_Reverse
Reverse the order of the vector.
@ SK_ExtractSubvector
ExtractSubvector Index indicates start offset.
CastContextHint
Represents a hint about the context in which a cast is used.
@ Masked
The cast is used with a masked load/store.
@ None
The cast is not used with a load/store of any kind.
@ Normal
The cast is used with a normal load/store.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
bool isVectorTy() const
True if this is an instance of VectorType.
bool isArrayTy() const
True if this is an instance of ArrayType.
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Type * getArrayElementType() const
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
Base class of all SIMD vector types.
Type * getElementType() const
constexpr ScalarTy getFixedValue() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
int getSOImmVal(unsigned Arg)
getSOImmVal - Given a 32-bit immediate, if it is something that can fit into an shifter_operand immed...
bool isThumbImmShiftedVal(unsigned V)
isThumbImmShiftedVal - Return true if the specified value can be obtained by left shifting a 8-bit im...
int getT2SOImmVal(unsigned Arg)
getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit into a Thumb-2 shifter_oper...
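Both ARM_AM helpers above return -1 when the immediate cannot be encoded in a single shifter operand. A minimal sketch; isCheapImmediate is a hypothetical helper, and the include path is the in-tree one (an assumption):

#include "MCTargetDesc/ARMAddressingModes.h"   // in-tree include path (assumed)
using namespace llvm;

// Hypothetical helper: an immediate is "cheap" if it fits a single ARM or
// Thumb-2 shifter operand.
bool isCheapImmediate(unsigned Imm, bool IsThumb2) {
  return IsThumb2 ? ARM_AM::getT2SOImmVal(Imm) != -1
                  : ARM_AM::getSOImmVal(Imm) != -1;
}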
@ C
The default llvm calling convention, compatible with C.
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
@ SIGN_EXTEND
Conversion operators.
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
@ SHL
Shift and rotation operations.
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
auto m_Undef()
Match an arbitrary undef constant.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
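The PatternMatch helpers above are combined into declarative match() calls against IR values. A minimal sketch; matchXorWithConstant is a hypothetical helper, not part of this file:

#include "llvm/IR/Constants.h"
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Hypothetical helper: recognise "X xor C" for a constant integer C, capturing
// both operands on success.
bool matchXorWithConstant(Value *V, Value *&X, ConstantInt *&C) {
  return match(V, m_Xor(m_Value(X), m_ConstantInt(C)));
}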
@ ForceEnabledNoReductions
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
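CostTableLookup scans a static table keyed by ISD opcode and MVT and returns the matching entry, if any. A minimal sketch with an illustrative table; the entries and costs are made up, not ARM's:

#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/Support/InstructionCost.h"
using namespace llvm;

// Hypothetical cost table: entries are illustrative only.
static const CostTblEntry ExampleCostTbl[] = {
    {ISD::ADD, MVT::v4i32, 1},
    {ISD::MUL, MVT::v4i32, 2},
};

InstructionCost lookupExampleCost(int ISDOpc, MVT Ty) {
  if (const auto *Entry = CostTableLookup(ExampleCostTbl, ISDOpc, Ty))
    return Entry->Cost;
  return InstructionCost::getInvalid();              // no entry for this (op, type)
}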
bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
@ Runtime
Detect stack use after return if not disabled runtime with (ASAN_OPTIONS=detect_stack_use_after_retur...
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
Align getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
SmallVector< Instruction *, 8 > findDefsUsedOutsideOfLoop(Loop *L)
Returns the instructions that use values defined in the loop.
SelectPatternFlavor
Specific patterns of select instructions we can match.
@ SPF_ABS
Absolute value.
@ SPF_FMAXNUM
Floating point maxnum.
@ SPF_UMIN
Unsigned minimum.
@ SPF_UMAX
Unsigned maximum.
@ SPF_SMAX
Signed maximum.
@ SPF_FMINNUM
Floating point minnum.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp=nullptr, unsigned Depth=0)
Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind and providing the out param...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
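getPtrStride, together with the load/store helpers listed on this page, is the usual way to test for consecutive accesses. A minimal sketch, assuming PSE and L come from the surrounding analysis; isUnitStrideLoad is a hypothetical name:

#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Instructions.h"
#include <optional>
using namespace llvm;

// Hypothetical helper: true if LI's pointer advances by exactly one element of
// the accessed type per iteration of L.
bool isUnitStrideLoad(LoadInst *LI, PredicatedScalarEvolution &PSE,
                      const Loop *L) {
  Value *Ptr = getLoadStorePointerOperand(LI);   // pointer operand helper
  Type *AccessTy = getLoadStoreType(LI);         // accessed type helper
  std::optional<int64_t> Stride = getPtrStride(PSE, AccessTy, Ptr, L);
  return Stride && *Stride == 1;                 // consecutive forward access
}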
DWARFExpression::Operation Op
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool isVREVMask(ArrayRef< int > M, EVT VT, unsigned BlockSize)
isVREVMask - Check if a vector shuffle corresponds to a VREV instruction with the specified blocksize...
@ DataWithoutLaneMask
Same as Data, but avoids using the get.active.lane.mask intrinsic to calculate the mask and instead i...
@ Data
Use predicate only to mask operations on data in the loop.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
uint64_t getScalarSizeInBits() const
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isFixedLengthVector() const
bool isVector() const
Return true if this is a vector value type.
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
bool isInteger() const
Return true if this is an integer or a vector integer type.
Attributes of a target dependent hardware loop.
bool canAnalyze(LoopInfo &LI)
bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop=false, bool ForceHardwareLoopPHI=false)
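The two HardwareLoopInfo checks above are normally applied in sequence. A minimal sketch; mayFormHardwareLoop is a hypothetical helper, not part of this file:

#include "llvm/Analysis/TargetTransformInfo.h"
using namespace llvm;

// Hypothetical helper: canAnalyze rejects loops whose structure cannot be
// reasoned about at all; isHardwareLoopCandidate additionally requires a
// usable, analyzable trip count.
bool mayFormHardwareLoop(HardwareLoopInfo &HWLoopInfo, ScalarEvolution &SE,
                         LoopInfo &LI, DominatorTree &DT) {
  return HWLoopInfo.canAnalyze(LI) &&
         HWLoopInfo.isHardwareLoopCandidate(SE, LI, DT);
}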
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)
SelectPatternFlavor Flavor
LoopVectorizationLegality * LVL
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Flags describing the kind of vector reduction.
Parameters that control the generic loop unrolling transformation.
bool UpperBound
Allow using trip count upper bound to unroll loops.
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
unsigned DefaultUnrollRuntimeCount
Default unroll count for loops with run-time trip count.
unsigned UnrollAndJamInnerLoopThreshold
Threshold for unroll and jam, for inner loop size.
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).
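A target's getUnrollingPreferences override fills these fields to steer the generic unroller. A minimal sketch with illustrative values, not the ones ARMTTIImpl actually sets; setExampleUnrollPrefs is a hypothetical name:

#include "llvm/Analysis/TargetTransformInfo.h"
using namespace llvm;

// Hypothetical helper: example settings only.
void setExampleUnrollPrefs(TargetTransformInfo::UnrollingPreferences &UP) {
  UP.Partial = true;                 // allow partial unrolling
  UP.Runtime = true;                 // allow runtime unrolling
  UP.UpperBound = true;              // may use the trip-count upper bound
  UP.UnrollRemainder = true;         // unroll the runtime remainder loop
  UP.DefaultUnrollRuntimeCount = 4;  // default count for runtime trip counts
  UP.OptSizeThreshold = 0;           // disallow unrolling when optimizing for size
}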
Type Conversion Cost Table.