LLVM: lib/Transforms/Vectorize/VPlanTransforms.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

44

45using namespace llvm;

48

52 GetIntOrFpInductionDescriptor,

54

58

59 if (!VPBB->getParent())

60 break;

62 auto EndIter = Term ? Term->getIterator() : VPBB->end();

63

66

67 VPValue *VPV = Ingredient.getVPSingleValue();

69 continue;

70

72

75 auto *Phi = cast(PhiR->getUnderlyingValue());

76 const auto *II = GetIntOrFpInductionDescriptor(Phi);

77 if (II) {

78 NewRecipe = new VPWidenPHIRecipe(Phi, nullptr, PhiR->getDebugLoc());

79 for (VPValue *Op : PhiR->operands())

81 } else {

85

86

87

90 Phi, Start, Step, &Plan.getVF(), *II, Flags,

91 Ingredient.getDebugLoc());

92 }

93 } else {

96

99 *Load, Ingredient.getOperand(0), nullptr ,

100 false , false , *VPI,

101 Ingredient.getDebugLoc());

104 *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),

105 nullptr , false , false , *VPI,

106 Ingredient.getDebugLoc());

109 Ingredient.getDebugLoc());

113 return false;

117 *VPI, CI->getDebugLoc());

120 *VPI, Ingredient.getDebugLoc());

123 CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI,

125 } else {

127 *VPI, Ingredient.getDebugLoc());

128 }

129 }

130

134 else

136 "Only recpies with zero or one defined values expected");

137 Ingredient.eraseFromParent();

138 }

139 }

140 return true;

141}

142

143

148 const Loop &L;

150

151

152

154 if (A->getOpcode() != Instruction::Store ||

155 B->getOpcode() != Instruction::Store)

156 return false;

157

158 VPValue *AddrA = A->getOperand(1);

160 VPValue *AddrB = B->getOperand(1);

163 return false;

164

165 const APInt *Distance;

166 if (match(SE.getMinusSCEV(SCEVA, SCEVB), m_scev_APInt(Distance)))

167 return false;

168

170 Type *TyA = TypeInfo.inferScalarType(A->getOperand(0));

171 uint64_t SizeA = DL.getTypeStoreSize(TyA);

172 Type *TyB = TypeInfo.inferScalarType(B->getOperand(0));

173 uint64_t SizeB = DL.getTypeStoreSize(TyB);

174

175

176

177

178 uint64_t MaxStoreSize = std::max(SizeA, SizeB);

179

180 auto VFs = B->getParent()->getPlan()->vectorFactors();

182 return Distance->abs().uge(

184 }

185

186public:

190 : ExcludeRecipes(ExcludeRecipes), GroupLeader(GroupLeader), SE(SE), L(L),

191 TypeInfo(TypeInfo) {}

192

193

194

195

198 return ExcludeRecipes.contains(&R) ||

199 (Store && isNoAliasViaDistance(Store, &GroupLeader));

200 }

201};

202

203

204

205

206

207

208

209static bool

212 std::optional SinkInfo = {}) {

213 bool CheckReads = SinkInfo.has_value();

215 return false;

216

218

220 Block = Block->getSingleSuccessor()) {

222 "Expected at most one successor in block chain");

225 if (SinkInfo && SinkInfo->shouldSkip(R))

226 continue;

227

228

229 if (R.mayWriteToMemory() && !(CheckReads && R.mayReadFromMemory()))

230 continue;

231

233 if (Loc)

234

235

236 return false;

237

238

239

240 if (CheckReads && R.mayReadFromMemory() &&

243 continue;

244

245

247 Loc->AATags.NoAlias))

248 return false;

249 }

250

251 if (Block == LastBB)

252 break;

253 }

254 return true;

255}

256

257

258

260

261

263 return false;

264

265

266

267 if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi())

268 return true;

269

270

272 return RepR && RepR->getOpcode() == Instruction::Alloca;

273}

274

279

281 auto InsertIfValidSinkCandidate = [ScalarVFOnly, &WorkList](

283 auto *Candidate =

285 if (!Candidate)

286 return;

287

288

289

291 return;

292

294 return;

295

297 if (!ScalarVFOnly && RepR->isSingleScalar())

298 return;

299

300 WorkList.insert({SinkTo, Candidate});

301 };

302

303

304

307 if (!VPR->isReplicator() || EntryVPBB->getSuccessors().size() != 2)

308 continue;

311 continue;

312 for (auto &Recipe : *VPBB)

313 for (VPValue *Op : Recipe.operands())

314 InsertIfValidSinkCandidate(VPBB, Op);

315 }

316

317

318 for (unsigned I = 0; I != WorkList.size(); ++I) {

321 std::tie(SinkTo, SinkCandidate) = WorkList[I];

322

323

324

325

326 auto UsersOutsideSinkTo =

328 return cast(U)->getParent() != SinkTo;

329 });

330 if (any_of(UsersOutsideSinkTo, [SinkCandidate](VPUser *U) {

331 return !U->usesFirstLaneOnly(SinkCandidate);

332 }))

333 continue;

334 bool NeedsDuplicating = !UsersOutsideSinkTo.empty();

335

336 if (NeedsDuplicating) {

337 if (ScalarVFOnly)

338 continue;

340 if (auto *SinkCandidateRepR =

342

343

346 nullptr , *SinkCandidateRepR,

347 *SinkCandidateRepR);

348

349 } else {

350 Clone = SinkCandidate->clone();

351 }

352

356 });

357 }

360 InsertIfValidSinkCandidate(SinkTo, Op);

362 }

364}

365

366

367

370 if (!EntryBB || EntryBB->size() != 1 ||

372 return nullptr;

373

375}

376

377

380 if (EntryBB->getNumSuccessors() != 2)

381 return nullptr;

382

385 if (!Succ0 || !Succ1)

386 return nullptr;

387

388 if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)

389 return nullptr;

390 if (Succ0->getSingleSuccessor() == Succ1)

391 return Succ0;

392 if (Succ1->getSingleSuccessor() == Succ0)

393 return Succ1;

394 return nullptr;

395}

396

397

398

399

402

403

404

405

409 if (!Region1->isReplicator())

410 continue;

411 auto *MiddleBasicBlock =

413 if (!MiddleBasicBlock || !MiddleBasicBlock->empty())

414 continue;

415

416 auto *Region2 =

418 if (!Region2 || !Region2->isReplicator())

419 continue;

420

423 if (!Mask1 || Mask1 != Mask2)

424 continue;

425

426 assert(Mask1 && Mask2 && "both region must have conditions");

428 }

429

430

432 if (TransformedRegions.contains(Region1))

433 continue;

434 auto *MiddleBasicBlock = cast(Region1->getSingleSuccessor());

435 auto *Region2 = cast(MiddleBasicBlock->getSingleSuccessor());

436

439 if (!Then1 || !Then2)

440 continue;

441

442

443

444

445

446

449

452

453

454

455

459 VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();

462 });

463

464

465 if (Phi1ToMove.getVPSingleValue()->getNumUsers() == 0) {

466 Phi1ToMove.eraseFromParent();

467 continue;

468 }

469 Phi1ToMove.moveBefore(*Merge2, Merge2->begin());

470 }

471

472

475 R.eraseFromParent();

476

477

481 }

483 TransformedRegions.insert(Region1);

484 }

485

486 return !TransformedRegions.empty();

487}

488

492

493 std::string RegionName = (Twine("pred.") + Instr->getOpcodeName()).str();

494 assert(Instr->getParent() && "Predicated instruction not in any basic block");

495 auto *BlockInMask = PredRecipe->getMask();

499 auto *Entry =

501

502

503

506 PredRecipe->isSingleScalar(), nullptr , *PredRecipe, *PredRecipe,

508 auto *Pred =

510

514 RecipeWithoutMask->getDebugLoc());

516 PHIRecipe->setOperand(0, RecipeWithoutMask);

517 }

519 auto *Exiting =

523

524

525

528

530}

531

538 if (RepR->isPredicated())

540 }

541 }

542

543 unsigned BBNum = 0;

547

551

555

557 if (ParentRegion && ParentRegion->getExiting() == CurrentBlock)

559 }

560}

561

562

563

568

569

570

571 if (!VPBB->getParent())

572 continue;

573 auto *PredVPBB =

575 if (!PredVPBB || PredVPBB->getNumSuccessors() != 1 ||

577 continue;

579 }

580

584 R.moveBefore(*PredVPBB, PredVPBB->end());

586 auto *ParentRegion = VPBB->getParent();

587 if (ParentRegion && ParentRegion->getExiting() == VPBB)

588 ParentRegion->setExiting(PredVPBB);

589 for (auto *Succ : to_vector(VPBB->successors())) {

592 }

593

594 }

595 return !WorkList.empty();

596}

597

599

601

602 bool ShouldSimplify = true;

603 while (ShouldSimplify) {

607 }

608}

609

610

611

612

613

614

615

619 if (IV || IV->getTruncInst())

620 continue;

621

622

623

624

625

626

627

628

629

634 for (auto *U : FindMyCast->users()) {

636 if (UserCast && UserCast->getUnderlyingValue() == IRCast) {

637 FoundUserCast = UserCast;

638 break;

639 }

640 }

641 FindMyCast = FoundUserCast;

642 }

644 }

645}

646

647

648

655 if (WidenNewIV)

656 break;

657 }

658

659 if (!WidenNewIV)

660 return;

661

665

666 if (!WidenOriginalIV || !WidenOriginalIV->isCanonical())

667 continue;

668

669

670

671

672

675

676

677

678

679 WidenOriginalIV->dropPoisonGeneratingFlags();

682 return;

683 }

684 }

685}

686

687

689

690

692 bool IsConditionalAssume = RepR && RepR->isPredicated() &&

694 if (IsConditionalAssume)

695 return true;

696

697 if (R.mayHaveSideEffects())

698 return false;

699

700

701 return all_of(R.definedValues(),

702 [](VPValue *V) { return V->getNumUsers() == 0; });

703}

704

708

709

712 R.eraseFromParent();

713 continue;

714 }

715

716

718 if (!PhiR || PhiR->getNumOperands() != 2)

719 continue;

720 VPUser *PhiUser = PhiR->getSingleUser();

721 if (!PhiUser)

722 continue;

724 if (PhiUser != Incoming->getDefiningRecipe() ||

725 Incoming->getNumUsers() != 1)

726 continue;

727 PhiR->replaceAllUsesWith(PhiR->getOperand(0));

728 PhiR->eraseFromParent();

729 Incoming->getDefiningRecipe()->eraseFromParent();

730 }

731 }

732}

733

744 Kind, FPBinOp, StartV, CanonicalIV, Step, "offset.idx");

745

746

749 if (TruncI) {

752 "Not truncating.");

753 assert(ResultTy->isIntegerTy() && "Truncation requires an integer type");

754 BaseIV = Builder.createScalarCast(Instruction::Trunc, BaseIV, TruncTy, DL);

755 ResultTy = TruncTy;

756 }

757

758

760 if (ResultTy != StepTy) {

762 "Not truncating.");

763 assert(StepTy->isIntegerTy() && "Truncation requires an integer type");

764 auto *VecPreheader =

767 Builder.setInsertPoint(VecPreheader);

768 Step = Builder.createScalarCast(Instruction::Trunc, Step, ResultTy, DL);

769 }

770 return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step,

772}

773

776 for (unsigned I = 0; I != Users.size(); ++I) {

779 continue;

781 Users.insert_range(V->users());

782 }

783 return Users.takeVector();

784}

785

786

787

788

797 nullptr, StartV, StepV, PtrIV->getDebugLoc(), Builder);

798

799 return Builder.createPtrAdd(PtrIV->getStartValue(), Steps,

801}

802

803

804

805

806

807

808

809

810

811

812

813

814

821 if (!PhiR)

822 continue;

823

824

825

826

827

832

834 Def->getNumUsers() == 0 || !Def->getUnderlyingValue() ||

835 (RepR && (RepR->isSingleScalar() || RepR->isPredicated())))

836 continue;

837

838

840 continue;

841

843 Def->operands(), true,

844 nullptr, *Def);

845 Clone->insertAfter(Def);

846 Def->replaceAllUsesWith(Clone);

847 }

848

849

850

853 !PtrIV->onlyScalarsGenerated(Plan.hasScalableVF()))

854 continue;

855

857 PtrIV->replaceAllUsesWith(PtrAdd);

858 continue;

859 }

860

861

862

864 if (HasOnlyVectorVFs && none_of(WideIV->users(), [WideIV](VPUser *U) {

865 return U->usesScalars(WideIV);

866 }))

867 continue;

868

871 Plan, ID.getKind(), ID.getInductionOpcode(),

873 WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),

874 WideIV->getDebugLoc(), Builder);

875

876

877 if (!HasOnlyVectorVFs) {

879 "plans containing a scalar VF cannot also include scalable VFs");

880 WideIV->replaceAllUsesWith(Steps);

881 } else {

883 WideIV->replaceUsesWithIf(Steps,

884 [WideIV, HasScalableVF](VPUser &U, unsigned) {

885 if (HasScalableVF)

886 return U.usesFirstLaneOnly(WideIV);

887 return U.usesScalars(WideIV);

888 });

889 }

890 }

891}

892

893

894

895

899 if (WideIV) {

900

901

903 return (IntOrFpIV && IntOrFpIV->getTruncInst()) ? nullptr : WideIV;

904 }

905

906

908 if (!Def || Def->getNumOperands() != 2)

909 return nullptr;

911 if (!WideIV)

913 if (!WideIV)

914 return nullptr;

915

916 auto IsWideIVInc = [&]() {

917 auto &ID = WideIV->getInductionDescriptor();

918

919

920 VPValue *IVStep = WideIV->getStepValue();

921 switch (ID.getInductionOpcode()) {

922 case Instruction::Add:

924 case Instruction::FAdd:

927 case Instruction::FSub:

930 case Instruction::Sub: {

931

932

935 return false;

941 }

942 default:

945 m_Specific(WideIV->getStepValue())));

946 }

947 llvm_unreachable("should have been covered by switch above");

948 };

949 return IsWideIVInc() ? WideIV : nullptr;

950}

951

952

953

962 return nullptr;

963

965 if (!WideIV)

966 return nullptr;

967

969 if (WideIntOrFp && WideIntOrFp->getTruncInst())

970 return nullptr;

971

972

977

979 VPValue *FirstActiveLane =

982 FirstActiveLane = B.createScalarZExtOrTrunc(FirstActiveLane, CanonicalIVType,

983 FirstActiveLaneType, DL);

985 B.createNaryOp(Instruction::Add, {CanonicalIV, FirstActiveLane}, DL);

986

987

988

989

992 EndValue = B.createNaryOp(Instruction::Add, {EndValue, One}, DL);

993 }

994

995 if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {

997 VPValue *Start = WideIV->getStartValue();

998 VPValue *Step = WideIV->getStepValue();

999 EndValue = B.createDerivedIV(

1001 Start, EndValue, Step);

1002 }

1003

1004 return EndValue;

1005}

1006

1007

1008

1014 return nullptr;

1015

1017 if (!WideIV)

1018 return nullptr;

1019

1021 assert(EndValue && "end value must have been pre-computed");

1022

1023

1024

1025

1027 return EndValue;

1028

1029

1031 VPValue *Step = WideIV->getStepValue();

1034 return B.createNaryOp(Instruction::Sub, {EndValue, Step},

1039 return B.createPtrAdd(EndValue,

1040 B.createNaryOp(Instruction::Sub, {Zero, Step}),

1042 }

1044 const auto &ID = WideIV->getInductionDescriptor();

1045 return B.createNaryOp(

1046 ID.getInductionBinOp()->getOpcode() == Instruction::FAdd

1047 ? Instruction::FSub

1048 : Instruction::FAdd,

1049 {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});

1050 }

1051 llvm_unreachable("all possible induction types must be handled");

1052 return nullptr;

1053}

1054

1063

1064 for (auto [Idx, PredVPBB] : enumerate(ExitVPBB->getPredecessors())) {

1065 VPValue *Escape = nullptr;

1066 if (PredVPBB == MiddleVPBB)

1068 ExitIRI->getOperand(Idx),

1069 EndValues, SE);

1070 else

1072 ExitIRI->getOperand(Idx), SE);

1073 if (Escape)

1074 ExitIRI->setOperand(Idx, Escape);

1075 }

1076 }

1077 }

1078}

1079

1080

1081

1084

1088 if (!ExpR)

1089 continue;

1090

1091 const auto &[V, Inserted] = SCEV2VPV.try_emplace(ExpR->getSCEV(), ExpR);

1092 if (Inserted)

1093 continue;

1094 ExpR->replaceAllUsesWith(V->second);

1095 ExpR->eraseFromParent();

1096 }

1097}

1098

1103

1104 while (!WorkList.empty()) {

1106 if (!Seen.insert(Cur).second)

1107 continue;

1109 if (!R)

1110 continue;

1112 continue;

1114 R->eraseFromParent();

1115 }

1116}

1117

1118

1119

1120

1121static std::optional<std::pair<bool, unsigned>>

1124 std::optional<std::pair<bool, unsigned>>>(R)

1127 [](auto *I) { return std::make_pair(false, I->getOpcode()); })

1128 .Case([](auto *I) {

1129 return std::make_pair(true, I->getVectorIntrinsicID());

1130 })

1131 .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe>([](auto *I) {

1132

1133

1134

1135 return std::make_pair(false,

1137 })

1138 .Default([](auto *) { return std::nullopt; });

1139}

1140

1141

1142

1143

1149 if (!OpcodeOrIID)

1150 return nullptr;

1151

1154 if (Op->isLiveIn() || Op->getLiveInIRValue())

1155 return nullptr;

1156 Ops.push_back(Op->getLiveInIRValue());

1157 }

1158

1159 auto FoldToIRValue = [&]() -> Value * {

1161 if (OpcodeOrIID->first) {

1162 if (R.getNumOperands() != 2)

1163 return nullptr;

1164 unsigned ID = OpcodeOrIID->second;

1165 return Folder.FoldBinaryIntrinsic(ID, Ops[0], Ops[1],

1167 }

1168 unsigned Opcode = OpcodeOrIID->second;

1175 switch (Opcode) {

1177 return Folder.FoldSelect(Ops[0], Ops[1],

1180 return Folder.FoldBinOp(Instruction::BinaryOps::Xor, Ops[0],

1182 case Instruction::Select:

1183 return Folder.FoldSelect(Ops[0], Ops[1], Ops[2]);

1184 case Instruction::ICmp:

1185 case Instruction::FCmp:

1187 Ops[1]);

1188 case Instruction::GetElementPtr: {

1191 return Folder.FoldGEP(GEP->getSourceElementType(), Ops[0],

1193 }

1199

1200

1201 case Instruction::ExtractElement:

1202 assert(Ops[0]->getType()->isVectorTy() && "Live-ins should be scalar");

1203 return Ops[0];

1204 }

1205 return nullptr;

1206 };

1207

1208 if (Value *V = FoldToIRValue())

1209 return R.getParent()->getPlan()->getOrAddLiveIn(V);

1210 return nullptr;

1211}

1212

1213

1215 VPlan *Plan = Def->getParent()->getPlan();

1216

1217

1218

1222 return Def->replaceAllUsesWith(V);

1223

1224

1226 VPValue *Op = PredPHI->getOperand(0);

1227 if (Op->isLiveIn())

1228 PredPHI->replaceAllUsesWith(Op);

1229 }

1230

1236 if (TruncTy == ATy) {

1237 Def->replaceAllUsesWith(A);

1238 } else {

1239

1241 return;

1243

1245 ? Instruction::SExt

1246 : Instruction::ZExt;

1248 TruncTy);

1249 if (auto *UnderlyingExt = Def->getOperand(0)->getUnderlyingValue()) {

1250

1251 Ext->setUnderlyingValue(UnderlyingExt);

1252 }

1253 Def->replaceAllUsesWith(Ext);

1255 auto *Trunc = Builder.createWidenCast(Instruction::Trunc, A, TruncTy);

1256 Def->replaceAllUsesWith(Trunc);

1257 }

1258 }

1259#ifndef NDEBUG

1260

1261

1264 for (VPUser *U : A->users()) {

1266 for (VPValue *VPV : R->definedValues())

1268 }

1269#endif

1270 }

1271

1272

1273

1274

1275

1280 Def->replaceAllUsesWith(X);

1281 Def->eraseFromParent();

1282 return;

1283 }

1284

1285

1287 return Def->replaceAllUsesWith(Def->getOperand(Def->getOperand(0) == X));

1288

1289

1291 return Def->replaceAllUsesWith(X);

1292

1293

1295 return Def->replaceAllUsesWith(Def->getOperand(Def->getOperand(0) == X));

1296

1297

1299 return Def->replaceAllUsesWith(Def->getOperand(1));

1300

1301

1304

1305

1306 (!Def->getOperand(0)->hasMoreThanOneUniqueUser() ||

1307 !Def->getOperand(1)->hasMoreThanOneUniqueUser()))

1308 return Def->replaceAllUsesWith(

1309 Builder.createLogicalAnd(X, Builder.createOr(Y, Z)));

1310

1311

1313 return Def->replaceAllUsesWith(Plan->getFalse());

1314

1316 return Def->replaceAllUsesWith(X);

1317

1318

1321 Def->setOperand(0, C);

1322 Def->setOperand(1, Y);

1323 Def->setOperand(2, X);

1324 return;

1325 }

1326

1327

1328

1329

1332 X->hasMoreThanOneUniqueUser())

1333 return Def->replaceAllUsesWith(

1334 Builder.createLogicalAnd(X, Builder.createLogicalAnd(Y, Z)));

1335

1337 return Def->replaceAllUsesWith(A);

1338

1340 return Def->replaceAllUsesWith(A);

1341

1343 return Def->replaceAllUsesWith(

1344 Def->getOperand(0) == A ? Def->getOperand(1) : Def->getOperand(0));

1345

1348 return Def->replaceAllUsesWith(A);

1349

1350

1354 if (all_of(Cmp->users(),

1362

1363 R->setOperand(1, Y);

1364 R->setOperand(2, X);

1365 } else {

1366

1368 R->replaceAllUsesWith(Cmp);

1369 }

1370 }

1371

1372

1373 if (!Cmp->getDebugLoc() && Def->getDebugLoc())

1374 Cmp->setDebugLoc(Def->getDebugLoc());

1375 }

1376 }

1377 }

1378

1379

1380

1384 for (VPValue *Op : Def->operands()) {

1386 if (Op->getNumUsers() > 1 ||

1390 } else if (!UnpairedCmp) {

1391 UnpairedCmp = Op->getDefiningRecipe();

1392 } else {

1395 UnpairedCmp = nullptr;

1396 }

1397 }

1398

1399 if (UnpairedCmp)

1401

1402 if (NewOps.size() < Def->getNumOperands()) {

1404 return Def->replaceAllUsesWith(NewAnyOf);

1405 }

1406 }

1407

1408

1409

1410

1416 return Def->replaceAllUsesWith(NewCmp);

1417 }

1418

1419

1424 return Def->replaceAllUsesWith(Def->getOperand(1));

1425

1430 X = Builder.createWidenCast(Instruction::Trunc, X, WideStepTy);

1431 Def->replaceAllUsesWith(X);

1432 return;

1433 }

1434

1435

1436

1441 Def->setOperand(1, Def->getOperand(0));

1442 Def->setOperand(0, Y);

1443 return;

1444 }

1445

1447 if (Phi->getOperand(0) == Phi->getOperand(1))

1448 Phi->replaceAllUsesWith(Phi->getOperand(0));

1449 return;

1450 }

1451

1452

1456 Def->replaceAllUsesWith(

1457 BuildVector->getOperand(BuildVector->getNumOperands() - 1));

1458 return;

1459 }

1461 return Def->replaceAllUsesWith(A);

1462 }

1463

1464

1467 Def->replaceAllUsesWith(

1468 BuildVector->getOperand(BuildVector->getNumOperands() - 2));

1469 return;

1470 }

1471

1475 Def->replaceAllUsesWith(BuildVector->getOperand(Idx));

1476 return;

1477 }

1478

1480 Def->replaceAllUsesWith(

1482 return;

1483 }

1484

1485

1486

1490 "broadcast operand must be single-scalar");

1491 Def->setOperand(0, C);

1492 return;

1493 }

1494

1496 if (Phi->getNumOperands() == 1)

1497 Phi->replaceAllUsesWith(Phi->getOperand(0));

1498 return;

1499 }

1500

1501

1502

1504 return;

1505

1506

1510 if (Phi->getOperand(1) != Def && match(Phi->getOperand(0), m_ZeroInt()) &&

1511 Phi->getSingleUser() == Def) {

1512 Phi->setOperand(0, Y);

1513 Def->replaceAllUsesWith(Phi);

1514 return;

1515 }

1516 }

1517

1518

1520 if (VecPtr->isFirstPart()) {

1521 VecPtr->replaceAllUsesWith(VecPtr->getOperand(0));

1522 return;

1523 }

1524 }

1525

1526

1527

1530 Steps->replaceAllUsesWith(Steps->getOperand(0));

1531 return;

1532 }

1533 }

1534

1538 Def->replaceUsesWithIf(StartV, [](const VPUser &U, unsigned Idx) {

1540 return PhiR && PhiR->isInLoop();

1541 });

1542 return;

1543 }

1544

1546 Def->replaceAllUsesWith(A);

1547 return;

1548 }

1549

1555 [Def, A](VPUser *U) { return U->usesScalars(A) || Def == U; })) {

1556 return Def->replaceAllUsesWith(A);

1557 }

1558

1560 return Def->replaceAllUsesWith(A);

1561}

1562

1573

1576 return;

1577

1578

1579

1580

1581

1587 continue;

1589 if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))

1590 continue;

1591

1593 if (RepR && isa(RepR->getUnderlyingInstr()) &&

1596 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),

1597 true , nullptr , *RepR ,

1598 *RepR , RepR->getDebugLoc());

1599 Clone->insertBefore(RepOrWidenR);

1601 VPValue *ExtractOp = Clone->getOperand(0);

1603 ExtractOp =

1605 ExtractOp =

1607 Clone->setOperand(0, ExtractOp);

1608 RepR->eraseFromParent();

1609 continue;

1610 }

1611

1612

1614 continue;

1615

1616

1617

1618

1619

1620

1621 if (all\_of(RepOrWidenR->users(),

1622 [RepOrWidenR](const VPUser *U) {

1623 if (auto *VPI = dyn_cast(U)) {

1624 unsigned Opcode = VPI->getOpcode();

1625 if (Opcode == VPInstruction::ExtractLastLane ||

1626 Opcode == VPInstruction::ExtractLastPart ||

1627 Opcode == VPInstruction::ExtractPenultimateElement)

1628 return true;

1629 }

1630

1631 return U->usesScalars(RepOrWidenR);

1632 }) &&

1633 none_of(RepOrWidenR->operands(), [RepOrWidenR](VPValue *Op) {

1634 if (Op->getSingleUser() != RepOrWidenR)

1635 return false;

1636

1637

1638 bool LiveInNeedsBroadcast =

1639 Op->isLiveIn() && !isa(Op->getLiveInIRValue());

1640 auto *OpR = dyn_cast(Op);

1641 return LiveInNeedsBroadcast || (OpR && OpR->isSingleScalar());

1642 }))

1643 continue;

1644

1646 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),

1647 true , nullptr, *RepOrWidenR);

1648 Clone->insertBefore(RepOrWidenR);

1649 RepOrWidenR->replaceAllUsesWith(Clone);

1651 RepOrWidenR->eraseFromParent();

1652 }

1653 }

1654}

1655

1656

1657

1660 return;

1661 VPValue *CommonEdgeMask;

1664 return;

1668 return;

1671}

1672

1673

1674

1680 if (!Blend)

1681 continue;

1682

1684

1685

1687 if (Blend->isNormalized() || match(Blend->getMask(0), m_False()))

1688 UniqueValues.insert(Blend->getIncomingValue(0));

1689 for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)

1691 UniqueValues.insert(Blend->getIncomingValue(I));

1692

1693 if (UniqueValues.size() == 1) {

1694 Blend->replaceAllUsesWith(*UniqueValues.begin());

1695 Blend->eraseFromParent();

1696 continue;

1697 }

1698

1699 if (Blend->isNormalized())

1700 continue;

1701

1702

1703

1704

1705 unsigned StartIndex = 0;

1706 for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {

1707

1708

1709

1710 VPValue *Mask = Blend->getMask(I);

1711 if (Mask->getNumUsers() == 1 && match(Mask, m_False())) {

1712 StartIndex = I;

1713 break;

1714 }

1715 }

1716

1718 OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));

1719

1720 for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {

1721 if (I == StartIndex)

1722 continue;

1723 OperandsWithMask.push_back(Blend->getIncomingValue(I));

1724 OperandsWithMask.push_back(Blend->getMask(I));

1725 }

1726

1727 auto *NewBlend =

1729 OperandsWithMask, Blend->getDebugLoc());

1730 NewBlend->insertBefore(&R);

1731

1732 VPValue *DeadMask = Blend->getMask(StartIndex);

1734 Blend->eraseFromParent();

1736

1737

1739 if (NewBlend->getNumOperands() == 3 &&

1741 VPValue *Inc0 = NewBlend->getOperand(0);

1742 VPValue *Inc1 = NewBlend->getOperand(1);

1743 VPValue *OldMask = NewBlend->getOperand(2);

1744 NewBlend->setOperand(0, Inc1);

1745 NewBlend->setOperand(1, Inc0);

1746 NewBlend->setOperand(2, NewMask);

1749 }

1750 }

1751 }

1752}

1753

1754

1755

1758 unsigned BestUF) {

1759

1761 return false;

1762

1763 const APInt *TC;

1765 return false;

1766

1767

1768

1770 APInt AlignedTC =

1773 APInt MaxVal = AlignedTC - 1;

1775 };

1776 unsigned NewBitWidth =

1778

1781

1782 bool MadeChange = false;

1783

1787

1788

1789

1790

1791 if (!WideIV || !WideIV->isCanonical() ||

1792 WideIV->hasMoreThanOneUniqueUser() ||

1793 NewIVTy == WideIV->getScalarType())

1794 continue;

1795

1796

1797

1798 VPUser *SingleUser = WideIV->getSingleUser();

1799 if (!SingleUser ||

1803 continue;

1804

1805

1807 WideIV->setStartValue(NewStart);

1809 WideIV->setStepValue(NewStep);

1810

1815 Cmp->setOperand(1, NewBTC);

1816

1817 MadeChange = true;

1818 }

1819

1820 return MadeChange;

1821}

1822

1823

1824

1829 return any_of(Cond->getDefiningRecipe()->operands(), [&Plan, BestVF, BestUF,

1831 return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, SE);

1832 });

1833

1836 m_Specific(CanIV->getBackedgeValue()),

1838 return false;

1839

1840

1841

1842

1843

1844 const SCEV *VectorTripCount =

1849 "Trip count SCEV must be computable");

1853}

1854

1855

1856

1857

1858

1859

1860

1861

1863 unsigned UF) {

1865 return false;

1866

1869 auto *Term = &ExitingVPBB->back();

1870

1874 return false;

1875

1878

1882 for (unsigned Part = 0; Part < UF; ++Part) {

1887 auto *Ext =

1890 Extracts[Part] = Ext;

1892 }

1893 };

1894

1895

1899 if (!Phi)

1900 continue;

1901 VPValue *Index = nullptr;

1902 match(Phi->getBackedgeValue(),

1904 assert(Index && "Expected index from ActiveLaneMask instruction");

1905

1907 if (match(Index,

1910 Phis[Part] = Phi;

1911 else

1912

1913 Phis[0] = Phi;

1914 }

1915

1917 "Expected one VPActiveLaneMaskPHIRecipe for each unroll part");

1918

1921

1924 "Expected incoming values of Phi to be ActiveLaneMasks");

1925

1926

1927

1929 EntryALM->setOperand(2, ALMMultiplier);

1930 LoopALM->setOperand(2, ALMMultiplier);

1931

1932

1934 ExtractFromALM(EntryALM, EntryExtracts);

1935

1936

1937

1939 ExtractFromALM(LoopALM, LoopExtracts);

1941 Not->setOperand(0, LoopExtracts[0]);

1942

1943

1944 for (unsigned Part = 0; Part < UF; ++Part) {

1945 Phis[Part]->setStartValue(EntryExtracts[Part]);

1946 Phis[Part]->setBackedgeValue(LoopExtracts[Part]);

1947 }

1948

1949 return true;

1950}

1951

1952

1953

1955 unsigned BestUF,

1959 auto *Term = &ExitingVPBB->back();

1965

1966

1967 const SCEV *VectorTripCount =

1972 "Trip count SCEV must be computable");

1976 return false;

1978

1979

1981 return false;

1982 } else {

1983 return false;

1984 }

1985

1986

1987

1988

1989

1990

1991

1994 if (auto *R = dyn_cast(&Phi))

1995 return R->isCanonical();

1996 return isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe,

1997 VPFirstOrderRecurrencePHIRecipe, VPPhi>(&Phi);

1998 })) {

2003 R->getScalarType());

2005 HeaderR.eraseFromParent();

2006 continue;

2007 }

2009 HeaderR.getVPSingleValue()->replaceAllUsesWith(Phi->getIncomingValue(0));

2010 HeaderR.eraseFromParent();

2011 }

2012

2017

2019 B->setParent(nullptr);

2020

2024 } else {

2025

2026

2028 {}, {}, Term->getDebugLoc());

2030 }

2031

2032 Term->eraseFromParent();

2033

2034 return true;

2035}

2036

2037

2038

2046 continue;

2047

2051 continue;

2054 continue;

2055

2058 R.getDebugLoc());

2059 R.getVPSingleValue()->replaceAllUsesWith(Trunc);

2060 return true;

2061 }

2062 }

2063 return false;

2064}

2065

2067 unsigned BestUF,

2069 assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");

2070 assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");

2071

2076

2077 if (MadeChange) {

2078 Plan.setVF(BestVF);

2079 assert(Plan.getUF() == BestUF && "BestUF must match the Plan's UF");

2080 }

2081}

2082

2083

2084

2085

2086static bool

2090

2093 Seen.insert(Previous);

2094 auto TryToPushSinkCandidate = [&](VPRecipeBase *SinkCandidate) {

2095

2096

2097 if (SinkCandidate == Previous)

2098 return false;

2099

2101 !Seen.insert(SinkCandidate).second ||

2103 return true;

2104

2106 return false;

2107

2108 WorkList.push_back(SinkCandidate);

2109 return true;

2110 };

2111

2112

2114 for (unsigned I = 0; I != WorkList.size(); ++I) {

2117 "only recipes with a single defined value expected");

2118

2121 return false;

2122 }

2123 }

2124

2125

2126

2129 });

2130

2131 for (VPRecipeBase *SinkCandidate : WorkList) {

2132 if (SinkCandidate == FOR)

2133 continue;

2134

2135 SinkCandidate->moveAfter(Previous);

2136 Previous = SinkCandidate;

2137 }

2138 return true;

2139}

2140

2141

2146 return false;

2147

2148

2152

2153

2154 for (VPUser *U : FOR->users()) {

2157 HoistPoint = R;

2158 }

2160 [&VPDT, HoistPoint](VPUser *U) {

2161 auto *R = cast(U);

2162 return HoistPoint == R ||

2163 VPDT.properlyDominates(HoistPoint, R);

2164 }) &&

2165 "HoistPoint must dominate all users of FOR");

2166

2167 auto NeedsHoisting = [HoistPoint, &VPDT,

2169 VPRecipeBase *HoistCandidate = HoistCandidateV->getDefiningRecipe();

2170 if (!HoistCandidate)

2171 return nullptr;

2175 HoistCandidate->getRegion() == EnclosingLoopRegion) &&

2176 "CFG in VPlan should still be flat, without replicate regions");

2177

2178 if (!Visited.insert(HoistCandidate).second)

2179 return nullptr;

2180

2181

2182

2184 return nullptr;

2185

2186

2187

2189 return nullptr;

2190 return HoistCandidate;

2191 };

2192

2194 return true;

2195

2196

2197 HoistCandidates.push_back(Previous);

2198

2199 for (unsigned I = 0; I != HoistCandidates.size(); ++I) {

2202 "only recipes with a single defined value expected");

2204 return false;

2205

2207

2208

2209

2210

2211 if (Op == FOR)

2212 return false;

2213

2214 if (auto *R = NeedsHoisting(Op)) {

2215

2216

2217 if (R->getNumDefinedValues() != 1)

2218 return false;

2220 }

2221 }

2222 }

2223

2224

2225

2228 });

2229

2230 for (VPRecipeBase *HoistCandidate : HoistCandidates) {

2231 HoistCandidate->moveBefore(*HoistPoint->getParent(),

2233 }

2234

2235 return true;

2236}

2237

2241

2247

2250 VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe();

2251

2252

2253 while (auto *PrevPhi =

2255 assert(PrevPhi->getParent() == FOR->getParent());

2257 Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();

2258 }

2259

2262 return false;

2263

2264

2265

2269 else

2272

2273 auto *RecurSplice =

2275 {FOR, FOR->getBackedgeValue()});

2276

2277 FOR->replaceAllUsesWith(RecurSplice);

2278

2279

2280 RecurSplice->setOperand(0, FOR);

2281

2282

2283

2284

2285

2286 for (VPUser *U : RecurSplice->users()) {

2289 continue;

2290

2296 VPValue *PenultimateIndex =

2297 B.createNaryOp(Instruction::Sub, {LastActiveLane, One});

2298 VPValue *PenultimateLastIter =

2300 {PenultimateIndex, FOR->getBackedgeValue()});

2303

2305 VPValue *Sel = B.createSelect(Cmp, LastPrevIter, PenultimateLastIter);

2307 }

2308 }

2309 return true;

2310}

2311

2316 if (!PhiR)

2317 continue;

2318 RecurKind RK = PhiR->getRecurrenceKind();

2321 continue;

2322

2325 RecWithFlags->dropPoisonGeneratingFlags();

2326 }

2327 }

2328}

2329

2330namespace {

2331struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {

2333 return Def == getEmptyKey() || Def == getTombstoneKey();

2334 }

2335

2336

2337

2339

2340

2344 return GEP->getSourceElementType();

2345 return nullptr;

2346 })

2347 .Case<VPVectorPointerRecipe, VPWidenGEPRecipe>(

2348 [](auto *I) { return I->getSourceElementType(); })

2349 .Default([](auto *) { return nullptr; });

2350 }

2351

2352

2353 static bool canHandle(const VPSingleDefRecipe *Def) {

2354

2355

2356

2358

2359

2360

2361

2362 if (C || (C->first && (C->second == Instruction::InsertValue ||

2363 C->second == Instruction::ExtractValue)))

2364 return false;

2365

2366

2367

2368

2369 return Def->mayReadFromMemory();

2370 }

2371

2372

2373 static unsigned getHashValue(const VPSingleDefRecipe *Def) {

2374 const VPlan *Plan = Def->getParent()->getPlan();

2375 VPTypeAnalysis TypeInfo(*Plan);

2378 getGEPSourceElementType(Def), TypeInfo.inferScalarType(Def),

2381 if (RFlags->hasPredicate())

2382 return hash_combine(Result, RFlags->getPredicate());

2384 }

2385

2386

2387 static bool isEqual(const VPSingleDefRecipe *L, const VPSingleDefRecipe *R) {

2389 return L == R;

2390 if (L->getVPDefID() != R->getVPDefID() ||

2392 getGEPSourceElementType(L) != getGEPSourceElementType(R) ||

2394 equal(L->operands(), R->operands()))

2395 return false;

2397 "must have valid opcode info for both recipes");

2399 if (LFlags->hasPredicate() &&

2400 LFlags->getPredicate() !=

2402 return false;

2403

2404

2405

2406 const VPRegionBlock *RegionL = L->getRegion();

2407 const VPRegionBlock *RegionR = R->getRegion();

2408 if (((RegionL && RegionL->isReplicator()) ||

2410 L->getParent() != R->getParent())

2411 return false;

2412 const VPlan *Plan = L->getParent()->getPlan();

2413 VPTypeAnalysis TypeInfo(*Plan);

2414 return TypeInfo.inferScalarType(L) == TypeInfo.inferScalarType(R);

2415 }

2416};

2417}

2418

2419

2420

2424

2429 if (!Def || !VPCSEDenseMapInfo::canHandle(Def))

2430 continue;

2432

2433 if (!VPDT.dominates(V->getParent(), VPBB))

2434 continue;

2435

2438 Def->replaceAllUsesWith(V);

2439 continue;

2440 }

2441 CSEMap[Def] = Def;

2442 }

2443 }

2444}

2445

2446

2449

2450

2451

2452

2453

2454

2457 "Expected vector prehader's successor to be the vector loop region");

2462 continue;

2464 return !Op->isDefinedOutsideLoopRegions();

2465 }))

2466 continue;

2467 R.moveBefore(*Preheader, Preheader->end());

2468 }

2469 }

2470}

2471

2475 return;

2476

2477

2478

2479

2488 &R))

2489 continue;

2490

2491 VPValue *ResultVPV = R.getVPSingleValue();

2493 unsigned NewResSizeInBits = MinBWs.lookup(UI);

2494 if (!NewResSizeInBits)

2495 continue;

2496

2497

2498

2499

2500

2502 continue;

2503

2506 assert(OldResTy->isIntegerTy() && "only integer types supported");

2507 (void)OldResSizeInBits;

2508

2510

2511

2512

2513

2515 VPW->dropPoisonGeneratingFlags();

2516

2517 if (OldResSizeInBits != NewResSizeInBits &&

2519

2520 auto *Ext =

2522 Ext->insertAfter(&R);

2524 Ext->setOperand(0, ResultVPV);

2525 assert(OldResSizeInBits > NewResSizeInBits && "Nothing to shrink?");

2526 } else {

2528 "Only ICmps should not need extending the result.");

2529 }

2530

2533 continue;

2534

2535

2537 for (unsigned Idx = StartIdx; Idx != R.getNumOperands(); ++Idx) {

2538 auto *Op = R.getOperand(Idx);

2539 unsigned OpSizeInBits =

2541 if (OpSizeInBits == NewResSizeInBits)

2542 continue;

2543 assert(OpSizeInBits > NewResSizeInBits && "nothing to truncate");

2544 auto [ProcessedIter, IterIsEmpty] = ProcessedTruncs.try_emplace(Op);

2545 if (!IterIsEmpty) {

2546 R.setOperand(Idx, ProcessedIter->second);

2547 continue;

2548 }

2549

2551 if (Op->isLiveIn())

2553 else

2554 Builder.setInsertPoint(&R);

2556 Builder.createWidenCast(Instruction::Trunc, Op, NewResTy);

2557 ProcessedIter->second = NewOp;

2558 R.setOperand(Idx, NewOp);

2559 }

2560

2561 }

2562 }

2563}

2564

2569

2571 continue;

2572

2573 assert(VPBB->getNumSuccessors() == 2 &&

2574 "Two successors expected for BranchOnCond");

2575 unsigned RemovedIdx;

2577 RemovedIdx = 1;

2579 RemovedIdx = 0;

2580 else

2581 continue;

2582

2586 "There must be a single edge between VPBB and its successor");

2587

2588

2591

2592

2593

2595 VPBB->back().eraseFromParent();

2596 }

2597}

2598

2618

2619

2620

2621

2622

2623

2624

2625

2626

2627

2628

2629

2630

2631

2632

2633

2634

2635

2636

2637

2638

2639

2640

2641

2642

2643

2644

2645

2646

2647

2648

2649

2650

2651

2657 VPValue *StartV = CanonicalIVPHI->getStartValue();

2658

2659 auto *CanonicalIVIncrement =

2661

2662

2663 CanonicalIVIncrement->dropPoisonGeneratingFlags();

2664 DebugLoc DL = CanonicalIVIncrement->getDebugLoc();

2665

2666

2667

2669 VPBuilder Builder(VecPreheader);

2670

2671

2673

2674 VPValue *TripCount, *IncrementValue;

2676

2677

2678

2679 IncrementValue = CanonicalIVIncrement;

2680 TripCount = TC;

2681 } else {

2682

2683

2684

2685 IncrementValue = CanonicalIVPHI;

2687 {TC}, DL);

2688 }

2689 auto *EntryIncrement = Builder.createOverflowingOp(

2691 "index.part.next");

2692

2693

2694 VPValue *ALMMultiplier =

2697 {EntryIncrement, TC, ALMMultiplier}, DL,

2698 "active.lane.mask.entry");

2699

2700

2701

2702 auto *LaneMaskPhi =

2704 LaneMaskPhi->insertAfter(CanonicalIVPHI);

2705

2706

2707

2709 Builder.setInsertPoint(OriginalTerminator);

2710 auto *InLoopIncrement =

2712 {IncrementValue}, {false, false}, DL);

2714 {InLoopIncrement, TripCount, ALMMultiplier},

2715 DL, "active.lane.mask.next");

2717

2718

2719

2720 auto *NotMask = Builder.createNot(ALM, DL);

2723 return LaneMaskPhi;

2724}

2725

2726

2727

2728

2729

2733 auto *FoundWidenCanonicalIVUser = find_if(

2737 "Must have at most one VPWideCanonicalIVRecipe");

2738 if (FoundWidenCanonicalIVUser !=

2740 auto *WideCanonicalIV =

2742 WideCanonicalIVs.push_back(WideCanonicalIV);

2743 }

2744

2745

2746

2750 if (WidenOriginalIV && WidenOriginalIV->isCanonical())

2751 WideCanonicalIVs.push_back(WidenOriginalIV);

2752 }

2753

2754

2755

2757 for (auto *Wide : WideCanonicalIVs) {

2761 continue;

2762

2763 assert(VPI->getOperand(0) == Wide &&

2764 "WidenCanonicalIV must be the first operand of the compare");

2765 assert(!HeaderMask && "Multiple header masks found?");

2766 HeaderMask = VPI;

2767 }

2768 }

2769 return HeaderMask;

2770}

2771

2773 VPlan &Plan, bool UseActiveLaneMaskForControlFlow,

2776 UseActiveLaneMaskForControlFlow) &&

2777 "DataAndControlFlowWithoutRuntimeCheck implies "

2778 "UseActiveLaneMaskForControlFlow");

2779

2781 auto *FoundWidenCanonicalIVUser = find_if(

2783 assert(FoundWidenCanonicalIVUser &&

2784 "Must have widened canonical IV when tail folding!");

2786 auto *WideCanonicalIV =

2789 if (UseActiveLaneMaskForControlFlow) {

2792 } else {

2796 LaneMask =

2798 {WideCanonicalIV, Plan.getTripCount(), ALMMultiplier},

2799 nullptr, "active.lane.mask");

2800 }

2801

2802

2803

2804

2807}

2808

2812

2814

2815 template bool match(OpTy *V) const {

2817 Out = nullptr;

2818 return true;

2819 }

2821 }

2822};

2823

2824

2825

2826template <typename Op0_t, typename Op1_t>

2831

2832

2833

2834

2835

2836

2837

2838

2839

2845 VPValue *Addr, *Mask, *EndPtr;

2846

2847

2848 auto AdjustEndPtr = [&CurRecipe, &EVL](VPValue *EndPtr) {

2850 EVLEndPtr->insertBefore(&CurRecipe);

2851 EVLEndPtr->setOperand(1, &EVL);

2852 return EVLEndPtr;

2853 };

2854

2855 if (match(&CurRecipe,

2859 EVL, Mask);

2860

2861 if (match(&CurRecipe,

2866 AdjustEndPtr(EndPtr), EVL, Mask);

2867

2872 EVL, Mask);

2873

2879 AdjustEndPtr(EndPtr), EVL, Mask);

2880

2882 if (Rdx->isConditional() &&

2885

2887 if (Interleave->getMask() &&

2890

2892 if (match(&CurRecipe,

2895 Intrinsic::vp_merge, {Plan->getTrue(), LHS, RHS, &EVL},

2897

2901 Intrinsic::vp_merge, {Mask, LHS, RHS, &EVL},

2903

2910 }

2911

2912 return nullptr;

2913}

2914

2915

2920

2924 "User of VF that we can't transform to EVL.");

2927 });

2928

2930 [&LoopRegion, &Plan](VPUser *U) {

2931 return match(U,

2932 m_c_Add(m_Specific(LoopRegion->getCanonicalIV()),

2933 m_Specific(&Plan.getVFxUF()))) ||

2934 isa(U);

2935 }) &&

2936 "Only users of VFxUF should be VPWidenPointerInductionRecipe and the "

2937 "increment of the canonical induction.");

2939

2940

2942 });

2943

2944

2945

2947

2948

2949

2950 bool ContainsFORs =

2952 if (ContainsFORs) {

2953

2955

2957 MaxEVL = Builder.createScalarZExtOrTrunc(

2960

2961 Builder.setInsertPoint(Header, Header->getFirstNonPhi());

2962 VPValue *PrevEVL = Builder.createScalarPhi(

2964

2972 continue;

2976 Intrinsic::experimental_vp_splice,

2977 {V1, V2, Imm, Plan.getTrue(), PrevEVL, &EVL},

2979 R.getDebugLoc());

2981 R.getVPSingleValue()->replaceAllUsesWith(VPSplice);

2983 }

2984 }

2985 }

2986

2988 if (!HeaderMask)

2989 return;

2990

2991

2992

2993

2994

2995

2999 VPValue *EVLMask = Builder.createICmp(

3004

3005

3006

3007

3008

3013 if (!EVLRecipe)

3014 continue;

3015

3017 assert(NumDefVal == CurRecipe->getNumDefinedValues() &&

3018 "New recipe must define the same number of values as the "

3019 "original.");

3022 EVLRecipe)) {

3023 for (unsigned I = 0; I < NumDefVal; ++I) {

3024 VPValue *CurVPV = CurRecipe->getVPValue(I);

3026 }

3027 }

3029 }

3030

3033

3036 R->eraseFromParent();

3037 for (VPValue *Op : PossiblyDead)

3039 }

3040}

3041

3042

3043

3044

3045

3046

3047

3048

3049

3050

3051

3052

3053

3054

3055

3056

3057

3058

3059

3060

3061

3062

3063

3064

3065

3066

3067

3068

3069

3070

3071

3072

3073

3074

3075

3076

3077

3078

3079

3080

3081

3082

3083

3084

3086 VPlan &Plan, const std::optional &MaxSafeElements) {

3088 return;

3091

3092 auto *CanonicalIVPHI = LoopRegion->getCanonicalIV();

3094 VPValue *StartV = CanonicalIVPHI->getStartValue();

3095

3096

3098 EVLPhi->insertAfter(CanonicalIVPHI);

3099 VPBuilder Builder(Header, Header->getFirstNonPhi());

3100

3101

3102 VPPhi *AVLPhi = Builder.createScalarPhi(

3105

3106 if (MaxSafeElements) {

3107

3111 "safe_avl");

3112 }

3115

3116 auto *CanonicalIVIncrement =

3118 Builder.setInsertPoint(CanonicalIVIncrement);

3119 VPValue *OpVPEVL = VPEVL;

3120

3122 OpVPEVL = Builder.createScalarZExtOrTrunc(

3123 OpVPEVL, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());

3124

3125 auto *NextEVLIV = Builder.createOverflowingOp(

3126 Instruction::Add, {OpVPEVL, EVLPhi},

3127 {CanonicalIVIncrement->hasNoUnsignedWrap(),

3128 CanonicalIVIncrement->hasNoSignedWrap()},

3129 CanonicalIVIncrement->getDebugLoc(), "index.evl.next");

3130 EVLPhi->addOperand(NextEVLIV);

3131

3132 VPValue *NextAVL = Builder.createOverflowingOp(

3133 Instruction::Sub, {AVLPhi, OpVPEVL}, {true, false},

3136

3138

3139

3140

3141 CanonicalIVPHI->replaceAllUsesWith(EVLPhi);

3142 CanonicalIVIncrement->setOperand(0, CanonicalIVPHI);

3143

3145}

3146

3148

3149

3151

3156 assert(!EVLPhi && "Found multiple EVL PHIs. Only one expected");

3157 EVLPhi = PhiR;

3158 }

3159

3160

3161 if (!EVLPhi)

3162 return;

3163

3167 [[maybe_unused]] bool FoundAVL =

3168 match(EVLIncrement,

3170 assert(FoundAVL && "Didn't find AVL?");

3171

3172

3175 AVL = SafeAVL;

3176

3178 [[maybe_unused]] bool FoundAVLNext =

3181 assert(FoundAVLNext && "Didn't find AVL backedge?");

3182

3183

3184 auto *ScalarR =

3189

3190

3192 VPValue *Backedge = CanonicalIV->getIncomingValue(1);

3195 "Unexpected canonical iv");

3197

3198

3201 CanonicalIV->eraseFromParent();

3202

3203

3204

3205

3206

3210

3212 return;

3213 assert(LatchExitingBr &&

3214 match(LatchExitingBr,

3217 "Unexpected terminator in EVL loop");

3218

3220 VPBuilder Builder(LatchExitingBr);

3224 LatchExitingBr->eraseFromParent();

3225}

3226

3230

3231

3232 auto CanUseVersionedStride = [&Plan](VPUser &U, unsigned) {

3234 return R->getRegion() ||

3236 };

3238 for (const SCEV *Stride : StridesMap.values()) {

3241 const APInt *StrideConst;

3243

3244 continue;

3245

3249

3250

3251

3254 continue;

3256 if (!StrideVPV)

3257 continue;

3258 unsigned BW = U->getType()->getScalarSizeInBits();

3263 }

3264 RewriteMap[StrideV] = PSE.getSCEV(StrideV);

3265 }

3266

3269 if (!ExpSCEV)

3270 continue;

3271 const SCEV *ScevExpr = ExpSCEV->getSCEV();

3272 auto *NewSCEV =

3274 if (NewSCEV != ScevExpr) {

3276 ExpSCEV->replaceAllUsesWith(NewExp);

3279 }

3280 }

3281}

3282

3285 const std::function<bool(BasicBlock *)> &BlockNeedsPredication) {

3286

3287

3289 auto CollectPoisonGeneratingInstrsInBackwardSlice([&](VPRecipeBase *Root) {

3292

3293

3294 while (!Worklist.empty()) {

3296

3297 if (!Visited.insert(CurRec).second)

3298 continue;

3299

3300

3301

3302

3303

3306 continue;

3307

3308

3309

3310

3313

3314

3315

3316

3317

3319 RecWithFlags->isDisjoint()) {

3320 VPBuilder Builder(RecWithFlags);

3321 VPInstruction *New = Builder.createOverflowingOp(

3322 Instruction::Add, {A, B}, {false, false},

3323 RecWithFlags->getDebugLoc());

3324 New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());

3325 RecWithFlags->replaceAllUsesWith(New);

3326 RecWithFlags->eraseFromParent();

3327 CurRec = New;

3328 } else

3329 RecWithFlags->dropPoisonGeneratingFlags();

3330 } else {

3333 (void)Instr;

3334 assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&

3335 "found instruction with poison generating flags not covered by "

3336 "VPRecipeWithIRFlags");

3337 }

3338

3339

3341 if (VPRecipeBase *OpDef = Operand->getDefiningRecipe())

3343 }

3344 });

3345

3346

3347

3348

3353 Instruction &UnderlyingInstr = WidenRec->getIngredient();

3354 VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe();

3355 if (AddrDef && WidenRec->isConsecutive() &&

3356 BlockNeedsPredication(UnderlyingInstr.getParent()))

3357 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);

3359 VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe();

3360 if (AddrDef) {

3361

3363 InterleaveRec->getInterleaveGroup();

3364 bool NeedPredication = false;

3365 for (int I = 0, NumMembers = InterGroup->getNumMembers();

3366 I < NumMembers; ++I) {

3368 if (Member)

3369 NeedPredication |= BlockNeedsPredication(Member->getParent());

3370 }

3371

3372 if (NeedPredication)

3373 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);

3374 }

3375 }

3376 }

3377 }

3378}

3379

3383 &InterleaveGroups,

3384 VPRecipeBuilder &RecipeBuilder, const bool &ScalarEpilogueAllowed) {

3385 if (InterleaveGroups.empty())

3386 return;

3387

3388

3389

3390

3392 for (const auto *IG : InterleaveGroups) {

3393 auto *Start =

3398 StoredValues.push_back(StoreR->getStoredValue());

3399 for (unsigned I = 1; I < IG->getFactor(); ++I) {

3401 if (!MemberI)

3402 continue;

3406 StoredValues.push_back(StoreR->getStoredValue());

3407 InterleaveMD.intersect(*MemoryR);

3408 }

3409

3410 bool NeedsMaskForGaps =

3411 (IG->requiresScalarEpilogue() && !ScalarEpilogueAllowed) ||

3412 (!StoredValues.empty() && !IG->isFull());

3413

3414 Instruction *IRInsertPos = IG->getInsertPos();

3415 auto *InsertPos =

3417

3422

3423

3424 VPValue *Addr = Start->getAddr();

3427

3428

3429

3430

3431

3432

3433 assert(IG->getIndex(IRInsertPos) != 0 &&

3434 "index of insert position shouldn't be zero");

3438 IG->getIndex(IRInsertPos),

3439 true);

3442 Addr = B.createNoWrapPtrAdd(InsertPos->getAddr(), OffsetVPV, NW);

3443 }

3444

3445

3446

3447

3448 if (IG->isReverse()) {

3451 -(int64_t)IG->getFactor(), NW, InsertPos->getDebugLoc());

3452 ReversePtr->insertBefore(InsertPos);

3453 Addr = ReversePtr;

3454 }

3456 InsertPos->getMask(), NeedsMaskForGaps,

3457 InterleaveMD, InsertPos->getDebugLoc());

3458 VPIG->insertBefore(InsertPos);

3459

3460 unsigned J = 0;

3461 for (unsigned i = 0; i < IG->getFactor(); ++i)

3462 if (Instruction *Member = IG->getMember(i)) {

3464 if (!Member->getType()->isVoidTy()) {

3467 J++;

3468 }

3470 }

3471 }

3472}

3473

3474

3475

3476

3477

3478

3479

3480

3481

3482

3483

3484

3485

3486

3487

3488

3489

3490

3491

3492

3493

3494

3495

3496

3497

3498

3499

3500

3501

3502

3503

3504

3505static void

3513

3514

3515

3517

3523 AddOp = Instruction::Add;

3524 MulOp = Instruction::Mul;

3525 } else {

3526 AddOp = ID.getInductionOpcode();

3527 MulOp = Instruction::FMul;

3528 }

3529

3530

3534 assert(StepTy->isIntegerTy() && "Truncation requires an integer type");

3535 Step = Builder.createScalarCast(Instruction::Trunc, Step, Ty, DL);

3536 Start = Builder.createScalarCast(Instruction::Trunc, Start, Ty, DL);

3537

3538 Flags.dropPoisonGeneratingFlags();

3539 StepTy = Ty;

3540 }

3541

3542

3543 Type *IVIntTy =

3547 Init = Builder.createWidenCast(Instruction::UIToFP, Init, StepTy);

3548

3551

3552 Init = Builder.createNaryOp(MulOp, {Init, SplatStep}, Flags);

3553 Init = Builder.createNaryOp(AddOp, {SplatStart, Init}, Flags,

3555

3556

3559 WidePHI->insertBefore(WidenIVR);

3560

3561

3564

3566 Inc = SplatVF;

3568 } else {

3570 Builder.setInsertPoint(R->getParent(), std::next(R->getIterator()));

3571

3572

3574 VF = Builder.createScalarCast(Instruction::CastOps::UIToFP, VF, StepTy,

3575 DL);

3576 else

3577 VF = Builder.createScalarZExtOrTrunc(VF, StepTy,

3579

3580 Inc = Builder.createNaryOp(MulOp, {Step, VF}, Flags);

3582 Prev = WidePHI;

3583 }

3584

3587 auto *Next = Builder.createNaryOp(AddOp, {Prev, Inc}, Flags,

3588 WidenIVR->getDebugLoc(), "vec.ind.next");

3589

3590 WidePHI->addOperand(Next);

3591

3593}

3594

3595

3596

3597

3598

3599

3600

3601

3602

3603

3604

3605

3606

3607

3608

3609

3610

3611

3612

3613

3614

3615

3618 VPlan *Plan = R->getParent()->getPlan();

3619 VPValue *Start = R->getStartValue();

3620 VPValue *Step = R->getStepValue();

3621 VPValue *VF = R->getVFValue();

3622

3623 assert(R->getInductionDescriptor().getKind() ==

3625 "Not a pointer induction according to InductionDescriptor!");

3628 "Recipe should have been replaced");

3629

3632

3633

3634 VPPhi *ScalarPtrPhi = Builder.createScalarPhi(Start, DL, "pointer.phi");

3635

3636

3637

3638 Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());

3641 Offset = Builder.createOverflowingOp(Instruction::Mul, {Offset, Step});

3642 VPValue *PtrAdd = Builder.createNaryOp(

3644 R->replaceAllUsesWith(PtrAdd);

3645

3646

3649 VF = Builder.createScalarZExtOrTrunc(VF, StepTy, TypeInfo.inferScalarType(VF),

3650 DL);

3651 VPValue *Inc = Builder.createOverflowingOp(Instruction::Mul, {Step, VF});

3652

3654 Builder.createPtrAdd(ScalarPtrPhi, Inc, DL, "ptr.ind");

3655 ScalarPtrPhi->addOperand(InductionGEP);

3656}

3657

3659

3663 if (!R->isReplicator())

3665 }

3667 R->dissolveToCFGLoop();

3668}

3669

3678 ToRemove.push_back(WidenIVR);

3679 continue;

3680 }

3681

3683

3684

3685 if (WidenIVR->onlyScalarsGenerated(Plan.hasScalableVF())) {

3689 WidenIVR->replaceAllUsesWith(PtrAdd);

3690 ToRemove.push_back(WidenIVR);

3691 continue;

3692 }

3694 ToRemove.push_back(WidenIVR);

3695 continue;

3696 }

3697

3698

3702 for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)

3703 Select = Builder.createSelect(Blend->getMask(I),

3704 Blend->getIncomingValue(I), Select,

3705 R.getDebugLoc(), "predphi");

3706 Blend->replaceAllUsesWith(Select);

3708 }

3709

3711 Expr->decompose();

3713 }

3714

3715

3717 if (LastActiveL &&

3719

3721 for (VPValue *Op : LastActiveL->operands()) {

3722 VPValue *NotMask = Builder.createNot(Op, LastActiveL->getDebugLoc());

3724 }

3725

3726

3727 VPValue *FirstInactiveLane = Builder.createNaryOp(

3729 LastActiveL->getDebugLoc(), "first.inactive.lane");

3730

3731

3734 VPValue *LastLane = Builder.createNaryOp(

3735 Instruction::Sub, {FirstInactiveLane, One},

3736 LastActiveL->getDebugLoc(), "last.active.lane");

3737

3739 ToRemove.push_back(LastActiveL);

3740 continue;

3741 }

3742

3747 continue;

3748

3749

3754 ? Instruction::UIToFP

3755 : Instruction::Trunc;

3756 VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);

3757 }

3758

3759 assert(match(ScalarStep, m_One()) && "Expected non-unit scalar-step");

3761 ScalarStep =

3762 Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);

3763 }

3764

3767 Flags = {VPI->getFastMathFlags()};

3768

3769 unsigned MulOpc =

3770 IVTy->isFloatingPointTy() ? Instruction::FMul : Instruction::Mul;

3772 MulOpc, {VectorStep, ScalarStep}, Flags, R.getDebugLoc());

3773 VectorStep = Mul;

3774 VPI->replaceAllUsesWith(VectorStep);

3776 }

3777 }

3778

3780 R->eraseFromParent();

3781}

3782

3792 EarlyExitVPBB->getPredecessors()[0] == EarlyExitingVPBB &&

3793 "unsupported early exit VPBB");

3794

3795

3796

3799 }

3800

3804 "Terminator must be be BranchOnCond");

3805 VPValue *CondOfEarlyExitingVPBB =

3807 auto *CondToEarlyExit = TrueSucc == EarlyExitVPBB

3808 ? CondOfEarlyExitingVPBB

3809 : Builder.createNot(CondOfEarlyExitingVPBB);

3810

3811

3812

3813 VPValue *IsEarlyExitTaken =

3821

3823

3824

3825 VPBuilder MiddleBuilder(NewMiddle);

3826 VPBuilder EarlyExitB(VectorEarlyExitVPBB);

3829

3830

3831 unsigned EarlyExitIdx = ExitIRI->getNumOperands() - 1;

3832 if (ExitIRI->getNumOperands() != 1) {

3833

3834

3835 ExitIRI->extractLastLaneOfLastPartOfFirstOperand(MiddleBuilder);

3836 }

3837

3838 VPValue *IncomingFromEarlyExit = ExitIRI->getOperand(EarlyExitIdx);

3839 if (!IncomingFromEarlyExit->isLiveIn()) {

3840

3844 IncomingFromEarlyExit = EarlyExitB.createNaryOp(

3847 ExitIRI->setOperand(EarlyExitIdx, IncomingFromEarlyExit);

3848 }

3849 }

3851

3852

3853

3854

3857 "Unexpected terminator");

3858 auto *IsLatchExitTaken =

3859 Builder.createICmp(CmpInst::ICMP_EQ, LatchExitingBranch->getOperand(0),

3860 LatchExitingBranch->getOperand(1));

3861 auto *AnyExitTaken = Builder.createNaryOp(

3862 Instruction::Or, {IsEarlyExitTaken, IsLatchExitTaken});

3864 LatchExitingBranch->eraseFromParent();

3865}

3866

3867

3868

3869

3870

3874 Type *RedTy = Ctx.Types.inferScalarType(Red);

3875 VPValue *VecOp = Red->getVecOp();

3876

3877

3878 auto IsExtendedRedValidAndClampRange =

3884

3889

3890 if (Red->isPartialReduction()) {

3893

3894

3895 ExtRedCost = Ctx.TTI.getPartialReductionCost(

3896 Opcode, SrcTy, nullptr, RedTy, VF, ExtKind,

3898 } else {

3899 ExtRedCost = Ctx.TTI.getExtendedReductionCost(

3900 Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,

3901 Red->getFastMathFlags(), CostKind);

3902 }

3903 return ExtRedCost.isValid() && ExtRedCost < ExtCost + RedCost;

3904 },

3906 };

3907

3909

3911 IsExtendedRedValidAndClampRange(

3914 Ctx.Types.inferScalarType(A)))

3916

3917 return nullptr;

3918}

3919

3920

3921

3922

3923

3924

3925

3926

3927

3932 if (Opcode != Instruction::Add && Opcode != Instruction::Sub)

3933 return nullptr;

3934

3935 Type *RedTy = Ctx.Types.inferScalarType(Red);

3936

3937

3938 auto IsMulAccValidAndClampRange =

3944 Type *SrcTy =

3945 Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;

3947

3948 if (Red->isPartialReduction()) {

3949 Type *SrcTy2 =

3950 Ext1 ? Ctx.Types.inferScalarType(Ext1->getOperand(0)) : nullptr;

3951

3952

3953 MulAccCost = Ctx.TTI.getPartialReductionCost(

3954 Opcode, SrcTy, SrcTy2, RedTy, VF,

3956 Ext0->getOpcode())

3959 Ext1->getOpcode())

3962 } else {

3963

3964 if (Ext0 && Ext1 && Ext0->getOpcode() != Ext1->getOpcode())

3965 return false;

3966

3967 bool IsZExt =

3968 !Ext0 || Ext0->getOpcode() == Instruction::CastOps::ZExt;

3970 MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, Opcode, RedTy,

3972 }

3973

3977 if (Ext0)

3978 ExtCost += Ext0->computeCost(VF, Ctx);

3979 if (Ext1)

3980 ExtCost += Ext1->computeCost(VF, Ctx);

3981 if (OuterExt)

3982 ExtCost += OuterExt->computeCost(VF, Ctx);

3983

3984 return MulAccCost.isValid() &&

3985 MulAccCost < ExtCost + MulCost + RedCost;

3986 },

3988 };

3989

3990 VPValue *VecOp = Red->getVecOp();

3994

3997 VecOp = Tmp;

3998 }

3999

4000

4001

4002

4003

4004

4005 auto ExtendAndReplaceConstantOp = [&Ctx](VPWidenCastRecipe *ExtA,

4008 if (!ExtA || ExtB || !ValB->isLiveIn())

4009 return;

4010 Type *NarrowTy = Ctx.Types.inferScalarType(ExtA->getOperand(0));

4012 const APInt *Const;

4016 return;

4017

4018

4019

4020

4021

4023 auto *Trunc =

4024 Builder.createWidenCast(Instruction::CastOps::Trunc, ValB, NarrowTy);

4025 Type *WideTy = Ctx.Types.inferScalarType(ExtA);

4026 ValB = ExtB = Builder.createWidenCast(ExtOpc, Trunc, WideTy);

4027 Mul->setOperand(1, ExtB);

4028 };

4029

4030

4035

4036

4037 ExtendAndReplaceConstantOp(RecipeA, RecipeB, B, Mul);

4038

4039

4042 IsMulAccValidAndClampRange(Mul, RecipeA, RecipeB, nullptr)) {

4043 if (Sub)

4047 }

4048

4049 if (Sub && IsMulAccValidAndClampRange(Mul, nullptr, nullptr, nullptr))

4051 }

4052

4053

4054 if (Sub)

4055 return nullptr;

4056

4057

4063

4064

4065

4066 ExtendAndReplaceConstantOp(Ext0, Ext1, B, Mul);

4067

4068

4069

4070

4071

4072

4073

4074 if (Ext0 && Ext1 &&

4075 (Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&

4076 Ext0->getOpcode() == Ext1->getOpcode() &&

4077 IsMulAccValidAndClampRange(Mul, Ext0, Ext1, Ext) && Mul->hasOneUse()) {

4079 Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(), nullptr,

4080 *Ext0, *Ext0, Ext0->getDebugLoc());

4081 NewExt0->insertBefore(Ext0);

4082

4084 if (Ext0 != Ext1) {

4085 NewExt1 = new VPWidenCastRecipe(Ext1->getOpcode(), Ext1->getOperand(0),

4086 Ext->getResultType(), nullptr, *Ext1,

4087 *Ext1, Ext1->getDebugLoc());

4089 }

4090 Mul->setOperand(0, NewExt0);

4091 Mul->setOperand(1, NewExt1);

4092 Red->setOperand(1, Mul);

4094 }

4095 }

4096 return nullptr;

4097}

4098

4099

4100

4105 auto IP = std::next(Red->getIterator());

4106 auto *VPBB = Red->getParent();

4108 AbstractR = MulAcc;

4110 AbstractR = ExtRed;

4111

4112 if (!AbstractR)

4113 return;

4114

4116 Red->replaceAllUsesWith(AbstractR);

4117}

4118

4129

4132 return;

4133

4134#ifndef NDEBUG

4136#endif

4137

4144

4146 for (VPValue *VPV : VPValues) {

4148 (VPV->isLiveIn() && VPV->getLiveInIRValue() &&

4150 continue;

4151

4152

4156 if (User->usesScalars(VPV))

4157 continue;

4159 HoistPoint = HoistBlock->begin();

4160 else

4163 "All users must be in the vector preheader or dominated by it");

4164 }

4165

4168 VPV->replaceUsesWithIf(Broadcast,

4169 [VPV, Broadcast](VPUser &U, unsigned Idx) {

4170 return Broadcast != &U && !U.usesScalars(VPV);

4171 });

4172 }

4173}

4174

4177

4178

4179

4185

4187 if (RepR->isPredicated() || !RepR->isSingleScalar() ||

4188 RepR->getOpcode() != Instruction::Load)

4189 continue;

4190

4191 VPValue *Addr = RepR->getOperand(0);

4194 if (Loc.AATags.Scope)

4195 continue;

4197 }

4198 }

4199 if (R.mayWriteToMemory()) {

4201 if (Loc || Loc->AATags.Scope || Loc->AATags.NoAlias)

4202 return;

4204 }

4205 }

4206 }

4207

4209 for (auto &[LoadRecipe, LoadLoc] : CandidateLoads) {

4210

4211

4212

4213 const AAMDNodes &LoadAA = LoadLoc.AATags;

4217 })) {

4218 LoadRecipe->moveBefore(*Preheader, Preheader->getFirstNonPhi());

4219 }

4220 }

4221}

4222

4223

4224

4228 CommonMetadata.intersect(*Recipe);

4229 return CommonMetadata;

4230}

4231

4232template

4235 const Loop *L) {

4236 static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,

4237 "Only Load and Store opcodes supported");

4238 constexpr bool IsLoad = (Opcode == Instruction::Load);

4241

4242

4248 if (!RepR || RepR->getOpcode() != Opcode || !RepR->isPredicated())

4249 continue;

4250

4251

4252 VPValue *Addr = RepR->getOperand(IsLoad ? 0 : 1);

4255 RecipesByAddress[AddrSCEV].push_back(RepR);

4256 }

4257 }

4258

4259

4262 return TypeInfo.inferScalarType(IsLoad ? Recipe : Recipe->getOperand(0));

4263 };

4264 for (auto &[Addr, Recipes] : RecipesByAddress) {

4265 if (Recipes.size() < 2)

4266 continue;

4267

4268

4270 if (!RecipeI)

4271 continue;

4272

4273 VPValue *MaskI = RecipeI->getMask();

4274 Type *TypeI = GetLoadStoreValueType(RecipeI);

4277 RecipeI = nullptr;

4278

4279

4280 bool HasComplementaryMask = false;

4282 if (!RecipeJ)

4283 continue;

4284

4285 VPValue *MaskJ = RecipeJ->getMask();

4286 Type *TypeJ = GetLoadStoreValueType(RecipeJ);

4287 if (TypeI == TypeJ) {

4288

4289

4293 RecipeJ = nullptr;

4294 }

4295 }

4296

4297 if (HasComplementaryMask) {

4298 assert(Group.size() >= 2 && "must have at least 2 entries");

4299 AllGroups.push_back(std::move(Group));

4300 }

4301 }

4302 }

4303

4304 return AllGroups;

4305}

4306

4307

4308template

4312 return cast(A->getUnderlyingInstr())->getAlign() <

4314 });

4315}

4316

4318 const Loop *L) {

4322 return;

4323

4325

4326

4327 for (auto &Group : Groups) {

4328

4331 });

4332

4333

4337

4338

4341 continue;

4342

4343

4345

4346

4348

4349

4350

4352 LoadWithMinAlign->getUnderlyingInstr(), {EarliestLoad->getOperand(0)},

4353 false, nullptr, *EarliestLoad,

4354 CommonMetadata);

4355

4356 UnpredicatedLoad->insertBefore(EarliestLoad);

4357

4358

4360 Load->replaceAllUsesWith(UnpredicatedLoad);

4361 Load->eraseFromParent();

4362 }

4363 }

4364}

4365

4366static bool

4371 if (!StoreLoc || !StoreLoc->AATags.Scope)

4372 return false;

4373

4374

4375

4377 StoresToSink.end());

4378

4381 SinkStoreInfo SinkInfo(StoresToSinkSet, *StoresToSink[0], SE, L, TypeInfo);

4383}

4384

4386 const Loop *L) {

4390 return;

4391

4394

4395 for (auto &Group : Groups) {

4398 });

4399

4401 continue;

4402

4403

4404

4407

4408

4410

4411

4412 VPValue *SelectedValue = Group[0]->getOperand(0);

4414

4415 for (unsigned I = 1; I < Group.size(); ++I) {

4416 VPValue *Mask = Group[I]->getMask();

4418 SelectedValue = Builder.createSelect(Mask, Value, SelectedValue,

4420 }

4421

4422

4424

4425

4426 auto *UnpredicatedStore =

4428 {SelectedValue, LastStore->getOperand(1)},

4429 false,

4430 nullptr, *LastStore, CommonMetadata);

4431 UnpredicatedStore->insertBefore(*InsertBB, LastStore->getIterator());

4432

4433

4435 Store->eraseFromParent();

4436 }

4437}

4438

4442 assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");

4443 assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");

4444

4446

4447

4448

4453 return;

4454

4455

4456

4457

4458

4462 return;

4463 const SCEV *VFxUF = SE.getElementCount(TCScev->getType(), BestVF * BestUF);

4467}

4468

4473 return;

4474

4477 auto *TCMO = Builder.createNaryOp(

4481}

4482

4485 return;

4486

4493

4494

4495

4496

4497

4498

4503 continue;

4505 auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](VPUser *U) {

4507 return !U->usesScalars(DefR) || ParentRegion != LoopRegion;

4508 };

4514 none_of(DefR->users(), UsesVectorOrInsideReplicateRegion))

4515 continue;

4516

4518 unsigned Opcode = ScalarTy->isStructTy()

4521 auto *BuildVector = new VPInstruction(Opcode, {DefR});

4523

4524 DefR->replaceUsesWithIf(

4525 BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](

4526 VPUser &U, unsigned) {

4527 return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);

4528 });

4529 }

4530 }

4531

4532

4533

4534

4535

4536 for (VPBasicBlock *VPBB : VPBBsInsideLoopRegion) {

4540 continue;

4541 for (VPValue *Def : R.definedValues()) {

4542

4543

4544

4545

4546

4548 continue;

4549

4550

4551

4552

4553

4554 auto IsCandidateUnpackUser = [Def](VPUser *U) {

4556 return U->usesScalars(Def) &&

4557 (!ParentRegion || !ParentRegion->isReplicator());

4558 };

4559 if (none_of(Def->users(), IsCandidateUnpackUser))

4560 continue;

4561

4563 if (R.isPhi())

4564 Unpack->insertBefore(*VPBB, VPBB->getFirstNonPhi());

4565 else

4566 Unpack->insertAfter(&R);

4567 Def->replaceUsesWithIf(Unpack,

4568 [&IsCandidateUnpackUser](VPUser &U, unsigned) {

4569 return IsCandidateUnpackUser(&U);

4570 });

4571 }

4572 }

4573 }

4574}

4575

4578 bool TailByMasking,

4579 bool RequiresScalarEpilogue) {

4581 assert(VectorTC.isLiveIn() && "vector-trip-count must be a live-in");

4582

4583

4585 return;

4586

4589 VPBuilder Builder(VectorPHVPBB, VectorPHVPBB->begin());

4591

4592

4593

4594

4595

4596

4597

4598

4599

4600 if (TailByMasking) {

4601 TC = Builder.createNaryOp(

4602 Instruction::Add,

4603 {TC, Builder.createNaryOp(Instruction::Sub,

4606 }

4607

4608

4609

4610

4611

4612

4614 Builder.createNaryOp(Instruction::URem, {TC, Step},

4616

4617

4618

4619

4620

4621

4622

4623 if (RequiresScalarEpilogue) {

4624 assert(!TailByMasking &&

4625 "requiring scalar epilogue is not supported with fail folding");

4628 R = Builder.createSelect(IsZero, Step, R);

4629 }

4630

4631 VPValue *Res = Builder.createNaryOp(

4634}

4635

4642

4643

4644

4645

4646

4647

4650 Builder.createElementCount(TCTy, VFEC * Plan.getUF());

4652 return;

4653 }

4654

4655

4656

4657 VPValue *RuntimeVF = Builder.createElementCount(TCTy, VFEC);

4661 BC, [&VF](VPUser &U, unsigned) { return !U.usesScalars(&VF); });

4662 }

4664

4666 VPValue *MulByUF = Builder.createOverflowingOp(

4667 Instruction::Mul, {RuntimeVF, UF}, {true, false});

4669}

4670

4673 SCEVExpander Expander(SE, "induction", false);

4674

4676 BasicBlock *EntryBB = Entry->getIRBasicBlock();

4680 continue;

4682 if (!ExpSCEV)

4683 break;

4684 const SCEV *Expr = ExpSCEV->getSCEV();

4687 ExpandedSCEVs[ExpSCEV->getSCEV()] = Res;

4692 ExpSCEV->eraseFromParent();

4693 }

4695 "VPExpandSCEVRecipes must be at the beginning of the entry block, "

4696 "after any VPIRInstructions");

4697

4698

4699 auto EI = Entry->begin();

4703 EI++;

4704 continue;

4705 }

4707 }

4708

4709 return ExpandedSCEVs;

4710}

4711

4712

4713

4714

4715

4716

4717

4718

4719

4721 VPValue *OpV, unsigned Idx) {

4724 if (!Member0OpR)

4725 return Member0Op == OpV;

4727 return !W->getMask() && Member0Op == OpV;

4729 return IR->getInterleaveGroup()->isFull() && IR->getVPValue(Idx) == OpV;

4730 return false;

4731}

4732

4733

4734

4735

4740 if (!InterleaveR || InterleaveR->getMask())

4741 return false;

4742

4743 Type *GroupElementTy = nullptr;

4747 [&TypeInfo, GroupElementTy](VPValue *Op) {

4748 return TypeInfo.inferScalarType(Op) == GroupElementTy;

4749 }))

4750 return false;

4751 } else {

4752 GroupElementTy =

4755 [&TypeInfo, GroupElementTy](VPValue *Op) {

4756 return TypeInfo.inferScalarType(Op) == GroupElementTy;

4757 }))

4758 return false;

4759 }

4760

4765 return IG->getFactor() == VFMin && IG->getNumMembers() == VFMin &&

4766 GroupSize == VectorRegWidth;

4767}

4768

4769

4772 return true;

4774 return RepR && RepR->isSingleScalar();

4775}

4776

4777

4778

4781 auto *R = V->getDefiningRecipe();

4782 if (!R || NarrowedOps.contains(V))

4783 return V;

4784

4786 return V;

4787

4789 for (unsigned Idx = 0, E = WideMember0->getNumOperands(); Idx != E; ++Idx)

4790 WideMember0->setOperand(

4791 Idx,

4793 return V;

4794 }

4795

4797

4798

4799 auto *LI = cast(LoadGroup->getInterleaveGroup()->getInsertPos());

4801 *LI, LoadGroup->getAddr(), LoadGroup->getMask(), true,

4802 false, {}, LoadGroup->getDebugLoc());

4803 L->insertBefore(LoadGroup);

4804 NarrowedOps.insert(L);

4805 return L;

4806 }

4807

4809 assert(RepR->isSingleScalar() &&

4811 "must be a single scalar load");

4812 NarrowedOps.insert(RepR);

4813 return RepR;

4814 }

4815

4817 VPValue *PtrOp = WideLoad->getAddr();

4819 PtrOp = VecPtr->getOperand(0);

4820

4821

4822 auto *N = new VPReplicateRecipe(&WideLoad->getIngredient(), {PtrOp},

4823 true,

4824 nullptr, {}, *WideLoad);

4825 N->insertBefore(WideLoad);

4827 return N;

4828}

4829

4834 return;

4835

4837

4841 continue;

4842

4845 continue;

4846

4847

4848

4849

4850

4851 if (R.isPhi())

4852 return;

4853

4855 if (R.mayWriteToMemory() && !InterleaveR)

4856 return;

4857

4858

4859

4860

4861

4862

4864 return;

4865

4866

4867

4868 if (!InterleaveR)

4869 continue;

4870

4871

4873 VectorRegWidth))

4874 return;

4875

4876

4877 if (InterleaveR->getStoredValues().empty())

4878 continue;

4879

4880

4881

4882 auto *Member0 = InterleaveR->getStoredValues()[0];

4884 all_of(InterleaveR->getStoredValues(),

4885 [Member0](VPValue *VPV) { return Member0 == VPV; })) {

4886 StoreGroups.push_back(InterleaveR);

4887 continue;

4888 }

4889

4890

4891

4892 if (all_of(enumerate(InterleaveR->getStoredValues()), [](auto Op) {

4893 VPRecipeBase *DefR = Op.value()->getDefiningRecipe();

4894 if (!DefR)

4895 return false;

4896 auto *IR = dyn_cast(DefR);

4897 return IR && IR->getInterleaveGroup()->isFull() &&

4898 IR->getVPValue(Op.index()) == Op.value();

4899 })) {

4900 StoreGroups.push_back(InterleaveR);

4901 continue;

4902 }

4903

4904

4905

4906 auto *WideMember0 =

4908 if (!WideMember0)

4909 return;

4910 for (const auto &[I, V] : enumerate(InterleaveR->getStoredValues())) {

4912 if (!R || R->getOpcode() != WideMember0->getOpcode() ||

4913 R->getNumOperands() > 2)

4914 return;

4916 [WideMember0, Idx = I](const auto &P) {

4917 const auto &[OpIdx, OpV] = P;

4918 return !canNarrowLoad(WideMember0, OpIdx, OpV, Idx);

4919 }))

4920 return;

4921 }

4922 StoreGroups.push_back(InterleaveR);

4923 }

4924

4925 if (StoreGroups.empty())

4926 return;

4927

4928

4930

4931 for (auto *StoreGroup : StoreGroups) {

4934 auto *SI =

4935 cast(StoreGroup->getInterleaveGroup()->getInsertPos());

4937 *SI, StoreGroup->getAddr(), Res, nullptr, true,

4938 false, {}, StoreGroup->getDebugLoc());

4939 S->insertBefore(StoreGroup);

4940 StoreGroup->eraseFromParent();

4941 }

4942

4943

4944

4948

4955 Instruction::Mul, {VScale, UF}, {true, false});

4958 } else {

4959 Inc->setOperand(1, UF);

4962 }

4964}

4965

4966

4967

4969 VPlan &Plan, ElementCount VF, std::optional VScaleForTuning) {

4971 auto *MiddleTerm =

4973

4974 if (!MiddleTerm)

4975 return;

4976

4978 "must have a BranchOnCond");

4979

4981 if (VF.isScalable() && VScaleForTuning.has_value())

4982 VectorStep *= *VScaleForTuning;

4983 assert(VectorStep > 0 && "trip count should not be zero");

4985 MDNode *BranchWeights =

4987 MiddleTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);

4988}

4989

4990

4991

4992

4998

4999

5000 if (WideIntOrFp && WideIntOrFp->getTruncInst())

5001 return nullptr;

5002

5006 VPValue *EndValue = VectorTC;

5007 if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {

5010 Start, VectorTC, Step);

5011 }

5012

5013

5014

5016 if (ScalarTypeOfWideIV != TypeInfo.inferScalarType(EndValue)) {

5017 EndValue = VectorPHBuilder.createScalarCast(Instruction::Trunc, EndValue,

5018 ScalarTypeOfWideIV,

5020 }

5021

5022 return EndValue;

5023}

5024

5029 auto *MiddleVPBB = cast(ScalarPH->getPredecessors()[0]);

5033 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());

5036

5037

5038

5043 IVEndValues[WideIVR] = EndValue;

5044 ResumePhiR->setOperand(0, EndValue);

5045 ResumePhiR->setName("bc.resume.val");

5046 continue;

5047 }

5048

5049

5050

5052 "should only skip truncated wide inductions");

5053 continue;

5054 }

5055

5056

5057

5058

5060 auto *ResumeFromVectorLoop = VectorPhiR->getBackedgeValue();

5062 "Cannot handle loops with uncountable early exits");

5063 if (IsFOR) {

5064 auto *ExtractPart = MiddleBuilder.createNaryOp(

5066 ResumeFromVectorLoop = MiddleBuilder.createNaryOp(

5068 "vector.recur.extract");

5069 }

5070 ResumePhiR->setName(IsFOR ? "scalar.recur.init" : "bc.merge.rdx");

5071 ResumePhiR->setOperand(0, ResumeFromVectorLoop);

5072 }

5073}

5074

5080 VPBuilder ScalarPHBuilder(ScalarPHVPBB);

5081 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());

5082

5083 auto IsScalableOne = [](ElementCount VF) -> bool {

5085 };

5086

5089 if (!FOR)

5090 continue;

5091

5093 "Cannot handle loops with uncountable early exits");

5094

5095

5096

5097

5098

5099

5100

5101

5102

5103

5104

5105

5106

5107

5108

5109

5110

5111

5112

5113

5114

5115

5116

5117

5118

5119

5120

5121

5122

5123

5124

5125

5126

5127

5128

5129

5130

5131

5132

5133

5134

5135

5136

5137

5138

5139

5140

5141

5142

5143

5144

5145

5146

5147

5148

5149

5150

5151

5152

5153

5154

5155

5156

5157

5158

5159

5160

5161

5162

5163

5164

5166 make_range(MiddleVPBB->getFirstNonPhi(), MiddleVPBB->end()))) {

5168 continue;

5169

5170

5171

5172

5173

5174

5177 return;

5180 "vector.recur.extract.for.phi");

5182 }

5183 }

5184}

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

AMDGPU Register Bank Select

This file implements a class to represent arbitrary precision integral constant values and operations...

ReachingDefInfo InstSet & ToRemove

MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL

static bool isEqual(const Function &Caller, const Function &Callee)

static const Function * getParent(const Value *V)

static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))

static bool isSentinel(const DWARFDebugNames::AttributeEncoding &AE)

iv Induction Variable Users

const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]

Legalize the Machine IR a function s Machine IR

static bool mergeBlocksIntoPredecessors(Loop &L, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution &SE)

static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)

Return the first DebugLoc that has line number information, given a range of instructions.

This file provides utility analysis objects describing memory locations.

MachineInstr unsigned OpIdx

ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))

uint64_t IntrinsicInst * II

This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.

const SmallVectorImpl< MachineOperand > & Cond

This is the interface for a metadata-based scoped no-alias analysis.

This file defines generic set operations that may be used on set's of different types,...

This file implements a set that has insertion order iteration characteristics.

This file defines the SmallPtrSet class.

static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")

static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")

static SymbolRef::Type getType(const Symbol *Sym)

This file implements the TypeSwitch template, which mimics a switch() statement whose cases are type ...

This file implements dominator tree analysis for a single level of a VPlan's H-CFG.

This file contains the declarations of different VPlan-related auxiliary helpers.

static VPValue * optimizeLatchExitInductionUser(VPlan &Plan, VPTypeAnalysis &TypeInfo, VPBlockBase *PredVPBB, VPValue *Op, DenseMap< VPValue *, VPValue * > &EndValues, ScalarEvolution &SE)

Attempts to optimize the induction variable exit values for users in the exit block coming from the l...

Definition VPlanTransforms.cpp:1009

static void removeCommonBlendMask(VPBlendRecipe *Blend)

Try to see if all of Blend's masks share a common value logically and'ed and remove it from the masks...

Definition VPlanTransforms.cpp:1658

static void tryToCreateAbstractReductionRecipe(VPReductionRecipe *Red, VPCostContext &Ctx, VFRange &Range)

This function tries to create abstract recipes from the reduction recipe for following optimizations ...

Definition VPlanTransforms.cpp:4101

static VPReplicateRecipe * findRecipeWithMinAlign(ArrayRef< VPReplicateRecipe * > Group)

Definition VPlanTransforms.cpp:4310

static bool sinkScalarOperands(VPlan &Plan)

Definition VPlanTransforms.cpp:275

static bool cannotHoistOrSinkRecipe(const VPRecipeBase &R)

Return true if we do not know how to (mechanically) hoist or sink R out of a loop region.

Definition VPlanTransforms.cpp:259

static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF, unsigned BestUF, PredicatedScalarEvolution &PSE)

Try to simplify the branch condition of Plan.

Definition VPlanTransforms.cpp:1954

static SmallVector< SmallVector< VPReplicateRecipe *, 4 > > collectComplementaryPredicatedMemOps(VPlan &Plan, ScalarEvolution &SE, const Loop *L)

Definition VPlanTransforms.cpp:4234

static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo)

Try to simplify VPSingleDefRecipe Def.

Definition VPlanTransforms.cpp:1214

static void removeRedundantInductionCasts(VPlan &Plan)

Remove redundant casts of inductions.

Definition VPlanTransforms.cpp:616

static bool tryToReplaceALMWithWideALM(VPlan &Plan, ElementCount VF, unsigned UF)

Try to replace multiple active lane masks used for control flow with a single, wide active lane mask ...

Definition VPlanTransforms.cpp:1862

static std::optional< std::pair< bool, unsigned > > getOpcodeOrIntrinsicID(const VPSingleDefRecipe *R)

Get any instruction opcode or intrinsic ID data embedded in recipe R.

Definition VPlanTransforms.cpp:1122

static VPExpressionRecipe * tryToMatchAndCreateExtendedReduction(VPReductionRecipe *Red, VPCostContext &Ctx, VFRange &Range)

This function tries convert extended in-loop reductions to VPExpressionRecipe and clamp the Range if ...

Definition VPlanTransforms.cpp:3872

static VPScalarIVStepsRecipe * createScalarIVSteps(VPlan &Plan, InductionDescriptor::InductionKind Kind, Instruction::BinaryOps InductionOpcode, FPMathOperator *FPBinOp, Instruction *TruncI, VPValue *StartV, VPValue *Step, DebugLoc DL, VPBuilder &Builder)

Definition VPlanTransforms.cpp:735

static RemoveMask_match< Op0_t, Op1_t > m_RemoveMask(const Op0_t &In, Op1_t &Out)

Match a specific mask In, or a combination of it (logical-and In, Out).

Definition VPlanTransforms.cpp:2827

static VPIRMetadata getCommonMetadata(ArrayRef< VPReplicateRecipe * > Recipes)

Definition VPlanTransforms.cpp:4225

static VPValue * getPredicatedMask(VPRegionBlock *R)

If R is a region with a VPBranchOnMaskRecipe in the entry block, return the mask.

Definition VPlanTransforms.cpp:368

static bool sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR, VPRecipeBase *Previous, VPDominatorTree &VPDT)

Sink users of FOR after the recipe defining the previous value Previous of the recurrence.

Definition VPlanTransforms.cpp:2087

static bool mergeReplicateRegionsIntoSuccessors(VPlan &Plan)

Definition VPlanTransforms.cpp:400

static VPActiveLaneMaskPHIRecipe * addVPLaneMaskPhiAndUpdateExitBranch(VPlan &Plan, bool DataAndControlFlowWithoutRuntimeCheck)

Definition VPlanTransforms.cpp:2652

static void expandVPWidenPointerInduction(VPWidenPointerInductionRecipe *R, VPTypeAnalysis &TypeInfo)

Expand a VPWidenPointerInductionRecipe into executable recipes, for the initial value,...

Definition VPlanTransforms.cpp:3616

static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL)

Replace recipes with their EVL variants.

Definition VPlanTransforms.cpp:2916

static bool isDeadRecipe(VPRecipeBase &R)

Returns true if R is dead and can be removed.

Definition VPlanTransforms.cpp:688

static void legalizeAndOptimizeInductions(VPlan &Plan)

Legalize VPWidenPointerInductionRecipe, by replacing it with a PtrAdd (IndStart, ScalarIVSteps (0,...

Definition VPlanTransforms.cpp:815

static void addReplicateRegions(VPlan &Plan)

Definition VPlanTransforms.cpp:532

static VPValue * tryToFoldLiveIns(VPSingleDefRecipe &R, ArrayRef< VPValue * > Operands, const DataLayout &DL, VPTypeAnalysis &TypeInfo)

Try to fold R using InstSimplifyFolder.

Definition VPlanTransforms.cpp:1144

static VPValue * tryToComputeEndValueForInduction(VPWidenInductionRecipe *WideIV, VPBuilder &VectorPHBuilder, VPTypeAnalysis &TypeInfo, VPValue *VectorTC)

Compute and return the end value for WideIV, unless it is truncated.

Definition VPlanTransforms.cpp:4993

static void removeRedundantExpandSCEVRecipes(VPlan &Plan)

Remove redundant EpxandSCEVRecipes in Plan's entry block by replacing them with already existing reci...

Definition VPlanTransforms.cpp:1082

static bool simplifyKnownEVL(VPlan &Plan, ElementCount VF, PredicatedScalarEvolution &PSE)

From the definition of llvm.experimental.get.vector.length, VPInstruction::ExplicitVectorLength(AVL) ...

Definition VPlanTransforms.cpp:2039

static bool isConditionTrueViaVFAndUF(VPValue *Cond, VPlan &Plan, ElementCount BestVF, unsigned BestUF, ScalarEvolution &SE)

Return true if Cond is known to be true for given BestVF and BestUF.

Definition VPlanTransforms.cpp:1825

static bool hoistPreviousBeforeFORUsers(VPFirstOrderRecurrencePHIRecipe *FOR, VPRecipeBase *Previous, VPDominatorTree &VPDT)

Try to hoist Previous and its operands before all users of FOR.

Definition VPlanTransforms.cpp:2142

static VPValue * scalarizeVPWidenPointerInduction(VPWidenPointerInductionRecipe *PtrIV, VPlan &Plan, VPBuilder &Builder)

Scalarize a VPWidenPointerInductionRecipe by replacing it with a PtrAdd (IndStart,...

Definition VPlanTransforms.cpp:790

static bool canSinkStoreWithNoAliasCheck(ArrayRef< VPReplicateRecipe * > StoresToSink, ScalarEvolution &SE, const Loop &L, VPTypeAnalysis &TypeInfo)

Definition VPlanTransforms.cpp:4367

static SmallVector< VPUser * > collectUsersRecursively(VPValue *V)

Definition VPlanTransforms.cpp:774

static void recursivelyDeleteDeadRecipes(VPValue *V)

Definition VPlanTransforms.cpp:1099

static VPValue * optimizeEarlyExitInductionUser(VPlan &Plan, VPTypeAnalysis &TypeInfo, VPBlockBase *PredVPBB, VPValue *Op, ScalarEvolution &SE)

Attempts to optimize the induction variable exit values for users in the early exit block.

Definition VPlanTransforms.cpp:954

static VPWidenInductionRecipe * getOptimizableIVOf(VPValue *VPV, ScalarEvolution &SE)

Check if VPV is an untruncated wide induction, either before or after the increment.

Definition VPlanTransforms.cpp:896

static VPRegionBlock * createReplicateRegion(VPReplicateRecipe *PredRecipe, VPlan &Plan)

Definition VPlanTransforms.cpp:489

static VPBasicBlock * getPredicatedThenBlock(VPRegionBlock *R)

If R is a triangle region, return the 'then' block of the triangle.

Definition VPlanTransforms.cpp:378

static VPValue * narrowInterleaveGroupOp(VPValue *V, SmallPtrSetImpl< VPValue * > &NarrowedOps)

Definition VPlanTransforms.cpp:4780

static bool canHoistOrSinkWithNoAliasCheck(const MemoryLocation &MemLoc, VPBasicBlock *FirstBB, VPBasicBlock *LastBB, std::optional< SinkStoreInfo > SinkInfo={})

Check if a memory operation doesn't alias with memory operations in blocks between FirstBB and LastBB...

Definition VPlanTransforms.cpp:210

static void simplifyBlends(VPlan &Plan)

Normalize and simplify VPBlendRecipes.

Definition VPlanTransforms.cpp:1675

static bool isConsecutiveInterleaveGroup(VPInterleaveRecipe *InterleaveR, ElementCount VF, VPTypeAnalysis &TypeInfo, TypeSize VectorRegWidth)

Returns true if IR is a full interleave group with factor and number of members both equal to VF.

Definition VPlanTransforms.cpp:4736

static VPRecipeBase * optimizeMaskToEVL(VPValue *HeaderMask, VPRecipeBase &CurRecipe, VPTypeAnalysis &TypeInfo, VPValue &EVL)

Try to optimize a CurRecipe masked by HeaderMask to a corresponding EVL-based recipe without the head...

Definition VPlanTransforms.cpp:2840

static bool isAlreadyNarrow(VPValue *VPV)

Returns true if VPValue is a narrow VPValue.

Definition VPlanTransforms.cpp:4770

static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan, ElementCount BestVF, unsigned BestUF)

Optimize the width of vector induction variables in Plan based on a known constant Trip Count,...

Definition VPlanTransforms.cpp:1756

static VPExpressionRecipe * tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red, VPCostContext &Ctx, VFRange &Range)

This function tries convert extended in-loop reductions to VPExpressionRecipe and clamp the Range if ...

Definition VPlanTransforms.cpp:3929

static void expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR, VPTypeAnalysis &TypeInfo)

Expand a VPWidenIntOrFpInduction into executable recipes, for the initial value, phi and backedge val...

Definition VPlanTransforms.cpp:3506

static VPSingleDefRecipe * findHeaderMask(VPlan &Plan)

Collect the header mask with the pattern: (ICMP_ULE, WideCanonicalIV, backedge-taken-count) TODO: Int...

Definition VPlanTransforms.cpp:2730

static void removeRedundantCanonicalIVs(VPlan &Plan)

Try to replace VPWidenCanonicalIVRecipes with a widened canonical IV recipe, if it exists.

Definition VPlanTransforms.cpp:649

static bool canNarrowLoad(VPWidenRecipe *WideMember0, unsigned OpIdx, VPValue *OpV, unsigned Idx)

Returns true if V is VPWidenLoadRecipe or VPInterleaveRecipe that can be converted to a narrower reci...

Definition VPlanTransforms.cpp:4720

static void narrowToSingleScalarRecipes(VPlan &Plan)

Definition VPlanTransforms.cpp:1574

This file provides utility VPlan to VPlan transformations.

This file declares the class VPlanVerifier, which contains utility functions to check the consistency...

This file contains the declarations of the Vectorization Plan base classes:

static const X86InstrFMA3Group Groups[]

static const uint32_t IV[8]

Helper for extra no-alias checks via known-safe recipe and SCEV.

Definition VPlanTransforms.cpp:144

SinkStoreInfo(const SmallPtrSetImpl< VPRecipeBase * > &ExcludeRecipes, VPReplicateRecipe &GroupLeader, ScalarEvolution &SE, const Loop &L, VPTypeAnalysis &TypeInfo)

Definition VPlanTransforms.cpp:187

bool shouldSkip(VPRecipeBase &R) const

Return true if R should be skipped during alias checking, either because it's in the exclude set or b...

Definition VPlanTransforms.cpp:196

Class for arbitrary precision integers.

LLVM_ABI APInt zext(unsigned width) const

Zero extend to a new width.

unsigned getActiveBits() const

Compute the number of active bits in the value.

APInt abs() const

Get the absolute value.

unsigned getBitWidth() const

Return the number of bits in the APInt.

LLVM_ABI APInt sext(unsigned width) const

Sign extend to a new width.

bool uge(const APInt &RHS) const

Unsigned greater or equal comparison.

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

const T & back() const

back - Get the last element.

const T & front() const

front - Get the first element.

LLVM Basic Block Representation.

const Function * getParent() const

Return the enclosing method, or null if none.

LLVM_ABI const DataLayout & getDataLayout() const

Get the data layout of the module this basic block belongs to.

const Instruction * getTerminator() const LLVM_READONLY

Returns the terminator instruction if the block is well formed or null if the block is not well forme...

This class represents a function call, abstracting a target machine's calling convention.

@ ICMP_ULT

unsigned less than

@ ICMP_ULE

unsigned less or equal

@ FCMP_UNO

1 0 0 0 True if unordered: isnan(X) | isnan(Y)

Predicate getInversePredicate() const

For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...

An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...

static ConstantInt * getSigned(IntegerType *Ty, int64_t V)

Return a ConstantInt with the specified value for the specified type.

static LLVM_ABI Constant * getAllOnesValue(Type *Ty)

static LLVM_ABI Constant * getNullValue(Type *Ty)

Constructor to create a '0' constant of arbitrary type.

A parsed version of the target data layout string in and methods for querying it.

static DebugLoc getCompilerGenerated()

static DebugLoc getUnknown()

ValueT lookup(const_arg_type_t< KeyT > Val) const

lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...

std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)

bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const

dominates - Returns true iff A dominates B.

constexpr bool isVector() const

One or more elements.

static constexpr ElementCount getScalable(ScalarTy MinVal)

Utility class for floating point operations which can have information about relaxed accuracy require...

Represents flags for the getelementptr instruction/expression.

GEPNoWrapFlags withoutNoUnsignedWrap() const

static GEPNoWrapFlags none()

an instruction for type-safe pointer arithmetic to access elements of arrays and structs

A struct for saving information about induction variables.

InductionKind

This enum represents the kinds of inductions that we support.

@ IK_PtrInduction

Pointer induction var. Step = C.

@ IK_IntInduction

Integer induction variable. Step = C.

InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.

LLVM_ABI const DataLayout & getDataLayout() const

Get the data layout of the module this instruction belongs to.

static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)

This static method is the primary way of constructing an IntegerType.

The group of interleaved loads/stores sharing the same stride and close to each other.

InstTy * getMember(uint32_t Index) const

Get the member with the given index Index.

uint32_t getNumMembers() const

This is an important class for using LLVM in a threaded context.

An instruction for reading from memory.

static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)

Test a Predicate on a Range of VF's.

Represents a single loop in the control flow graph.

LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)

Return metadata containing two branch weights.

This class implements a map that also provides access to all stored values in a deterministic order.

ValueT lookup(const KeyT &Key) const

Representation for a specific memory location.

AAMDNodes AATags

The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...

An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...

ScalarEvolution * getSE() const

Returns the ScalarEvolution analysis used.

LLVM_ABI const SCEV * getSCEV(Value *V)

Returns the SCEV expression of V, in the context of the current SCEV predicate.

static LLVM_ABI unsigned getOpcode(RecurKind Kind)

Returns the opcode corresponding to the RecurrenceKind.

unsigned getOpcode() const

RegionT * getParent() const

Get the parent of the Region.

This class uses information about analyze scalars to rewrite expressions in canonical form.

LLVM_ABI Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)

Insert code to directly compute the specified SCEV expression into the program.

static const SCEV * rewrite(const SCEV *Scev, ScalarEvolution &SE, ValueToSCEVMapTy &Map)

This class represents an analyzed expression in the program.

LLVM_ABI Type * getType() const

Return the LLVM type of this SCEV expression.

The main scalar evolution driver.

LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)

Return the SCEV object corresponding to -V.

LLVM_ABI const SCEV * getSCEV(Value *V)

Return a SCEV expression for the full generality of the specified expression.

LLVM_ABI const SCEV * getUDivExpr(const SCEV *LHS, const SCEV *RHS)

Get a canonical unsigned division expression, or something simpler if possible.

LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)

LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)

Get a canonical multiply expression, or something simpler if possible.

LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)

Test if the given expression is known to satisfy the condition described by Pred, LHS,...

static LLVM_ABI bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias)

This class represents the LLVM 'select' instruction.

A vector that has set insertion semantics.

size_type size() const

Determine the number of elements in the SetVector.

bool insert(const value_type &X)

Insert a new element into the SetVector.

A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...

std::pair< iterator, bool > insert(PtrType Ptr)

Inserts Ptr if and only if there is no element in the container equal to Ptr.

bool contains(ConstPtrType Ptr) const

SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

An instruction for storing to memory.

Provides information about what library functions are available for the current target.

static LLVM_ABI PartialReductionExtendKind getPartialReductionExtendKind(Instruction *I)

Get the kind of extension that an instruction represents.

TargetCostKind

The kind of cost model.

@ TCK_RecipThroughput

Reciprocal throughput.

PartialReductionExtendKind

Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...

static constexpr TypeSize get(ScalarTy Quantity, bool Scalable)

This class implements a switch-like dispatch statement for a value of 'T' using dyn_cast functionalit...

TypeSwitch< T, ResultT > & Case(CallableT &&caseFn)

Add a case on the given type.

The instances of the Type class are immutable: once they are created, they are never changed.

static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)

static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)

bool isPointerTy() const

True if this is an instance of PointerType.

static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)

bool isStructTy() const

True if this is an instance of StructType.

LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY

If this is a vector type, return the getPrimitiveSizeInBits value for the element type.

static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)

bool isFloatingPointTy() const

Return true if this is one of the floating-point types.

bool isIntegerTy() const

True if this is an instance of IntegerType.

A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...

VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.

void appendRecipe(VPRecipeBase *Recipe)

Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.

RecipeListTy::iterator iterator

Instruction iterators...

iterator begin()

Recipe iterator methods.

iterator_range< iterator > phis()

Returns an iterator range over the PHI-like recipes in the block.

iterator getFirstNonPhi()

Return the position of the first non-phi node recipe in the block.

VPRegionBlock * getEnclosingLoopRegion()

VPBasicBlock * splitAt(iterator SplitAt)

Split current block at SplitAt by inserting a new block between the current block and its successors ...

VPRecipeBase * getTerminator()

If the block has multiple successors, return the branch recipe terminating the block.

const VPRecipeBase & back() const

A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.

VPValue * getMask(unsigned Idx) const

Return mask number Idx.

unsigned getNumIncomingValues() const

Return the number of incoming values, taking into account when normalized the first incoming value wi...

void setMask(unsigned Idx, VPValue *V)

Set mask number Idx to V.

bool isNormalized() const

A normalized blend is one that has an odd number of operands, whereby the first operand does not have...

VPBlockBase is the building block of the Hierarchical Control-Flow Graph.

VPRegionBlock * getParent()

const VPBasicBlock * getExitingBasicBlock() const

size_t getNumSuccessors() const

void swapSuccessors()

Swap successors of the block. The block must have exactly 2 successors.

size_t getNumPredecessors() const

const VPBlocksTy & getPredecessors() const

VPBlockBase * getSinglePredecessor() const

const VPBasicBlock * getEntryBasicBlock() const

VPBlockBase * getSingleHierarchicalPredecessor()

VPBlockBase * getSingleSuccessor() const

const VPBlocksTy & getSuccessors() const

static auto blocksOnly(const T &Range)

Return an iterator range over Range which only includes BlockTy blocks.

static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)

Inserts BlockPtr on the edge between From and To.

static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)

Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.

static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)

Connect VPBlockBases From and To bi-directionally.

static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)

Disconnect VPBlockBases From and To bi-directionally.

A recipe for generating conditional branches on the bits of a mask.

RAII object that stores the current insertion point and restores it when the object is destroyed.

VPlan-based builder utility analogous to IRBuilder.

VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)

VPValue * createElementCount(Type *Ty, ElementCount EC)

VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={})

VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPValue *Start, VPValue *Current, VPValue *Step, const Twine &Name="")

Convert the input value Current to the corresponding value of an induction with Start and Step values...

static VPBuilder getToInsertAfter(VPRecipeBase *R)

Create a VPBuilder to insert after R.

VPInstruction * createOverflowingOp(unsigned Opcode, ArrayRef< VPValue * > Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")

VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL, const Twine &Name="")

void setInsertPoint(VPBasicBlock *TheBB)

This specifies that created VPInstructions should be appended to the end of the specified block.

VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")

Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.

Canonical scalar induction phi of the vector loop.

unsigned getNumDefinedValues() const

Returns the number of values defined by the VPDef.

ArrayRef< VPValue * > definedValues()

Returns an ArrayRef of the values defined by the VPDef.

VPValue * getVPSingleValue()

Returns the only VPValue defined by the VPDef.

VPValue * getVPValue(unsigned I)

Returns the VPValue with index I defined by the VPDef.

A recipe for converting the input value IV value to the corresponding value of an IV with different s...

Template specialization of the standard LLVM dominator tree utility for VPBlockBases.

bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B)

A recipe for generating the phi node for the current index of elements, adjusted in accordance with E...

A recipe to combine multiple recipes into a single 'expression' recipe, which should be considered a ...

A special type of VPBasicBlock that wraps an existing IR basic block.

BasicBlock * getIRBasicBlock() const

Class to record and manage LLVM IR flags.

static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)

Create a new VPIRPhi for I, if it is a PHINode; otherwise create a VPIRInstruction.

This is a concrete Recipe that models a single VPlan-level instruction.

@ ExtractLane

Extracts a single lane (first operand) from a set of vector operands.

@ ExtractPenultimateElement

@ Unpack

Extracts all lanes from its (non-scalable) vector operand.

@ FirstOrderRecurrenceSplice

@ BuildVector

Creates a fixed-width vector containing all operands.

@ BuildStructVector

Given operands of (the same) struct type, creates a struct of fixed-width vectors each containing a ...

@ CanonicalIVIncrementForPart

@ CalculateTripCountMinusVF

const InterleaveGroup< Instruction > * getInterleaveGroup() const

VPValue * getMask() const

Return the mask used by this recipe.

ArrayRef< VPValue * > getStoredValues() const

Return the VPValues stored by this interleave group.

A recipe for interleaved memory operations with vector-predication intrinsics.

VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...

VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...

VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.

VPRegionBlock * getRegion()

VPBasicBlock * getParent()

DebugLoc getDebugLoc() const

Returns the debug location of the recipe.

void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)

Unlink this recipe and insert into BB before I.

void insertBefore(VPRecipeBase *InsertPos)

Insert an unlinked recipe into a basic block immediately before the specified recipe.

void insertAfter(VPRecipeBase *InsertPos)

Insert an unlinked Recipe into a basic block immediately after the specified Recipe.

iplist< VPRecipeBase >::iterator eraseFromParent()

This method unlinks 'this' from the containing basic block and deletes it.

Helper class to create VPRecipies from IR instructions.

VPRecipeBase * getRecipe(Instruction *I)

Return the recipe created for given ingredient.

A recipe to represent inloop reduction operations with vector-predication intrinsics,...

A recipe to represent inloop, ordered or partial reduction operations.

VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...

const VPBlockBase * getEntry() const

Type * getCanonicalIVType()

Return the type of the canonical IV for loop regions.

bool isReplicator() const

An indicator whether this region is to generate multiple replicated instances of output IR correspond...

void setExiting(VPBlockBase *ExitingBlock)

Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.

VPCanonicalIVPHIRecipe * getCanonicalIV()

Returns the canonical induction recipe of the region.

const VPBlockBase * getExiting() const

VPBasicBlock * getPreheaderVPBB()

Returns the pre-header VPBasicBlock of the loop region.

VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...

bool isSingleScalar() const

VPValue * getMask()

Return the mask of a predicated VPReplicateRecipe.

A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...

VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...

Instruction * getUnderlyingInstr()

Returns the underlying instruction.

VPSingleDefRecipe * clone() override=0

Clone the current recipe.

An analysis for type-inference for VPValues.

LLVMContext & getContext()

Return the LLVMContext used by the analysis.

Type * inferScalarType(const VPValue *V)

Infer the type of V. Returns the scalar type of V.

This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...

void setOperand(unsigned I, VPValue *New)

VPValue * getOperand(unsigned N) const

void addOperand(VPValue *Operand)

This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...

bool isDefinedOutsideLoopRegions() const

Returns true if the VPValue is defined outside any loop.

VPRecipeBase * getDefiningRecipe()

Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...

Value * getLiveInIRValue() const

Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.

Value * getUnderlyingValue() const

Return the underlying Value attached to this VPValue.

void setUnderlyingValue(Value *Val)

void replaceAllUsesWith(VPValue *New)

unsigned getNumUsers() const

bool isLiveIn() const

Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.

void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)

Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...

A recipe to compute a pointer to the last element of each part of a widened memory access for widened...

A Recipe for widening the canonical induction variable of the vector loop.

VPWidenCastRecipe is a recipe to create vector cast instructions.

Instruction::CastOps getOpcode() const

A recipe for handling GEP instructions.

Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...

PHINode * getPHINode() const

VPValue * getStepValue()

Returns the step value of the induction.

const InductionDescriptor & getInductionDescriptor() const

Returns the induction descriptor for the recipe.

A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...

VPValue * getLastUnrolledPartOperand()

Returns the VPValue representing the value of this induction at the last unrolled part,...

VPValue * getSplatVFValue()

A recipe for widening vector intrinsics.

A common base class for widening memory operations.

A recipe for widened phis.

VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...

VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...

bool hasVF(ElementCount VF) const

LLVMContext & getContext() const

VPBasicBlock * getEntry()

VPValue & getVectorTripCount()

The vector trip count.

bool hasScalableVF() const

VPValue & getVFxUF()

Returns VF * UF of the vector loop region.

VPValue & getVF()

Returns the VF of the vector loop region.

VPValue * getTripCount() const

The trip count of the original loop.

VPValue * getTrue()

Return a VPValue wrapping i1 true.

VPValue * getOrCreateBackedgeTakenCount()

The backedge taken count of the original loop.

VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")

Create a new replicate region with Entry, Exiting and Name.

auto getLiveIns() const

Return the list of live-in VPValues available in the VPlan.

bool hasUF(unsigned UF) const

ArrayRef< VPIRBasicBlock * > getExitBlocks() const

Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.

VPValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)

Return a VPValue wrapping a ConstantInt with the given type and value.

void setVF(ElementCount VF)

bool isUnrolled() const

Returns true if the VPlan already has been unrolled, i.e.

LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()

Returns the VPRegionBlock of the vector loop.

void resetTripCount(VPValue *NewTripCount)

Resets the trip count for the VPlan.

VPBasicBlock * getMiddleBlock()

Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...

VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)

Create a new VPBasicBlock with Name and containing Recipe if present.

VPValue * getFalse()

Return a VPValue wrapping i1 false.

VPValue * getOrAddLiveIn(Value *V)

Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.

bool hasScalarVFOnly() const

VPBasicBlock * getScalarPreheader() const

Return the VPBasicBlock for the preheader of the scalar loop.

VPIRBasicBlock * getScalarHeader() const

Return the VPIRBasicBlock wrapping the header of the scalar loop.

VPValue * getLiveIn(Value *V) const

Return the live-in VPValue for V, if there is one or nullptr otherwise.

VPBasicBlock * getVectorPreheader()

Returns the preheader of the vector loop region, if one exists, or null otherwise.

bool hasScalarTail() const

Returns true if the scalar tail may execute after the vector loop.

LLVM Value Representation.

Type * getType() const

All values are typed, get the type of this value.

iterator_range< user_iterator > users()

LLVM_ABI StringRef getName() const

Return a constant reference to the value's name.

constexpr ScalarTy getFixedValue() const

static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)

constexpr bool isScalable() const

Returns whether the quantity is scaled by a runtime quantity (vscale).

constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const

constexpr bool isFixed() const

Returns true if the quantity is not scaled by vscale.

constexpr ScalarTy getKnownMinValue() const

Returns the minimum value this quantity can represent.

An efficient, type-erasing, non-owning reference to a callable.

const ParentTy * getParent() const

self_iterator getIterator()

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

LLVM_ABI APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM)

Return A unsign-divided by B, rounded by the given rounding mode.

unsigned ID

LLVM IR allows the use of arbitrary numbers as calling convention identifiers.

@ C

The default llvm calling convention, compatible with C.

SpecificConstantMatch m_ZeroInt()

Convenience matchers for specific integer values.

BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)

Matches a register not-ed by a G_XOR.

cst_pred_ty< is_all_ones > m_AllOnes()

Match an integer or vector with all bits set.

m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedStore(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)

Matches MaskedStore Intrinsic.

ap_match< APInt > m_APInt(const APInt *&Res)

Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.

CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)

Matches Trunc.

LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)

Matches L && R either in the form of L & R or L ?

match_combine_or< CastInst_match< OpTy, ZExtInst >, OpTy > m_ZExtOrSelf(const OpTy &Op)

bool match(Val *V, const Pattern &P)

specificval_ty m_Specific(const Value *V)

Match if we have a specific specified value.

m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedLoad(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)

Matches MaskedLoad Intrinsic.

class_match< ConstantInt > m_ConstantInt()

Match an arbitrary ConstantInt and ignore it.

cst_pred_ty< is_one > m_One()

Match an integer 1 or a vector with all elements equal to 1.

IntrinsicID_match m_Intrinsic()

Match intrinsic calls like this: m_Intrinsic&lt;Intrinsic::fabs&gt;(m_Value(X))

ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)

Matches SelectInst.

SpecificCmpClass_match< LHS, RHS, CmpInst > m_SpecificCmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)

BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)

deferredval_ty< Value > m_Deferred(Value *const &V)

Like m_Specific(), but works if the specific value to match is determined as part of the same match()...

SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)

class_match< CmpInst > m_Cmp()

Matches any compare instruction and ignore it.

BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)

Matches a Add with LHS and RHS in either order.

CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)

match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)

auto m_LogicalAnd()

Matches L && R where L and R are arbitrary values.

CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)

Matches SExt.

BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)

Matches a Mul with LHS and RHS in either order.

MatchFunctor< Val, Pattern > match_fn(const Pattern &P)

A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.

BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)

match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)

Combine two pattern matchers matching L || R.

bind_cst_ty m_scev_APInt(const APInt *&C)

Match an SCEV constant and bind it to an APInt.

bool match(const SCEV *S, const Pattern &P)

VPInstruction_match< VPInstruction::ExtractLastLane, VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > > m_ExtractLastLaneOfLastPart(const Op0_t &Op0)

AllRecipe_commutative_match< Instruction::And, Op0_t, Op1_t > m_c_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1)

Match a binary AND operation.

AllRecipe_match< Instruction::Or, Op0_t, Op1_t > m_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)

Match a binary OR operation.

VPInstruction_match< VPInstruction::AnyOf > m_AnyOf()

AllRecipe_commutative_match< Opcode, Op0_t, Op1_t > m_c_Binary(const Op0_t &Op0, const Op1_t &Op1)

AllRecipe_commutative_match< Instruction::Or, Op0_t, Op1_t > m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)

GEPLikeRecipe_match< Op0_t, Op1_t > m_GetElementPtr(const Op0_t &Op0, const Op1_t &Op1)

AllRecipe_match< Opcode, Op0_t, Op1_t > m_Binary(const Op0_t &Op0, const Op1_t &Op1)

VPInstruction_match< VPInstruction::LastActiveLane, Op0_t > m_LastActiveLane(const Op0_t &Op0)

VPInstruction_match< Instruction::ExtractElement, Op0_t, Op1_t > m_ExtractElement(const Op0_t &Op0, const Op1_t &Op1)

specific_intval< 1 > m_False()

VPDerivedIV_match< Op0_t, Op1_t, Op2_t > m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)

VPInstruction_match< VPInstruction::ExtractLastLane, Op0_t > m_ExtractLastLane(const Op0_t &Op0)

VPInstruction_match< VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t > m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)

VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()

specific_intval< 1 > m_True()

VectorEndPointerRecipe_match< Op0_t, Op1_t > m_VecEndPtr(const Op0_t &Op0, const Op1_t &Op1)

VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > m_ExtractLastPart(const Op0_t &Op0)

VPInstruction_match< VPInstruction::Broadcast, Op0_t > m_Broadcast(const Op0_t &Op0)

class_match< VPValue > m_VPValue()

Match an arbitrary VPValue and ignore it.

VPInstruction_match< VPInstruction::ExplicitVectorLength, Op0_t > m_EVL(const Op0_t &Op0)

VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()

BuildVector matches only its opcode, without matching its operands, as the number of operands is not fixed.

VPInstruction_match< VPInstruction::ExtractPenultimateElement, Op0_t > m_ExtractPenultimateElement(const Op0_t &Op0)

VPInstruction_match< VPInstruction::FirstActiveLane, Op0_t > m_FirstActiveLane(const Op0_t &Op0)

bind_ty< VPInstruction > m_VPInstruction(VPInstruction *&V)

Match a VPInstruction, capturing if we match.

VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()

VPInstruction_match< VPInstruction::ExtractLane, Op0_t, Op1_t > m_ExtractLane(const Op0_t &Op0, const Op1_t &Op1)

NodeAddr< DefNode * > Def

bool isSingleScalar(const VPValue *VPV)

Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...

bool isUniformAcrossVFsAndUFs(VPValue *V)

Checks if V is uniform across all VF lanes and UF parts.

VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)

Get or create a VPValue that corresponds to the expansion of Expr.

std::optional< MemoryLocation > getMemoryLocation(const VPRecipeBase &R)

Return a MemoryLocation for R with noalias metadata populated from R, if the recipe is supported and ...

bool onlyFirstLaneUsed(const VPValue *Def)

Returns true if only the first lane of Def is used.

VPIRFlags getFlagsFromIndDesc(const InductionDescriptor &ID)

Extracts and returns NoWrap and FastMath flags from the induction binop in ID.

bool onlyScalarValuesUsed(const VPValue *Def)

Returns true if only scalar values of Def are used by all users.

bool isHeaderMask(const VPValue *V, const VPlan &Plan)

Return true if V is a header mask in Plan.

const SCEV * getSCEVExprForVPValue(const VPValue *V, ScalarEvolution &SE, const Loop *L=nullptr)

Return the SCEV expression for V.

This is an optimization pass for GlobalISel generic memory operations.

auto drop_begin(T &&RangeOrContainer, size_t N=1)

Return a range covering RangeOrContainer with the first N elements excluded.

auto min_element(R &&Range)

Provide wrappers to std::min_element which take ranges instead of having to pass begin/end explicitly...

bool all_of(R &&range, UnaryPredicate P)

Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.

LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)

Returns intrinsic ID for call.

DenseMap< const Value *, const SCEV * > ValueToSCEVMapTy

auto enumerate(FirstRange &&First, RestRanges &&...Rest)

Given two or more input ranges, returns a new range whose values are tuples (A, B,...

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

const Value * getLoadStorePointerOperand(const Value *V)

A helper function that returns the pointer operand of a load or store instruction.

constexpr from_range_t from_range

auto dyn_cast_if_present(const Y &Val)

dyn_cast_if_present - Functionally identical to dyn_cast, except that a null (or none in the case ...

iterator_range< T > make_range(T x, T y)

Convenience function for iterating over sub-ranges.

void append_range(Container &C, Range &&R)

Wrapper function to append range R to container C.

iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)

Make a range that does early increment to allow mutation of the underlying range without disrupting i...

auto cast_or_null(const Y &Val)

iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)

Returns an iterator range to traverse the graph starting at G in depth-first order.

iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)

Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...

detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)

Returns a concatenated range across two or more ranges.

uint64_t PowerOf2Ceil(uint64_t A)

Returns the power of two which is greater than or equal to the given value.

auto dyn_cast_or_null(const Y &Val)

bool any_of(R &&range, UnaryPredicate P)

Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.

auto reverse(ContainerTy &&C)

iterator_range< po_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_post_order_deep(VPBlockBase *G)

Returns an iterator range to traverse the graph starting at G in post order while traversing through ...

void sort(IteratorTy Start, IteratorTy End)

LLVM_ABI_FOR_TEST cl::opt< bool > EnableWideActiveLaneMask

bool none_of(R &&Range, UnaryPredicate P)

Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.

SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)

Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...

iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)

Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...

bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)

Check if a constant CI can be safely treated as having been extended from a narrower type with the gi...

bool isa(const From &Val)

isa - Return true if the parameter to the template is an instance of one of the template type argu...

auto drop_end(T &&RangeOrContainer, size_t N=1)

Return a range covering RangeOrContainer with the last N elements excluded.

RecurKind

These are the kinds of recurrences that we support.

@ Mul

Product of integers.

@ Sub

Subtraction of integers.

@ AddChainWithSubs

A chain of adds and subs.

FunctionAddr VTableAddr Next

auto count(R &&Range, const E &Element)

Wrapper function around std::count to count the number of times an element Element occurs in the give...

DWARFExpression::Operation Op

auto max_element(R &&Range)

Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...

auto count_if(R &&Range, UnaryPredicate P)

Wrapper function around std::count_if to count the number of times an element satisfying a given pred...

decltype(auto) cast(const From &Val)

cast - Return the argument parameter cast to the specified type.

LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)

Split the specified block at the specified instruction.

auto find_if(R &&Range, UnaryPredicate P)

Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.

Type * getLoadStoreType(const Value *I)

A helper function that returns the type of a load or store instruction.

bool all_equal(std::initializer_list< T > Values)

Returns true if all Values in the initializer lists are equal or the list is empty.

@ DataAndControlFlowWithoutRuntimeCheck

Use predicate to control both data and control flow, but modify the trip count so that a runtime over...

hash_code hash_combine(const Ts &...args)

Combine values into a single hash_code.

bool equal(L &&LRange, R &&RRange)

Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.

Type * toVectorTy(Type *Scalar, ElementCount EC)

A helper function for converting Scalar types to vector types.

@ Default

The result values are uniform if and only if all operands are uniform.

constexpr detail::IsaCheckPredicate< Types... > IsaPred

Function object wrapper for the llvm::isa type check.

hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)

Compute a hash_code for a sequence of values.

RemoveMask_match(const Op0_t &In, Op1_t &Out)

Definition VPlanTransforms.cpp:2813

bool match(OpTy *V) const

Definition VPlanTransforms.cpp:2815

Op0_t In

Definition VPlanTransforms.cpp:2810

Op1_t & Out

Definition VPlanTransforms.cpp:2811

A collection of metadata nodes that might be associated with a memory access used by the alias-analys...

MDNode * Scope

The tag for alias scope specification (used with noalias).

MDNode * NoAlias

The tag specifying the noalias scope.

This struct is a compact representation of a valid (non-zero power of two) alignment.

An information struct used to provide DenseMap with the various necessary components for a given valu...

Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...

A range of powers-of-2 vectorization factors with fixed start and adjustable end.

Struct to hold various analysis needed for cost computations.

A recipe for handling first-order recurrence phis.

A recipe for widening load operations with vector-predication intrinsics, using the address to load f...

A recipe for widening load operations, using the address to load from and an optional mask.

A recipe for widening select instructions.

A recipe for widening store operations with vector-predication intrinsics, using the value to store,...

A recipe for widening store operations, using the stored value, the address to store to and an option...

static void hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE, const Loop *L)

Hoist predicated loads from the same address to the loop entry block, if they are guaranteed to execu...

Definition VPlanTransforms.cpp:4317

static void sinkPredicatedStores(VPlan &Plan, ScalarEvolution &SE, const Loop *L)

Sink predicated stores to the same address with complementary predicates (P and NOT P) to an uncondit...

Definition VPlanTransforms.cpp:4385

static void materializeBroadcasts(VPlan &Plan)

Add explicit broadcasts for live-ins and VPValues defined in Plan's entry block if they are used as v...

Definition VPlanTransforms.cpp:4130

static void materializePacksAndUnpacks(VPlan &Plan)

Add explicit Build[Struct]Vector recipes to Pack multiple scalar values into vectors and Unpack recip...

Definition VPlanTransforms.cpp:4483

static void materializeBackedgeTakenCount(VPlan &Plan, VPBasicBlock *VectorPH)

Materialize the backedge-taken count to be computed explicitly using VPInstructions.

Definition VPlanTransforms.cpp:4469

static void optimizeInductionExitUsers(VPlan &Plan, DenseMap< VPValue *, VPValue * > &EndValues, ScalarEvolution &SE)

If there's a single exit block, optimize its phi recipes that use exiting IV values by feeding them p...

Definition VPlanTransforms.cpp:1055

static void hoistInvariantLoads(VPlan &Plan)

Hoist single-scalar loads with invariant addresses out of the vector loop to the preheader,...

Definition VPlanTransforms.cpp:4175

static void canonicalizeEVLLoops(VPlan &Plan)

Transform EVL loops to use variable-length stepping after region dissolution.

Definition VPlanTransforms.cpp:3147

static void dropPoisonGeneratingRecipes(VPlan &Plan, const std::function< bool(BasicBlock *)> &BlockNeedsPredication)

Drop poison flags from recipes that may generate a poison value that is used after vectorization,...

Definition VPlanTransforms.cpp:3283

static void createAndOptimizeReplicateRegions(VPlan &Plan)

Wrap predicated VPReplicateRecipes with a mask operand in an if-then region block and remove the mask...

Definition VPlanTransforms.cpp:598

static void createInterleaveGroups(VPlan &Plan, const SmallPtrSetImpl< const InterleaveGroup< Instruction > * > &InterleaveGroups, VPRecipeBuilder &RecipeBuilder, const bool &ScalarEpilogueAllowed)

Definition VPlanTransforms.cpp:3380

static bool runPass(bool(*Transform)(VPlan &, ArgsTy...), VPlan &Plan, typename std::remove_reference< ArgsTy >::type &...Args)

Helper to run a VPlan transform Transform on VPlan, forwarding extra arguments to the transform.

static void addBranchWeightToMiddleTerminator(VPlan &Plan, ElementCount VF, std::optional< unsigned > VScaleForTuning)

Add branch weight metadata, if the Plan's middle block is terminated by a BranchOnCond recipe.

Definition VPlanTransforms.cpp:4968

static void narrowInterleaveGroups(VPlan &Plan, ElementCount VF, TypeSize VectorRegWidth)

Try to convert a plan with interleave groups with VF elements to a plan with the interleave groups re...

Definition VPlanTransforms.cpp:4830

static DenseMap< const SCEV *, Value * > expandSCEVs(VPlan &Plan, ScalarEvolution &SE)

Expand VPExpandSCEVRecipes in Plan's entry block.

Definition VPlanTransforms.cpp:4672

static void convertToConcreteRecipes(VPlan &Plan)

Lower abstract recipes to concrete ones, that can be codegen'd.

Definition VPlanTransforms.cpp:3670

static void convertToAbstractRecipes(VPlan &Plan, VPCostContext &Ctx, VFRange &Range)

This function converts initial recipes to the abstract recipes and clamps Range based on cost model f...

Definition VPlanTransforms.cpp:4119

static void materializeConstantVectorTripCount(VPlan &Plan, ElementCount BestVF, unsigned BestUF, PredicatedScalarEvolution &PSE)

Definition VPlanTransforms.cpp:4439

static LLVM_ABI_FOR_TEST bool tryToConvertVPInstructionsToVPRecipes(VPlan &Plan, function_ref< const InductionDescriptor *(PHINode *)> GetIntOrFpInductionDescriptor, const TargetLibraryInfo &TLI)

Replaces the VPInstructions in Plan with corresponding widen recipes.

Definition VPlanTransforms.cpp:49

static void addExitUsersForFirstOrderRecurrences(VPlan &Plan, VFRange &Range)

Handle users in the exit block for first-order recurrences in the original exit block.

Definition VPlanTransforms.cpp:5075

static void addExplicitVectorLength(VPlan &Plan, const std::optional< unsigned > &MaxEVLSafeElements)

Add a VPEVLBasedIVPHIRecipe and related recipes to Plan and replaces all uses except the canonical IV...

Definition VPlanTransforms.cpp:3085

static void replaceSymbolicStrides(VPlan &Plan, PredicatedScalarEvolution &PSE, const DenseMap< Value *, const SCEV * > &StridesMap)

Replace symbolic strides from StridesMap in Plan with constants when possible.

Definition VPlanTransforms.cpp:3227

static void removeBranchOnConst(VPlan &Plan)

Remove BranchOnCond recipes with true or false conditions together with removing dead edges to their successors.

Definition VPlanTransforms.cpp:2565

static void removeDeadRecipes(VPlan &Plan)

Remove dead recipes from Plan.

Definition VPlanTransforms.cpp:705

static void materializeVectorTripCount(VPlan &Plan, VPBasicBlock *VectorPHVPBB, bool TailByMasking, bool RequiresScalarEpilogue)

Materialize vector trip count computations to a set of VPInstructions.

Definition VPlanTransforms.cpp:4576

static void simplifyRecipes(VPlan &Plan)

Perform instcombine-like simplifications on recipes in Plan.

Definition VPlanTransforms.cpp:1563

static void handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB, VPBasicBlock *EarlyExitVPBB, VPlan &Plan, VPBasicBlock *HeaderVPBB, VPBasicBlock *LatchVPBB)

Update Plan to account for the uncountable early exit from EarlyExitingVPBB to EarlyExitVPBB.

Definition VPlanTransforms.cpp:3783

static void clearReductionWrapFlags(VPlan &Plan)

Clear NSW/NUW flags from reduction instructions if necessary.

Definition VPlanTransforms.cpp:2312

static void cse(VPlan &Plan)

Perform common-subexpression-elimination on Plan.

Definition VPlanTransforms.cpp:2421

static void addActiveLaneMask(VPlan &Plan, bool UseActiveLaneMaskForControlFlow, bool DataAndControlFlowWithoutRuntimeCheck)

Replace (ICMP_ULE, wide canonical IV, backedge-taken-count) checks with an (active-lane-mask recipe, wide canonical IV, trip-count).

Definition VPlanTransforms.cpp:2772

static LLVM_ABI_FOR_TEST void optimize(VPlan &Plan)

Apply VPlan-to-VPlan optimizations to Plan, including induction recipe optimizations,...

Definition VPlanTransforms.cpp:2599

static void dissolveLoopRegions(VPlan &Plan)

Replace loop regions with explicit CFG.

Definition VPlanTransforms.cpp:3658

static void truncateToMinimalBitwidths(VPlan &Plan, const MapVector< Instruction *, uint64_t > &MinBWs)

Insert truncates and extends for any truncated recipe.

Definition VPlanTransforms.cpp:2472

static bool adjustFixedOrderRecurrences(VPlan &Plan, VPBuilder &Builder)

Try to have all users of fixed-order recurrences appear after the recipe defining their previous valu...

Definition VPlanTransforms.cpp:2238

static void optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF, unsigned BestUF, PredicatedScalarEvolution &PSE)

Optimize Plan based on BestVF and BestUF.

Definition VPlanTransforms.cpp:2066

static void materializeVFAndVFxUF(VPlan &Plan, VPBasicBlock *VectorPH, ElementCount VF)

Materialize VF and VFxUF to be computed explicitly using VPInstructions.

Definition VPlanTransforms.cpp:4636

static void updateScalarResumePhis(VPlan &Plan, DenseMap< VPValue *, VPValue * > &IVEndValues)

Update the resume phis in the scalar preheader after creating wide recipes for first-order recurrence...

Definition VPlanTransforms.cpp:5025