LLVM: lib/Transforms/Vectorize/VPlanTransforms.cpp Source File

1//===- VPlanTransforms.cpp - Utility VPlan to VPlan transforms -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file implements a set of utility VPlan to VPlan transformations.
11///
12//===----------------------------------------------------------------------===//
13

44

45using namespace llvm;
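// Converts VPInstructions that wrap ingredient IR instructions (phis, loads,
// stores, calls, casts) into dedicated widen recipes, using
// GetIntOrFpInductionDescriptor to turn induction phis into widened
// induction recipes.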

47

51 GetIntOrFpInductionDescriptor,

53

57

58 if (!VPBB->getParent())

59 break;

61 auto EndIter = Term ? Term->getIterator() : VPBB->end();

62

65

66 VPValue *VPV = Ingredient.getVPSingleValue();

68 continue;

69

71

74 auto *Phi = cast<PHINode>(PhiR->getUnderlyingValue());

75 const auto *II = GetIntOrFpInductionDescriptor(Phi);

76 if (II) {

77 NewRecipe = new VPWidenPHIRecipe(Phi, nullptr, PhiR->getDebugLoc());

78 for (VPValue *Op : PhiR->operands())

80 } else {

84

85

86

89 Phi, Start, Step, &Plan.getVF(), *II, Flags,

90 Ingredient.getDebugLoc());

91 }

92 } else {

95

98 *Load, Ingredient.getOperand(0), nullptr ,

99 false , false , *VPI,

100 Ingredient.getDebugLoc());

103 *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),

104 nullptr , false , false , *VPI,

105 Ingredient.getDebugLoc());

108 Ingredient.getDebugLoc());

112 return false;

116 *VPI, CI->getDebugLoc());

119 *VPI, Ingredient.getDebugLoc());

122 CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI,

124 } else {

126 *VPI, Ingredient.getDebugLoc());

127 }

128 }

129

133 else

135 "Only recpies with zero or one defined values expected");

136 Ingredient.eraseFromParent();

137 }

138 }

139 return true;

140}

141

142

143

144

145
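// Walks a chain of single-successor blocks up to LastBB and returns false if
// any recipe may write to (or, with CheckReads, read from) memory in a way
// that could alias Loc, honoring noalias scope metadata and skipping any
// recipes in ExcludeRecipes.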

148 bool CheckReads,

151 return false;

152

154

156 Block = Block->getSingleSuccessor()) {

158 "Expected at most one successor in block chain");

161 if (ExcludeRecipes && ExcludeRecipes->contains(&R))

162 continue;

163

164

165 if (!R.mayWriteToMemory() && !(CheckReads && R.mayReadFromMemory()))

166 continue;

167

169 if (Loc)

170

171

172 return false;

173

174

175

176 if (CheckReads && R.mayReadFromMemory() &&

179 continue;

180

181

183 Loc->AATags.NoAlias))

184 return false;

185 }

186

187 if (Block == LastBB)

188 break;

189 }

190 return true;

191}

192

193

194

196

197

199 return false;

200

201

202

203 if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi())

204 return true;

205

206

208 return RepR && RepR->getOpcode() == Instruction::Alloca;

209}

210
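// Sinks scalar operands into the replicate regions that use them: candidates
// are collected via InsertIfValidSinkCandidate and processed worklist-style,
// cloning the candidate when users outside the sink-to block only use its
// first lane.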

215

217 auto InsertIfValidSinkCandidate = [ScalarVFOnly, &WorkList](

219 auto *Candidate =

221 if (!Candidate)

222 return;

223

224

225

227 return;

228

230 return;

231

233 if (!ScalarVFOnly && RepR->isSingleScalar())

234 return;

235

236 WorkList.insert({SinkTo, Candidate});

237 };

238

239

240

243 if (!VPR->isReplicator() || EntryVPBB->getSuccessors().size() != 2)

244 continue;

247 continue;

248 for (auto &Recipe : *VPBB)

249 for (VPValue *Op : Recipe.operands())

250 InsertIfValidSinkCandidate(VPBB, Op);

251 }

252

253

254 for (unsigned I = 0; I != WorkList.size(); ++I) {

257 std::tie(SinkTo, SinkCandidate) = WorkList[I];

258

259

260

261

262 auto UsersOutsideSinkTo =

264 return cast<VPRecipeBase>(U)->getParent() != SinkTo;

265 });

266 if (any_of(UsersOutsideSinkTo, [SinkCandidate](VPUser *U) {

267 return !U->usesFirstLaneOnly(SinkCandidate);

268 }))

269 continue;

270 bool NeedsDuplicating = !UsersOutsideSinkTo.empty();

271

272 if (NeedsDuplicating) {

273 if (ScalarVFOnly)

274 continue;

276 if (auto *SinkCandidateRepR =

278

279

282 nullptr , *SinkCandidateRepR,

283 *SinkCandidateRepR);

284

285 } else {

286 Clone = SinkCandidate->clone();

287 }

288

292 });

293 }

296 InsertIfValidSinkCandidate(SinkTo, Op);

298 }

300}

301

302

303

306 if (!EntryBB || EntryBB->size() != 1 ||

308 return nullptr;

309

311}

312

313

316 if (EntryBB->getNumSuccessors() != 2)

317 return nullptr;

318

321 if (!Succ0 || !Succ1)

322 return nullptr;

323

324 if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)

325 return nullptr;

326 if (Succ0->getSingleSuccessor() == Succ1)

327 return Succ0;

328 if (Succ1->getSingleSuccessor() == Succ0)

329 return Succ1;

330 return nullptr;

331}

332

333

334

335
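// Merges adjacent replicate regions that are guarded by the same mask and
// separated only by an empty VPBasicBlock, moving phis into the second
// region's merge block and erasing the first region.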

338

339

340

341

345 if (!Region1->isReplicator())

346 continue;

347 auto *MiddleBasicBlock =

349 if (!MiddleBasicBlock || !MiddleBasicBlock->empty())

350 continue;

351

352 auto *Region2 =

354 if (!Region2 || !Region2->isReplicator())

355 continue;

356

359 if (!Mask1 || Mask1 != Mask2)

360 continue;

361

362 assert(Mask1 && Mask2 && "both regions must have conditions");

364 }

365

366

368 if (TransformedRegions.contains(Region1))

369 continue;

370 auto *MiddleBasicBlock = cast<VPBasicBlock>(Region1->getSingleSuccessor());

371 auto *Region2 = cast<VPRegionBlock>(MiddleBasicBlock->getSingleSuccessor());

372

375 if (!Then1 || !Then2)

376 continue;

377

378

379

380

381

382

385

388

389

390

391

395 VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();

398 });

399

400

401 if (Phi1ToMove.getVPSingleValue()->getNumUsers() == 0) {

402 Phi1ToMove.eraseFromParent();

403 continue;

404 }

405 Phi1ToMove.moveBefore(*Merge2, Merge2->begin());

406 }

407

408

411 R.eraseFromParent();

412

413

417 }

419 TransformedRegions.insert(Region1);

420 }

421

422 return !TransformedRegions.empty();

423}

424
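// Wraps a predicated replicate recipe in a new replicate region named
// "pred.<opcode>", with an entry block branching on the mask, a predicated
// body block holding the unmasked recipe, and an exiting block with the
// merge phi.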

428

429 std::string RegionName = (Twine("pred.") + Instr->getOpcodeName()).str();

430 assert(Instr->getParent() && "Predicated instruction not in any basic block");

431 auto *BlockInMask = PredRecipe->getMask();

435 auto *Entry =

437

438

439

442 PredRecipe->isSingleScalar(), nullptr , *PredRecipe, *PredRecipe,

444 auto *Pred =

446

450 RecipeWithoutMask->getDebugLoc());

452 PHIRecipe->setOperand(0, RecipeWithoutMask);

453 }

455 auto *Exiting =

459

460

461

464

466}

467

474 if (RepR->isPredicated())

476 }

477 }

478

479 unsigned BBNum = 0;

483

487

491

493 if (ParentRegion && ParentRegion->getExiting() == CurrentBlock)

495 }

496}

497
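// Folds a VPBasicBlock into its single predecessor when that predecessor has
// a single successor, moving recipes across and updating the enclosing
// region's exiting block and the successor edges.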

498

499

504

505

506

507 if (!VPBB->getParent())

508 continue;

509 auto *PredVPBB =

511 if (!PredVPBB || PredVPBB->getNumSuccessors() != 1 ||

513 continue;

515 }

516

520 R.moveBefore(*PredVPBB, PredVPBB->end());

522 auto *ParentRegion = VPBB->getParent();

523 if (ParentRegion && ParentRegion->getExiting() == VPBB)

524 ParentRegion->setExiting(PredVPBB);

525 for (auto *Succ : to_vector(VPBB->successors())) {

528 }

529

530 }

531 return !WorkList.empty();

532}

533

535

537

538 bool ShouldSimplify = true;

539 while (ShouldSimplify) {

543 }

544}

545

546

547

548

549

550

551

555 if (!IV || IV->getTruncInst())

556 continue;

557

558

559

560

561

562

563

564

565

570 for (auto *U : FindMyCast->users()) {

572 if (UserCast && UserCast->getUnderlyingValue() == IRCast) {

573 FoundUserCast = UserCast;

574 break;

575 }

576 }

577 FindMyCast = FoundUserCast;

578 }

580 }

581}

582
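// Removes a redundant VPWidenCanonicalIVRecipe by replacing it with an
// existing canonical VPWidenIntOrFpInductionRecipe from the header, dropping
// poison-generating flags on the reused induction.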

583

584

591 if (WidenNewIV)

592 break;

593 }

594

595 if (!WidenNewIV)

596 return;

597

601

602 if (!WidenOriginalIV || !WidenOriginalIV->isCanonical())

603 continue;

604

605

606

607

608

611

612

613

614

615 WidenOriginalIV->dropPoisonGeneratingFlags();

618 return;

619 }

620 }

621}

622

623

625

626

628 bool IsConditionalAssume = RepR && RepR->isPredicated() &&

630 if (IsConditionalAssume)

631 return true;

632

633 if (R.mayHaveSideEffects())

634 return false;

635

636

637 return all_of(R.definedValues(),

638 [](VPValue *V) { return V->getNumUsers() == 0; });

639}

640

644

645

648 R.eraseFromParent();

649 continue;

650 }

651

652

654 if (!PhiR || PhiR->getNumOperands() != 2)

655 continue;

656 VPUser *PhiUser = PhiR->getSingleUser();

657 if (!PhiUser)

658 continue;

660 if (PhiUser != Incoming->getDefiningRecipe() ||

661 Incoming->getNumUsers() != 1)

662 continue;

663 PhiR->replaceAllUsesWith(PhiR->getOperand(0));

664 PhiR->eraseFromParent();

665 Incoming->getDefiningRecipe()->eraseFromParent();

666 }

667 }

668}
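// Builds a scalar-steps chain for an induction: derives a base IV from the
// canonical IV, truncates the base IV and the step to the requested result
// type if needed, and emits the scalar IV steps recipe.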

669

680 Kind, FPBinOp, StartV, CanonicalIV, Step, "offset.idx");

681

682

685 if (TruncI) {

688 "Not truncating.");

689 assert(ResultTy->isIntegerTy() && "Truncation requires an integer type");

690 BaseIV = Builder.createScalarCast(Instruction::Trunc, BaseIV, TruncTy, DL);

691 ResultTy = TruncTy;

692 }

693

694

696 if (ResultTy != StepTy) {

698 "Not truncating.");

699 assert(StepTy->isIntegerTy() && "Truncation requires an integer type");

700 auto *VecPreheader =

703 Builder.setInsertPoint(VecPreheader);

704 Step = Builder.createScalarCast(Instruction::Trunc, Step, ResultTy, DL);

705 }

706 return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step,

708}

709

712 for (unsigned I = 0; I != Users.size(); ++I) {

715 continue;

717 Users.insert_range(V->users());

718 }

719 return Users.takeVector();

720}

721

722

723

724

733 nullptr, StartV, StepV, PtrIV->getDebugLoc(), Builder);

734

735 return Builder.createPtrAdd(PtrIV->getStartValue(), Steps,

737}

738

739

740

741

742

743

744

745

746

747

748

749

750

757 if (!PhiR)

758 continue;

759

760

761

762

763

768

770 Def->getNumUsers() == 0 || !Def->getUnderlyingValue() ||

771 (RepR && (RepR->isSingleScalar() || RepR->isPredicated())))

772 continue;

773

774

776 continue;

777

779 Def->operands(), true,

780 nullptr, *Def);

781 Clone->insertAfter(Def);

782 Def->replaceAllUsesWith(Clone);

783 }

784

785

786

789 !PtrIV->onlyScalarsGenerated(Plan.hasScalableVF()))

790 continue;

791

793 PtrIV->replaceAllUsesWith(PtrAdd);

794 continue;

795 }

796

797

798

800 if (HasOnlyVectorVFs && none_of(WideIV->users(), [WideIV](VPUser *U) {

801 return U->usesScalars(WideIV);

802 }))

803 continue;

804

807 Plan, ID.getKind(), ID.getInductionOpcode(),

809 WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),

810 WideIV->getDebugLoc(), Builder);

811

812

813 if (!HasOnlyVectorVFs) {

815 "plans containing a scalar VF cannot also include scalable VFs");

816 WideIV->replaceAllUsesWith(Steps);

817 } else {

819 WideIV->replaceUsesWithIf(Steps,

820 [WideIV, HasScalableVF](VPUser &U, unsigned) {

821 if (HasScalableVF)

822 return U.usesFirstLaneOnly(WideIV);

823 return U.usesScalars(WideIV);

824 });

825 }

826 }

827}

828

829

830

831

835 if (WideIV) {

836

837

839 return (IntOrFpIV && IntOrFpIV->getTruncInst()) ? nullptr : WideIV;

840 }

841

842

844 if (!Def || Def->getNumOperands() != 2)

845 return nullptr;

847 if (!WideIV)

849 if (!WideIV)

850 return nullptr;

851

852 auto IsWideIVInc = [&]() {

853 auto &ID = WideIV->getInductionDescriptor();

854

855

856 VPValue *IVStep = WideIV->getStepValue();

857 switch (ID.getInductionOpcode()) {

858 case Instruction::Add:

860 case Instruction::FAdd:

863 case Instruction::FSub:

866 case Instruction::Sub: {

867

868

871 return false;

877 }

878 default:

881 m_Specific(WideIV->getStepValue())));

882 }

883 llvm_unreachable("should have been covered by switch above");

884 };

885 return IsWideIVInc() ? WideIV : nullptr;

886}

887

888

889

898 return nullptr;

899

901 if (!WideIV)

902 return nullptr;

903

905 if (WideIntOrFp && WideIntOrFp->getTruncInst())

906 return nullptr;

907

908

913

915 VPValue *FirstActiveLane =

918 FirstActiveLane = B.createScalarZExtOrTrunc(FirstActiveLane, CanonicalIVType,

919 FirstActiveLaneType, DL);

921 B.createNaryOp(Instruction::Add, {CanonicalIV, FirstActiveLane}, DL);

922

923

924

925

928 EndValue = B.createNaryOp(Instruction::Add, {EndValue, One}, DL);

929 }

930

931 if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {

933 VPValue *Start = WideIV->getStartValue();

934 VPValue *Step = WideIV->getStepValue();

935 EndValue = B.createDerivedIV(

937 Start, EndValue, Step);

938 }

939

940 return EndValue;

941}

942

943

944

950 return nullptr;

951

953 if (!WideIV)

954 return nullptr;

955

957 assert(EndValue && "end value must have been pre-computed");

958

959

960

961

963 return EndValue;

964

965

967 VPValue *Step = WideIV->getStepValue();

970 return B.createNaryOp(Instruction::Sub, {EndValue, Step},

975 return B.createPtrAdd(EndValue,

976 B.createNaryOp(Instruction::Sub, {Zero, Step}),

978 }

980 const auto &ID = WideIV->getInductionDescriptor();

981 return B.createNaryOp(

982 ID.getInductionBinOp()->getOpcode() == Instruction::FAdd

983 ? Instruction::FSub

984 : Instruction::FAdd,

985 {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});

986 }

987 llvm_unreachable("all possible induction types must be handled");

988 return nullptr;

989}

990

999

1000 for (auto [Idx, PredVPBB] : enumerate(ExitVPBB->getPredecessors())) {

1001 VPValue *Escape = nullptr;

1002 if (PredVPBB == MiddleVPBB)

1004 ExitIRI->getOperand(Idx),

1005 EndValues, SE);

1006 else

1008 ExitIRI->getOperand(Idx), SE);

1009 if (Escape)

1010 ExitIRI->setOperand(Idx, Escape);

1011 }

1012 }

1013 }

1014}

1015

1016

1017

1020

1024 if (!ExpR)

1025 continue;

1026

1027 const auto &[V, Inserted] = SCEV2VPV.try_emplace(ExpR->getSCEV(), ExpR);

1028 if (Inserted)

1029 continue;

1030 ExpR->replaceAllUsesWith(V->second);

1031 ExpR->eraseFromParent();

1032 }

1033}

1034

1039

1040 while (!WorkList.empty()) {

1042 if (!Seen.insert(Cur).second)

1043 continue;

1045 if (!R)

1046 continue;

1048 continue;

1050 R->eraseFromParent();

1051 }

1052}

1053

1054

1055

1056
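// Maps a recipe to a (is-intrinsic, opcode-or-intrinsic-id) pair via
// TypeSwitch so constant folding and CSE can treat widened instructions,
// intrinsics, and pointer/phi recipes uniformly.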

1057static std::optional<std::pair<bool, unsigned>>

1060 std::optional<std::pair<bool, unsigned>>>(R)

1063 [](auto *I) { return std::make_pair(false, I->getOpcode()); })

1064 .Case<VPWidenIntrinsicRecipe>([](auto *I) {

1065 return std::make_pair(true, I->getVectorIntrinsicID());

1066 })

1067 .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe>([](auto *I) {

1068

1069

1070

1071 return std::make_pair(false,

1073 })

1074 .Default([](auto *) { return std::nullopt; });

1075}

1076

1077

1078

1079

1085 if (!OpcodeOrIID)

1086 return nullptr;

1087

1090 if (Op->isLiveIn() || Op->getLiveInIRValue())

1091 return nullptr;

1092 Ops.push_back(Op->getLiveInIRValue());

1093 }

1094

1095 auto FoldToIRValue = [&]() -> Value * {

1097 if (OpcodeOrIID->first) {

1098 if (R.getNumOperands() != 2)

1099 return nullptr;

1100 unsigned ID = OpcodeOrIID->second;

1101 return Folder.FoldBinaryIntrinsic(ID, Ops[0], Ops[1],

1103 }

1104 unsigned Opcode = OpcodeOrIID->second;

1111 switch (Opcode) {

1113 return Folder.FoldSelect(Ops[0], Ops[1],

1116 return Folder.FoldBinOp(Instruction::BinaryOps::Xor, Ops[0],

1118 case Instruction::Select:

1119 return Folder.FoldSelect(Ops[0], Ops[1], Ops[2]);

1120 case Instruction::ICmp:

1121 case Instruction::FCmp:

1123 Ops[1]);

1124 case Instruction::GetElementPtr: {

1127 return Folder.FoldGEP(GEP->getSourceElementType(), Ops[0],

1129 }

1135

1136

1137 case Instruction::ExtractElement:

1138 assert(Ops[0]->getType()->isVectorTy() && "Live-ins should be scalar");

1139 return Ops[0];

1140 }

1141 return nullptr;

1142 };

1143

1144 if (Value *V = FoldToIRValue())

1145 return R.getParent()->getPlan()->getOrAddLiveIn(V);

1146 return nullptr;

1147}

1148

1149

1151 VPlan *Plan = Def->getParent()->getPlan();

1152

1153

1154

1158 return Def->replaceAllUsesWith(V);

1159

1160

1162 VPValue *Op = PredPHI->getOperand(0);

1163 if (Op->isLiveIn())

1164 PredPHI->replaceAllUsesWith(Op);

1165 }

1166

1172 if (TruncTy == ATy) {

1173 Def->replaceAllUsesWith(A);

1174 } else {

1175

1177 return;

1179

1181 ? Instruction::SExt

1182 : Instruction::ZExt;

1184 TruncTy);

1185 if (auto *UnderlyingExt = Def->getOperand(0)->getUnderlyingValue()) {

1186

1187 Ext->setUnderlyingValue(UnderlyingExt);

1188 }

1189 Def->replaceAllUsesWith(Ext);

1191 auto *Trunc = Builder.createWidenCast(Instruction::Trunc, A, TruncTy);

1192 Def->replaceAllUsesWith(Trunc);

1193 }

1194 }

1195#ifndef NDEBUG

1196

1197

1200 for (VPUser *U : A->users()) {

1202 for (VPValue *VPV : R->definedValues())

1204 }

1205#endif

1206 }

1207

1208

1209

1210

1211

1216 Def->replaceAllUsesWith(X);

1217 Def->eraseFromParent();

1218 return;

1219 }

1220

1221

1223 return Def->replaceAllUsesWith(Def->getOperand(Def->getOperand(0) == X));

1224

1225

1227 return Def->replaceAllUsesWith(X);

1228

1229

1231 return Def->replaceAllUsesWith(Def->getOperand(Def->getOperand(0) == X));

1232

1233

1235 return Def->replaceAllUsesWith(Def->getOperand(1));

1236

1237

1240

1241

1242 (!Def->getOperand(0)->hasMoreThanOneUniqueUser() ||

1243 !Def->getOperand(1)->hasMoreThanOneUniqueUser()))

1244 return Def->replaceAllUsesWith(

1245 Builder.createLogicalAnd(X, Builder.createOr(Y, Z)));

1246

1247

1249 return Def->replaceAllUsesWith(Plan->getFalse());

1250

1252 return Def->replaceAllUsesWith(X);

1253

1254

1257 Def->setOperand(0, C);

1258 Def->setOperand(1, Y);

1259 Def->setOperand(2, X);

1260 return;

1261 }

1262

1263

1264

1265

1268 X->hasMoreThanOneUniqueUser())

1269 return Def->replaceAllUsesWith(

1270 Builder.createLogicalAnd(X, Builder.createLogicalAnd(Y, Z)));

1271

1273 return Def->replaceAllUsesWith(A);

1274

1276 return Def->replaceAllUsesWith(A);

1277

1279 return Def->replaceAllUsesWith(

1280 Def->getOperand(0) == A ? Def->getOperand(1) : Def->getOperand(0));

1281

1284 return Def->replaceAllUsesWith(A);

1285

1286

1290 if (all_of(Cmp->users(),

1298

1299 R->setOperand(1, Y);

1300 R->setOperand(2, X);

1301 } else {

1302

1304 R->replaceAllUsesWith(Cmp);

1305 }

1306 }

1307

1308

1309 if (!Cmp->getDebugLoc() && Def->getDebugLoc())

1310 Cmp->setDebugLoc(Def->getDebugLoc());

1311 }

1312 }

1313 }

1314

1315

1316

1320 for (VPValue *Op : Def->operands()) {

1322 if (Op->getNumUsers() > 1 ||

1326 } else if (!UnpairedCmp) {

1327 UnpairedCmp = Op->getDefiningRecipe();

1328 } else {

1331 UnpairedCmp = nullptr;

1332 }

1333 }

1334

1335 if (UnpairedCmp)

1337

1338 if (NewOps.size() < Def->getNumOperands()) {

1340 return Def->replaceAllUsesWith(NewAnyOf);

1341 }

1342 }

1343

1344

1345

1346

1352 return Def->replaceAllUsesWith(NewCmp);

1353 }

1354

1355

1360 return Def->replaceAllUsesWith(Def->getOperand(1));

1361

1366 X = Builder.createWidenCast(Instruction::Trunc, X, WideStepTy);

1367 Def->replaceAllUsesWith(X);

1368 return;

1369 }

1370

1371

1372

1377 Def->setOperand(1, Def->getOperand(0));

1378 Def->setOperand(0, Y);

1379 return;

1380 }

1381

1383 if (Phi->getOperand(0) == Phi->getOperand(1))

1384 Phi->replaceAllUsesWith(Phi->getOperand(0));

1385 return;

1386 }

1387

1388

1391 Def->replaceAllUsesWith(

1392 BuildVector->getOperand(BuildVector->getNumOperands() - 1));

1393 return;

1394 }

1395

1396

1399 Def->replaceAllUsesWith(

1400 BuildVector->getOperand(BuildVector->getNumOperands() - 2));

1401 return;

1402 }

1403

1407 Def->replaceAllUsesWith(BuildVector->getOperand(Idx));

1408 return;

1409 }

1410

1412 Def->replaceAllUsesWith(

1414 return;

1415 }

1416

1417

1418

1422 "broadcast operand must be single-scalar");

1423 Def->setOperand(0, C);

1424 return;

1425 }

1426

1428 if (Phi->getNumOperands() == 1)

1429 Phi->replaceAllUsesWith(Phi->getOperand(0));

1430 return;

1431 }

1432

1433

1434

1436 return;

1437

1438

1442 if (Phi->getOperand(1) != Def && match(Phi->getOperand(0), m_ZeroInt()) &&

1443 Phi->getSingleUser() == Def) {

1444 Phi->setOperand(0, Y);

1445 Def->replaceAllUsesWith(Phi);

1446 return;

1447 }

1448 }

1449

1450

1452 if (VecPtr->isFirstPart()) {

1453 VecPtr->replaceAllUsesWith(VecPtr->getOperand(0));

1454 return;

1455 }

1456 }

1457

1458

1459

1462 Steps->replaceAllUsesWith(Steps->getOperand(0));

1463 return;

1464 }

1465 }

1466

1470 Def->replaceUsesWithIf(StartV, [](const VPUser &U, unsigned Idx) {

1472 return PhiR && PhiR->isInLoop();

1473 });

1474 return;

1475 }

1476

1478 Def->replaceAllUsesWith(A);

1479 return;

1480 }

1481

1487 [Def, A](VPUser *U) { return U->usesScalars(A) || Def == U; })) {

1488 return Def->replaceAllUsesWith(A);

1489 }

1490

1492 return Def->replaceAllUsesWith(A);

1493}

1494

1505

1508 return;

1509

1510

1511

1512

1513

1519 continue;

1521 if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))

1522 continue;

1523

1525 if (RepR && isa<StoreInst>(RepR->getUnderlyingInstr()) &&

1528 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),

1529 true , nullptr , *RepR ,

1530 *RepR , RepR->getDebugLoc());

1531 Clone->insertBefore(RepOrWidenR);

1533 VPValue *ExtractOp = Clone->getOperand(0);

1535 ExtractOp =

1537 ExtractOp =

1539 Clone->setOperand(0, ExtractOp);

1540 RepR->eraseFromParent();

1541 continue;

1542 }

1543

1544

1546 continue;

1547

1548

1549

1550

1551

1552

1553 if (all\_of(RepOrWidenR->users(),

1554 [RepOrWidenR](const VPUser *U) {

1555 if (auto *VPI = dyn_cast<VPInstruction>(U)) {

1556 unsigned Opcode = VPI->getOpcode();

1557 if (Opcode == VPInstruction::ExtractLastLane ||

1558 Opcode == VPInstruction::ExtractLastPart ||

1559 Opcode == VPInstruction::ExtractPenultimateElement)

1560 return true;

1561 }

1562

1563 return U->usesScalars(RepOrWidenR);

1564 }) &&

1565 none_of(RepOrWidenR->operands(), [RepOrWidenR](VPValue *Op) {

1566 if (Op->getSingleUser() != RepOrWidenR)

1567 return false;

1568

1569

1570 bool LiveInNeedsBroadcast =

1571 Op->isLiveIn() && !isa<Constant>(Op->getLiveInIRValue());

1572 auto *OpR = dyn_cast(Op);

1573 return LiveInNeedsBroadcast || (OpR && OpR->isSingleScalar());

1574 }))

1575 continue;

1576

1578 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),

1579 true , nullptr, *RepOrWidenR);

1580 Clone->insertBefore(RepOrWidenR);

1581 RepOrWidenR->replaceAllUsesWith(Clone);

1583 RepOrWidenR->eraseFromParent();

1584 }

1585 }

1586}

1587

1588

1589

1592 return;

1593 VPValue *CommonEdgeMask;

1596 return;

1600 return;

1603}

1604

1605

1606

1612 if (!Blend)

1613 continue;

1614

1616

1617

1619 if (Blend->isNormalized() || match(Blend->getMask(0), m_False()))

1620 UniqueValues.insert(Blend->getIncomingValue(0));

1621 for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)

1623 UniqueValues.insert(Blend->getIncomingValue(I));

1624

1625 if (UniqueValues.size() == 1) {

1626 Blend->replaceAllUsesWith(*UniqueValues.begin());

1627 Blend->eraseFromParent();

1628 continue;

1629 }

1630

1631 if (Blend->isNormalized())

1632 continue;

1633

1634

1635

1636

1637 unsigned StartIndex = 0;

1638 for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {

1639

1640

1641

1642 VPValue *Mask = Blend->getMask(I);

1643 if (Mask->getNumUsers() == 1 && match(Mask, m_False())) {

1644 StartIndex = I;

1645 break;

1646 }

1647 }

1648

1650 OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));

1651

1652 for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) {

1653 if (I == StartIndex)

1654 continue;

1655 OperandsWithMask.push_back(Blend->getIncomingValue(I));

1656 OperandsWithMask.push_back(Blend->getMask(I));

1657 }

1658

1659 auto *NewBlend =

1661 OperandsWithMask, Blend->getDebugLoc());

1662 NewBlend->insertBefore(&R);

1663

1664 VPValue *DeadMask = Blend->getMask(StartIndex);

1666 Blend->eraseFromParent();

1668

1669

1671 if (NewBlend->getNumOperands() == 3 &&

1673 VPValue *Inc0 = NewBlend->getOperand(0);

1674 VPValue *Inc1 = NewBlend->getOperand(1);

1675 VPValue *OldMask = NewBlend->getOperand(2);

1676 NewBlend->setOperand(0, Inc1);

1677 NewBlend->setOperand(1, Inc0);

1678 NewBlend->setOperand(2, NewMask);

1681 }

1682 }

1683 }

1684}

1685

1686

1687
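// Narrows the canonical IV to a smaller integer type when BestVF and BestUF
// make the aligned trip count fit in fewer bits, updating the IV's start and
// step values and the backedge-taken-count comparison.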

1690 unsigned BestUF) {

1691

1693 return false;

1694

1695 const APInt *TC;

1697 return false;

1698

1699

1700

1702 APInt AlignedTC =

1705 APInt MaxVal = AlignedTC - 1;

1707 };

1708 unsigned NewBitWidth =

1710

1713

1714 bool MadeChange = false;

1715

1719

1720

1721

1722

1723 if (!WideIV || !WideIV->isCanonical() ||

1724 WideIV->hasMoreThanOneUniqueUser() ||

1725 NewIVTy == WideIV->getScalarType())

1726 continue;

1727

1728

1729

1730 VPUser *SingleUser = WideIV->getSingleUser();

1731 if (!SingleUser ||

1735 continue;

1736

1737

1739 WideIV->setStartValue(NewStart);

1741 WideIV->setStepValue(NewStep);

1742

1747 Cmp->setOperand(1, NewBTC);

1748

1749 MadeChange = true;

1750 }

1751

1752 return MadeChange;

1753}

1754

1755

1756

1761 return any_of(Cond->getDefiningRecipe()->operands(), [&Plan, BestVF, BestUF,

1763 return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, SE);

1764 });

1765

1768 m_Specific(CanIV->getBackedgeValue()),

1770 return false;

1771

1772

1773

1774

1775

1776 const SCEV *VectorTripCount =

1781 "Trip count SCEV must be computable");

1785}

1786

1787

1788

1789

1790

1791

1792

1793

1795 unsigned UF) {

1797 return false;

1798

1801 auto *Term = &ExitingVPBB->back();

1802

1806 return false;

1807

1810

1814 for (unsigned Part = 0; Part < UF; ++Part) {

1819 auto *Ext =

1822 Extracts[Part] = Ext;

1824 }

1825 };

1826

1827

1831 if (!Phi)

1832 continue;

1833 VPValue *Index = nullptr;

1834 match(Phi->getBackedgeValue(),

1836 assert(Index && "Expected index from ActiveLaneMask instruction");

1837

1839 if (match(Index,

1842 Phis[Part] = Phi;

1843 else

1844

1845 Phis[0] = Phi;

1846 }

1847

1849 "Expected one VPActiveLaneMaskPHIRecipe for each unroll part");

1850

1853

1856 "Expected incoming values of Phi to be ActiveLaneMasks");

1857

1858

1859

1861 EntryALM->setOperand(2, ALMMultiplier);

1862 LoopALM->setOperand(2, ALMMultiplier);

1863

1864

1866 ExtractFromALM(EntryALM, EntryExtracts);

1867

1868

1869

1871 ExtractFromALM(LoopALM, LoopExtracts);

1873 Not->setOperand(0, LoopExtracts[0]);

1874

1875

1876 for (unsigned Part = 0; Part < UF; ++Part) {

1877 Phis[Part]->setStartValue(EntryExtracts[Part]);

1878 Phis[Part]->setBackedgeValue(LoopExtracts[Part]);

1879 }

1880

1881 return true;

1882}

1883

1884

1885

1887 unsigned BestUF,

1891 auto *Term = &ExitingVPBB->back();

1897

1898

1899 const SCEV *VectorTripCount =

1904 "Trip count SCEV must be computable");

1908 return false;

1910

1911

1913 return false;

1914 } else {

1915 return false;

1916 }

1917

1918

1919

1920

1921

1922

1923

1926 if (auto *R = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi))

1927 return R->isCanonical();

1928 return isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe,

1929 VPFirstOrderRecurrencePHIRecipe, VPPhi>(&Phi);

1930 })) {

1935 R->getScalarType());

1937 HeaderR.eraseFromParent();

1938 continue;

1939 }

1941 HeaderR.getVPSingleValue()->replaceAllUsesWith(Phi->getIncomingValue(0));

1942 HeaderR.eraseFromParent();

1943 }

1944

1949

1951 B->setParent(nullptr);

1952

1956 } else {

1957

1958

1960 {}, {}, Term->getDebugLoc());

1962 }

1963

1964 Term->eraseFromParent();

1965

1966 return true;

1967}

1968

1969

1970

1978 continue;

1979

1983 continue;

1986 continue;

1987

1990 R.getDebugLoc());

1991 R.getVPSingleValue()->replaceAllUsesWith(Trunc);

1992 return true;

1993 }

1994 }

1995 return false;

1996}

1997

1999 unsigned BestUF,

2001 assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");

2002 assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");

2003

2008

2009 if (MadeChange) {

2010 Plan.setVF(BestVF);

2011 assert(Plan.getUF() == BestUF && "BestUF must match the Plan's UF");

2012 }

2013}

2014

2015

2016

2017

2018static bool

2022

2025 Seen.insert(Previous);

2026 auto TryToPushSinkCandidate = [&](VPRecipeBase *SinkCandidate) {

2027

2028

2029 if (SinkCandidate == Previous)

2030 return false;

2031

2033 !Seen.insert(SinkCandidate).second ||

2035 return true;

2036

2038 return false;

2039

2040 WorkList.push_back(SinkCandidate);

2041 return true;

2042 };

2043

2044

2046 for (unsigned I = 0; I != WorkList.size(); ++I) {

2049 "only recipes with a single defined value expected");

2050

2053 return false;

2054 }

2055 }

2056

2057

2058

2061 });

2062

2063 for (VPRecipeBase *SinkCandidate : WorkList) {

2064 if (SinkCandidate == FOR)

2065 continue;

2066

2067 SinkCandidate->moveAfter(Previous);

2068 Previous = SinkCandidate;

2069 }

2070 return true;

2071}

2072

2073

2078 return false;

2079

2080

2084

2085

2086 for (VPUser *U : FOR->users()) {

2089 HoistPoint = R;

2090 }

2092 [&VPDT, HoistPoint](VPUser *U) {

2093 auto *R = cast<VPRecipeBase>(U);

2094 return HoistPoint == R ||

2095 VPDT.properlyDominates(HoistPoint, R);

2096 }) &&

2097 "HoistPoint must dominate all users of FOR");

2098

2099 auto NeedsHoisting = [HoistPoint, &VPDT,

2101 VPRecipeBase *HoistCandidate = HoistCandidateV->getDefiningRecipe();

2102 if (!HoistCandidate)

2103 return nullptr;

2107 HoistCandidate->getRegion() == EnclosingLoopRegion) &&

2108 "CFG in VPlan should still be flat, without replicate regions");

2109

2110 if (!Visited.insert(HoistCandidate).second)

2111 return nullptr;

2112

2113

2114

2116 return nullptr;

2117

2118

2119

2121 return nullptr;

2122 return HoistCandidate;

2123 };

2124

2126 return true;

2127

2128

2129 HoistCandidates.push_back(Previous);

2130

2131 for (unsigned I = 0; I != HoistCandidates.size(); ++I) {

2134 "only recipes with a single defined value expected");

2136 return false;

2137

2139

2140

2141

2142

2143 if (Op == FOR)

2144 return false;

2145

2146 if (auto *R = NeedsHoisting(Op)) {

2147

2148

2149 if (R->getNumDefinedValues() != 1)

2150 return false;

2152 }

2153 }

2154 }

2155

2156

2157

2160 });

2161

2162 for (VPRecipeBase *HoistCandidate : HoistCandidates) {

2163 HoistCandidate->moveBefore(*HoistPoint->getParent(),

2165 }

2166

2167 return true;

2168}

2169

2173

2179

2182 VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe();

2183

2184

2185 while (auto *PrevPhi =

2187 assert(PrevPhi->getParent() == FOR->getParent());

2189 Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();

2190 }

2191

2194 return false;

2195

2196

2197

2201 else

2204

2205 auto *RecurSplice =

2207 {FOR, FOR->getBackedgeValue()});

2208

2209 FOR->replaceAllUsesWith(RecurSplice);

2210

2211

2212 RecurSplice->setOperand(0, FOR);

2213

2214

2215

2216

2217

2218 for (VPUser *U : RecurSplice->users()) {

2221 continue;

2222

2228 VPValue *PenultimateIndex =

2229 B.createNaryOp(Instruction::Sub, {LastActiveLane, One});

2230 VPValue *PenultimateLastIter =

2232 {PenultimateIndex, FOR->getBackedgeValue()});

2235

2237 VPValue *Sel = B.createSelect(Cmp, LastPrevIter, PenultimateLastIter);

2239 }

2240 }

2241 return true;

2242}

2243

2248 if (!PhiR)

2249 continue;

2250 RecurKind RK = PhiR->getRecurrenceKind();

2253 continue;

2254

2257 RecWithFlags->dropPoisonGeneratingFlags();

2258 }

2259 }

2260}

2261

2262namespace {

2263struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {

2265 return Def == getEmptyKey() || Def == getTombstoneKey();

2266 }

2267

2268

2269

2271

2272

2276 return GEP->getSourceElementType();

2277 return nullptr;

2278 })

2279 .Case<VPVectorPointerRecipe, VPWidenGEPRecipe>(

2280 [](auto *I) { return I->getSourceElementType(); })

2281 .Default([](auto *) { return nullptr; });

2282 }

2283

2284

2285 static bool canHandle(const VPSingleDefRecipe *Def) {

2286

2287

2288

2290

2291

2292

2293

2294 if (!C || (C->first && (C->second == Instruction::InsertValue ||

2295 C->second == Instruction::ExtractValue)))

2296 return false;

2297

2298

2299

2300

2301 return Def->mayReadFromMemory();

2302 }

2303

2304

2305 static unsigned getHashValue(const VPSingleDefRecipe *Def) {

2306 const VPlan *Plan = Def->getParent()->getPlan();

2307 VPTypeAnalysis TypeInfo(*Plan);

2310 getGEPSourceElementType(Def), TypeInfo.inferScalarType(Def),

2313 if (RFlags->hasPredicate())

2314 return hash_combine(Result, RFlags->getPredicate());

2316 }

2317

2318

2319 static bool isEqual(const VPSingleDefRecipe *L, const VPSingleDefRecipe *R) {

2321 return L == R;

2322 if (L->getVPDefID() != R->getVPDefID() ||

2324 getGEPSourceElementType(L) != getGEPSourceElementType(R) ||

2326 !equal(L->operands(), R->operands()))

2327 return false;

2329 "must have valid opcode info for both recipes");

2331 if (LFlags->hasPredicate() &&

2332 LFlags->getPredicate() !=

2334 return false;

2335

2336

2337

2338 const VPRegionBlock *RegionL = L->getRegion();

2339 const VPRegionBlock *RegionR = R->getRegion();

2340 if (((RegionL && RegionL->isReplicator()) ||

2342 L->getParent() != R->getParent())

2343 return false;

2344 const VPlan *Plan = L->getParent()->getPlan();

2345 VPTypeAnalysis TypeInfo(*Plan);

2346 return TypeInfo.inferScalarType(L) == TypeInfo.inferScalarType(R);

2347 }

2348};

2349}

2350

2351

2352

2356

2361 if (!Def || !VPCSEDenseMapInfo::canHandle(Def))

2362 continue;

2364

2365 if (!VPDT.dominates(V->getParent(), VPBB))

2366 continue;

2367

2370 Def->replaceAllUsesWith(V);

2371 continue;

2372 }

2373 CSEMap[Def] = Def;

2374 }

2375 }

2376}

2377

2378

2381

2382

2383

2384

2385

2386

2389 "Expected vector prehader's successor to be the vector loop region");

2394 continue;

2396 return !Op->isDefinedOutsideLoopRegions();

2397 }))

2398 continue;

2399 R.moveBefore(*Preheader, Preheader->end());

2400 }

2401 }

2402}

2403

2407 return;

2408

2409

2410

2411

2420 &R))

2421 continue;

2422

2423 VPValue *ResultVPV = R.getVPSingleValue();

2425 unsigned NewResSizeInBits = MinBWs.lookup(UI);

2426 if (!NewResSizeInBits)

2427 continue;

2428

2429

2430

2431

2432

2434 continue;

2435

2438 assert(OldResTy->isIntegerTy() && "only integer types supported");

2439 (void)OldResSizeInBits;

2440

2442

2443

2444

2445

2447 VPW->dropPoisonGeneratingFlags();

2448

2449 if (OldResSizeInBits != NewResSizeInBits &&

2451

2452 auto *Ext =

2454 Ext->insertAfter(&R);

2456 Ext->setOperand(0, ResultVPV);

2457 assert(OldResSizeInBits > NewResSizeInBits && "Nothing to shrink?");

2458 } else {

2460 "Only ICmps should not need extending the result.");

2461 }

2462

2465 continue;

2466

2467

2469 for (unsigned Idx = StartIdx; Idx != R.getNumOperands(); ++Idx) {

2470 auto *Op = R.getOperand(Idx);

2471 unsigned OpSizeInBits =

2473 if (OpSizeInBits == NewResSizeInBits)

2474 continue;

2475 assert(OpSizeInBits > NewResSizeInBits && "nothing to truncate");

2476 auto [ProcessedIter, IterIsEmpty] = ProcessedTruncs.try_emplace(Op);

2477 if (!IterIsEmpty) {

2478 R.setOperand(Idx, ProcessedIter->second);

2479 continue;

2480 }

2481

2483 if (Op->isLiveIn())

2485 else

2486 Builder.setInsertPoint(&R);

2488 Builder.createWidenCast(Instruction::Trunc, Op, NewResTy);

2489 ProcessedIter->second = NewOp;

2490 R.setOperand(Idx, NewOp);

2491 }

2492

2493 }

2494 }

2495}

2496

2501

2503 continue;

2504

2505 assert(VPBB->getNumSuccessors() == 2 &&

2506 "Two successors expected for BranchOnCond");

2507 unsigned RemovedIdx;

2509 RemovedIdx = 1;

2511 RemovedIdx = 0;

2512 else

2513 continue;

2514

2518 "There must be a single edge between VPBB and its successor");

2519

2520

2523

2524

2525

2527 VPBB->back().eraseFromParent();

2528 }

2529}

2530

2550

2551

2552

2553

2554

2555

2556

2557

2558

2559

2560

2561

2562

2563

2564

2565

2566

2567

2568

2569

2570

2571

2572

2573

2574

2575

2576

2577

2578

2579

2580

2581

2582

2583

2589 VPValue *StartV = CanonicalIVPHI->getStartValue();

2590

2591 auto *CanonicalIVIncrement =

2593

2594

2595 CanonicalIVIncrement->dropPoisonGeneratingFlags();

2596 DebugLoc DL = CanonicalIVIncrement->getDebugLoc();

2597

2598

2599

2601 VPBuilder Builder(VecPreheader);

2602

2603

2605

2606 VPValue *TripCount, *IncrementValue;

2608

2609

2610

2611 IncrementValue = CanonicalIVIncrement;

2612 TripCount = TC;

2613 } else {

2614

2615

2616

2617 IncrementValue = CanonicalIVPHI;

2619 {TC}, DL);

2620 }

2621 auto *EntryIncrement = Builder.createOverflowingOp(

2623 "index.part.next");

2624

2625

2626 VPValue *ALMMultiplier =

2629 {EntryIncrement, TC, ALMMultiplier}, DL,

2630 "active.lane.mask.entry");

2631

2632

2633

2634 auto *LaneMaskPhi =

2636 LaneMaskPhi->insertAfter(CanonicalIVPHI);

2637

2638

2639

2641 Builder.setInsertPoint(OriginalTerminator);

2642 auto *InLoopIncrement =

2644 {IncrementValue}, {false, false}, DL);

2646 {InLoopIncrement, TripCount, ALMMultiplier},

2647 DL, "active.lane.mask.next");

2649

2650

2651

2652 auto *NotMask = Builder.createNot(ALM, DL);

2655 return LaneMaskPhi;

2656}

2657

2658

2659

2660

2661

2665 auto *FoundWidenCanonicalIVUser = find_if(

2669 "Must have at most one VPWideCanonicalIVRecipe");

2670 if (FoundWidenCanonicalIVUser !=

2672 auto *WideCanonicalIV =

2674 WideCanonicalIVs.push_back(WideCanonicalIV);

2675 }

2676

2677

2678

2682 if (WidenOriginalIV && WidenOriginalIV->isCanonical())

2683 WideCanonicalIVs.push_back(WidenOriginalIV);

2684 }

2685

2686

2687

2689 for (auto *Wide : WideCanonicalIVs) {

2693 continue;

2694

2695 assert(VPI->getOperand(0) == Wide &&

2696 "WidenCanonicalIV must be the first operand of the compare");

2697 assert(!HeaderMask && "Multiple header masks found?");

2698 HeaderMask = VPI;

2699 }

2700 }

2701 return HeaderMask;

2702}

2703

2705 VPlan &Plan, bool UseActiveLaneMaskForControlFlow,

2708 UseActiveLaneMaskForControlFlow) &&

2709 "DataAndControlFlowWithoutRuntimeCheck implies "

2710 "UseActiveLaneMaskForControlFlow");

2711

2713 auto *FoundWidenCanonicalIVUser = find_if(

2715 assert(FoundWidenCanonicalIVUser &&

2716 "Must have widened canonical IV when tail folding!");

2718 auto *WideCanonicalIV =

2721 if (UseActiveLaneMaskForControlFlow) {

2724 } else {

2728 LaneMask =

2730 {WideCanonicalIV, Plan.getTripCount(), ALMMultiplier},

2731 nullptr, "active.lane.mask");

2732 }

2733

2734

2735

2736

2739}

2740

2744

2746

2747 template <typename OpTy> bool match(OpTy *V) const {

2749 Out = nullptr;

2750 return true;

2751 }

2753 }

2754};

2755

2756

2757

2758template <typename Op0_t, typename Op1_t>

2763

2764

2765

2766

2767

2768

2769

2770

2771

2777 VPValue *Addr, *Mask, *EndPtr;

2778

2779

2780 auto AdjustEndPtr = [&CurRecipe, &EVL](VPValue *EndPtr) {

2782 EVLEndPtr->insertBefore(&CurRecipe);

2783 EVLEndPtr->setOperand(1, &EVL);

2784 return EVLEndPtr;

2785 };

2786

2787 if (match(&CurRecipe,

2791 EVL, Mask);

2792

2793 if (match(&CurRecipe,

2798 AdjustEndPtr(EndPtr), EVL, Mask);

2799

2804 EVL, Mask);

2805

2811 AdjustEndPtr(EndPtr), EVL, Mask);

2812

2814 if (Rdx->isConditional() &&

2817

2819 if (Interleave->getMask() &&

2822

2824 if (match(&CurRecipe,

2827 Intrinsic::vp_merge, {Plan->getTrue(), LHS, RHS, &EVL},

2829

2833 Intrinsic::vp_merge, {Mask, LHS, RHS, &EVL},

2835

2842 }

2843

2844 return nullptr;

2845}

2846

2847

2852

2856 "User of VF that we can't transform to EVL.");

2859 });

2860

2862 [&LoopRegion, &Plan](VPUser *U) {

2863 return match(U,

2864 m_c_Add(m_Specific(LoopRegion->getCanonicalIV()),

2865 m_Specific(&Plan.getVFxUF()))) ||

2866 isa<VPWidenPointerInductionRecipe>(U);

2867 }) &&

2868 "Only users of VFxUF should be VPWidenPointerInductionRecipe and the "

2869 "increment of the canonical induction.");

2871

2872

2874 });

2875

2876

2877

2879

2880

2881

2882 bool ContainsFORs =

2884 if (ContainsFORs) {

2885

2887

2889 MaxEVL = Builder.createScalarZExtOrTrunc(

2892

2893 Builder.setInsertPoint(Header, Header->getFirstNonPhi());

2894 VPValue *PrevEVL = Builder.createScalarPhi(

2896

2904 continue;

2908 Intrinsic::experimental_vp_splice,

2909 {V1, V2, Imm, Plan.getTrue(), PrevEVL, &EVL},

2911 R.getDebugLoc());

2913 R.getVPSingleValue()->replaceAllUsesWith(VPSplice);

2915 }

2916 }

2917 }

2918

2920 if (!HeaderMask)

2921 return;

2922

2923

2924

2925

2926

2927

2931 VPValue *EVLMask = Builder.createICmp(

2936

2937

2938

2939

2940

2945 if (!EVLRecipe)

2946 continue;

2947

2949 assert(NumDefVal == CurRecipe->getNumDefinedValues() &&

2950 "New recipe must define the same number of values as the "

2951 "original.");

2954 EVLRecipe)) {

2955 for (unsigned I = 0; I < NumDefVal; ++I) {

2956 VPValue *CurVPV = CurRecipe->getVPValue(I);

2958 }

2959 }

2961 }

2962

2965

2968 R->eraseFromParent();

2969 for (VPValue *Op : PossiblyDead)

2971 }

2972}

2973

2974

2975

2976

2977

2978

2979

2980

2981

2982

2983

2984

2985

2986

2987

2988

2989

2990

2991

2992

2993

2994

2995

2996

2997

2998

2999

3000

3001

3002

3003

3004

3005

3006

3007

3008

3009

3010

3011

3012

3013

3014

3015

3016
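// Introduces an explicit-vector-length (EVL) based IV: an EVL phi plus an
// AVL phi counting down the remaining trip count, optionally clamped to
// MaxSafeElements, with the canonical IV increment rewired to advance by the
// EVL each iteration ("index.evl.next").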

3018 VPlan &Plan, const std::optional &MaxSafeElements) {

3020 return;

3023

3024 auto *CanonicalIVPHI = LoopRegion->getCanonicalIV();

3026 VPValue *StartV = CanonicalIVPHI->getStartValue();

3027

3028

3030 EVLPhi->insertAfter(CanonicalIVPHI);

3031 VPBuilder Builder(Header, Header->getFirstNonPhi());

3032

3033

3034 VPPhi *AVLPhi = Builder.createScalarPhi(

3037

3038 if (MaxSafeElements) {

3039

3043 "safe_avl");

3044 }

3047

3048 auto *CanonicalIVIncrement =

3050 Builder.setInsertPoint(CanonicalIVIncrement);

3051 VPValue *OpVPEVL = VPEVL;

3052

3054 OpVPEVL = Builder.createScalarZExtOrTrunc(

3055 OpVPEVL, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());

3056

3057 auto *NextEVLIV = Builder.createOverflowingOp(

3058 Instruction::Add, {OpVPEVL, EVLPhi},

3059 {CanonicalIVIncrement->hasNoUnsignedWrap(),

3060 CanonicalIVIncrement->hasNoSignedWrap()},

3061 CanonicalIVIncrement->getDebugLoc(), "index.evl.next");

3062 EVLPhi->addOperand(NextEVLIV);

3063

3064 VPValue *NextAVL = Builder.createOverflowingOp(

3065 Instruction::Sub, {AVLPhi, OpVPEVL}, {true, false},

3068

3070

3071

3072

3073 CanonicalIVPHI->replaceAllUsesWith(EVLPhi);

3074 CanonicalIVIncrement->setOperand(0, CanonicalIVPHI);

3075

3077}

3078

3080

3081

3083

3088 assert(!EVLPhi && "Found multiple EVL PHIs. Only one expected");

3089 EVLPhi = PhiR;

3090 }

3091

3092

3093 if (!EVLPhi)

3094 return;

3095

3099 [[maybe_unused]] bool FoundAVL =

3100 match(EVLIncrement,

3102 assert(FoundAVL && "Didn't find AVL?");

3103

3104

3107 AVL = SafeAVL;

3108

3110 [[maybe_unused]] bool FoundAVLNext =

3113 assert(FoundAVLNext && "Didn't find AVL backedge?");

3114

3115

3116 auto *ScalarR =

3121

3122

3124 VPValue *Backedge = CanonicalIV->getIncomingValue(1);

3127 "Unexpected canonical iv");

3129

3130

3133 CanonicalIV->eraseFromParent();

3134

3135

3136

3137

3138

3142

3144 return;

3145 assert(LatchExitingBr &&

3146 match(LatchExitingBr,

3149 "Unexpected terminator in EVL loop");

3150

3152 VPBuilder Builder(LatchExitingBr);

3156 LatchExitingBr->eraseFromParent();

3157}

3158

3162

3163

3164 auto CanUseVersionedStride = [&Plan](VPUser &U, unsigned) {

3166 return R->getRegion() ||

3168 };

3170 for (const SCEV *Stride : StridesMap.values()) {

3173 const APInt *StrideConst;

3174 if (match(PSE.getSCEV(StrideV), m_scev_APInt(StrideConst)))

3175

3176 continue;

3177

3181

3182

3183

3186 continue;

3188 if (!StrideVPV)

3189 continue;

3190 unsigned BW = U->getType()->getScalarSizeInBits();

3195 }

3196 RewriteMap[StrideV] = PSE.getSCEV(StrideV);

3197 }

3198

3201 if (!ExpSCEV)

3202 continue;

3203 const SCEV *ScevExpr = ExpSCEV->getSCEV();

3204 auto *NewSCEV =

3206 if (NewSCEV != ScevExpr) {

3208 ExpSCEV->replaceAllUsesWith(NewExp);

3211 }

3212 }

3213}

3214
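// Collects the backward slice of the address computations feeding predicated
// widened memory recipes and interleave groups, dropping poison-generating
// flags on the recipes found (rewriting a disjoint-or into an add where the
// flag cannot simply be dropped).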

3217 const std::function<bool(BasicBlock *)> &BlockNeedsPredication) {

3218

3219

3221 auto CollectPoisonGeneratingInstrsInBackwardSlice([&](VPRecipeBase *Root) {

3224

3225

3226 while (!Worklist.empty()) {

3228

3229 if (!Visited.insert(CurRec).second)

3230 continue;

3231

3232

3233

3234

3235

3238 continue;

3239

3240

3241

3242

3245

3246

3247

3248

3249

3251 RecWithFlags->isDisjoint()) {

3252 VPBuilder Builder(RecWithFlags);

3253 VPInstruction *New = Builder.createOverflowingOp(

3254 Instruction::Add, {A, B}, {false, false},

3255 RecWithFlags->getDebugLoc());

3256 New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());

3257 RecWithFlags->replaceAllUsesWith(New);

3258 RecWithFlags->eraseFromParent();

3259 CurRec = New;

3260 } else

3261 RecWithFlags->dropPoisonGeneratingFlags();

3262 } else {

3265 (void)Instr;

3266 assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&

3267 "found instruction with poison generating flags not covered by "

3268 "VPRecipeWithIRFlags");

3269 }

3270

3271

3273 if (VPRecipeBase *OpDef = Operand->getDefiningRecipe())

3275 }

3276 });

3277

3278

3279

3280

3285 Instruction &UnderlyingInstr = WidenRec->getIngredient();

3286 VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe();

3287 if (AddrDef && WidenRec->isConsecutive() &&

3288 BlockNeedsPredication(UnderlyingInstr.getParent()))

3289 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);

3291 VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe();

3292 if (AddrDef) {

3293

3295 InterleaveRec->getInterleaveGroup();

3296 bool NeedPredication = false;

3297 for (int I = 0, NumMembers = InterGroup->getNumMembers();

3298 I < NumMembers; ++I) {

3300 if (Member)

3301 NeedPredication |= BlockNeedsPredication(Member->getParent());

3302 }

3303

3304 if (NeedPredication)

3305 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);

3306 }

3307 }

3308 }

3309 }

3310}

3311
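// Replaces the member loads/stores of each interleave group with a single
// interleave recipe at the group's insert position, adjusting the address to
// the first member (or a reversed pointer for reverse groups) and adding a
// gap mask when the group is not full.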

3315 &InterleaveGroups,

3316 VPRecipeBuilder &RecipeBuilder, const bool &ScalarEpilogueAllowed) {

3317 if (InterleaveGroups.empty())

3318 return;

3319

3320

3321

3322

3324 for (const auto *IG : InterleaveGroups) {

3325 auto *Start =

3330 StoredValues.push_back(StoreR->getStoredValue());

3331 for (unsigned I = 1; I < IG->getFactor(); ++I) {

3333 if (!MemberI)

3334 continue;

3338 StoredValues.push_back(StoreR->getStoredValue());

3339 InterleaveMD.intersect(*MemoryR);

3340 }

3341

3342 bool NeedsMaskForGaps =

3343 (IG->requiresScalarEpilogue() && !ScalarEpilogueAllowed) ||

3344 (!StoredValues.empty() && !IG->isFull());

3345

3346 Instruction *IRInsertPos = IG->getInsertPos();

3347 auto *InsertPos =

3349

3354

3355

3356 VPValue *Addr = Start->getAddr();

3359

3360

3361

3362

3363

3364

3365 assert(IG->getIndex(IRInsertPos) != 0 &&

3366 "index of insert position shouldn't be zero");

3370 IG->getIndex(IRInsertPos),

3371 true);

3374 Addr = B.createNoWrapPtrAdd(InsertPos->getAddr(), OffsetVPV, NW);

3375 }

3376

3377

3378

3379

3380 if (IG->isReverse()) {

3383 -(int64_t)IG->getFactor(), NW, InsertPos->getDebugLoc());

3384 ReversePtr->insertBefore(InsertPos);

3385 Addr = ReversePtr;

3386 }

3388 InsertPos->getMask(), NeedsMaskForGaps,

3389 InterleaveMD, InsertPos->getDebugLoc());

3390 VPIG->insertBefore(InsertPos);

3391

3392 unsigned J = 0;

3393 for (unsigned i = 0; i < IG->getFactor(); ++i)

3394 if (Instruction *Member = IG->getMember(i)) {

3396 if (!Member->getType()->isVoidTy()) {

3399 J++;

3400 }

3402 }

3403 }

3404}

3405

3406

3407

3408

3409

3410

3411

3412

3413

3414

3415

3416

3417

3418

3419

3420

3421

3422

3423

3424

3425

3426

3427

3428

3429

3430

3431

3432

3433

3434

3435

3436
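// Expands a wide int/fp induction into explicit recipes: a vector phi
// initialized to splat(start) + unit-steps * step, incremented each
// iteration by step * VF (using FMul/FAdd for floating-point inductions).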

3437static void

3445

3446

3447

3449

3455 AddOp = Instruction::Add;

3456 MulOp = Instruction::Mul;

3457 } else {

3458 AddOp = ID.getInductionOpcode();

3459 MulOp = Instruction::FMul;

3460 }

3461

3462

3466 assert(StepTy->isIntegerTy() && "Truncation requires an integer type");

3467 Step = Builder.createScalarCast(Instruction::Trunc, Step, Ty, DL);

3468 Start = Builder.createScalarCast(Instruction::Trunc, Start, Ty, DL);

3469

3470 Flags.dropPoisonGeneratingFlags();

3471 StepTy = Ty;

3472 }

3473

3474

3475 Type *IVIntTy =

3479 Init = Builder.createWidenCast(Instruction::UIToFP, Init, StepTy);

3480

3483

3484 Init = Builder.createNaryOp(MulOp, {Init, SplatStep}, Flags);

3485 Init = Builder.createNaryOp(AddOp, {SplatStart, Init}, Flags,

3487

3488

3491 WidePHI->insertBefore(WidenIVR);

3492

3493

3496

3498 Inc = SplatVF;

3500 } else {

3502 Builder.setInsertPoint(R->getParent(), std::next(R->getIterator()));

3503

3504

3506 VF = Builder.createScalarCast(Instruction::CastOps::UIToFP, VF, StepTy,

3507 DL);

3508 else

3509 VF = Builder.createScalarZExtOrTrunc(VF, StepTy,

3511

3512 Inc = Builder.createNaryOp(MulOp, {Step, VF}, Flags);

3514 Prev = WidePHI;

3515 }

3516

3519 auto *Next = Builder.createNaryOp(AddOp, {Prev, Inc}, Flags,

3520 WidenIVR->getDebugLoc(), "vec.ind.next");

3521

3522 WidePHI->addOperand(Next);

3523

3525}

3526

3527

3528

3529

3530

3531

3532

3533

3534

3535

3536

3537

3538

3539

3540

3541

3542

3543

3544

3545

3546

3547

3550 VPlan *Plan = R->getParent()->getPlan();

3551 VPValue *Start = R->getStartValue();

3552 VPValue *Step = R->getStepValue();

3553 VPValue *VF = R->getVFValue();

3554

3555 assert(R->getInductionDescriptor().getKind() ==

3557 "Not a pointer induction according to InductionDescriptor!");

3560 "Recipe should have been replaced");

3561

3564

3565

3566 VPPhi *ScalarPtrPhi = Builder.createScalarPhi(Start, DL, "pointer.phi");

3567

3568

3569

3570 Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());

3573 Offset = Builder.createOverflowingOp(Instruction::Mul, {Offset, Step});

3574 VPValue *PtrAdd = Builder.createNaryOp(

3576 R->replaceAllUsesWith(PtrAdd);

3577

3578

3581 VF = Builder.createScalarZExtOrTrunc(VF, StepTy, TypeInfo.inferScalarType(VF),

3582 DL);

3583 VPValue *Inc = Builder.createOverflowingOp(Instruction::Mul, {Step, VF});

3584

3586 Builder.createPtrAdd(ScalarPtrPhi, Inc, DL, "ptr.ind");

3587 ScalarPtrPhi->addOperand(InductionGEP);

3588}

3589

3591

3595 if (!R->isReplicator())

3597 }

3599 R->dissolveToCFGLoop();

3600}

3601

3610 ToRemove.push_back(WidenIVR);

3611 continue;

3612 }

3613

3615

3616

3617 if (WidenIVR->onlyScalarsGenerated(Plan.hasScalableVF())) {

3621 WidenIVR->replaceAllUsesWith(PtrAdd);

3622 ToRemove.push_back(WidenIVR);

3623 continue;

3624 }

3626 ToRemove.push_back(WidenIVR);

3627 continue;

3628 }

3629

3630

3634 for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)

3635 Select = Builder.createSelect(Blend->getMask(I),

3636 Blend->getIncomingValue(I), Select,

3637 R.getDebugLoc(), "predphi");

3638 Blend->replaceAllUsesWith(Select);

3640 }

3641

3643 Expr->decompose();

3645 }

3646

3647

3649 if (LastActiveL &&

3651

3653 for (VPValue *Op : LastActiveL->operands()) {

3654 VPValue *NotMask = Builder.createNot(Op, LastActiveL->getDebugLoc());

3656 }

3657

3658

3659 VPValue *FirstInactiveLane = Builder.createNaryOp(

3661 LastActiveL->getDebugLoc(), "first.inactive.lane");

3662

3663

3666 VPValue *LastLane = Builder.createNaryOp(

3667 Instruction::Sub, {FirstInactiveLane, One},

3668 LastActiveL->getDebugLoc(), "last.active.lane");

3669

3671 ToRemove.push_back(LastActiveL);

3672 continue;

3673 }

3674

3679 continue;

3680

3681

3686 ? Instruction::UIToFP

3687 : Instruction::Trunc;

3688 VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);

3689 }

3690

3691 assert(!match(ScalarStep, m_One()) && "Expected non-unit scalar-step");

3693 ScalarStep =

3694 Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);

3695 }

3696

3699 Flags = {VPI->getFastMathFlags()};

3700

3701 unsigned MulOpc =

3702 IVTy->isFloatingPointTy() ? Instruction::FMul : Instruction::Mul;

3704 MulOpc, {VectorStep, ScalarStep}, Flags, R.getDebugLoc());

3705 VectorStep = Mul;

3706 VPI->replaceAllUsesWith(VectorStep);

3708 }

3709 }

3710

3712 R->eraseFromParent();

3713}
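// Rewires an uncountable early exit: computes whether any lane took the
// early exit, routes exit phis through a vector early-exit block (extracting
// the first active lane), and ORs the early-exit and latch-exit conditions
// into a single middle-block branch.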

3714

3724 EarlyExitVPBB->getPredecessors()[0] == EarlyExitingVPBB &&

3725 "unsupported early exit VPBB");

3726

3727

3728

3731 }

3732

3736 "Terminator must be be BranchOnCond");

3737 VPValue *CondOfEarlyExitingVPBB =

3739 auto *CondToEarlyExit = TrueSucc == EarlyExitVPBB

3740 ? CondOfEarlyExitingVPBB

3741 : Builder.createNot(CondOfEarlyExitingVPBB);

3742

3743

3744

3745 VPValue *IsEarlyExitTaken =

3753

3755

3756

3757 VPBuilder MiddleBuilder(NewMiddle);

3758 VPBuilder EarlyExitB(VectorEarlyExitVPBB);

3761

3762

3763 unsigned EarlyExitIdx = ExitIRI->getNumOperands() - 1;

3764 if (ExitIRI->getNumOperands() != 1) {

3765

3766

3767 ExitIRI->extractLastLaneOfLastPartOfFirstOperand(MiddleBuilder);

3768 }

3769

3770 VPValue *IncomingFromEarlyExit = ExitIRI->getOperand(EarlyExitIdx);

3771 if (!IncomingFromEarlyExit->isLiveIn()) {

3772

3776 IncomingFromEarlyExit = EarlyExitB.createNaryOp(

3779 ExitIRI->setOperand(EarlyExitIdx, IncomingFromEarlyExit);

3780 }

3781 }

3783

3784

3785

3786

3789 "Unexpected terminator");

3790 auto *IsLatchExitTaken =

3791 Builder.createICmp(CmpInst::ICMP_EQ, LatchExitingBranch->getOperand(0),

3792 LatchExitingBranch->getOperand(1));

3793 auto *AnyExitTaken = Builder.createNaryOp(

3794 Instruction::Or, {IsEarlyExitTaken, IsLatchExitTaken});

3796 LatchExitingBranch->eraseFromParent();

3797}

3798

3799

3800

3801

3802
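// Matches an extend feeding a reduction and converts it to an extended- or
// partial-reduction recipe when TTI reports the combined cost beats the
// separate extend plus reduce, clamping the VF range accordingly.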

3806 Type *RedTy = Ctx.Types.inferScalarType(Red);

3807 VPValue *VecOp = Red->getVecOp();

3808

3809

3810 auto IsExtendedRedValidAndClampRange =

3816

3821

3822 if (Red->isPartialReduction()) {

3825

3826

3827 ExtRedCost = Ctx.TTI.getPartialReductionCost(

3828 Opcode, SrcTy, nullptr, RedTy, VF, ExtKind,

3830 } else {

3831 ExtRedCost = Ctx.TTI.getExtendedReductionCost(

3832 Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,

3833 Red->getFastMathFlags(), CostKind);

3834 }

3835 return ExtRedCost.isValid() && ExtRedCost < ExtCost + RedCost;

3836 },

3838 };

3839

3841

3843 IsExtendedRedValidAndClampRange(

3846 Ctx.Types.inferScalarType(A)))

3848

3849 return nullptr;

3850}

3851

3852

3853

3854

3855

3856

3857

3858

3859

3864 if (Opcode != Instruction::Add && Opcode != Instruction::Sub)

3865 return nullptr;

3866

3867 Type *RedTy = Ctx.Types.inferScalarType(Red);

3868

3869

3870 auto IsMulAccValidAndClampRange =

3876 Type *SrcTy =

3877 Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;

3879

3880 if (Red->isPartialReduction()) {

3881 Type *SrcTy2 =

3882 Ext1 ? Ctx.Types.inferScalarType(Ext1->getOperand(0)) : nullptr;

3883

3884

3885 MulAccCost = Ctx.TTI.getPartialReductionCost(

3886 Opcode, SrcTy, SrcTy2, RedTy, VF,

3888 Ext0->getOpcode())

3891 Ext1->getOpcode())

3894 } else {

3895

3896 if (Ext0 && Ext1 && Ext0->getOpcode() != Ext1->getOpcode())

3897 return false;

3898

3899 bool IsZExt =

3900 !Ext0 || Ext0->getOpcode() == Instruction::CastOps::ZExt;

3902 MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, Opcode, RedTy,

3904 }

3905

3909 if (Ext0)

3910 ExtCost += Ext0->computeCost(VF, Ctx);

3911 if (Ext1)

3912 ExtCost += Ext1->computeCost(VF, Ctx);

3913 if (OuterExt)

3914 ExtCost += OuterExt->computeCost(VF, Ctx);

3915

3916 return MulAccCost.isValid() &&

3917 MulAccCost < ExtCost + MulCost + RedCost;

3918 },

3920 };

3921

3922 VPValue *VecOp = Red->getVecOp();

3926

3929 VecOp = Tmp;

3930 }

3931

3932

3933

3934

3935

3936

3937 auto ExtendAndReplaceConstantOp = [&Ctx](VPWidenCastRecipe *ExtA,

3940 if (!ExtA || ExtB || !ValB->isLiveIn())

3941 return;

3942 Type *NarrowTy = Ctx.Types.inferScalarType(ExtA->getOperand(0));

3944 const APInt *Const;

3948 return;

3949

3950

3951

3952

3953

3955 auto *Trunc =

3956 Builder.createWidenCast(Instruction::CastOps::Trunc, ValB, NarrowTy);

3957 Type *WideTy = Ctx.Types.inferScalarType(ExtA);

3958 ValB = ExtB = Builder.createWidenCast(ExtOpc, Trunc, WideTy);

3959 Mul->setOperand(1, ExtB);

3960 };

3961

3962

3967

3968

3969 ExtendAndReplaceConstantOp(RecipeA, RecipeB, B, Mul);

3970

3971

3974 IsMulAccValidAndClampRange(Mul, RecipeA, RecipeB, nullptr)) {

3975 if (Sub)

3979 }

3980

3981 if (Sub && IsMulAccValidAndClampRange(Mul, nullptr, nullptr, nullptr))

3983 }

3984

3985

3986 if (Sub)

3987 return nullptr;

3988

3989

3995

3996

3997

3998 ExtendAndReplaceConstantOp(Ext0, Ext1, B, Mul);

3999

4000

4001

4002

4003

4004

4005

4006 if (Ext0 && Ext1 &&

4007 (Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&

4008 Ext0->getOpcode() == Ext1->getOpcode() &&

4009 IsMulAccValidAndClampRange(Mul, Ext0, Ext1, Ext) && Mul->hasOneUse()) {

4011 Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(), nullptr,

4012 *Ext0, *Ext0, Ext0->getDebugLoc());

4013 NewExt0->insertBefore(Ext0);

4014

4016 if (Ext0 != Ext1) {

4017 NewExt1 = new VPWidenCastRecipe(Ext1->getOpcode(), Ext1->getOperand(0),

4018 Ext->getResultType(), nullptr, *Ext1,

4019 *Ext1, Ext1->getDebugLoc());

4021 }

4022 Mul->setOperand(0, NewExt0);

4023 Mul->setOperand(1, NewExt1);

4024 Red->setOperand(1, Mul);

4026 }

4027 }

4028 return nullptr;

4029}

4030

4031

4032

4037 auto IP = std::next(Red->getIterator());

4038 auto *VPBB = Red->getParent();

4040 AbstractR = MulAcc;

4042 AbstractR = ExtRed;

4043

4044 if (!AbstractR)

4045 return;

4046

4048 Red->replaceAllUsesWith(AbstractR);

4049}

4050
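// Materializes explicit broadcasts for values defined outside the loop
// regions that have vector users, hoisting each broadcast to the vector
// preheader and replacing the non-scalar uses.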

4061

4064 return;

4065

4066#ifndef NDEBUG

4068#endif

4069

4076

4078 for (VPValue *VPV : VPValues) {

4080 (VPV->isLiveIn() && VPV->getLiveInIRValue() &&

4082 continue;

4083

4084

4088 if (User->usesScalars(VPV))

4089 continue;

4091 HoistPoint = HoistBlock->begin();

4092 else

4095 "All users must be in the vector preheader or dominated by it");

4096 }

4097

4100 VPV->replaceUsesWithIf(Broadcast,

4101 [VPV, Broadcast](VPUser &U, unsigned Idx) {

4102 return Broadcast != &U && !U.usesScalars(VPV);

4103 });

4104 }

4105}

4106

4109

4110

4111

4117

4119 if (RepR->isPredicated() || !RepR->isSingleScalar() ||

4120 RepR->getOpcode() != Instruction::Load)

4121 continue;

4122

4123 VPValue *Addr = RepR->getOperand(0);

4126 if (Loc.AATags.Scope)

4127 continue;

4129 }

4130 }

4131 if (R.mayWriteToMemory()) {

4133 if (Loc || Loc->AATags.Scope || Loc->AATags.NoAlias)

4134 return;

4136 }

4137 }

4138 }

4139

4141 for (auto &[LoadRecipe, LoadLoc] : CandidateLoads) {

4142

4143

4144

4145 const AAMDNodes &LoadAA = LoadLoc.AATags;

4149 })) {

4150 LoadRecipe->moveBefore(*Preheader, Preheader->getFirstNonPhi());

4151 }

4152 }

4153}

4154

4155

4156

4160 CommonMetadata.intersect(*Recipe);

4161 return CommonMetadata;

4162}

4163

4164template

4167 const Loop *L) {

4168 static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,

4169 "Only Load and Store opcodes supported");

4170 constexpr bool IsLoad = (Opcode == Instruction::Load);

4173

4174

4180 if (!RepR || RepR->getOpcode() != Opcode || !RepR->isPredicated())

4181 continue;

4182

4183

4184 VPValue *Addr = RepR->getOperand(IsLoad ? 0 : 1);

4187 RecipesByAddress[AddrSCEV].push_back(RepR);

4188 }

4189 }

4190

4191

4194 return TypeInfo.inferScalarType(IsLoad ? Recipe : Recipe->getOperand(0));

4195 };

4196 for (auto &[Addr, Recipes] : RecipesByAddress) {

4197 if (Recipes.size() < 2)

4198 continue;

4199

4200

4202 if (!RecipeI)

4203 continue;

4204

4205 VPValue *MaskI = RecipeI->getMask();

4206 Type *TypeI = GetLoadStoreValueType(RecipeI);

4209 RecipeI = nullptr;

4210

4211

4212 bool HasComplementaryMask = false;

4214 if (!RecipeJ)

4215 continue;

4216

4217 VPValue *MaskJ = RecipeJ->getMask();

4218 Type *TypeJ = GetLoadStoreValueType(RecipeJ);

4219 if (TypeI == TypeJ) {

4220

4221

4225 RecipeJ = nullptr;

4226 }

4227 }

4228

4229 if (HasComplementaryMask) {

4230 assert(Group.size() >= 2 && "must have at least 2 entries");

4231 AllGroups.push_back(std::move(Group));

4232 }

4233 }

4234 }

4235

4236 return AllGroups;

4237}

4238

4239

4240template

4244 return cast(A->getUnderlyingInstr())->getAlign() <

4246 });

4247}

4248

4250 const Loop *L) {

4254 return;

4255

4257

4258

4259 for (auto &Group : Groups) {

4260

4263 });

4264

4265

4269

4270

4273 false))

4274 continue;

4275

4276

4278

4279

4281

4282

4283

4285 LoadWithMinAlign->getUnderlyingInstr(), {EarliestLoad->getOperand(0)},

4286 false, nullptr, *EarliestLoad,

4287 CommonMetadata);

4288

4289 UnpredicatedLoad->insertBefore(EarliestLoad);

4290

4291

4293 Load->replaceAllUsesWith(UnpredicatedLoad);

4294 Load->eraseFromParent();

4295 }

4296 }

4297}

4298

4299static bool

4302 if (!StoreLoc || !StoreLoc->AATags.Scope)

4303 return false;

4304

4305

4306

4308 StoresToSink.end());

4309

4313 true, &StoresToSinkSet);

4314}

4315

4317 const Loop *L) {

4321 return;

4322

4324

4325 for (auto &Group : Groups) {

4328 });

4329

4331 continue;

4332

4333

4334

4337

4338

4340

4341

4342 VPValue *SelectedValue = Group[0]->getOperand(0);

4344

4345 for (unsigned I = 1; I < Group.size(); ++I) {

4346 VPValue *Mask = Group[I]->getMask();

4348 SelectedValue = Builder.createSelect(Mask, Value, SelectedValue,

4350 }

4351

4352

4354

4355

4356 auto *UnpredicatedStore =

4358 {SelectedValue, LastStore->getOperand(1)},

4359 false,

4360 nullptr, *LastStore, CommonMetadata);

4361 UnpredicatedStore->insertBefore(*InsertBB, LastStore->getIterator());

4362

4363

4365 Store->eraseFromParent();

4366 }

4367}

4368

4372 assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");

4373 assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");

4374

4376

4377

4378

4383 return;

4384

4385

4386

4387

4388

4392 return;

4393 const SCEV *VFxUF = SE.getElementCount(TCScev->getType(), BestVF * BestUF);

4397}

4398

4403 return;

4404

4407 auto *TCMO = Builder.createNaryOp(

4411}

4412

4415 return;

4416

4423

4424

4425

4426

4427

4428

4433 continue;

4435 auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](VPUser *U) {

4437 return !U->usesScalars(DefR) || ParentRegion != LoopRegion;

4438 };

4444 none_of(DefR->users(), UsesVectorOrInsideReplicateRegion))

4445 continue;

4446

4448 unsigned Opcode = ScalarTy->isStructTy()

4451 auto *BuildVector = new VPInstruction(Opcode, {DefR});

4453

4454 DefR->replaceUsesWithIf(

4455 BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](

4456 VPUser &U, unsigned) {

4457 return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);

4458 });

4459 }

4460 }

4461

4462

4463

4464

4465

4466 for (VPBasicBlock *VPBB : VPBBsInsideLoopRegion) {

4470 continue;

4471 for (VPValue *Def : R.definedValues()) {

4472

4473

4474

4475

4476

4478 continue;

4479

4480

4481

4482

4483

4484 auto IsCandidateUnpackUser = [Def](VPUser *U) {

4486 return U->usesScalars(Def) &&

4487 (!ParentRegion || !ParentRegion->isReplicator());

4488 };

4489 if (none_of(Def->users(), IsCandidateUnpackUser))

4490 continue;

4491

4493 if (R.isPhi())

4494 Unpack->insertBefore(*VPBB, VPBB->getFirstNonPhi());

4495 else

4496 Unpack->insertAfter(&R);

4497 Def->replaceUsesWithIf(Unpack,

4498 [&IsCandidateUnpackUser](VPUser &U, unsigned) {

4499 return IsCandidateUnpackUser(&U);

4500 });

4501 }

4502 }

4503 }

4504}

4505

4508 bool TailByMasking,

4509 bool RequiresScalarEpilogue) {

4511 assert(VectorTC.isLiveIn() && "vector-trip-count must be a live-in");

4512

4513

4515 return;

4516

4519 VPBuilder Builder(VectorPHVPBB, VectorPHVPBB->begin());

4521

4522

4523

4524

4525

4526

4527

4528

4529

4530 if (TailByMasking) {

4531 TC = Builder.createNaryOp(

4532 Instruction::Add,

4533 {TC, Builder.createNaryOp(Instruction::Sub,

4536 }

4537

4538

4539

4540

4541

4542

4544 Builder.createNaryOp(Instruction::URem, {TC, Step},

4546

4547

4548

4549

4550

4551

4552

4553 if (RequiresScalarEpilogue) {

4554 assert(!TailByMasking &&

4555 "requiring scalar epilogue is not supported with fail folding");

4558 R = Builder.createSelect(IsZero, Step, R);

4559 }

4560

4561 VPValue *Res = Builder.createNaryOp(

4564}

4565

4572

4573

4574

4575

4576

4577

4580 Builder.createElementCount(TCTy, VFEC * Plan.getUF());

4582 return;

4583 }

4584

4585

4586

4587 VPValue *RuntimeVF = Builder.createElementCount(TCTy, VFEC);

4591 BC, [&VF](VPUser &U, unsigned) { return !U.usesScalars(&VF); });

4592 }

4594

4596 VPValue *MulByUF = Builder.createOverflowingOp(

4597 Instruction::Mul, {RuntimeVF, UF}, {true, false});

4599}

4600

4604 SCEVExpander Expander(SE, DL, "induction", false);

4605

4607 BasicBlock *EntryBB = Entry->getIRBasicBlock();

4611 continue;

4613 if (!ExpSCEV)

4614 break;

4615 const SCEV *Expr = ExpSCEV->getSCEV();

4618 ExpandedSCEVs[ExpSCEV->getSCEV()] = Res;

4623 ExpSCEV->eraseFromParent();

4624 }

4626 "VPExpandSCEVRecipes must be at the beginning of the entry block, "

4627 "after any VPIRInstructions");

4628

4629

4630 auto EI = Entry->begin();

4634 EI++;

4635 continue;

4636 }

4638 }

4639

4640 return ExpandedSCEVs;

4641}

4642

4643

4644

4645

4646

4647

4648

4649

4650

4652 VPValue *OpV, unsigned Idx) {

4655 if (!Member0OpR)

4656 return Member0Op == OpV;

4658 return !W->getMask() && Member0Op == OpV;

4660 return IR->getInterleaveGroup()->isFull() && IR->getVPValue(Idx) == OpV;

4661 return false;

4662}

4663

4664

4665

4666

4671 if (!InterleaveR || InterleaveR->getMask())

4672 return false;

4673

4674 Type *GroupElementTy = nullptr;

4678 [&TypeInfo, GroupElementTy](VPValue *Op) {

4679 return TypeInfo.inferScalarType(Op) == GroupElementTy;

4680 }))

4681 return false;

4682 } else {

4683 GroupElementTy =

4686 [&TypeInfo, GroupElementTy](VPValue *Op) {

4687 return TypeInfo.inferScalarType(Op) == GroupElementTy;

4688 }))

4689 return false;

4690 }

4691

4696 return IG->getFactor() == VFMin && IG->getNumMembers() == VFMin &&

4697 GroupSize == VectorRegWidth;

4698}

4699

4700

4703 return true;

4705 return RepR && RepR->isSingleScalar();

4706}

4707

4708

4709

4712 auto *R = V->getDefiningRecipe();

4713 if (!R || NarrowedOps.contains(V))

4714 return V;

4715

4717 return V;

4718

4720 for (unsigned Idx = 0, E = WideMember0->getNumOperands(); Idx != E; ++Idx)

4721 WideMember0->setOperand(

4722 Idx,

4724 return V;

4725 }

4726

4728

4729

4730 auto *LI = cast(LoadGroup->getInterleaveGroup()->getInsertPos());

4732 *LI, LoadGroup->getAddr(), LoadGroup->getMask(), true,

4733 false, {}, LoadGroup->getDebugLoc());

4734 L->insertBefore(LoadGroup);

4735 NarrowedOps.insert(L);

4736 return L;

4737 }

4738

4740 assert(RepR->isSingleScalar() &&

4742 "must be a single scalar load");

4743 NarrowedOps.insert(RepR);

4744 return RepR;

4745 }

4746

4748 VPValue *PtrOp = WideLoad->getAddr();

4750 PtrOp = VecPtr->getOperand(0);

4751

4752

4753 auto *N = new VPReplicateRecipe(&WideLoad->getIngredient(), {PtrOp},

4754 true,

4755 nullptr, {}, *WideLoad);

4756 N->insertBefore(WideLoad);

4758 return N;

4759}

4760

4765 return;

4766

4768

4772 continue;

4773

4776 continue;

4777

4778

4779

4780

4781

4782 if (R.isPhi())

4783 return;

4784

4786 if (R.mayWriteToMemory() && !InterleaveR)

4787 return;

4788

4789

4790

4791

4792

4793

4795 return;

4796

4797

4798

4799 if (!InterleaveR)

4800 continue;

4801

4802

4804 VectorRegWidth))

4805 return;

4806

4807

4808 if (InterleaveR->getStoredValues().empty())

4809 continue;

4810

4811

4812

4813 auto *Member0 = InterleaveR->getStoredValues()[0];

4815 all_of(InterleaveR->getStoredValues(),

4816 [Member0](VPValue *VPV) { return Member0 == VPV; })) {

4817 StoreGroups.push_back(InterleaveR);

4818 continue;

4819 }

4820

4821

4822

4823 if (all_of(enumerate(InterleaveR->getStoredValues()), [](auto Op) {

4824 VPRecipeBase *DefR = Op.value()->getDefiningRecipe();

4825 if (!DefR)

4826 return false;

4827 auto *IR = dyn_cast(DefR);

4828 return IR && IR->getInterleaveGroup()->isFull() &&

4829 IR->getVPValue(Op.index()) == Op.value();

4830 })) {

4831 StoreGroups.push_back(InterleaveR);

4832 continue;

4833 }

4834

4835

4836

4837 auto *WideMember0 =

4839 if (!WideMember0)

4840 return;

4841 for (const auto &[I, V] : enumerate(InterleaveR->getStoredValues())) {

4843 if (!R || R->getOpcode() != WideMember0->getOpcode() ||

4844 R->getNumOperands() > 2)

4845 return;

4847 [WideMember0, Idx = I](const auto &P) {

4848 const auto &[OpIdx, OpV] = P;

4849 return !canNarrowLoad(WideMember0, OpIdx, OpV, Idx);

4850 }))

4851 return;

4852 }

4853 StoreGroups.push_back(InterleaveR);

4854 }

4855

4856 if (StoreGroups.empty())

4857 return;

4858

4859

4861

4862 for (auto *StoreGroup : StoreGroups) {

4865 auto *SI =

4866 cast(StoreGroup->getInterleaveGroup()->getInsertPos());

4868 *SI, StoreGroup->getAddr(), Res, nullptr, true,

4869 false, {}, StoreGroup->getDebugLoc());

4870 S->insertBefore(StoreGroup);

4871 StoreGroup->eraseFromParent();

4872 }

4873

4874

4875

4879

4886 Instruction::Mul, {VScale, UF}, {true, false});

4889 } else {

4890 Inc->setOperand(1, UF);

4893 }

4895}

4896

4897

4898

4900 VPlan &Plan, ElementCount VF, std::optional VScaleForTuning) {

4902 auto *MiddleTerm =

4904

4905 if (!MiddleTerm)

4906 return;

4907

4909 "must have a BranchOnCond");

4910

4912 if (VF.isScalable() && VScaleForTuning.has_value())

4913 VectorStep *= *VScaleForTuning;

4914 assert(VectorStep > 0 && "trip count should not be zero");

4916 MDNode *BranchWeights =

4918 MiddleTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);

4919}

4920

4921

4922

4923

4929

4930

4931 if (WideIntOrFp && WideIntOrFp->getTruncInst())

4932 return nullptr;

4933

4937 VPValue *EndValue = VectorTC;

4938 if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {

4941 Start, VectorTC, Step);

4942 }

4943

4944

4945

4947 if (ScalarTypeOfWideIV != TypeInfo.inferScalarType(EndValue)) {

4948 EndValue = VectorPHBuilder.createScalarCast(Instruction::Trunc, EndValue,

4949 ScalarTypeOfWideIV,

4951 }

4952

4953 return EndValue;

4954}

4955

4960 auto *MiddleVPBB = cast(ScalarPH->getPredecessors()[0]);

4964 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());

4967

4968

4969

4974 IVEndValues[WideIVR] = EndValue;

4975 ResumePhiR->setOperand(0, EndValue);

4976 ResumePhiR->setName("bc.resume.val");

4977 continue;

4978 }

4979

4980

4981

4983 "should only skip truncated wide inductions");

4984 continue;

4985 }

4986

4987

4988

4989

4991 auto *ResumeFromVectorLoop = VectorPhiR->getBackedgeValue();

4993 "Cannot handle loops with uncountable early exits");

4994 if (IsFOR) {

4995 auto *ExtractPart = MiddleBuilder.createNaryOp(

4997 ResumeFromVectorLoop = MiddleBuilder.createNaryOp(

4999 "vector.recur.extract");

5000 }

5001 ResumePhiR->setName(IsFOR ? "scalar.recur.init" : "bc.merge.rdx");

5002 ResumePhiR->setOperand(0, ResumeFromVectorLoop);

5003 }

5004}

5005

5011 VPBuilder ScalarPHBuilder(ScalarPHVPBB);

5012 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());

5013

5014 auto IsScalableOne = [](ElementCount VF) -> bool {

5016 };

5017

5020 if (!FOR)

5021 continue;

5022

5024 "Cannot handle loops with uncountable early exits");

5025

5026

5027

5028

5029

5030

5031

5032

5033

5034

5035

5036

5037

5038

5039

5040

5041

5042

5043

5044

5045

5046

5047

5048

5049

5050

5051

5052

5053

5054

5055

5056

5057

5058

5059

5060

5061

5062

5063

5064

5065

5066

5067

5068

5069

5070

5071

5072

5073

5074

5075

5076

5077

5078

5079

5080

5081

5082

5083

5084

5085

5086

5087

5088

5089

5090

5091

5092

5093

5094

5095

5097 make_range(MiddleVPBB->getFirstNonPhi(), MiddleVPBB->end()))) {

5099 continue;

5100

5101

5102

5103

5104

5105

5108 return;

5111 "vector.recur.extract.for.phi");

5113 }

5114 }

5115}


This file provides utility analysis objects describing memory locations.


This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.


This is the interface for a metadata-based scoped no-alias analysis.

This file defines generic set operations that may be used on set's of different types,...

This file implements a set that has insertion order iteration characteristics.

This file defines the SmallPtrSet class.


This file implements the TypeSwitch template, which mimics a switch() statement whose cases are type ...

This file implements dominator tree analysis for a single level of a VPlan's H-CFG.

This file contains the declarations of different VPlan-related auxiliary helpers.

static VPValue * optimizeLatchExitInductionUser(VPlan &Plan, VPTypeAnalysis &TypeInfo, VPBlockBase *PredVPBB, VPValue *Op, DenseMap< VPValue *, VPValue * > &EndValues, ScalarEvolution &SE)

Attempts to optimize the induction variable exit values for users in the exit block coming from the latch in the original scalar loop.

Definition VPlanTransforms.cpp:945

static void removeCommonBlendMask(VPBlendRecipe *Blend)

Try to see if all of Blend's masks share a common value logically and'ed and remove it from the masks.

Definition VPlanTransforms.cpp:1590

static void tryToCreateAbstractReductionRecipe(VPReductionRecipe *Red, VPCostContext &Ctx, VFRange &Range)

This function tries to create abstract recipes from the reduction recipe for following optimizations and cost estimation.

Definition VPlanTransforms.cpp:4033
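Several linked lines of this function's body are elided in the listing above. As a hedged, condensed sketch of the dispatch it performs (helper and recipe names taken from the surrounding entries, not a verbatim reconstruction):

// Try the mul-accumulate pattern first, then the extended-reduction
// pattern; splice the abstract recipe in place of the original reduction.
VPExpressionRecipe *AbstractR = nullptr;
auto IP = std::next(Red->getIterator());
auto *VPBB = Red->getParent();
if (auto *MulAcc = tryToMatchAndCreateMulAccumulateReduction(Red, Ctx, Range))
  AbstractR = MulAcc;
else if (auto *ExtRed = tryToMatchAndCreateExtendedReduction(Red, Ctx, Range))
  AbstractR = ExtRed;
if (!AbstractR)
  return;
AbstractR->insertBefore(*VPBB, IP); // assumed block/iterator overload
Red->replaceAllUsesWith(AbstractR);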

static VPReplicateRecipe * findRecipeWithMinAlign(ArrayRef< VPReplicateRecipe * > Group)

Definition VPlanTransforms.cpp:4242

static bool sinkScalarOperands(VPlan &Plan)

Definition VPlanTransforms.cpp:211

static bool cannotHoistOrSinkRecipe(const VPRecipeBase &R)

Return true if we do not know how to (mechanically) hoist or sink R out of a loop region.

Definition VPlanTransforms.cpp:195

static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF, unsigned BestUF, PredicatedScalarEvolution &PSE)

Try to simplify the branch condition of Plan.

Definition VPlanTransforms.cpp:1886

static SmallVector< SmallVector< VPReplicateRecipe *, 4 > > collectComplementaryPredicatedMemOps(VPlan &Plan, ScalarEvolution &SE, const Loop *L)

Definition VPlanTransforms.cpp:4166

static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo)

Try to simplify VPSingleDefRecipe Def.

Definition VPlanTransforms.cpp:1150

static void removeRedundantInductionCasts(VPlan &Plan)

Remove redundant casts of inductions.

Definition VPlanTransforms.cpp:552

static bool tryToReplaceALMWithWideALM(VPlan &Plan, ElementCount VF, unsigned UF)

Try to replace multiple active lane masks used for control flow with a single, wide active lane mask.

Definition VPlanTransforms.cpp:1794

static std::optional< std::pair< bool, unsigned > > getOpcodeOrIntrinsicID(const VPSingleDefRecipe *R)

Get any instruction opcode or intrinsic ID data embedded in recipe R.

Definition VPlanTransforms.cpp:1058

static VPExpressionRecipe * tryToMatchAndCreateExtendedReduction(VPReductionRecipe *Red, VPCostContext &Ctx, VFRange &Range)

This function tries to convert extended in-loop reductions to VPExpressionRecipe and clamps the Range if it is beneficial and valid.

Definition VPlanTransforms.cpp:3804

static VPScalarIVStepsRecipe * createScalarIVSteps(VPlan &Plan, InductionDescriptor::InductionKind Kind, Instruction::BinaryOps InductionOpcode, FPMathOperator *FPBinOp, Instruction *TruncI, VPValue *StartV, VPValue *Step, DebugLoc DL, VPBuilder &Builder)

Definition VPlanTransforms.cpp:671

static RemoveMask_match< Op0_t, Op1_t > m_RemoveMask(const Op0_t &In, Op1_t &Out)

Match a specific mask In, or a combination of it (logical-and In, Out).

Definition VPlanTransforms.cpp:2759
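A hedged usage sketch; R, MaskIdx and HeaderMask are hypothetical stand-ins for a masked recipe, the index of its mask operand, and the mask to strip:

// If R's mask is HeaderMask itself, or (logical-and HeaderMask, Rem),
// bind the remainder to Rem so the header mask can be dropped.
VPValue *Rem = nullptr;
if (match(R.getOperand(MaskIdx), m_RemoveMask(HeaderMask, Rem)))
  R.setOperand(MaskIdx, Rem); // handling when the mask was HeaderMask alone is assumed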

static VPIRMetadata getCommonMetadata(ArrayRef< VPReplicateRecipe * > Recipes)

Definition VPlanTransforms.cpp:4157

static VPValue * getPredicatedMask(VPRegionBlock *R)

If R is a region with a VPBranchOnMaskRecipe in the entry block, return the mask.

Definition VPlanTransforms.cpp:304

static bool sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR, VPRecipeBase *Previous, VPDominatorTree &VPDT)

Sink users of FOR after the recipe defining the previous value Previous of the recurrence.

Definition VPlanTransforms.cpp:2019

static bool mergeReplicateRegionsIntoSuccessors(VPlan &Plan)

Definition VPlanTransforms.cpp:336

static VPActiveLaneMaskPHIRecipe * addVPLaneMaskPhiAndUpdateExitBranch(VPlan &Plan, bool DataAndControlFlowWithoutRuntimeCheck)

Definition VPlanTransforms.cpp:2584

static void expandVPWidenPointerInduction(VPWidenPointerInductionRecipe *R, VPTypeAnalysis &TypeInfo)

Expand a VPWidenPointerInductionRecipe into executable recipes, for the initial value, phi and backedge value.

Definition VPlanTransforms.cpp:3548

static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL)

Replace recipes with their EVL variants.

Definition VPlanTransforms.cpp:2848

static bool canSinkStoreWithNoAliasCheck(ArrayRef< VPReplicateRecipe * > StoresToSink)

Definition VPlanTransforms.cpp:4300

static bool isDeadRecipe(VPRecipeBase &R)

Returns true if R is dead and can be removed.

Definition VPlanTransforms.cpp:624

static void legalizeAndOptimizeInductions(VPlan &Plan)

Legalize VPWidenPointerInductionRecipe, by replacing it with a PtrAdd (IndStart, ScalarIVSteps (0, Step)).

Definition VPlanTransforms.cpp:751

static void addReplicateRegions(VPlan &Plan)

Definition VPlanTransforms.cpp:468

static VPValue * tryToFoldLiveIns(VPSingleDefRecipe &R, ArrayRef< VPValue * > Operands, const DataLayout &DL, VPTypeAnalysis &TypeInfo)

Try to fold R using InstSimplifyFolder.

Definition VPlanTransforms.cpp:1080

static VPValue * tryToComputeEndValueForInduction(VPWidenInductionRecipe *WideIV, VPBuilder &VectorPHBuilder, VPTypeAnalysis &TypeInfo, VPValue *VectorTC)

Compute and return the end value for WideIV, unless it is truncated.

Definition VPlanTransforms.cpp:4924

static void removeRedundantExpandSCEVRecipes(VPlan &Plan)

Remove redundant ExpandSCEVRecipes in Plan's entry block by replacing them with already existing recipes expanding the same SCEV expression.

Definition VPlanTransforms.cpp:1018

static bool simplifyKnownEVL(VPlan &Plan, ElementCount VF, PredicatedScalarEvolution &PSE)

From the definition of llvm.experimental.get.vector.length, VPInstruction::ExplicitVectorLength(AVL) ...

Definition VPlanTransforms.cpp:1971

static bool isConditionTrueViaVFAndUF(VPValue *Cond, VPlan &Plan, ElementCount BestVF, unsigned BestUF, ScalarEvolution &SE)

Return true if Cond is known to be true for given BestVF and BestUF.

Definition VPlanTransforms.cpp:1757
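A hedged sketch of the SCEV query such a check can reduce to, using the getElementCount and isKnownPredicate entries documented below; TC is a hypothetical SCEV for the trip count:

// The latch condition is known true when a single vector iteration
// covers the whole trip count, i.e. TC <=u VF * UF.
const SCEV *VFxUF = SE.getElementCount(TC->getType(), BestVF * BestUF);
bool AlwaysTrue = SE.isKnownPredicate(CmpInst::ICMP_ULE, TC, VFxUF);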

static bool hoistPreviousBeforeFORUsers(VPFirstOrderRecurrencePHIRecipe *FOR, VPRecipeBase *Previous, VPDominatorTree &VPDT)

Try to hoist Previous and its operands before all users of FOR.

Definition VPlanTransforms.cpp:2074

static VPValue * scalarizeVPWidenPointerInduction(VPWidenPointerInductionRecipe *PtrIV, VPlan &Plan, VPBuilder &Builder)

Scalarize a VPWidenPointerInductionRecipe by replacing it with a PtrAdd (IndStart,...

Definition VPlanTransforms.cpp:726

static bool canHoistOrSinkWithNoAliasCheck(const MemoryLocation &MemLoc, VPBasicBlock *FirstBB, VPBasicBlock *LastBB, bool CheckReads, const SmallPtrSetImpl< VPRecipeBase * > *ExcludeRecipes=nullptr)

Definition VPlanTransforms.cpp:146

static SmallVector< VPUser * > collectUsersRecursively(VPValue *V)

Definition VPlanTransforms.cpp:710

static void recursivelyDeleteDeadRecipes(VPValue *V)

Definition VPlanTransforms.cpp:1035

static VPValue * optimizeEarlyExitInductionUser(VPlan &Plan, VPTypeAnalysis &TypeInfo, VPBlockBase *PredVPBB, VPValue *Op, ScalarEvolution &SE)

Attempts to optimize the induction variable exit values for users in the early exit block.

Definition VPlanTransforms.cpp:890

static VPWidenInductionRecipe * getOptimizableIVOf(VPValue *VPV, ScalarEvolution &SE)

Check if VPV is an untruncated wide induction, either before or after the increment.

Definition VPlanTransforms.cpp:832

static VPRegionBlock * createReplicateRegion(VPReplicateRecipe *PredRecipe, VPlan &Plan)

Definition VPlanTransforms.cpp:425

static VPBasicBlock * getPredicatedThenBlock(VPRegionBlock *R)

If R is a triangle region, return the 'then' block of the triangle.

Definition VPlanTransforms.cpp:314

static VPValue * narrowInterleaveGroupOp(VPValue *V, SmallPtrSetImpl< VPValue * > &NarrowedOps)

Definition VPlanTransforms.cpp:4711

static void simplifyBlends(VPlan &Plan)

Normalize and simplify VPBlendRecipes.

Definition VPlanTransforms.cpp:1607

static bool isConsecutiveInterleaveGroup(VPInterleaveRecipe *InterleaveR, ElementCount VF, VPTypeAnalysis &TypeInfo, TypeSize VectorRegWidth)

Returns true if IR is a full interleave group with factor and number of members both equal to VF.

Definition VPlanTransforms.cpp:4667

static VPRecipeBase * optimizeMaskToEVL(VPValue *HeaderMask, VPRecipeBase &CurRecipe, VPTypeAnalysis &TypeInfo, VPValue &EVL)

Try to optimize a CurRecipe masked by HeaderMask to a corresponding EVL-based recipe without the header mask.

Definition VPlanTransforms.cpp:2772

static bool isAlreadyNarrow(VPValue *VPV)

Returns true if VPV is a narrow VPValue.

Definition VPlanTransforms.cpp:4701

static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan, ElementCount BestVF, unsigned BestUF)

Optimize the width of vector induction variables in Plan based on a known constant Trip Count, VF and UF.

Definition VPlanTransforms.cpp:1688

static VPExpressionRecipe * tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red, VPCostContext &Ctx, VFRange &Range)

This function tries to convert in-loop multiply-accumulate reductions to VPExpressionRecipe and clamps the Range if it is beneficial and valid.

Definition VPlanTransforms.cpp:3861

static void expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR, VPTypeAnalysis &TypeInfo)

Expand a VPWidenIntOrFpInduction into executable recipes, for the initial value, phi and backedge value.

Definition VPlanTransforms.cpp:3438

static VPSingleDefRecipe * findHeaderMask(VPlan &Plan)

Collect the header mask with the pattern: (ICMP_ULE, WideCanonicalIV, backedge-taken-count). TODO: Introduce an explicit recipe for the header mask instead of searching for the pattern manually.

Definition VPlanTransforms.cpp:2662
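A hedged sketch of the match described above; R is a hypothetical candidate recipe:

// The header mask is an unsigned <= compare of the wide canonical IV
// against the backedge-taken count.
VPValue *BTC = Plan.getOrCreateBackedgeTakenCount();
bool IsHeaderMask =
    match(&R, m_SpecificICmp(CmpInst::ICMP_ULE, m_VPValue(), m_Specific(BTC)));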

static void removeRedundantCanonicalIVs(VPlan &Plan)

Try to replace VPWidenCanonicalIVRecipes with a widened canonical IV recipe, if it exists.

Definition VPlanTransforms.cpp:585

static bool canNarrowLoad(VPWidenRecipe *WideMember0, unsigned OpIdx, VPValue *OpV, unsigned Idx)

Returns true if V is a VPWidenLoadRecipe or VPInterleaveRecipe that can be converted to a narrower recipe.

Definition VPlanTransforms.cpp:4651

static void narrowToSingleScalarRecipes(VPlan &Plan)

Definition VPlanTransforms.cpp:1506

This file provides utility VPlan to VPlan transformations.

This file declares the class VPlanVerifier, which contains utility functions to check the consistency of a VPlan.

This file contains the declarations of the Vectorization Plan base classes:


Class for arbitrary precision integers.

LLVM_ABI APInt zext(unsigned width) const

Zero extend to a new width.

unsigned getActiveBits() const

Compute the number of active bits in the value.

unsigned getBitWidth() const

Return the number of bits in the APInt.

LLVM_ABI APInt sext(unsigned width) const

Sign extend to a new width.

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

const T & back() const

back - Get the last element.

const T & front() const

front - Get the first element.

LLVM Basic Block Representation.

const Function * getParent() const

Return the enclosing method, or null if none.

LLVM_ABI const DataLayout & getDataLayout() const

Get the data layout of the module this basic block belongs to.

const Instruction * getTerminator() const LLVM_READONLY

Returns the terminator instruction if the block is well formed or null if the block is not well formed.

This class represents a function call, abstracting a target machine's calling convention.

@ ICMP_ULT

unsigned less than

@ ICMP_ULE

unsigned less or equal

@ FCMP_UNO

1 0 0 0 True if unordered: isnan(X) | isnan(Y)

Predicate getInversePredicate() const

For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...

An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...

static ConstantInt * getSigned(IntegerType *Ty, int64_t V)

Return a ConstantInt with the specified value for the specified type.

static LLVM_ABI Constant * getAllOnesValue(Type *Ty)

static LLVM_ABI Constant * getNullValue(Type *Ty)

Constructor to create a '0' constant of arbitrary type.

A parsed version of the target data layout string in and methods for querying it.

static DebugLoc getCompilerGenerated()

static DebugLoc getUnknown()

ValueT lookup(const_arg_type_t< KeyT > Val) const

lookup - Return the entry for the specified key, or a default constructed value if no such entry exists.

std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)

bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const

dominates - Returns true iff A dominates B.

constexpr bool isVector() const

One or more elements.

static constexpr ElementCount getScalable(ScalarTy MinVal)

Utility class for floating point operations which can have information about relaxed accuracy require...

Represents flags for the getelementptr instruction/expression.

GEPNoWrapFlags withoutNoUnsignedWrap() const

static GEPNoWrapFlags none()

an instruction for type-safe pointer arithmetic to access elements of arrays and structs

A struct for saving information about induction variables.

InductionKind

This enum represents the kinds of inductions that we support.

@ IK_PtrInduction

Pointer induction var. Step = C.

@ IK_IntInduction

Integer induction variable. Step = C.

InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.

LLVM_ABI const DataLayout & getDataLayout() const

Get the data layout of the module this instruction belongs to.

static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)

This static method is the primary way of constructing an IntegerType.

The group of interleaved loads/stores sharing the same stride and close to each other.

InstTy * getMember(uint32_t Index) const

Get the member with the given index Index.

uint32_t getNumMembers() const

This is an important class for using LLVM in a threaded context.

An instruction for reading from memory.

static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)

Test a Predicate on a Range of VF's.
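A hedged usage sketch; the enclosing class is not shown on this page and the profitability predicate is hypothetical:

// Clamp Range so every VF in it yields the same decision as the first
// VF, and return that decision.
bool Beneficial = LoopVectorizationPlanner::getDecisionAndClampRange(
    [&](ElementCount VF) { return isBeneficialAtVF(VF); }, Range);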

Represents a single loop in the control flow graph.

LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)

Return metadata containing two branch weights.
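A sketch mirroring the middle-block weighting in the listing above; VectorStep is the per-vector-iteration step estimated there:

// Assume one in VectorStep iterations leaves the vector loop.
MDBuilder MDB(Plan.getContext());
MDNode *BranchWeights =
    MDB.createBranchWeights(1, VectorStep - 1, /*IsExpected=*/false);
MiddleTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);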

This class implements a map that also provides access to all stored values in a deterministic order.

ValueT lookup(const KeyT &Key) const

Representation for a specific memory location.

AAMDNodes AATags

The metadata nodes which describe the aliasing of the location (each member is null if that kind of metadata is unavailable).

An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...

ScalarEvolution * getSE() const

Returns the ScalarEvolution analysis used.

LLVM_ABI const SCEV * getSCEV(Value *V)

Returns the SCEV expression of V, in the context of the current SCEV predicate.

static LLVM_ABI unsigned getOpcode(RecurKind Kind)

Returns the opcode corresponding to the RecurrenceKind.

unsigned getOpcode() const

RegionT * getParent() const

Get the parent of the Region.

This class uses information about analyzed scalars to rewrite expressions in canonical form.

LLVM_ABI Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)

Insert code to directly compute the specified SCEV expression into the program.

static const SCEV * rewrite(const SCEV *Scev, ScalarEvolution &SE, ValueToSCEVMapTy &Map)

This class represents an analyzed expression in the program.

LLVM_ABI Type * getType() const

Return the LLVM type of this SCEV expression.

The main scalar evolution driver.

const DataLayout & getDataLayout() const

Return the DataLayout associated with the module this SCEV instance is operating on.

LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)

Return the SCEV object corresponding to -V.

LLVM_ABI const SCEV * getSCEV(Value *V)

Return a SCEV expression for the full generality of the specified expression.

LLVM_ABI const SCEV * getUDivExpr(const SCEV *LHS, const SCEV *RHS)

Get a canonical unsigned division expression, or something simpler if possible.

LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)

LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)

Get a canonical multiply expression, or something simpler if possible.

LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)

Test if the given expression is known to satisfy the condition described by Pred, LHS,...

static LLVM_ABI bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias)

This class represents the LLVM 'select' instruction.

A vector that has set insertion semantics.

size_type size() const

Determine the number of elements in the SetVector.

bool insert(const value_type &X)

Insert a new element into the SetVector.

A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...

std::pair< iterator, bool > insert(PtrType Ptr)

Inserts Ptr if and only if there is no element in the container equal to Ptr.

bool contains(ConstPtrType Ptr) const

SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.

This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

An instruction for storing to memory.

Provides information about what library functions are available for the current target.

static LLVM_ABI PartialReductionExtendKind getPartialReductionExtendKind(Instruction *I)

Get the kind of extension that an instruction represents.

TargetCostKind

The kind of cost model.

@ TCK_RecipThroughput

Reciprocal throughput.

PartialReductionExtendKind

Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.

static constexpr TypeSize get(ScalarTy Quantity, bool Scalable)

This class implements a switch-like dispatch statement for a value of 'T' using dyn_cast functionality.

TypeSwitch< T, ResultT > & Case(CallableT &&caseFn)

Add a case on the given type.

The instances of the Type class are immutable: once they are created, they are never changed.

static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)

static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)

bool isPointerTy() const

True if this is an instance of PointerType.

static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)

bool isStructTy() const

True if this is an instance of StructType.

LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY

If this is a vector type, return the getPrimitiveSizeInBits value for the element type.

static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)

bool isFloatingPointTy() const

Return true if this is one of the floating-point types.

bool isIntegerTy() const

True if this is an instance of IntegerType.

A recipe for generating the active lane mask for the vector loop that is used to predicate the vector loop.

VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.

void appendRecipe(VPRecipeBase *Recipe)

Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.

RecipeListTy::iterator iterator

Instruction iterators...

iterator begin()

Recipe iterator methods.

iterator_range< iterator > phis()

Returns an iterator range over the PHI-like recipes in the block.

iterator getFirstNonPhi()

Return the position of the first non-phi node recipe in the block.

VPRegionBlock * getEnclosingLoopRegion()

VPBasicBlock * splitAt(iterator SplitAt)

Split current block at SplitAt by inserting a new block between the current block and its successors, and moving all recipes starting at SplitAt to the new block.

VPRecipeBase * getTerminator()

If the block has multiple successors, return the branch recipe terminating the block.

const VPRecipeBase & back() const

A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.

VPValue * getMask(unsigned Idx) const

Return mask number Idx.

unsigned getNumIncomingValues() const

Return the number of incoming values, taking into account that, when normalized, the first incoming value will have no mask.

void setMask(unsigned Idx, VPValue *V)

Set mask number Idx to V.

bool isNormalized() const

A normalized blend is one that has an odd number of operands, whereby the first operand does not have an associated mask.

VPBlockBase is the building block of the Hierarchical Control-Flow Graph.

VPRegionBlock * getParent()

const VPBasicBlock * getExitingBasicBlock() const

size_t getNumSuccessors() const

void swapSuccessors()

Swap successors of the block. The block must have exactly 2 successors.

size_t getNumPredecessors() const

const VPBlocksTy & getPredecessors() const

VPBlockBase * getSinglePredecessor() const

const VPBasicBlock * getEntryBasicBlock() const

VPBlockBase * getSingleHierarchicalPredecessor()

VPBlockBase * getSingleSuccessor() const

const VPBlocksTy & getSuccessors() const

static auto blocksOnly(const T &Range)

Return an iterator range over Range which only includes BlockTy blocks.

static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)

Inserts BlockPtr on the edge between From and To.

static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)

Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.

static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)

Connect VPBlockBases From and To bi-directionally.

static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)

Disconnect VPBlockBases From and To bi-directionally.

A recipe for generating conditional branches on the bits of a mask.

RAII object that stores the current insertion point and restores it when the object is destroyed.

VPlan-based builder utility analogous to IRBuilder.

VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)

VPValue * createElementCount(Type *Ty, ElementCount EC)

VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={})

VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPValue *Start, VPValue *Current, VPValue *Step, const Twine &Name="")

Convert the input value Current to the corresponding value of an induction with Start and Step values, using Start + Current * Step.

static VPBuilder getToInsertAfter(VPRecipeBase *R)

Create a VPBuilder to insert after R.

VPInstruction * createOverflowingOp(unsigned Opcode, ArrayRef< VPValue * > Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")

VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL, const Twine &Name="")

void setInsertPoint(VPBasicBlock *TheBB)

This specifies that created VPInstructions should be appended to the end of the specified block.

VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")

Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
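A sketch following the tail-folded trip-count computation in the listing above; TCTy is an assumed integer type inferred for the trip count:

// Round the trip count up to the next multiple of VFxUF by adding
// (VFxUF - 1) before dividing or taking the remainder.
VPValue *Bump = Builder.createNaryOp(
    Instruction::Sub, {&Plan.getVFxUF(), Plan.getConstantInt(TCTy, 1)});
TC = Builder.createNaryOp(Instruction::Add, {TC, Bump});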

Canonical scalar induction phi of the vector loop.

unsigned getNumDefinedValues() const

Returns the number of values defined by the VPDef.

ArrayRef< VPValue * > definedValues()

Returns an ArrayRef of the values defined by the VPDef.

VPValue * getVPSingleValue()

Returns the only VPValue defined by the VPDef.

VPValue * getVPValue(unsigned I)

Returns the VPValue with index I defined by the VPDef.

A recipe for converting the input value IV value to the corresponding value of an IV with different start and step values, using Start + IV * Step.

Template specialization of the standard LLVM dominator tree utility for VPBlockBases.

bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B)

A recipe for generating the phi node for the current index of elements, adjusted in accordance with EVL value.

A recipe to combine multiple recipes into a single 'expression' recipe, which should be considered a single entity.

A special type of VPBasicBlock that wraps an existing IR basic block.

BasicBlock * getIRBasicBlock() const

Class to record and manage LLVM IR flags.

static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)

Create a new VPIRPhi for I, if it is a PHINode, otherwise create a VPIRInstruction.

This is a concrete Recipe that models a single VPlan-level instruction.

@ ExtractLane

Extracts a single lane (first operand) from a set of vector operands.

@ ExtractPenultimateElement

@ Unpack

Extracts all lanes from its (non-scalable) vector operand.

@ FirstOrderRecurrenceSplice

@ BuildVector

Creates a fixed-width vector containing all operands.

@ BuildStructVector

Given operands of (the same) struct type, creates a struct of fixed-width vectors each containing a struct field of all operands.

@ CanonicalIVIncrementForPart

@ CalculateTripCountMinusVF

const InterleaveGroup< Instruction > * getInterleaveGroup() const

VPValue * getMask() const

Return the mask used by this recipe.

ArrayRef< VPValue * > getStoredValues() const

Return the VPValues stored by this interleave group.

A recipe for interleaved memory operations with vector-predication intrinsics.

VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide load/store and shuffles.

VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from a Branch-on-Mask.

VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.

VPRegionBlock * getRegion()

VPBasicBlock * getParent()

DebugLoc getDebugLoc() const

Returns the debug location of the recipe.

void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)

Unlink this recipe and insert into BB before I.

void insertBefore(VPRecipeBase *InsertPos)

Insert an unlinked recipe into a basic block immediately before the specified recipe.

void insertAfter(VPRecipeBase *InsertPos)

Insert an unlinked Recipe into a basic block immediately after the specified Recipe.

iplist< VPRecipeBase >::iterator eraseFromParent()

This method unlinks 'this' from the containing basic block and deletes it.

Helper class to create VPRecipes from IR instructions.

VPRecipeBase * getRecipe(Instruction *I)

Return the recipe created for given ingredient.

A recipe to represent inloop reduction operations with vector-predication intrinsics,...

A recipe to represent inloop, ordered or partial reduction operations.

VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-Single-Exiting subgraph.

const VPBlockBase * getEntry() const

Type * getCanonicalIVType()

Return the type of the canonical IV for loop regions.

bool isReplicator() const

An indicator whether this region is to generate multiple replicated instances of output IR corresponding to its VPBlockBases.

void setExiting(VPBlockBase *ExitingBlock)

Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.

VPCanonicalIVPHIRecipe * getCanonicalIV()

Returns the canonical induction recipe of the region.

const VPBlockBase * getExiting() const

VPBasicBlock * getPreheaderVPBB()

Returns the pre-header VPBasicBlock of the loop region.

VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original scalar type, one per lane.

bool isSingleScalar() const

VPValue * getMask()

Return the mask of a predicated VPReplicateRecipe.

A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar values.

VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR instructions that define a single result VPValue.

Instruction * getUnderlyingInstr()

Returns the underlying instruction.

VPSingleDefRecipe * clone() override=0

Clone the current recipe.

An analysis for type-inference for VPValues.

LLVMContext & getContext()

Return the LLVMContext used by the analysis.

Type * inferScalarType(const VPValue *V)

Infer the type of V. Returns the scalar type of V.

This class augments VPValue with operands which provide the inverse def-use edges from VPValue's users to their defs.

void setOperand(unsigned I, VPValue *New)

VPValue * getOperand(unsigned N) const

void addOperand(VPValue *Operand)

This is the base class of the VPlan Def/Use graph, used for modeling the data flow into, within and out of the VPlan.

bool isDefinedOutsideLoopRegions() const

Returns true if the VPValue is defined outside any loop.

VPRecipeBase * getDefiningRecipe()

Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe, i.e. is a live-in.

Value * getLiveInIRValue() const

Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.

Value * getUnderlyingValue() const

Return the underlying Value attached to this VPValue.

void setUnderlyingValue(Value *Val)

void replaceAllUsesWith(VPValue *New)

unsigned getNumUsers() const

bool isLiveIn() const

Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.

void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)

Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplace returns true for the given use.
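A sketch of the selective replacement used by the broadcast hoisting in the listing above:

// Redirect only the uses that need the vector value, and never the
// broadcast itself, to avoid a self-reference.
VPV->replaceUsesWithIf(Broadcast, [VPV, Broadcast](VPUser &U, unsigned) {
  return &U != Broadcast && !U.usesScalars(VPV);
});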

A recipe to compute a pointer to the last element of each part of a widened memory access for widened...

A Recipe for widening the canonical induction variable of the vector loop.

VPWidenCastRecipe is a recipe to create vector cast instructions.

Instruction::CastOps getOpcode() const

A recipe for handling GEP instructions.

Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe), providing shared functionality.

PHINode * getPHINode() const

VPValue * getStepValue()

Returns the step value of the induction.

const InductionDescriptor & getInductionDescriptor() const

Returns the induction descriptor for the recipe.

A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...

VPValue * getLastUnrolledPartOperand()

Returns the VPValue representing the value of this induction at the last unrolled part, if it exists.

VPValue * getSplatVFValue()

A recipe for widening vector intrinsics.

A common base class for widening memory operations.

A recipe for widened phis.

VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the recipe.

VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient output IR.

bool hasVF(ElementCount VF) const

LLVMContext & getContext() const

VPBasicBlock * getEntry()

VPValue & getVectorTripCount()

The vector trip count.

bool hasScalableVF() const

VPValue & getVFxUF()

Returns VF * UF of the vector loop region.

VPValue & getVF()

Returns the VF of the vector loop region.

VPValue * getTripCount() const

The trip count of the original loop.

VPValue * getTrue()

Return a VPValue wrapping i1 true.

VPValue * getOrCreateBackedgeTakenCount()

The backedge taken count of the original loop.

VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")

Create a new replicate region with Entry, Exiting and Name.

bool hasUF(unsigned UF) const

ArrayRef< VPIRBasicBlock * > getExitBlocks() const

Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.

VPValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)

Return a VPValue wrapping a ConstantInt with the given type and value.

void setVF(ElementCount VF)

bool isUnrolled() const

Returns true if the VPlan already has been unrolled, i.e.

LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()

Returns the VPRegionBlock of the vector loop.

void resetTripCount(VPValue *NewTripCount)

Resets the trip count for the VPlan.

VPBasicBlock * getMiddleBlock()

Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar tail loop or the exit block.

VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)

Create a new VPBasicBlock with Name and containing Recipe if present.

VPValue * getFalse()

Return a VPValue wrapping i1 false.

VPValue * getOrAddLiveIn(Value *V)

Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.

bool hasScalarVFOnly() const

VPBasicBlock * getScalarPreheader() const

Return the VPBasicBlock for the preheader of the scalar loop.

ArrayRef< VPValue * > getLiveIns() const

Return the list of live-in VPValues available in the VPlan.

VPIRBasicBlock * getScalarHeader() const

Return the VPIRBasicBlock wrapping the header of the scalar loop.

VPValue * getLiveIn(Value *V) const

Return the live-in VPValue for V, if there is one or nullptr otherwise.

VPBasicBlock * getVectorPreheader()

Returns the preheader of the vector loop region, if one exists, or null otherwise.

bool hasScalarTail() const

Returns true if the scalar tail may execute after the vector loop.

LLVM Value Representation.

Type * getType() const

All values are typed, get the type of this value.

iterator_range< user_iterator > users()

LLVM_ABI StringRef getName() const

Return a constant reference to the value's name.

constexpr bool isScalable() const

Returns whether the quantity is scaled by a runtime quantity (vscale).

constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const

constexpr bool isFixed() const

Returns true if the quantity is not scaled by vscale.

constexpr ScalarTy getKnownMinValue() const

Returns the minimum value this quantity can represent.

An efficient, type-erasing, non-owning reference to a callable.

const ParentTy * getParent() const

self_iterator getIterator()

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

LLVM_ABI APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM)

Return A unsign-divided by B, rounded by the given rounding mode.



cst_pred_ty< is_all_ones > m_AllOnes()

Match an integer or vector with all bits set.

m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedStore(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)

Matches MaskedStore Intrinsic.

ap_match< APInt > m_APInt(const APInt *&Res)

Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.

CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)

Matches Trunc.

LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)

Matches L && R either in the form of L & R or L ? R : false.

match_combine_or< CastInst_match< OpTy, ZExtInst >, OpTy > m_ZExtOrSelf(const OpTy &Op)

bool match(Val *V, const Pattern &P)

specificval_ty m_Specific(const Value *V)

Match if we have a specific specified value.

m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedLoad(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)

Matches MaskedLoad Intrinsic.

class_match< ConstantInt > m_ConstantInt()

Match an arbitrary ConstantInt and ignore it.

cst_pred_ty< is_one > m_One()

Match an integer 1 or a vector with all elements equal to 1.

IntrinsicID_match m_Intrinsic()

Match intrinsic calls like this: m_IntrinsicIntrinsic::fabs(m_Value(X))

ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)

Matches SelectInst.

SpecificCmpClass_match< LHS, RHS, CmpInst > m_SpecificCmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)

BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)

deferredval_ty< Value > m_Deferred(Value *const &V)

Like m_Specific(), but works if the specific value to match is determined as part of the same match() expression.

SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)

class_match< CmpInst > m_Cmp()

Matches any compare instruction and ignore it.

BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)

Matches a Add with LHS and RHS in either order.

CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)

match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)

auto m_LogicalAnd()

Matches L && R where L and R are arbitrary values.

CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)

Matches SExt.

BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)

Matches a Mul with LHS and RHS in either order.

MatchFunctor< Val, Pattern > match_fn(const Pattern &P)

A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.

BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)

match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)

Combine two pattern matchers matching L || R.

VPInstruction_match< VPInstruction::ExtractLastLane, VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > > m_ExtractLastLaneOfLastPart(const Op0_t &Op0)

AllRecipe_commutative_match< Instruction::And, Op0_t, Op1_t > m_c_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1)

Match a binary AND operation.

AllRecipe_match< Instruction::Or, Op0_t, Op1_t > m_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)

Match a binary OR operation.

VPInstruction_match< VPInstruction::AnyOf > m_AnyOf()

AllRecipe_commutative_match< Opcode, Op0_t, Op1_t > m_c_Binary(const Op0_t &Op0, const Op1_t &Op1)

AllRecipe_commutative_match< Instruction::Or, Op0_t, Op1_t > m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)

GEPLikeRecipe_match< Op0_t, Op1_t > m_GetElementPtr(const Op0_t &Op0, const Op1_t &Op1)

AllRecipe_match< Opcode, Op0_t, Op1_t > m_Binary(const Op0_t &Op0, const Op1_t &Op1)

VPInstruction_match< VPInstruction::LastActiveLane, Op0_t > m_LastActiveLane(const Op0_t &Op0)

VPInstruction_match< Instruction::ExtractElement, Op0_t, Op1_t > m_ExtractElement(const Op0_t &Op0, const Op1_t &Op1)

specific_intval< 1 > m_False()

VPDerivedIV_match< Op0_t, Op1_t, Op2_t > m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)

VPInstruction_match< VPInstruction::ExtractLastLane, Op0_t > m_ExtractLastLane(const Op0_t &Op0)

VPInstruction_match< VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t > m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)

VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()

specific_intval< 1 > m_True()

VectorEndPointerRecipe_match< Op0_t, Op1_t > m_VecEndPtr(const Op0_t &Op0, const Op1_t &Op1)

VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > m_ExtractLastPart(const Op0_t &Op0)

VPInstruction_match< VPInstruction::Broadcast, Op0_t > m_Broadcast(const Op0_t &Op0)

class_match< VPValue > m_VPValue()

Match an arbitrary VPValue and ignore it.

VPInstruction_match< VPInstruction::ExplicitVectorLength, Op0_t > m_EVL(const Op0_t &Op0)

VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()

BuildVector matches only its opcode, w/o matching its operands as the number of operands is not fixed.

VPInstruction_match< VPInstruction::ExtractPenultimateElement, Op0_t > m_ExtractPenultimateElement(const Op0_t &Op0)

VPInstruction_match< VPInstruction::FirstActiveLane, Op0_t > m_FirstActiveLane(const Op0_t &Op0)

bind_ty< VPInstruction > m_VPInstruction(VPInstruction *&V)

Match a VPInstruction, capturing if we match.

VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
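
The VPlan matchers mirror the IR ones but operate on recipes. A sketch of the kind of check removeBranchOnConst performs (assuming the one-operand m_BranchOnCond overload used throughout this file):

using namespace llvm::VPlanPatternMatch;

VPRecipeBase *Term = VPBB->getTerminator();
if (Term && match(Term, m_BranchOnCond(m_True()))) {
  // The branch always takes the true edge; the other edge is dead.
}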

VPInstruction_match< VPInstruction::ExtractLane, Op0_t, Op1_t > m_ExtractLane(const Op0_t &Op0, const Op1_t &Op1)

bool isSingleScalar(const VPValue *VPV)

Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...

bool isUniformAcrossVFsAndUFs(VPValue *V)

Checks if V is uniform across all VF lanes and UF parts.

VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)

Get or create a VPValue that corresponds to the expansion of Expr.

std::optional< MemoryLocation > getMemoryLocation(const VPRecipeBase &R)

Return a MemoryLocation for R with noalias metadata populated from R, if the recipe is supported and ...
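
A sketch of how such a location can gate alias reasoning, mirroring the scoped-noalias check earlier in this file:

std::optional<MemoryLocation> Loc = getMemoryLocation(R);
// Without both alias-scope and noalias metadata there is nothing for
// scoped alias analysis to reason about.
if (Loc && Loc->AATags.Scope && Loc->AATags.NoAlias) {
  // ... compare against the scopes of other accesses ...
}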

bool onlyFirstLaneUsed(const VPValue *Def)

Returns true if only the first lane of Def is used.
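
Queries like this and isSingleScalar above typically gate narrowing decisions; a sketch (assuming the vputils namespace these helpers live in):

// Safe to generate only lane 0 when no user demands the other lanes.
if (vputils::onlyFirstLaneUsed(Def) || vputils::isSingleScalar(Def)) {
  // ... narrow Def to a single scalar ...
}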

VPIRFlags getFlagsFromIndDesc(const InductionDescriptor &ID)

Extracts and returns NoWrap and FastMath flags from the induction binop in ID.

bool onlyScalarValuesUsed(const VPValue *Def)

Returns true if only scalar values of Def are used by all users.

bool isHeaderMask(const VPValue *V, const VPlan &Plan)

Return true if V is a header mask in Plan.

const SCEV * getSCEVExprForVPValue(const VPValue *V, ScalarEvolution &SE, const Loop *L=nullptr)

Return the SCEV expression for V.

auto drop_begin(T &&RangeOrContainer, size_t N=1)

Return a range covering RangeOrContainer with the first N elements excluded.

auto min_element(R &&Range)

Provide wrappers to std::min_element which take ranges instead of having to pass begin/end explicitly...

bool all_of(R &&range, UnaryPredicate P)

Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.

LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)

Returns intrinsic ID for call.

DenseMap< const Value *, const SCEV * > ValueToSCEVMapTy

auto enumerate(FirstRange &&First, RestRanges &&...Rest)

Given two or more input ranges, returns a new range whose values are tuples (A, B,...
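
A minimal self-contained sketch:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/raw_ostream.h"

static void printNames() {
  llvm::SmallVector<llvm::StringRef> Names = {"start", "step", "vf"};
  for (auto [Idx, Name] : llvm::enumerate(Names))
    llvm::errs() << Idx << ": " << Name << "\n"; // 0: start, 1: step, 2: vf
}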

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.
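
A sketch in this file's idiom:

// dyn_cast yields nullptr when R is not actually a VPReplicateRecipe.
if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R)) {
  bool OneLane = RepR->isSingleScalar();
  (void)OneLane; // e.g. feed a narrowing decision
}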

const Value * getLoadStorePointerOperand(const Value *V)

A helper function that returns the pointer operand of a load or store instruction.

constexpr from_range_t from_range

auto dyn_cast_if_present(const Y &Val)

dyn_cast_if_present - Functionally identical to dyn_cast, except that a null (or none in the case ...

iterator_range< T > make_range(T x, T y)

Convenience function for iterating over sub-ranges.

void append_range(Container &C, Range &&R)

Wrapper function to append range R to container C.

iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)

Make a range that does early increment to allow mutation of the underlying range without disrupting i...
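
This is the usual idiom for erasing recipes mid-walk; a sketch with a hypothetical isDead predicate:

// The iterator advances before the body runs, so erasing the current
// recipe does not invalidate the traversal.
for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB)))
  if (isDead(R)) // hypothetical predicate
    R.eraseFromParent();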

auto cast_or_null(const Y &Val)

iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)

Returns an iterator range to traverse the graph starting at G in depth-first order.

iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)

Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...
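
A sketch that counts recipes across every basic block in a plan, including blocks nested inside regions (blocksOnly filters the traversal down to VPBasicBlocks):

unsigned NumRecipes = 0;
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
         vp_depth_first_deep(Plan.getEntry())))
  NumRecipes += VPBB->size();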

detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)

Returns a concatenated range across two or more ranges.

uint64_t PowerOf2Ceil(uint64_t A)

Returns the power of two which is greater than or equal to the given value.
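
For instance:

#include "llvm/Support/MathExtras.h"

uint64_t A = llvm::PowerOf2Ceil(5); // 8
uint64_t B = llvm::PowerOf2Ceil(8); // 8: already a power of two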

auto dyn_cast_or_null(const Y &Val)

bool any_of(R &&range, UnaryPredicate P)

Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.

auto reverse(ContainerTy &&C)

iterator_range< po_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_post_order_deep(VPBlockBase *G)

Returns an iterator range to traverse the graph starting at G in post order while traversing through ...

void sort(IteratorTy Start, IteratorTy End)

LLVM_ABI_FOR_TEST cl::opt< bool > EnableWideActiveLaneMask

bool none_of(R &&Range, UnaryPredicate P)

Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.

SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)

Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...

iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)

Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
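
A sketch restricting a block walk to widened store recipes:

// The returned range visits only recipes the predicate accepts.
auto Stores = make_filter_range(*VPBB, [](VPRecipeBase &R) {
  return isa<VPWidenStoreRecipe>(&R);
});
for (VPRecipeBase &Store : Stores)
  (void)Store; // only widened stores reach this point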

bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)

Check if a constant CI can be safely treated as having been extended from a narrower type with the gi...

bool isa(const From &Val)

isa - Return true if the parameter to the template is an instance of one of the template type argu...

auto drop_end(T &&RangeOrContainer, size_t N=1)

Return a range covering RangeOrContainer with the last N elements excluded.

RecurKind

These are the kinds of recurrences that we support.

@ Mul

Product of integers.

@ Sub

Subtraction of integers.

@ AddChainWithSubs

A chain of adds and subs.

auto count(R &&Range, const E &Element)

Wrapper function around std::count to count the number of times an element Element occurs in the give...

auto count_if(R &&Range, UnaryPredicate P)

Wrapper function around std::count_if to count the number of times an element satisfying a given pred...

decltype(auto) cast(const From &Val)

cast - Return the argument parameter cast to the specified type.

LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)

Split the specified block at the specified instruction.

auto find_if(R &&Range, UnaryPredicate P)

Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.

Type * getLoadStoreType(const Value *I)

A helper function that returns the type of a load or store instruction.

bool all_equal(std::initializer_list< T > Values)

Returns true if all Values in the initializer lists are equal or the list is empty.

@ DataAndControlFlowWithoutRuntimeCheck

Use predicate to control both data and control flow, but modify the trip count so that a runtime over...

hash_code hash_combine(const Ts &...args)

Combine values into a single hash_code.

bool equal(L &&LRange, R &&RRange)

Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.

Type * toVectorTy(Type *Scalar, ElementCount EC)

A helper function for converting Scalar types to vector types.
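
For example (assuming an LLVMContext Ctx in scope):

// Produces <4 x i32> for a fixed VF of 4; a scalable ElementCount
// would yield <vscale x 4 x i32> instead.
Type *VecTy = toVectorTy(Type::getInt32Ty(Ctx), ElementCount::getFixed(4));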

@ Default

The result values are uniform if and only if all operands are uniform.

constexpr detail::IsaCheckPredicate< Types... > IsaPred

Function object wrapper for the llvm::isa type check.

hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)

Compute a hash_code for a sequence of values.
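
Together with hash_combine above, this is the shape of key hashing a CSE map might use; a small self-contained sketch with hypothetical key components:

#include "llvm/ADT/Hashing.h"
#include <iterator>

unsigned Opcode = 13;  // hypothetical opcode key component
int Ops[] = {1, 2, 3}; // hypothetical operand identifiers
llvm::hash_code Key = llvm::hash_combine(
    Opcode, llvm::hash_combine_range(std::begin(Ops), std::end(Ops)));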

RemoveMask_match(const Op0_t &In, Op1_t &Out)

Definition VPlanTransforms.cpp:2745

bool match(OpTy *V) const

Definition VPlanTransforms.cpp:2747

Op0_t In

Definition VPlanTransforms.cpp:2742

Op1_t & Out

Definition VPlanTransforms.cpp:2743

A collection of metadata nodes that might be associated with a memory access used by the alias-analys...

MDNode * Scope

The tag for alias scope specification (used with noalias).

MDNode * NoAlias

The tag specifying the noalias scope.

This struct is a compact representation of a valid (non-zero power of two) alignment.

An information struct used to provide DenseMap with the various necessary components for a given valu...

Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...

A range of powers-of-2 vectorization factors with fixed start and adjustable end.

Struct to hold various analysis needed for cost computations.

A recipe for handling first-order recurrence phis.

A recipe for widening load operations with vector-predication intrinsics, using the address to load f...

A recipe for widening load operations, using the address to load from and an optional mask.

A recipe for widening select instructions.

A recipe for widening store operations with vector-predication intrinsics, using the value to store,...

A recipe for widening store operations, using the stored value, the address to store to and an option...

static void hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE, const Loop *L)

Hoist predicated loads from the same address to the loop entry block, if they are guaranteed to execu...

Definition VPlanTransforms.cpp:4249

static void sinkPredicatedStores(VPlan &Plan, ScalarEvolution &SE, const Loop *L)

Sink predicated stores to the same address with complementary predicates (P and NOT P) to an uncondit...

Definition VPlanTransforms.cpp:4316

static void materializeBroadcasts(VPlan &Plan)

Add explicit broadcasts for live-ins and VPValues defined in Plan's entry block if they are used as v...

Definition VPlanTransforms.cpp:4062

static void materializePacksAndUnpacks(VPlan &Plan)

Add explicit Build[Struct]Vector recipes to Pack multiple scalar values into vectors and Unpack recip...

Definition VPlanTransforms.cpp:4413

static void materializeBackedgeTakenCount(VPlan &Plan, VPBasicBlock *VectorPH)

Materialize the backedge-taken count to be computed explicitly using VPInstructions.

Definition VPlanTransforms.cpp:4399

static void optimizeInductionExitUsers(VPlan &Plan, DenseMap< VPValue *, VPValue * > &EndValues, ScalarEvolution &SE)

If there's a single exit block, optimize its phi recipes that use exiting IV values by feeding them p...

Definition VPlanTransforms.cpp:991

static void hoistInvariantLoads(VPlan &Plan)

Hoist single-scalar loads with invariant addresses out of the vector loop to the preheader,...

Definition VPlanTransforms.cpp:4107

static void canonicalizeEVLLoops(VPlan &Plan)

Transform EVL loops to use variable-length stepping after region dissolution.

Definition VPlanTransforms.cpp:3079

static void dropPoisonGeneratingRecipes(VPlan &Plan, const std::function< bool(BasicBlock *)> &BlockNeedsPredication)

Drop poison flags from recipes that may generate a poison value that is used after vectorization,...

Definition VPlanTransforms.cpp:3215

static void createAndOptimizeReplicateRegions(VPlan &Plan)

Wrap predicated VPReplicateRecipes with a mask operand in an if-then region block and remove the mask...

Definition VPlanTransforms.cpp:534

static void createInterleaveGroups(VPlan &Plan, const SmallPtrSetImpl< const InterleaveGroup< Instruction > * > &InterleaveGroups, VPRecipeBuilder &RecipeBuilder, const bool &ScalarEpilogueAllowed)

Definition VPlanTransforms.cpp:3312

static bool runPass(bool(*Transform)(VPlan &, ArgsTy...), VPlan &Plan, typename std::remove_reference< ArgsTy >::type &...Args)

Helper to run a VPlan transform Transform on VPlan, forwarding extra arguments to the transform.
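
A sketch of the call shape (assuming a matching overload for the void-returning transforms listed below):

runPass(removeDeadRecipes, Plan);
runPass(simplifyRecipes, Plan);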

static void addBranchWeightToMiddleTerminator(VPlan &Plan, ElementCount VF, std::optional< unsigned > VScaleForTuning)

Add branch weight metadata, if the Plan's middle block is terminated by a BranchOnCond recipe.

Definition VPlanTransforms.cpp:4899

static void narrowInterleaveGroups(VPlan &Plan, ElementCount VF, TypeSize VectorRegWidth)

Try to convert a plan with interleave groups with VF elements to a plan with the interleave groups re...

Definition VPlanTransforms.cpp:4761

static DenseMap< const SCEV *, Value * > expandSCEVs(VPlan &Plan, ScalarEvolution &SE)

Expand VPExpandSCEVRecipes in Plan's entry block.

Definition VPlanTransforms.cpp:4602

static void convertToConcreteRecipes(VPlan &Plan)

Lower abstract recipes to concrete ones, that can be codegen'd.

Definition VPlanTransforms.cpp:3602

static void convertToAbstractRecipes(VPlan &Plan, VPCostContext &Ctx, VFRange &Range)

This function converts initial recipes to the abstract recipes and clamps Range based on cost model f...

Definition VPlanTransforms.cpp:4051

static void materializeConstantVectorTripCount(VPlan &Plan, ElementCount BestVF, unsigned BestUF, PredicatedScalarEvolution &PSE)

Definition VPlanTransforms.cpp:4369

static LLVM_ABI_FOR_TEST bool tryToConvertVPInstructionsToVPRecipes(VPlan &Plan, function_ref< const InductionDescriptor *(PHINode *)> GetIntOrFpInductionDescriptor, const TargetLibraryInfo &TLI)

Replaces the VPInstructions in Plan with corresponding widen recipes.

Definition VPlanTransforms.cpp:48

static void addExitUsersForFirstOrderRecurrences(VPlan &Plan, VFRange &Range)

Handle users in the exit block for first-order recurrences in the original exit block.

Definition VPlanTransforms.cpp:5006

static void addExplicitVectorLength(VPlan &Plan, const std::optional< unsigned > &MaxEVLSafeElements)

Add a VPEVLBasedIVPHIRecipe and related recipes to Plan and replaces all uses except the canonical IV...

Definition VPlanTransforms.cpp:3017

static void replaceSymbolicStrides(VPlan &Plan, PredicatedScalarEvolution &PSE, const DenseMap< Value *, const SCEV * > &StridesMap)

Replace symbolic strides from StridesMap in Plan with constants when possible.

Definition VPlanTransforms.cpp:3159

static void removeBranchOnConst(VPlan &Plan)

Remove BranchOnCond recipes with true or false conditions together with removing dead edges to their ...

Definition VPlanTransforms.cpp:2497

static void removeDeadRecipes(VPlan &Plan)

Remove dead recipes from Plan.

Definition VPlanTransforms.cpp:641

static void materializeVectorTripCount(VPlan &Plan, VPBasicBlock *VectorPHVPBB, bool TailByMasking, bool RequiresScalarEpilogue)

Materialize vector trip count computations to a set of VPInstructions.

Definition VPlanTransforms.cpp:4506

static void simplifyRecipes(VPlan &Plan)

Perform instcombine-like simplifications on recipes in Plan.

Definition VPlanTransforms.cpp:1495

static void handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB, VPBasicBlock *EarlyExitVPBB, VPlan &Plan, VPBasicBlock *HeaderVPBB, VPBasicBlock *LatchVPBB)

Update Plan to account for the uncountable early exit from EarlyExitingVPBB to EarlyExitVPBB by...

Definition VPlanTransforms.cpp:3715

static void clearReductionWrapFlags(VPlan &Plan)

Clear NSW/NUW flags from reduction instructions if necessary.

Definition VPlanTransforms.cpp:2244

static void cse(VPlan &Plan)

Perform common-subexpression-elimination on Plan.

Definition VPlanTransforms.cpp:2353

static void addActiveLaneMask(VPlan &Plan, bool UseActiveLaneMaskForControlFlow, bool DataAndControlFlowWithoutRuntimeCheck)

Replace (ICMP_ULE, wide canonical IV, backedge-taken-count) checks with an (active-lane-mask recipe,...

Definition VPlanTransforms.cpp:2704

static LLVM_ABI_FOR_TEST void optimize(VPlan &Plan)

Apply VPlan-to-VPlan optimizations to Plan, including induction recipe optimizations,...

Definition VPlanTransforms.cpp:2531

static void dissolveLoopRegions(VPlan &Plan)

Replace loop regions with explicit CFG.

Definition VPlanTransforms.cpp:3590

static void truncateToMinimalBitwidths(VPlan &Plan, const MapVector< Instruction *, uint64_t > &MinBWs)

Insert truncates and extends for any truncated recipe.

Definition VPlanTransforms.cpp:2404

static bool adjustFixedOrderRecurrences(VPlan &Plan, VPBuilder &Builder)

Try to have all users of fixed-order recurrences appear after the recipe defining their previous valu...

Definition VPlanTransforms.cpp:2170

static void optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF, unsigned BestUF, PredicatedScalarEvolution &PSE)

Optimize Plan based on BestVF and BestUF.

Definition VPlanTransforms.cpp:1998

static void materializeVFAndVFxUF(VPlan &Plan, VPBasicBlock *VectorPH, ElementCount VF)

Materialize VF and VFxUF to be computed explicitly using VPInstructions.

Definition VPlanTransforms.cpp:4566

static void updateScalarResumePhis(VPlan &Plan, DenseMap< VPValue *, VPValue * > &IVEndValues)

Update the resume phis in the scalar preheader after creating wide recipes for first-order recurrence...

Definition VPlanTransforms.cpp:4956