LLVM: lib/Transforms/Vectorize/LoopIdiomVectorize.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

77

78using namespace llvm;

80

81#define DEBUG_TYPE "loop-idiom-vectorize"

82

85 cl::desc("Disable Loop Idiom Vectorize Pass."));

86

89 cl::desc("The vectorization style for loop idiom transform."),

91 "Use masked vector intrinsics"),

93 "predicated", "Use VP intrinsics")),

95

99 cl::desc("Proceed with Loop Idiom Vectorize Pass, but do "

100 "not convert byte-compare loop(s)."));

101

104 cl::desc("The vectorization factor for byte-compare patterns."),

106

110 cl::desc("Do not convert find-first-byte loop(s)."));

111

114 cl::desc("Verify loops generated Loop Idiom Vectorize Pass."));

115

116namespace {

117class LoopIdiomVectorize {

119 unsigned ByteCompareVF;

120 Loop *CurLoop = nullptr;

125

126

128 BasicBlock *VectorLoopPreheaderBlock = nullptr;

129 BasicBlock *VectorLoopStartBlock = nullptr;

130 BasicBlock *VectorLoopMismatchBlock = nullptr;

131 BasicBlock *VectorLoopIncBlock = nullptr;

132

133public:

137 : VectorizeStyle(S), ByteCompareVF(VF), DT(DT), LI(LI), TTI(TTI), DL(DL) {

138 }

139

140 bool run(Loop *L);

141

142private:

143

144

145

146 bool runOnCountableLoop();

147 bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,

148 SmallVectorImpl<BasicBlock *> &ExitBlocks);

149

150 bool recognizeByteCompare();

151

152 Value *expandFindMismatch(IRBuilder<> &Builder, DomTreeUpdater &DTU,

153 GetElementPtrInst *GEPA, GetElementPtrInst *GEPB,

154 Instruction *Index, Value *Start, Value *MaxLen);

155

156 Value *createMaskedFindMismatch(IRBuilder<> &Builder, DomTreeUpdater &DTU,

157 GetElementPtrInst *GEPA,

158 GetElementPtrInst *GEPB, Value *ExtStart,

160 Value *createPredicatedFindMismatch(IRBuilder<> &Builder, DomTreeUpdater &DTU,

161 GetElementPtrInst *GEPA,

162 GetElementPtrInst *GEPB, Value *ExtStart,

164

165 void transformByteCompare(GetElementPtrInst *GEPA, GetElementPtrInst *GEPB,

166 PHINode *IndPhi, Value *MaxLen, Instruction *Index,

167 Value *Start, bool IncIdx, BasicBlock *FoundBB,

168 BasicBlock *EndBB);

169

170 bool recognizeFindFirstByte();

171

172 Value *expandFindFirstByte(IRBuilder<> &Builder, DomTreeUpdater &DTU,

173 unsigned VF, Type *CharTy, Value *IndPhi,

174 BasicBlock *ExitSucc, BasicBlock *ExitFail,

175 Value *SearchStart, Value *SearchEnd,

176 Value *NeedleStart, Value *NeedleEnd);

177

178 void transformFindFirstByte(PHINode *IndPhi, unsigned VF, Type *CharTy,

179 BasicBlock *ExitSucc, BasicBlock *ExitFail,

180 Value *SearchStart, Value *SearchEnd,

181 Value *NeedleStart, Value *NeedleEnd);

182

183};

184}

185

191

192 const auto *DL = &L.getHeader()->getDataLayout();

193

197

198 unsigned BCVF = ByteCompareVF;

199 if (ByteCmpVF.getNumOccurrences())

201

202 LoopIdiomVectorize LIV(VecStyle, BCVF, &AR.DT, &AR.LI, &AR.TTI, DL);

203 if (!LIV.run(&L))

205

207}

208

209

210

211

212

213

214

215bool LoopIdiomVectorize::run(Loop *L) {

216 CurLoop = L;

217

218 Function &F = *L->getHeader()->getParent();

220 return false;

221

222 if (F.hasFnAttribute(Attribute::NoImplicitFloat)) {

224 << " due to its NoImplicitFloat attribute");

225 return false;

226 }

227

228

229

230 if (!L->getLoopPreheader())

231 return false;

232

234 << CurLoop->getHeader()->getName() << "\n");

235

236 if (recognizeByteCompare())

237 return true;

238

239 if (recognizeFindFirstByte())

240 return true;

241

242 return false;

243}

244

248

249

250 bool ResPhi = false;

251 for (Value *Op : PN.incoming_values())

252 if (Op == ScalarRes) {

253 ResPhi = true;

254 break;

255 }

256

257

258

259 if (ResPhi)

260 PN.addIncoming(VectorRes, IncBB);

261 else {

262

263

264

265

266

268 if (L->contains(BB)) {

269 PN.addIncoming(PN.getIncomingValueForBlock(BB), IncBB);

270 break;

271 }

272 }

273 }

274}

275

276bool LoopIdiomVectorize::recognizeByteCompare() {

277

278

279

280

281

282

283 if (TTI->supportsScalableVectors() || TTI->getMinPageSize().has_value() ||

285 return false;

286

287 BasicBlock *Header = CurLoop->getHeader();

288

289

290

291 if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 2)

292 return false;

293

296 return false;

297

298 auto LoopBlocks = CurLoop->getBlocks();

299

300

301

302

303

304

305

306

307 if (LoopBlocks[0]->sizeWithoutDebug() > 4)

308 return false;

309

310

311

312

313

314

315

316

317

318

319

320

321 if (LoopBlocks[1]->sizeWithoutDebug() > 7)

322 return false;

323

324

325 Value *StartIdx = nullptr;

330 } else {

333 }

334

335

336 if (!Index || Index->getType()->isIntegerTy(32) ||

338 return false;

339

340

341

342

345 if (&I != PN && &I != Index)

348 return false;

349

350

353 if (match(Header->getTerminator(),

357 !CurLoop->contains(WhileBB))

358 return false;

359

360

361

364 Value *LoadA, *LoadB;

369 !CurLoop->contains(TrueBB))

370 return false;

371

374 return false;

375

379 return false;

380

383

384 if (!GEPA || !GEPB)

385 return false;

386

389

390

391 if (!CurLoop->isLoopInvariant(PtrA) || !CurLoop->isLoopInvariant(PtrB) ||

396 return false;

397

398

400 return false;

401

405 return false;

406

407

408

410 return false;

411

412

413

414

415

416

417

418

419

420

421

422

423

424

425

426

427

428

429

430 if (FoundBB == EndBB) {

432 Value *WhileCondVal = EndPN.getIncomingValueForBlock(Header);

433 Value *WhileBodyVal = EndPN.getIncomingValueForBlock(WhileBB);

434

435

436

437

438

439 if (WhileCondVal != WhileBodyVal &&

440 ((WhileCondVal != Index && WhileCondVal != MaxLen) ||

441 (WhileBodyVal != Index)))

442 return false;

443 }

444 }

445

447 << *(EndBB->getParent()) << "\n\n");

448

449

450

451 transformByteCompare(GEPA, GEPB, PN, MaxLen, Index, StartIdx, true,

452 FoundBB, EndBB);

453 return true;

454}

455

456Value *LoopIdiomVectorize::createMaskedFindMismatch(

464

467

469 Intrinsic::get_active_lane_mask, {PredVTy, I64Type}, {ExtStart, ExtEnd});

470

472 VecLen =

473 Builder.CreateMul(VecLen, ConstantInt::get(I64Type, ByteCompareVF), "",

474 true, true);

475

478

480 Builder.Insert(JumpToVectorLoop);

481

483 VectorLoopStartBlock}});

484

485

486

488 PHINode *LoopPred = Builder.CreatePHI(PredVTy, 2, "mismatch_vec_loop_pred");

489 LoopPred->addIncoming(InitialPred, VectorLoopPreheaderBlock);

490 PHINode *VectorIndexPhi = Builder.CreatePHI(I64Type, 2, "mismatch_vec_index");

491 VectorIndexPhi->addIncoming(ExtStart, VectorLoopPreheaderBlock);

492 Type *VectorLoadType =

495

496 Value *VectorLhsGep =

497 Builder.CreateGEP(LoadType, PtrA, VectorIndexPhi, "", GEPA->isInBounds());

499 Align(1), LoopPred, Passthru);

500

501 Value *VectorRhsGep =

502 Builder.CreateGEP(LoadType, PtrB, VectorIndexPhi, "", GEPB->isInBounds());

504 Align(1), LoopPred, Passthru);

505

506 Value *VectorMatchCmp = Builder.CreateICmpNE(VectorLhsLoad, VectorRhsLoad);

507 VectorMatchCmp = Builder.CreateSelect(LoopPred, VectorMatchCmp, PFalse);

508 Value *VectorMatchHasActiveLanes = Builder.CreateOrReduce(VectorMatchCmp);

510 VectorLoopMismatchBlock, VectorLoopIncBlock, VectorMatchHasActiveLanes);

511 Builder.Insert(VectorEarlyExit);

512

516

517

518

519

521 Value *NewVectorIndexPhi =

522 Builder.CreateAdd(VectorIndexPhi, VecLen, "",

523 true, true);

524 VectorIndexPhi->addIncoming(NewVectorIndexPhi, VectorLoopIncBlock);

527 {PredVTy, I64Type}, {NewVectorIndexPhi, ExtEnd});

528 LoopPred->addIncoming(NewPred, VectorLoopIncBlock);

529

530 Value *PredHasActiveLanes =

533 BranchInst::Create(VectorLoopStartBlock, EndBlock, PredHasActiveLanes);

534 Builder.Insert(VectorLoopBranchBack);

535

539

540

541

543 PHINode *FoundPred = Builder.CreatePHI(PredVTy, 1, "mismatch_vec_found_pred");

544 FoundPred->addIncoming(VectorMatchCmp, VectorLoopStartBlock);

546 Builder.CreatePHI(PredVTy, 1, "mismatch_vec_last_loop_pred");

547 LastLoopPred->addIncoming(LoopPred, VectorLoopStartBlock);

548 PHINode *VectorFoundIndex =

549 Builder.CreatePHI(I64Type, 1, "mismatch_vec_found_index");

550 VectorFoundIndex->addIncoming(VectorIndexPhi, VectorLoopStartBlock);

551

552 Value *PredMatchCmp = Builder.CreateAnd(LastLoopPred, FoundPred);

554 Ctz = Builder.CreateZExt(Ctz, I64Type);

555 Value *VectorLoopRes64 = Builder.CreateAdd(VectorFoundIndex, Ctz, "",

556 true, true);

557 return Builder.CreateTrunc(VectorLoopRes64, ResType);

558}

559

560Value *LoopIdiomVectorize::createPredicatedFindMismatch(

565 Type *ResType = I32Type;

569

571 Builder.Insert(JumpToVectorLoop);

572

574 VectorLoopStartBlock}});

575

576

577

579 auto *VectorIndexPhi = Builder.CreatePHI(I64Type, 2, "mismatch_vector_index");

580 VectorIndexPhi->addIncoming(ExtStart, VectorLoopPreheaderBlock);

581

582

583 Value *AVL = Builder.CreateSub(ExtEnd, VectorIndexPhi, "avl", true,

584 true);

585

587 auto *VF = ConstantInt::get(I32Type, ByteCompareVF);

588

589 Value *VL = Builder.CreateIntrinsic(Intrinsic::experimental_get_vector_length,

590 {I64Type}, {AVL, VF, Builder.getTrue()});

591 Value *GepOffset = VectorIndexPhi;

592

593 Value *VectorLhsGep =

599 Intrinsic::vp_load, {VectorLoadType, VectorLhsGep->getType()},

600 {VectorLhsGep, AllTrueMask, VL}, nullptr, "lhs.load");

601

602 Value *VectorRhsGep =

605 Intrinsic::vp_load, {VectorLoadType, VectorLhsGep->getType()},

606 {VectorRhsGep, AllTrueMask, VL}, nullptr, "rhs.load");

607

608 Value *VectorMatchCmp =

609 Builder.CreateICmpNE(VectorLhsLoad, VectorRhsLoad, "mismatch.cmp");

611 Intrinsic::vp_cttz_elts, {ResType, VectorMatchCmp->getType()},

612 {VectorMatchCmp, Builder.getInt1(false), AllTrueMask,

613 VL});

616 VectorLoopIncBlock, MismatchFound);

617 Builder.Insert(VectorEarlyExit);

618

622

623

624

625

628 Value *NewVectorIndexPhi =

629 Builder.CreateAdd(VectorIndexPhi, VL64, "",

630 true, true);

631 VectorIndexPhi->addIncoming(NewVectorIndexPhi, VectorLoopIncBlock);

632 Value *ExitCond = Builder.CreateICmpNE(NewVectorIndexPhi, ExtEnd);

633 auto *VectorLoopBranchBack =

635 Builder.Insert(VectorLoopBranchBack);

636

640

641

642

644

645

646 auto *CTZLCSSAPhi = Builder.CreatePHI(CTZ->getType(), 1, "ctz");

647 CTZLCSSAPhi->addIncoming(CTZ, VectorLoopStartBlock);

648 auto *VectorIndexLCSSAPhi =

649 Builder.CreatePHI(VectorIndexPhi->getType(), 1, "mismatch_vector_index");

650 VectorIndexLCSSAPhi->addIncoming(VectorIndexPhi, VectorLoopStartBlock);

651

652 Value *CTZI64 = Builder.CreateZExt(CTZLCSSAPhi, I64Type);

653 Value *VectorLoopRes64 = Builder.CreateAdd(VectorIndexLCSSAPhi, CTZI64, "",

654 true, true);

655 return Builder.CreateTrunc(VectorLoopRes64, ResType);

656}

657

658Value *LoopIdiomVectorize::expandFindMismatch(

663

664

665 BasicBlock *Preheader = CurLoop->getLoopPreheader();

670

671

672 EndBlock = SplitBlock(Preheader, PHBranch, DT, LI, nullptr, "mismatch_end");

673

674

675

676

677

678

679

680

681

682

683

684

685

686

688 Ctx, "mismatch_min_it_check", EndBlock->getParent(), EndBlock);

689

690

692

694 Ctx, "mismatch_mem_check", EndBlock->getParent(), EndBlock);

695

697 Ctx, "mismatch_vec_loop_preheader", EndBlock->getParent(), EndBlock);

698

700 EndBlock->getParent(), EndBlock);

701

702 VectorLoopIncBlock = BasicBlock::Create(Ctx, "mismatch_vec_loop_inc",

703 EndBlock->getParent(), EndBlock);

704

705 VectorLoopMismatchBlock = BasicBlock::Create(Ctx, "mismatch_vec_loop_found",

706 EndBlock->getParent(), EndBlock);

707

709 Ctx, "mismatch_loop_pre", EndBlock->getParent(), EndBlock);

710

712 BasicBlock::Create(Ctx, "mismatch_loop", EndBlock->getParent(), EndBlock);

713

715 Ctx, "mismatch_loop_inc", EndBlock->getParent(), EndBlock);

716

719

720

721 auto VectorLoop = LI->AllocateLoop();

722 auto ScalarLoop = LI->AllocateLoop();

723

724 if (CurLoop->getParentLoop()) {

725 CurLoop->getParentLoop()->addBasicBlockToLoop(MinItCheckBlock, *LI);

726 CurLoop->getParentLoop()->addBasicBlockToLoop(MemCheckBlock, *LI);

727 CurLoop->getParentLoop()->addBasicBlockToLoop(VectorLoopPreheaderBlock,

728 *LI);

729 CurLoop->getParentLoop()->addChildLoop(VectorLoop);

730 CurLoop->getParentLoop()->addBasicBlockToLoop(VectorLoopMismatchBlock, *LI);

731 CurLoop->getParentLoop()->addBasicBlockToLoop(LoopPreHeaderBlock, *LI);

732 CurLoop->getParentLoop()->addChildLoop(ScalarLoop);

733 } else {

734 LI->addTopLevelLoop(VectorLoop);

735 LI->addTopLevelLoop(ScalarLoop);

736 }

737

738

739 VectorLoop->addBasicBlockToLoop(VectorLoopStartBlock, *LI);

740 VectorLoop->addBasicBlockToLoop(VectorLoopIncBlock, *LI);

741

742 ScalarLoop->addBasicBlockToLoop(LoopStartBlock, *LI);

743 ScalarLoop->addBasicBlockToLoop(LoopIncBlock, *LI);

744

745

747

748

752

753

758 LLVMContext::MD_prof,

760 Builder.Insert(MinItCheckBr);

761

765

766

767

769

770

771

772

773

774

775

776

777

778

779

780

781

782 Value *LhsStartGEP = Builder.CreateGEP(LoadType, PtrA, ExtStart);

783 Value *RhsStartGEP = Builder.CreateGEP(LoadType, PtrB, ExtStart);

786 Value *LhsEndGEP = Builder.CreateGEP(LoadType, PtrA, ExtEnd);

787 Value *RhsEndGEP = Builder.CreateGEP(LoadType, PtrB, ExtEnd);

790

791 const uint64_t MinPageSize = TTI->getMinPageSize().value();

793 Value *LhsStartPage = Builder.CreateLShr(LhsStart, AddrShiftAmt);

794 Value *LhsEndPage = Builder.CreateLShr(LhsEnd, AddrShiftAmt);

795 Value *RhsStartPage = Builder.CreateLShr(RhsStart, AddrShiftAmt);

796 Value *RhsEndPage = Builder.CreateLShr(RhsEnd, AddrShiftAmt);

797 Value *LhsPageCmp = Builder.CreateICmpNE(LhsStartPage, LhsEndPage);

798 Value *RhsPageCmp = Builder.CreateICmpNE(RhsStartPage, RhsEndPage);

799

800 Value *CombinedPageCmp = Builder.CreateOr(LhsPageCmp, RhsPageCmp);

802 LoopPreHeaderBlock, VectorLoopPreheaderBlock, CombinedPageCmp);

806 Builder.Insert(CombinedPageCmpCmpBr);

807

811

812

813

814

816

817

818

819

820

821

822 Value *VectorLoopRes = nullptr;

823 switch (VectorizeStyle) {

825 VectorLoopRes =

826 createMaskedFindMismatch(Builder, DTU, GEPA, GEPB, ExtStart, ExtEnd);

827 break;

829 VectorLoopRes = createPredicatedFindMismatch(Builder, DTU, GEPA, GEPB,

830 ExtStart, ExtEnd);

831 break;

832 }

833

835

838

839

842

845

847 PHINode *IndexPhi = Builder.CreatePHI(ResType, 2, "mismatch_index");

848 IndexPhi->addIncoming(Start, LoopPreHeaderBlock);

849

850

851

852 Value *GepOffset = Builder.CreateZExt(IndexPhi, I64Type);

853

857

861

863

865 Builder.Insert(MatchCmpBr);

866

869

870

872 Value *PhiInc = Builder.CreateAdd(IndexPhi, ConstantInt::get(ResType, 1), "",

873 Index->hasNoUnsignedWrap(),

874 Index->hasNoSignedWrap());

875 IndexPhi->addIncoming(PhiInc, LoopIncBlock);

878 Builder.Insert(IVCmpBr);

879

882

883

884

885

886

887

888

889

890 Builder.SetInsertPoint(EndBlock, EndBlock->getFirstInsertionPt());

891 PHINode *ResPhi = Builder.CreatePHI(ResType, 4, "mismatch_result");

892 ResPhi->addIncoming(MaxLen, LoopIncBlock);

893 ResPhi->addIncoming(IndexPhi, LoopStartBlock);

894 ResPhi->addIncoming(MaxLen, VectorLoopIncBlock);

895 ResPhi->addIncoming(VectorLoopRes, VectorLoopMismatchBlock);

896

898

900 ScalarLoop->verifyLoop();

901 VectorLoop->verifyLoop();

902 if (!VectorLoop->isRecursivelyLCSSAForm(*DT, *LI))

904 if (!ScalarLoop->isRecursivelyLCSSAForm(*DT, *LI))

906 }

907

908 return FinalRes;

909}

910

911void LoopIdiomVectorize::transformByteCompare(GetElementPtrInst *GEPA,

917

918

919 BasicBlock *Preheader = CurLoop->getLoopPreheader();

920 BasicBlock *Header = CurLoop->getHeader();

923 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);

925

926

927 if (IncIdx)

929

930 Value *ByteCmpRes =

931 expandFindMismatch(Builder, DTU, GEPA, GEPB, Index, Start, MaxLen);

932

933

934

935 assert(IndPhi->hasOneUse() && "Index phi node has more than one use!");

936 Index->replaceAllUsesWith(ByteCmpRes);

937

939 "Expected preheader to terminate with an unconditional branch.");

940

941

942

945 CmpBB->moveBefore(EndBB);

946

947

948

951

954

955

956

958 if (FoundBB != EndBB) {

960 Builder.CreateCondBr(FoundCmp, EndBB, FoundBB);

963

964 } else {

967 }

968

969

970 fixSuccessorPhis(CurLoop, ByteCmpRes, ByteCmpRes, EndBB, CmpBB);

971 if (EndBB != FoundBB)

972 fixSuccessorPhis(CurLoop, ByteCmpRes, ByteCmpRes, FoundBB, CmpBB);

973

974

975

976 if (!CurLoop->isOutermost())

977 CurLoop->getParentLoop()->addBasicBlockToLoop(CmpBB, *LI);

978

979 if (VerifyLoops && CurLoop->getParentLoop()) {

980 CurLoop->getParentLoop()->verifyLoop();

981 if (!CurLoop->getParentLoop()->isRecursivelyLCSSAForm(*DT, *LI))

983 }

984}

985

986bool LoopIdiomVectorize::recognizeFindFirstByte() {

987

988

989

990

991 if (TTI->supportsScalableVectors() || TTI->getMinPageSize().has_value() ||

993 return false;

994

995

996 BasicBlock *Header = CurLoop->getHeader();

998

999

1000

1001

1002 if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 4 ||

1003 CurLoop->getSubLoops().size() != 1)

1004 return false;

1005

1006 auto *InnerLoop = CurLoop->getSubLoops().front();

1009 return false;

1010

1011

1012 auto LoopBlocks = CurLoop->getBlocks();

1013 if (LoopBlocks[0]->sizeWithoutDebug() > 3 ||

1014 LoopBlocks[1]->sizeWithoutDebug() > 4 ||

1015 LoopBlocks[2]->sizeWithoutDebug() > 3 ||

1016 LoopBlocks[3]->sizeWithoutDebug() > 3)

1017 return false;

1018

1019

1022 if (&I != IndPhi)

1025 return false;

1026

1027

1028

1029

1030

1031

1032

1033

1036 !InnerLoop->contains(MatchBB))

1037 return false;

1038

1039

1040

1041

1042

1043

1044

1045

1046

1048 Value *LoadSearch, *LoadNeedle;

1053 MatchPred != ICmpInst::ICMP_EQ || !InnerLoop->contains(InnerBB))

1054 return false;

1055

1056

1060 if (!PN || PN->getParent() != ExitSucc)

1061 return false;

1062 }

1063

1064

1065 Value *Search, *Needle;

1070 return false;

1071

1072

1075 return false;

1076

1077

1078

1079

1085 Args);

1087 return false;

1088

1089

1094 return false;

1095

1096

1097

1098 if (InnerLoop->contains(PSearch))

1100 if (PSearch != &Header->front() || PNeedle != &MatchBB->front())

1101 return false;

1102

1103

1107 std::swap(SearchStart, SearchIndex);

1108

1112 std::swap(NeedleStart, NeedleIndex);

1113

1114

1117 return false;

1118

1119

1124 return false;

1125

1126

1127

1128

1129

1130

1131

1133 Value *NeedleEnd;

1138 !CurLoop->contains(OuterBB))

1139 return false;

1140

1141

1142

1143

1144

1145

1146

1148 Value *SearchEnd;

1153 return false;

1154

1155 if (!CurLoop->isLoopInvariant(SearchStart) ||

1156 !CurLoop->isLoopInvariant(SearchEnd) ||

1157 !CurLoop->isLoopInvariant(NeedleStart) ||

1158 !CurLoop->isLoopInvariant(NeedleEnd))

1159 return false;

1160

1161 LLVM_DEBUG(dbgs() << "Found idiom in loop: \n" << *CurLoop << "\n\n");

1162

1163 transformFindFirstByte(IndPhi, VF, CharTy, ExitSucc, ExitFail, SearchStart,

1164 SearchEnd, NeedleStart, NeedleEnd);

1165 return true;

1166}

1167

1168Value *LoopIdiomVectorize::expandFindFirstByte(

1171 Value *SearchStart, Value *SearchEnd, Value *NeedleStart,

1172 Value *NeedleEnd) {

1173

1174 auto *PtrTy = Builder.getPtrTy();

1178 auto *ConstVF = ConstantInt::get(I64Ty, VF);

1179

1180

1181 BasicBlock *Preheader = CurLoop->getLoopPreheader();

1184

1185

1186

1188 nullptr, "scalar_preheader");

1189

1190

1191

1192

1193

1194

1195

1196

1197

1198

1199

1200

1201

1202

1203

1204

1205

1206

1207

1208

1209

1221

1222

1223 auto OuterLoop = LI->AllocateLoop();

1224 auto InnerLoop = LI->AllocateLoop();

1225

1226 if (auto ParentLoop = CurLoop->getParentLoop()) {

1227 ParentLoop->addBasicBlockToLoop(BB0, *LI);

1228 ParentLoop->addChildLoop(OuterLoop);

1229 ParentLoop->addBasicBlockToLoop(BB3, *LI);

1230 } else {

1231 LI->addTopLevelLoop(OuterLoop);

1232 }

1233

1234

1235 OuterLoop->addChildLoop(InnerLoop);

1236

1237

1238 OuterLoop->addBasicBlockToLoop(BB1, *LI);

1239 OuterLoop->addBasicBlockToLoop(BB5, *LI);

1240 InnerLoop->addBasicBlockToLoop(BB2, *LI);

1241 InnerLoop->addBasicBlockToLoop(BB4, *LI);

1242

1243

1247

1248

1249

1250

1252 Value *ISearchStart =

1253 Builder.CreatePtrToInt(SearchStart, I64Ty, "search_start_int");

1254 Value *ISearchEnd =

1255 Builder.CreatePtrToInt(SearchEnd, I64Ty, "search_end_int");

1256 Value *INeedleStart =

1257 Builder.CreatePtrToInt(NeedleStart, I64Ty, "needle_start_int");

1258 Value *INeedleEnd =

1259 Builder.CreatePtrToInt(NeedleEnd, I64Ty, "needle_end_int");

1261 Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask, {PredVTy, I64Ty},

1262 {ConstantInt::get(I64Ty, 0), ConstVF});

1263

1264 const uint64_t MinPageSize = TTI->getMinPageSize().value();

1266 Value *SearchStartPage =

1267 Builder.CreateLShr(ISearchStart, AddrShiftAmt, "search_start_page");

1268 Value *SearchEndPage =

1269 Builder.CreateLShr(ISearchEnd, AddrShiftAmt, "search_end_page");

1270 Value *NeedleStartPage =

1271 Builder.CreateLShr(INeedleStart, AddrShiftAmt, "needle_start_page");

1272 Value *NeedleEndPage =

1273 Builder.CreateLShr(INeedleEnd, AddrShiftAmt, "needle_end_page");

1274 Value *SearchPageCmp =

1275 Builder.CreateICmpNE(SearchStartPage, SearchEndPage, "search_page_cmp");

1276 Value *NeedlePageCmp =

1277 Builder.CreateICmpNE(NeedleStartPage, NeedleEndPage, "needle_page_cmp");

1278

1279 Value *CombinedPageCmp =

1280 Builder.CreateOr(SearchPageCmp, NeedlePageCmp, "combined_page_cmp");

1282 CombinedPageBr->setMetadata(LLVMContext::MD_prof,

1286

1287

1291 Intrinsic::get_active_lane_mask, {PredVTy, I64Ty},

1292 {Builder.CreatePtrToInt(Search, I64Ty), ISearchEnd}, nullptr,

1293 "search_pred");

1294 PredSearch = Builder.CreateAnd(PredVF, PredSearch, "search_masked");

1296 CharVTy, Search, Align(1), PredSearch, Passthru, "search_load_vec");

1299

1300

1303

1304

1306 Intrinsic::get_active_lane_mask, {PredVTy, I64Ty},

1307 {Builder.CreatePtrToInt(Needle, I64Ty), INeedleEnd}, nullptr,

1308 "needle_pred");

1309 PredNeedle = Builder.CreateAnd(PredVF, PredNeedle, "needle_masked");

1311 CharVTy, Needle, Align(1), PredNeedle, Passthru, "needle_load_vec");

1312

1313

1314 Value *Needle0 =

1317 Needle0, "needle0");

1318 LoadNeedle = Builder.CreateSelect(PredNeedle, LoadNeedle, Needle0Splat,

1319 "needle_splat");

1322

1323

1325 Intrinsic::experimental_vector_match, {CharVTy, LoadNeedle->getType()},

1326 {LoadSearch, LoadNeedle, PredSearch}, nullptr, "match_pred");

1331

1332

1334 PHINode *MatchLCSSA = Builder.CreatePHI(PtrTy, 1, "match_start");

1335 PHINode *MatchPredLCSSA =

1338 Intrinsic::experimental_cttz_elts, {I64Ty, MatchPred->getType()},

1339 {MatchPredLCSSA, Builder.getInt1(true)}, nullptr,

1340 "match_idx");

1341 Value *MatchVal =

1342 Builder.CreateGEP(CharTy, MatchLCSSA, MatchCnt, "match_res");

1345

1346

1348 Value *NextNeedle =

1349 Builder.CreateGEP(CharTy, Needle, ConstVF, "needle_next_vec");

1353

1354

1356 Value *NextSearch =

1357 Builder.CreateGEP(CharTy, Search, ConstVF, "search_next_vec");

1359 ExitFail);

1362

1363

1368

1370 MatchPredLCSSA->addIncoming(MatchPred, BB2);

1371

1372

1373

1375 if (ExitSucc != ExitFail)

1377

1379 OuterLoop->verifyLoop();

1380 InnerLoop->verifyLoop();

1381 if (!OuterLoop->isRecursivelyLCSSAForm(*DT, *LI))

1383 }

1384

1385 return MatchVal;

1386}

1387

1388void LoopIdiomVectorize::transformFindFirstByte(

1391 Value *NeedleStart, Value *NeedleEnd) {

1392

1393 BasicBlock *Preheader = CurLoop->getLoopPreheader();

1396 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);

1398

1399 expandFindFirstByte(Builder, DTU, VF, CharTy, IndPhi, ExitSucc, ExitFail,

1400 SearchStart, SearchEnd, NeedleStart, NeedleEnd);

1401

1403 "Expected preheader to terminate with an unconditional branch.");

1404

1405 if (VerifyLoops && CurLoop->getParentLoop()) {

1406 CurLoop->getParentLoop()->verifyLoop();

1407 if (!CurLoop->getParentLoop()->isRecursivelyLCSSAForm(*DT, *LI))

1409 }

1410}

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL

static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

#define clEnumValN(ENUMVAL, FLAGNAME, DESC)

static cl::opt< bool > VerifyLoops("loop-idiom-vectorize-verify", cl::Hidden, cl::init(false), cl::desc("Verify loops generated Loop Idiom Vectorize Pass."))

static cl::opt< bool > DisableAll("disable-loop-idiom-vectorize-all", cl::Hidden, cl::init(false), cl::desc("Disable Loop Idiom Vectorize Pass."))

static void fixSuccessorPhis(Loop *L, Value *ScalarRes, Value *VectorRes, BasicBlock *SuccBB, BasicBlock *IncBB)

Definition LoopIdiomVectorize.cpp:245

static cl::opt< LoopIdiomVectorizeStyle > LITVecStyle("loop-idiom-vectorize-style", cl::Hidden, cl::desc("The vectorization style for loop idiom transform."), cl::values(clEnumValN(LoopIdiomVectorizeStyle::Masked, "masked", "Use masked vector intrinsics"), clEnumValN(LoopIdiomVectorizeStyle::Predicated, "predicated", "Use VP intrinsics")), cl::init(LoopIdiomVectorizeStyle::Masked))

static cl::opt< bool > DisableFindFirstByte("disable-loop-idiom-vectorize-find-first-byte", cl::Hidden, cl::init(false), cl::desc("Do not convert find-first-byte loop(s)."))

static cl::opt< unsigned > ByteCmpVF("loop-idiom-vectorize-bytecmp-vf", cl::Hidden, cl::desc("The vectorization factor for byte-compare patterns."), cl::init(16))

static cl::opt< bool > DisableByteCmp("disable-loop-idiom-vectorize-bytecmp", cl::Hidden, cl::init(false), cl::desc("Proceed with Loop Idiom Vectorize Pass, but do " "not convert byte-compare loop(s)."))

static bool isSimple(Instruction *I)

static cl::opt< unsigned > MinPageSize("min-page-size", cl::init(0), cl::Hidden, cl::desc("Use this to override the target's minimum page size."))

This pass exposes codegen information to IR-level passes.

LLVM Basic Block Representation.

iterator_range< const_phi_iterator > phis() const

Returns a range that iterates over the phis in the basic block.

const Function * getParent() const

Return the enclosing method, or null if none.

static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)

Creates a new BasicBlock.

const Instruction & front() const

LLVM_ABI LLVMContext & getContext() const

Get the context in which this basic block lives.

const Instruction * getTerminator() const LLVM_READONLY

Returns the terminator instruction if the block is well formed or null if the block is not well forme...

Conditional or Unconditional Branch instruction.

static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)

bool isUnconditional() const

An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...

static LLVM_ABI Constant * getAllOnesValue(Type *Ty)

static LLVM_ABI Constant * getNullValue(Type *Ty)

Constructor to create a '0' constant of arbitrary type.

A parsed version of the target data layout string in and methods for querying it.

static constexpr UpdateKind Delete

static constexpr UpdateKind Insert

Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.

static constexpr ElementCount getScalable(ScalarTy MinVal)

static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)

void applyUpdates(ArrayRef< UpdateT > Updates)

Submit updates to all available trees.

an instruction for type-safe pointer arithmetic to access elements of arrays and structs

LLVM_ABI bool isInBounds() const

Determine whether the GEP has the inbounds flag.

Value * getPointerOperand()

Type * getResultElementType() const

unsigned getNumIndices() const

ConstantInt * getInt1(bool V)

Get a constant value representing either true or false.

Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")

CallInst * CreateExtractVector(Type *DstType, Value *SrcVec, Value *Idx, const Twine &Name="")

Create a call to the vector.extract intrinsic.

IntegerType * getInt1Ty()

Fetch the type representing a single bit.

Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")

LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")

Return a vector value that contains.

ConstantInt * getTrue()

Get the constant value for i1 true.

LLVM_ABI CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")

Create a call to Masked Load intrinsic.

LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)

Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)

IntegerType * getInt32Ty()

Fetch the type representing a 32-bit integer.

Value * CreateVScale(Type *Ty, const Twine &Name="")

Create a call to llvm.vscale.().

void SetCurrentDebugLocation(DebugLoc L)

Set location information used by debugging information.

IntegerType * getInt64Ty()

Fetch the type representing a 64-bit integer.

Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")

Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())

LLVM_ABI CallInst * CreateOrReduce(Value *Src)

Create a vector int OR reduction intrinsic of the source vector.

LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")

Create a call to intrinsic ID with Args, mangled using Types.

PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")

Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")

InstTy * Insert(InstTy *I, const Twine &Name="") const

Insert and return the specified instruction.

Value * CreateCountTrailingZeroElems(Type *ResTy, Value *Mask, bool ZeroIsPoison=true, const Twine &Name="")

Create a call to llvm.experimental_cttz_elts.

Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)

BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)

Create a conditional 'br Cond, TrueDest, FalseDest' instruction.

LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)

Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...

Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)

Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")

Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)

Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")

Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)

PointerType * getPtrTy(unsigned AddrSpace=0)

Fetch the type representing a pointer.

BranchInst * CreateBr(BasicBlock *Dest)

Create an unconditional 'br label X' instruction.

void SetInsertPoint(BasicBlock *TheBB)

This specifies that created instructions should be appended to the end of the specified block.

Value * CreateICmpULE(Value *LHS, Value *RHS, const Twine &Name="")

Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)

IntegerType * getInt8Ty()

Fetch the type representing an 8-bit integer.

Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)

This provides a uniform API for creating instructions and inserting them into a basic block: either a...

const DebugLoc & getDebugLoc() const

Return the debug location for this node as a DebugLoc.

LLVM_ABI InstListType::iterator eraseFromParent()

This method unlinks 'this' from the containing basic block and deletes it.

LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)

Set the metadata of the specified kind to the specified node.

LLVM_ABI void setSuccessor(unsigned Idx, BasicBlock *BB)

Update the specified successor to point at the provided block.

This is an important class for using LLVM in a threaded context.

This class provides an interface for updating the loop pass manager based on mutations to the loop ne...

An instruction for reading from memory.

PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)

Definition LoopIdiomVectorize.cpp:186

Represents a single loop in the control flow graph.

LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)

Return metadata containing two branch weights.

void addIncoming(Value *V, BasicBlock *BB)

Add an incoming value to the end of the PHI list.

BasicBlock * getIncomingBlock(unsigned i) const

Return incoming basic block number i.

Value * getIncomingValue(unsigned i) const

Return incoming value number x.

unsigned getNumIncomingValues() const

Return the number of incoming edges.

A set of analyses that are preserved following a run of a transformation pass.

static PreservedAnalyses none()

Convenience factory function for the empty preserved set.

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

Class to represent scalable SIMD vectors.

static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

This pass provides access to the codegen interfaces that are needed for IR-level transformations.

@ TCK_SizeAndLatency

The weighted sum of size and latency.

The instances of the Type class are immutable: once they are created, they are never changed.

LLVM_ABI unsigned getIntegerBitWidth() const

static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)

static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)

bool isIntegerTy() const

True if this is an instance of IntegerType.

Value * getOperand(unsigned i) const

LLVM Value Representation.

Type * getType() const

All values are typed, get the type of this value.

bool hasOneUse() const

Return true if there is exactly one use of this value.

iterator_range< user_iterator > users()

LLVM_ABI LLVMContext & getContext() const

All values hold a context through their type.

Base class of all SIMD vector types.

ElementCount getElementCount() const

Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...

static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)

This static method is the primary way to construct an VectorType.

const ParentTy * getParent() const

constexpr char Args[]

Key for Kernel::Metadata::mArgs.

constexpr char Attrs[]

Key for Kernel::Metadata::mAttrs.

br_match m_UnconditionalBr(BasicBlock *&Succ)

bool match(Val *V, const Pattern &P)

specificval_ty m_Specific(const Value *V)

Match if we have a specific specified value.

cst_pred_ty< is_one > m_One()

Match an integer 1 or a vector with all elements equal to 1.

auto m_GEP(const OperandTypes &...Ops)

Matches GetElementPtrInst.

SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)

OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)

Matches LoadInst.

CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)

Matches ZExt.

brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)

BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)

Matches a Add with LHS and RHS in either order.

class_match< Value > m_Value()

Match an arbitrary value and ignore it.

CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)

class_match< BasicBlock > m_BasicBlock()

Match an arbitrary basic block value and ignore it.

ValuesClass values(OptsTy... Options)

Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...

initializer< Ty > init(const Ty &Val)

PointerTypeMap run(const Module &M)

Compute the PointerTypeMap for the module M.

This is an optimization pass for GlobalISel generic memory operations.

FunctionAddr VTableAddr Value

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

unsigned Log2_64(uint64_t Value)

Return the floor log base 2 of the specified value, -1 if the value is zero.

AnalysisManager< Loop, LoopStandardAnalysisResults & > LoopAnalysisManager

The loop analysis manager.

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)

IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >

DWARFExpression::Operation Op

decltype(auto) cast(const From &Val)

cast - Return the argument parameter cast to the specified type.

LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)

Split the specified block at the specified instruction.

void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)

Implement std::swap in terms of BitVector swap.

This struct is a compact representation of a valid (non-zero power of two) alignment.

The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...

TargetTransformInfo & TTI