LLVM: lib/Transforms/Scalar/LoopIdiomRecognize.cpp Source File

87#include

88#include

89#include

90#include

91#include

92

93using namespace llvm;

94

95#define DEBUG_TYPE "loop-idiom"

96

97STATISTIC(NumMemSet, "Number of memset's formed from loop stores");

98STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");

99STATISTIC(NumMemMove, "Number of memmove's formed from loop load+stores");

100STATISTIC(

101 NumShiftUntilBitTest,

102 "Number of uncountable loops recognized as 'shift until bittest' idiom");

103STATISTIC(NumShiftUntilZero,

104 "Number of uncountable loops recognized as 'shift until zero' idiom");

105

109 cl::desc("Options to disable Loop Idiom Recognize Pass."),

112

116 cl::desc("Proceed with loop idiom recognize pass, but do "

117 "not convert loop(s) to memset."),

120

124 cl::desc("Proceed with loop idiom recognize pass, but do "

125 "not convert loop(s) to memcpy."),

128

130 "use-lir-code-size-heurs",

131 cl::desc("Use loop idiom recognition code size heuristics when compiling "

132 "with -Os/-Oz"),

134

135namespace {

136

137class LoopIdiomRecognize {

138 Loop *CurLoop = nullptr;

147 bool ApplyCodeSizeHeuristics;

148 std::unique_ptr<MemorySSAUpdater> MSSAU;

149

150public:

157 : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE) {

158 if (MSSA)

159 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);

160 }

161

162 bool runOnLoop(Loop *L);

163

164private:

167

168 StoreListMap StoreRefsForMemset;

169 StoreListMap StoreRefsForMemsetPattern;

170 StoreList StoreRefsForMemcpy;

171 bool HasMemset;

172 bool HasMemsetPattern;

173 bool HasMemcpy;

174

175

176 enum LegalStoreKind {

178 Memset,

179 MemsetPattern,

180 Memcpy,

181 UnorderedAtomicMemcpy,

182 DontUse

183

184 };

185

186

187

188

189 bool runOnCountableLoop();

190 bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,

192

194 LegalStoreKind isLegalStore(StoreInst *SI);

195 enum class ForMemset { No, Yes };

197 ForMemset For);

198

199 template <typename MemInst>

200 bool processLoopMemIntrinsic(

202 bool (LoopIdiomRecognize::*Processor)(MemInst *, const SCEV *),

203 const SCEV *BECount);

204 bool processLoopMemCpy(MemCpyInst *MCI, const SCEV *BECount);

205 bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);

206

207 bool processLoopStridedStore(Value *DestPtr, const SCEV *StoreSizeSCEV,

212 bool IsNegStride, bool IsLoopMemset = false);

213 bool processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount);

214 bool processLoopStoreOfLoopLoad(Value *DestPtr, Value *SourcePtr,

220 const SCEV *BECount);

221 bool avoidLIRForMultiBlockLoop(bool IsMemset = false,

222 bool IsLoopMemset = false);

223

224

225

226

227

228 bool runOnNoncountableLoop();

229

230 bool recognizePopcount();

234 bool ZeroCheck, size_t CanonicalSize);

238 bool recognizeAndInsertFFS();

239 bool recognizeShiftUntilLessThan();

243 const DebugLoc &DL, bool ZeroCheck,

244 bool IsCntPhiUsedOutsideLoop,

245 bool InsertSub = false);

246

247 bool recognizeShiftUntilBitTest();

248 bool recognizeShiftUntilZero();

249

250

251};

252}

253

259

260 const auto *DL = &L.getHeader()->getDataLayout();

261

262

263

264

266

267 LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI,

269 if (!LIR.runOnLoop(&L))

271

275 return PA;

276}

277

280 I->eraseFromParent();

281}

282

283

284

285

286

287

288

289bool LoopIdiomRecognize::runOnLoop(Loop *L) {

290 CurLoop = L;

291

292

293 if (!L->getLoopPreheader())

294 return false;

295

296

297 StringRef Name = L->getHeader()->getParent()->getName();

298 if (Name == "memset" || Name == "memcpy")

299 return false;

300

301

302 ApplyCodeSizeHeuristics =

304

305 HasMemset = TLI->has(LibFunc_memset);

306 HasMemsetPattern = TLI->has(LibFunc_memset_pattern16);

307 HasMemcpy = TLI->has(LibFunc_memcpy);

308

309 if (HasMemset || HasMemsetPattern || HasMemcpy)

311 return runOnCountableLoop();

312

313 return runOnNoncountableLoop();

314}

315

316bool LoopIdiomRecognize::runOnCountableLoop() {

318 assert(!isa<SCEVCouldNotCompute>(BECount) &&

319 "runOnCountableLoop() called on a loop without a predictable"

320 "backedge-taken count");

321

322

323

324 if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))

325 if (BECst->getAPInt() == 0)

326 return false;

327

330

334 << "\n");

335

336

337

341 return false;

342

343 bool MadeChange = false;

344

345

346 for (auto *BB : CurLoop->getBlocks()) {

347

349 continue;

350

351 MadeChange |= runOnLoopBlock(BB, BECount, ExitBlocks);

352 }

353 return MadeChange;

354}

355

358 return ConstStride->getAPInt();

359}

360

361

362

363

364

365

366

368

369

370

371

372

373

374 Constant *C = dyn_cast<Constant>(V);

375 if (!C || isa<ConstantExpr>(C))

376 return nullptr;

377

378

381 return nullptr;

382

383

384 if (DL->isBigEndian())

385 return nullptr;

386

387

389

390

391

392 if (Size > 16)

393 return nullptr;

394

395

396 if (Size == 16)

397 return C;

398

399

400 unsigned ArraySize = 16 / Size;

403}
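The helper above widens a small constant store value into the 16-byte pattern that memset_pattern16 expects, replicating it ArraySize = 16 / Size times. A minimal model of that replication step, assuming Size is a power of two between 1 and 16; the real code builds a ConstantArray rather than a byte buffer, and this function is illustrative only.

#include <array>
#include <cstdint>
#include <cstring>
// Illustrative model only -- not part of LoopIdiomRecognize.cpp.
inline std::array<uint8_t, 16> replicatePattern(const void *Elt, unsigned Size) {
  std::array<uint8_t, 16> Pat{};
  for (unsigned I = 0; I != 16 / Size; ++I)   // e.g. Size == 4 gives four copies
    std::memcpy(Pat.data() + I * Size, Elt, Size);
  return Pat;
}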

404

405LoopIdiomRecognize::LegalStoreKind

406LoopIdiomRecognize::isLegalStore(StoreInst *SI) {

407

408 if (SI->isVolatile())

409 return LegalStoreKind::None;

410

411 if (!SI->isUnordered())

412 return LegalStoreKind::None;

413

414

415 if (SI->getMetadata(LLVMContext::MD_nontemporal))

416 return LegalStoreKind::None;

417

418 Value *StoredVal = SI->getValueOperand();

419 Value *StorePtr = SI->getPointerOperand();

420

421

422

424 return LegalStoreKind::None;

425

426

427

428

429 TypeSize SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());

432 return LegalStoreKind::None;

433

434

435

436

438 dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));

439 if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())

440 return LegalStoreKind::None;

441

442

443 if (!isa<SCEVConstant>(StoreEv->getOperand(1)))

444 return LegalStoreKind::None;

445

446

447

448

449

450

451

453

454

455 bool UnorderedAtomic = SI->isUnordered() && !SI->isSimple();

456

457

458

459 if (!UnorderedAtomic && HasMemset && SplatValue && !DisableLIRP::Memset &&

460

461

463

464 return LegalStoreKind::Memset;

465 }

467

470

471 return LegalStoreKind::MemsetPattern;

472 }

473

474

476

477

479 unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());

480 if (StoreSize != Stride && StoreSize != -Stride)

481 return LegalStoreKind::None;

482

483

484 LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());

485

486

488 return LegalStoreKind::None;

489

491 return LegalStoreKind::None;

492

493

494

495

498 if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())

499 return LegalStoreKind::None;

500

501

503 return LegalStoreKind::None;

504

505

506 UnorderedAtomic = UnorderedAtomic || LI->isAtomic();

507 return UnorderedAtomic ? LegalStoreKind::UnorderedAtomicMemcpy

508 : LegalStoreKind::Memcpy;

509 }

510

511 return LegalStoreKind::None;

512}

513

514void LoopIdiomRecognize::collectStores(BasicBlock *BB) {

515 StoreRefsForMemset.clear();

516 StoreRefsForMemsetPattern.clear();

517 StoreRefsForMemcpy.clear();

520 if (!SI)

521 continue;

522

523

524 switch (isLegalStore(SI)) {

525 case LegalStoreKind::None:

526

527 break;

528 case LegalStoreKind::Memset: {

529

531 StoreRefsForMemset[Ptr].push_back(SI);

532 } break;

533 case LegalStoreKind::MemsetPattern: {

534

536 StoreRefsForMemsetPattern[Ptr].push_back(SI);

537 } break;

538 case LegalStoreKind::Memcpy:

539 case LegalStoreKind::UnorderedAtomicMemcpy:

540 StoreRefsForMemcpy.push_back(SI);

541 break;

542 default:

543 assert(false && "unhandled return value");

544 break;

545 }

546 }

547}

548

549

550

551

552bool LoopIdiomRecognize::runOnLoopBlock(

555

556

557

558 for (BasicBlock *ExitBlock : ExitBlocks)

559 if (!DT->dominates(BB, ExitBlock))

560 return false;

561

562 bool MadeChange = false;

563

564 collectStores(BB);

565

566

567

568

569 for (auto &SL : StoreRefsForMemset)

570 MadeChange |= processLoopStores(SL.second, BECount, ForMemset::Yes);

571

572 for (auto &SL : StoreRefsForMemsetPattern)

573 MadeChange |= processLoopStores(SL.second, BECount, ForMemset::No);

574

575

576 for (auto &SI : StoreRefsForMemcpy)

577 MadeChange |= processLoopStoreOfLoopLoad(SI, BECount);

578

579 MadeChange |= processLoopMemIntrinsic(

580 BB, &LoopIdiomRecognize::processLoopMemCpy, BECount);

581 MadeChange |= processLoopMemIntrinsic(

582 BB, &LoopIdiomRecognize::processLoopMemSet, BECount);

583

584 return MadeChange;

585}

586

587

589 const SCEV *BECount, ForMemset For) {

590

593

594

595

597 for (unsigned i = 0, e = SL.size(); i < e; ++i) {

598 assert(SL[i]->isSimple() && "Expected only non-volatile stores.");

599

600 Value *FirstStoredVal = SL[i]->getValueOperand();

601 Value *FirstStorePtr = SL[i]->getPointerOperand();

603 cast<SCEVAddRecExpr>(SE->getSCEV(FirstStorePtr));

605 unsigned FirstStoreSize = DL->getTypeStoreSize(SL[i]->getValueOperand()->getType());

606

607

608 if (FirstStride == FirstStoreSize || -FirstStride == FirstStoreSize) {

610 continue;

611 }

612

613 Value *FirstSplatValue = nullptr;

614 Constant *FirstPatternValue = nullptr;

615

616 if (For == ForMemset::Yes)

618 else

620

621 assert((FirstSplatValue || FirstPatternValue) &&

622 "Expected either splat value or pattern value.");

623

624 IndexQueue.clear();

625

626

627

628

629 unsigned j = 0;

630 for (j = i + 1; j < e; ++j)

632 for (j = i; j > 0; --j)

634

635 for (auto &k : IndexQueue) {

636 assert(SL[k]->isSimple() && "Expected only non-volatile stores.");

637 Value *SecondStorePtr = SL[k]->getPointerOperand();

639 cast<SCEVAddRecExpr>(SE->getSCEV(SecondStorePtr));

641

642 if (FirstStride != SecondStride)

643 continue;

644

645 Value *SecondStoredVal = SL[k]->getValueOperand();

646 Value *SecondSplatValue = nullptr;

647 Constant *SecondPatternValue = nullptr;

648

649 if (For == ForMemset::Yes)

651 else

653

654 assert((SecondSplatValue || SecondPatternValue) &&

655 "Expected either splat value or pattern value.");

656

658 if (For == ForMemset::Yes) {

659 if (isa<UndefValue>(FirstSplatValue))

660 FirstSplatValue = SecondSplatValue;

661 if (FirstSplatValue != SecondSplatValue)

662 continue;

663 } else {

664 if (isa<UndefValue>(FirstPatternValue))

665 FirstPatternValue = SecondPatternValue;

666 if (FirstPatternValue != SecondPatternValue)

667 continue;

668 }

671 ConsecutiveChain[SL[i]] = SL[k];

672 break;

673 }

674 }

675 }

676

677

678

680 bool Changed = false;

681

682

685 continue;

686

687

688

691 unsigned StoreSize = 0;

692

693

694 while (Tails.count(I) || Heads.count(I)) {

695 if (TransformedStores.count(I))

696 break;

697 AdjacentStores.insert(I);

698

699 StoreSize += DL->getTypeStoreSize(I->getValueOperand()->getType());

700

701 I = ConsecutiveChain[I];

702 }

703

708

709

710

711 if (StoreSize != Stride && StoreSize != -Stride)

712 continue;

713

714 bool IsNegStride = StoreSize == -Stride;

715

716 Type *IntIdxTy = DL->getIndexType(StorePtr->getType());

717 const SCEV *StoreSizeSCEV = SE->getConstant(IntIdxTy, StoreSize);

718 if (processLoopStridedStore(StorePtr, StoreSizeSCEV,

720 HeadStore, AdjacentStores, StoreEv, BECount,

721 IsNegStride)) {

722 TransformedStores.insert(AdjacentStores.begin(), AdjacentStores.end());

723 Changed = true;

724 }

725 }

726

727 return Changed;

728}
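processLoopStores chains stores to consecutive addresses within one iteration and treats them as a single wider strided store. A hedged source-level example of the pattern it is looking for; the struct and names are illustrative only.

// Illustrative only: the two 4-byte stores per iteration form an 8-byte chain,
// so the whole loop can become one memset of 8 * N bytes.
struct Pair { int A, B; };
void clearPairs(Pair *P, unsigned N) {
  for (unsigned I = 0; I != N; ++I) {
    P[I].A = 0;
    P[I].B = 0;
  }
}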

729

730

731

732template <typename MemInst>

733bool LoopIdiomRecognize::processLoopMemIntrinsic(

735 bool (LoopIdiomRecognize::*Processor)(MemInst *, const SCEV *),

736 const SCEV *BECount) {

737 bool MadeChange = false;

740

741 if (MemInst *MI = dyn_cast<MemInst>(Inst)) {

743 if (!(this->*Processor)(MI, BECount))

744 continue;

745 MadeChange = true;

746

747

748

749 if (!InstPtr)

751 }

752 }

753 return MadeChange;

754}

755

756

757bool LoopIdiomRecognize::processLoopMemCpy(MemCpyInst *MCI,

758 const SCEV *BECount) {

759

761 return false;

762

763

765 return false;

766

769 if (!Dest || !Source)

770 return false;

771

772

773

774

776 if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())

777 return false;

779 if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())

780 return false;

781

782

783 uint64_t SizeInBytes = cast<ConstantInt>(MCI->getLength())->getZExtValue();

784 if ((SizeInBytes >> 32) != 0)

785 return false;

786

787

788

790 dyn_cast<SCEVConstant>(StoreEv->getOperand(1));

792 dyn_cast<SCEVConstant>(LoadEv->getOperand(1));

793 if (!ConstStoreStride || !ConstLoadStride)

794 return false;

795

796 APInt StoreStrideValue = ConstStoreStride->getAPInt();

797 APInt LoadStrideValue = ConstLoadStride->getAPInt();

798

800 return false;

801

802 if (SizeInBytes != StoreStrideValue && SizeInBytes != -StoreStrideValue) {

803 ORE.emit([&]() {

805 << ore::NV("Inst", "memcpy") << " in "

807 << " function will not be hoisted: "

808 << ore::NV("Reason", "memcpy size is not equal to stride");

809 });

810 return false;

811 }

812

813 int64_t StoreStrideInt = StoreStrideValue.getSExtValue();

814 int64_t LoadStrideInt = LoadStrideValue.getSExtValue();

815

816 if (StoreStrideInt != LoadStrideInt)

817 return false;

818

819 return processLoopStoreOfLoopLoad(

822 BECount);

823}

824

825

826bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,

827 const SCEV *BECount) {

828

830 return false;

831

832

834 return false;

835

837

838

839

840

842 if (!Ev || Ev->getLoop() != CurLoop)

843 return false;

845 LLVM_DEBUG(dbgs() << " Pointer is not affine, abort\n");

846 return false;

847 }

848

851 if (!PointerStrideSCEV || !MemsetSizeSCEV)

852 return false;

853

854 bool IsNegStride = false;

855 const bool IsConstantSize = isa<ConstantInt>(MSI->getLength());

856

857 if (IsConstantSize) {

858

859

860

862 uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();

864 if (!ConstStride)

865 return false;

866

868 if (SizeInBytes != Stride && SizeInBytes != -Stride)

869 return false;

870

871 IsNegStride = SizeInBytes == -Stride;

872 } else {

873

874

875

876

877

878 LLVM_DEBUG(dbgs() << " memset size is non-constant\n");

879 if (Pointer->getType()->getPointerAddressSpace() != 0) {

880 LLVM_DEBUG(dbgs() << " pointer is not in address space zero, "

881 << "abort\n");

882 return false;

883 }

885 LLVM_DEBUG(dbgs() << " memset size is not a loop-invariant, "

886 << "abort\n");

887 return false;

888 }

889

890

892 const SCEV *PositiveStrideSCEV =

894 : PointerStrideSCEV;

895 LLVM_DEBUG(dbgs() << " MemsetSizeSCEV: " << *MemsetSizeSCEV << "\n"

896 << " PositiveStrideSCEV: " << *PositiveStrideSCEV

897 << "\n");

898

899 if (PositiveStrideSCEV != MemsetSizeSCEV) {

900

901

902 const SCEV *FoldedPositiveStride =

904 const SCEV *FoldedMemsetSize =

906

907 LLVM_DEBUG(dbgs() << " Try to fold SCEV based on loop guard\n"

908 << " FoldedMemsetSize: " << *FoldedMemsetSize << "\n"

909 << " FoldedPositiveStride: " << *FoldedPositiveStride

910 << "\n");

911

912 if (FoldedPositiveStride != FoldedMemsetSize) {

914 return false;

915 }

916 }

917 }

918

919

920

922 if (!SplatValue || !CurLoop->isLoopInvariant(SplatValue))

923 return false;

924

927 return processLoopStridedStore(Pointer, SE->getSCEV(MSI->getLength()),

928 MSI->getDestAlign(), SplatValue, MSI, MSIs, Ev,

929 BECount, IsNegStride, true);

930}
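processLoopMemSet handles a memset that already sits inside the loop: when the per-iteration size matches the pointer stride (or a non-constant size is proven equal to it), the inner call is folded into one flat memset. A sketch under that assumption; names are illustrative only.

#include <cstring>
// Illustrative only: each row's memset exactly covers the stride between rows,
// so the loop is equivalent to a single memset over Rows * Cols bytes.
void clearMatrix(char *Buf, unsigned Rows, unsigned Cols) {
  for (unsigned R = 0; R != Rows; ++R)
    std::memset(Buf + R * Cols, 0, Cols);
}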

931

932

933

934

935static bool

937 const SCEV *BECount, const SCEV *StoreSizeSCEV,

940

941

942

944

945

946

947 const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount);

948 const SCEVConstant *ConstSize = dyn_cast<SCEVConstant>(StoreSizeSCEV);

949 if (BECst && ConstSize) {

952

953 if (BEInt && SizeInt)

955 }

956

957

958

959

960

962

965 if (!IgnoredInsts.contains(&I) &&

967 return true;

968 return false;

969}

970

971

972

973

975 Type *IntPtr, const SCEV *StoreSizeSCEV,

978 if (!StoreSizeSCEV->isOne()) {

979

983 }

984

986}

987

988

989

990

991

993 const SCEV *StoreSizeSCEV, Loop *CurLoop,

995 const SCEV *TripCountSCEV =

1000}
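getNumBytes computes the byte count expanded in the preheader as (backedge-taken count + 1) times the store size. A small worked check of that arithmetic in plain C++, independent of SCEV and illustrative only.

// Illustrative arithmetic only -- not part of LoopIdiomRecognize.cpp.
constexpr unsigned long long numBytesFor(unsigned long long BECount, unsigned StoreSize) {
  return (BECount + 1) * StoreSize;   // trip count is backedge-taken count + 1
}
static_assert(numBytesFor(99, 4) == 400, "100 iterations of a 4-byte store");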

1001

1002

1003

1004bool LoopIdiomRecognize::processLoopStridedStore(

1008 const SCEV *BECount, bool IsNegStride, bool IsLoopMemset) {

1011 Constant *PatternValue = nullptr;

1012

1013 if (!SplatValue)

1015

1016 assert((SplatValue || PatternValue) &&

1017 "Expected either splat value or pattern value.");

1018

1019

1020

1021

1027

1028 Type *DestInt8PtrTy = Builder.getPtrTy(DestAS);

1029 Type *IntIdxTy = DL->getIndexType(DestPtr->getType());

1030

1031 bool Changed = false;

1033

1034 if (IsNegStride)

1036

1037

1038

1039 if (!Expander.isSafeToExpand(Start))

1040 return Changed;

1041

1042

1043

1044

1045

1046

1048 Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->getTerminator());

1049

1050

1051

1052

1053

1054

1055

1056

1057 Changed = true;

1058

1060 StoreSizeSCEV, *AA, Stores))

1061 return Changed;

1062

1063 if (avoidLIRForMultiBlockLoop(true, IsLoopMemset))

1064 return Changed;

1065

1066

1067

1068 const SCEV *NumBytesS =

1069 getNumBytes(BECount, IntIdxTy, StoreSizeSCEV, CurLoop, DL, SE);

1070

1071

1072

1073 if (!Expander.isSafeToExpand(NumBytesS))

1074 return Changed;

1075

1076 Value *NumBytes =

1077 Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());

1078

1079 if (!SplatValue && !isLibFuncEmittable(M, TLI, LibFunc_memset_pattern16))

1080 return Changed;

1081

1084 AATags = AATags.merge(Store->getAAMetadata());

1085 if (auto CI = dyn_cast<ConstantInt>(NumBytes))

1086 AATags = AATags.extendTo(CI->getZExtValue());

1087 else

1088 AATags = AATags.extendTo(-1);

1089

1091 if (SplatValue) {

1092 NewCall = Builder.CreateMemSet(

1093 BasePtr, SplatValue, NumBytes, MaybeAlign(StoreAlignment),

1094 false, AATags.TBAA, AATags.Scope, AATags.NoAlias);

1095 } else {

1097

1098 Type *Int8PtrTy = DestInt8PtrTy;

1099

1100 StringRef FuncName = "memset_pattern16";

1102 Builder.getVoidTy(), Int8PtrTy, Int8PtrTy, IntIdxTy);

1104

1105

1106

1109 PatternValue, ".memset_pattern");

1112 Value *PatternPtr = GV;

1113 NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes});

1114

1115

1116 if (AATags.TBAA)

1117 NewCall->setMetadata(LLVMContext::MD_tbaa, AATags.TBAA);

1118

1119 if (AATags.Scope)

1120 NewCall->setMetadata(LLVMContext::MD_alias_scope, AATags.Scope);

1121

1124 }

1125

1127

1128 if (MSSAU) {

1129 MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(

1131 MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);

1132 }

1133

1134 LLVM_DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n"

1135 << " from store to: " << *Ev << " at: " << *TheStore

1136 << "\n");

1137

1138 ORE.emit([&]() {

1141 R << "Transformed loop-strided store in "

1143 << " function into a call to "

1145 << "() intrinsic";

1146 if (!Stores.empty())

1148 for (auto *I : Stores) {

1149 R << ore::NV("FromBlock", I->getParent()->getName())

1151 }

1152 return R;

1153 });

1154

1155

1156

1157 for (auto *I : Stores) {

1158 if (MSSAU)

1159 MSSAU->removeMemoryAccess(I, true);

1161 }

1163 MSSAU->getMemorySSA()->verifyMemorySSA();

1164 ++NumMemSet;

1165 ExpCleaner.markResultUsed();

1166 return true;

1167}

1168

1169

1170

1171

1172bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,

1173 const SCEV *BECount) {

1174 assert(SI->isUnordered() && "Expected only non-volatile non-ordered stores.");

1175

1176 Value *StorePtr = SI->getPointerOperand();

1178 unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());

1179

1180

1181 LoadInst *LI = cast<LoadInst>(SI->getValueOperand());

1182 assert(LI->isUnordered() && "Expected only non-volatile non-ordered loads.");

1183

1184

1185

1186

1189

1191 return processLoopStoreOfLoopLoad(StorePtr, LoadPtr, StoreSizeSCEV,

1192 SI->getAlign(), LI->getAlign(), SI, LI,

1193 StoreEv, LoadEv, BECount);

1194}

1195

1196namespace {

1197class MemmoveVerifier {

1198public:

1199 explicit MemmoveVerifier(const Value &LoadBasePtr, const Value &StoreBasePtr,

1202 LoadBasePtr.stripPointerCasts(), LoadOff, DL)),

1204 StoreBasePtr.stripPointerCasts(), StoreOff, DL)),

1205 IsSameObject(BP1 == BP2) {}

1206

1207 bool loadAndStoreMayFormMemmove(unsigned StoreSize, bool IsNegStride,

1209 bool IsMemCpy) const {

1210 if (IsMemCpy) {

1211

1212

1213 if ((!IsNegStride && LoadOff <= StoreOff) ||

1214 (IsNegStride && LoadOff >= StoreOff))

1215 return false;

1216 } else {

1217

1218

1219 int64_t LoadSize =

1220 DL.getTypeSizeInBits(TheLoad.getType()).getFixedValue() / 8;

1221 if (BP1 != BP2 || LoadSize != int64_t(StoreSize))

1222 return false;

1223 if ((!IsNegStride && LoadOff < StoreOff + int64_t(StoreSize)) ||

1224 (IsNegStride && LoadOff + LoadSize > StoreOff))

1225 return false;

1226 }

1227 return true;

1228 }

1229

1230private:

1232 int64_t LoadOff = 0;

1233 int64_t StoreOff = 0;

1234 const Value *BP1;

1235 const Value *BP2;

1236

1237public:

1238 const bool IsSameObject;

1239};

1240}

1241

1242bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(

1243 Value *DestPtr, Value *SourcePtr, const SCEV *StoreSizeSCEV,

1247

1248

1249

1250

1251 if (isa<MemCpyInlineInst>(TheStore))

1252 return false;

1253

1254

1255

1256

1260

1262

1263 bool Changed = false;

1266 Type *IntIdxTy = Builder.getIntNTy(DL->getIndexSizeInBits(StrAS));

1267

1269 const SCEVConstant *ConstStoreSize = dyn_cast<SCEVConstant>(StoreSizeSCEV);

1270

1271

1272 assert(ConstStoreSize && "store size is expected to be a constant");

1273

1275 bool IsNegStride = StoreSize == -Stride;

1276

1277

1278 if (IsNegStride)

1279 StrStart =

1281

1282

1283

1284

1285

1286

1287

1288 Value *StoreBasePtr = Expander.expandCodeFor(

1289 StrStart, Builder.getPtrTy(StrAS), Preheader->getTerminator());

1290

1291

1292

1293

1294

1295

1296

1297

1298 Changed = true;

1299

1301 IgnoredInsts.insert(TheStore);

1302

1303 bool IsMemCpy = isa<MemCpyInst>(TheStore);

1304 const StringRef InstRemark = IsMemCpy ? "memcpy" : "load and store";

1305

1306 bool LoopAccessStore =

1308 StoreSizeSCEV, *AA, IgnoredInsts);

1309 if (LoopAccessStore) {

1310

1311

1312

1314 return Changed;

1315 IgnoredInsts.insert(TheLoad);

1317 BECount, StoreSizeSCEV, *AA, IgnoredInsts)) {

1318 ORE.emit([&]() {

1320 TheStore)

1321 << ore::NV("Inst", InstRemark) << " in "

1323 << " function will not be hoisted: "

1324 << ore::NV("Reason", "The loop may access store location");

1325 });

1326 return Changed;

1327 }

1328 IgnoredInsts.erase(TheLoad);

1329 }

1330

1333

1334

1335 if (IsNegStride)

1336 LdStart =

1338

1339

1340

1341 Value *LoadBasePtr = Expander.expandCodeFor(LdStart, Builder.getPtrTy(LdAS),

1343

1344

1345

1346 MemmoveVerifier Verifier(*LoadBasePtr, *StoreBasePtr, *DL);

1347 if (IsMemCpy && Verifier.IsSameObject)

1348 IgnoredInsts.erase(TheStore);

1350 StoreSizeSCEV, *AA, IgnoredInsts)) {

1351 ORE.emit([&]() {

1353 << ore::NV("Inst", InstRemark) << " in "

1355 << " function will not be hoisted: "

1356 << ore::NV("Reason", "The loop may access load location");

1357 });

1358 return Changed;

1359 }

1360

1361 bool UseMemMove = IsMemCpy ? Verifier.IsSameObject : LoopAccessStore;

1362 if (UseMemMove)

1363 if (!Verifier.loadAndStoreMayFormMemmove(StoreSize, IsNegStride, *TheLoad,

1364 IsMemCpy))

1365 return Changed;

1366

1367 if (avoidLIRForMultiBlockLoop())

1368 return Changed;

1369

1370

1371

1372 const SCEV *NumBytesS =

1373 getNumBytes(BECount, IntIdxTy, StoreSizeSCEV, CurLoop, DL, SE);

1374

1375 Value *NumBytes =

1376 Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());

1377

1380 AATags = AATags.merge(StoreAATags);

1381 if (auto CI = dyn_cast<ConstantInt>(NumBytes))

1382 AATags = AATags.extendTo(CI->getZExtValue());

1383 else

1384 AATags = AATags.extendTo(-1);

1385

1386 CallInst *NewCall = nullptr;

1387

1388

1389

1391 if (UseMemMove)

1392 NewCall = Builder.CreateMemMove(

1393 StoreBasePtr, StoreAlign, LoadBasePtr, LoadAlign, NumBytes,

1394 false, AATags.TBAA, AATags.Scope, AATags.NoAlias);

1395 else

1396 NewCall =

1397 Builder.CreateMemCpy(StoreBasePtr, StoreAlign, LoadBasePtr, LoadAlign,

1398 NumBytes, false, AATags.TBAA,

1400 } else {

1401

1402 if (UseMemMove)

1403 return Changed;

1404

1405

1406 assert((StoreAlign && LoadAlign) &&

1407 "Expect unordered load/store to have align.");

1408 if (*StoreAlign < StoreSize || *LoadAlign < StoreSize)

1409 return Changed;

1410

1411

1412

1413

1414

1416 return Changed;

1417

1418

1419

1420

1421 NewCall = Builder.CreateElementUnorderedAtomicMemCpy(

1422 StoreBasePtr, *StoreAlign, LoadBasePtr, *LoadAlign, NumBytes, StoreSize,

1424 }

1426

1427 if (MSSAU) {

1428 MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB(

1430 MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);

1431 }

1432

1433 LLVM_DEBUG(dbgs() << " Formed new call: " << *NewCall << "\n"

1434 << " from load ptr=" << *LoadEv << " at: " << *TheLoad

1435 << "\n"

1436 << " from store ptr=" << *StoreEv << " at: " << *TheStore

1437 << "\n");

1438

1439 ORE.emit([&]() {

1442 << "Formed a call to "

1444 << "() intrinsic from " << ore::NV("Inst", InstRemark)

1445 << " instruction in " << ore::NV("Function", TheStore->getFunction())

1446 << " function"

1450 });

1451

1452

1453

1454 if (MSSAU)

1455 MSSAU->removeMemoryAccess(TheStore, true);

1458 MSSAU->getMemorySSA()->verifyMemorySSA();

1459 if (UseMemMove)

1460 ++NumMemMove;

1461 else

1462 ++NumMemCpy;

1463 ExpCleaner.markResultUsed();

1464 return true;

1465}
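When the load and the store are based on the same underlying object, the routine above has to emit memmove rather than memcpy (or give up for the unordered-atomic variant). A hedged example of an overlapping copy loop that only memmove can express; names are illustrative only.

// Illustrative only: source and destination overlap by one element, so the
// loop becomes memmove(A, A + 1, N * sizeof(int)), never memcpy.
void shiftLeft(int *A, unsigned N) {
  for (unsigned I = 0; I != N; ++I)
    A[I] = A[I + 1];
}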

1466

1467

1468

1469

1470bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset,

1471 bool IsLoopMemset) {

1472 if (ApplyCodeSizeHeuristics && CurLoop->getNumBlocks() > 1) {

1473 if (CurLoop->isOutermost() && (!IsMemset || !IsLoopMemset)) {

1475 << " : LIR " << (IsMemset ? "Memset" : "Memcpy")

1476 << " avoided: multi-block top-level loop\n");

1477 return true;

1478 }

1479 }

1480

1481 return false;

1482}

1483

1484bool LoopIdiomRecognize::runOnNoncountableLoop() {

1487 << "] Noncountable Loop %"

1489

1490 return recognizePopcount() || recognizeAndInsertFFS() ||

1491 recognizeShiftUntilBitTest() || recognizeShiftUntilZero() ||

1492 recognizeShiftUntilLessThan();

1493}

1494

1495

1496

1497

1498

1499

1500

1502 bool JmpOnZero = false) {

1504 return nullptr;

1505

1508 return nullptr;

1509

1510 ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));

1511 if (!CmpZero || !CmpZero->isZero())

1512 return nullptr;

1513

1516 if (JmpOnZero)

1518

1522 return Cond->getOperand(0);

1523

1524 return nullptr;

1525}

1526

1527

1528

1529

1530

1532 APInt &Threshold) {

1534 return nullptr;

1535

1538 return nullptr;

1539

1540 ConstantInt *CmpConst = dyn_cast<ConstantInt>(Cond->getOperand(1));

1541 if (!CmpConst)

1542 return nullptr;

1543

1546

1548 Threshold = CmpConst->getValue();

1549 return Cond->getOperand(0);

1550 }

1551

1552 return nullptr;

1553}

1554

1555

1556

1559 auto *PhiX = dyn_cast<PHINode>(VarX);

1560 if (PhiX && PhiX->getParent() == LoopEntry &&

1561 (PhiX->getOperand(0) == DefX || PhiX->getOperand(1) == DefX))

1562 return PhiX;

1563 return nullptr;

1564}

1565

1566

1567

1568

1569

1570

1571

1572

1573

1574

1575

1576

1577

1578

1579

1580

1581

1582

1583

1584

1585

1586

1587

1588

1589

1590

1591

1592

1597 APInt &Threshold) {

1599

1600 DefX = nullptr;

1601 CntInst = nullptr;

1602 CntPhi = nullptr;

1604

1605

1607 dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry,

1608 Threshold))

1609 DefX = dyn_cast<Instruction>(T);

1610 else

1611 return false;

1612

1613

1614 if (!DefX || !isa<PHINode>(DefX))

1615 return false;

1616

1617 PHINode *VarPhi = cast<PHINode>(DefX);

1619 if (Idx == -1)

1620 return false;

1621

1624 return false;

1625

1626

1627 if (DefX->getOpcode() != Instruction::LShr)

1628 return false;

1629

1630 IntrinID = Intrinsic::ctlz;

1632 if (!Shft || !Shft->isOne())

1633 return false;

1634

1636

1637

1638

1639

1640

1641

1642

1643

1646 if (Inst.getOpcode() != Instruction::Add)

1647 continue;

1648

1651 continue;

1652

1654 if (!Phi)

1655 continue;

1656

1657 CntInst = &Inst;

1658 CntPhi = Phi;

1659 break;

1660 }

1661 if (!CntInst)

1662 return false;

1663

1664 return true;

1665}

1666

1667

1668

1669

1670

1671

1672

1673

1674

1675

1676

1677

1678

1679

1680

1681

1682

1683

1684

1685

1686

1687

1688

1689

1690

1694

1695

1698 Value *VarX1, *VarX0;

1699 PHINode *PhiX, *CountPhi;

1700

1701 DefX2 = CountInst = nullptr;

1702 VarX1 = VarX0 = nullptr;

1703 PhiX = CountPhi = nullptr;

1705

1706

1707 {

1709 dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry))

1710 DefX2 = dyn_cast<Instruction>(T);

1711 else

1712 return false;

1713 }

1714

1715

1716 {

1717 if (!DefX2 || DefX2->getOpcode() != Instruction::And)

1718 return false;

1719

1721

1722 if ((SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(0))))

1724 else {

1726 SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(1));

1727 }

1728 if (!SubOneOp || SubOneOp->getOperand(0) != VarX1)

1729 return false;

1730

1732 if (!Dec ||

1733 !((SubOneOp->getOpcode() == Instruction::Sub && Dec->isOne()) ||

1734 (SubOneOp->getOpcode() == Instruction::Add &&

1736 return false;

1737 }

1738 }

1739

1740

1742 if (!PhiX)

1743 return false;

1744

1745

1746 {

1747 CountInst = nullptr;

1750 if (Inst.getOpcode() != Instruction::Add)

1751 continue;

1752

1754 if (!Inc || !Inc->isOne())

1755 continue;

1756

1758 if (!Phi)

1759 continue;

1760

1761

1762 bool LiveOutLoop = false;

1764 if ((cast<Instruction>(U))->getParent() != LoopEntry) {

1765 LiveOutLoop = true;

1766 break;

1767 }

1768 }

1769

1770 if (LiveOutLoop) {

1771 CountInst = &Inst;

1772 CountPhi = Phi;

1773 break;

1774 }

1775 }

1776

1777 if (!CountInst)

1778 return false;

1779 }

1780

1781

1782

1783 {

1784 auto *PreCondBr = dyn_cast<BranchInst>(PreCondBB->getTerminator());

1787 return false;

1788

1789 CntInst = CountInst;

1790 CntPhi = CountPhi;

1791 Var = T;

1792 }

1793

1794 return true;

1795}
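The detector above looks for the classic "clear the lowest set bit and count" loop; the transform then replaces the loop-carried counter with llvm.ctpop of the initial value. A sketch of that idiom in source form, illustrative only.

// Illustrative only -- not part of LoopIdiomRecognize.cpp.
int popcountLoop(unsigned long long X) {
  int Cnt = 0;
  while (X) {        // precondition branch: the loop is only entered when X != 0
    X &= X - 1;      // x.next = x & (x - 1) clears the lowest set bit
    ++Cnt;           // cnt.next = cnt + 1
  }
  return Cnt;
}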

1796

1797

1798

1799

1800

1801

1802

1803

1804

1805

1806

1807

1808

1809

1810

1811

1812

1813

1814

1815

1816

1817

1818

1819

1820

1821

1822

1823

1829 Value *VarX = nullptr;

1830

1831 DefX = nullptr;

1832 CntInst = nullptr;

1833 CntPhi = nullptr;

1835

1836

1838 dyn_cast<BranchInst>(LoopEntry->getTerminator()), LoopEntry))

1839 DefX = dyn_cast<Instruction>(T);

1840 else

1841 return false;

1842

1843

1844 if (!DefX || !DefX->isShift())

1845 return false;

1846 IntrinID = DefX->getOpcode() == Instruction::Shl ? Intrinsic::cttz :

1847 Intrinsic::ctlz;

1849 if (!Shft || !Shft->isOne())

1850 return false;

1852

1853

1855 if (!PhiX)

1856 return false;

1857

1859

1860

1861

1863 return false;

1864

1865

1866

1867

1868

1869

1870

1871

1874 if (Inst.getOpcode() != Instruction::Add)

1875 continue;

1876

1879 continue;

1880

1882 if (!Phi)

1883 continue;

1884

1885 CntInst = &Inst;

1886 CntPhi = Phi;

1887 break;

1888 }

1889 if (!CntInst)

1890 return false;

1891

1892 return true;

1893}
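This detector matches loops that shift a value by one each iteration until it reaches zero, so the trip count can be computed up front with ctlz (for lshr/ashr) or cttz (for shl). A hedged source-level example of the lshr flavor; names are illustrative only.

// Illustrative only: for nonzero X the loop runs bitwidth(X) - ctlz(X) times.
int countShifts(unsigned X) {
  int N = 0;
  while (X) {
    X >>= 1;   // lshr by one each iteration (a shl loop would map to cttz instead)
    ++N;
  }
  return N;
}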

1894

1895

1896

1897bool LoopIdiomRecognize::isProfitableToInsertFFS(Intrinsic::ID IntrinID,

1898 Value *InitX, bool ZeroCheck,

1899 size_t CanonicalSize) {

1902

1903

1906 std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end());

1907

1912 return false;

1913

1914 return true;

1915}

1916

1917

1918

1919

1920bool LoopIdiomRecognize::insertFFSIfProfitable(Intrinsic::ID IntrinID,

1924 bool IsCntPhiUsedOutsideLoop = false;

1926 if (!CurLoop->contains(cast<Instruction>(U))) {

1927 IsCntPhiUsedOutsideLoop = true;

1928 break;

1929 }

1930 bool IsCntInstUsedOutsideLoop = false;

1931 for (User *U : CntInst->users())

1932 if (!CurLoop->contains(cast<Instruction>(U))) {

1933 IsCntInstUsedOutsideLoop = true;

1934 break;

1935 }

1936

1937

1938 if (IsCntInstUsedOutsideLoop && IsCntPhiUsedOutsideLoop)

1939 return false;

1940

1941

1942

1943

1944 bool ZeroCheck = false;

1945

1946

1948

1949

1950

1951

1952

1953 if (!IsCntPhiUsedOutsideLoop) {

1955 if (!PreCondBB)

1956 return false;

1957 auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());

1958 if (!PreCondBI)

1959 return false;

1961 return false;

1962 ZeroCheck = true;

1963 }

1964

1965

1966

1967

1968

1969

1970

1971

1972 size_t IdiomCanonicalSize = 6;

1973 if (!isProfitableToInsertFFS(IntrinID, InitX, ZeroCheck, IdiomCanonicalSize))

1974 return false;

1975

1976 transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,

1978 IsCntPhiUsedOutsideLoop);

1979 return true;

1980}

1981

1982

1983

1984

1985bool LoopIdiomRecognize::recognizeAndInsertFFS() {

1986

1988 return false;

1989

1993 PHINode *CntPhi = nullptr;

1995

1997 DefX))

1998 return false;

1999

2000 return insertFFSIfProfitable(IntrinID, InitX, DefX, CntPhi, CntInst);

2001}

2002

2003bool LoopIdiomRecognize::recognizeShiftUntilLessThan() {

2004

2006 return false;

2007

2011 PHINode *CntPhi = nullptr;

2013

2014 APInt LoopThreshold;

2016 CntPhi, DefX, LoopThreshold))

2017 return false;

2018

2019 if (LoopThreshold == 2) {

2020

2021 return insertFFSIfProfitable(IntrinID, InitX, DefX, CntPhi, CntInst);

2022 }

2023

2024

2025 if (LoopThreshold != 4)

2026 return false;

2027

2028

2030 if (!CurLoop->contains(cast<Instruction>(U)))

2031 return false;

2032

2033

2034

2037 if (!PreCondBB)

2038 return false;

2039 auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());

2040 if (!PreCondBI)

2041 return false;

2042

2043 APInt PreLoopThreshold;

2045 PreLoopThreshold != 2)

2046 return false;

2047

2048 bool ZeroCheck = true;

2049

2050

2051

2052

2053

2054

2055

2056

2057 size_t IdiomCanonicalSize = 6;

2058 if (!isProfitableToInsertFFS(IntrinID, InitX, ZeroCheck, IdiomCanonicalSize))

2059 return false;

2060

2061

2062 transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,

2064 false,

2065 true);

2066 return true;

2067}

2068

2069

2070

2071

2072

2073bool LoopIdiomRecognize::recognizePopcount() {

2075 return false;

2076

2077

2078

2079

2080

2081

2082

2084 return false;

2085

2087 if (LoopBody->size() >= 20) {

2088

2089 return false;

2090 }

2091

2092

2095 return false;

2096 auto *EntryBI = dyn_cast<BranchInst>(PH->getTerminator());

2097 if (!EntryBI || EntryBI->isConditional())

2098 return false;

2099

2100

2101

2103 if (!PreCondBB)

2104 return false;

2105 auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator());

2106 if (!PreCondBI || PreCondBI->isUnconditional())

2107 return false;

2108

2113 return false;

2114

2115 transformLoopToPopcount(PreCondBB, CntInst, CntPhi, Val);

2116 return true;

2117}

2118

2121 Value *Ops[] = {Val};

2123

2126

2127 return CI;

2128}

2129

2135

2138

2139 return CI;

2140}

2141

2142

2143

2144

2145

2146

2147

2148

2149

2150

2151

2152

2153

2154

2155

2156

2157

2158

2159

2160

2161

2162

2163

2164

2165

2166

2167

2168

2169

2170

2171

2172

2173void LoopIdiomRecognize::transformLoopToCountable(

2176 bool ZeroCheck, bool IsCntPhiUsedOutsideLoop, bool InsertSub) {

2178

2179

2181 Builder.SetCurrentDebugLocation(DL);

2182

2183

2184

2185

2186

2187

2188

2189 Value *InitXNext;

2190 if (IsCntPhiUsedOutsideLoop) {

2191 if (DefX->getOpcode() == Instruction::AShr)

2192 InitXNext = Builder.CreateAShr(InitX, 1);

2193 else if (DefX->getOpcode() == Instruction::LShr)

2194 InitXNext = Builder.CreateLShr(InitX, 1);

2195 else if (DefX->getOpcode() == Instruction::Shl)

2196 InitXNext = Builder.CreateShl(InitX, 1);

2197 else

2199 } else

2200 InitXNext = InitX;

2204 Count = Builder.CreateSub(

2206 if (InsertSub)

2207 Count = Builder.CreateSub(Count, ConstantInt::get(CountTy, 1));

2208 Value *NewCount = Count;

2209 if (IsCntPhiUsedOutsideLoop)

2210 Count = Builder.CreateAdd(Count, ConstantInt::get(CountTy, 1));

2211

2212 NewCount = Builder.CreateZExtOrTrunc(NewCount, CntInst->getType());

2213

2215 if (cast<ConstantInt>(CntInst->getOperand(1))->isOne()) {

2216

2217

2218 ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);

2219 if (!InitConst || !InitConst->isZero())

2220 NewCount = Builder.CreateAdd(NewCount, CntInitVal);

2221 } else {

2222

2223

2224 NewCount = Builder.CreateSub(CntInitVal, NewCount);

2225 }

2226

2227

2228

2229

2230

2231

2232

2233

2234

2236 auto *LbBr = cast<BranchInst>(Body->getTerminator());

2237 ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());

2238

2241

2242 Builder.SetInsertPoint(LbCond);

2243 Instruction *TcDec = cast<Instruction>(Builder.CreateSub(

2244 TcPhi, ConstantInt::get(CountTy, 1), "tcdec", false, true));

2245

2248

2253 LbCond->setOperand(1, ConstantInt::get(CountTy, 0));

2254

2255

2256

2257 if (IsCntPhiUsedOutsideLoop)

2259 else

2261

2262

2263

2265}

2266

2267void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB,

2271 auto *PreCondBr = cast<BranchInst>(PreCondBB->getTerminator());

2273

2274

2275

2276

2277

2278

2280 Value *PopCnt, *PopCntZext, *NewCount, *TripCnt;

2281 {

2283 NewCount = PopCntZext =

2284 Builder.CreateZExtOrTrunc(PopCnt, cast<IntegerType>(CntPhi->getType()));

2285

2286 if (NewCount != PopCnt)

2287 (cast<Instruction>(NewCount))->setDebugLoc(DL);

2288

2289

2290 TripCnt = NewCount;

2291

2292

2294 ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);

2295 if (!InitConst || !InitConst->isZero()) {

2296 NewCount = Builder.CreateAdd(NewCount, CntInitVal);

2297 (cast<Instruction>(NewCount))->setDebugLoc(DL);

2298 }

2299 }

2300

2301

2302

2303

2304

2305 {

2306 ICmpInst *PreCond = cast<ICmpInst>(PreCondBr->getCondition());

2307

2308 Value *Opnd0 = PopCntZext;

2309 Value *Opnd1 = ConstantInt::get(PopCntZext->getType(), 0);

2312

2313 ICmpInst *NewPreCond = cast<ICmpInst>(

2314 Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1));

2315 PreCondBr->setCondition(NewPreCond);

2316

2318 }

2319

2320

2321

2322

2323

2324

2325

2326

2327

2328

2329

2330

2331

2332

2333

2334

2335

2336

2337

2338

2339

2341 {

2342 auto *LbBr = cast<BranchInst>(Body->getTerminator());

2343 ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());

2345

2348

2349 Builder.SetInsertPoint(LbCond);

2351 Builder.CreateSub(TcPhi, ConstantInt::get(Ty, 1),

2352 "tcdec", false, true));

2353

2356

2361 LbCond->setOperand(1, ConstantInt::get(Ty, 0));

2362 }

2363

2364

2365

2366

2368

2369

2370

2372}

2373

2374

2378

2380 : SubPattern(SP), L(L) {}

2381

2382 template <typename ITy> bool match(ITy *V) {

2383 return L->isLoopInvariant(V) && SubPattern.match(V);

2384 }

2385};

2386

2387

2388template <typename Ty>

2391}

2392

2393

2394

2395

2396

2397

2398

2399

2400

2401

2402

2403

2404

2405

2406

2407

2408

2409

2410

2411

2412

2413

2414

2419 " Performing shift-until-bittest idiom detection.\n");

2420

2421

2424 return false;

2425 }

2426

2429 assert(LoopPreheaderBB && "There is always a loop preheader.");

2430

2431 using namespace PatternMatch;

2432

2433

2434

2436 Value *CmpLHS, *CmpRHS;

2442 return false;

2443 }

2444

2445

2446

2447 auto MatchVariableBitMask = [&]() {

2454 CurLoop))));

2455 };

2456 auto MatchConstantBitMask = [&]() {

2461 };

2462 auto MatchDecomposableConstantBitMask = [&]() {

2464 if (Res && Res->Mask.isPowerOf2()) {

2466 Pred = Res->Pred;

2467 CurrX = Res->X;

2468 BitMask = ConstantInt::get(CurrX->getType(), Res->Mask);

2469 BitPos = ConstantInt::get(CurrX->getType(), Res->Mask.logBase2());

2470 return true;

2471 }

2472 return false;

2473 };

2474

2475 if (!MatchVariableBitMask() && !MatchConstantBitMask() &&

2476 !MatchDecomposableConstantBitMask()) {

2478 return false;

2479 }

2480

2481

2482 auto *CurrXPN = dyn_cast<PHINode>(CurrX);

2483 if (!CurrXPN || CurrXPN->getParent() != LoopHeaderBB) {

2485 return false;

2486 }

2487

2488 BaseX = CurrXPN->getIncomingValueForBlock(LoopPreheaderBB);

2489 NextX =

2490 dyn_cast<Instruction>(CurrXPN->getIncomingValueForBlock(LoopHeaderBB));

2491

2493 "Expected BaseX to be available in the preheader!");

2494

2496

2498 return false;

2499 }

2500

2501

2502

2504 "Should only get equality predicates here.");

2505

2506

2510 }

2511

2512

2513

2514 if (TrueBB != LoopHeaderBB) {

2516 return false;

2517 }

2518

2519

2520 return true;

2521}
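A hedged example of the shift-until-bittest shape matched above: the value is shifted left by one until the probed bit becomes set, and the rewrite derives the trip count from ctlz of the masked value. The example assumes the bit is eventually reached, exactly as the original loop does; names are illustrative only.

// Illustrative only -- not part of LoopIdiomRecognize.cpp.
unsigned shiftUntilBit(unsigned X, unsigned Bit) {
  while ((X & (1u << Bit)) == 0)   // loop while the tested bit is still clear
    X <<= 1;                       // x.next = x.curr << 1
  return X;
}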

2522

2523

2524

2525

2526

2527

2528

2529

2530

2531

2532

2533

2534

2535

2536

2537

2538

2539

2540

2541

2542

2543

2544

2545

2546

2547

2548

2549

2550

2551

2552

2553

2554

2555

2556

2557

2558

2559

2560

2561

2562

2563

2564

2565

2566

2567

2568

2569

2570

2571

2572

2573bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {

2574 bool MadeChange = false;

2575

2576 Value *X, *BitMask, *BitPos, *XCurr;

2579 XNext)) {

2581 " shift-until-bittest idiom detection failed.\n");

2582 return MadeChange;

2583 }

2585

2586

2587

2588

2591 assert(LoopPreheaderBB && "There is always a loop preheader.");

2592

2594 assert(SuccessorBB && "There is only a single successor.");

2595

2597 Builder.SetCurrentDebugLocation(cast<Instruction>(XCurr)->getDebugLoc());

2598

2600 Type *Ty = X->getType();

2602

2605

2606

2607

2608

2610 IntrID, Ty, {PoisonValue::get(Ty), Builder.getTrue()});

2614 " Intrinsic is too costly, not beneficial\n");

2615 return MadeChange;

2616 }

2620 return MadeChange;

2621 }

2622

2623

2624 MadeChange = true;

2625

2627

2628

2629 std::optional<BasicBlock::iterator> InsertPt = std::nullopt;

2630 if (auto *BitPosI = dyn_cast<Instruction>(BitPos))

2631 InsertPt = BitPosI->getInsertionPointAfterDef();

2632 else

2634 if (!InsertPt)

2635 return false;

2639 return U.getUser() != BitPosFrozen;

2640 });

2641 BitPos = BitPosFrozen;

2642 }

2643

2644

2645

2647 BitPos->getName() + ".lowbitmask");

2649 Builder.CreateOr(LowBitMask, BitMask, BitPos->getName() + ".mask");

2650 Value *XMasked = Builder.CreateAnd(X, Mask, X->getName() + ".masked");

2651 CallInst *XMaskedNumLeadingZeros = Builder.CreateIntrinsic(

2652 IntrID, Ty, {XMasked, Builder.getTrue()},

2653 nullptr, XMasked->getName() + ".numleadingzeros");

2654 Value *XMaskedNumActiveBits = Builder.CreateSub(

2655 ConstantInt::get(Ty, Ty->getScalarSizeInBits()), XMaskedNumLeadingZeros,

2656 XMasked->getName() + ".numactivebits", true,

2657 Bitwidth != 2);

2658 Value *XMaskedLeadingOnePos =

2660 XMasked->getName() + ".leadingonepos", false,

2661 Bitwidth > 2);

2662

2663 Value *LoopBackedgeTakenCount = Builder.CreateSub(

2664 BitPos, XMaskedLeadingOnePos, CurLoop->getName() + ".backedgetakencount",

2665 true, true);

2666

2667

2668 Value *LoopTripCount =

2669 Builder.CreateAdd(LoopBackedgeTakenCount, ConstantInt::get(Ty, 1),

2670 CurLoop->getName() + ".tripcount", true,

2671 Bitwidth != 2);

2672

2673

2674

2675

2676

2677 Value *NewX = Builder.CreateShl(X, LoopBackedgeTakenCount);

2679 if (auto *I = dyn_cast<Instruction>(NewX))

2680 I->copyIRFlags(XNext, true);

2681

2682 Value *NewXNext;

2683

2684

2685

2686

2692 NewXNext = Builder.CreateShl(X, LoopTripCount);

2693 else {

2694

2695

2696

2697 NewXNext = Builder.CreateShl(NewX, ConstantInt::get(Ty, 1));

2698 }

2699

2701 if (auto *I = dyn_cast<Instruction>(NewXNext))

2702 I->copyIRFlags(XNext, true);

2703

2704

2705

2706

2709

2710

2711

2712

2713 Builder.SetInsertPoint(LoopHeaderBB, LoopHeaderBB->begin());

2714 auto *IV = Builder.CreatePHI(Ty, 2, CurLoop->getName() + ".iv");

2715

2716

2717

2718 Builder.SetInsertPoint(LoopHeaderBB->getTerminator());

2719 auto *IVNext =

2720 Builder.CreateAdd(IV, ConstantInt::get(Ty, 1), IV->getName() + ".next",

2721 true, Bitwidth != 2);

2722

2723

2724 auto *IVCheck = Builder.CreateICmpEQ(IVNext, LoopTripCount,

2725 CurLoop->getName() + ".ivcheck");

2726 Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);

2728

2729

2730 IV->addIncoming(ConstantInt::get(Ty, 0), LoopPreheaderBB);

2731 IV->addIncoming(IVNext, LoopHeaderBB);

2732

2733

2734

2735

2737

2738

2739

2741

2742 ++NumShiftUntilBitTest;

2743 return MadeChange;

2744}

2745

2746

2747

2748

2749

2750

2751

2752

2753

2754

2755

2756

2757

2758

2759

2760

2761

2762

2763

2764

2765

2766

2767

2768

2769

2770

2771

2772

2773

2778 const SCEV *&ExtraOffsetExpr,

2779 bool &InvertedCond) {

2781 " Performing shift-until-zero idiom detection.\n");

2782

2783

2786 return false;

2787 }

2788

2789 Instruction *ValShifted, *NBits, *IVNext;

2790 Value *ExtraOffset;

2791

2794 assert(LoopPreheaderBB && "There is always a loop preheader.");

2795

2796 using namespace PatternMatch;

2797

2798

2799

2805 match(ValShiftedIsZero,

2809 return false;

2810 }

2811

2812

2813

2817 return false;

2818 }

2819 IntrinID = ValShifted->getOpcode() == Instruction::Shl ? Intrinsic::cttz

2820 : Intrinsic::ctlz;

2821

2822

2823

2828 else if (match(NBits,

2832 ExtraOffsetExpr = SE->getSCEV(ExtraOffset);

2833 else {

2834 IV = NBits;

2836 }

2837

2838

2839 auto *IVPN = dyn_cast<PHINode>(IV);

2840 if (!IVPN || IVPN->getParent() != LoopHeaderBB) {

2842 return false;

2843 }

2844

2845 Start = IVPN->getIncomingValueForBlock(LoopPreheaderBB);

2846 IVNext = dyn_cast<Instruction>(IVPN->getIncomingValueForBlock(LoopHeaderBB));

2847

2850 return false;

2851 }

2852

2853

2854

2856 "Should only get equality predicates here.");

2857

2858

2860 if (InvertedCond) {

2863 }

2864

2865

2866

2867 if (FalseBB != LoopHeaderBB) {

2869 return false;

2870 }

2871

2872

2873

2874

2875

2876

2877

2878 if (ValShifted->getOpcode() == Instruction::AShr &&

2881 return false;

2882 }

2883

2884

2885 return true;

2886}

2887

2888

2889

2890

2891

2892

2893

2894

2895

2896

2897

2898

2899

2900

2901

2902

2903

2904

2905

2906

2907

2908

2909

2910

2911

2912

2913

2914

2915

2916

2917

2918

2919

2920

2921

2922

2923

2924

2925

2926

2927

2928

2929

2930

2931

2932

2933

2934

2935

2936

2937

2938

2939

2940

2941

2942bool LoopIdiomRecognize::recognizeShiftUntilZero() {

2943 bool MadeChange = false;

2944

2948 Value *Start, *Val;

2949 const SCEV *ExtraOffsetExpr;

2950 bool InvertedCond;

2952 Start, Val, ExtraOffsetExpr, InvertedCond)) {

2954 " shift-until-zero idiom detection failed.\n");

2955 return MadeChange;

2956 }

2958

2959

2960

2961

2964 assert(LoopPreheaderBB && "There is always a loop preheader.");

2965

2967 assert(SuccessorBB && "There is only a single successor.");

2968

2970 Builder.SetCurrentDebugLocation(IV->getDebugLoc());

2971

2974

2977

2978

2979

2980

2982 IntrID, Ty, {PoisonValue::get(Ty), Builder.getFalse()});

2986 " Intrinsic is too costly, not beneficial\n");

2987 return MadeChange;

2988 }

2989

2990

2991 MadeChange = true;

2992

2993 bool OffsetIsZero = false;

2994 if (auto *ExtraOffsetExprC = dyn_cast<SCEVConstant>(ExtraOffsetExpr))

2995 OffsetIsZero = ExtraOffsetExprC->isZero();

2996

2997

2998

2999 CallInst *ValNumLeadingZeros = Builder.CreateIntrinsic(

3000 IntrID, Ty, {Val, Builder.getFalse()},

3001 nullptr, Val->getName() + ".numleadingzeros");

3002 Value *ValNumActiveBits = Builder.CreateSub(

3004 Val->getName() + ".numactivebits", true,

3005 Bitwidth != 2);

3006

3008 Expander.setInsertPoint(&*Builder.GetInsertPoint());

3009 Value *ExtraOffset = Expander.expandCodeFor(ExtraOffsetExpr);

3010

3011 Value *ValNumActiveBitsOffset = Builder.CreateAdd(

3012 ValNumActiveBits, ExtraOffset, ValNumActiveBits->getName() + ".offset",

3013 OffsetIsZero, true);

3014 Value *IVFinal = Builder.CreateIntrinsic(Intrinsic::smax, {Ty},

3015 {ValNumActiveBitsOffset, Start},

3016 nullptr, "iv.final");

3017

3018 auto *LoopBackedgeTakenCount = cast<Instruction>(Builder.CreateSub(

3019 IVFinal, Start, CurLoop->getName() + ".backedgetakencount",

3020 OffsetIsZero, true));

3021

3022

3023

3024 Value *LoopTripCount =

3025 Builder.CreateAdd(LoopBackedgeTakenCount, ConstantInt::get(Ty, 1),

3026 CurLoop->getName() + ".tripcount", true,

3027 Bitwidth != 2);

3028

3029

3030

3031

3032 IV->replaceUsesOutsideBlock(IVFinal, LoopHeaderBB);

3033

3034

3035

3036

3037 Builder.SetInsertPoint(LoopHeaderBB, LoopHeaderBB->begin());

3038 auto *CIV = Builder.CreatePHI(Ty, 2, CurLoop->getName() + ".iv");

3039

3040

3041 Builder.SetInsertPoint(LoopHeaderBB, LoopHeaderBB->getFirstNonPHIIt());

3042 auto *CIVNext =

3043 Builder.CreateAdd(CIV, ConstantInt::get(Ty, 1), CIV->getName() + ".next",

3044 true, Bitwidth != 2);

3045

3046

3047 auto *CIVCheck = Builder.CreateICmpEQ(CIVNext, LoopTripCount,

3048 CurLoop->getName() + ".ivcheck");

3049 auto *NewIVCheck = CIVCheck;

3050 if (InvertedCond) {

3051 NewIVCheck = Builder.CreateNot(CIVCheck);

3052 NewIVCheck->takeName(ValShiftedIsZero);

3053 }

3054

3055

3056 auto *IVDePHId = Builder.CreateAdd(CIV, Start, "", false,

3057 true);

3058 IVDePHId->takeName(IV);

3059

3060

3061 Builder.SetInsertPoint(LoopHeaderBB->getTerminator());

3062 Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB);

3064

3065

3066 CIV->addIncoming(ConstantInt::get(Ty, 0), LoopPreheaderBB);

3067 CIV->addIncoming(CIVNext, LoopHeaderBB);

3068

3069

3070

3071

3073

3074

3075 IV->replaceAllUsesWith(IVDePHId);

3076 IV->eraseFromParent();

3077

3080

3081

3082

3084

3085 ++NumShiftUntilZero;

3086 return MadeChange;

3087}
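The uncountable form handled here increments a shift amount until the shifted value becomes zero; the rewrite computes the final induction value directly, roughly as smax(start, active bits of the value plus any constant offset). A sketch of the source shape, illustrative only and ignoring the C-level caveat about shifts of the full bit width or more.

// Illustrative only -- not part of LoopIdiomRecognize.cpp.
unsigned finalShift(unsigned Val, unsigned Start) {
  unsigned IV = Start;
  while ((Val >> IV) != 0)   // val.shifted = lshr(val, iv); exit once it is zero
    ++IV;                    // iv.next = iv + 1
  return IV;                 // roughly smax(Start, bitwidth - ctlz(Val))
}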

This file implements a class to represent arbitrary precision integral constant values and operations...

MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL

static const Function * getParent(const Value *V)

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

This file contains the declarations for the subclasses of Constant, which represent the different fla...

static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))

Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx

This file defines the DenseMap class.

static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")

static bool mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L, const SCEV *BECount, unsigned StoreSize, AliasAnalysis &AA, SmallPtrSetImpl< Instruction * > &Ignored)

mayLoopAccessLocation - Return true if the specified loop might access the specified pointer location...

Module.h This file contains the declarations for the Module class.

This header defines various interfaces for pass management in LLVM.

This file defines an InstructionCost class that is used when calculating the cost of an instruction,...

static Value * matchCondition(BranchInst *BI, BasicBlock *LoopEntry, bool JmpOnZero=false)

Check if the given conditional branch is based on the comparison between a variable and zero,...

static PHINode * getRecurrenceVar(Value *VarX, Instruction *DefX, BasicBlock *LoopEntry)

static cl::opt< bool, true > DisableLIRPMemset("disable-" DEBUG_TYPE "-memset", cl::desc("Proceed with loop idiom recognize pass, but do " "not convert loop(s) to memset."), cl::location(DisableLIRP::Memset), cl::init(false), cl::ReallyHidden)

static CallInst * createFFSIntrinsic(IRBuilder<> &IRBuilder, Value *Val, const DebugLoc &DL, bool ZeroCheck, Intrinsic::ID IID)

static bool detectShiftUntilLessThanIdiom(Loop *CurLoop, const DataLayout &DL, Intrinsic::ID &IntrinID, Value *&InitX, Instruction *&CntInst, PHINode *&CntPhi, Instruction *&DefX, APInt &Threshold)

Return true if the idiom is detected in the loop.

static bool detectShiftUntilBitTestIdiom(Loop *CurLoop, Value *&BaseX, Value *&BitMask, Value *&BitPos, Value *&CurrX, Instruction *&NextX)

Return true if the idiom is detected in the loop.

static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB, Instruction *&CntInst, PHINode *&CntPhi, Value *&Var)

Return true iff the idiom is detected in the loop.

static Constant * getMemSetPatternValue(Value *V, const DataLayout *DL)

getMemSetPatternValue - If a strided store of the specified value is safe to turn into a memset_patte...

static cl::opt< bool, true > DisableLIRPMemcpy("disable-" DEBUG_TYPE "-memcpy", cl::desc("Proceed with loop idiom recognize pass, but do " "not convert loop(s) to memcpy."), cl::location(DisableLIRP::Memcpy), cl::init(false), cl::ReallyHidden)

static CallInst * createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val, const DebugLoc &DL)

static const SCEV * getNumBytes(const SCEV *BECount, Type *IntPtr, const SCEV *StoreSizeSCEV, Loop *CurLoop, const DataLayout *DL, ScalarEvolution *SE)

Compute the number of bytes as a SCEV from the backedge taken count.

static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL, Intrinsic::ID &IntrinID, Value *&InitX, Instruction *&CntInst, PHINode *&CntPhi, Instruction *&DefX)

Return true if the idiom is detected in the loop.

static const SCEV * getStartForNegStride(const SCEV *Start, const SCEV *BECount, Type *IntPtr, const SCEV *StoreSizeSCEV, ScalarEvolution *SE)

static APInt getStoreStride(const SCEVAddRecExpr *StoreEv)

static Value * matchShiftULTCondition(BranchInst *BI, BasicBlock *LoopEntry, APInt &Threshold)

Check if the given conditional branch is based on an unsigned less-than comparison between a variable...

match_LoopInvariant< Ty > m_LoopInvariant(const Ty &M, const Loop *L)

Matches if the value is loop-invariant.

static cl::opt< bool, true > DisableLIRPAll("disable-" DEBUG_TYPE "-all", cl::desc("Options to disable Loop Idiom Recognize Pass."), cl::location(DisableLIRP::All), cl::init(false), cl::ReallyHidden)

static void deleteDeadInstruction(Instruction *I)

static cl::opt< bool > UseLIRCodeSizeHeurs("use-lir-code-size-heurs", cl::desc("Use loop idiom recognition code size heuristics when compiling " "with -Os/-Oz"), cl::init(true), cl::Hidden)

static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)

Return the first found DebugLoc that has a DILocation, given a range of instructions.

This file implements a map that provides insertion order iteration.

This file provides utility analysis objects describing memory locations.

This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...

Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...

const SmallVectorImpl< MachineOperand > & Cond

assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

static bool isSimple(Instruction *I)

verify safepoint Safepoint IR Verifier

This file implements a set that has insertion order iteration characteristics.

This file defines the SmallPtrSet class.

This file defines the SmallVector class.

This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

#define STATISTIC(VARNAME, DESC)

static SymbolRef::Type getType(const Symbol *Sym)

This pass exposes codegen information to IR-level passes.

static const uint32_t IV[8]

ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)

Check whether or not an instruction may read or write the optionally specified memory location.

Class for arbitrary precision integers.

std::optional< uint64_t > tryZExtValue() const

Get zero extended value if possible.

unsigned getBitWidth() const

Return the number of bits in the APInt.

int64_t getSExtValue() const

Get sign extended value.

A container for analyses that lazily runs them and caches their results.

static ArrayType * get(Type *ElementType, uint64_t NumElements)

This static method is the primary way to construct an ArrayType.

LLVM Basic Block Representation.

iterator begin()

Instruction iterator methods.

iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const

Return a const iterator range over the instructions in the block, skipping any debug instructions.

InstListType::const_iterator getFirstNonPHIIt() const

Iterator returning form of getFirstNonPHI.

const Instruction * getFirstNonPHI() const

Returns a pointer to the first instruction in this block that is not a PHINode instruction.

const Instruction & front() const

const BasicBlock * getSinglePredecessor() const

Return the predecessor of this block if it has a single predecessor block.

const Function * getParent() const

Return the enclosing method, or null if none.

InstListType::iterator iterator

Instruction iterators...

const_iterator getFirstNonPHIOrDbgOrAlloca() const

Returns an iterator to the first instruction in this block that is not a PHINode, a debug intrinsic,...

const Instruction * getTerminator() const LLVM_READONLY

Returns the terminator instruction if the block is well formed or null if the block is not well forme...

BinaryOps getOpcode() const

Conditional or Unconditional Branch instruction.

bool isConditional() const

BasicBlock * getSuccessor(unsigned i) const

Value * getCondition() const

Function * getCalledFunction() const

Returns the function called, or null if this is an indirect function invocation or the function signa...

This class represents a function call, abstracting a target machine's calling convention.

void setPredicate(Predicate P)

Set the predicate for this instruction to the specified value.

Predicate

This enumeration lists the possible predicates for CmpInst subclasses.

@ ICMP_SLE

signed less or equal

@ ICMP_UGT

unsigned greater than

@ ICMP_ULT

unsigned less than

Predicate getInversePredicate() const

For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...

Predicate getPredicate() const

Return the predicate for this instruction.
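
A minimal sketch of the predicate accessors above: inverting a comparison in place, e.g. when a condition must be canonicalised so the interesting successor is the "true" edge; invertCondition is an illustrative name:

#include "llvm/IR/Instructions.h"

static void invertCondition(llvm::ICmpInst *Cmp) {
  // EQ becomes NE, ULT becomes UGE, SLE becomes SGT, and so on.
  Cmp->setPredicate(Cmp->getInversePredicate());
}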

An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...

static Constant * get(ArrayType *T, ArrayRef< Constant * > V)

static Constant * getExactLogBase2(Constant *C)

If C is a scalar/fixed width vector of known powers of 2, then this function returns a new scalar/fix...

This is the shared class of boolean and integer constants.

bool isMinusOne() const

This function will return true iff every bit in this constant is set to true.

bool isOne() const

This is just a convenience method to make client code smaller for a common case.

bool isZero() const

This is just a convenience method to make client code smaller for a common case.

uint64_t getZExtValue() const

Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...

const APInt & getValue() const

Return the constant as an APInt value reference.

static ConstantInt * getBool(LLVMContext &Context, bool V)
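
A minimal sketch using the ConstantInt queries above to test for an all-ones byte, the kind of check a splat-store candidate goes through; isAllOnesByte is a hypothetical helper:

#include "llvm/IR/Constants.h"

static bool isAllOnesByte(const llvm::ConstantInt *CI) {
  // An i8 constant with every bit set (i.e. 0xFF / -1).
  return CI->getBitWidth() == 8 && CI->isMinusOne();
}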

This is an important base class in LLVM.

static Constant * getAllOnesValue(Type *Ty)

A parsed version of the target data layout string in and methods for querying it.

Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.

bool dominates(const BasicBlock *BB, const Use &U) const

Return true if the (end of the) basic block BB dominates the use U.

This class represents a freeze function that returns random concrete value if an operand is either a ...

A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...

void setAlignment(Align Align)

Sets the alignment attribute of the GlobalObject.

void setUnnamedAddr(UnnamedAddr Val)

@ PrivateLinkage

Like Internal, but omit from symbol table.

This instruction compares its operands according to the predicate given to the constructor.

bool isEquality() const

Return true if this predicate is either EQ or NE.

ConstantInt * getInt1(bool V)

Get a constant value representing either true or false.

CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")

Create a call to intrinsic ID with Args, mangled using Types.
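
A minimal sketch of CreateIntrinsic: emitting a ctpop call over the operand's own type, roughly the shape of call a popcount rewrite materialises; emitPopcount is an illustrative name:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"

static llvm::CallInst *emitPopcount(llvm::IRBuilder<> &Builder, llvm::Value *Val) {
  // The intrinsic is mangled on its operand type, e.g. llvm.ctpop.i64.
  return Builder.CreateIntrinsic(llvm::Intrinsic::ctpop, {Val->getType()}, {Val});
}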

This provides a uniform API for creating instructions and inserting them into a basic block: either a...

bool hasNoUnsignedWrap() const LLVM_READONLY

Determine whether the no unsigned wrap flag is set.

bool hasNoSignedWrap() const LLVM_READONLY

Determine whether the no signed wrap flag is set.

void insertBefore(Instruction *InsertPos)

Insert an unlinked instruction into a basic block immediately before the specified instruction.

const DebugLoc & getDebugLoc() const

Return the debug location for this node as a DebugLoc.

const Module * getModule() const

Return the module owning the function this instruction belongs to or nullptr if the function does not...

bool isAtomic() const LLVM_READONLY

Return true if this instruction has an AtomicOrdering of unordered or higher.

InstListType::iterator eraseFromParent()

This method unlinks 'this' from the containing basic block and deletes it.

const Function * getFunction() const

Return the function this instruction belongs to.

void setMetadata(unsigned KindID, MDNode *Node)

Set the metadata of the specified kind to the specified node.

AAMDNodes getAAMetadata() const

Returns the AA metadata for this instruction.

unsigned getOpcode() const

Returns a member of one of the enums like Instruction::Add.

void setDebugLoc(DebugLoc Loc)

Set the debug location information for this instruction.

This class provides an interface for updating the loop pass manager based on mutations to the loop ne...

An instruction for reading from memory.

Value * getPointerOperand()

bool isVolatile() const

Return true if this is a load from a volatile memory location.

Align getAlign() const

Return the alignment of the access that is being performed.

static LocationSize precise(uint64_t Value)

static constexpr LocationSize afterPointer()

Any location after the base pointer (but still within the underlying object).
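
A minimal sketch of building a MemoryLocation with a precise size for a store of known width, the form in which alias queries over the loop body are usually phrased; storeLocation and StoreSize are illustrative:

#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/Instructions.h"
#include <cstdint>

static llvm::MemoryLocation storeLocation(llvm::StoreInst *SI, uint64_t StoreSize) {
  // Exactly StoreSize bytes starting at the store's pointer operand.
  return llvm::MemoryLocation(SI->getPointerOperand(),
                              llvm::LocationSize::precise(StoreSize),
                              SI->getAAMetadata());
}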

bool contains(const LoopT *L) const

Return true if the specified loop is contained within this loop.

bool isOutermost() const

Return true if the loop does not have a parent (natural) loop.

unsigned getNumBlocks() const

Get the number of blocks in this loop in constant time.

unsigned getNumBackEdges() const

Calculate the number of back edges to the loop header.

BlockT * getHeader() const

BlockT * getExitBlock() const

If getExitBlocks would return exactly one block, return that block.

BlockT * getLoopPreheader() const

If there is a preheader for this loop, return it.

ArrayRef< BlockT * > getBlocks() const

Get a list of the basic blocks which make up this loop.

void getUniqueExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const

Return all unique successor blocks of this loop.
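
A minimal sketch of the structural checks these Loop accessors support, of the sort a loop-rewriting pass performs before touching anything; hasSimpleShape is a hypothetical helper:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"

static bool hasSimpleShape(const llvm::Loop *L) {
  // Require a preheader to hoist code into and a single backedge.
  if (!L->getLoopPreheader() || L->getNumBackEdges() != 1)
    return false;
  // Require a single (unique) exit block to place code after the loop.
  llvm::SmallVector<llvm::BasicBlock *, 4> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);
  return ExitBlocks.size() == 1;
}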

block_iterator block_begin() const

PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U)

LoopT * getLoopFor(const BlockT *BB) const

Return the innermost loop that BB lives in.

Represents a single loop in the control flow graph.

bool isLoopInvariant(const Value *V) const

Return true if the specified value is loop invariant.

StringRef getName() const

This class implements a map that also provides access to all stored values in a deterministic order.

This class wraps the llvm.memcpy intrinsic.

Value * getLength() const

Value * getDest() const

This is just like getRawDest, but it strips off any cast instructions (including addrspacecast) that ...

MaybeAlign getDestAlign() const

This class wraps the llvm.memset and llvm.memset.inline intrinsics.

MaybeAlign getSourceAlign() const

Value * getSource() const

This is just like getRawSource, but it strips off any cast instructions that feed it,...

Representation for a specific memory location.

An analysis that produces MemorySSA for a function.

Encapsulates MemorySSA, including all data associated with memory accesses.

A Module instance is used to store all the information related to an LLVM module.

void addIncoming(Value *V, BasicBlock *BB)

Add an incoming value to the end of the PHI list.

Value * getIncomingValueForBlock(const BasicBlock *BB) const

Value * getIncomingValue(unsigned i) const

Return incoming value number x.

int getBasicBlockIndex(const BasicBlock *BB) const

Return the first index of the specified basic block in the value list for this PHI.

static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)

Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
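
A minimal sketch of PHINode::Create plus addIncoming, the pattern used whenever a rewrite introduces a new recurrence in the loop header; every name below is illustrative:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"

static llvm::PHINode *makeCountPhi(llvm::Type *Ty, llvm::BasicBlock *Header,
                                   llvm::Value *Start, llvm::BasicBlock *Preheader,
                                   llvm::Value *Next, llvm::BasicBlock *Latch) {
  // Reserve space for the two incoming edges (preheader and latch).
  llvm::PHINode *Phi =
      llvm::PHINode::Create(Ty, /*NumReservedValues=*/2, "count", Header->begin());
  Phi->addIncoming(Start, Preheader);
  Phi->addIncoming(Next, Latch);
  return Phi;
}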

static PoisonValue * get(Type *T)

Static factory methods - Return a 'poison' object of the specified type.

A set of analyses that are preserved following a run of a transformation pass.

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

This node represents a polynomial recurrence on the trip count of the specified loop.

const SCEV * getStart() const

bool isAffine() const

Return true if this represents an expression A + B*x where A and B are loop invariant values.

const Loop * getLoop() const

This class represents a constant integer value.

ConstantInt * getValue() const

const APInt & getAPInt() const

Helper to remove instructions inserted during SCEV expansion, unless they are marked as used.

This class uses information about analyze scalars to rewrite expressions in canonical form.

const SCEV * getOperand(unsigned i) const

This class represents an analyzed expression in the program.

bool isOne() const

Return true if the expression is a constant one.

bool isNonConstantNegative() const

Return true if the specified scev is negated, but not a constant.

The main scalar evolution driver.

bool isKnownNonNegative(const SCEV *S)

Test if the given expression is known to be non-negative.

const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)

Return the SCEV object corresponding to -V.

const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)

If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...

const SCEV * getZero(Type *Ty)

Return a SCEV for the constant 0 of a specific type.

const SCEV * getConstant(ConstantInt *V)

const SCEV * getSCEV(Value *V)

Return a SCEV expression for the full generality of the specified expression.

const SCEV * getTripCountFromExitCount(const SCEV *ExitCount)

A version of getTripCountFromExitCount below which always picks an evaluation type which can not resu...

void forgetLoop(const Loop *L)

This method should be called by the client when it has changed a loop in a way that may affect Scalar...

bool isLoopInvariant(const SCEV *S, const Loop *L)

Return true if the value of the given SCEV is unchanging in the specified loop.

const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)

Return LHS-RHS.

bool hasLoopInvariantBackedgeTakenCount(const Loop *L)

Return true if the specified loop has an analyzable loop-invariant backedge-taken count.

const SCEV * applyLoopGuards(const SCEV *Expr, const Loop *L)

Try to apply information from loop guards for L to Expr.

const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)

Get a canonical multiply expression, or something simpler if possible.

const SCEV * getTruncateOrZeroExtend(const SCEV *V, Type *Ty, unsigned Depth=0)

Return a SCEV corresponding to a conversion of the input value to the specified type.
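
A minimal sketch combining the ScalarEvolution queries above: fetch the backedge-taken count, bail out on SCEVCouldNotCompute, and convert it into a trip count; getTripCount is an illustrative wrapper:

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"

static const llvm::SCEV *getTripCount(llvm::ScalarEvolution &SE, const llvm::Loop *L) {
  const llvm::SCEV *BECount = SE.getBackedgeTakenCount(L);
  if (llvm::isa<llvm::SCEVCouldNotCompute>(BECount))
    return nullptr; // Not a countable loop.
  return SE.getTripCountFromExitCount(BECount);
}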

A vector that has set insertion semantics.

size_type count(const key_type &key) const

Count the number of elements of a given key in the SetVector.

bool insert(const value_type &X)

Insert a new element into the SetVector.

Simple and conservative implementation of LoopSafetyInfo that can give false-positive answers to its ...

void computeLoopSafetyInfo(const Loop *CurLoop) override

Computes safety information for a loop: checks the loop body & header for the possibility of may-throw exc...

bool anyBlockMayThrow() const override

Returns true iff any block of the loop for which this info is computed contains an instruction that may throw ...
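
A minimal sketch of the two SimpleLoopSafetyInfo calls above, the gate applied before moving memory operations across iterations; loopMayThrow is a hypothetical helper:

#include "llvm/Analysis/MustExecute.h"

static bool loopMayThrow(const llvm::Loop *L) {
  llvm::SimpleLoopSafetyInfo SafetyInfo;
  SafetyInfo.computeLoopSafetyInfo(L);
  return SafetyInfo.anyBlockMayThrow();
}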

A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...

bool erase(PtrType Ptr)

Remove pointer from the set.

size_type count(ConstPtrType Ptr) const

count - Return 1 if the specified pointer is in the set, 0 otherwise.

std::pair< iterator, bool > insert(PtrType Ptr)

Inserts Ptr if and only if there is no element in the container equal to Ptr.

bool contains(ConstPtrType Ptr) const

SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

An instruction for storing to memory.

Value * getValueOperand()

Value * getPointerOperand()

StringRef - Represent a constant reference to a string, i.e.

Provides information about what library functions are available for the current target.

bool has(LibFunc F) const

Tests whether a library function is available.

This pass provides access to the codegen interfaces that are needed for IR-level transformations.

InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const

unsigned getAtomicMemIntrinsicMaxElementSize() const

PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const

Return hardware support for population count.
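
A minimal sketch of getPopcntSupport, the query that decides whether a population-count rewrite is worthwhile; popcountIsFast and the 64-bit width are illustrative choices:

#include "llvm/Analysis/TargetTransformInfo.h"

static bool popcountIsFast(const llvm::TargetTransformInfo &TTI) {
  // Only proceed when the target has fast hardware popcount for i64.
  return TTI.getPopcntSupport(64) == llvm::TargetTransformInfo::PSK_FastHardware;
}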

TargetCostKind

The kind of cost model.

@ TCK_SizeAndLatency

The weighted sum of size and latency.

InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const

This is an approximation of reciprocal throughput of a math/logic op.

@ TCC_Basic

The cost of a typical 'add' instruction.

The instances of the Type class are immutable: once they are created, they are never changed.

unsigned getIntegerBitWidth() const

unsigned getPointerAddressSpace() const

Get the address space of this pointer or pointer vector type.

static IntegerType * getIntNTy(LLVMContext &C, unsigned N)

unsigned getScalarSizeInBits() const LLVM_READONLY

If this is a vector type, return the getPrimitiveSizeInBits value for the element type.

Type * getScalarType() const

If this is a vector type, return the element type, otherwise return 'this'.

A Use represents the edge between a Value definition and its users.

void setOperand(unsigned i, Value *Val)

Value * getOperand(unsigned i) const

unsigned getNumOperands() const

LLVM Value Representation.

Type * getType() const

All values are typed, get the type of this value.

bool hasOneUse() const

Return true if there is exactly one use of this value.

void replaceAllUsesWith(Value *V)

Change all uses of this to point to a new Value.

iterator_range< user_iterator > users()

void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)

Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...

void replaceUsesOutsideBlock(Value *V, BasicBlock *BB)

replaceUsesOutsideBlock - Go through the uses list for this definition and make each use point to "V"...
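
A minimal sketch of replaceUsesWithIf with a predicate that leaves uses inside one block untouched; replaceUsesOutside is an illustrative name. For this particular predicate, Value::replaceUsesOutsideBlock (above) already does the same thing in one call:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"

static void replaceUsesOutside(llvm::Value *Old, llvm::Value *New,
                               const llvm::BasicBlock *Inside) {
  Old->replaceUsesWithIf(New, [&](llvm::Use &U) {
    // Only rewrite uses whose user is an instruction outside 'Inside'.
    auto *UserI = llvm::dyn_cast<llvm::Instruction>(U.getUser());
    return UserI && UserI->getParent() != Inside;
  });
}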

LLVMContext & getContext() const

All values hold a context through their type.

StringRef getName() const

Return a constant reference to the value's name.

void takeName(Value *V)

Transfer the name from V to this value.

Value handle that is nullable, but tries to track the Value.

constexpr ScalarTy getFixedValue() const

constexpr bool isScalable() const

Returns whether the quantity is scaled by a runtime quantity (vscale).

const ParentTy * getParent() const

self_iterator getIterator()

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

@ C

The default llvm calling convention, compatible with C.

BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)

BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)

cst_pred_ty< is_power2 > m_Power2()

Match an integer or vector power-of-2.

BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)

Matches an And with LHS and RHS in either order.

bool match(Val *V, const Pattern &P)

bind_ty< Instruction > m_Instruction(Instruction *&I)

Match an instruction, capturing it if we match.

specificval_ty m_Specific(const Value *V)

Match if we have a specific specified value.

cst_pred_ty< is_one > m_One()

Match an integer 1 or a vector with all elements equal to 1.

match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)

Combine two pattern matchers matching L && R.

brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)

BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)

Matches an Add with LHS and RHS in either order.

class_match< Value > m_Value()

Match an arbitrary value and ignore it.

CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)

BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)

Matches shift operations.

BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)

class_match< BasicBlock > m_BasicBlock()

Match an arbitrary basic block value and ignore it.

is_zero m_Zero()

Match any null constant or a vector with all elements equal to 0.

BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)

cst_pred_ty< icmp_pred_with_threshold > m_SpecificInt_ICMP(ICmpInst::Predicate Predicate, const APInt &Threshold)

Match an integer or vector with every element comparing 'pred' (eq/ne/...) to Threshold.
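
A minimal sketch showing how these matchers compose into the rough shape of a bit-test exit branch, "br (icmp eq/ne (X & Mask), 0), T, F"; the function and variable names are illustrative, not the pass's own:

#include "llvm/IR/Instructions.h"
#include "llvm/IR/PatternMatch.h"

static bool matchBitTestBranch(llvm::Value *V, llvm::Value *&X, llvm::Value *&Mask,
                               llvm::BasicBlock *&TrueBB, llvm::BasicBlock *&FalseBB) {
  using namespace llvm::PatternMatch;
  llvm::CmpPredicate Pred;
  // Conditional branch on a compare of (X & Mask) against zero, any operand order.
  if (!match(V, m_Br(m_ICmp(Pred, m_c_And(m_Value(X), m_Value(Mask)), m_Zero()),
                     TrueBB, FalseBB)))
    return false;
  // Only equality-style bit tests are interesting here.
  return llvm::ICmpInst::isEquality(Pred);
}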

initializer< Ty > init(const Ty &Val)

LocationClass< Ty > location(Ty &L)

DiagnosticInfoOptimizationBase::setExtraArgs setExtraArgs

DiagnosticInfoOptimizationBase::Argument NV

This is an optimization pass for GlobalISel generic memory operations.

bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())

If the specified value is a trivially dead instruction, delete it.

iterator_range< T > make_range(T x, T y)

Convenience function for iterating over sub-ranges.

Value * GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL, bool AllowNonInbounds=true)

Analyze the specified pointer to see if it can be expressed as a base pointer plus a constant offset.

const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)

This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
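
A minimal sketch chaining the two helpers above to trace an access back to a base object and a constant byte offset; baseObject is an illustrative wrapper:

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include <cstdint>

static const llvm::Value *baseObject(llvm::Value *Ptr, const llvm::DataLayout &DL,
                                     int64_t &Offset) {
  Offset = 0;
  // Peel a constant offset, then strip GEPs/casts down to the allocation.
  llvm::Value *Base = llvm::GetPointerBaseWithConstantOffset(Ptr, Offset, DL);
  return llvm::getUnderlyingObject(Base);
}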

bool inferNonMandatoryLibFuncAttrs(Module *M, StringRef Name, const TargetLibraryInfo &TLI)

Analyze the name and prototype of the given function and set any applicable attributes.

bool isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI, LibFunc TheLibFunc)

Check whether the library function is available on the target and also that, in the current Module, it is a...

bool isMustProgress(const Loop *L)

Return true if this loop can be assumed to make progress.

raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

bool isModOrRefSet(const ModRefInfo MRI)

FunctionCallee getOrInsertLibFunc(Module *M, const TargetLibraryInfo &TLI, LibFunc TheLibFunc, FunctionType *T, AttributeList AttributeList)

Calls getOrInsertFunction() and then makes sure to add mandatory argument attributes.

ModRefInfo

Flags indicating whether a memory access modifies or references memory.

@ ModRef

The access may reference and may modify the value stored in memory.

@ Mod

The access may modify the value stored in memory.

bool VerifyMemorySSA

Enables verification of MemorySSA.

std::optional< DecomposedBitTest > decomposeBitTestICmp(Value *LHS, Value *RHS, CmpInst::Predicate Pred, bool LookThroughTrunc=true, bool AllowNonZeroC=false)

Decompose an icmp into the form ((X & Mask) pred C) if possible.

bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, ScalarEvolution &SE, bool CheckType=true)

Returns true if the memory operations A and B are consecutive.

bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)

Return true if this function can prove that V does not have undef bits and is never poison.

PreservedAnalyses getLoopPassPreservedAnalyses()

Returns the minimum set of Analyses that all loop passes must preserve.

Value * isBytewiseValue(Value *V, const DataLayout &DL)

If the specified value can be set by repeating the same byte in memory, return the i8 value that it i...
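
A minimal sketch of isBytewiseValue as an eligibility test for byte-splat stores: a non-null result is the i8 value that, repeated, reproduces the stored value; isMemsetableValue is a hypothetical helper:

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Value.h"

static bool isMemsetableValue(llvm::Value *StoredVal, const llvm::DataLayout &DL) {
  // Non-null means StoredVal is reproducible by repeating a single i8 byte.
  return llvm::isBytewiseValue(StoredVal, DL) != nullptr;
}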

bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)

Returns true if the given value is known to be non-negative.

void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)

Implement std::swap in terms of BitVector swap.

A collection of metadata nodes that might be associated with a memory access used by the alias-analys...

MDNode * TBAAStruct

The tag for type-based alias analysis (tbaa struct).

MDNode * Scope

The tag for alias scope specification (used with noalias).

MDNode * TBAA

The tag for type-based alias analysis.

AAMDNodes merge(const AAMDNodes &Other) const

Given two sets of AAMDNodes applying to potentially different locations, determine the best AAMDNodes...

MDNode * NoAlias

The tag specifying the noalias scope.

AAMDNodes extendTo(ssize_t Len) const

Create a new AAMDNode that describes this AAMDNode after extending it to apply to a series of bytes o...

This struct is a compact representation of a valid (non-zero power of two) alignment.

static bool Memset

When true, Memset is disabled.

static bool All

When true, the entire pass is disabled.

static bool Memcpy

When true, Memcpy is disabled.

The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...

TargetTransformInfo & TTI

This struct is a compact representation of a valid (power of two) or undefined (0) alignment.

Match loop-invariant value.

match_LoopInvariant(const SubPattern_t &SP, const Loop *L)