LLVM: lib/CodeGen/ExpandMemCmp.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

34#include

35

36using namespace llvm;

38

39namespace llvm {

41}

42

43#define DEBUG_TYPE "expand-memcmp"

44

45STATISTIC(NumMemCmpCalls, "Number of memcmp calls");

46STATISTIC(NumMemCmpNotConstant, "Number of memcmp calls without constant size");

48 "Number of memcmp calls with size greater than max size");

49STATISTIC(NumMemCmpInlined, "Number of inlined memcmp calls");

50

53 cl::desc("The number of loads per basic block for inline expansion of "

54 "memcmp that is only being compared against zero."));

55

58 cl::desc("Set maximum number of loads used in expanded memcmp"));

59

61 "max-loads-per-memcmp-opt-size", cl::Hidden,

62 cl::desc("Set maximum number of loads used in expanded memcmp for -Os/Oz"));

63

64namespace {

65

66

67

68

69class MemCmpExpansion {

70 struct ResultBlock {

72 PHINode *PhiSrc1 = nullptr;

73 PHINode *PhiSrc2 = nullptr;

74

75 ResultBlock() = default;

76 };

77

78 CallInst *const CI = nullptr;

79 ResultBlock ResBlock;

80 const uint64_t Size;

81 unsigned MaxLoadSize = 0;

82 uint64_t NumLoadsNonOneByte = 0;

83 const uint64_t NumLoadsPerBlockForZeroCmp;

84 std::vector<BasicBlock *> LoadCmpBlocks;

86 PHINode *PhiRes = nullptr;

87 const bool IsUsedForZeroCmp;

88 const DataLayout &DL;

89 DomTreeUpdater *DTU = nullptr;

91

92

93

94 struct LoadEntry {

95 LoadEntry(unsigned LoadSize, uint64_t Offset)

97 }

98

99

100 unsigned LoadSize;

101

103 };

104 using LoadEntryVector = SmallVector<LoadEntry, 8>;

105 LoadEntryVector LoadSequence;

106

107 void createLoadCmpBlocks();

108 void createResultBlock();

109 void setupResultBlockPHINodes();

110 void setupEndBlockPHINodes();

111 Value *getCompareLoadPairs(unsigned BlockIndex, unsigned &LoadIndex);

112 void emitLoadCompareBlock(unsigned BlockIndex);

113 void emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,

114 unsigned &LoadIndex);

115 void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned OffsetBytes);

116 void emitMemCmpResultBlock();

117 Value *getMemCmpExpansionZeroCase();

118 Value *getMemCmpEqZeroOneBlock();

119 Value *getMemCmpOneBlock();

120 struct LoadPair {

121 Value *Lhs = nullptr;

122 Value *Rhs = nullptr;

123 };

124 LoadPair getLoadPair(Type *LoadSizeType, Type *BSwapSizeType,

125 Type *CmpSizeType, unsigned OffsetBytes);

126

127 static LoadEntryVector

128 computeGreedyLoadSequence(uint64_t Size, llvm::ArrayRef LoadSizes,

129 unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte);

130 static LoadEntryVector

131 computeOverlappingLoadSequence(uint64_t Size, unsigned MaxLoadSize,

132 unsigned MaxNumLoads,

133 unsigned &NumLoadsNonOneByte);

134

135 static void optimiseLoadSequence(

136 LoadEntryVector &LoadSequence,

137 const TargetTransformInfo::MemCmpExpansionOptions &Options,

138 bool IsUsedForZeroCmp);

139

140public:

141 MemCmpExpansion(CallInst *CI, uint64_t Size,

142 const TargetTransformInfo::MemCmpExpansionOptions &Options,

143 const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout,

144 DomTreeUpdater *DTU);

145

146 unsigned getNumBlocks();

147 uint64_t getNumLoads() const { return LoadSequence.size(); }

148

149 Value *getMemCmpExpansion();

150};

151

152MemCmpExpansion::LoadEntryVector MemCmpExpansion::computeGreedyLoadSequence(

153 uint64_t Size, llvm::ArrayRef LoadSizes,

154 const unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte) {

155 NumLoadsNonOneByte = 0;

156 LoadEntryVector LoadSequence;

158 while (Size && !LoadSizes.empty()) {

159 const unsigned LoadSize = LoadSizes.front();

160 const uint64_t NumLoadsForThisSize = Size / LoadSize;

161 if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) {

162

163

164

165

166 return {};

167 }

168 if (NumLoadsForThisSize > 0) {

169 for (uint64_t I = 0; I < NumLoadsForThisSize; ++I) {

170 LoadSequence.push_back({LoadSize, Offset});

172 }

173 if (LoadSize > 1)

174 ++NumLoadsNonOneByte;

176 }

178 }

179 return LoadSequence;

180}

181

182MemCmpExpansion::LoadEntryVector

183MemCmpExpansion::computeOverlappingLoadSequence(uint64_t Size,

184 const unsigned MaxLoadSize,

185 const unsigned MaxNumLoads,

186 unsigned &NumLoadsNonOneByte) {

187

188 if (Size < 2 || MaxLoadSize < 2)

189 return {};

190

191

192

193 const uint64_t NumNonOverlappingLoads = Size / MaxLoadSize;

194 assert(NumNonOverlappingLoads && "there must be at least one load");

195

196

197 Size = Size - NumNonOverlappingLoads * MaxLoadSize;

198

199

200 if (Size == 0)

201 return {};

202

203

204 if ((NumNonOverlappingLoads + 1) > MaxNumLoads)

205 return {};

206

207

208 LoadEntryVector LoadSequence;

210 for (uint64_t I = 0; I < NumNonOverlappingLoads; ++I) {

211 LoadSequence.push_back({MaxLoadSize, Offset});

212 Offset += MaxLoadSize;

213 }

214

215

216 assert(Size > 0 && Size < MaxLoadSize && "broken invariant");

217 LoadSequence.push_back({MaxLoadSize, Offset - (MaxLoadSize - Size)});

218 NumLoadsNonOneByte = 1;

219 return LoadSequence;

220}

221

222void MemCmpExpansion::optimiseLoadSequence(

223 LoadEntryVector &LoadSequence,

224 const TargetTransformInfo::MemCmpExpansionOptions &Options,

225 bool IsUsedForZeroCmp) {

226

227

228

229

230 if (IsUsedForZeroCmp || Options.AllowedTailExpansions.empty())

231 return;

232

233 while (LoadSequence.size() >= 2) {

234 auto Last = LoadSequence[LoadSequence.size() - 1];

235 auto PreLast = LoadSequence[LoadSequence.size() - 2];

236

237

238 if (PreLast.Offset + PreLast.LoadSize != Last.Offset)

239 break;

240

241 auto LoadSize = Last.LoadSize + PreLast.LoadSize;

242 if (find(Options.AllowedTailExpansions, LoadSize) ==

243 Options.AllowedTailExpansions.end())

244 break;

245

246

247 LoadSequence.pop_back();

248 LoadSequence.pop_back();

249 LoadSequence.emplace_back(PreLast.Offset, LoadSize);

250 }

251}

252

253

254

255

256

257

258

259

260

261MemCmpExpansion::MemCmpExpansion(

262 CallInst *const CI, uint64_t Size,

263 const TargetTransformInfo::MemCmpExpansionOptions &Options,

264 const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout,

265 DomTreeUpdater *DTU)

266 : CI(CI), Size(Size), NumLoadsPerBlockForZeroCmp(Options.NumLoadsPerBlock),

267 IsUsedForZeroCmp(IsUsedForZeroCmp), DL(TheDataLayout), DTU(DTU),

268 Builder(CI) {

270

272 while (!LoadSizes.empty() && LoadSizes.front() > Size) {

274 }

275 assert(!LoadSizes.empty() && "cannot load Size bytes");

276 MaxLoadSize = LoadSizes.front();

277

278 unsigned GreedyNumLoadsNonOneByte = 0;

279 LoadSequence = computeGreedyLoadSequence(Size, LoadSizes, Options.MaxNumLoads,

280 GreedyNumLoadsNonOneByte);

281 NumLoadsNonOneByte = GreedyNumLoadsNonOneByte;

282 assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant");

283

284

285 if (Options.AllowOverlappingLoads &&

286 (LoadSequence.empty() || LoadSequence.size() > 2)) {

287 unsigned OverlappingNumLoadsNonOneByte = 0;

288 auto OverlappingLoads = computeOverlappingLoadSequence(

289 Size, MaxLoadSize, Options.MaxNumLoads, OverlappingNumLoadsNonOneByte);

290 if (!OverlappingLoads.empty() &&

291 (LoadSequence.empty() ||

292 OverlappingLoads.size() < LoadSequence.size())) {

293 LoadSequence = OverlappingLoads;

294 NumLoadsNonOneByte = OverlappingNumLoadsNonOneByte;

295 }

296 }

297 assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant");

298 optimiseLoadSequence(LoadSequence, Options, IsUsedForZeroCmp);

299}

300

301unsigned MemCmpExpansion::getNumBlocks() {

302 if (IsUsedForZeroCmp)

303 return getNumLoads() / NumLoadsPerBlockForZeroCmp +

304 (getNumLoads() % NumLoadsPerBlockForZeroCmp != 0 ? 1 : 0);

305 return getNumLoads();

306}

307

308void MemCmpExpansion::createLoadCmpBlocks() {

309 for (unsigned i = 0; i < getNumBlocks(); i++) {

311 EndBlock->getParent(), EndBlock);

312 LoadCmpBlocks.push_back(BB);

313 }

314}

315

316void MemCmpExpansion::createResultBlock() {

318 EndBlock->getParent(), EndBlock);

319}

320

321MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType,

322 Type *BSwapSizeType,

323 Type *CmpSizeType,

324 unsigned OffsetBytes) {

325

330 if (OffsetBytes > 0) {

331 auto *ByteType = Type::getInt8Ty(CI->getContext());

332 LhsSource = Builder.CreateConstGEP1_64(ByteType, LhsSource, OffsetBytes);

333 RhsSource = Builder.CreateConstGEP1_64(ByteType, RhsSource, OffsetBytes);

336 }

337

338

339 Value *Lhs = nullptr;

342 if (!Lhs)

343 Lhs = Builder.CreateAlignedLoad(LoadSizeType, LhsSource, LhsAlign);

344

345 Value *Rhs = nullptr;

348 if (!Rhs)

349 Rhs = Builder.CreateAlignedLoad(LoadSizeType, RhsSource, RhsAlign);

350

351

352 if (BSwapSizeType && LoadSizeType != BSwapSizeType) {

353 Lhs = Builder.CreateZExt(Lhs, BSwapSizeType);

354 Rhs = Builder.CreateZExt(Rhs, BSwapSizeType);

355 }

356

357

358 if (BSwapSizeType) {

360 CI->getModule(), Intrinsic::bswap, BSwapSizeType);

361 Lhs = Builder.CreateCall(Bswap, Lhs);

362 Rhs = Builder.CreateCall(Bswap, Rhs);

363 }

364

365

366 if (CmpSizeType != nullptr && CmpSizeType != Lhs->getType()) {

367 Lhs = Builder.CreateZExt(Lhs, CmpSizeType);

368 Rhs = Builder.CreateZExt(Rhs, CmpSizeType);

369 }

370 return {Lhs, Rhs};

371}

372

373

374

375

376

377void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,

378 unsigned OffsetBytes) {

379 BasicBlock *BB = LoadCmpBlocks[BlockIndex];

381 const LoadPair Loads =

382 getLoadPair(Type::getInt8Ty(CI->getContext()), nullptr,

383 Type::getInt32Ty(CI->getContext()), OffsetBytes);

384 Value *Diff = Builder.CreateSub(Loads.Lhs, Loads.Rhs);

385

387

388 if (BlockIndex < (LoadCmpBlocks.size() - 1)) {

389

390

392 ConstantInt::get(Diff->getType(), 0));

393 BranchInst *CmpBr =

395 Builder.Insert(CmpBr);

396 if (DTU)

398 {{DominatorTree::Insert, BB, EndBlock},

399 {DominatorTree::Insert, BB, LoadCmpBlocks[BlockIndex + 1]}});

400 } else {

401

403 Builder.Insert(CmpBr);

404 if (DTU)

405 DTU->applyUpdates({{DominatorTree::Insert, BB, EndBlock}});

406 }

407}

408

409

410

411

412Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex,

413 unsigned &LoadIndex) {

414 assert(LoadIndex < getNumLoads() &&

415 "getCompareLoadPairs() called with no remaining loads");

416 std::vector<Value *> XorList, OrList;

417 Value *Diff = nullptr;

418

419 const unsigned NumLoads =

420 std::min(getNumLoads() - LoadIndex, NumLoadsPerBlockForZeroCmp);

421

422

423 if (LoadCmpBlocks.empty())

425 else

427

429

430

431

432 IntegerType *const MaxLoadType =

433 NumLoads == 1 ? nullptr

435

436 for (unsigned i = 0; i < NumLoads; ++i, ++LoadIndex) {

437 const LoadEntry &CurLoadEntry = LoadSequence[LoadIndex];

438 const LoadPair Loads = getLoadPair(

440 MaxLoadType, CurLoadEntry.Offset);

441

442 if (NumLoads != 1) {

443

444

445 Diff = Builder.CreateXor(Loads.Lhs, Loads.Rhs);

446 Diff = Builder.CreateZExt(Diff, MaxLoadType);

447 XorList.push_back(Diff);

448 } else {

449

451 }

452 }

453

454 auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> {

455 std::vector<Value *> OutList;

456 for (unsigned i = 0; i < InList.size() - 1; i = i + 2) {

458 OutList.push_back(Or);

459 }

460 if (InList.size() % 2 != 0)

461 OutList.push_back(InList.back());

462 return OutList;

463 };

464

465 if (!Cmp) {

466

467 OrList = pairWiseOr(XorList);

468

469

470 while (OrList.size() != 1) {

471 OrList = pairWiseOr(OrList);

472 }

473

474 assert(Diff && "Failed to find comparison diff");

476 }

477

478 return Cmp;

479}

480

481void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,

482 unsigned &LoadIndex) {

483 Value *Cmp = getCompareLoadPairs(BlockIndex, LoadIndex);

484

485 BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1))

486 ? EndBlock

487 : LoadCmpBlocks[BlockIndex + 1];

488

489

494 Builder.Insert(CmpBr);

495 if (DTU)

496 DTU->applyUpdates({{DominatorTree::Insert, BB, ResBlock.BB},

497 {DominatorTree::Insert, BB, NextBB}});

498

499

500

501

502 if (BlockIndex == LoadCmpBlocks.size() - 1) {

504 PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]);

505 }

506}

507

508

509

510

511

512

513

514

515

516

517void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) {

518

519 const LoadEntry &CurLoadEntry = LoadSequence[BlockIndex];

520

521 if (CurLoadEntry.LoadSize == 1) {

522 MemCmpExpansion::emitLoadCompareByteBlock(BlockIndex, CurLoadEntry.Offset);

523 return;

524 }

525

526 Type *LoadSizeType =

528 Type *BSwapSizeType =

529 DL.isLittleEndian()

532 : nullptr;

535 std::max(MaxLoadSize, (unsigned)PowerOf2Ceil(CurLoadEntry.LoadSize)) * 8);

536 assert(CurLoadEntry.LoadSize <= MaxLoadSize && "Unexpected load type");

537

539

540 const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType, MaxLoadType,

541 CurLoadEntry.Offset);

542

543

544

545 if (!IsUsedForZeroCmp) {

546 ResBlock.PhiSrc1->addIncoming(Loads.Lhs, LoadCmpBlocks[BlockIndex]);

547 ResBlock.PhiSrc2->addIncoming(Loads.Rhs, LoadCmpBlocks[BlockIndex]);

548 }

549

550 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Loads.Lhs, Loads.Rhs);

551 BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1))

552 ? EndBlock

553 : LoadCmpBlocks[BlockIndex + 1];

554

555

560 Builder.Insert(CmpBr);

561 if (DTU)

562 DTU->applyUpdates({{DominatorTree::Insert, BB, NextBB},

563 {DominatorTree::Insert, BB, ResBlock.BB}});

564

565

566

567

568 if (BlockIndex == LoadCmpBlocks.size() - 1) {

570 PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]);

571 }

572}

573

574

575

576

577void MemCmpExpansion::emitMemCmpResultBlock() {

578

579

580 if (IsUsedForZeroCmp) {

583 Value *Res = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 1);

586 Builder.Insert(NewBr);

587 if (DTU)

588 DTU->applyUpdates({{DominatorTree::Insert, ResBlock.BB, EndBlock}});

589 return;

590 }

593

594 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, ResBlock.PhiSrc1,

595 ResBlock.PhiSrc2);

596

599 ConstantInt::get(Builder.getInt32Ty(), 1));

602

605 Builder.Insert(NewBr);

606 if (DTU)

607 DTU->applyUpdates({{DominatorTree::Insert, ResBlock.BB, EndBlock}});

608}

609

610void MemCmpExpansion::setupResultBlockPHINodes() {

613

614 ResBlock.PhiSrc1 =

615 Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src1");

616 ResBlock.PhiSrc2 =

617 Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src2");

618}

619

620void MemCmpExpansion::setupEndBlockPHINodes() {

622 PhiRes = Builder.CreatePHI(Type::getInt32Ty(CI->getContext()), 2, "phi.res");

623}

624

625Value *MemCmpExpansion::getMemCmpExpansionZeroCase() {

626 unsigned LoadIndex = 0;

627

628

629 for (unsigned I = 0; I < getNumBlocks(); ++I) {

630 emitLoadCompareBlockMultipleLoads(I, LoadIndex);

631 }

632

633 emitMemCmpResultBlock();

634 return PhiRes;

635}

636

637

638

639

640Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() {

641 unsigned LoadIndex = 0;

642 Value *Cmp = getCompareLoadPairs(0, LoadIndex);

643 assert(LoadIndex == getNumLoads() && "some entries were not consumed");

645}

646

647

648

649

650

651

652Value *MemCmpExpansion::getMemCmpOneBlock() {

653 bool NeedsBSwap = DL.isLittleEndian() && Size != 1;

655 Type *BSwapSizeType =

657 : nullptr;

658 Type *MaxLoadType =

661

662

663

664 if (Size == 1 || Size == 2) {

665 const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType,

667 return Builder.CreateSub(Loads.Lhs, Loads.Rhs);

668 }

669

670 const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType, MaxLoadType,

671 0);

672

673

674

675

678 CmpPredicate Pred = ICmpInst::Predicate::BAD_ICMP_PREDICATE;

679 bool NeedsZExt = false;

680

681

682

683

684

688 Pred = ICmpInst::ICMP_SLT;

689 NeedsZExt = true;

692

693 Pred = ICmpInst::ICMP_SGE;

696

697 Pred = ICmpInst::ICMP_SLE;

698 } else {

699

701 }

702

703 if (ICmpInst::isSigned(Pred)) {

705 Loads.Lhs, Loads.Rhs);

707 UI->replaceAllUsesWith(Result);

708 UI->eraseFromParent();

710 return nullptr;

711 }

712 }

713

714

716 {Loads.Lhs, Loads.Rhs});

717}

718

719

720

721Value *MemCmpExpansion::getMemCmpExpansion() {

722

723 if (getNumBlocks() != 1) {

725 EndBlock = SplitBlock(StartBlock, CI, DTU, nullptr,

726 nullptr, "endblock");

727 setupEndBlockPHINodes();

728 createResultBlock();

729

730

731

732

733

734 if (!IsUsedForZeroCmp) setupResultBlockPHINodes();

735

736

737 createLoadCmpBlocks();

738

739

740

742 if (DTU)

743 DTU->applyUpdates({{DominatorTree::Insert, StartBlock, LoadCmpBlocks[0]},

744 {DominatorTree::Delete, StartBlock, EndBlock}});

745 }

746

748

749 if (IsUsedForZeroCmp)

750 return getNumBlocks() == 1 ? getMemCmpEqZeroOneBlock()

751 : getMemCmpExpansionZeroCase();

752

753 if (getNumBlocks() == 1)

754 return getMemCmpOneBlock();

755

756 for (unsigned I = 0; I < getNumBlocks(); ++I) {

757 emitLoadCompareBlock(I);

758 }

759

760 emitMemCmpResultBlock();

761 return PhiRes;

762}

763

764

765

766

767

768

769

770

771

772

773

774

775

776

777

778

779

780

781

782

783

784

785

786

787

788

789

790

791

792

793

794

795

796

797

798

799

800

801

802

803

804

805

806

807

808

809

810

811

812

813

814

815

816

817

818

819

820

821

822

823

824

825

826

827

828

829

830

831

832

833

834

835

836

837static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,

838 const TargetLowering *TLI, const DataLayout *DL,

839 ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI,

840 DomTreeUpdater *DTU, const bool IsBCmp) {

841 NumMemCmpCalls++;

842

843

845 return false;

846

847

849 if (!SizeCast) {

850 NumMemCmpNotConstant++;

851 return false;

852 }

853 const uint64_t SizeVal = SizeCast->getZExtValue();

854

855 if (SizeVal == 0) {

856 return false;

857 }

858

859

860 const bool IsUsedForZeroCmp =

864 IsUsedForZeroCmp);

865 if (Options) return false;

866

869

870 if (OptForSize &&

873

876

877 MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL, DTU);

878

879

880 if (Expansion.getNumLoads() == 0) {

881 NumMemCmpGreaterThanMax++;

882 return false;

883 }

884

885 NumMemCmpInlined++;

886

888

891 }

892

893 return true;

894}

895

896

897static bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,

898 const TargetTransformInfo *TTI, const TargetLowering *TL,

899 const DataLayout &DL, ProfileSummaryInfo *PSI,

900 BlockFrequencyInfo *BFI, DomTreeUpdater *DTU);

901

902static PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,

903 const TargetTransformInfo *TTI,

904 const TargetLowering *TL,

905 ProfileSummaryInfo *PSI,

906 BlockFrequencyInfo *BFI, DominatorTree *DT);

907

908class ExpandMemCmpLegacyPass : public FunctionPass {

909public:

910 static char ID;

911

912 ExpandMemCmpLegacyPass() : FunctionPass(ID) {

914 }

915

917 if (skipFunction(F)) return false;

918

919 auto *TPC = getAnalysisIfAvailable();

920 if (!TPC) {

921 return false;

922 }

923 const TargetLowering* TL =

924 TPC->getTM().getSubtargetImpl(F)->getTargetLowering();

925

926 const TargetLibraryInfo *TLI =

927 &getAnalysis().getTLI(F);

928 const TargetTransformInfo *TTI =

929 &getAnalysis().getTTI(F);

930 auto *PSI = &getAnalysis().getPSI();

932 &getAnalysis().getBFI() :

933 nullptr;

934 DominatorTree *DT = nullptr;

935 if (auto *DTWP = getAnalysisIfAvailable())

936 DT = &DTWP->getDomTree();

937 auto PA = runImpl(F, TLI, TTI, TL, PSI, BFI, DT);

938 return !PA.areAllPreserved();

939 }

940

941private:

942 void getAnalysisUsage(AnalysisUsage &AU) const override {

943 AU.addRequired();

944 AU.addRequired();

945 AU.addRequired();

948 FunctionPass::getAnalysisUsage(AU);

949 }

950};

951

952bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,

953 const TargetTransformInfo *TTI, const TargetLowering *TL,

954 const DataLayout &DL, ProfileSummaryInfo *PSI,

955 BlockFrequencyInfo *BFI, DomTreeUpdater *DTU) {

956 for (Instruction &I : BB) {

958 if (!CI) {

959 continue;

960 }

961 LibFunc Func;

963 (Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&

964 expandMemCmp(CI, TTI, TL, &DL, PSI, BFI, DTU, Func == LibFunc_bcmp)) {

965 return true;

966 }

967 }

968 return false;

969}

970

971PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,

972 const TargetTransformInfo *TTI,

973 const TargetLowering *TL, ProfileSummaryInfo *PSI,

974 BlockFrequencyInfo *BFI, DominatorTree *DT) {

975 std::optional DTU;

976 if (DT)

977 DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy);

978

979 const DataLayout& DL = F.getDataLayout();

980 bool MadeChanges = false;

981 for (auto BBIt = F.begin(); BBIt != F.end();) {

982 if (runOnBlock(*BBIt, TLI, TTI, TL, DL, PSI, BFI, DTU ? &*DTU : nullptr)) {

983 MadeChanges = true;

984

985

986 BBIt = F.begin();

987 } else {

988 ++BBIt;

989 }

990 }

991 if (MadeChanges)

992 for (BasicBlock &BB : F)

994 if (!MadeChanges)

996 PreservedAnalyses PA;

997 PA.preserve();

998 return PA;

999}

1000

1001}

1002

1005 const auto *TL = TM->getSubtargetImpl(F)->getTargetLowering();

1009 .getCachedResult(*F.getParent());

1012 : nullptr;

1014

1015 return runImpl(F, &TLI, &TTI, TL, PSI, BFI, DT);

1016}

1017

1018char ExpandMemCmpLegacyPass::ID = 0;

1020 "Expand memcmp() to load/stores", false, false)

1028

1030 return new ExpandMemCmpLegacyPass();

1031}

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL

static bool runOnFunction(Function &F, bool PostInlining)

static bool runImpl(Function &F, const TargetLowering &TLI, const LibcallLoweringInfo &Libcalls, AssumptionCache *AC)

static cl::opt< unsigned > MaxLoadsPerMemcmpOptSize("max-loads-per-memcmp-opt-size", cl::Hidden, cl::desc("Set maximum number of loads used in expanded memcmp for -Os/Oz"))

static cl::opt< unsigned > MaxLoadsPerMemcmp("max-loads-per-memcmp", cl::Hidden, cl::desc("Set maximum number of loads used in expanded memcmp"))

static cl::opt< unsigned > MemCmpEqZeroNumLoadsPerBlock("memcmp-num-loads-per-block", cl::Hidden, cl::init(1), cl::desc("The number of loads per basic block for inline expansion of " "memcmp that is only being compared against zero."))

FunctionAnalysisManager FAM

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

This file contains the declarations for profiling metadata utility functions.

This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

#define STATISTIC(VARNAME, DESC)

Target-Independent Code Generator Pass Configuration Options pass.

This pass exposes codegen information to IR-level passes.

AnalysisUsage & addRequired()

AnalysisUsage & addPreserved()

Add the specified Pass class to the set of analyses preserved by this pass.

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

ArrayRef< T > drop_front(size_t N=1) const

Drop the first N elements of the array.

const T & front() const

front - Get the first element.

bool empty() const

empty - Check if the array is empty.

LLVM Basic Block Representation.

iterator begin()

Instruction iterator methods.

const Function * getParent() const

Return the enclosing method, or null if none.

static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)

Creates a new BasicBlock.

InstListType::iterator iterator

Instruction iterators...

const Instruction * getTerminator() const LLVM_READONLY

Returns the terminator instruction if the block is well formed or null if the block is not well forme...

Analysis pass which computes BlockFrequencyInfo.

BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...

static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)

Value * getArgOperand(unsigned i) const

uint64_t getZExtValue() const

Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...

static LLVM_ABI Constant * getAllOnesValue(Type *Ty)

Analysis pass which computes a DominatorTree.

Legacy analysis pass which computes a DominatorTree.

PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)

Definition ExpandMemCmp.cpp:1003

FunctionPass class - This class is used to implement most global optimizations.

bool hasMinSize() const

Optimize this function for minimum size (-Oz).

void applyUpdates(ArrayRef< UpdateT > Updates)

Submit updates to all available trees.

Predicate getUnsignedPredicate() const

For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.

Value * CreateConstGEP1_64(Type *Ty, Value *Ptr, uint64_t Idx0, const Twine &Name="")

LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)

LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)

IntegerType * getInt32Ty()

Fetch the type representing a 32-bit integer.

BasicBlock * GetInsertBlock() const

void SetCurrentDebugLocation(DebugLoc L)

Set location information used by debugging information.

Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")

LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")

Create a call to intrinsic ID with Args, mangled using Types.

PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")

InstTy * Insert(InstTy *I, const Twine &Name="") const

Insert and return the specified instruction.

Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)

Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)

CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)

void SetInsertPoint(BasicBlock *TheBB)

This specifies that created instructions should be appended to the end of the specified block.

Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")

Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")

Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)

const DebugLoc & getDebugLoc() const

Return the debug location for this node as a DebugLoc.

LLVM_ABI const Module * getModule() const

Return the module owning the function this instruction belongs to or nullptr it the function does not...

LLVM_ABI InstListType::iterator eraseFromParent()

This method unlinks 'this' from the containing basic block and deletes it.

LLVM_ABI const Function * getFunction() const

Return the function this instruction belongs to.

LLVM_ABI void setSuccessor(unsigned Idx, BasicBlock *BB)

Update the specified successor to point at the provided block.

static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)

This static method is the primary way of constructing an IntegerType.

This is an alternative analysis pass to BlockFrequencyInfoWrapperPass.

static void getLazyBFIAnalysisUsage(AnalysisUsage &AU)

Helper for client passes to set up the analysis usage on behalf of this pass.

void addIncoming(Value *V, BasicBlock *BB)

Add an incoming value to the end of the PHI list.

static LLVM_ABI PassRegistry * getPassRegistry()

getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...

A set of analyses that are preserved following a run of a transformation pass.

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

PreservedAnalyses & preserve()

Mark an analysis as preserved.

An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.

bool hasProfileSummary() const

Returns true if profile summary is available.

Analysis pass providing the TargetTransformInfo.

Analysis pass providing the TargetLibraryInfo.

bool getLibFunc(StringRef funcName, LibFunc &F) const

Searches for a particular function name.

This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...

Wrapper pass for TargetTransformInfo.

LLVM_ABI MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const

LLVM_ABI unsigned getIntegerBitWidth() const

Type * getType() const

All values are typed, get the type of this value.

user_iterator user_begin()

LLVM_ABI bool hasOneUser() const

Return true if there is exactly one user of this value.

LLVM_ABI void replaceAllUsesWith(Value *V)

Change all uses of this to point to a new Value.

LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const

Returns an alignment of the pointer value.

LLVM_ABI LLVMContext & getContext() const

All values hold a context through their type.

const ParentTy * getParent() const

constexpr char Align[]

Key for Kernel::Arg::Metadata::mAlign.

unsigned ID

LLVM IR allows to use arbitrary numbers as calling convention identifiers.

@ C

The default llvm calling convention, compatible with C.

@ BasicBlock

Various leaf nodes.

LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})

Look up the Function declaration of the intrinsic id in the Module M.

cst_pred_ty< is_all_ones > m_AllOnes()

Match an integer or vector with all bits set.

specific_intval< false > m_SpecificInt(const APInt &V)

Match a specific integer value or vector with all elements equal to the value.

bool match(Val *V, const Pattern &P)

specificval_ty m_Specific(const Value *V)

Match if we have a specific specified value.

cst_pred_ty< is_one > m_One()

Match an integer 1 or a vector with all elements equal to 1.

SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)

class_match< Value > m_Value()

Match an arbitrary value and ignore it.

BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)

CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)

is_zero m_Zero()

Match any null constant or a vector with all elements equal to 0.

initializer< Ty > init(const Ty &Val)

NodeAddr< FuncNode * > Func

This is an optimization pass for GlobalISel generic memory operations.

FunctionAddr VTableAddr Value

auto find(R &&Range, const T &Val)

Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.

LLVM_ABI bool isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI)

LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, const Function *F=nullptr)

Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy

Provide the ModuleAnalysisManager to Function proxy.

LLVM_ABI bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)

Returns true if machine function MF is suggested to be size-optimized based on the profile.

LLVM_ABI bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)

Scan the specified basic block and try to simplify any instructions in it and recursively delete dead...

uint64_t PowerOf2Ceil(uint64_t A)

Returns the power of two which is greater than or equal to the given value.

LLVM_ABI void initializeExpandMemCmpLegacyPassPass(PassRegistry &)

LLVM_ABI FunctionPass * createExpandMemCmpLegacyPass()

Definition ExpandMemCmp.cpp:1029

IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >

@ Or

Bitwise or logical OR of integers.

decltype(auto) cast(const From &Val)

cast - Return the argument parameter cast to the specified type.

LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)

Split the specified block at the specified instruction.

Align commonAlignment(Align A, uint64_t Offset)

Returns the alignment that satisfies both alignments.

LLVM_ABI Constant * ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty, APInt Offset, const DataLayout &DL)

Return the value that a load from C with offset Offset would produce if it is constant and determinab...

AnalysisManager< Function > FunctionAnalysisManager

Convenience typedef for the Function analysis manager.

unsigned NumLoadsPerBlock