LLVM: lib/CodeGen/ExpandMemCmp.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

33#include

34

35using namespace llvm;

37

38namespace llvm {

40}

41

42#define DEBUG_TYPE "expand-memcmp"

43

44STATISTIC(NumMemCmpCalls, "Number of memcmp calls");

45STATISTIC(NumMemCmpNotConstant, "Number of memcmp calls without constant size");

47 "Number of memcmp calls with size greater than max size");

48STATISTIC(NumMemCmpInlined, "Number of inlined memcmp calls");

49

52 cl::desc("The number of loads per basic block for inline expansion of "

53 "memcmp that is only being compared against zero."));

54

57 cl::desc("Set maximum number of loads used in expanded memcmp"));

58

60 "max-loads-per-memcmp-opt-size", cl::Hidden,

61 cl::desc("Set maximum number of loads used in expanded memcmp for -Os/Oz"));

62

63namespace {

64

65

66

67

68class MemCmpExpansion {

69 struct ResultBlock {

71 PHINode *PhiSrc1 = nullptr;

72 PHINode *PhiSrc2 = nullptr;

73

74 ResultBlock() = default;

75 };

76

78 ResultBlock ResBlock;

80 unsigned MaxLoadSize = 0;

81 uint64_t NumLoadsNonOneByte = 0;

82 const uint64_t NumLoadsPerBlockForZeroCmp;

83 std::vector<BasicBlock *> LoadCmpBlocks;

85 PHINode *PhiRes = nullptr;

86 const bool IsUsedForZeroCmp;

90

91

92

93 struct LoadEntry {

96 }

97

98

99 unsigned LoadSize;

100

102 };

104 LoadEntryVector LoadSequence;

105

106 void createLoadCmpBlocks();

107 void createResultBlock();

108 void setupResultBlockPHINodes();

109 void setupEndBlockPHINodes();

110 Value *getCompareLoadPairs(unsigned BlockIndex, unsigned &LoadIndex);

111 void emitLoadCompareBlock(unsigned BlockIndex);

112 void emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,

113 unsigned &LoadIndex);

114 void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned OffsetBytes);

115 void emitMemCmpResultBlock();

116 Value *getMemCmpExpansionZeroCase();

117 Value *getMemCmpEqZeroOneBlock();

118 Value *getMemCmpOneBlock();

119 struct LoadPair {

120 Value *Lhs = nullptr;

121 Value *Rhs = nullptr;

122 };

123 LoadPair getLoadPair(Type *LoadSizeType, Type *BSwapSizeType,

124 Type *CmpSizeType, unsigned OffsetBytes);

125

126 static LoadEntryVector

128 unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte);

129 static LoadEntryVector

130 computeOverlappingLoadSequence(uint64_t Size, unsigned MaxLoadSize,

131 unsigned MaxNumLoads,

132 unsigned &NumLoadsNonOneByte);

133

134 static void optimiseLoadSequence(

135 LoadEntryVector &LoadSequence,

137 bool IsUsedForZeroCmp);

138

139public:

142 const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout,

144

145 unsigned getNumBlocks();

146 uint64_t getNumLoads() const { return LoadSequence.size(); }

147

148 Value *getMemCmpExpansion();

149};

150

153 const unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte) {

154 NumLoadsNonOneByte = 0;

155 LoadEntryVector LoadSequence;

157 while (Size && !LoadSizes.empty()) {

158 const unsigned LoadSize = LoadSizes.front();

159 const uint64_t NumLoadsForThisSize = Size / LoadSize;

160 if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) {

161

162

163

164

165 return {};

166 }

167 if (NumLoadsForThisSize > 0) {

168 for (uint64_t I = 0; I < NumLoadsForThisSize; ++I) {

169 LoadSequence.push_back({LoadSize, Offset});

171 }

172 if (LoadSize > 1)

173 ++NumLoadsNonOneByte;

175 }

177 }

178 return LoadSequence;

179}

180

182MemCmpExpansion::computeOverlappingLoadSequence(uint64_t Size,

183 const unsigned MaxLoadSize,

184 const unsigned MaxNumLoads,

185 unsigned &NumLoadsNonOneByte) {

186

187 if (Size < 2 || MaxLoadSize < 2)

188 return {};

189

190

191

192 const uint64_t NumNonOverlappingLoads = Size / MaxLoadSize;

193 assert(NumNonOverlappingLoads && "there must be at least one load");

194

195

196 Size = Size - NumNonOverlappingLoads * MaxLoadSize;

197

198

199 if (Size == 0)

200 return {};

201

202

203 if ((NumNonOverlappingLoads + 1) > MaxNumLoads)

204 return {};

205

206

207 LoadEntryVector LoadSequence;

209 for (uint64_t I = 0; I < NumNonOverlappingLoads; ++I) {

210 LoadSequence.push_back({MaxLoadSize, Offset});

211 Offset += MaxLoadSize;

212 }

213

214

215 assert(Size > 0 && Size < MaxLoadSize && "broken invariant");

216 LoadSequence.push_back({MaxLoadSize, Offset - (MaxLoadSize - Size)});

217 NumLoadsNonOneByte = 1;

218 return LoadSequence;

219}

220

221void MemCmpExpansion::optimiseLoadSequence(

222 LoadEntryVector &LoadSequence,

224 bool IsUsedForZeroCmp) {

225

226

227

228

229 if (IsUsedForZeroCmp || Options.AllowedTailExpansions.empty())

230 return;

231

232 while (LoadSequence.size() >= 2) {

233 auto Last = LoadSequence[LoadSequence.size() - 1];

234 auto PreLast = LoadSequence[LoadSequence.size() - 2];

235

236

237 if (PreLast.Offset + PreLast.LoadSize != Last.Offset)

238 break;

239

240 auto LoadSize = Last.LoadSize + PreLast.LoadSize;

241 if (find(Options.AllowedTailExpansions, LoadSize) ==

242 Options.AllowedTailExpansions.end())

243 break;

244

245

246 LoadSequence.pop_back();

247 LoadSequence.pop_back();

248 LoadSequence.emplace_back(PreLast.Offset, LoadSize);

249 }

250}

251

252

253

254

255

256

257

258

259

260MemCmpExpansion::MemCmpExpansion(

263 const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout,

265 : CI(CI), Size(Size), NumLoadsPerBlockForZeroCmp(Options.NumLoadsPerBlock),

266 IsUsedForZeroCmp(IsUsedForZeroCmp), DL(TheDataLayout), DTU(DTU),

267 Builder(CI) {

269

271 while (!LoadSizes.empty() && LoadSizes.front() > Size) {

273 }

274 assert(!LoadSizes.empty() && "cannot load Size bytes");

275 MaxLoadSize = LoadSizes.front();

276

277 unsigned GreedyNumLoadsNonOneByte = 0;

278 LoadSequence = computeGreedyLoadSequence(Size, LoadSizes, Options.MaxNumLoads,

279 GreedyNumLoadsNonOneByte);

280 NumLoadsNonOneByte = GreedyNumLoadsNonOneByte;

281 assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant");

282

283

284 if (Options.AllowOverlappingLoads &&

285 (LoadSequence.empty() || LoadSequence.size() > 2)) {

286 unsigned OverlappingNumLoadsNonOneByte = 0;

287 auto OverlappingLoads = computeOverlappingLoadSequence(

288 Size, MaxLoadSize, Options.MaxNumLoads, OverlappingNumLoadsNonOneByte);

289 if (!OverlappingLoads.empty() &&

290 (LoadSequence.empty() ||

291 OverlappingLoads.size() < LoadSequence.size())) {

292 LoadSequence = OverlappingLoads;

293 NumLoadsNonOneByte = OverlappingNumLoadsNonOneByte;

294 }

295 }

296 assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant");

297 optimiseLoadSequence(LoadSequence, Options, IsUsedForZeroCmp);

298}

299

300unsigned MemCmpExpansion::getNumBlocks() {

301 if (IsUsedForZeroCmp)

302 return getNumLoads() / NumLoadsPerBlockForZeroCmp +

303 (getNumLoads() % NumLoadsPerBlockForZeroCmp != 0 ? 1 : 0);

304 return getNumLoads();

305}

306

307void MemCmpExpansion::createLoadCmpBlocks() {

308 for (unsigned i = 0; i < getNumBlocks(); i++) {

310 EndBlock->getParent(), EndBlock);

311 LoadCmpBlocks.push_back(BB);

312 }

313}

314

315void MemCmpExpansion::createResultBlock() {

317 EndBlock->getParent(), EndBlock);

318}

319

320MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType,

321 Type *BSwapSizeType,

322 Type *CmpSizeType,

323 unsigned OffsetBytes) {

324

329 if (OffsetBytes > 0) {

331 LhsSource = Builder.CreateConstGEP1_64(ByteType, LhsSource, OffsetBytes);

332 RhsSource = Builder.CreateConstGEP1_64(ByteType, RhsSource, OffsetBytes);

335 }

336

337

338 Value *Lhs = nullptr;

339 if (auto *C = dyn_cast(LhsSource))

341 if (!Lhs)

342 Lhs = Builder.CreateAlignedLoad(LoadSizeType, LhsSource, LhsAlign);

343

344 Value *Rhs = nullptr;

345 if (auto *C = dyn_cast(RhsSource))

347 if (!Rhs)

348 Rhs = Builder.CreateAlignedLoad(LoadSizeType, RhsSource, RhsAlign);

349

350

351 if (BSwapSizeType && LoadSizeType != BSwapSizeType) {

352 Lhs = Builder.CreateZExt(Lhs, BSwapSizeType);

353 Rhs = Builder.CreateZExt(Rhs, BSwapSizeType);

354 }

355

356

357 if (BSwapSizeType) {

359 CI->getModule(), Intrinsic::bswap, BSwapSizeType);

360 Lhs = Builder.CreateCall(Bswap, Lhs);

361 Rhs = Builder.CreateCall(Bswap, Rhs);

362 }

363

364

365 if (CmpSizeType != nullptr && CmpSizeType != Lhs->getType()) {

366 Lhs = Builder.CreateZExt(Lhs, CmpSizeType);

367 Rhs = Builder.CreateZExt(Rhs, CmpSizeType);

368 }

369 return {Lhs, Rhs};

370}

371

372

373

374

375

376void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,

377 unsigned OffsetBytes) {

378 BasicBlock *BB = LoadCmpBlocks[BlockIndex];

380 const LoadPair Loads =

383 Value *Diff = Builder.CreateSub(Loads.Lhs, Loads.Rhs);

384

386

387 if (BlockIndex < (LoadCmpBlocks.size() - 1)) {

388

389

391 ConstantInt::get(Diff->getType(), 0));

394 Builder.Insert(CmpBr);

395 if (DTU)

397 {{DominatorTree::Insert, BB, EndBlock},

398 {DominatorTree::Insert, BB, LoadCmpBlocks[BlockIndex + 1]}});

399 } else {

400

402 Builder.Insert(CmpBr);

403 if (DTU)

404 DTU->applyUpdates({{DominatorTree::Insert, BB, EndBlock}});

405 }

406}

407

408

409

410

411Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex,

412 unsigned &LoadIndex) {

413 assert(LoadIndex < getNumLoads() &&

414 "getCompareLoadPairs() called with no remaining loads");

415 std::vector<Value *> XorList, OrList;

416 Value *Diff = nullptr;

417

418 const unsigned NumLoads =

419 std::min(getNumLoads() - LoadIndex, NumLoadsPerBlockForZeroCmp);

420

421

422 if (LoadCmpBlocks.empty())

424 else

426

428

429

430

432 NumLoads == 1 ? nullptr

434

435 for (unsigned i = 0; i < NumLoads; ++i, ++LoadIndex) {

436 const LoadEntry &CurLoadEntry = LoadSequence[LoadIndex];

437 const LoadPair Loads = getLoadPair(

439 MaxLoadType, CurLoadEntry.Offset);

440

441 if (NumLoads != 1) {

442

443

444 Diff = Builder.CreateXor(Loads.Lhs, Loads.Rhs);

445 Diff = Builder.CreateZExt(Diff, MaxLoadType);

446 XorList.push_back(Diff);

447 } else {

448

450 }

451 }

452

453 auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> {

454 std::vector<Value *> OutList;

455 for (unsigned i = 0; i < InList.size() - 1; i = i + 2) {

457 OutList.push_back(Or);

458 }

459 if (InList.size() % 2 != 0)

460 OutList.push_back(InList.back());

461 return OutList;

462 };

463

464 if (!Cmp) {

465

466 OrList = pairWiseOr(XorList);

467

468

469 while (OrList.size() != 1) {

470 OrList = pairWiseOr(OrList);

471 }

472

473 assert(Diff && "Failed to find comparison diff");

475 }

476

477 return Cmp;

478}

479

480void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,

481 unsigned &LoadIndex) {

482 Value *Cmp = getCompareLoadPairs(BlockIndex, LoadIndex);

483

484 BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1))

485 ? EndBlock

486 : LoadCmpBlocks[BlockIndex + 1];

487

488

491 Builder.Insert(CmpBr);

492 if (DTU)

493 DTU->applyUpdates({{DominatorTree::Insert, BB, ResBlock.BB},

494 {DominatorTree::Insert, BB, NextBB}});

495

496

497

498

499 if (BlockIndex == LoadCmpBlocks.size() - 1) {

501 PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]);

502 }

503}

504

505

506

507

508

509

510

511

512

513

514void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) {

515

516 const LoadEntry &CurLoadEntry = LoadSequence[BlockIndex];

517

518 if (CurLoadEntry.LoadSize == 1) {

519 MemCmpExpansion::emitLoadCompareByteBlock(BlockIndex, CurLoadEntry.Offset);

520 return;

521 }

522

523 Type *LoadSizeType =

525 Type *BSwapSizeType =

526 DL.isLittleEndian()

529 : nullptr;

532 std::max(MaxLoadSize, (unsigned)PowerOf2Ceil(CurLoadEntry.LoadSize)) * 8);

533 assert(CurLoadEntry.LoadSize <= MaxLoadSize && "Unexpected load type");

534

536

537 const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType, MaxLoadType,

538 CurLoadEntry.Offset);

539

540

541

542 if (!IsUsedForZeroCmp) {

543 ResBlock.PhiSrc1->addIncoming(Loads.Lhs, LoadCmpBlocks[BlockIndex]);

544 ResBlock.PhiSrc2->addIncoming(Loads.Rhs, LoadCmpBlocks[BlockIndex]);

545 }

546

547 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Loads.Lhs, Loads.Rhs);

548 BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1))

549 ? EndBlock

550 : LoadCmpBlocks[BlockIndex + 1];

551

552

555 Builder.Insert(CmpBr);

556 if (DTU)

557 DTU->applyUpdates({{DominatorTree::Insert, BB, NextBB},

558 {DominatorTree::Insert, BB, ResBlock.BB}});

559

560

561

562

563 if (BlockIndex == LoadCmpBlocks.size() - 1) {

565 PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]);

566 }

567}

568

569

570

571

572void MemCmpExpansion::emitMemCmpResultBlock() {

573

574

575 if (IsUsedForZeroCmp) {

581 Builder.Insert(NewBr);

582 if (DTU)

583 DTU->applyUpdates({{DominatorTree::Insert, ResBlock.BB, EndBlock}});

584 return;

585 }

588

589 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, ResBlock.PhiSrc1,

590 ResBlock.PhiSrc2);

591

594 ConstantInt::get(Builder.getInt32Ty(), 1));

595

598 Builder.Insert(NewBr);

599 if (DTU)

600 DTU->applyUpdates({{DominatorTree::Insert, ResBlock.BB, EndBlock}});

601}

602

603void MemCmpExpansion::setupResultBlockPHINodes() {

606

607 ResBlock.PhiSrc1 =

608 Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src1");

609 ResBlock.PhiSrc2 =

610 Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src2");

611}

612

613void MemCmpExpansion::setupEndBlockPHINodes() {

616}

617

618Value *MemCmpExpansion::getMemCmpExpansionZeroCase() {

619 unsigned LoadIndex = 0;

620

621

622 for (unsigned I = 0; I < getNumBlocks(); ++I) {

623 emitLoadCompareBlockMultipleLoads(I, LoadIndex);

624 }

625

626 emitMemCmpResultBlock();

627 return PhiRes;

628}

629

630

631

632

633Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() {

634 unsigned LoadIndex = 0;

635 Value *Cmp = getCompareLoadPairs(0, LoadIndex);

636 assert(LoadIndex == getNumLoads() && "some entries were not consumed");

638}

639

640

641

642

643

644

645Value *MemCmpExpansion::getMemCmpOneBlock() {

646 bool NeedsBSwap = DL.isLittleEndian() && Size != 1;

648 Type *BSwapSizeType =

650 : nullptr;

651 Type *MaxLoadType =

654

655

656

657 if (Size == 1 || Size == 2) {

658 const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType,

660 return Builder.CreateSub(Loads.Lhs, Loads.Rhs);

661 }

662

663 const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType, MaxLoadType,

664 0);

665

666

667

668

670 auto *UI = cast(*CI->user_begin());

671 CmpPredicate Pred = ICmpInst::Predicate::BAD_ICMP_PREDICATE;

672 bool NeedsZExt = false;

673

674

675

676

677

681 Pred = ICmpInst::ICMP_SLT;

682 NeedsZExt = true;

685

686 Pred = ICmpInst::ICMP_SGE;

689

690 Pred = ICmpInst::ICMP_SLE;

691 } else {

692

694 }

695

696 if (ICmpInst::isSigned(Pred)) {

698 Loads.Lhs, Loads.Rhs);

700 UI->replaceAllUsesWith(Result);

701 UI->eraseFromParent();

703 return nullptr;

704 }

705 }

706

707

709 {Loads.Lhs, Loads.Rhs});

710}

711

712

713

714Value *MemCmpExpansion::getMemCmpExpansion() {

715

716 if (getNumBlocks() != 1) {

718 EndBlock = SplitBlock(StartBlock, CI, DTU, nullptr,

719 nullptr, "endblock");

720 setupEndBlockPHINodes();

721 createResultBlock();

722

723

724

725

726

727 if (!IsUsedForZeroCmp) setupResultBlockPHINodes();

728

729

730 createLoadCmpBlocks();

731

732

733

735 if (DTU)

736 DTU->applyUpdates({{DominatorTree::Insert, StartBlock, LoadCmpBlocks[0]},

737 {DominatorTree::Delete, StartBlock, EndBlock}});

738 }

739

741

742 if (IsUsedForZeroCmp)

743 return getNumBlocks() == 1 ? getMemCmpEqZeroOneBlock()

744 : getMemCmpExpansionZeroCase();

745

746 if (getNumBlocks() == 1)

747 return getMemCmpOneBlock();

748

749 for (unsigned I = 0; I < getNumBlocks(); ++I) {

750 emitLoadCompareBlock(I);

751 }

752

753 emitMemCmpResultBlock();

754 return PhiRes;

755}

756

757

758

759

760

761

762

763

764

765

766

767

768

769

770

771

772

773

774

775

776

777

778

779

780

781

782

783

784

785

786

787

788

789

790

791

792

793

794

795

796

797

798

799

800

801

802

803

804

805

806

807

808

809

810

811

812

813

814

815

816

817

818

819

820

821

822

823

824

825

826

827

828

829

834 NumMemCmpCalls++;

835

836

838 return false;

839

840

842 if (!SizeCast) {

843 NumMemCmpNotConstant++;

844 return false;

845 }

847

848 if (SizeVal == 0) {

849 return false;

850 }

851

852

853 const bool IsUsedForZeroCmp =

857 IsUsedForZeroCmp);

858 if (Options) return false;

859

862

863 if (OptForSize &&

866

869

870 MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL, DTU);

871

872

873 if (Expansion.getNumLoads() == 0) {

874 NumMemCmpGreaterThanMax++;

875 return false;

876 }

877

878 NumMemCmpInlined++;

879

881

884 }

885

886 return true;

887}

888

889

894

900

901class ExpandMemCmpLegacyPass : public FunctionPass {

902public:

903 static char ID;

904

907 }

908

911

912 auto *TPC = getAnalysisIfAvailable();

913 if (!TPC) {

914 return false;

915 }

917 TPC->getTM<TargetMachine>().getSubtargetImpl(F)->getTargetLowering();

918

920 &getAnalysis().getTLI(F);

922 &getAnalysis().getTTI(F);

923 auto *PSI = &getAnalysis().getPSI();

925 &getAnalysis().getBFI() :

926 nullptr;

928 if (auto *DTWP = getAnalysisIfAvailable())

929 DT = &DTWP->getDomTree();

930 auto PA = runImpl(F, TLI, TTI, TL, PSI, BFI, DT);

931 return !PA.areAllPreserved();

932 }

933

934private:

942 }

943};

944

950 CallInst *CI = dyn_cast(&I);

951 if (!CI) {

952 continue;

953 }

956 (Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&

957 expandMemCmp(CI, TTI, TL, &DL, PSI, BFI, DTU, Func == LibFunc_bcmp)) {

958 return true;

959 }

960 }

961 return false;

962}

963

968 std::optional DTU;

969 if (DT)

970 DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy);

971

973 bool MadeChanges = false;

974 for (auto BBIt = F.begin(); BBIt != F.end();) {

975 if (runOnBlock(*BBIt, TLI, TTI, TL, DL, PSI, BFI, DTU ? &*DTU : nullptr)) {

976 MadeChanges = true;

977

978

979 BBIt = F.begin();

980 } else {

981 ++BBIt;

982 }

983 }

984 if (MadeChanges)

987 if (!MadeChanges)

991 return PA;

992}

993

994}

995

1002 .getCachedResult(*F.getParent());

1005 : nullptr;

1007

1008 return runImpl(F, &TLI, &TTI, TL, PSI, BFI, DT);

1009}

1010

1011char ExpandMemCmpLegacyPass::ID = 0;

1013 "Expand memcmp() to load/stores", false, false)

1021

1023 return new ExpandMemCmpLegacyPass();

1024}

AMDGPU Mark last scratch load

MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL

static bool runImpl(Function &F, const TargetLowering &TLI)

static cl::opt< unsigned > MaxLoadsPerMemcmpOptSize("max-loads-per-memcmp-opt-size", cl::Hidden, cl::desc("Set maximum number of loads used in expanded memcmp for -Os/Oz"))

static cl::opt< unsigned > MaxLoadsPerMemcmp("max-loads-per-memcmp", cl::Hidden, cl::desc("Set maximum number of loads used in expanded memcmp"))

static cl::opt< unsigned > MemCmpEqZeroNumLoadsPerBlock("memcmp-num-loads-per-block", cl::Hidden, cl::init(1), cl::desc("The number of loads per basic block for inline expansion of " "memcmp that is only being compared against zero."))

Merge contiguous icmps into a memcmp

FunctionAnalysisManager FAM

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

#define STATISTIC(VARNAME, DESC)

Target-Independent Code Generator Pass Configuration Options pass.

This pass exposes codegen information to IR-level passes.

A container for analyses that lazily runs them and caches their results.

PassT::Result * getCachedResult(IRUnitT &IR) const

Get the cached result of an analysis pass for a given IR unit.

PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)

Get the result of an analysis pass for a given IR unit.

Represent the analysis usage information of a pass.

AnalysisUsage & addRequired()

AnalysisUsage & addPreserved()

Add the specified Pass class to the set of analyses preserved by this pass.

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

ArrayRef< T > drop_front(size_t N=1) const

Drop the first N elements of the array.

const T & front() const

front - Get the first element.

bool empty() const

empty - Check if the array is empty.

LLVM Basic Block Representation.

iterator begin()

Instruction iterator methods.

static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)

Creates a new BasicBlock.

const Function * getParent() const

Return the enclosing method, or null if none.

InstListType::iterator iterator

Instruction iterators...

const Instruction * getTerminator() const LLVM_READONLY

Returns the terminator instruction if the block is well formed or null if the block is not well forme...

Analysis pass which computes BlockFrequencyInfo.

BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...

Conditional or Unconditional Branch instruction.

static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)

Value * getArgOperand(unsigned i) const

This class represents a function call, abstracting a target machine's calling convention.

An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...

This is the shared class of boolean and integer constants.

uint64_t getZExtValue() const

Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...

static Constant * getAllOnesValue(Type *Ty)

A parsed version of the target data layout string, and methods for querying it.

Analysis pass which computes a DominatorTree.

Legacy analysis pass which computes a DominatorTree.

Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.

PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)

FunctionPass class - This class is used to implement most global optimizations.

virtual bool runOnFunction(Function &F)=0

runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.

bool skipFunction(const Function &F) const

Optional passes call this function to check whether the pass should be skipped.

bool hasMinSize() const

Optimize this function for minimum size (-Oz).

void applyUpdates(ArrayRef< UpdateT > Updates)

Submit updates to all available trees.

Predicate getUnsignedPredicate() const

For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.

Value * CreateConstGEP1_64(Type *Ty, Value *Ptr, uint64_t Idx0, const Twine &Name="")

LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)

Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)

IntegerType * getInt32Ty()

Fetch the type representing a 32-bit integer.

BasicBlock * GetInsertBlock() const

void SetCurrentDebugLocation(DebugLoc L)

Set location information used by debugging information.

Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")

CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")

Create a call to intrinsic ID with Args, mangled using Types.

PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")

InstTy * Insert(InstTy *I, const Twine &Name="") const

Insert and return the specified instruction.

Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)

Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)

CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)

Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")

void SetInsertPoint(BasicBlock *TheBB)

This specifies that created instructions should be appended to the end of the specified block.

Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")

Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")

This provides a uniform API for creating instructions and inserting them into a basic block: either a...

const DebugLoc & getDebugLoc() const

Return the debug location for this node as a DebugLoc.

const Module * getModule() const

Return the module owning the function this instruction belongs to or nullptr if the function does not...

InstListType::iterator eraseFromParent()

This method unlinks 'this' from the containing basic block and deletes it.

const Function * getFunction() const

Return the function this instruction belongs to.

void setSuccessor(unsigned Idx, BasicBlock *BB)

Update the specified successor to point at the provided block.

Class to represent integer types.

static IntegerType * get(LLVMContext &C, unsigned NumBits)

This static method is the primary way of constructing an IntegerType.

This is an alternative analysis pass to BlockFrequencyInfoWrapperPass.

static void getLazyBFIAnalysisUsage(AnalysisUsage &AU)

Helper for client passes to set up the analysis usage on behalf of this pass.

An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...

void addIncoming(Value *V, BasicBlock *BB)

Add an incoming value to the end of the PHI list.

static PassRegistry * getPassRegistry()

getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...

virtual void getAnalysisUsage(AnalysisUsage &) const

getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...

A set of analyses that are preserved following a run of a transformation pass.

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

void preserve()

Mark an analysis as preserved.

An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.

Analysis providing profile information.

bool hasProfileSummary() const

Returns true if profile summary is available.

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

Analysis pass providing the TargetTransformInfo.

Analysis pass providing the TargetLibraryInfo.

Provides information about what library functions are available for the current target.

bool getLibFunc(StringRef funcName, LibFunc &F) const

Searches for a particular function name.

This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...

Primary interface to the complete machine description for the target machine.

virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const

Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...

virtual const TargetLowering * getTargetLowering() const

Wrapper pass for TargetTransformInfo.

This pass provides access to the codegen interfaces that are needed for IR-level transformations.

MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const

The instances of the Type class are immutable: once they are created, they are never changed.

unsigned getIntegerBitWidth() const

static IntegerType * getInt8Ty(LLVMContext &C)

static IntegerType * getInt32Ty(LLVMContext &C)

LLVM Value Representation.

Type * getType() const

All values are typed, get the type of this value.

user_iterator user_begin()

bool hasOneUser() const

Return true if there is exactly one user of this value.

void replaceAllUsesWith(Value *V)

Change all uses of this to point to a new Value.

Align getPointerAlignment(const DataLayout &DL) const

Returns an alignment of the pointer value.

LLVMContext & getContext() const

All values hold a context through their type.

const ParentTy * getParent() const

@ C

The default llvm calling convention, compatible with C.

unsigned ID

LLVM IR allows to use arbitrary numbers as calling convention identifiers.

Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})

Look up the Function declaration of the intrinsic id in the Module M.

cst_pred_ty< is_all_ones > m_AllOnes()

Match an integer or vector with all bits set.

specific_intval< false > m_SpecificInt(const APInt &V)

Match a specific integer value or vector with all elements equal to the value.

bool match(Val *V, const Pattern &P)

specificval_ty m_Specific(const Value *V)

Match if we have a specific specified value.

cst_pred_ty< is_one > m_One()

Match an integer 1 or a vector with all elements equal to 1.

SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)

class_match< Value > m_Value()

Match an arbitrary value and ignore it.

BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)

CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)

is_zero m_Zero()

Match any null constant or a vector with all elements equal to 0.

initializer< Ty > init(const Ty &Val)

NodeAddr< FuncNode * > Func

This is an optimization pass for GlobalISel generic memory operations.

auto find(R &&Range, const T &Val)

Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.

bool isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI)

bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)

Returns true if machine function MF is suggested to be size-optimized based on the profile.

bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)

Scan the specified basic block and try to simplify any instructions in it and recursively delete dead...

uint64_t PowerOf2Ceil(uint64_t A)

Returns the power of two which is greater than or equal to the given value.

FunctionPass * createExpandMemCmpLegacyPass()

@ Or

Bitwise or logical OR of integers.

BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)

Split the specified block at the specified instruction.

Align commonAlignment(Align A, uint64_t Offset)

Returns the alignment that satisfies both alignments.

Constant * ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty, APInt Offset, const DataLayout &DL)

Return the value that a load from C with offset Offset would produce if it is constant and determinab...

void initializeExpandMemCmpLegacyPassPass(PassRegistry &)

This struct is a compact representation of a valid (non-zero power of two) alignment.

Returns options for expansion of memcmp. IsZeroCmp is.