//===- SROA.cpp - Scalar Replacement Of Aggregates ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation implements the well known scalar replacement of
// aggregates transformation. It tries to identify promotable elements of an
// aggregate alloca and promote them to registers. It will also try to
// convert uses of an element (or set of elements) of an alloca into a vector
// or bitfield-style integer scalar if appropriate.
//
//===----------------------------------------------------------------------===//

#include "llvm/Config/llvm-config.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <memory>
#include <optional>
#include <queue>
#include <string>
#include <tuple>
#include <utility>
#include <variant>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "sroa"

STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions per alloca");
STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses rewritten");
STATISTIC(MaxUsesPerAllocaPartition, "Maximum number of uses of a partition");
STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
STATISTIC(NumLoadsPredicated,
          "Number of loads rewritten into predicated loads to allow promotion");
STATISTIC(
    NumStoresPredicated,
    "Number of stores rewritten into predicated stores to allow promotion");
STATISTIC(NumDeleted, "Number of instructions deleted");
STATISTIC(NumVectorized, "Number of vectorized aggregates");
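
// Illustrative sketch (hedged; this comment and IR are not from the original
// source): given an aggregate alloca such as
//
//   %pair = alloca { i32, i32 }
//   store i32 %a, ptr %pair
//   %f1 = getelementptr inbounds { i32, i32 }, ptr %pair, i32 0, i32 1
//   store i32 %b, ptr %f1
//
// SROA slices the alloca at byte offsets [0, 4) and [4, 8), rewrites each
// slice to its own scalar alloca, and then promotes the results to SSA
// values, assuming no use of %pair escapes.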


namespace llvm {

} // namespace llvm

namespace {

class AllocaSliceRewriter;
class AllocaSlices;
class Partition;

class SelectHandSpeculativity {
  unsigned char Storage = 0;
  using TrueVal = Bitfield::Element<bool, 0, 1>;  // Low 0'th bit.
  using FalseVal = Bitfield::Element<bool, 1, 1>; // Low 1'th bit.

public:
  SelectHandSpeculativity() = default;
  SelectHandSpeculativity &setAsSpeculatable(bool isTrueVal);
  bool isSpeculatable(bool isTrueVal) const;
  bool areAllSpeculatable() const;
  bool areAnySpeculatable() const;
  bool areNoneSpeculatable() const;

  explicit operator intptr_t() const { return static_cast<intptr_t>(Storage); }
  explicit SelectHandSpeculativity(intptr_t Storage_) : Storage(Storage_) {}
};
static_assert(sizeof(SelectHandSpeculativity) == sizeof(unsigned char));

using PossiblySpeculatableLoad =
    PointerIntPair<LoadInst *, 2, SelectHandSpeculativity>;
using UnspeculatableStore = StoreInst *;
using RewriteableMemOp =
    std::variant<PossiblySpeculatableLoad, UnspeculatableStore>;
using RewriteableMemOps = SmallVector<RewriteableMemOp, 2>;
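
// Illustrative note (hedged, not in the original source): the two Bitfield
// elements pack the per-hand flags into the single Storage byte. For
// example, a select whose true-hand load is speculatable but whose
// false-hand load is not would be encoded as Storage == 0b01.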

/// An optimization pass providing Scalar Replacement of Aggregates.
class SROA {
  LLVMContext *const C;
  DomTreeUpdater *const DTU;
  AssumptionCache *const AC;
  const bool PreserveCFG;

  /// Worklist of alloca instructions to simplify.
  ///
  /// Each alloca in the function is added to this. Each new alloca formed
  /// gets added to it as well to recursively simplify unless that alloca can
  /// be directly promoted.
  SmallSetVector<AllocaInst *, 16> Worklist;

  /// A collection of instructions to delete. We try to batch deletions to
  /// simplify code and make things a bit more efficient.
  SmallVector<WeakVH, 8> DeadInsts;

  /// Post-promotion worklist.
  ///
  /// Sometimes we discover an alloca which has a high probability of becoming
  /// viable for SROA after a round of promotion takes place. In those cases,
  /// the alloca is enqueued here for re-processing.
  SmallSetVector<AllocaInst *, 16> PostPromotionWorklist;

  /// A collection of alloca instructions we can directly promote.
  SetVector<AllocaInst *, SmallVector<AllocaInst *>,
            SmallPtrSet<AllocaInst *, 16>, 16>
      PromotableAllocas;

  /// A worklist of PHIs to speculate prior to promoting allocas.
  SmallSetVector<PHINode *, 8> SpeculatablePHIs;

  /// A worklist of select instructions to rewrite prior to promoting
  /// allocas.
  SmallMapVector<SelectInst *, RewriteableMemOps, 8> SelectsToRewrite;

  /// Decide whether the loads and stores using this select can be rewritten
  /// or speculated so that the select no longer blocks promotion.
  static std::optional<RewriteableMemOps>
  isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG);

public:
  SROA(LLVMContext *C, DomTreeUpdater *DTU, AssumptionCache *AC,
       SROAOptions PreserveCFG_)
      : C(C), DTU(DTU), AC(AC),
        PreserveCFG(PreserveCFG_ == SROAOptions::PreserveCFG) {}

  /// Main run method used by both the SROAPass and by the legacy pass.
  std::pair<bool /*Changed*/, bool /*CFGChanged*/> runSROA(Function &F);

private:
  friend class AllocaSliceRewriter;

  bool presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS);
  AllocaInst *rewritePartition(AllocaInst &AI, AllocaSlices &AS, Partition &P);
  bool splitAlloca(AllocaInst &AI, AllocaSlices &AS);
  bool propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS);
  std::pair<bool /*Changed*/, bool /*CFGChanged*/> runOnAlloca(AllocaInst &AI);
  void clobberUse(Use &U);
  bool deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas);
  bool promoteAllocas();
};

} // end anonymous namespace


/// Calculate the fragment of a variable to use when slicing a store based on
/// the slice dimensions, existing fragment, and base storage fragment.
/// Results:
///   UseFrag   - Use Target as the new fragment.
///   UseNoFrag - The new slice already covers the whole variable.
///   Skip      - The new slice doesn't include this variable.
namespace {
enum FragCalcResult { UseFrag, UseNoFrag, Skip };
}
static FragCalcResult
calculateFragment(DILocalVariable *Variable,
                  uint64_t NewStorageSliceOffsetInBits,
                  uint64_t NewStorageSliceSizeInBits,
                  std::optional<DIExpression::FragmentInfo> StorageFragment,
                  std::optional<DIExpression::FragmentInfo> CurrentFragment,
                  DIExpression::FragmentInfo &Target) {
  // If the base storage describes part of the variable, apply the offset and
  // the size constraint.
  if (StorageFragment) {
    Target.SizeInBits =
        std::min(NewStorageSliceSizeInBits, StorageFragment->SizeInBits);
    Target.OffsetInBits =
        NewStorageSliceOffsetInBits + StorageFragment->OffsetInBits;
  } else {
    Target.SizeInBits = NewStorageSliceSizeInBits;
    Target.OffsetInBits = NewStorageSliceOffsetInBits;
  }

  // If this slice extracts the entirety of an independent variable from a
  // larger alloca, do not produce a fragment expression, as the variable is
  // not fragmented.
  if (!CurrentFragment) {
    if (auto Size = Variable->getSizeInBits()) {
      // Treat the current fragment as covering the whole variable.
      CurrentFragment = DIExpression::FragmentInfo(*Size, 0);
      if (Target == CurrentFragment)
        return UseNoFrag;
    }
  }

  // No additional work to do if there isn't a fragment already, or there is
  // but it already exactly describes the new assignment.
  if (!CurrentFragment || *CurrentFragment == Target)
    return UseFrag;

  // Reject the target fragment if it doesn't fit wholly within the current
  // fragment. TODO: We could instead chop up the target to fit in the case
  // of a partial overlap.
  if (Target.startInBits() < CurrentFragment->startInBits() ||
      Target.endInBits() > CurrentFragment->endInBits())
    return Skip;

  // Target fits within the current fragment, return it.
  return UseFrag;
}
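
// Worked example (hedged, not in the original source): for a 128-bit
// variable backed by a 128-bit alloca with no pre-existing fragment, a slice
// covering bits [64, 128) yields Target = {SizeInBits = 64,
// OffsetInBits = 64} and the result UseFrag, while a slice covering all of
// [0, 128) matches the whole variable and returns UseNoFrag.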


static DebugVariable getAggregateVariable(DbgVariableRecord *DVR) {
  return DebugVariable(DVR->getVariable(), std::nullopt,
                       DVR->getDebugLoc().getInlinedAt());
}

/// Find linked dbg.assign markers and generate new ones with the correct
/// FragmentInfo for the given slice of \p OldAlloca. The exact parameter
/// comments below are reconstructed where the listing lost them.
static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit,
                             uint64_t OldAllocaOffsetInBits,
                             uint64_t SliceSizeInBits, Instruction *OldInst,
                             Instruction *Inst, Value *Dest, Value *Value,
                             const DataLayout &DL) {
  auto DVRAssignMarkerRange = at::getDVRAssignmentMarkers(OldInst);
  // Nothing to do if OldInst has no linked dbg.assign markers.
  if (DVRAssignMarkerRange.empty())
    return;

  LLVM_DEBUG(dbgs() << "  migrateDebugInfo\n");
  LLVM_DEBUG(dbgs() << "    OldAlloca: " << *OldAlloca << "\n");
  LLVM_DEBUG(dbgs() << "    IsSplit: " << IsSplit << "\n");
  LLVM_DEBUG(dbgs() << "    OldAllocaOffsetInBits: " << OldAllocaOffsetInBits
                    << "\n");
  LLVM_DEBUG(dbgs() << "    SliceSizeInBits: " << SliceSizeInBits << "\n");
  LLVM_DEBUG(dbgs() << "    OldInst: " << *OldInst << "\n");
  LLVM_DEBUG(dbgs() << "    Inst: " << *Inst << "\n");
  LLVM_DEBUG(dbgs() << "    Dest: " << *Dest << "\n");

  /// Map of aggregate variables to their fragment associated with OldAlloca.
  DenseMap<DebugVariable, std::optional<DIExpression::FragmentInfo>>
      BaseFragments;
  for (auto *DVR : at::getDVRAssignmentMarkers(OldAlloca))
    BaseFragments[getAggregateVariable(DVR)] =
        DVR->getExpression()->getFragmentInfo();

  // The new inst needs a DIAssignID unique metadata attachment.
  DIAssignID *NewID = nullptr;
  auto &Ctx = Inst->getContext();
  DIBuilder DIB(*OldAlloca->getModule(), /*AllowUnresolved*/ false);
  assert(OldAlloca->isStaticAlloca());

  auto MigrateDbgAssign = [&](DbgVariableRecord *DbgAssign) {
    LLVM_DEBUG(dbgs() << "      existing dbg.assign is: " << *DbgAssign
                      << "\n");
    auto *Expr = DbgAssign->getExpression();
    bool SetKillLocation = false;

    if (IsSplit) {
      std::optional<DIExpression::FragmentInfo> BaseFragment;
      {
        auto R = BaseFragments.find(getAggregateVariable(DbgAssign));
        if (R == BaseFragments.end())
          return;
        BaseFragment = R->second;
      }
      std::optional<DIExpression::FragmentInfo> CurrentFragment =
          Expr->getFragmentInfo();
      DIExpression::FragmentInfo NewFragment;
      FragCalcResult Result = calculateFragment(
          DbgAssign->getVariable(), OldAllocaOffsetInBits, SliceSizeInBits,
          BaseFragment, CurrentFragment, NewFragment);

      if (Result == Skip)
        return;
      if (Result == UseFrag && !(NewFragment == CurrentFragment)) {
        if (CurrentFragment) {
          // Rewrite NewFragment to be relative to the existing one (this is
          // what createFragmentExpression wants). calculateFragment has
          // already resolved the size for us.
          NewFragment.OffsetInBits -= CurrentFragment->OffsetInBits;
        }
        // Add the new fragment info to the existing expression if possible.
        if (auto E = DIExpression::createFragmentExpression(
                Expr, NewFragment.OffsetInBits, NewFragment.SizeInBits)) {
          Expr = *E;
        } else {
          // Otherwise, add the new fragment info to an empty expression and
          // discard the value component of this dbg.assign, as the value
          // cannot be computed with the new fragment.
          Expr = *DIExpression::createFragmentExpression(
              DIExpression::get(Ctx, {}), NewFragment.OffsetInBits,
              NewFragment.SizeInBits);
          SetKillLocation = true;
        }
      }
    }

    ::Value *NewValue = Value ? Value : DbgAssign->getValue();
    if (!NewID) {
      NewID = DIAssignID::getDistinct(Ctx);
      Inst->setMetadata(LLVMContext::MD_DIAssignID, NewID);
    }

    DbgVariableRecord *NewAssign;
    if (IsSplit) {
      NewAssign = cast<DbgVariableRecord>(
          DIB.insertDbgAssign(Inst, NewValue, DbgAssign->getVariable(), Expr,
                              Dest, DIExpression::get(Ctx, {}),
                              DbgAssign->getDebugLoc()));
    } else {
      // The original dbg.assign is kept in place; point it at the new
      // DIAssignID and value.
      NewAssign = DbgAssign;
      NewAssign->setAssignId(NewID);
      if (NewValue != DbgAssign->getValue())
        NewAssign->replaceVariableLocationOp(0u, NewValue);
    }

    // If we've updated the value but the original dbg.assign has an arglist
    // then kill it now - we can't use the requested new value.
    SetKillLocation |=
        Value && (DbgAssign->hasArgList() ||
                  !DbgAssign->getExpression()->isSingleLocationExpression());
    if (SetKillLocation)
      NewAssign->setKillLocation();

    // We could use more precision here at the cost of some additional (code)
    // complexity - if the original dbg.assign was adjacent to its store, we
    // could position this new dbg.assign adjacent to its store rather than
    // the old dbg.assign. That would result in interleaved dbg.assigns
    // rather than a grouped block, but it should be no worse for debug info.
    if (NewAssign != DbgAssign) {
      NewAssign->moveBefore(DbgAssign->getIterator());
      NewAssign->setDebugLoc(DbgAssign->getDebugLoc());
    }
    LLVM_DEBUG(dbgs() << "Created new assign: " << *NewAssign << "\n");
  };

  for_each(DVRAssignMarkerRange, MigrateDbgAssign);
}


namespace {

/// A custom IRBuilder inserter which prefixes all names, but only in
/// Assert builds.
class IRBuilderPrefixedInserter final : public IRBuilderDefaultInserter {
  std::string Prefix;

  Twine getNameWithPrefix(const Twine &Name) const {
    return Name.isTriviallyEmpty() ? Name : Prefix + Name;
  }

public:
  void SetNamePrefix(const Twine &P) { Prefix = P.str(); }

  void InsertHelper(Instruction *I, const Twine &Name,
                    BasicBlock::iterator InsertPt) const override {
    IRBuilderDefaultInserter::InsertHelper(I, getNameWithPrefix(Name),
                                           InsertPt);
  }
};

} // end anonymous namespace

/// Provide a type for IRBuilder that drops names in release builds.
using IRBuilderTy = IRBuilder<ConstantFolder, IRBuilderPrefixedInserter>;


namespace {

/// A used slice of an alloca.
///
/// This structure represents a slice of an alloca used by some instruction.
/// It stores both the begin and end offsets of this use, a pointer to the
/// use itself, and a flag indicating whether we can classify the use as
/// splittable or not when forming partitions of the alloca.
class Slice {
  /// The beginning offset of the range.
  uint64_t BeginOffset = 0;

  /// The ending offset, not included in the range.
  uint64_t EndOffset = 0;

  /// Storage for both the use of this slice and whether it can be
  /// split.
  PointerIntPair<Use *, 1, bool> UseAndIsSplittable;

public:
  Slice() = default;

  Slice(uint64_t BeginOffset, uint64_t EndOffset, Use *U, bool IsSplittable)
      : BeginOffset(BeginOffset), EndOffset(EndOffset),
        UseAndIsSplittable(U, IsSplittable) {}

  uint64_t beginOffset() const { return BeginOffset; }
  uint64_t endOffset() const { return EndOffset; }

  bool isSplittable() const { return UseAndIsSplittable.getInt(); }
  void makeUnsplittable() { UseAndIsSplittable.setInt(false); }

  Use *getUse() const { return UseAndIsSplittable.getPointer(); }

  bool isDead() const { return getUse() == nullptr; }
  void kill() { UseAndIsSplittable.setPointer(nullptr); }

  /// Support for ordering ranges.
  ///
  /// This provides an ordering over ranges such that start offsets are
  /// always increasing, and within equal start offsets, the end offsets are
  /// decreasing. Thus the spanning range comes first in a cluster with the
  /// same start position.
  bool operator<(const Slice &RHS) const {
    if (beginOffset() < RHS.beginOffset())
      return true;
    if (beginOffset() > RHS.beginOffset())
      return false;
    if (isSplittable() != RHS.isSplittable())
      return !isSplittable();
    if (endOffset() > RHS.endOffset())
      return true;
    return false;
  }

  /// Support comparison with a single offset to allow binary searches.
  [[maybe_unused]] friend bool operator<(const Slice &LHS, uint64_t RHSOffset) {
    return LHS.beginOffset() < RHSOffset;
  }
  [[maybe_unused]] friend bool operator<(uint64_t LHSOffset, const Slice &RHS) {
    return LHSOffset < RHS.beginOffset();
  }

  bool operator==(const Slice &RHS) const {
    return isSplittable() == RHS.isSplittable() &&
           beginOffset() == RHS.beginOffset() &&
           endOffset() == RHS.endOffset();
  }
  bool operator!=(const Slice &RHS) const { return !operator==(RHS); }
};
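
// Illustrative ordering example (hedged, assuming equal splittability): the
// slices [0, 16), [0, 8), and [4, 12) sort as [0, 16), [0, 8), [4, 12) --
// begin offsets ascend, and within a common begin offset the longer
// (spanning) slice comes first in its cluster.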


/// Representation of the alloca slices.
///
/// This class represents the slices of an alloca which are formed by its
/// various uses. If a pointer escapes, we can't fully build a representation
/// for the slices used and we reflect that in this structure. The uses are
/// stored, sorted by increasing beginning offset and with unsplittable
/// slices starting at a particular offset before splittable slices.
class AllocaSlices {
public:
  /// Construct the slices of a particular alloca.
  AllocaSlices(const DataLayout &DL, AllocaInst &AI);

  /// Test whether a pointer to the allocation escapes our analysis.
  ///
  /// If this is true, the slices are never fully built and should be
  /// ignored.
  bool isEscaped() const { return PointerEscapingInstr; }
  bool isEscapedReadOnly() const { return PointerEscapingInstrReadOnly; }

  /// Support for iterating over the slices.
  /// @{
  using iterator = SmallVectorImpl<Slice>::iterator;
  using range = iterator_range<iterator>;

  iterator begin() { return Slices.begin(); }
  iterator end() { return Slices.end(); }

  using const_iterator = SmallVectorImpl<Slice>::const_iterator;
  using const_range = iterator_range<const_iterator>;

  const_iterator begin() const { return Slices.begin(); }
  const_iterator end() const { return Slices.end(); }
  /// @}

  /// Erase a range of slices.
  void erase(iterator Start, iterator Stop) { Slices.erase(Start, Stop); }

  /// Insert new slices for this alloca.
  ///
  /// This moves the slices into the alloca's slices collection, and re-sorts
  /// everything so that the usual ordering properties of the alloca's slices
  /// hold.
  void insert(ArrayRef<Slice> NewSlices) {
    int OldSize = Slices.size();
    Slices.append(NewSlices.begin(), NewSlices.end());
    auto SliceI = Slices.begin() + OldSize;
    std::stable_sort(SliceI, Slices.end());
    std::inplace_merge(Slices.begin(), SliceI, Slices.end());
  }

  // Forward declare the iterator and range accessor for walking the
  // partitions.
  class partition_iterator;
  iterator_range<partition_iterator> partitions();

  /// Access the dead users for this alloca.
  ArrayRef<Instruction *> getDeadUsers() const { return DeadUsers; }

  /// Access Uses that should be dropped if the alloca is promotable.
  ArrayRef<Use *> getDeadUsesIfPromotable() const {
    return DeadUseIfPromotable;
  }

  /// Access the dead operands referring to this alloca.
  ///
  /// These are operands which have cannot actually be used to refer to the
  /// alloca as they are outside its range and the user has to prove that
  /// the pointer is irrelevant when the operand isn't used.
  ArrayRef<Use *> getDeadOperands() const { return DeadOperands; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void print(raw_ostream &OS, const_iterator I, StringRef Indent = "  ") const;
  void printSlice(raw_ostream &OS, const_iterator I,
                  StringRef Indent = "  ") const;
  void printUse(raw_ostream &OS, const_iterator I,
                StringRef Indent = "  ") const;
  void print(raw_ostream &OS) const;
  void dump(const_iterator I) const;
  void dump() const;
#endif

private:
  template <typename DerivedT, typename RetT = void> class BuilderBase;
  class SliceBuilder;

  friend class AllocaSlices::SliceBuilder;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Handle to alloca instruction to simplify method interfaces.
  AllocaInst &AI;
#endif

  /// The instruction responsible for this alloca not having a known set
  /// of slices.
  ///
  /// When an instruction (potentially) escapes the pointer to the alloca, we
  /// store a pointer to that here and abort trying to form slices of the
  /// alloca. This will be null if the alloca slices are analyzed
  /// successfully.
  Instruction *PointerEscapingInstr;
  Instruction *PointerEscapingInstrReadOnly;

  /// The slices of the alloca.
  ///
  /// We store a vector of the slices formed by uses of the alloca here. This
  /// vector is sorted by increasing begin offset, and then the unsplittable
  /// slices before the splittable ones. See the Slice inner class for more
  /// details.
  SmallVector<Slice, 8> Slices;

  /// Instructions which will become dead if we rewrite the alloca.
  ///
  /// Note that these are not separated by slice. This is because we expect an
  /// alloca to be completely rewritten or not rewritten at all. If rewritten,
  /// all these instructions can simply be removed and replaced with poison as
  /// they come from outside of the allocated space.
  SmallVector<Instruction *, 8> DeadUsers;

  /// Uses which will become dead if we can promote the alloca.
  SmallVector<Use *, 8> DeadUseIfPromotable;

  /// Operands which will become dead if we rewrite the alloca.
  ///
  /// These are operands that in their particular use can be replaced with
  /// poison when we rewrite the alloca.
  SmallVector<Use *, 8> DeadOperands;
};


/// A partition of the slices.
///
/// An ephemeral representation for a range of slices which can be viewed as
/// a partition of the alloca. This range represents a span of the alloca's
/// memory which cannot be split, and provides access to all of the slices
/// overlapping some part of the partition.
class Partition {
private:
  friend class AllocaSlices;
  friend class AllocaSlices::partition_iterator;

  using iterator = AllocaSlices::iterator;

  /// The beginning and ending offsets of the alloca for this
  /// partition.
  uint64_t BeginOffset = 0, EndOffset = 0;

  /// The start and end iterators of this partition.
  iterator SI, SJ;

  /// A collection of split slice tails overlapping the partition.
  SmallVector<Slice *, 4> SplitTails;

  /// Raw constructor builds an empty partition starting and ending at
  /// the given iterator.
  Partition(iterator SI) : SI(SI), SJ(SI) {}

public:
  /// The start offset of this partition.
  ///
  /// All of the contained slices start at or after this offset.
  uint64_t beginOffset() const { return BeginOffset; }

  /// The end offset of this partition.
  ///
  /// All of the contained slices end at or before this offset.
  uint64_t endOffset() const { return EndOffset; }

  /// The size of the partition.
  ///
  /// Note that this can never be zero.
  uint64_t size() const {
    assert(BeginOffset < EndOffset && "Partitions must span some bytes!");
    return EndOffset - BeginOffset;
  }

  /// Test whether this partition contains no slices, and merely spans
  /// a region occupied by split slices.
  bool empty() const { return SI == SJ; }

  /// Support for iteration over the slices contained in this partition.
  ///
  /// These iterators walk over all of the slices beginning within this
  /// partition.
  /// @{
  iterator begin() const { return SI; }
  iterator end() const { return SJ; }
  /// @}

  /// Get the sequence of split slice tails.
  ///
  /// These tails are of slices which start before this partition but are
  /// split and overlap into the partition.
  ArrayRef<Slice *> splitSliceTails() const { return SplitTails; }
};

} // end anonymous namespace


/// An iterator over partitions of the alloca's slices.
///
/// This iterator implements the core algorithm for partitioning the alloca's
/// slices. It is a forward iterator as we don't support backtracking for
/// efficiency reasons, and it re-uses a single storage area to maintain the
/// current set of split slices.
class AllocaSlices::partition_iterator
    : public iterator_facade_base<partition_iterator,
                                  std::forward_iterator_tag, Partition> {
  friend class AllocaSlices;

  /// Most of the state for walking the partitions is held in a class
  /// with a nice interface for examining them.
  Partition P;

  /// We need to keep the end of the slices to know when to stop.
  AllocaSlices::iterator SE;

  /// We also need to keep track of the maximum split end offset seen.
  uint64_t MaxSplitSliceEndOffset = 0;

  /// Sets the partition to be empty at given iterator, and sets the
  /// end iterator.
  partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE)
      : P(SI), SE(SE) {
    // If not already at the end, advance our state to form the initial
    // partition.
    if (SI != SE)
      advance();
  }

  /// Advance the iterator to the next partition.
  ///
  /// Requires that the iterator not be at the end of the slices.
  void advance() {
    assert((P.SI != SE || !P.SplitTails.empty()) &&
           "Cannot advance past the end of the slices!");

    // Clear out any split uses which have ended.
    if (!P.SplitTails.empty()) {
      if (P.EndOffset >= MaxSplitSliceEndOffset) {
        // If we've finished all splits, this is easy.
        P.SplitTails.clear();
        MaxSplitSliceEndOffset = 0;
      } else {
        // Remove the uses which have ended in the prior partition. This
        // cannot change the max split slice end because we just checked that
        // the prior partition ended prior to that max.
        llvm::erase_if(P.SplitTails,
                       [&](Slice *S) { return S->endOffset() <= P.EndOffset; });
        assert(llvm::any_of(P.SplitTails,
                            [&](Slice *S) {
                              return S->endOffset() == MaxSplitSliceEndOffset;
                            }) &&
               "Could not find the current max split slice offset!");
        assert(llvm::all_of(P.SplitTails,
                            [&](Slice *S) {
                              return S->endOffset() <= MaxSplitSliceEndOffset;
                            }) &&
               "Max split slice end offset is not actually the max!");
      }
    }

    // If P.SI is already at the end, then we've cleared the split tail and
    // now have an end iterator.
    if (P.SI == SE) {
      assert(P.SplitTails.empty() && "Failed to clear the split slices!");
      return;
    }

    // If we had a non-empty partition previously, set up the state for
    // subsequent partitions.
    if (P.SI != P.SJ) {
      // Accumulate all the splittable slices which started in the old
      // partition into the split list.
      for (Slice &S : P)
        if (S.isSplittable() && S.endOffset() > P.EndOffset) {
          P.SplitTails.push_back(&S);
          MaxSplitSliceEndOffset =
              std::max(S.endOffset(), MaxSplitSliceEndOffset);
        }

      // Start from the end of the previous partition.
      P.SI = P.SJ;

      // If P.SI is now at the end, we at most have a tail of split slices.
      if (P.SI == SE) {
        P.BeginOffset = P.EndOffset;
        P.EndOffset = MaxSplitSliceEndOffset;
        return;
      }

      // If we have split slices and the next slice is after a gap and is not
      // splittable, immediately form an empty partition for the split slices
      // up until the next slice begins.
      if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset &&
          !P.SI->isSplittable()) {
        P.BeginOffset = P.EndOffset;
        P.EndOffset = P.SI->beginOffset();
        return;
      }
    }

    // OK, we need to consume new slices. Set the end offset based on the
    // current slice, and step SJ past it. The beginning offset of the
    // partition is the beginning offset of the next slice unless we have
    // pre-existing split slices that are continuing, in which case we begin
    // at the prior end offset.
    P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;
    P.EndOffset = P.SI->endOffset();
    ++P.SJ;

    // There are two strategies to form a partition based on whether the
    // partition starts with an unsplittable slice or a splittable slice.
    if (!P.SI->isSplittable()) {
      // When we're forming an unsplittable region, it must always start at
      // the first slice and will extend through its end.
      assert(P.BeginOffset == P.SI->beginOffset());

      // Form a partition including all of the overlapping slices with this
      // unsplittable slice.
      while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
        if (!P.SJ->isSplittable())
          P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
        ++P.SJ;
      }

      // We have a partition across a set of overlapping unsplittable
      // slices.
      return;
    }

    // If we're starting with a splittable slice, then we need to form a
    // synthetic partition spanning it and any other overlapping splittable
    // slices.
    assert(P.SI->isSplittable() && "Forming a splittable partition!");

    // Collect all of the overlapping splittable slices.
    while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset &&
           P.SJ->isSplittable()) {
      P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
      ++P.SJ;
    }

    // Back up P.EndOffset if we ended the span early when encountering an
    // unsplittable slice. This synthesizes the early end offset of a
    // partition spanning only splittable slices.
    if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
      assert(!P.SJ->isSplittable());
      P.EndOffset = P.SJ->beginOffset();
    }
  }

public:
  bool operator==(const partition_iterator &RHS) const {
    assert(SE == RHS.SE &&
           "End iterators don't match between compared partition iterators!");

    // The observed positions of partitions is marked by the P.SI iterator
    // and the emptiness of the split slices. The latter is only relevant
    // when P.SI == SE, as the end iterator will additionally have an empty
    // split slices list, but the prior may have the same P.SI and a tail of
    // split slices.
    if (P.SI == RHS.P.SI && P.SplitTails.empty() == RHS.P.SplitTails.empty()) {
      assert(P.SJ == RHS.P.SJ &&
             "Same set of slices formed two different sized partitions!");
      assert(P.SplitTails.size() == RHS.P.SplitTails.size() &&
             "Same slice position with differently sized non-empty split "
             "slice tails!");
      return true;
    }
    return false;
  }

  partition_iterator &operator++() {
    advance();
    return *this;
  }

  Partition &operator*() { return P; }
};


/// A forward range over the partitions of the alloca's slices.
///
/// This accesses an iterator range over the partitions of the alloca's
/// slices. It computes these partitions on the fly based on the overlapping
/// offsets of the slices and the ability to split them. It groups the slices
/// into the maximum number of self-contained partitions.
iterator_range<AllocaSlices::partition_iterator> AllocaSlices::partitions() {
  return make_range(partition_iterator(begin(), end()),
                    partition_iterator(end(), end()));
}
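
// Illustrative example (hedged, not in the original source): for the slices
// [0, 8) unsplittable, [0, 16) splittable, and [12, 16) unsplittable, the
// partition iterator yields [0, 8), then the gap partition [8, 12) covered
// only by the split tail of [0, 16), and finally [12, 16).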


static Value *foldSelectInst(SelectInst &SI) {
  // If the condition being selected on is a constant or the same value is
  // being selected between, fold the select. Yes this does (rarely) happen
  // early on.
  if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition()))
    return SI.getOperand(1 + CI->isZero());
  if (SI.getOperand(1) == SI.getOperand(2))
    return SI.getOperand(1);

  return nullptr;
}

/// A helper that folds a PHI node or a select.
static Value *foldPHINodeOrSelectInst(Instruction &I) {
  if (PHINode *PN = dyn_cast<PHINode>(&I)) {
    // If PN merges together the same value, return that value.
    return PN->hasConstantValue();
  }
  return foldSelectInst(cast<SelectInst>(I));
}


/// Builder for the alloca slices.
///
/// This class builds a set of alloca slices by recursively visiting the uses
/// of an alloca and making a slice for each load and store at each offset.
class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
  friend class PtrUseVisitor<SliceBuilder>;
  friend class InstVisitor<SliceBuilder>;

  using Base = PtrUseVisitor<SliceBuilder>;

  const uint64_t AllocSize;
  AllocaSlices &AS;

  SmallDenseMap<Instruction *, unsigned> MemTransferSliceMap;
  SmallDenseMap<Instruction *, uint64_t> PHIOrSelectSizes;

  /// Set to de-duplicate dead instructions found in the use walk.
  SmallPtrSet<Instruction *, 4> VisitedDeadInsts;

public:
  SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &AS)
      : PtrUseVisitor<SliceBuilder>(DL),
        AllocSize(DL.getTypeAllocSize(AI.getAllocatedType()).getFixedValue()),
        AS(AS) {}

private:
  void markAsDead(Instruction &I) {
    if (VisitedDeadInsts.insert(&I).second)
      AS.DeadUsers.push_back(&I);
  }

  void insertUse(Instruction &I, const APInt &Offset, uint64_t Size,
                 bool IsSplittable = false) {
    // Completely skip uses which have a zero size or start either before or
    // past the end of the allocation.
    if (Size == 0 || Offset.uge(AllocSize)) {
      LLVM_DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @"
                        << Offset
                        << " which has zero size or starts outside of the "
                        << AllocSize << " byte alloca:\n"
                        << "    alloca: " << AS.AI << "\n"
                        << "       use: " << I << "\n");
      return markAsDead(I);
    }

    uint64_t BeginOffset = Offset.getZExtValue();
    uint64_t EndOffset = BeginOffset + Size;

    // Clamp the end offset to the end of the allocation. Note that this is
    // formulated to avoid overflow, and is generally not a problem in
    // practice as we tend to promote allocas whose slices all fit. However,
    // we do sometimes see aggressively out-of-bounds accesses, and they can
    // induce overflow here.
    assert(AllocSize >= BeginOffset); // Established above.
    if (Size > AllocSize - BeginOffset) {
      LLVM_DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @"
                        << Offset << " to remain within the " << AllocSize
                        << " byte alloca:\n"
                        << "    alloca: " << AS.AI << "\n"
                        << "       use: " << I << "\n");
      EndOffset = AllocSize;
    }

    AS.Slices.push_back(Slice(BeginOffset, EndOffset, U, IsSplittable));
  }

  void visitBitCastInst(BitCastInst &BC) {
    if (BC.use_empty())
      return markAsDead(BC);

    return Base::visitBitCastInst(BC);
  }

  void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
    if (ASC.use_empty())
      return markAsDead(ASC);

    return Base::visitAddrSpaceCastInst(ASC);
  }

  void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
    if (GEPI.use_empty())
      return markAsDead(GEPI);

    return Base::visitGetElementPtrInst(GEPI);
  }

  void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset,
                         uint64_t Size, bool IsVolatile) {
    // We allow splitting of non-volatile loads and stores where the type is
    // an integer type. These may be used to implement 'memcpy' or other
    // "transfer of bits" patterns.
    bool IsSplittable =
        Ty->isIntegerTy() && !IsVolatile && DL.typeSizeEqualsStoreSize(Ty);

    insertUse(I, Offset, Size, IsSplittable);
  }

  void visitLoadInst(LoadInst &LI) {
    assert((!LI.isSimple() || LI.getType()->isSingleValueType()) &&
           "All simple FCA loads should have been pre-split");

    if (!IsOffsetKnown)
      return PI.setEscapedReadOnly(&LI);

    TypeSize Size = DL.getTypeStoreSize(LI.getType());
    if (Size.isScalable()) {
      unsigned VScale = LI.getFunction()->getVScaleValue();
      if (!VScale)
        return PI.setAborted(&LI);

      Size = TypeSize::getFixed(Size.getKnownMinValue() * VScale);
    }

    return handleLoadOrStore(LI.getType(), LI, Offset, Size.getFixedValue(),
                             LI.isVolatile());
  }

  void visitStoreInst(StoreInst &SI) {
    Value *ValOp = SI.getValueOperand();
    if (ValOp == *U)
      return PI.setEscapedAndAborted(&SI);
    if (!IsOffsetKnown)
      return PI.setAborted(&SI);

    TypeSize StoreSize = DL.getTypeStoreSize(ValOp->getType());
    if (StoreSize.isScalable()) {
      unsigned VScale = SI.getFunction()->getVScaleValue();
      if (!VScale)
        return PI.setAborted(&SI);

      StoreSize = TypeSize::getFixed(StoreSize.getKnownMinValue() * VScale);
    }

    uint64_t Size = StoreSize.getFixedValue();

    // If this memory access can be shown to *statically* extend outside the
    // bounds of the allocation, its behavior is undefined, so simply ignore
    // it. Note that this is more strict than the generic clamping behavior
    // of insertUse. We also try to handle cases which might run the risk of
    // overflow.
    //
    // FIXME: We should instead consider the pointer to have escaped if this
    // function is being instrumented for addressing bugs or race conditions.
    if (Size > AllocSize || Offset.ugt(AllocSize - Size)) {
      LLVM_DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @"
                        << Offset << " which extends past the end of the "
                        << AllocSize << " byte alloca:\n"
                        << "    alloca: " << AS.AI << "\n"
                        << "       use: " << SI << "\n");
      return markAsDead(SI);
    }

    assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
           "All simple FCA stores should have been pre-split");
    handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile());
  }

  void visitMemSetInst(MemSetInst &II) {
    assert(II.getRawDest() == *U && "Pointer use is not the destination?");
    ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
    if ((Length && Length->getValue() == 0) ||
        (IsOffsetKnown && Offset.uge(AllocSize)))
      // Zero-length mem transfer intrinsics can be ignored entirely.
      return markAsDead(II);

    if (!IsOffsetKnown)
      return PI.setAborted(&II);

    insertUse(II, Offset,
              Length ? Length->getLimitedValue()
                     : AllocSize - Offset.getLimitedValue(),
              (bool)Length);
  }

  void visitMemTransferInst(MemTransferInst &II) {
    ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
    if (Length && Length->getValue() == 0)
      // Zero-length mem transfer intrinsics can be ignored entirely.
      return markAsDead(II);

    // Because we can visit these intrinsics twice arriving from different
    // operands, also check to see if the first time marked this instruction
    // as dead. If so, skip it.
    if (VisitedDeadInsts.count(&II))
      return;

    if (!IsOffsetKnown)
      return PI.setAborted(&II);

    // This side of the transfer is completely out-of-bounds, and so we can
    // nuke the entire transfer. However, we also need to nuke the other side
    // if already added to our partitions.
    // FIXME: Yet another place we really should bypass this when
    // instrumenting for ASan.
    if (Offset.uge(AllocSize)) {
      SmallDenseMap<Instruction *, unsigned>::iterator MTPI =
          MemTransferSliceMap.find(&II);
      if (MTPI != MemTransferSliceMap.end())
        AS.Slices[MTPI->second].kill();
      return markAsDead(II);
    }

    uint64_t RawOffset = Offset.getLimitedValue();
    uint64_t Size = Length ? Length->getLimitedValue() : AllocSize - RawOffset;

    // Check for the special case where the same exact value is used for both
    // source and dest.
    if (*U == II.getRawDest() && *U == II.getRawSource()) {
      // For non-volatile transfers this is a no-op.
      if (II.isVolatile())
        return markAsDead(II);

      return insertUse(II, Offset, Size, /*IsSplittable=*/false);
    }

    // If we have seen both source and destination for a mem transfer, then
    // they both point to the same alloca.
    bool Inserted;
    SmallDenseMap<Instruction *, unsigned>::iterator MTPI;
    std::tie(MTPI, Inserted) =
        MemTransferSliceMap.insert(std::make_pair(&II, AS.Slices.size()));
    unsigned PrevIdx = MTPI->second;
    if (!Inserted) {
      Slice &PrevP = AS.Slices[PrevIdx];

      // Check if the begin offsets match and this is a non-volatile
      // transfer. In that case, we can completely elide the transfer.
      if (!II.isVolatile() && PrevP.beginOffset() == RawOffset) {
        PrevP.kill();
        return markAsDead(II);
      }

      // Otherwise we have an offset transfer within the same alloca. We
      // can't split those.
      PrevP.makeUnsplittable();
    }

    // Insert the use now that we've fixed up the splittable nature.
    insertUse(II, Offset, Size, /*IsSplittable=*/Inserted && Length);

    // Check that we ended up with a valid index in the map.
    assert(AS.Slices[PrevIdx].getUse()->getUser() == &II &&
           "Map index doesn't point back to a slice with this user.");
  }

  // Disable SROA for any intrinsics except for lifetime invariants and
  // droppable uses.
  void visitIntrinsicInst(IntrinsicInst &II) {
    if (II.isDroppable()) {
      AS.DeadUseIfPromotable.push_back(U);
      return;
    }

    if (!IsOffsetKnown)
      return PI.setAborted(&II);

    if (II.isLifetimeStartOrEnd()) {
      insertUse(II, Offset, AllocSize, true);
      return;
    }

    Base::visitIntrinsicInst(II);
  }

  Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &Size) {
    // We consider any PHI or select that results in a direct load or store
    // of the same offset to be a safe use of the alloca. These uses should
    // not restrict optimization. We do, however, need to check that select
    // and phi nodes only refer to the same alloca (or via GEPs with all-zero
    // indices).
    SmallPtrSet<Instruction *, 4> Visited;
    SmallVector<std::pair<Instruction *, Instruction *>, 4> Uses;
    Visited.insert(Root);
    Uses.push_back(std::make_pair(cast<Instruction>(*U), Root));
    // If there are no loads or stores, the access is dead. We mark that as a
    // size zero access.
    Size = 0;
    do {
      Instruction *I, *UsedI;
      std::tie(UsedI, I) = Uses.pop_back_val();

      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
        TypeSize LoadSize = DL.getTypeStoreSize(LI->getType());
        if (LoadSize.isScalable()) {
          PI.setAborted(LI);
          return nullptr;
        }
        Size = std::max(Size, LoadSize.getFixedValue());
        continue;
      }
      if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
        Value *Op = SI->getOperand(0);
        if (Op == UsedI)
          return SI;
        TypeSize StoreSize = DL.getTypeStoreSize(Op->getType());
        if (StoreSize.isScalable()) {
          PI.setAborted(SI);
          return nullptr;
        }
        Size = std::max(Size, StoreSize.getFixedValue());
        continue;
      }

      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
        if (!GEP->hasAllZeroIndices())
          return GEP;
      } else if (!isa<BitCastInst>(I) && !isa<PHINode>(I) &&
                 !isa<SelectInst>(I) && !isa<AddrSpaceCastInst>(I)) {
        return I;
      }

      for (User *U : I->users())
        if (Visited.insert(cast<Instruction>(U)).second)
          Uses.push_back(std::make_pair(I, cast<Instruction>(U)));
    } while (!Uses.empty());

    return nullptr;
  }

  void visitPHINodeOrSelectInst(Instruction &I) {
    assert(isa<PHINode>(I) || isa<SelectInst>(I));
    if (I.use_empty())
      return markAsDead(I);

    // If this is a PHI node before a catchswitch, we cannot insert any
    // non-PHI instructions in this BB, which may be required during
    // rewriting. Bail out on these cases.
    if (isa<PHINode>(I) &&
        I.getParent()->getFirstInsertionPt() == I.getParent()->end())
      return PI.setAborted(&I);

    // TODO: We could use simplifyInstruction here to fold PHINodes and
    // SelectInsts. However, doing so requires changes to the current
    // dead-operand-tracking mechanism.
    if (Value *Result = foldPHINodeOrSelectInst(I)) {
      if (Result == *U)
        // If the result of the constant fold will be the pointer, recurse
        // through the PHI/select as if we had RAUW'ed it.
        enqueueUsers(I);
      else
        // Otherwise the operand to the PHI/select is dead, and we can
        // replace it with poison.
        AS.DeadOperands.push_back(U);

      return;
    }

    if (!IsOffsetKnown)
      return PI.setAborted(&I);

    // See if we already have computed info on this node.
    uint64_t &Size = PHIOrSelectSizes[&I];
    if (!Size) {
      // This is a new PHI/Select, check for an unsafe use of it.
      if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&I, Size))
        return PI.setAborted(UnsafeI);
    }

    // For PHI and select operands outside the alloca, we can't nuke the
    // entire phi or select -- the other side might still be relevant, so we
    // special case them here and use a separate structure to track the
    // operands themselves which should be replaced with poison.
    // FIXME: This should instead be escaped in the event we're instrumenting
    // for address sanitization.
    if (Offset.uge(AllocSize)) {
      AS.DeadOperands.push_back(U);
      return;
    }

    insertUse(I, Offset, Size);
  }

  void visitPHINode(PHINode &PN) { visitPHINodeOrSelectInst(PN); }

  void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); }

  /// Disable SROA entirely if there are unhandled users of the alloca.
  void visitInstruction(Instruction &I) { PI.setAborted(&I); }

  void visitCallBase(CallBase &CB) {
    // If the call operand is a readonly, nocapture argument, the callee
    // cannot write through it or retain it, so this is a read-only escape.
    if (CB.isDataOperand(U) && CB.doesNotCapture(U->getOperandNo()) &&
        CB.onlyReadsMemory(U->getOperandNo())) {
      PI.setEscapedReadOnly(&CB);
      return;
    }

    Base::visitCallBase(CB);
  }
};


AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
    :
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
      AI(AI),
#endif
      PointerEscapingInstr(nullptr), PointerEscapingInstrReadOnly(nullptr) {
  SliceBuilder PB(DL, AI, *this);
  SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
  if (PtrI.isEscaped() || PtrI.isAborted()) {
    // FIXME: We should sink the escape vs. abort info into the caller
    // nicely, possibly by just storing the PtrInfo in the AllocaSlices.
    PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
                                                  : PtrI.getAbortingInst();
    assert(PointerEscapingInstr && "Did not track a bad instruction");
    return;
  }
  PointerEscapingInstrReadOnly = PtrI.getEscapedReadOnlyInst();

  llvm::erase_if(Slices, [](const Slice &S) { return S.isDead(); });

  // Sort the uses. This arranges for the offsets to be in ascending order,
  // and the sizes to be in descending order.
  llvm::stable_sort(Slices);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void AllocaSlices::print(raw_ostream &OS, const_iterator I,
                         StringRef Indent) const {
  printSlice(OS, I, Indent);
  OS << "\n";
  printUse(OS, I, Indent);
}

void AllocaSlices::printSlice(raw_ostream &OS, const_iterator I,
                              StringRef Indent) const {
  OS << Indent << "[" << I->beginOffset() << "," << I->endOffset() << ")"
     << " slice #" << (I - begin())
     << (I->isSplittable() ? " (splittable)" : "");
}

void AllocaSlices::printUse(raw_ostream &OS, const_iterator I,
                            StringRef Indent) const {
  OS << Indent << "  used by: " << *I->getUse()->getUser() << "\n";
}

void AllocaSlices::print(raw_ostream &OS) const {
  if (PointerEscapingInstr) {
    OS << "Can't analyze slices for alloca: " << AI << "\n"
       << "  A pointer to this alloca escaped by:\n"
       << "  " << *PointerEscapingInstr << "\n";
    return;
  }

  if (PointerEscapingInstrReadOnly)
    OS << "Escapes into ReadOnly: " << *PointerEscapingInstrReadOnly << "\n";

  OS << "Slices of alloca: " << AI << "\n";
  for (const_iterator I = begin(), E = end(); I != E; ++I)
    print(OS, I);
}

LLVM_DUMP_METHOD void AllocaSlices::dump(const_iterator I) const {
  print(dbgs(), I);
}
LLVM_DUMP_METHOD void AllocaSlices::dump() const { print(dbgs()); }

#endif // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Walk the range of a partitioning looking for a common type to cover this
/// sequence of slices.
static std::pair<Type *, IntegerType *>
findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E,
               uint64_t EndOffset) {
  Type *Ty = nullptr;
  bool TyIsCommon = true;
  IntegerType *ITy = nullptr;

  // Note that we need to look at *every* alloca slice's Use to ensure we
  // always get consistent results regardless of the order of slices.
  for (AllocaSlices::const_iterator I = B; I != E; ++I) {
    Use *U = I->getUse();
    if (isa<IntrinsicInst>(*U->getUser()))
      continue;
    if (I->beginOffset() != B->beginOffset() || I->endOffset() != EndOffset)
      continue;

    Type *UserTy = nullptr;
    if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
      UserTy = LI->getType();
    } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
      UserTy = SI->getValueOperand()->getType();
    }

    if (IntegerType *UserITy = dyn_cast_or_null<IntegerType>(UserTy)) {
      // If the type is larger than the partition, skip it. We only encounter
      // this for split integer operations where we want to use the type of
      // the entity causing the split. Also skip if the type is not a byte
      // width multiple.
      if (UserITy->getBitWidth() % 8 != 0 ||
          UserITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))
        continue;

      // Track the largest bitwidth integer type used in this way in case we
      // have no common type.
      if (!ITy || ITy->getBitWidth() < UserITy->getBitWidth())
        ITy = UserITy;
    }

    // To avoid depending on the order of slices, Ty and TyIsCommon must not
    // be usable for vectorization unless they are used by every slice.
    if (!UserTy || (Ty && Ty != UserTy))
      TyIsCommon = false; // Give up on anything but an iN type.
    else
      Ty = UserTy;
  }

  return {TyIsCommon ? Ty : nullptr, ITy};
}


/// PHI instructions that use an alloca and are subsequently loaded can be
/// rewritten to load both input pointers in the predecessor blocks and then
/// PHI the results, allowing the load of the alloca to be promoted. This
/// only succeeds when it is safe to push the loads into the predecessors:
/// the loads must be loadable unconditionally, or a load must already be
/// present on each incoming edge.
///
/// FIXME: This should be hoisted into a generic utility, likely in
/// Transforms/Util/Local.h.
static bool isSafePHIToSpeculate(PHINode &PN) {
  const DataLayout &DL = PN.getDataLayout();

  // For now, we can only do this promotion if the load is in the same block
  // as the PHI, and if there are no stores between the phi and load.
  // TODO: Allow recursive phi users.
  // TODO: Allow stores.
  BasicBlock *BB = PN.getParent();
  Align MaxAlign;
  uint64_t APWidth = DL.getIndexTypeSizeInBits(PN.getType());
  Type *LoadType = nullptr;
  for (User *U : PN.users()) {
    LoadInst *LI = dyn_cast<LoadInst>(U);
    if (!LI || !LI->isSimple())
      return false;

    // For now we only allow loads in the same block as the PHI. This is a
    // common case that happens when instcombine merges two loads through a
    // PHI.
    if (LI->getParent() != BB)
      return false;

    if (LoadType) {
      if (LoadType != LI->getType())
        return false;
    } else {
      LoadType = LI->getType();
    }

    // Ensure that there are no instructions between the PHI and the load
    // that could store.
    for (BasicBlock::iterator BBI(PN); &*BBI != LI; ++BBI)
      if (BBI->mayWriteToMemory())
        return false;

    MaxAlign = std::max(MaxAlign, LI->getAlign());
  }

  if (!LoadType)
    return false;

  APInt LoadSize =
      APInt(APWidth, DL.getTypeStoreSize(LoadType).getFixedValue());

  // We can only transform this if it is safe to push the loads into the
  // predecessor blocks. The only thing to watch out for is that we can't put
  // a possibly trapping load in the predecessor if it is a critical edge.
  for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
    Instruction *TI = PN.getIncomingBlock(Idx)->getTerminator();
    Value *InVal = PN.getIncomingValue(Idx);

    // If the value is produced by the terminator of the predecessor (an
    // invoke) or it has side-effects, there is no valid place to put a load
    // in the predecessor.
    if (TI == InVal && TI->mayHaveSideEffects())
      return false;

    // If the predecessor has a single successor, then the edge isn't
    // critical.
    if (TI->getNumSuccessors() == 1)
      continue;

    // If this pointer is always safe to load, or if we can prove that there
    // is already a load in the block, then we can move the load to the pred
    // block.
    if (isSafeToLoadUnconditionally(InVal, MaxAlign, LoadSize, DL, TI))
      continue;

    return false;
  }

  return true;
}


static void speculatePHINodeLoads(IRBuilderTy &IRB, PHINode &PN) {
  LLVM_DEBUG(dbgs() << "    original: " << PN << "\n");

  LoadInst *SomeLoad = cast<LoadInst>(PN.user_back());
  Type *LoadTy = SomeLoad->getType();
  IRB.SetInsertPoint(&PN);
  PHINode *NewPN = IRB.CreatePHI(LoadTy, PN.getNumIncomingValues(),
                                 PN.getName() + ".sroa.speculated");

  // Get the AA tags and alignment to use from one of the loads. It does not
  // matter which one we get and if any differ.
  AAMDNodes AATags = SomeLoad->getAAMetadata();
  Align Alignment = SomeLoad->getAlign();

  // Rewrite all loads of the PN to use the new PHI.
  while (!PN.use_empty()) {
    LoadInst *LI = cast<LoadInst>(PN.user_back());
    LI->replaceAllUsesWith(NewPN);
    LI->eraseFromParent();
  }

  // Inject loads into all of the pred blocks.
  DenseMap<BasicBlock *, Value *> InjectedLoads;
  for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
    BasicBlock *Pred = PN.getIncomingBlock(Idx);
    Value *InVal = PN.getIncomingValue(Idx);

    // A PHI node is allowed to have multiple (duplicated) entries for the
    // same basic block, as long as the value is the same. So if we already
    // injected a load in the predecessor, then we should reuse the same load
    // for all duplicated entries.
    if (Value *V = InjectedLoads.lookup(Pred)) {
      NewPN->addIncoming(V, Pred);
      continue;
    }

    Instruction *TI = Pred->getTerminator();
    IRB.SetInsertPoint(TI);

    LoadInst *Load = IRB.CreateAlignedLoad(
        LoadTy, InVal, Alignment,
        (PN.getName() + ".sroa.speculate.load." + Pred->getName()));
    ++NumLoadsSpeculated;
    if (AATags)
      Load->setAAMetadata(AATags);
    NewPN->addIncoming(Load, Pred);
    InjectedLoads[Pred] = Load;
  }

  LLVM_DEBUG(dbgs() << "          speculated to: " << *NewPN << "\n");
  PN.eraseFromParent();
}
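
// Illustrative IR (hedged sketch, not from the original source): the
// speculation above rewrites
//   %p = phi ptr [ %a, %bb1 ], [ %b, %bb2 ]
//   %v = load i32, ptr %p
// into one load per predecessor feeding a PHI over the loaded values:
//   bb1:  %v.a = load i32, ptr %a
//   bb2:  %v.b = load i32, ptr %b
//   %v = phi i32 [ %v.a, %bb1 ], [ %v.b, %bb2 ]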


SelectHandSpeculativity &
SelectHandSpeculativity::setAsSpeculatable(bool isTrueVal) {
  if (isTrueVal)
    Bitfield::set<SelectHandSpeculativity::TrueVal>(Storage, true);
  else
    Bitfield::set<SelectHandSpeculativity::FalseVal>(Storage, true);
  return *this;
}

bool SelectHandSpeculativity::isSpeculatable(bool isTrueVal) const {
  return isTrueVal ? Bitfield::get<SelectHandSpeculativity::TrueVal>(Storage)
                   : Bitfield::get<SelectHandSpeculativity::FalseVal>(Storage);
}

bool SelectHandSpeculativity::areAllSpeculatable() const {
  return isSpeculatable(/*isTrueVal=*/true) &&
         isSpeculatable(/*isTrueVal=*/false);
}

bool SelectHandSpeculativity::areAnySpeculatable() const {
  return isSpeculatable(/*isTrueVal=*/true) ||
         isSpeculatable(/*isTrueVal=*/false);
}
bool SelectHandSpeculativity::areNoneSpeculatable() const {
  return !areAnySpeculatable();
}


static SelectHandSpeculativity
isSafeLoadOfSelectToSpeculate(LoadInst &LI, SelectInst &SI, bool PreserveCFG) {
  assert(LI.isSimple() && "Only for simple loads");
  SelectHandSpeculativity Spec;

  const DataLayout &DL = SI.getDataLayout();
  for (Value *Value : {SI.getTrueValue(), SI.getFalseValue()})
    if (isSafeToLoadUnconditionally(Value, LI.getType(), LI.getAlign(), DL,
                                    &LI))
      Spec.setAsSpeculatable(/*isTrueVal=*/Value == SI.getTrueValue());
    else if (PreserveCFG)
      return Spec;

  return Spec;
}


std::optional<RewriteableMemOps>
SROA::isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG) {
  RewriteableMemOps Ops;

  for (User *U : SI.users()) {
    if (auto *BC = dyn_cast<BitCastInst>(U); BC && BC->hasOneUse())
      U = *BC->user_begin();

    if (auto *Store = dyn_cast<StoreInst>(U)) {
      // Note that atomic stores can be transformed; atomic semantics do not
      // have any meaning for a local alloca. Stores are not speculatable,
      // however, so if we can't turn it into a predicated store, we are
      // done.
      if (Store->isVolatile() || PreserveCFG)
        return {};
      Ops.emplace_back(Store);
      continue;
    }

    auto *LI = dyn_cast<LoadInst>(U);

    // Note that atomic loads can be transformed; atomic semantics do not
    // have any meaning for a local alloca.
    if (!LI || LI->isVolatile())
      return {};

    PossiblySpeculatableLoad Load(LI);
    if (!LI->isSimple()) {
      // If the load is not simple, we can't speculatively execute it, but we
      // could handle this via a CFG modification. But can we?
      if (PreserveCFG)
        return {};
      Ops.emplace_back(Load);
      continue;
    }

    SelectHandSpeculativity Spec =
        isSafeLoadOfSelectToSpeculate(*LI, SI, PreserveCFG);
    if (PreserveCFG && !Spec.areAllSpeculatable())
      return {};

    Load.setInt(Spec);
    Ops.emplace_back(Load);
  }

  return Ops;
}


static void speculateSelectInstLoads(SelectInst &SI, LoadInst &LI,
                                     IRBuilderTy &IRB) {
  LLVM_DEBUG(dbgs() << "    original load: " << SI << "\n");

  Value *TV = SI.getTrueValue();
  Value *FV = SI.getFalseValue();
  // Replace the given load of the select with a select of two loads.

  assert(LI.isSimple() && "We only speculate simple loads");

  IRB.SetInsertPoint(&LI);

  LoadInst *TL =
      IRB.CreateAlignedLoad(LI.getType(), TV, LI.getAlign(),
                            LI.getName() + ".sroa.speculate.load.true");
  LoadInst *FL =
      IRB.CreateAlignedLoad(LI.getType(), FV, LI.getAlign(),
                            LI.getName() + ".sroa.speculate.load.false");
  NumLoadsSpeculated += 2;

  // Transfer AA info if present.
  AAMDNodes Tags = LI.getAAMetadata();
  if (Tags) {
    TL->setAAMetadata(Tags);
    FL->setAAMetadata(Tags);
  }

  Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
                              LI.getName() + ".sroa.speculated", &SI);

  LLVM_DEBUG(dbgs() << "          speculated to: " << *V << "\n");
  LI.replaceAllUsesWith(V);
}


template <typename T>
static void rewriteMemOpOfSelect(SelectInst &SI, T &I,
                                 SelectHandSpeculativity Spec,
                                 DomTreeUpdater &DTU) {
  assert((isa<LoadInst>(I) || isa<StoreInst>(I)) && "Only for load and store!");
  LLVM_DEBUG(dbgs() << "    original mem op: " << I << "\n");
  BasicBlock *Head = I.getParent();
  Instruction *ThenTerm = nullptr;
  Instruction *ElseTerm = nullptr;
  if (Spec.areNoneSpeculatable())
    SplitBlockAndInsertIfThenElse(SI.getCondition(), &I, &ThenTerm, &ElseTerm,
                                  SI.getMetadata(LLVMContext::MD_prof), &DTU);
  else {
    SplitBlockAndInsertIfThen(SI.getCondition(), &I, /*Unreachable=*/false,
                              SI.getMetadata(LLVMContext::MD_prof), &DTU,
                              /*LI=*/nullptr, /*ThenBlock=*/nullptr);
    if (Spec.isSpeculatable(/*isTrueVal=*/true))
      cast<BranchInst>(Head->getTerminator())->swapSuccessors();
  }
  auto *HeadBI = cast<BranchInst>(Head->getTerminator());
  Spec = {}; // Do not use `Spec` beyond this point.
  BasicBlock *Tail = I.getParent();
  Tail->setName(Head->getName() + ".cont");
  PHINode *PN;
  if (isa<LoadInst>(I))
    PN = PHINode::Create(I.getType(), 2, "", I.getIterator());
  for (BasicBlock *SuccBB : successors(Head)) {
    bool IsThen = SuccBB == HeadBI->getSuccessor(0);
    int SuccIdx = IsThen ? 0 : 1;
    auto *NewMemOpBB = SuccBB == Tail ? Head : SuccBB;
    auto &CondMemOp = cast<T>(*I.clone());
    if (NewMemOpBB != Head) { // Don't dropUBImplyingAttrsAndMetadata.
      NewMemOpBB->setName(Head->getName() + (IsThen ? ".then" : ".else"));
      if (isa<LoadInst>(I))
        ++NumLoadsPredicated;
      else
        ++NumStoresPredicated;
    } else {
      CondMemOp.dropUBImplyingAttrsAndMetadata();
      ++NumLoadsSpeculated;
    }
    CondMemOp.insertBefore(NewMemOpBB->getTerminator()->getIterator());
    Value *Ptr = SI.getOperand(1 + SuccIdx);
    CondMemOp.setOperand(I.getPointerOperandIndex(), Ptr);
    if (isa<LoadInst>(I)) {
      CondMemOp.setName(I.getName() + (IsThen ? ".then" : ".else") + ".val");
      PN->addIncoming(&CondMemOp, NewMemOpBB);
    } else
      LLVM_DEBUG(dbgs() << "                 to: " << CondMemOp << "\n");
  }
  if (isa<LoadInst>(I)) {
    PN->takeName(&I);
    LLVM_DEBUG(dbgs() << "          to: " << *PN << "\n");
    I.replaceAllUsesWith(PN);
  }
}


static void rewriteMemOpOfSelect(SelectInst &SelInst, Instruction &I,
                                 SelectHandSpeculativity Spec,
                                 DomTreeUpdater &DTU) {
  if (auto *LI = dyn_cast<LoadInst>(&I))
    rewriteMemOpOfSelect(SelInst, *LI, Spec, DTU);
  else
    rewriteMemOpOfSelect(SelInst, cast<StoreInst>(I), Spec, DTU);
}

static bool rewriteSelectInstMemOps(SelectInst &SI,
                                    const RewriteableMemOps &Ops,
                                    IRBuilderTy &IRB, DomTreeUpdater *DTU) {
  bool CFGChanged = false;
  LLVM_DEBUG(dbgs() << "    original select: " << SI << "\n");

  for (const RewriteableMemOp &Op : Ops) {
    SelectHandSpeculativity Spec;
    Instruction *I;
    if (auto *const *US = std::get_if<UnspeculatableStore>(&Op)) {
      I = *US;
    } else {
      auto PSL = std::get<PossiblySpeculatableLoad>(Op);
      I = PSL.getPointer();
      Spec = PSL.getInt();
    }
    if (Spec.areAllSpeculatable()) {
      speculateSelectInstLoads(SI, cast<LoadInst>(*I), IRB);
    } else {
      assert(DTU && "Should not get here when not allowed to modify the CFG!");
      rewriteMemOpOfSelect(SI, *I, Spec, *DTU);
      CFGChanged = true;
    }
    I->eraseFromParent();
  }

  for (User *U : make_early_inc_range(SI.users()))
    cast<BitCastInst>(U)->eraseFromParent();
  SI.eraseFromParent();
  return CFGChanged;
}


/// Compute the adjusted pointer for a slice of the alloca. This always
/// produces a pointer of the requested type, adding the byte offset with an
/// inbounds pointer add when non-zero.
static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL,
                             Value *Ptr, APInt Offset, Type *PointerTy,
                             const Twine &NamePrefix) {
  if (Offset != 0)
    Ptr = IRB.CreateInBoundsPtrAdd(Ptr, IRB.getInt(Offset),
                                   NamePrefix + "sroa_idx");
  return IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr, PointerTy,
                                                 NamePrefix + "sroa_cast");
}


/// Test whether we can convert a value from the old to the new type.
///
/// This predicate should be used to guard calls to convertValue in order to
/// ensure that we only try to convert viable values. The strategy is that we
/// will peel off single element struct and array wrappings to get to an
/// underlying value, and convert that value.
static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy,
                            unsigned VScale = 0) {
  if (OldTy == NewTy)
    return true;

  // For integer types, we can't handle any bit-width differences. This would
  // break both vector conversions with extension and introduce endianness
  // issues when in conjunction with loads and stores.
  if (isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) {
    assert(cast<IntegerType>(OldTy)->getBitWidth() !=
               cast<IntegerType>(NewTy)->getBitWidth() &&
           "We can't have the same bitwidth for different int types");
    return false;
  }

  TypeSize NewSize = DL.getTypeSizeInBits(NewTy);
  TypeSize OldSize = DL.getTypeSizeInBits(OldTy);

  if (OldSize.isScalable() != NewSize.isScalable()) {
    // Conversion between fixed and scalable sizes is only possible when the
    // runtime vector length (vscale) is known.
    if (!VScale)
      return false;

    // For ptr-to-int and int-to-ptr casts, the pointer side is resolved
    // within a single domain (either fixed or scalable). Any additional
    // conversion between fixed and scalable types is handled through integer
    // types.
    auto OldVTy = OldTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(OldTy) : OldTy;
    auto NewVTy = NewTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(NewTy) : NewTy;

    if (OldSize.isScalable()) {
      if (!isa<ScalableVectorType>(OldVTy))
        return false;

      OldSize = TypeSize::getFixed(OldSize.getKnownMinValue() * VScale);
    } else {
      if (!isa<ScalableVectorType>(NewVTy))
        return false;

      NewSize = TypeSize::getFixed(NewSize.getKnownMinValue() * VScale);
    }
  }

  if (NewSize != OldSize)
    return false;
  if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
    return false;

  // We can convert pointers to integers and vice-versa. Same for vectors of
  // pointers and integers.
  OldTy = OldTy->getScalarType();
  NewTy = NewTy->getScalarType();
  if (NewTy->isPointerTy() || OldTy->isPointerTy()) {
    if (NewTy->isPointerTy() && OldTy->isPointerTy()) {
      unsigned OldAS = OldTy->getPointerAddressSpace();
      unsigned NewAS = NewTy->getPointerAddressSpace();
      // Convert pointers if they are pointers from the same address space or
      // different integral (not non-integral) address spaces with the same
      // pointer size.
      return OldAS == NewAS ||
             (!DL.isNonIntegralAddressSpace(OldAS) &&
              !DL.isNonIntegralAddressSpace(NewAS) &&
              DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));
    }

    // We can convert integers to integral pointers, but not to non-integral
    // pointers.
    if (OldTy->isIntegerTy())
      return !DL.isNonIntegralPointerType(NewTy);

    // We can convert integral pointers to integers, but non-integral
    // pointers need to remain pointers.
    if (!DL.isNonIntegralPointerType(OldTy))
      return NewTy->isIntegerTy();

    return false;
  }

  if (OldTy->isTargetExtTy() || NewTy->isTargetExtTy())
    return false;

  return true;
}


/// Generic routine to convert an SSA value to a value of a different type.
///
/// This will try various different casting techniques, such as bitcasts,
/// inttoptr, and ptrtoint casts. Use the \c canConvertValue predicate to
/// test two types for viability with this routine.
static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
                           Type *NewTy) {
  Type *OldTy = V->getType();

#ifndef NDEBUG
  BasicBlock *BB = IRB.GetInsertBlock();
  assert(BB && BB->getParent() && "VScale unknown!");
  unsigned VScale = BB->getParent()->getVScaleValue();
  assert(canConvertValue(DL, OldTy, NewTy, VScale) &&
         "Value not convertable to type");
#endif

  if (OldTy == NewTy)
    return V;

  assert(!(isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) &&
         "Integer types must be the exact same to convert.");

  // A variant of bitcast that also supports a mixture of fixed and scalable
  // types that are known to have the same size.
  auto CreateBitCastLike = [&IRB](Value *In, Type *Ty) -> Value * {
    Type *InTy = In->getType();
    if (InTy == Ty)
      return In;

    if (isa<FixedVectorType>(InTy) && isa<ScalableVectorType>(Ty)) {
      // A fixed-length value can be converted to the scalable equivalent by
      // inserting it into a poison scalable vector and bitcasting.
      auto *VTy = VectorType::getWithSizeAndScalar(cast<VectorType>(Ty), InTy);
      return IRB.CreateBitCast(IRB.CreateInsertVector(VTy,
                                                      PoisonValue::get(VTy),
                                                      In, IRB.getInt64(0)),
                               Ty);
    }

    if (isa<ScalableVectorType>(InTy) && isa<FixedVectorType>(Ty)) {
      // Conversely, a scalable value can be converted to the fixed-length
      // equivalent by bitcasting and extracting the leading subvector.
      auto *VTy = VectorType::getWithSizeAndScalar(cast<VectorType>(InTy), Ty);
      return IRB.CreateExtractVector(Ty, IRB.CreateBitCast(In, VTy),
                                     IRB.getInt64(0));
    }

    return IRB.CreateBitCast(In, Ty);
  };

  // See if we need inttoptr for this type pair. May require additional
  // bitcast.
  if (OldTy->isIntOrIntVectorTy() && NewTy->isPtrOrPtrVectorTy()) {
    // Expand <2 x i32> to ptr      --> <2 x i32> to i64 to ptr
    // Expand i128 to <2 x ptr>     --> i128 to <2 x i64> to <2 x ptr>
    // Expand <4 x i32> to <2 x ptr> --> <4 x i32> to <2 x i64> to <2 x ptr>
    // Directly handle i64 to ptr.
    return IRB.CreateIntToPtr(CreateBitCastLike(V, DL.getIntPtrType(NewTy)),
                              NewTy);
  }

  // See if we need ptrtoint for this type pair. May require additional
  // bitcast.
  if (OldTy->isPtrOrPtrVectorTy() && NewTy->isIntOrIntVectorTy()) {
    // Expand ptr to <2 x i32>      --> ptr to i64 to <2 x i32>
    // Expand <2 x ptr> to i128     --> <2 x ptr> to <2 x i64> to i128
    // Expand <2 x ptr> to <4 x i32> --> <2 x ptr> to <2 x i64> to <4 x i32>
    return CreateBitCastLike(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
                             NewTy);
  }

  if (OldTy->isPtrOrPtrVectorTy() && NewTy->isPtrOrPtrVectorTy()) {
    unsigned OldAS = OldTy->getPointerAddressSpace();
    unsigned NewAS = NewTy->getPointerAddressSpace();
    // To convert pointers with different address spaces (they are already
    // checked convertible, i.e. they have the same pointer size), we cannot
    // use `bitcast` (which is restricted to the same address space) or
    // `addrspacecast` (which is not always a no-op cast). Instead, use a
    // pair of no-op `ptrtoint`/`inttoptr` casts through an integer with the
    // same bit size.
    if (OldAS != NewAS) {
      assert(DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));
      return IRB.CreateIntToPtr(
          CreateBitCastLike(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
                            DL.getIntPtrType(NewTy)),
          NewTy);
    }
  }

  return CreateBitCastLike(V, NewTy);
}
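
// Illustrative conversions (hedged, hypothetical types): <2 x i32> -> ptr is
// emitted as a bitcast to i64 followed by inttoptr; ptr -> <2 x i32> as
// ptrtoint to i64 followed by a bitcast; and ptr(AS1) -> ptr(AS2) between
// equally sized integral address spaces as ptrtoint/inttoptr through the
// matching integer type.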


/// Test whether the given slice use can be promoted to a vector.
///
/// This function is called to test each entry in a partition which is slated
/// for a single slice.
static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
                                            VectorType *Ty,
                                            uint64_t ElementSize,
                                            const DataLayout &DL,
                                            unsigned VScale) {
  // First validate the slice offsets.
  uint64_t BeginOffset =
      std::max(S.beginOffset(), P.beginOffset()) - P.beginOffset();
  uint64_t BeginIndex = BeginOffset / ElementSize;
  if (BeginIndex * ElementSize != BeginOffset ||
      BeginIndex >= cast<FixedVectorType>(Ty)->getNumElements())
    return false;
  uint64_t EndOffset = std::min(S.endOffset(), P.endOffset()) - P.beginOffset();
  uint64_t EndIndex = EndOffset / ElementSize;
  if (EndIndex * ElementSize != EndOffset ||
      EndIndex > cast<FixedVectorType>(Ty)->getNumElements())
    return false;

  assert(EndIndex > BeginIndex && "Empty vector!");
  uint64_t NumElements = EndIndex - BeginIndex;
  Type *SliceTy = (NumElements == 1)
                      ? Ty->getElementType()
                      : FixedVectorType::get(Ty->getElementType(), NumElements);

  Type *SplitIntTy =
      Type::getIntNTy(Ty->getContext(), NumElements * ElementSize * 8);

  Use *U = S.getUse();

  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
    if (MI->isVolatile())
      return false;
    if (!S.isSplittable())
      return false; // Skip any unsplittable intrinsics.
  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
    if (!II->isLifetimeStartOrEnd() && !II->isDroppable())
      return false;
  } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
    if (LI->isVolatile())
      return false;
    Type *LTy = LI->getType();
    // Disable vector promotion when there are loads or stores of an FCA.
    if (LTy->isStructTy())
      return false;
    if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
      assert(LTy->isIntegerTy());
      LTy = SplitIntTy;
    }
    if (!canConvertValue(DL, SliceTy, LTy, VScale))
      return false;
  } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
    if (SI->isVolatile())
      return false;
    Type *STy = SI->getValueOperand()->getType();
    // Disable vector promotion when there are loads or stores of an FCA.
    if (STy->isStructTy())
      return false;
    if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
      assert(STy->isIntegerTy());
      STy = SplitIntTy;
    }
    if (!canConvertValue(DL, STy, SliceTy, VScale))
      return false;
  } else {
    return false;
  }

  return true;
}


/// Test whether any vector type in \p CandidateTys is viable for promotion.
///
/// This implements the necessary checking for \c isVectorPromotionViable
/// over all slices of the alloca for each candidate vector type.
static VectorType *
checkVectorTypesForPromotion(Partition &P, const DataLayout &DL,
                             SmallVectorImpl<VectorType *> &CandidateTys,
                             bool HaveCommonEltTy, Type *CommonEltTy,
                             bool HaveVecPtrTy, bool HaveCommonVecPtrTy,
                             VectorType *CommonVecPtrTy, unsigned VScale) {
  // If we didn't find a vector type, nothing to do here.
  if (CandidateTys.empty())
    return nullptr;

  // Pointer-ness is sticky: if we had a vector-of-pointers candidate type,
  // then we should choose it, not some other alternative. But, we can't
  // perform a no-op pointer address space change via bitcast, so if we
  // didn't have a common pointer element type, bail.
  if (HaveVecPtrTy && !HaveCommonVecPtrTy)
    return nullptr;

  // Try to pick the "best" element type out of the choices.
  if (!HaveCommonEltTy && HaveVecPtrTy) {
    // If there was a pointer element type, there's really only one choice.
    CandidateTys.clear();
    CandidateTys.push_back(CommonVecPtrTy);
  } else if (!HaveCommonEltTy && !HaveVecPtrTy) {
    // Integerify the elements.
    for (VectorType *&VTy : CandidateTys) {
      if (!VTy->getElementType()->isIntegerTy())
        VTy = cast<VectorType>(VTy->getWithNewType(IntegerType::getIntNTy(
            VTy->getContext(), VTy->getScalarSizeInBits())));
    }

    // Rank the remaining candidate vector types. This is easy because we
    // know they're all integer vectors. We sort by ascending number of
    // elements.
    auto RankVectorTypesComp = [&DL](VectorType *RHSTy, VectorType *LHSTy) {
      (void)DL;
      assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
                 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
             "Cannot have vector types of different sizes!");
      assert(RHSTy->getElementType()->isIntegerTy() &&
             "All non-integer types eliminated!");
      assert(LHSTy->getElementType()->isIntegerTy() &&
             "All non-integer types eliminated!");
      return cast<FixedVectorType>(RHSTy)->getNumElements() <
             cast<FixedVectorType>(LHSTy)->getNumElements();
    };
    auto RankVectorTypesEq = [&DL](VectorType *RHSTy, VectorType *LHSTy) {
      (void)DL;
      assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
                 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
             "Cannot have vector types of different sizes!");
      assert(RHSTy->getElementType()->isIntegerTy() &&
             "All non-integer types eliminated!");
      assert(LHSTy->getElementType()->isIntegerTy() &&
             "All non-integer types eliminated!");
      return cast<FixedVectorType>(RHSTy)->getNumElements() ==
             cast<FixedVectorType>(LHSTy)->getNumElements();
    };
    llvm::sort(CandidateTys, RankVectorTypesComp);
    CandidateTys.erase(llvm::unique(CandidateTys, RankVectorTypesEq),
                       CandidateTys.end());
  } else {
// The only way to have the same element type in every vector type is to
// have the same vector type. Check that and remove all but one.
#ifndef NDEBUG
    for (VectorType *VTy : CandidateTys) {
      assert(VTy->getElementType() == CommonEltTy &&
             "Unaccounted for element type!");
      assert(VTy == CandidateTys[0] &&
             "Different vector types with the same element type!");
    }
#endif
    CandidateTys.resize(1);
  }

  // FIXME: hack. Do we have a named constant for this?
  // SDAG SDNode can't have more than 65535 operands.
  llvm::erase_if(CandidateTys, [](VectorType *VTy) {
    return cast<FixedVectorType>(VTy)->getNumElements() >
           std::numeric_limits<unsigned short>::max();
  });

  auto VTy = llvm::find_if(CandidateTys, [&](VectorType *VTy) {
    uint64_t ElementSize =
        DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();

    // While the definition of LLVM vectors is bitpacked, we don't support
    // sizes that aren't byte sized so that we can stay byte oriented.
    if (ElementSize % 8)
      return false;
    assert((DL.getTypeSizeInBits(VTy).getFixedValue() % 8) == 0 &&
           "vector size not a multiple of element size?");
    ElementSize /= 8;

    for (const Slice &S : P)
      if (!isVectorPromotionViableForSlice(P, S, VTy, ElementSize, DL, VScale))
        return false;

    for (const Slice *S : P.splitSliceTails())
      if (!isVectorPromotionViableForSlice(P, *S, VTy, ElementSize, DL,
                                           VScale))
        return false;

    return true;
  });
  return VTy != CandidateTys.end() ? *VTy : nullptr;
}


static VectorType *createAndCheckVectorTypesForPromotion(
    SetVector<Type *> &OtherTys, ArrayRef<VectorType *> CandidateTysCopy,
    function_ref<void(Type *)> CheckCandidateType, Partition &P,
    const DataLayout &DL, SmallVectorImpl<VectorType *> &CandidateTys,
    bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy,
    bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy, unsigned VScale) {
  [[maybe_unused]] VectorType *OriginalElt =
      CandidateTysCopy.size() ? CandidateTysCopy[0] : nullptr;
  // Consider additional vector types where the element type size is a
  // multiple of load/store element size.
  for (Type *Ty : OtherTys) {
    if (!VectorType::isValidElementType(Ty))
      continue;
    unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();

    // Make a copy of CandidateTys and iterate through it, because we might
    // append to CandidateTys in the loop.
    for (VectorType *const VTy : CandidateTysCopy) {
      // The elements in the copy should remain invariant throughout the loop
      assert(CandidateTysCopy[0] == OriginalElt && "Different Element");
      unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
      unsigned ElementSize =
          DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
      if (TypeSize != VectorSize && TypeSize != ElementSize &&
          VectorSize % TypeSize == 0) {
        VectorType *NewVTy = VectorType::get(Ty, VectorSize / TypeSize, false);
        CheckCandidateType(NewVTy);
      }
    }
  }

  return checkVectorTypesForPromotion(
      P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
      HaveCommonVecPtrTy, CommonVecPtrTy, VScale);
}


/// Test whether the given alloca partitioning and range of slices can be
/// promoted to a vector.
///
/// This is a quick test to check whether we can rewrite a particular alloca
/// partition (and its newly formed alloca) into a vector alloca with only
/// whole-vector loads and stores such that it could be promoted to a vector
/// SSA value. We only can ensure this for a limited set of operations, and
/// we don't want to do the rewrites unless we are confident that the result
/// will be promotable, so we have an early test here.
static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL,
                                           unsigned VScale) {
  // Collect the candidate types for vector-based promotion. Also track
  // whether we have different element types.
  SmallVector<VectorType *, 4> CandidateTys;
  SetVector<Type *> LoadStoreTys;
  SetVector<Type *> DeferredTys;
  Type *CommonEltTy = nullptr;
  VectorType *CommonVecPtrTy = nullptr;
  bool HaveVecPtrTy = false;
  bool HaveCommonEltTy = true;
  bool HaveCommonVecPtrTy = true;
  auto CheckCandidateType = [&](Type *Ty) {
    if (auto *VTy = dyn_cast<VectorType>(Ty)) {
      // Return if bitcast to vectors is different for total size in bits.
      if (!CandidateTys.empty()) {
        VectorType *V = CandidateTys[0];
        if (DL.getTypeSizeInBits(VTy).getFixedValue() !=
            DL.getTypeSizeInBits(V).getFixedValue()) {
          CandidateTys.clear();
          return;
        }
      }
      CandidateTys.push_back(VTy);
      Type *EltTy = VTy->getElementType();

      if (!CommonEltTy)
        CommonEltTy = EltTy;
      else if (CommonEltTy != EltTy)
        HaveCommonEltTy = false;

      if (EltTy->isPointerTy()) {
        HaveVecPtrTy = true;
        if (!CommonVecPtrTy)
          CommonVecPtrTy = VTy;
        else if (CommonVecPtrTy != VTy)
          HaveCommonVecPtrTy = false;
      }
    }
  };

  // Put load and store types into a set for de-duplication.
  for (const Slice &S : P) {
    Type *Ty;
    if (auto *LI = dyn_cast<LoadInst>(S.getUse()->getUser()))
      Ty = LI->getType();
    else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser()))
      Ty = SI->getValueOperand()->getType();
    else
      continue;

    auto CandTy = Ty->getScalarType();
    if (CandTy->isPointerTy() && (S.beginOffset() != P.beginOffset() ||
                                  S.endOffset() != P.endOffset())) {
      DeferredTys.insert(Ty);
      continue;
    }

    LoadStoreTys.insert(Ty);
    // Consider any loads or stores that are the exact size of the slice.
    if (S.beginOffset() == P.beginOffset() && S.endOffset() == P.endOffset())
      CheckCandidateType(Ty);
  }

  SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys;
  if (auto *VTy = createAndCheckVectorTypesForPromotion(
          LoadStoreTys, CandidateTysCopy, CheckCandidateType, P, DL,
          CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
          HaveCommonVecPtrTy, CommonVecPtrTy, VScale))
    return VTy;

  CandidateTys.clear();
  return createAndCheckVectorTypesForPromotion(
      DeferredTys, CandidateTysCopy, CheckCandidateType, P, DL, CandidateTys,
      HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy,
      CommonVecPtrTy, VScale);
}


/// Test whether a slice of an alloca is valid for integer widening.
///
/// This implements the necessary checking for the \c isIntegerWideningViable
/// test below on a single slice of the alloca.
static bool isIntegerWideningViableForSlice(const Slice &S,
                                            uint64_t AllocBeginOffset,
                                            Type *AllocaTy,
                                            const DataLayout &DL,
                                            bool &WholeAllocaOp) {
  uint64_t Size = DL.getTypeStoreSize(AllocaTy).getFixedValue();

  uint64_t RelBegin = S.beginOffset() - AllocBeginOffset;
  uint64_t RelEnd = S.endOffset() - AllocBeginOffset;

  Use *U = S.getUse();

  // Lifetime intrinsics operate over the whole alloca whose sizes are
  // usually larger than other load/store slices (RelEnd > Size). But
  // lifetime markers are always promotable and should not impact other
  // slices' promotability of the partition.
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
    if (II->isLifetimeStartOrEnd() || II->isDroppable())
      return true;
  }

  // We can't reasonably handle cases where the load or store extends past
  // the end of the alloca's type and into its padding.
  if (RelEnd > Size)
    return false;

  if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
    if (LI->isVolatile())
      return false;
    // We can't handle loads that extend past the allocated memory.
    if (DL.getTypeStoreSize(LI->getType()).getFixedValue() > Size)
      return false;
    // So far, AllocaSliceRewriter does not support widening split slice
    // tails in rewriteIntegerLoad.
    if (S.beginOffset() < AllocBeginOffset)
      return false;
    // Note that we don't count vector loads or stores as whole-alloca
    // operations which enable integer widening because it would prevent us
    // from being able to use a vector-based promotion strategy.
    if (!isa<VectorType>(LI->getType()) && RelBegin == 0 && RelEnd == Size)
      WholeAllocaOp = true;
    if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
      if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedValue())
        return false;
    } else if (RelBegin != 0 || RelEnd != Size ||
               !canConvertValue(DL, AllocaTy, LI->getType())) {
      // Non-integer loads need to be convertible from the alloca type so
      // that they are promotable.
      return false;
    }
  } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
    Type *ValueTy = SI->getValueOperand()->getType();
    if (SI->isVolatile())
      return false;
    // We can't handle stores that extend past the allocated memory.
    TypeSize StoreSize = DL.getTypeStoreSize(ValueTy);
    if (StoreSize.isScalable() || StoreSize.getFixedValue() > Size)
      return false;
    // So far, AllocaSliceRewriter does not support widening split slice
    // tails in rewriteIntegerStore.
    if (S.beginOffset() < AllocBeginOffset)
      return false;
    // Note that we don't count vector loads or stores as whole-alloca
    // operations which enable integer widening because it would prevent us
    // from being able to use a vector-based promotion strategy.
    if (!isa<VectorType>(ValueTy) && RelBegin == 0 && RelEnd == Size)
      WholeAllocaOp = true;
    if (IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) {
      if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedValue())
        return false;
    } else if (RelBegin != 0 || RelEnd != Size ||
               !canConvertValue(DL, AllocaTy, ValueTy)) {
      // Non-integer stores need to be convertible to the alloca type so that
      // they are promotable.
      return false;
    }
  } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
    if (MI->isVolatile() || !isa<Constant>(MI->getLength()))
      return false;
    if (!S.isSplittable())
      return false; // Skip any unsplittable intrinsics.
  } else {
    return false;
  }

  return true;
}


/// Test whether the given alloca partition's integer operations can be
/// widened to promotable ones.
///
/// This is a quick test to check whether we can rewrite the integer loads
/// and stores to a particular alloca into wider loads and stores and be able
/// to promote the resulting alloca.
static bool isIntegerWideningViable(Partition &P, Type *AllocaTy,
                                    const DataLayout &DL) {
  uint64_t SizeInBits = DL.getTypeSizeInBits(AllocaTy).getFixedValue();
  // Don't create integer types larger than the maximum bitwidth.
  if (SizeInBits > IntegerType::MAX_INT_BITS)
    return false;

  // Don't try to handle allocas with bit-padding.
  if (SizeInBits != DL.getTypeStoreSizeInBits(AllocaTy).getFixedValue())
    return false;

  // We need to ensure that an integer type with the appropriate bitwidth can
  // be converted to the alloca type, whatever that is. We don't want to
  // force the alloca itself to have an integer type if there is a more
  // suitable one.
  Type *IntTy = Type::getIntNTy(AllocaTy->getContext(), SizeInBits);
  if (!canConvertValue(DL, AllocaTy, IntTy) ||
      !canConvertValue(DL, IntTy, AllocaTy))
    return false;

  // While examining uses, we ensure that the alloca has a covering load or
  // store. We don't want to widen the integer operations only to fail to
  // promote due to some other unsplittable entry (which we may make
  // splittable later). However, if there are only splittable uses, go ahead
  // and assume that we cover the alloca.
  // FIXME: We shouldn't consider split slices that happen to start in the
  // partition here...
  bool WholeAllocaOp = P.empty() && DL.isLegalInteger(SizeInBits);

  for (const Slice &S : P)
    if (!isIntegerWideningViableForSlice(S, P.beginOffset(), AllocaTy, DL,
                                         WholeAllocaOp))
      return false;

  for (const Slice *S : P.splitSliceTails())
    if (!isIntegerWideningViableForSlice(*S, P.beginOffset(), AllocaTy, DL,
                                         WholeAllocaOp))
      return false;

  return WholeAllocaOp;
}


static Value *extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
                             IntegerType *Ty, uint64_t Offset,
                             const Twine &Name) {
  LLVM_DEBUG(dbgs() << "       start: " << *V << "\n");
  IntegerType *IntTy = cast<IntegerType>(V->getType());
  assert(DL.getTypeStoreSize(Ty).getFixedValue() + Offset <=
             DL.getTypeStoreSize(IntTy).getFixedValue() &&
         "Element extends past full value");
  uint64_t ShAmt = 8 * Offset;
  if (DL.isBigEndian())
    ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -
                 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);
  if (ShAmt) {
    V = IRB.CreateLShr(V, ShAmt, Name + ".shift");
    LLVM_DEBUG(dbgs() << "     shifted: " << *V << "\n");
  }
  assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
         "Cannot extract to a larger integer!");
  if (Ty != IntTy) {
    V = IRB.CreateTrunc(V, Ty, Name + ".trunc");
    LLVM_DEBUG(dbgs() << "     trunced: " << *V << "\n");
  }
  return V;
}


static Value *insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old,
                            Value *V, uint64_t Offset, const Twine &Name) {
  IntegerType *IntTy = cast<IntegerType>(Old->getType());
  IntegerType *Ty = cast<IntegerType>(V->getType());
  assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
         "Cannot insert a larger integer!");
  LLVM_DEBUG(dbgs() << "       start: " << *V << "\n");
  if (Ty != IntTy) {
    V = IRB.CreateZExt(V, IntTy, Name + ".ext");
    LLVM_DEBUG(dbgs() << "    extended: " << *V << "\n");
  }
  assert(DL.getTypeStoreSize(Ty).getFixedValue() + Offset <=
             DL.getTypeStoreSize(IntTy).getFixedValue() &&
         "Element store outside of alloca store");
  uint64_t ShAmt = 8 * Offset;
  if (DL.isBigEndian())
    ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -
                 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);
  if (ShAmt) {
    V = IRB.CreateShl(V, ShAmt, Name + ".shift");
    LLVM_DEBUG(dbgs() << "     shifted: " << *V << "\n");
  }

  if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
    APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
    Old = IRB.CreateAnd(Old, Mask, Name + ".mask");
    LLVM_DEBUG(dbgs() << "      masked: " << *Old << "\n");
    V = IRB.CreateOr(Old, V, Name + ".insert");
    LLVM_DEBUG(dbgs() << "    inserted: " << *V << "\n");
  }
  return V;
}
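
// Worked arithmetic example (hedged, not in the original source): inserting
// an i16 at byte offset 2 of an i64 on a little-endian target uses
// ShAmt = 8 * 2 = 16 and the mask ~(i64 0xFFFF << 16) = 0xFFFFFFFF0000FFFF;
// the old value is ANDed with the mask and then ORed with the shifted new
// bits.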


static Value *extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex,
                            unsigned EndIndex, const Twine &Name) {
  auto *VecTy = cast<FixedVectorType>(V->getType());
  unsigned NumElements = EndIndex - BeginIndex;
  assert(NumElements <= VecTy->getNumElements() && "Too many elements!");

  if (NumElements == VecTy->getNumElements())
    return V;

  if (NumElements == 1) {
    V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),
                                 Name + ".extract");
    LLVM_DEBUG(dbgs() << "     extract: " << *V << "\n");
    return V;
  }

  auto Mask = llvm::to_vector<8>(llvm::seq<int>(BeginIndex, EndIndex));
  V = IRB.CreateShuffleVector(V, Mask, Name + ".extract");
  LLVM_DEBUG(dbgs() << "     shuffle: " << *V << "\n");
  return V;
}


static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
                           unsigned BeginIndex, const Twine &Name) {
  VectorType *VecTy = cast<VectorType>(Old->getType());
  assert(VecTy && "Can only insert a vector into a vector");

  VectorType *Ty = dyn_cast<VectorType>(V->getType());
  if (!Ty) {
    // Single element to insert.
    V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),
                                Name + ".insert");
    LLVM_DEBUG(dbgs() << "     insert: " << *V << "\n");
    return V;
  }

  assert(cast<FixedVectorType>(Ty)->getNumElements() <=
             cast<FixedVectorType>(VecTy)->getNumElements() &&
         "Too many elements!");
  if (cast<FixedVectorType>(Ty)->getNumElements() ==
      cast<FixedVectorType>(VecTy)->getNumElements()) {
    assert(V->getType() == VecTy && "Vector type mismatch");
    return V;
  }
  unsigned EndIndex = BeginIndex + cast<FixedVectorType>(Ty)->getNumElements();

  // When inserting a smaller vector into the larger to store, we first
  // use a shuffle vector to widen it with undef elements, and then
  // a second shuffle vector to select between the loaded vector and the
  // incoming vector.
  SmallVector<int, 8> Mask;
  Mask.reserve(cast<FixedVectorType>(VecTy)->getNumElements());
  for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements();
       ++i)
    if (i >= BeginIndex && i < EndIndex)
      Mask.push_back(i - BeginIndex);
    else
      Mask.push_back(-1);
  V = IRB.CreateShuffleVector(V, Mask, Name + ".expand");
  LLVM_DEBUG(dbgs() << "    shuffle: " << *V << "\n");

  SmallVector<Constant *, 8> Mask2;
  Mask2.reserve(cast<FixedVectorType>(VecTy)->getNumElements());
  for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements();
       ++i)
    Mask2.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));

  // This select has a constant condition, so no profile metadata applies.
  V = IRB.CreateSelectWithUnknownProfile(ConstantVector::get(Mask2), V, Old,
                                         DEBUG_TYPE, Name + ".blend");

  LLVM_DEBUG(dbgs() << "    blend: " << *V << "\n");
  return V;
}
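
// Illustrative IR (hedged, not from the original source): inserting a
// <2 x i32> value %v at index 1 of a <4 x i32> %old first widens %v with the
// shuffle mask <poison, 0, 1, poison> and then blends against %old with the
// constant condition <i1 0, i1 1, i1 1, i1 0>, so only lanes 1 and 2 take
// the new value.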


/// Helper for rewriteTreeStructuredMerge: merge two fixed vectors into one
/// value via shufflevector, first bitcasting each operand to use the new
/// alloca's element type and poison-extending the smaller operand when the
/// element counts differ. (The signature and DataLayout parameter here are
/// reconstructed where the listing lost them.)
static Value *mergeTwoVectors(IRBuilder<> &Builder, const DataLayout &DL,
                              Value *V0, Value *V1, Type *NewAIEltTy) {
  auto *VecType0 = cast<FixedVectorType>(V0->getType());
  auto *VecType1 = cast<FixedVectorType>(V1->getType());

  auto BitcastIfNeeded = [&](Value *&V, FixedVectorType *&VecType,
                             const char *DebugName) {
    Type *EltType = VecType->getElementType();
    if (EltType != NewAIEltTy) {
      // Compute the equivalent number of NewAIEltTy elements; divisibility
      // was checked by the caller.
      unsigned TotalBits =
          VecType->getNumElements() * DL.getTypeSizeInBits(EltType);
      unsigned NewNumElts = TotalBits / DL.getTypeSizeInBits(NewAIEltTy);
      auto *NewVecType = FixedVectorType::get(NewAIEltTy, NewNumElts);
      V = Builder.CreateBitCast(V, NewVecType);
      VecType = NewVecType;
      LLVM_DEBUG(dbgs() << "    bitcast " << DebugName << ": " << *V << "\n");
    }
  };

  BitcastIfNeeded(V0, VecType0, "V0");
  BitcastIfNeeded(V1, VecType1, "V1");

  unsigned NumElts0 = VecType0->getNumElements();
  unsigned NumElts1 = VecType1->getNumElements();

  SmallVector<int, 16> ShuffleMask;
  if (NumElts0 == NumElts1) {
    for (unsigned i = 0; i < NumElts0 + NumElts1; ++i)
      ShuffleMask.push_back(i);
  } else {
    // If the two vectors have different sizes, extend the smaller one with
    // poison elements before concatenating.
    unsigned SmallSize = std::min(NumElts0, NumElts1);
    unsigned LargeSize = std::max(NumElts0, NumElts1);
    bool IsV0Smaller = NumElts0 < NumElts1;
    Value *&ExtendedVec = IsV0Smaller ? V0 : V1;
    SmallVector<int, 16> ExtendMask;
    for (unsigned i = 0; i < SmallSize; ++i)
      ExtendMask.push_back(i);
    for (unsigned i = SmallSize; i < LargeSize; ++i)
      ExtendMask.push_back(-1);
    ExtendedVec = Builder.CreateShuffleVector(ExtendedVec, ExtendMask);
    LLVM_DEBUG(dbgs() << "    shufflevector: " << *ExtendedVec << "\n");
    for (unsigned i = 0; i < NumElts0; ++i)
      ShuffleMask.push_back(i);
    for (unsigned i = 0; i < NumElts1; ++i)
      ShuffleMask.push_back(LargeSize + i);
  }

  return Builder.CreateShuffleVector(V0, V1, ShuffleMask);
}
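
// Illustrative example (hedged): merging a <2 x float> with a <4 x float>
// first extends the smaller operand to <4 x float> with poison lanes, then
// concatenates with the mask <0, 1, 4, 5, 6, 7> to form a <6 x float> value.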


namespace {

/// Visitor to rewrite instructions using a partition of an alloca to use a
/// new alloca.
///
/// Also implements the rewriting to vector-based accesses when the partition
/// passes the isVectorPromotionViable predicate. Most of the rewriting logic
/// lives here.
class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
  // Befriend the base class so it can delegate to private visit methods.
  friend class InstVisitor<AllocaSliceRewriter, bool>;

  using Base = InstVisitor<AllocaSliceRewriter, bool>;

  const DataLayout &DL;
  AllocaSlices &AS;
  SROA &Pass;
  AllocaInst &OldAI, &NewAI;
  const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;
  Type *NewAllocaTy;

  // This is a convenience and flag variable that will be null unless the new
  // alloca's integer operations should be widened to this integer type due
  // to passing isIntegerWideningViable above. If it is non-null, the desired
  // integer type will be stored here for easy access during rewriting.
  IntegerType *IntTy;

  // If we are rewriting an alloca partition which can be written as pure
  // vector operations, we stash extra information here. When VecTy is
  // non-null, the element type is stored in ElementTy and the byte size of a
  // single element in ElementSize.
  VectorType *VecTy;
  Type *ElementTy;
  uint64_t ElementSize;

  // The original offset of the slice currently being rewritten relative to
  // the original alloca.
  uint64_t BeginOffset = 0;
  uint64_t EndOffset = 0;

  // The new offsets of the slice currently being rewritten relative to the
  // original alloca.
  uint64_t NewBeginOffset = 0, NewEndOffset = 0;

  uint64_t SliceSize = 0;
  bool IsSplittable = false;
  bool IsSplit = false;
  Use *OldUse = nullptr;
  Instruction *OldPtr = nullptr;

  // Track post-rewrite users which are PHI nodes and Selects.
  SmallSetVector<PHINode *, 8> &PHIUsers;
  SmallSetVector<SelectInst *, 8> &SelectUsers;

  // Utility IR builder, whose name prefix is setup for each visited use, and
  // the insertion point is set to point to the user.
  IRBuilderTy IRB;

  // Return the new alloca, addrspacecasted if required to avoid changing the
  // addrspace of a volatile access.
  Value *getPtrToNewAI(unsigned AddrSpace, bool IsVolatile) {
    if (!IsVolatile || AddrSpace == NewAI.getType()->getPointerAddressSpace())
      return &NewAI;

    Type *AccessTy = IRB.getPtrTy(AddrSpace);
    return IRB.CreateAddrSpaceCast(&NewAI, AccessTy);
  }

public:
  AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROA &Pass,
                      AllocaInst &OldAI, AllocaInst &NewAI,
                      uint64_t NewAllocaBeginOffset,
                      uint64_t NewAllocaEndOffset, bool IsIntegerPromotable,
                      VectorType *PromotableVecTy,
                      SmallSetVector<PHINode *, 8> &PHIUsers,
                      SmallSetVector<SelectInst *, 8> &SelectUsers)
      : DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI),
        NewAllocaBeginOffset(NewAllocaBeginOffset),
        NewAllocaEndOffset(NewAllocaEndOffset),
        NewAllocaTy(NewAI.getAllocatedType()),
        IntTy(
            IsIntegerPromotable
                ? Type::getIntNTy(NewAI.getContext(),
                                  DL.getTypeSizeInBits(NewAI.getAllocatedType())
                                      .getFixedValue())
                : nullptr),
        VecTy(PromotableVecTy),
        ElementTy(VecTy ? VecTy->getElementType() : nullptr),
        ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8
                          : 0),
        PHIUsers(PHIUsers), SelectUsers(SelectUsers),
        IRB(NewAI.getContext(), ConstantFolder()) {
    if (VecTy) {
      assert((DL.getTypeSizeInBits(ElementTy).getFixedValue() % 8) == 0 &&
             "Only multiple-of-8 sized vector elements are viable");
      ++NumVectorized;
    }
    assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy));
  }

  bool visit(AllocaSlices::const_iterator I) {
    bool CanSROA = true;
    BeginOffset = I->beginOffset();
    EndOffset = I->endOffset();
    IsSplittable = I->isSplittable();
    IsSplit =
        BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
    LLVM_DEBUG(dbgs() << "  rewriting " << (IsSplit ? "split " : ""));
    LLVM_DEBUG(AS.printSlice(dbgs(), I, ""));
    LLVM_DEBUG(dbgs() << "\n");

    // Compute the intersecting offset range.
    assert(BeginOffset < NewAllocaEndOffset);
    assert(EndOffset > NewAllocaBeginOffset);
    NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
    NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);

    SliceSize = NewEndOffset - NewBeginOffset;
    LLVM_DEBUG(dbgs() << "   Begin:(" << BeginOffset << ", " << EndOffset
                      << ") NewBegin:(" << NewBeginOffset << ", "
                      << NewEndOffset << ") NewAllocaBegin:("
                      << NewAllocaBeginOffset << ", " << NewAllocaEndOffset
                      << ")\n");
    assert(IsSplit || NewBeginOffset == BeginOffset);
    OldUse = I->getUse();
    OldPtr = cast<Instruction>(OldUse->get());

    Instruction *OldUserI = cast<Instruction>(OldUse->getUser());
    IRB.SetInsertPoint(OldUserI);
    IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc());
    IRB.getInserter().SetNamePrefix(Twine(NewAI.getName()) + "." +
                                    Twine(BeginOffset) + ".");

    CanSROA &= visit(cast<Instruction>(OldUse->getUser()));
    if (VecTy || IntTy)
      assert(CanSROA);
    return CanSROA;
  }

2911

2912

2913

2914

2915

2916

2917

2918

2919

2920

2921

2922

2923

2924

2925

2926

2927

2928

2929

2930

2931

2932

2933

2934

2935

2936

2937

2938

2939

2940

2941

2942

2943

2944

2945

2946

2947

2948

2949

2950 std::optional<SmallVector<Value *, 4>>

2951 rewriteTreeStructuredMerge(Partition &P) {

2952

2953 if (P.splitSliceTails().size() > 0)

2954 return std::nullopt;

2955

2957 LoadInst *TheLoad = nullptr;

2958

2959

2960 struct StoreInfo {

2961 StoreInst *Store;

2962 uint64_t BeginOffset;

2963 uint64_t EndOffset;

2964 Value *StoredValue;

2965 StoreInfo(StoreInst *SI, uint64_t Begin, uint64_t End, Value *Val)

2966 : Store(SI), BeginOffset(Begin), EndOffset(End), StoredValue(Val) {}

2967 };

2968

2970

2971

2972

2973 Type *AllocatedEltTy =

2976 : Type::getInt8Ty(NewAI.getContext());

2977 unsigned AllocatedEltTySize = DL.getTypeSizeInBits(AllocatedEltTy);

2978

2979

2980

2981

2982

2983

2984 auto IsTypeValidForTreeStructuredMerge = [&](Type *Ty) -> bool {

2986 return FixedVecTy &&

2987 DL.getTypeSizeInBits(FixedVecTy->getElementType()) % 8 == 0 &&

2988 !FixedVecTy->getElementType()->isPointerTy();

2989 };

2990

2991 for (Slice &S : P) {

2994

2995

2996

2997

2998

2999 if (TheLoad || !IsTypeValidForTreeStructuredMerge(LI->getType()) ||

3000 S.beginOffset() != NewAllocaBeginOffset ||

3001 S.endOffset() != NewAllocaEndOffset || LI->isVolatile())

3002 return std::nullopt;

3003 TheLoad = LI;

3005

3006

3007

3008

3009

3010 if (!IsTypeValidForTreeStructuredMerge(

3011 SI->getValueOperand()->getType()) ||

3012 SI->isVolatile())

3013 return std::nullopt;

3015 unsigned NumElts = VecTy->getNumElements();

3016 unsigned EltSize = DL.getTypeSizeInBits(VecTy->getElementType());

3017 if (NumElts * EltSize % AllocatedEltTySize != 0)

3018 return std::nullopt;

3019 StoreInfos.emplace_back(SI, S.beginOffset(), S.endOffset(),

3020 SI->getValueOperand());

3021 } else {

3022

3023

3024 return std::nullopt;

3025 }

3026 }

3027

3028 if (!TheLoad)

3029 return std::nullopt;

3030

3031

3032 if (StoreInfos.size() < 2)

3033 return std::nullopt;

3034

3035

3036

3037 llvm::sort(StoreInfos, [](const StoreInfo &A, const StoreInfo &B) {

3038 return A.BeginOffset < B.BeginOffset;

3039 });

3040

3041

3042 uint64_t ExpectedStart = NewAllocaBeginOffset;

3043 for (auto &StoreInfo : StoreInfos) {

3044 uint64_t BeginOff = StoreInfo.BeginOffset;

3045 uint64_t EndOff = StoreInfo.EndOffset;

3046

3047

3048 if (BeginOff != ExpectedStart)

3049 return std::nullopt;

3050

3051 ExpectedStart = EndOff;

3052 }

3053

3054 if (ExpectedStart != NewAllocaEndOffset)

3055 return std::nullopt;

3056

3057

3058

3059

3060

3061

3062

3063

3064

3066 BasicBlock *StoreBB = StoreInfos[0].Store->getParent();

3067

3068 for (auto &StoreInfo : StoreInfos) {

3069 if (StoreInfo.Store->getParent() != StoreBB)

3070 return std::nullopt;

3071 if (LoadBB == StoreBB && !StoreInfo.Store->comesBefore(TheLoad))

3072 return std::nullopt;

3073 }

3074

3075

3076

3078 dbgs() << "Tree structured merge rewrite:\n Load: " << *TheLoad

3079 << "\n Ordered stores:\n";

3081 dbgs() << " [" << i << "] Range[" << Info.BeginOffset << ", "

3082 << Info.EndOffset << ") \tStore: " << *Info.Store

3083 << "\tValue: " << *Info.StoredValue << "\n";

3084 });

3085

3086

3087

3088 std::queue<Value *> VecElements;

3089 IRBuilder<> Builder(StoreInfos.back().Store);

3090 for (const auto &Info : StoreInfos) {

3092 VecElements.push(Info.StoredValue);

3093 }

3094

3095 LLVM_DEBUG(dbgs() << " Rewrite stores into shufflevectors:\n");

3096 while (VecElements.size() > 1) {

3097 const auto NumElts = VecElements.size();

3098 for ([[maybe_unused]] const auto _ : llvm::seq(NumElts / 2)) {

3099 Value *V0 = VecElements.front();

3100 VecElements.pop();

3101 Value *V1 = VecElements.front();

3102 VecElements.pop();

3104 LLVM_DEBUG(dbgs() << " shufflevector: " << *Merged << "\n");

3105 VecElements.push(Merged);

3106 }

3107 if (NumElts % 2 == 1) {

3108 Value *V = VecElements.front();

3109 VecElements.pop();

3110 VecElements.push(V);

3111 }

3112 }

3113

3114

3115 Value *MergedValue = VecElements.front();

3116 Builder.CreateAlignedStore(MergedValue, &NewAI, getSliceAlign());

3117

3120 TheLoad->getType(), &NewAI, getSliceAlign(), TheLoad->isVolatile(),

3121 TheLoad->getName() + ".sroa.new.load"));

3122 DeletedValues.push_back(TheLoad);

3123

3124 return DeletedValues;

3125 }
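// Editor's note: illustrative IR sketch, not part of the original source.
// The tree-structured merge above turns contiguous vector stores that
// exactly cover the new alloca, followed by a whole-alloca load, into a
// shufflevector tree plus a single store/load pair, e.g.:
//
//   store <4 x float> %v0, ptr %a          ; bytes [0, 16)
//   store <4 x float> %v1, ptr %a.off16    ; bytes [16, 32)
//   %w = load <8 x float>, ptr %a
//
// becomes
//
//   %m = shufflevector <4 x float> %v0, <4 x float> %v1,
//        <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
//   store <8 x float> %m, ptr %a
//   %w = load <8 x float>, ptr %a
//
// With more stores, the queue-based loop pairs values repeatedly, producing
// a log-depth tree of shufflevectors before the final store.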

3126

3127private:

3128

3129 using Base::visit;

3130

3131

3132 bool visitInstruction(Instruction &I) {

3133 LLVM_DEBUG(dbgs() << " !!!! Cannot rewrite: " << I << "\n");

3135 }

3136

3138

3139

3140 assert(IsSplit || BeginOffset == NewBeginOffset);

3141 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;

3142

3143 StringRef OldName = OldPtr->getName();

3144

3145 size_t LastSROAPrefix = OldName.rfind(".sroa.");

3147 OldName = OldName.substr(LastSROAPrefix + strlen(".sroa."));

3148

3150 if (IndexEnd != StringRef::npos && OldName[IndexEnd] == '.') {

3151

3152 OldName = OldName.substr(IndexEnd + 1);

3154 if (OffsetEnd != StringRef::npos && OldName[OffsetEnd] == '.')

3155

3156 OldName = OldName.substr(OffsetEnd + 1);

3157 }

3158 }

3159

3160 OldName = OldName.substr(0, OldName.find(".sroa_"));

3161

3164 PointerTy, Twine(OldName) + ".");

3165 }

3166

3167

3168

3169

3170

3171

3172 Align getSliceAlign() {

3174 NewBeginOffset - NewAllocaBeginOffset);

3175 }

3176

3177 unsigned getIndex(uint64_t Offset) {

3178 assert(VecTy && "Can only call getIndex when rewriting a vector");

3179 uint64_t RelOffset = Offset - NewAllocaBeginOffset;

3180 assert(RelOffset / ElementSize < UINT32_MAX && "Index out of bounds");

3181 uint32_t Index = RelOffset / ElementSize;

3182 assert(Index * ElementSize == RelOffset);

3184 }

3185

3186 void deleteIfTriviallyDead(Value *V) {

3189 Pass.DeadInsts.push_back(I);

3190 }

3191

3192 Value *rewriteVectorizedLoadInst(LoadInst &LI) {

3193 unsigned BeginIndex = getIndex(NewBeginOffset);

3194 unsigned EndIndex = getIndex(NewEndOffset);

3195 assert(EndIndex > BeginIndex && "Empty vector!");

3196

3199

3200 Load->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,

3201 LLVMContext::MD_access_group});

3202 return extractVector(IRB, Load, BeginIndex, EndIndex, "vec");

3203 }

3204

3205 Value *rewriteIntegerLoad(LoadInst &LI) {

3206 assert(IntTy && "We cannot insert an integer to the alloca");

3211 assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");

3212 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;

3213 if (Offset > 0 || NewEndOffset < NewAllocaEndOffset) {

3214 IntegerType *ExtractTy = Type::getIntNTy(LI.getContext(), SliceSize * 8);

3216 }

3217

3218

3219

3220

3221

3223 "Can only handle an extract for an overly wide load");

3225 V = IRB.CreateZExt(V, LI.getType());

3226 return V;

3227 }

3228

3229 bool visitLoadInst(LoadInst &LI) {

3232 assert(OldOp == OldPtr);

3233

3235

3237

3238 Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), SliceSize * 8)

3240 bool IsPtrAdjusted = false;

3242 if (VecTy) {

3243 V = rewriteVectorizedLoadInst(LI);

3245 V = rewriteIntegerLoad(LI);

3246 } else if (NewBeginOffset == NewAllocaBeginOffset &&

3247 NewEndOffset == NewAllocaEndOffset &&

3250 DL.getTypeStoreSize(TargetTy).getFixedValue() > SliceSize &&

3252 Value *NewPtr =

3253 getPtrToNewAI(LI.getPointerAddressSpace(), LI.isVolatile());

3254 LoadInst *NewLI = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), NewPtr,

3255 NewAI.getAlign(), LI.isVolatile(),

3256 LI.getName());

3257 if (LI.isVolatile())

3258 NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());

3259 if (NewLI->isAtomic())

3260 NewLI->setAlignment(LI.getAlign());

3261

3262

3263

3264

3265 copyMetadataForLoad(*NewLI, LI);

3266

3267

3268 if (AATags)

3269 NewLI->setAAMetadata(AATags.adjustForAccess(

3270 NewBeginOffset - BeginOffset, NewLI->getType(), DL));

3271

3272

3273 V = NewLI;

3274

3275

3276

3277

3278 if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))

3279 if (auto *TITy = dyn_cast<IntegerType>(TargetTy))

3280 if (AITy->getBitWidth() < TITy->getBitWidth()) {

3281 V = IRB.CreateZExt(V, TITy, "load.ext");

3282 if (DL.isBigEndian())

3283 V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),

3284 "endian_shift");

3285 }

3286 } else {

3287 Type *LTy = IRB.getPtrTy(AS);

3288 LoadInst *NewLI =

3289 IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),

3291

3292 if (AATags)

3294 NewBeginOffset - BeginOffset, NewLI->getType(), DL));

3295

3298 NewLI->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,

3299 LLVMContext::MD_access_group});

3300

3301 V = NewLI;

3302 IsPtrAdjusted = true;

3303 }

3305

3306 if (IsSplit) {

3309 "Only integer type loads and stores are split");

3310 assert(SliceSize < DL.getTypeStoreSize(LI.getType()).getFixedValue() &&

3311 "Split load isn't smaller than original load");

3313 "Non-byte-multiple bit width");

3314

3316

3317

3318

3319 LIIt.setHeadBit(true);

3320 IRB.SetInsertPoint(LI.getParent(), LIIt);

3321

3322

3323

3324

3325 Value *Placeholder =

3327 false, Align(1));

3328 V = insertInteger(DL, IRB, Placeholder, V, NewBeginOffset - BeginOffset,

3329 "insert");

3331 Placeholder->replaceAllUsesWith(&LI);

3332 Placeholder->deleteValue();

3333 } else {

3335 }

3336

3337 Pass.DeadInsts.push_back(&LI);

3338 deleteIfTriviallyDead(OldOp);

3340 return !LI.isVolatile() && !IsPtrAdjusted;

3341 }

3342

3343 bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp,

3344 AAMDNodes AATags) {

3345

3346

3348 if (V->getType() != VecTy) {

3349 unsigned BeginIndex = getIndex(NewBeginOffset);

3350 unsigned EndIndex = getIndex(NewEndOffset);

3351 assert(EndIndex > BeginIndex && "Empty vector!");

3352 unsigned NumElements = EndIndex - BeginIndex;

3354 "Too many elements!");

3355 Type *SliceTy = (NumElements == 1)

3356 ? ElementTy

3357 : FixedVectorType::get(ElementTy, NumElements);

3358 if (V->getType() != SliceTy)

3360

3361

3364 V = insertVector(IRB, Old, V, BeginIndex, "vec");

3365 }

3366 StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());

3367 Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,

3368 LLVMContext::MD_access_group});

3369 if (AATags)

3371 V->getType(), DL));

3372 Pass.DeadInsts.push_back(&SI);

3373

3374

3375 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,

3376 Store, Store->getPointerOperand(), OrigV, DL);

3378 return true;

3379 }

3380

3381 bool rewriteIntegerStore(Value *V, StoreInst &SI, AAMDNodes AATags) {

3382 assert(IntTy && "We cannot extract an integer from the alloca");

3384 if (DL.getTypeSizeInBits(V->getType()).getFixedValue() !=

3387 NewAI.getAlign(), "oldload");

3389 assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");

3390 uint64_t Offset = BeginOffset - NewAllocaBeginOffset;

3392 }

3394 StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());

3395 Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,

3396 LLVMContext::MD_access_group});

3397 if (AATags)

3399 V->getType(), DL));

3400

3401 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,

3402 Store, Store->getPointerOperand(),

3403 Store->getValueOperand(), DL);

3404

3405 Pass.DeadInsts.push_back(&SI);

3407 return true;

3408 }

3409

3410 bool visitStoreInst(StoreInst &SI) {

3412 Value *OldOp = SI.getOperand(1);

3413 assert(OldOp == OldPtr);

3414

3415 AAMDNodes AATags = SI.getAAMetadata();

3416 Value *V = SI.getValueOperand();

3417

3418

3419

3420 if (V->getType()->isPointerTy())

3422 Pass.PostPromotionWorklist.insert(AI);

3423

3424 TypeSize StoreSize = DL.getTypeStoreSize(V->getType());

3427 assert(V->getType()->isIntegerTy() &&

3428 "Only integer type loads and stores are split");

3429 assert(DL.typeSizeEqualsStoreSize(V->getType()) &&

3430 "Non-byte-multiple bit width");

3431 IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), SliceSize * 8);

3432 V = extractInteger(DL, IRB, V, NarrowTy, NewBeginOffset - BeginOffset,

3433 "extract");

3434 }

3435

3436 if (VecTy)

3437 return rewriteVectorizedStoreInst(V, SI, OldOp, AATags);

3438 if (IntTy && V->getType()->isIntegerTy())

3439 return rewriteIntegerStore(V, SI, AATags);

3440

3441 StoreInst *NewSI;

3442 if (NewBeginOffset == NewAllocaBeginOffset &&

3443 NewEndOffset == NewAllocaEndOffset &&

3447 getPtrToNewAI(SI.getPointerAddressSpace(), SI.isVolatile());

3448

3449 NewSI =

3450 IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), SI.isVolatile());

3451 } else {

3452 unsigned AS = SI.getPointerAddressSpace();

3453 Value *NewPtr = getNewAllocaSlicePtr(IRB, IRB.getPtrTy(AS));

3454 NewSI =

3455 IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(), SI.isVolatile());

3456 }

3457 NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,

3458 LLVMContext::MD_access_group});

3459 if (AATags)

3461 V->getType(), DL));

3462 if (SI.isVolatile())

3463 NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID());

3466

3467 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,

3470

3471 Pass.DeadInsts.push_back(&SI);

3472 deleteIfTriviallyDead(OldOp);

3473

3477 !SI.isVolatile();

3478 }

3479

3480

3481

3482

3483

3484

3485

3486

3487

3488

3490 assert(Size > 0 && "Expected a positive number of bytes.");

3492 assert(VTy->getBitWidth() == 8 && "Expected an i8 value for the byte");

3493 if (Size == 1)

3494 return V;

3495

3497 V = IRB.CreateMul(

3498 IRB.CreateZExt(V, SplatIntTy, "zext"),

3501 SplatIntTy)),

3502 "isplat");

3503 return V;

3504 }

3505

3506

3508 V = IRB.CreateVectorSplat(NumElements, V, "vsplat");

3510 return V;

3511 }
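// Editor's note: illustrative sketch, not part of the original source.
// getIntegerSplat realizes a memset byte as a wide integer with the
// zext + multiply-by-0x0101...01 trick, e.g. splatting i8 %b across i32:
//
//   %zext   = zext i8 %b to i32
//   %isplat = mul i32 %zext, 16843009    ; 16843009 == 0x01010101
//
// getVectorSplat instead broadcasts a scalar element with
// IRB.CreateVectorSplat for vector-typed slices.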

3512

3513 bool visitMemSetInst(MemSetInst &II) {

3515 assert(II.getRawDest() == OldPtr);

3516

3517 AAMDNodes AATags = II.getAAMetadata();

3518

3519

3520

3523 assert(NewBeginOffset == BeginOffset);

3524 II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType()));

3525 II.setDestAlignment(getSliceAlign());

3526

3527

3528

3530 "AT: Unexpected link to non-const GEP");

3531 deleteIfTriviallyDead(OldPtr);

3532 return false;

3533 }

3534

3535

3536 Pass.DeadInsts.push_back(&II);

3537

3540

3541 const bool CanContinue = [&]() {

3542 if (VecTy || IntTy)

3543 return true;

3544 if (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset)

3545 return false;

3546

3548 const uint64_t Len = C->getLimitedValue();

3549 if (Len > std::numeric_limits<unsigned>::max())

3550 return false;

3551 auto *Int8Ty = IntegerType::getInt8Ty(NewAI.getContext());

3554 DL.isLegalInteger(DL.getTypeSizeInBits(ScalarTy).getFixedValue());

3555 }();

3556

3557

3558

3559 if (!CanContinue) {

3560 Type *SizeTy = II.getLength()->getType();

3561 unsigned Sz = NewEndOffset - NewBeginOffset;

3562 Constant *Size = ConstantInt::get(SizeTy, Sz);

3564 getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,

3565 MaybeAlign(getSliceAlign()), II.isVolatile()));

3566 if (AATags)

3567 New->setAAMetadata(

3568 AATags.adjustForAccess(NewBeginOffset - BeginOffset, Sz));

3569

3570 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,

3571 New, New->getRawDest(), nullptr, DL);

3572

3574 return false;

3575 }

3576

3577

3578

3579

3580

3581

3583

3584 if (VecTy) {

3585

3586 assert(ElementTy == ScalarTy);

3587

3588 unsigned BeginIndex = getIndex(NewBeginOffset);

3589 unsigned EndIndex = getIndex(NewEndOffset);

3590 assert(EndIndex > BeginIndex && "Empty vector!");

3591 unsigned NumElements = EndIndex - BeginIndex;

3593 "Too many elements!");

3594

3596 II.getValue(), DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8);

3598 if (NumElements > 1)

3600

3602 NewAI.getAlign(), "oldload");

3604 } else if (IntTy) {

3605

3606

3608

3609 uint64_t Size = NewEndOffset - NewBeginOffset;

3610 V = getIntegerSplat(II.getValue(), Size);

3611

3612 if (IntTy && (BeginOffset != NewAllocaBeginOffset ||

3613 EndOffset != NewAllocaEndOffset)) {

3615 NewAI.getAlign(), "oldload");

3617 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;

3619 } else {

3620 assert(V->getType() == IntTy &&

3621 "Wrong type for an alloca wide integer!");

3622 }

3624 } else {

3625

3626 assert(NewBeginOffset == NewAllocaBeginOffset);

3627 assert(NewEndOffset == NewAllocaEndOffset);

3628

3629 V = getIntegerSplat(II.getValue(),

3630 DL.getTypeSizeInBits(ScalarTy).getFixedValue() / 8);

3634

3636 }

3637

3638 Value *NewPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());

3639 StoreInst *New =

3640 IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), II.isVolatile());

3641 New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,

3642 LLVMContext::MD_access_group});

3643 if (AATags)

3644 New->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,

3645 V->getType(), DL));

3646

3647 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,

3648 New, New->getPointerOperand(), V, DL);

3649

3651 return !II.isVolatile();

3652 }

3653

3654 bool visitMemTransferInst(MemTransferInst &II) {

3655

3656

3657

3659

3660 AAMDNodes AATags = II.getAAMetadata();

3661

3662 bool IsDest = &II.getRawDestUse() == OldUse;

3663 assert((IsDest && II.getRawDest() == OldPtr) ||

3664 (!IsDest && II.getRawSource() == OldPtr));

3665

3666 Align SliceAlign = getSliceAlign();

3667

3668

3669

3670

3671

3672

3673

3674 if (!IsSplittable) {

3675 Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());

3676 if (IsDest) {

3677

3680 DbgAssign->getAddress() == II.getDest())

3681 DbgAssign->replaceVariableLocationOp(II.getDest(), AdjustedPtr);

3682 }

3683 II.setDest(AdjustedPtr);

3684 II.setDestAlignment(SliceAlign);

3685 } else {

3686 II.setSource(AdjustedPtr);

3687 II.setSourceAlignment(SliceAlign);

3688 }

3689

3691 deleteIfTriviallyDead(OldPtr);

3692 return false;

3693 }

3694

3695

3696

3697

3698

3699

3700

3701

3702 bool EmitMemCpy =

3703 !VecTy && !IntTy &&

3704 (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||

3705 SliceSize !=

3709

3710

3711

3712

3713 if (EmitMemCpy && &OldAI == &NewAI) {

3714

3715 assert(NewBeginOffset == BeginOffset);

3716

3717

3718 if (NewEndOffset != EndOffset)

3719 II.setLength(NewEndOffset - NewBeginOffset);

3720 return false;

3721 }

3722

3723 Pass.DeadInsts.push_back(&II);

3724

3725

3726

3727 Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest();

3728 if (AllocaInst *AI =

3730 assert(AI != &OldAI && AI != &NewAI &&

3731 "Splittable transfers cannot reach the same alloca on both ends.");

3732 Pass.Worklist.insert(AI);

3733 }

3734

3737

3738

3739 unsigned OffsetWidth = DL.getIndexSizeInBits(OtherAS);

3740 APInt OtherOffset(OffsetWidth, NewBeginOffset - BeginOffset);

3741 Align OtherAlign =

3742 (IsDest ? II.getSourceAlign() : II.getDestAlign()).valueOrOne();

3743 OtherAlign =

3744 commonAlignment(OtherAlign, OtherOffset.zextOrTrunc(64).getZExtValue());

3745

3746 if (EmitMemCpy) {

3747

3748

3749 OtherPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,

3750 OtherPtr->getName() + ".");

3751

3752 Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());

3753 Type *SizeTy = II.getLength()->getType();

3754 Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);

3755

3756 Value *DestPtr, *SrcPtr;

3757 MaybeAlign DestAlign, SrcAlign;

3758

3759 if (IsDest) {

3760 DestPtr = OurPtr;

3761 DestAlign = SliceAlign;

3762 SrcPtr = OtherPtr;

3763 SrcAlign = OtherAlign;

3764 } else {

3765 DestPtr = OtherPtr;

3766 DestAlign = OtherAlign;

3767 SrcPtr = OurPtr;

3768 SrcAlign = SliceAlign;

3769 }

3770 CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,

3771 Size, II.isVolatile());

3772 if (AATags)

3773 New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));

3774

3775 APInt Offset(DL.getIndexTypeSizeInBits(DestPtr->getType()), 0);

3776 if (IsDest) {

3777 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8,

3778 &II, New, DestPtr, nullptr, DL);

3781 DL, Offset, true))) {

3783 SliceSize * 8, &II, New, DestPtr, nullptr, DL);

3784 }

3786 return false;

3787 }

3788

3789 bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset &&

3790 NewEndOffset == NewAllocaEndOffset;

3791 uint64_t Size = NewEndOffset - NewBeginOffset;

3792 unsigned BeginIndex = VecTy ? getIndex(NewBeginOffset) : 0;

3793 unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;

3794 unsigned NumElements = EndIndex - BeginIndex;

3795 IntegerType *SubIntTy =

3796 IntTy ? Type::getIntNTy(IntTy->getContext(), Size * 8) : nullptr;

3797

3798

3799

3800 Type *OtherTy;

3801 if (VecTy && !IsWholeAlloca) {

3802 if (NumElements == 1)

3803 OtherTy = VecTy->getElementType();

3804 else

3806 } else if (IntTy && !IsWholeAlloca) {

3807 OtherTy = SubIntTy;

3808 } else {

3809 OtherTy = NewAllocaTy;

3810 }

3811

3813 OtherPtr->getName() + ".");

3814 MaybeAlign SrcAlign = OtherAlign;

3815 MaybeAlign DstAlign = SliceAlign;

3816 if (!IsDest)

3818

3821

3822 if (IsDest) {

3823 DstPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());

3824 SrcPtr = AdjPtr;

3825 } else {

3826 DstPtr = AdjPtr;

3827 SrcPtr = getPtrToNewAI(II.getSourceAddressSpace(), II.isVolatile());

3828 }

3829

3831 if (VecTy && !IsWholeAlloca && !IsDest) {

3832 Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,

3834 Src = extractVector(IRB, Src, BeginIndex, EndIndex, "vec");

3835 } else if (IntTy && !IsWholeAlloca && !IsDest) {

3836 Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,

3839 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;

3841 } else {

3842 LoadInst *Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,

3843 II.isVolatile(), "copyload");

3844 Load->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,

3845 LLVMContext::MD_access_group});

3846 if (AATags)

3847 Load->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,

3848 Load->getType(), DL));

3850 }

3851

3852 if (VecTy && !IsWholeAlloca && IsDest) {

3854 NewAI.getAlign(), "oldload");

3855 Src = insertVector(IRB, Old, Src, BeginIndex, "vec");

3856 } else if (IntTy && !IsWholeAlloca && IsDest) {

3858 NewAI.getAlign(), "oldload");

3860 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;

3863 }

3864

3866 IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));

3867 Store->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,

3868 LLVMContext::MD_access_group});

3869 if (AATags)

3871 Src->getType(), DL));

3872

3873 APInt Offset(DL.getIndexTypeSizeInBits(DstPtr->getType()), 0);

3874 if (IsDest) {

3875

3876 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,

3877 Store, DstPtr, Src, DL);

3880 DL, Offset, true))) {

3882 &II, Store, DstPtr, Src, DL);

3883 }

3884

3886 return !II.isVolatile();

3887 }
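// Editor's note: illustrative IR sketch, not part of the original source.
// When the transfer lines up with the rewritten partition, the memcpy
// above degenerates into a direct load/store pair, e.g.:
//
//   call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 16, i1 false)
//
// over a <4 x i32> partition becomes
//
//   %copyload = load <4 x i32>, ptr %src
//   store <4 x i32> %copyload, ptr %dst
//
// Sub-partition copies instead go through the extractVector/insertVector
// or integer extract/insert helpers seen above.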

3888

3889 bool visitIntrinsicInst(IntrinsicInst &II) {

3890 assert((II.isLifetimeStartOrEnd() || II.isDroppable()) &&

3891 "Unexpected intrinsic!");

3893

3894

3895 Pass.DeadInsts.push_back(&II);

3896

3897 if (II.isDroppable()) {

3898 assert(II.getIntrinsicID() == Intrinsic::assume && "Expected assume");

3899

3901 return true;

3902 }

3903

3904 assert(II.getArgOperand(0) == OldPtr);

3908 if (II.getIntrinsicID() == Intrinsic::lifetime_start)

3909 New = IRB.CreateLifetimeStart(Ptr);

3910 else

3911 New = IRB.CreateLifetimeEnd(Ptr);

3912

3913 (void)New;

3915

3916 return true;

3917 }

3918

3919 void fixLoadStoreAlign(Instruction &Root) {

3920

3921

3922

3923 SmallPtrSet<Instruction *, 4> Visited;

3924 SmallVector<Instruction *, 4> Uses;

3925 Visited.insert(&Root);

3926 Uses.push_back(&Root);

3927 do {

3929

3932 continue;

3933 }

3935 SI->setAlignment(std::min(SI->getAlign(), getSliceAlign()));

3936 continue;

3937 }

3938

3942 for (User *U : I->users())

3945 } while (!Uses.empty());

3946 }

3947

3948 bool visitPHINode(PHINode &PN) {

3950 assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable");

3951 assert(EndOffset <= NewAllocaEndOffset && "PHIs are unsplittable");

3952

3953

3954

3955

3956

3957 IRBuilderBase::InsertPointGuard Guard(IRB);

3959 IRB.SetInsertPoint(OldPtr->getParent(),

3960 OldPtr->getParent()->getFirstInsertionPt());

3961 else

3962 IRB.SetInsertPoint(OldPtr);

3963 IRB.SetCurrentDebugLocation(OldPtr->getDebugLoc());

3964

3965 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());

3966

3968

3970 deleteIfTriviallyDead(OldPtr);

3971

3972

3973 fixLoadStoreAlign(PN);

3974

3975

3976

3977

3978 PHIUsers.insert(&PN);

3979 return true;

3980 }

3981

3982 bool visitSelectInst(SelectInst &SI) {

3984 assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) &&

3985 "Pointer isn't an operand!");

3986 assert(BeginOffset >= NewAllocaBeginOffset && "Selects are unsplittable");

3987 assert(EndOffset <= NewAllocaEndOffset && "Selects are unsplittable");

3988

3989 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());

3990

3991 if (SI.getOperand(1) == OldPtr)

3992 SI.setOperand(1, NewPtr);

3993 if (SI.getOperand(2) == OldPtr)

3994 SI.setOperand(2, NewPtr);

3995

3997 deleteIfTriviallyDead(OldPtr);

3998

3999

4000 fixLoadStoreAlign(SI);

4001

4002

4003

4004

4005 SelectUsers.insert(&SI);

4006 return true;

4007 }

4008};

4009

4010

4011

4012

4013

4014

4015class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {

4016

4017 friend class InstVisitor<AggLoadStoreRewriter, bool>;

4018

4019

4021

4022

4023 SmallPtrSet<User *, 8> Visited;

4024

4025

4026

4027 Use *U = nullptr;

4028

4029

4030 const DataLayout &DL;

4031

4032 IRBuilderTy &IRB;

4033

4034public:

4035 AggLoadStoreRewriter(const DataLayout &DL, IRBuilderTy &IRB)

4036 : DL(DL), IRB(IRB) {}

4037

4038

4039

4040 bool rewrite(Instruction &I) {

4041 LLVM_DEBUG(dbgs() << " Rewriting FCA loads and stores...\n");

4042 enqueueUsers(I);

4044 while (!Queue.empty()) {

4045 U = Queue.pop_back_val();

4047 }

4049 }

4050

4051private:

4052

4053

4054 void enqueueUsers(Instruction &I) {

4055 for (Use &U : I.uses())

4056 if (Visited.insert(U.getUser()).second)

4057 Queue.push_back(&U);

4058 }

4059

4060

4061 bool visitInstruction(Instruction &I) { return false; }

4062

4063

4064 template <typename Derived> class OpSplitter {

4065 protected:

4066

4067 IRBuilderTy &IRB;

4068

4069

4070

4071 SmallVector<unsigned, 4> Indices;

4072

4073

4074

4076

4077

4078

4080

4081

4082 Type *BaseTy;

4083

4084

4085 Align BaseAlign;

4086

4087

4088

4089 const DataLayout &DL;

4090

4091

4092

4093 OpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,

4094 Align BaseAlign, const DataLayout &DL, IRBuilderTy &IRB)

4095 : IRB(IRB), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr), BaseTy(BaseTy),

4096 BaseAlign(BaseAlign), DL(DL) {

4097 IRB.SetInsertPoint(InsertionPoint);

4098 }

4099

4100 public:

4101

4102

4103

4104

4105

4106

4107

4108

4109

4110

4111

4112

4113

4114 void emitSplitOps(Type *Ty, Value *&Agg, const Twine &Name) {

4116 unsigned Offset = DL.getIndexedOffsetInType(BaseTy, GEPIndices);

4117 return static_cast<Derived *>(this)->emitFunc(

4119 }

4120

4122 unsigned OldSize = Indices.size();

4123 (void)OldSize;

4124 for (unsigned Idx = 0, Size = ATy->getNumElements(); Idx != Size;

4125 ++Idx) {

4126 assert(Indices.size() == OldSize && "Did not return to the old size");

4128 GEPIndices.push_back(IRB.getInt32(Idx));

4129 emitSplitOps(ATy->getElementType(), Agg, Name + "." + Twine(Idx));

4132 }

4133 return;

4134 }

4135

4137 unsigned OldSize = Indices.size();

4138 (void)OldSize;

4139 for (unsigned Idx = 0, Size = STy->getNumElements(); Idx != Size;

4140 ++Idx) {

4141 assert(Indices.size() == OldSize && "Did not return to the old size");

4143 GEPIndices.push_back(IRB.getInt32(Idx));

4144 emitSplitOps(STy->getElementType(Idx), Agg, Name + "." + Twine(Idx));

4147 }

4148 return;

4149 }

4150

4151 llvm_unreachable("Only arrays and structs are aggregate loadable types");

4152 }

4153 };

4154

4155 struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> {

4156 AAMDNodes AATags;

4157

4158

4160

4161

4163

4164 LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,

4165 AAMDNodes AATags, Align BaseAlign, const DataLayout &DL,

4166 IRBuilderTy &IRB)

4167 : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign, DL,

4168 IRB),

4169 AATags(AATags) {}

4170

4171

4172

4173 void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {

4175

4177 IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");

4178 LoadInst *Load =

4179 IRB.CreateAlignedLoad(Ty, GEP, Alignment, Name + ".load");

4180

4183 if (AATags &&

4185 Load->setAAMetadata(

4187

4188

4190

4191 Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");

4193 }

4194

4195

4196 void recordFakeUses(LoadInst &LI) {

4197 for (Use &U : LI.uses())

4199 if (II->getIntrinsicID() == Intrinsic::fake_use)

4201 }

4202

4203

4204

4205 void emitFakeUses() {

4206 for (Instruction *I : FakeUses) {

4207 IRB.SetInsertPoint(I);

4208 for (auto *V : Components)

4209 IRB.CreateIntrinsic(Intrinsic::fake_use, {V});

4210 I->eraseFromParent();

4211 }

4212 }

4213 };

4214

4215 bool visitLoadInst(LoadInst &LI) {

4218 return false;

4219

4220

4224 Splitter.recordFakeUses(LI);

4226 Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca");

4227 Splitter.emitFakeUses();

4228 Visited.erase(&LI);

4231 return true;

4232 }

4233

4234 struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> {

4235 StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,

4236 AAMDNodes AATags, StoreInst *AggStore, Align BaseAlign,

4237 const DataLayout &DL, IRBuilderTy &IRB)

4238 : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,

4239 DL, IRB),

4240 AATags(AATags), AggStore(AggStore) {}

4241 AAMDNodes AATags;

4242 StoreInst *AggStore;

4243

4244

4245 void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {

4247

4248

4249

4250

4251 Value *ExtractValue =

4252 IRB.CreateExtractValue(Agg, Indices, Name + ".extract");

4253 Value *InBoundsGEP =

4254 IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");

4255 StoreInst *Store =

4256 IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment);

4257

4261 if (AATags) {

4264 }

4265

4266

4267

4268

4269

4272 uint64_t SizeInBits =

4273 DL.getTypeSizeInBits(Store->getValueOperand()->getType());

4275 SizeInBits, AggStore, Store,

4276 Store->getPointerOperand(), Store->getValueOperand(),

4277 DL);

4278 } else {

4280 "AT: unexpected debug.assign linked to store through "

4281 "unbounded GEP");

4282 }

4284 }

4285 };

4286

4287 bool visitStoreInst(StoreInst &SI) {

4288 if (!SI.isSimple() || SI.getPointerOperand() != *U)

4289 return false;

4290 Value *V = SI.getValueOperand();

4291 if (V->getType()->isSingleValueType())

4292 return false;

4293

4294

4296 StoreOpSplitter Splitter(&SI, *U, V->getType(), SI.getAAMetadata(), &SI,

4298 Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca");

4299 Visited.erase(&SI);

4300

4301

4303 SI.eraseFromParent();

4304 return true;

4305 }
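// Editor's note: illustrative IR sketch, not part of the original source;
// value names are made up. StoreOpSplitter decomposes a first-class
// aggregate store into per-leaf GEPs and scalar stores:
//
//   store { i32, float } %agg, ptr %p
//
// becomes
//
//   %agg.fca.0.extract = extractvalue { i32, float } %agg, 0
//   %agg.fca.0.gep     = getelementptr inbounds { i32, float }, ptr %p, i32 0, i32 0
//   store i32 %agg.fca.0.extract, ptr %agg.fca.0.gep
//   %agg.fca.1.extract = extractvalue { i32, float } %agg, 1
//   %agg.fca.1.gep     = getelementptr inbounds { i32, float }, ptr %p, i32 0, i32 1
//   store float %agg.fca.1.extract, ptr %agg.fca.1.gep
//
// LoadOpSplitter mirrors this with per-leaf loads reassembled by
// insertvalue.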

4306

4307 bool visitBitCastInst(BitCastInst &BC) {

4308 enqueueUsers(BC);

4309 return false;

4310 }

4311

4312 bool visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {

4313 enqueueUsers(ASC);

4314 return false;

4315 }

4316

4317

4318

4319

4320

4321

4322 bool unfoldGEPSelect(GetElementPtrInst &GEPI) {

4323

4324

4328 if (Sel)

4329 return false;

4330

4331 Sel = SI;

4334 return false;

4335 continue;

4336 }

4338 if (Sel)

4339 return false;

4340 Sel = ZI;

4341 if (!ZI->getSrcTy()->isIntegerTy(1))

4342 return false;

4343 continue;

4344 }

4345

4347 return false;

4348 }

4349

4350 if (!Sel)

4351 return false;

4352

4353 LLVM_DEBUG(dbgs() << " Rewriting gep(select) -> select(gep):\n";

4354 dbgs() << " original: " << *Sel << "\n";

4355 dbgs() << " " << GEPI << "\n";);

4356

4357 auto GetNewOps = [&](Value *SelOp) {

4360 if (Op == Sel)

4362 else

4364 return NewOps;

4365 };

4366

4370 Cond = SI->getCondition();

4371 True = SI->getTrueValue();

4372 False = SI->getFalseValue();

4374 MDFrom = SI;

4375 } else {

4376 Cond = Sel->getOperand(0);

4377 True = ConstantInt::get(Sel->getType(), 1);

4378 False = ConstantInt::get(Sel->getType(), 0);

4379 }

4382

4383 IRB.SetInsertPoint(&GEPI);

4385

4387 Value *NTrue = IRB.CreateGEP(Ty, TrueOps[0], ArrayRef(TrueOps).drop_front(),

4388 True->getName() + ".sroa.gep", NW);

4389

4391 IRB.CreateGEP(Ty, FalseOps[0], ArrayRef(FalseOps).drop_front(),

4392 False->getName() + ".sroa.gep", NW);

4393

4394 Value *NSel = MDFrom

4395 ? IRB.CreateSelect(Cond, NTrue, NFalse,

4396 Sel->getName() + ".sroa.sel", MDFrom)

4397 : IRB.CreateSelectWithUnknownProfile(

4399 Sel->getName() + ".sroa.sel");

4400 Visited.erase(&GEPI);

4404 Visited.insert(NSelI);

4405 enqueueUsers(*NSelI);

4406

4408 dbgs() << " " << *NFalse << "\n";

4409 dbgs() << " " << *NSel << "\n";);

4410

4411 return true;

4412 }
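// Editor's note: illustrative IR sketch, not part of the original source.
// The gep(select) -> select(gep) unfold rewrites
//
//   %sel = select i1 %c, ptr %a, ptr %b
//   %gep = getelementptr inbounds i32, ptr %sel, i64 1
//
// into
//
//   %a.sroa.gep   = getelementptr inbounds i32, ptr %a, i64 1
//   %b.sroa.gep   = getelementptr inbounds i32, ptr %b, i64 1
//   %sel.sroa.sel = select i1 %c, ptr %a.sroa.gep, ptr %b.sroa.gep
//
// so that each arm is a direct pointer that later slicing can analyze.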

4413

4414

4415

4416

4417

4418 bool unfoldGEPPhi(GetElementPtrInst &GEPI) {

4419

4420

4421

4423 auto IsInvalidPointerOperand = [](Value *V) {

4425 return false;

4427 return !AI->isStaticAlloca();

4428 return true;

4429 };

4430 if (Phi) {

4431 if (any_of(Phi->operands(), IsInvalidPointerOperand))

4432 return false;

4433 } else {

4435 return false;

4436 }

4437

4438

4441 if (Phi)

4442 return false;

4443

4445 if (!all_of(Phi->incoming_values(),

4446 [](Value *V) { return isa<ConstantInt>(V); }))

4447 return false;

4448 continue;

4449 }

4450

4452 return false;

4453 }

4454

4455 if (!Phi)

4456 return false;

4457

4458 LLVM_DEBUG(dbgs() << " Rewriting gep(phi) -> phi(gep):\n";

4459 dbgs() << " original: " << *Phi << "\n";

4460 dbgs() << " " << GEPI << "\n";);

4461

4462 auto GetNewOps = [&](Value *PhiOp) {

4465 if (Op == Phi)

4467 else

4469 return NewOps;

4470 };

4471

4472 IRB.SetInsertPoint(Phi);

4473 PHINode *NewPhi = IRB.CreatePHI(GEPI.getType(), Phi->getNumIncomingValues(),

4474 Phi->getName() + ".sroa.phi");

4475

4477

4478

4480 for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {

4486 } else {

4488 NewGEP =

4489 IRB.CreateGEP(SourceTy, NewOps[0], ArrayRef(NewOps).drop_front(),

4491 }

4493 }

4494

4495 Visited.erase(&GEPI);

4498 Visited.insert(NewPhi);

4499 enqueueUsers(*NewPhi);

4500

4504 << "\n " << *In;

4505 dbgs() << "\n " << *NewPhi << '\n');

4506

4507 return true;

4508 }
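// Editor's note: illustrative IR sketch, not part of the original source.
// unfoldGEPPhi is the phi analogue, gep(phi) -> phi(gep):
//
//   %phi = phi ptr [ %a, %then ], [ %b, %else ]
//   %gep = getelementptr inbounds i32, ptr %phi, i64 1
//
// becomes one GEP per incoming value feeding
//
//   %phi.sroa.phi = phi ptr [ %a.sroa.gep, %then ], [ %b.sroa.gep, %else ]
//
// Per the guards above, it only fires when every incoming pointer is a
// static alloca (or a non-instruction) and all other GEP indices are
// constant.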

4509

4510 bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {

4511 if (unfoldGEPSelect(GEPI))

4512 return true;

4513

4514 if (unfoldGEPPhi(GEPI))

4515 return true;

4516

4517 enqueueUsers(GEPI);

4518 return false;

4519 }

4520

4521 bool visitPHINode(PHINode &PN) {

4522 enqueueUsers(PN);

4523 return false;

4524 }

4525

4526 bool visitSelectInst(SelectInst &SI) {

4527 enqueueUsers(SI);

4528 return false;

4529 }

4530};

4531

4532}

4533

4534

4535

4536

4537

4538

4540 if (Ty->isSingleValueType())

4541 return Ty;

4542

4543 uint64_t AllocSize = DL.getTypeAllocSize(Ty).getFixedValue();

4545

4546 Type *InnerTy;

4548 InnerTy = ArrTy->getElementType();

4552 InnerTy = STy->getElementType(Index);

4553 } else {

4554 return Ty;

4555 }

4556

4557 if (AllocSize > DL.getTypeAllocSize(InnerTy).getFixedValue() ||

4558 TypeSize > DL.getTypeSizeInBits(InnerTy).getFixedValue())

4559 return Ty;

4560

4562}

4563

4564

4565

4566

4567

4568

4569

4570

4571

4572

4573

4574

4575

4576

4579 if (Offset == 0 && DL.getTypeAllocSize(Ty).getFixedValue() == Size)

4581 if (Offset > DL.getTypeAllocSize(Ty).getFixedValue() ||

4582 (DL.getTypeAllocSize(Ty).getFixedValue() - Offset) < Size)

4583 return nullptr;

4584

4586 Type *ElementTy;

4589 ElementTy = AT->getElementType();

4590 TyNumElements = AT->getNumElements();

4591 } else {

4592

4593

4595 ElementTy = VT->getElementType();

4596 TyNumElements = VT->getNumElements();

4597 }

4598 uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();

4600 if (NumSkippedElements >= TyNumElements)

4601 return nullptr;

4602 Offset -= NumSkippedElements * ElementSize;

4603

4604

4605 if (Offset > 0 || Size < ElementSize) {

4606

4608 return nullptr;

4609

4611 }

4613

4614 if (Size == ElementSize)

4618 if (NumElements * ElementSize != Size)

4619 return nullptr;

4621 }

4622

4624 if (!STy)

4625 return nullptr;

4626

4628

4630 return nullptr;

4631

4633 return nullptr;

4636 return nullptr;

4637

4640

4642 uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();

4643 if (Offset >= ElementSize)

4644 return nullptr;

4645

4646

4647 if (Offset > 0 || Size < ElementSize) {

4649 return nullptr;

4651 }

4653

4654 if (Size == ElementSize)

4656

4661 if (Index == EndIndex)

4662 return nullptr;

4663

4664

4665

4666

4667

4669 return nullptr;

4670

4671 assert(Index < EndIndex);

4673 }

4674

4675

4678 const StructLayout *SubSL = DL.getStructLayout(SubTy);

4680 return nullptr;

4681

4682 return SubTy;

4683}
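// Editor's note: illustrative sketch, not part of the original source.
// getTypePartition above tries to name the sub-range of a type that one
// partition covers. For example, over %T = type { i32, i32, i64 }:
//
//   Offset = 0, Size = 8  ->  the leading sub-struct { i32, i32 }
//   Offset = 8, Size = 8  ->  the i64 element itself
//   Offset = 2, Size = 4  ->  nullptr (straddles an element boundary)
//
// When it returns nullptr, rewritePartition falls back to a legal integer
// type or an i8 array of the partition's size.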

4684

4685

4686

4687

4688

4689

4690

4691

4692

4693

4694

4695

4696

4697

4698

4699

4700

4701

4702

4703

4704

4705

4706

4707

4708

4709

4710bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {

4711 LLVM_DEBUG(dbgs() << "Pre-splitting loads and stores\n");

4712

4713

4714

4715

4716

4719

4720

4721

4722

4723

4724 struct SplitOffsets {

4725 Slice *S;

4726 std::vector<uint64_t> Splits;

4727 };

4728 SmallDenseMap<Instruction *, SplitOffsets, 8> SplitOffsetsMap;

4729

4730

4731

4732

4733

4734

4735

4736

4737

4738

4739

4740

4741 SmallPtrSet<LoadInst *, 8> UnsplittableLoads;

4742

4743 LLVM_DEBUG(dbgs() << " Searching for candidate loads and stores\n");

4744 for (auto &P : AS.partitions()) {

4745 for (Slice &S : P) {

4747 if (!S.isSplittable() || S.endOffset() <= P.endOffset()) {

4748

4749

4750

4752 UnsplittableLoads.insert(LI);

4755 UnsplittableLoads.insert(LI);

4756 continue;

4757 }

4758 assert(P.endOffset() > S.beginOffset() &&

4759 "Empty or backwards partition!");

4760

4761

4763 assert(!LI->isVolatile() && "Cannot split volatile loads!");

4764

4765

4766

4767

4768 auto IsLoadSimplyStored = [](LoadInst *LI) {

4769 for (User *LU : LI->users()) {

4771 if (!SI || !SI->isSimple())

4772 return false;

4773 }

4774 return true;

4775 };

4776 if (!IsLoadSimplyStored(LI)) {

4777 UnsplittableLoads.insert(LI);

4778 continue;

4779 }

4780

4783 if (S.getUse() != &SI->getOperandUse(SI->getPointerOperandIndex()))

4784

4785 continue;

4787 if (!StoredLoad || !StoredLoad->isSimple())

4788 continue;

4789 assert(!SI->isVolatile() && "Cannot split volatile stores!");

4790

4792 } else {

4793

4794 continue;

4795 }

4796

4797

4799 auto &Offsets = SplitOffsetsMap[I];

4801 "Should not have splits the first time we see an instruction!");

4803 Offsets.Splits.push_back(P.endOffset() - S.beginOffset());

4804 }

4805

4806

4807

4808 for (Slice *S : P.splitSliceTails()) {

4809 auto SplitOffsetsMapI =

4811 if (SplitOffsetsMapI == SplitOffsetsMap.end())

4812 continue;

4813 auto &Offsets = SplitOffsetsMapI->second;

4814

4815 assert(Offsets.S == S && "Found a mismatched slice!");

4817 "Cannot have an empty set of splits on the second partition!");

4819 P.beginOffset() - Offsets.S->beginOffset() &&

4820 "Previous split does not end where this one begins!");

4821

4822

4823

4824 if (S->endOffset() > P.endOffset())

4825 Offsets.Splits.push_back(P.endOffset() - Offsets.S->beginOffset());

4826 }

4827 }

4828

4829

4830

4831

4832

4833 llvm::erase_if(Stores, [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {

4834

4835

4837

4838

4839 if (UnsplittableLoads.count(LI))

4840 return true;

4841

4842 auto LoadOffsetsI = SplitOffsetsMap.find(LI);

4843 if (LoadOffsetsI == SplitOffsetsMap.end())

4844 return false;

4845 auto &LoadOffsets = LoadOffsetsI->second;

4846

4847

4848 auto &StoreOffsets = SplitOffsetsMap[SI];

4849

4850

4851

4852

4853 if (LoadOffsets.Splits == StoreOffsets.Splits)

4854 return false;

4855

4856 LLVM_DEBUG(dbgs() << " Mismatched splits for load and store:\n"

4857 << " " << *LI << "\n"

4858 << " " << *SI << "\n");

4859

4860

4861

4862

4863

4864 UnsplittableLoads.insert(LI);

4865 return true;

4866 });

4867

4868

4869

4870

4871 llvm::erase_if(Stores, [&UnsplittableLoads](StoreInst *SI) {

4873 return UnsplittableLoads.count(LI);

4874 });

4875

4876

4877 llvm::erase_if(Loads, [&UnsplittableLoads](LoadInst *LI) {

4878 return UnsplittableLoads.count(LI);

4879 });

4880

4881

4882

4883 if (Loads.empty() && Stores.empty())

4884 return false;

4885

4886

4887

4888 IRBuilderTy IRB(&AI);

4889

4890

4892

4893

4894

4895 SmallPtrSet<AllocaInst *, 4> ResplitPromotableAllocas;

4896

4897

4898

4899

4900

4901

4902

4903

4904

4905 SmallDenseMap<LoadInst *, std::vector<LoadInst *>, 1> SplitLoadsMap;

4906 std::vector<LoadInst *> SplitLoads;

4907 const DataLayout &DL = AI.getDataLayout();

4908 for (LoadInst *LI : Loads) {

4909 SplitLoads.clear();

4910

4911 auto &Offsets = SplitOffsetsMap[LI];

4912 unsigned SliceSize = Offsets.S->endOffset() - Offsets.S->beginOffset();

4914 "Load must have type size equal to store size");

4916 "Load must be >= slice size");

4917

4918 uint64_t BaseOffset = Offsets.S->beginOffset();

4919 assert(BaseOffset + SliceSize > BaseOffset &&

4920 "Cannot represent alloca access size using 64-bit integers!");

4921

4923 IRB.SetInsertPoint(LI);

4924

4925 LLVM_DEBUG(dbgs() << " Splitting load: " << *LI << "\n");

4926

4927 uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();

4928 int Idx = 0, Size = Offsets.Splits.size();

4929 for (;;) {

4930 auto *PartTy = Type::getIntNTy(LI->getContext(), PartSize * 8);

4933 LoadInst *PLoad = IRB.CreateAlignedLoad(

4934 PartTy,

4936 APInt(DL.getIndexSizeInBits(AS), PartOffset),

4937 PartPtrTy, BasePtr->getName() + "."),

4939 false, LI->getName());

4940 PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,

4941 LLVMContext::MD_access_group});

4942

4943

4944

4945 SplitLoads.push_back(PLoad);

4946

4947

4949 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,

4951 false));

4952 LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()

4953 << ", " << NewSlices.back().endOffset()

4954 << "): " << *PLoad << "\n");

4955

4956

4957 if (Idx >= Size)

4958 break;

4959

4960

4961 PartOffset = Offsets.Splits[Idx];

4962 ++Idx;

4963 PartSize = (Idx < Size ? Offsets.Splits[Idx] : SliceSize) - PartOffset;

4964 }

4965

4966

4967

4968

4969 bool DeferredStores = false;

4970 for (User *LU : LI->users()) {

4972 if (!Stores.empty() && SplitOffsetsMap.count(SI)) {

4973 DeferredStores = true;

4974 LLVM_DEBUG(dbgs() << " Deferred splitting of store: " << *SI

4975 << "\n");

4976 continue;

4977 }

4978

4979 Value *StoreBasePtr = SI->getPointerOperand();

4980 IRB.SetInsertPoint(SI);

4981 AAMDNodes AATags = SI->getAAMetadata();

4982

4983 LLVM_DEBUG(dbgs() << " Splitting store of load: " << *SI << "\n");

4984

4985 for (int Idx = 0, Size = SplitLoads.size(); Idx < Size; ++Idx) {

4986 LoadInst *PLoad = SplitLoads[Idx];

4987 uint64_t PartOffset = Idx == 0 ? 0 : Offsets.Splits[Idx - 1];

4988 auto *PartPtrTy = SI->getPointerOperandType();

4989

4990 auto AS = SI->getPointerAddressSpace();

4991 StoreInst *PStore = IRB.CreateAlignedStore(

4992 PLoad,

4994 APInt(DL.getIndexSizeInBits(AS), PartOffset),

4995 PartPtrTy, StoreBasePtr->getName() + "."),

4997 false);

4998 PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,

4999 LLVMContext::MD_access_group,

5000 LLVMContext::MD_DIAssignID});

5001

5002 if (AATags)

5005 LLVM_DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n");

5006 }

5007

5008

5009

5010

5011

5013 ResplitPromotableAllocas.insert(OtherAI);

5014 Worklist.insert(OtherAI);

5017 Worklist.insert(OtherAI);

5018 }

5019

5020

5021 DeadInsts.push_back(SI);

5022 }

5023

5024

5025 if (DeferredStores)

5026 SplitLoadsMap.insert(std::make_pair(LI, std::move(SplitLoads)));

5027

5028

5029 DeadInsts.push_back(LI);

5031 }

5032

5033

5034

5035

5036

5037

5038 for (StoreInst *SI : Stores) {

5042 uint64_t StoreSize = Ty->getBitWidth() / 8;

5043 assert(StoreSize > 0 && "Cannot have a zero-sized integer store!");

5044

5045 auto &Offsets = SplitOffsetsMap[SI];

5047 "Slice size should always match load size exactly!");

5048 uint64_t BaseOffset = Offsets.S->beginOffset();

5049 assert(BaseOffset + StoreSize > BaseOffset &&

5050 "Cannot represent alloca access size using 64-bit integers!");

5051

5054

5055 LLVM_DEBUG(dbgs() << " Splitting store: " << *SI << "\n");

5056

5057

5058 auto SplitLoadsMapI = SplitLoadsMap.find(LI);

5059 std::vector<LoadInst *> *SplitLoads = nullptr;

5060 if (SplitLoadsMapI != SplitLoadsMap.end()) {

5061 SplitLoads = &SplitLoadsMapI->second;

5062 assert(SplitLoads->size() == Offsets.Splits.size() + 1 &&

5063 "Too few split loads for the number of splits in the store!");

5064 } else {

5066 }

5067

5068 uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();

5069 int Idx = 0, Size = Offsets.Splits.size();

5070 for (;;) {

5071 auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);

5073 auto *StorePartPtrTy = SI->getPointerOperandType();

5074

5075

5076 LoadInst *PLoad;

5077 if (SplitLoads) {

5078 PLoad = (*SplitLoads)[Idx];

5079 } else {

5080 IRB.SetInsertPoint(LI);

5082 PLoad = IRB.CreateAlignedLoad(

5083 PartTy,

5085 APInt(DL.getIndexSizeInBits(AS), PartOffset),

5086 LoadPartPtrTy, LoadBasePtr->getName() + "."),

5088 false, LI->getName());

5089 PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,

5090 LLVMContext::MD_access_group});

5091 }

5092

5093

5094 IRB.SetInsertPoint(SI);

5095 auto AS = SI->getPointerAddressSpace();

5096 StoreInst *PStore = IRB.CreateAlignedStore(

5097 PLoad,

5099 APInt(DL.getIndexSizeInBits(AS), PartOffset),

5100 StorePartPtrTy, StoreBasePtr->getName() + "."),

5102 false);

5103 PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,

5104 LLVMContext::MD_access_group});

5105

5106

5108 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,

5110 false));

5111 LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()

5112 << ", " << NewSlices.back().endOffset()

5113 << "): " << *PStore << "\n");

5114 if (!SplitLoads) {

5115 LLVM_DEBUG(dbgs() << " of split load: " << *PLoad << "\n");

5116 }

5117

5118

5119 if (Idx >= Size)

5120 break;

5121

5122

5123 PartOffset = Offsets.Splits[Idx];

5124 ++Idx;

5125 PartSize = (Idx < Size ? Offsets.Splits[Idx] : StoreSize) - PartOffset;

5126 }

5127

5128

5129

5130

5131

5132

5133 if (!SplitLoads) {

5135 assert(OtherAI != &AI && "We can't re-split our own alloca!");

5136 ResplitPromotableAllocas.insert(OtherAI);

5137 Worklist.insert(OtherAI);

5140 assert(OtherAI != &AI && "We can't re-split our own alloca!");

5141 Worklist.insert(OtherAI);

5142 }

5143 }

5144

5145

5146

5147

5148

5149

5150

5151

5152

5153

5155 assert(*LI->user_begin() == SI && "Single use isn't this store!");

5156 DeadInsts.push_back(LI);

5157 }

5158 DeadInsts.push_back(SI);

5160 }

5161

5162

5163 llvm::erase_if(AS, [](const Slice &S) { return S.isDead(); });

5164

5165

5166

5167 AS.insert(NewSlices);

5168

5170#ifndef NDEBUG

5171 for (auto I = AS.begin(), E = AS.end(); I != E; ++I)

5173#endif

5174

5175

5176

5177 PromotableAllocas.set_subtract(ResplitPromotableAllocas);

5178

5179 return true;

5180}
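// Editor's note: illustrative IR sketch, not part of the original source;
// value names are made up. Pre-splitting in a nutshell: given slices that
// split an i64 alloca at byte 4, an i64 load spanning both partitions is
// itself split to match:
//
//   %v = load i64, ptr %a              ; covers bytes [0, 8)
//
// becomes
//
//   %v.lo = load i32, ptr %a                   ; [0, 4)
//   %hi   = getelementptr i8, ptr %a, i64 4
//   %v.hi = load i32, ptr %hi                  ; [4, 8)
//
// and stores of such loads are split the same way, so each partition can
// then be rewritten and promoted independently.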

5181

5182

5183

5184

5185

5186

5187

5188

5189

5190

5191

5192AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,

5193 Partition &P) {

5194

5195

5196

5197 Type *SliceTy = nullptr;

5198 const DataLayout &DL = AI.getDataLayout();

5199 unsigned VScale = AI.getFunction()->getVScaleValue();

5200

5201 std::pair<Type *, IntegerType *> CommonUseTy =

5203

5204 if (CommonUseTy.first) {

5205 TypeSize CommonUseSize = DL.getTypeAllocSize(CommonUseTy.first);

5207 SliceTy = CommonUseTy.first;

5208 }

5209

5210 if (!SliceTy)

5212 P.beginOffset(), P.size()))

5213 SliceTy = TypePartitionTy;

5214

5215

5216 if (!SliceTy && CommonUseTy.second)

5217 if (DL.getTypeAllocSize(CommonUseTy.second).getFixedValue() >= P.size())

5218 SliceTy = CommonUseTy.second;

5219 if ((!SliceTy || (SliceTy->isArrayTy() &&

5221 DL.isLegalInteger(P.size() * 8)) {

5222 SliceTy = Type::getIntNTy(*C, P.size() * 8);

5223 }

5224

5225 if (!SliceTy)

5226 SliceTy = ArrayType::get(Type::getInt8Ty(*C), P.size());

5227 assert(DL.getTypeAllocSize(SliceTy).getFixedValue() >= P.size());

5228

5230

5233 if (VecTy)

5234 SliceTy = VecTy;

5235

5236

5237

5238

5239

5240

5241

5242 AllocaInst *NewAI;

5243 if (SliceTy == AI.getAllocatedType() && P.beginOffset() == 0) {

5244 NewAI = &AI;

5245

5246

5247

5248 } else {

5249

5251

5252

5253 const bool IsUnconstrained = Alignment <= DL.getABITypeAlign(SliceTy);

5254 NewAI = new AllocaInst(

5255 SliceTy, AI.getAddressSpace(), nullptr,

5256 IsUnconstrained ? DL.getPrefTypeAlign(SliceTy) : Alignment,

5257 AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()),

5258 AI.getIterator());

5259

5261 ++NumNewAllocas;

5262 }

5263

5264 LLVM_DEBUG(dbgs() << "Rewriting alloca partition " << "[" << P.beginOffset()

5265 << "," << P.endOffset() << ") to: " << *NewAI << "\n");

5266

5267

5268

5269

5270 unsigned PPWOldSize = PostPromotionWorklist.size();

5271 unsigned NumUses = 0;

5272 SmallSetVector<PHINode *, 8> PHIUsers;

5273 SmallSetVector<SelectInst *, 8> SelectUsers;

5274

5275 AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(),

5276 P.endOffset(), IsIntegerPromotable, VecTy,

5277 PHIUsers, SelectUsers);

5278 bool Promotable = true;

5279

5280 if (auto DeletedValues = Rewriter.rewriteTreeStructuredMerge(P)) {

5281 NumUses += DeletedValues->size() + 1;

5282 for (Value *V : *DeletedValues)

5283 DeadInsts.push_back(V);

5284 } else {

5285 for (Slice *S : P.splitSliceTails()) {

5286 Promotable &= Rewriter.visit(S);

5287 ++NumUses;

5288 }

5289 for (Slice &S : P) {

5290 Promotable &= Rewriter.visit(&S);

5291 ++NumUses;

5292 }

5293 }

5294

5295 NumAllocaPartitionUses += NumUses;

5296 MaxUsesPerAllocaPartition.updateMax(NumUses);

5297

5298

5299

5300 for (PHINode *PHI : PHIUsers)

5302 Promotable = false;

5303 PHIUsers.clear();

5304 SelectUsers.clear();

5305 break;

5306 }

5307

5309 NewSelectsToRewrite;

5310 NewSelectsToRewrite.reserve(SelectUsers.size());

5311 for (SelectInst *Sel : SelectUsers) {

5312 std::optional<RewriteableMemOps> Ops =

5313 isSafeSelectToSpeculate(*Sel, PreserveCFG);

5314 if (!Ops) {

5315 Promotable = false;

5316 PHIUsers.clear();

5317 SelectUsers.clear();

5318 NewSelectsToRewrite.clear();

5319 break;

5320 }

5321 NewSelectsToRewrite.emplace_back(std::make_pair(Sel, *Ops));

5322 }

5323

5324 if (Promotable) {

5325 for (Use *U : AS.getDeadUsesIfPromotable()) {

5327 Value::dropDroppableUse(*U);

5328 if (OldInst)

5330 DeadInsts.push_back(OldInst);

5331 }

5332 if (PHIUsers.empty() && SelectUsers.empty()) {

5333

5334 PromotableAllocas.insert(NewAI);

5335 } else {

5336

5337

5338

5339 SpeculatablePHIs.insert_range(PHIUsers);

5340 SelectsToRewrite.reserve(SelectsToRewrite.size() +

5341 NewSelectsToRewrite.size());

5343 std::make_move_iterator(NewSelectsToRewrite.begin()),

5344 std::make_move_iterator(NewSelectsToRewrite.end())))

5345 SelectsToRewrite.insert(std::move(KV));

5346 Worklist.insert(NewAI);

5347 }

5348 } else {

5349

5350 while (PostPromotionWorklist.size() > PPWOldSize)

5351 PostPromotionWorklist.pop_back();

5352

5353

5354

5355 if (NewAI == &AI)

5356 return nullptr;

5357

5358

5359

5360

5361 Worklist.insert(NewAI);

5362 }

5363

5364 return NewAI;

5365}
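// Editor's note: illustrative sketch, not part of the original source.
// When a partition does not cover the original alloca exactly,
// rewritePartition creates a fresh, smaller alloca whose name encodes the
// index of the partition's first slice, e.g.
//
//   %a = alloca [8 x i8]
//
// with two disjoint 4-byte partitions might become
//
//   %a.sroa.0 = alloca i32
//   %a.sroa.3 = alloca i32    ; suffix is a slice index, not a byte offset
//
// each of which is promoted to an SSA value if its uses allow.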

5366

5367

5368

5374

5380

5381

5382

5383

5384

5385

5386

5387

5388

5389

5390

5391

5392

5393

5394

5395

5396

5397

5398

5399

5400

5401

5402

5403

5404

5405

5408 int64_t BitExtractOffset) {

5410 bool HasFragment = false;

5411 bool HasBitExtract = false;

5412

5415 HasFragment = true;

5416 continue;

5417 }

5420 HasBitExtract = true;

5421 int64_t ExtractOffsetInBits = Op.getArg(0);

5422 int64_t ExtractSizeInBits = Op.getArg(1);

5423

5424

5425

5426

5427

5429 return nullptr;

5430

5431 assert(BitExtractOffset <= 0);

5432 int64_t AdjustedOffset = ExtractOffsetInBits + BitExtractOffset;

5433

5434

5435

5436

5437

5438 if (AdjustedOffset < 0)

5439 return nullptr;

5440

5441 Ops.push_back(Op.getOp());

5442 Ops.push_back(std::max<int64_t>(0, AdjustedOffset));

5443 Ops.push_back(ExtractSizeInBits);

5444 continue;

5445 }

5446 Op.appendToVector(Ops);

5447 }

5448

5449

5450

5451 if (HasFragment && HasBitExtract)

5452 return nullptr;

5453

5454 if (!HasBitExtract) {

5458 }

5460}

5461

5462

5463

5464

5465

5466

5467

5468

5469

5470static void

5473 std::optional<DIExpression::FragmentInfo> NewFragment,

5474 int64_t BitExtractAdjustment) {

5475 (void)DIB;

5476

5477

5478

5479

5482 if (NewFragment)

5484 BitExtractAdjustment);

5485 if (!NewFragmentExpr)

5486 return;

5487

5491 BeforeInst->getParent()->insertDbgRecordBefore(DVR,

5493 return;

5494 }

5495

5499

5500

5501

5504 BeforeInst->getParent()->insertDbgRecordBefore(DVR,

5506 return;

5507 }

5508

5509

5510 if (!NewAddr->hasMetadata(LLVMContext::MD_DIAssignID)) {

5511 NewAddr->setMetadata(LLVMContext::MD_DIAssignID,

5513 }

5514

5516 NewAddr, Orig->getValue(), Orig->getVariable(), NewFragmentExpr, NewAddr,

5518 LLVM_DEBUG(dbgs() << "Created new DVRAssign: " << *NewAssign << "\n");

5519 (void)NewAssign;

5520}

5521

5522

5523

5524bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {

5525 if (AS.begin() == AS.end())

5526 return false;

5527

5528 unsigned NumPartitions = 0;

5530 const DataLayout &DL = AI.getModule()->getDataLayout();

5531

5532

5533 Changed |= presplitLoadsAndStores(AI, AS);

5534

5535

5536

5537

5538

5539

5540

5541 bool IsSorted = true;

5542

5543 uint64_t AllocaSize =

5544 DL.getTypeAllocSize(AI.getAllocatedType()).getFixedValue();

5545 const uint64_t MaxBitVectorSize = 1024;

5546 if (AllocaSize <= MaxBitVectorSize) {

5547

5548

5549 SmallBitVector SplittableOffset(AllocaSize + 1, true);

5550 for (Slice &S : AS)

5551 for (unsigned O = S.beginOffset() + 1;

5552 O < S.endOffset() && O < AllocaSize; O++)

5553 SplittableOffset.reset(O);

5554

5555 for (Slice &S : AS) {

5556 if (!S.isSplittable())

5557 continue;

5558

5559 if ((S.beginOffset() > AllocaSize || SplittableOffset[S.beginOffset()]) &&

5560 (S.endOffset() > AllocaSize || SplittableOffset[S.endOffset()]))

5561 continue;

5562

5565 S.makeUnsplittable();

5566 IsSorted = false;

5567 }

5568 }

5569 } else {

5570

5571

5572 for (Slice &S : AS) {

5573 if (!S.isSplittable())

5574 continue;

5575

5576 if (S.beginOffset() == 0 && S.endOffset() >= AllocaSize)

5577 continue;

5578

5581 S.makeUnsplittable();

5582 IsSorted = false;

5583 }

5584 }

5585 }

5586

5587 if (!IsSorted)

5589

5590

5591

5592 struct Fragment {

5593 AllocaInst *Alloca;

5595 uint64_t Size;

5596 Fragment(AllocaInst *AI, uint64_t O, uint64_t S)

5598 };

5600

5601

5602 for (auto &P : AS.partitions()) {

5603 if (AllocaInst *NewAI = rewritePartition(AI, AS, P)) {

5604 Changed = true;

5605 if (NewAI != &AI) {

5606 uint64_t SizeOfByte = 8;

5607 uint64_t AllocaSize =

5608 DL.getTypeSizeInBits(NewAI->getAllocatedType()).getFixedValue();

5609

5610 uint64_t Size = std::min(AllocaSize, P.size() * SizeOfByte);

5611 Fragments.push_back(

5612 Fragment(NewAI, P.beginOffset() * SizeOfByte, Size));

5613 }

5614 }

5615 ++NumPartitions;

5616 }

5617

5618 NumAllocaPartitions += NumPartitions;

5619 MaxPartitionsPerAlloca.updateMax(NumPartitions);

5620

5621

5622

5623 auto MigrateOne = [&](DbgVariableRecord *DbgVariable) {

5624

5626 return;

5627

5628 const Value *DbgPtr = DbgVariable->getAddress();

5630 DbgVariable->getFragmentOrEntireVariable();

5631

5632

5633 int64_t CurrentExprOffsetInBytes = 0;

5634 SmallVector<uint64_t> PostOffsetOps;

5636 ->extractLeadingOffset(CurrentExprOffsetInBytes, PostOffsetOps))

5637 return;

5638

5639

5640 int64_t ExtractOffsetInBits = 0;

5644 ExtractOffsetInBits = Op.getArg(0);

5645 break;

5646 }

5647 }

5648

5649 DIBuilder DIB(*AI.getModule(), false);

5650 for (auto Fragment : Fragments) {

5651 int64_t OffsetFromLocationInBits;

5652 std::optional<DIExpression::FragmentInfo> NewDbgFragment;

5653

5654

5655

5657 DL, &AI, Fragment.Offset, Fragment.Size, DbgPtr,

5658 CurrentExprOffsetInBytes * 8, ExtractOffsetInBits, VarFrag,

5659 NewDbgFragment, OffsetFromLocationInBits))

5660 continue;

5661

5662

5663

5664

5665 if (NewDbgFragment && !NewDbgFragment->SizeInBits)

5666 continue;

5667

5668

5669

5670 if (!NewDbgFragment)

5671 NewDbgFragment = DbgVariable->getFragment();

5672

5673

5674

5675 int64_t OffestFromNewAllocaInBits =

5676 OffsetFromLocationInBits - ExtractOffsetInBits;

5677

5678

5679 int64_t BitExtractOffset =

5680 std::min<int64_t>(0, OffestFromNewAllocaInBits);

5681

5682

5683

5684

5685 OffestFromNewAllocaInBits =

5686 std::max(int64_t(0), OffestFromNewAllocaInBits);

5687

5688

5689

5690

5691

5692 DIExpression *NewExpr = DIExpression::get(AI.getContext(), PostOffsetOps);

5693 if (OffestFromNewAllocaInBits > 0) {

5694 int64_t OffsetInBytes = (OffestFromNewAllocaInBits + 7) / 8;

5696 }

5697

5698

5699

5700 auto RemoveOne = [DbgVariable](auto *OldDII) {

5701 auto SameVariableFragment = [](const auto *LHS, const auto *RHS) {

5702 return LHS->getVariable() == RHS->getVariable() &&

5703 LHS->getDebugLoc()->getInlinedAt() ==

5704 RHS->getDebugLoc()->getInlinedAt();

5705 };

5706 if (SameVariableFragment(OldDII, DbgVariable))

5707 OldDII->eraseFromParent();

5708 };

5711 insertNewDbgInst(DIB, DbgVariable, Fragment.Alloca, NewExpr, &AI,

5712 NewDbgFragment, BitExtractOffset);

5713 }

5714 };

5715

5716

5717

5721

5723}

5724

5725

5726void SROA::clobberUse(Use &U) {

5728

5730

5731

5732

5733

5736 DeadInsts.push_back(OldI);

5737 }

5738}

5739

5740

5742public:

5749

5753

5754private:

5755 Type *ZeroType;

5756};

5757

5758bool SROA::propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS) {

5759

5760

5761

5762

5763 LLVM_DEBUG(dbgs() << "Attempting to propagate values on " << AI << "\n");

5764 bool AllSameAndValid = true;

5765 Type *PartitionType = nullptr;

5767 uint64_t BeginOffset = 0;

5768 uint64_t EndOffset = 0;

5769

5770 auto Flush = [&]() {

5771 if (AllSameAndValid && !Insts.empty()) {

5772 LLVM_DEBUG(dbgs() << "Propagate values on slice [" << BeginOffset << ", "

5773 << EndOffset << ")\n");

5775 SSAUpdater SSA(&NewPHIs);

5777 BasicLoadAndStorePromoter Promoter(Insts, SSA, PartitionType);

5778 Promoter.run(Insts);

5779 }

5780 AllSameAndValid = true;

5781 PartitionType = nullptr;

5783 };

5784

5785 for (Slice &S : AS) {

5789 dbgs() << "Ignoring slice: ";

5790 AS.print(dbgs(), &S);

5791 });

5792 continue;

5793 }

5794 if (S.beginOffset() >= EndOffset) {

5795 Flush();

5796 BeginOffset = S.beginOffset();

5797 EndOffset = S.endOffset();

5798 } else if (S.beginOffset() != BeginOffset || S.endOffset() != EndOffset) {

5799 if (AllSameAndValid) {

5801 dbgs() << "Slice does not match range [" << BeginOffset << ", "

5802 << EndOffset << ")";

5803 AS.print(dbgs(), &S);

5804 });

5805 AllSameAndValid = false;

5806 }

5807 EndOffset = std::max(EndOffset, S.endOffset());

5808 continue;

5809 }

5810

5813

5814 if (!LI->isSimple() || (PartitionType && UserTy != PartitionType))

5815 AllSameAndValid = false;

5816 PartitionType = UserTy;

5819 Type *UserTy = SI->getValueOperand()->getType();

5820 if (!SI->isSimple() || (PartitionType && UserTy != PartitionType))

5821 AllSameAndValid = false;

5822 PartitionType = UserTy;

5824 } else {

5825 AllSameAndValid = false;

5826 }

5827 }

5828

5829 Flush();

5830 return true;

5831}

5832

5833

5834

5835

5836

5837

5838std::pair<bool, bool>

5839SROA::runOnAlloca(AllocaInst &AI) {

5841 bool CFGChanged = false;

5842

5843 LLVM_DEBUG(dbgs() << "SROA alloca: " << AI << "\n");

5844 ++NumAllocasAnalyzed;

5845

5846

5847 if (AI.use_empty()) {

5848 AI.eraseFromParent();

5850 return {Changed, CFGChanged};

5851 }

5852 const DataLayout &DL = AI.getDataLayout();

5853

5854

5855 auto *AT = AI.getAllocatedType();

5856 TypeSize Size = DL.getTypeAllocSize(AT);

5857 if (AI.isArrayAllocation() || !AT->isSized() || Size.isScalable() ||

5858 Size.getFixedValue() == 0)

5859 return {Changed, CFGChanged};

5860
5861 // First, split any FCA loads and stores touching this alloca to promote
5862 // scalarization of the individual pieces.
5863 IRBuilderTy IRB(&AI);
5864 AggLoadStoreRewriter AggRewriter(DL, IRB);
5865 Changed |= AggRewriter.rewrite(AI);
5866
5867 // Build the slices using a recursive instruction-visiting builder.
5868 AllocaSlices AS(DL, AI);
5869 LLVM_DEBUG(AS.print(dbgs()));
5870 if (AS.isEscaped())

5871 return {Changed, CFGChanged};

5872

5873 if (AS.isEscapedReadOnly()) {

5874 Changed |= propagateStoredValuesToLoads(AI, AS);

5875 return {Changed, CFGChanged};

5876 }

5877
5878 // Delete all the dead users of this alloca before splitting and rewriting it.
5879 for (Instruction *DeadUser : AS.getDeadUsers()) {
5880 // Free up everything used by this instruction.
5881 for (Use &DeadOp : DeadUser->operands())
5882 clobberUse(DeadOp);
5883
5884 // Now replace the uses of this instruction.
5885 DeadUser->replaceAllUsesWith(PoisonValue::get(DeadUser->getType()));
5886
5887 // And mark it for deletion.
5888 DeadInsts.push_back(DeadUser);
5889 Changed = true;
5890 }
5891 for (Use *DeadOp : AS.getDeadOperands()) {
5892 clobberUse(*DeadOp);
5893 Changed = true;
5894 }

5895
5896 // No slices to split. Leave the dead alloca for a later pass to clean up.
5897 if (AS.begin() == AS.end())
5898 return {Changed, CFGChanged};
5899
5900 Changed |= splitAlloca(AI, AS);
5901
5902 LLVM_DEBUG(dbgs() << "  Speculating PHIs\n");
5903 while (!SpeculatablePHIs.empty())
5904 speculatePHINodeLoads(IRB, *SpeculatablePHIs.pop_back_val());
5905
5906 LLVM_DEBUG(dbgs() << "  Rewriting Selects\n");
5907 auto RemainingSelectsToRewrite = SelectsToRewrite.takeVector();
5908 while (!RemainingSelectsToRewrite.empty()) {
5909 const auto [K, V] = RemainingSelectsToRewrite.pop_back_val();
5910 CFGChanged |=
5911 rewriteSelectInstMemOps(*K, V, IRB, PreserveCFG ? nullptr : DTU);
5912 }
5913
5914 return {Changed, CFGChanged};
5915 }
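
As a compact restatement of the early-exit guards at the top of this function (an editorial sketch; the helper name is hypothetical), an alloca is only analyzed when it is used, non-array, sized, and of non-zero fixed size:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"

static bool isSROACandidate(const llvm::AllocaInst &AI,
                            const llvm::DataLayout &DL) {
  if (AI.use_empty() || AI.isArrayAllocation())
    return false;
  llvm::Type *AT = AI.getAllocatedType();
  if (!AT->isSized())
    return false;
  llvm::TypeSize Size = DL.getTypeAllocSize(AT);
  // Scalable and zero-sized allocations are skipped by the analysis.
  return !Size.isScalable() && Size.getFixedValue() != 0;
}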

5916

5917 /// Delete the dead instructions accumulated in this run.
5918 ///
5919 /// Recursively deletes the dead instructions we've accumulated. This is done
5920 /// at the very end to maximize locality of the recursive delete and to
5921 /// facilitate the deletion of cascading dead instructions. It also records
5922 /// any deleted alloca instructions in \p DeletedAllocas so the caller can
5923 /// purge them from its worklists.
5924 ///
5925 /// Returns true if any instruction was deleted.
5926 bool SROA::deleteDeadInstructions(
5927 SmallPtrSetImpl<AllocaInst *> &DeletedAllocas) {
5928 bool Changed = false;
5929 while (!DeadInsts.empty()) {
5930 Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());
5931 if (!I)
5932 continue;
5933 LLVM_DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n");
5934
5935 // If the instruction is an alloca, find the possible dbg.declare connected
5936 // to it, and remove it too. We must do this before calling RAUW or we will
5937 // not be able to find it.
5938 if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
5939 DeletedAllocas.insert(AI);
5940 for (DbgVariableRecord *OldDII : findDVRDeclares(AI))
5941 OldDII->eraseFromParent();
5942 }
5943
5944 at::deleteAssignmentMarkers(I);
5945 I->replaceAllUsesWith(UndefValue::get(I->getType()));
5946
5947 for (Use &Operand : I->operands())
5948 if (Instruction *U = dyn_cast<Instruction>(Operand)) {
5949 // Zero out the operand and see if it becomes trivially dead.
5950 Operand = nullptr;
5951 if (isInstructionTriviallyDead(U))
5952 DeadInsts.push_back(U);
5953 }
5954
5955 ++NumDeleted;
5956 I->eraseFromParent();
5957 Changed = true;
5958 }
5959 return Changed;
5960 }
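
The deletion loop above is a standard worklist pattern: nulling out an instruction's operands may leave the operand-defining instructions trivially dead, so they are pushed onto the same worklist. A reduced sketch of just that pattern (editorial; not the pass's own entry point):

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Transforms/Utils/Local.h"

static void drainDeadWorklist(llvm::SmallVectorImpl<llvm::WeakVH> &DeadInsts) {
  while (!DeadInsts.empty()) {
    auto *I =
        llvm::dyn_cast_or_null<llvm::Instruction>(DeadInsts.pop_back_val());
    if (!I) // The value handle may have been nulled by an earlier deletion.
      continue;
    for (llvm::Use &Operand : I->operands())
      if (auto *U = llvm::dyn_cast<llvm::Instruction>(Operand.get())) {
        // Drop the operand; if that leaves U unused and side-effect free,
        // queue it for deletion as well.
        Operand.set(nullptr);
        if (llvm::isInstructionTriviallyDead(U))
          DeadInsts.push_back(U);
      }
    I->eraseFromParent();
  }
}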

5961

5962 /// Promote the allocas, using the best available technique.
5963 ///
5964 /// This attempts to promote whatever allocas have been identified as viable
5965 /// in the PromotableAllocas list. If that list is empty, there is nothing to do.
5966 bool SROA::promoteAllocas() {
5967 if (PromotableAllocas.empty())
5968 return false;
5969
5970 if (SROASkipMem2Reg) {
5971 LLVM_DEBUG(dbgs() << "Not promoting allocas with mem2reg!\n");
5972 } else {

5973 LLVM_DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");

5974 NumPromoted += PromotableAllocas.size();

5975 PromoteMemToReg(PromotableAllocas.getArrayRef(), DTU->getDomTree(), AC);

5976 }

5977

5978 PromotableAllocas.clear();

5979 return true;

5980 }
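
PromoteMemToReg is the same mem2reg entry point available to any pass. A minimal usage sketch (editorial; assumes a DominatorTree is already computed for the function):

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"

static void promoteEntryBlockAllocas(llvm::Function &F,
                                     llvm::DominatorTree &DT,
                                     llvm::AssumptionCache *AC) {
  llvm::SmallVector<llvm::AllocaInst *, 8> Allocas;
  for (llvm::Instruction &I : F.getEntryBlock())
    if (auto *AI = llvm::dyn_cast<llvm::AllocaInst>(&I))
      if (llvm::isAllocaPromotable(AI)) // Only simple loads/stores, no escapes.
        Allocas.push_back(AI);
  if (!Allocas.empty())
    llvm::PromoteMemToReg(Allocas, DT, AC);
}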

5981

5982 std::pair<bool /*Changed*/, bool /*CFGChanged*/> SROA::runSROA(Function &F) {

5983 LLVM_DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");

5984

5985 const DataLayout &DL = F.getDataLayout();

5986 BasicBlock &EntryBB = F.getEntryBlock();
5987 for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end());
5988 I != E; ++I) {
5989 if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
5990 if (DL.getTypeAllocSize(AI->getAllocatedType()).isScalable() &&
5991 isAllocaPromotable(AI))
5992 PromotableAllocas.insert(AI);
5993 else
5994 Worklist.insert(AI);
5995 }
5996 }

5997

5998 bool Changed = false;
5999 bool CFGChanged = false;
6000
6001 // Track allocas deleted during rewriting so stale pointers can be purged from the worklists.

6002 SmallPtrSet<AllocaInst *, 4> DeletedAllocas;

6003

6004 do {

6005 while (!Worklist.empty()) {

6006 auto [IterationChanged, IterationCFGChanged] =

6007 runOnAlloca(*Worklist.pop_back_val());

6008 Changed |= IterationChanged;

6009 CFGChanged |= IterationCFGChanged;

6010

6011 Changed |= deleteDeadInstructions(DeletedAllocas);

6012

6013

6014

6015 if (!DeletedAllocas.empty()) {

6016 Worklist.set_subtract(DeletedAllocas);

6017 PostPromotionWorklist.set_subtract(DeletedAllocas);

6018 PromotableAllocas.set_subtract(DeletedAllocas);

6019 DeletedAllocas.clear();

6020 }

6021 }

6022

6023 Changed |= promoteAllocas();

6024

6025 Worklist = PostPromotionWorklist;

6026 PostPromotionWorklist.clear();

6027 } while (!Worklist.empty());

6028

6029 assert((!CFGChanged || Changed) && "Can not only modify the CFG.");
6030 assert((!CFGChanged || !PreserveCFG) &&
6031 "Should not have modified the CFG when told to preserve it.");
6032
6033 if (isAssignmentTrackingEnabled(*F.getParent())) {
6034 for (auto &BB : F) {
6035 RemoveRedundantDbgInstrs(&BB);
6036 }
6037 }

6038

6039 return {Changed, CFGChanged};

6040 }
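
For reference, a minimal new-pass-manager driver that runs this pass over a single function (editorial sketch; the analysis-manager setup follows the standard PassBuilder registration sequence):

#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Transforms/Scalar/SROA.h"

static void runSROAOn(llvm::Function &F) {
  llvm::PassBuilder PB;
  llvm::LoopAnalysisManager LAM;
  llvm::FunctionAnalysisManager FAM;
  llvm::CGSCCAnalysisManager CGAM;
  llvm::ModuleAnalysisManager MAM;
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  llvm::FunctionPassManager FPM;
  // ModifyCFG permits the select/PHI rewrites that change control flow.
  FPM.addPass(llvm::SROAPass(llvm::SROAOptions::ModifyCFG));
  FPM.run(F, FAM);
}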

6041
6042 PreservedAnalyses SROAPass::run(Function &F, FunctionAnalysisManager &AM) {
6043 DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
6044 AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
6045 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
6046 auto [Changed, CFGChanged] =
6047 SROA(&F.getContext(), &DTU, &AC, PreserveCFG).runSROA(F);
6048 if (!Changed)
6049 return PreservedAnalyses::all();
6050 PreservedAnalyses PA;
6051 if (!CFGChanged)
6052 PA.preserveSet<CFGAnalyses>();
6053 PA.preserve<DominatorTreeAnalysis>();
6054 return PA;
6055 }

6056
6057 void SROAPass::printPipeline(
6058 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
6059 static_cast<PassInfoMixin<SROAPass> *>(this)->printPipeline(
6060 OS, MapClassName2PassName);
6061 OS << (PreserveCFG == SROAOptions::PreserveCFG ? "<preserve-cfg>"
6062 : "");
6063 }

6064

6066

6067 namespace {
6068
6069 /// A legacy pass for the legacy pass manager that wraps the \c SROA pass.
6070 class SROALegacyPass : public FunctionPass {
6071 SROAOptions PreserveCFG;
6072
6073 public:
6074 static char ID;
6075
6076 SROALegacyPass(SROAOptions PreserveCFG = SROAOptions::PreserveCFG)
6077 : FunctionPass(ID), PreserveCFG(PreserveCFG) {
6078 initializeSROALegacyPassPass(*PassRegistry::getPassRegistry());
6079 }
6080
6081 bool runOnFunction(Function &F) override {
6082 if (skipFunction(F))
6083 return false;
6084
6085 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
6086 AssumptionCache &AC =
6087 getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
6088 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
6089 auto [Changed, _] =
6090 SROA(&F.getContext(), &DTU, &AC, PreserveCFG).runSROA(F);
6091 return Changed;
6092 }
6093
6094 void getAnalysisUsage(AnalysisUsage &AU) const override {
6095 AU.addRequired<AssumptionCacheTracker>();
6096 AU.addRequired<DominatorTreeWrapperPass>();
6097 AU.addPreserved<GlobalsAAWrapperPass>();
6098 AU.addPreserved<DominatorTreeWrapperPass>();
6099 }
6100
6101 StringRef getPassName() const override { return "SROA"; }
6102 };

6103

6104 }

6105

6106 char SROALegacyPass::ID = 0;
6107
6108 FunctionPass *llvm::createSROAPass(bool PreserveCFG) {
6109 return new SROALegacyPass(PreserveCFG ? SROAOptions::PreserveCFG
6110 : SROAOptions::ModifyCFG);
6111 }
6112
6113 INITIALIZE_PASS_BEGIN(SROALegacyPass, "sroa",
6114 "Scalar Replacement Of Aggregates", false, false)
6115 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
6116 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
6117 INITIALIZE_PASS_END(SROALegacyPass, "sroa", "Scalar Replacement Of Aggregates",
6118 false, false)
