LLVM: lib/Transforms/Scalar/SROA.cpp Source File

#include "llvm/Config/llvm-config.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <optional>
#include <queue>
#include <string>
#include <tuple>
#include <utility>
#include <variant>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "sroa"

STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions per alloca");
STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses rewritten");
STATISTIC(MaxUsesPerAllocaPartition, "Maximum number of uses of a partition");
STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
STATISTIC(NumLoadsPredicated,
          "Number of loads rewritten into predicated loads to allow promotion");
STATISTIC(NumStoresPredicated,
          "Number of stores rewritten into predicated stores to allow promotion");
STATISTIC(NumDeleted, "Number of instructions deleted");
STATISTIC(NumVectorized, "Number of vectorized aggregates");

namespace llvm {
} // namespace llvm

namespace {

class AllocaSliceRewriter;
class AllocaSlices;
class Partition;
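/// Records, for the true and false hands of a select feeding a memory
/// operation, whether each hand is safe to speculate; both flags are packed
/// into a single byte so the object fits in the spare bits of a
/// PointerIntPair.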

class SelectHandSpeculativity {
  unsigned char Storage = 0;

public:
  SelectHandSpeculativity() = default;
  SelectHandSpeculativity &setAsSpeculatable(bool isTrueVal);
  bool isSpeculatable(bool isTrueVal) const;
  bool areAllSpeculatable() const;
  bool areAnySpeculatable() const;
  bool areNoneSpeculatable() const;

  explicit operator intptr_t() const { return static_cast<intptr_t>(Storage); }
  explicit SelectHandSpeculativity(intptr_t Storage_) : Storage(Storage_) {}
};
static_assert(sizeof(SelectHandSpeculativity) == sizeof(unsigned char));

using PossiblySpeculatableLoad =
    PointerIntPair<LoadInst *, 2, SelectHandSpeculativity>;
using UnspeculatableStore = StoreInst *;
using RewriteableMemOp =
    std::variant<PossiblySpeculatableLoad, UnspeculatableStore>;
using RewriteableMemOps = SmallVector<RewriteableMemOp, 2>;

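/// Implementation of the Scalar Replacement of Aggregates pass: it analyzes
/// each alloca's uses as slices, splits the alloca into smaller allocas per
/// partition where profitable, and promotes the results to SSA registers.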

class SROA {
  LLVMContext *const C;
  DomTreeUpdater *const DTU;
  AssumptionCache *const AC;
  const bool PreserveCFG;

  SmallSetVector<AllocaInst *, 16> Worklist;

  SmallSetVector<AllocaInst *, 16> PostPromotionWorklist;

  SetVector<AllocaInst *, SmallVector<AllocaInst *>,
            SmallPtrSet<AllocaInst *, 16>, 16>
      PromotableAllocas;

  SmallSetVector<PHINode *, 8> SpeculatablePHIs;

  SmallMapVector<SelectInst *, RewriteableMemOps, 8> SelectsToRewrite;

  // Queue of instructions awaiting deletion, referenced below as
  // Pass.DeadInsts. (Declaration reconstructed; the exact element type is an
  // assumption.)
  SmallVector<WeakVH, 8> DeadInsts;

  static std::optional<RewriteableMemOps>
  isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG);

public:
  SROA(LLVMContext *C, DomTreeUpdater *DTU, AssumptionCache *AC,
       SROAOptions PreserveCFG_)
      : C(C), DTU(DTU), AC(AC),
        PreserveCFG(PreserveCFG_ == SROAOptions::PreserveCFG) {}

  std::pair<bool /*Changed*/, bool /*CFGChanged*/> runSROA(Function &F);

private:
  friend class AllocaSliceRewriter;

  bool presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS);
  AllocaInst *rewritePartition(AllocaInst &AI, AllocaSlices &AS, Partition &P);
  bool splitAlloca(AllocaInst &AI, AllocaSlices &AS);
  bool propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS);
  std::pair<bool /*Changed*/, bool /*CFGChanged*/> runOnAlloca(AllocaInst &AI);
  void clobberUse(Use &U);
  bool deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas);
  bool promoteAllocas();
};

} // end anonymous namespace

namespace {
enum FragCalcResult { UseFrag, UseNoFrag, Skip };
} // end anonymous namespace

277static FragCalcResult

279 uint64_t NewStorageSliceOffsetInBits,

280 uint64_t NewStorageSliceSizeInBits,

281 std::optionalDIExpression::FragmentInfo StorageFragment,

282 std::optionalDIExpression::FragmentInfo CurrentFragment,

284

285

286 if (StorageFragment) {

288 std::min(NewStorageSliceSizeInBits, StorageFragment->SizeInBits);

289 Target.OffsetInBits =

290 NewStorageSliceOffsetInBits + StorageFragment->OffsetInBits;

291 } else {

292 Target.SizeInBits = NewStorageSliceSizeInBits;

293 Target.OffsetInBits = NewStorageSliceOffsetInBits;

294 }

295

296

297

298

299 if (!CurrentFragment) {

300 if (auto Size = Variable->getSizeInBits()) {

301

303 if (Target == CurrentFragment)

304 return UseNoFrag;

305 }

306 }

307

308

309

310 if (!CurrentFragment || *CurrentFragment == Target)

311 return UseFrag;

312

313

314

315

316 if (Target.startInBits() < CurrentFragment->startInBits() ||

317 Target.endInBits() > CurrentFragment->endInBits())

318 return Skip;

319

320

321 return UseFrag;

322}

323

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

344 uint64_t OldAllocaOffsetInBits,

348

349

350

351

353

355

356 if (DVRAssignMarkerRange.empty())

357 return;

358

360 LLVM_DEBUG(dbgs() << " OldAlloca: " << *OldAlloca << "\n");

361 LLVM_DEBUG(dbgs() << " IsSplit: " << IsSplit << "\n");

362 LLVM_DEBUG(dbgs() << " OldAllocaOffsetInBits: " << OldAllocaOffsetInBits

363 << "\n");

364 LLVM_DEBUG(dbgs() << " SliceSizeInBits: " << SliceSizeInBits << "\n");

365 LLVM_DEBUG(dbgs() << " OldInst: " << *OldInst << "\n");

370

371

373 BaseFragments;

376 DVR->getExpression()->getFragmentInfo();

377

378

379

385

387 LLVM_DEBUG(dbgs() << " existing dbg.assign is: " << *DbgAssign

388 << "\n");

389 auto *Expr = DbgAssign->getExpression();

390 bool SetKillLocation = false;

391

392 if (IsSplit) {

393 std::optionalDIExpression::FragmentInfo BaseFragment;

394 {

396 if (R == BaseFragments.end())

397 return;

398 BaseFragment = R->second;

399 }

400 std::optionalDIExpression::FragmentInfo CurrentFragment =

401 Expr->getFragmentInfo();

404 DbgAssign->getVariable(), OldAllocaOffsetInBits, SliceSizeInBits,

405 BaseFragment, CurrentFragment, NewFragment);

406

407 if (Result == Skip)

408 return;

409 if (Result == UseFrag && !(NewFragment == CurrentFragment)) {

410 if (CurrentFragment) {

411

412

413

414

415 NewFragment.OffsetInBits -= CurrentFragment->OffsetInBits;

416 }

417

420 Expr = *E;

421 } else {

422

423

424

428 SetKillLocation = true;

429 }

430 }

431 }

432

433

434 if (!NewID) {

436 Inst->setMetadata(LLVMContext::MD_DIAssignID, NewID);

437 }

438

440 if (IsSplit) {

443 DIB.insertDbgAssign(Inst, NewValue, DbgAssign->getVariable(), Expr,

445 DbgAssign->getDebugLoc())));

446 } else {

447

448 NewAssign = DbgAssign;

449 NewAssign->setAssignId(NewID);

454 }

455

456

457

458

459

460

461

462

463

464

465

466 SetKillLocation |=

467 Value && (DbgAssign->hasArgList() ||

468 !DbgAssign->getExpression()->isSingleLocationExpression());

469 if (SetKillLocation)

471

472

473

474

475

476

477

478

479

480

481

482

483

484

485 if (NewAssign != DbgAssign) {

486 NewAssign->moveBefore(DbgAssign->getIterator());

487 NewAssign->setDebugLoc(DbgAssign->getDebugLoc());

488 }

489 LLVM_DEBUG(dbgs() << "Created new assign: " << *NewAssign << "\n");

490 };

491

492 for_each(DVRAssignMarkerRange, MigrateDbgAssign);

493}

494

495namespace {

496

497

498

500 std::string Prefix;

501

502 Twine getNameWithPrefix(const Twine &Name) const {

503 return Name.isTriviallyEmpty() ? Name : Prefix + Name;

504 }

505

506public:

507 void SetNamePrefix(const Twine &P) { Prefix = P.str(); }

508

509 void InsertHelper(Instruction *I, const Twine &Name,

512 InsertPt);

513 }

514};

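/// A used slice of an alloca: the byte range [BeginOffset, EndOffset)
/// accessed by a particular Use, plus whether that use may be split across
/// partition boundaries.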

class Slice {
  uint64_t BeginOffset = 0;

  uint64_t EndOffset = 0;

  PointerIntPair<Use *, 1, bool> UseAndIsSplittable;

public:
  Slice() = default;

  Slice(uint64_t BeginOffset, uint64_t EndOffset, Use *U, bool IsSplittable,
        Value *ProtectedFieldDisc)
      : BeginOffset(BeginOffset), EndOffset(EndOffset),
        UseAndIsSplittable(U, IsSplittable),
        ProtectedFieldDisc(ProtectedFieldDisc) {}

  uint64_t beginOffset() const { return BeginOffset; }
  uint64_t endOffset() const { return EndOffset; }

  bool isSplittable() const { return UseAndIsSplittable.getInt(); }
  void makeUnsplittable() { UseAndIsSplittable.setInt(false); }

  Use *getUse() const { return UseAndIsSplittable.getPointer(); }

  bool isDead() const { return getUse() == nullptr; }
  void kill() { UseAndIsSplittable.setPointer(nullptr); }

  Value *ProtectedFieldDisc;

  bool operator<(const Slice &RHS) const {
    if (beginOffset() < RHS.beginOffset())
      return true;
    if (beginOffset() > RHS.beginOffset())
      return false;
    if (isSplittable() != RHS.isSplittable())
      return !isSplittable();
    if (endOffset() > RHS.endOffset())
      return true;
    return false;
  }

  [[maybe_unused]] friend bool operator<(const Slice &LHS, uint64_t RHSOffset) {
    return LHS.beginOffset() < RHSOffset;
  }
  [[maybe_unused]] friend bool operator<(uint64_t LHSOffset, const Slice &RHS) {
    return LHSOffset < RHS.beginOffset();
  }

  bool operator==(const Slice &RHS) const {
    return isSplittable() == RHS.isSplittable() &&
           beginOffset() == RHS.beginOffset() && endOffset() == RHS.endOffset();
  }
};

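/// The slices of an alloca, kept as a sorted vector. Also tracks whether the
/// alloca pointer escapes, plus the dead users and dead operands discovered
/// while the slices were built.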

600class AllocaSlices {

601public:

602

603 AllocaSlices(const DataLayout &DL, AllocaInst &AI);

604

605

606

607

608

609 bool isEscaped() const { return PointerEscapingInstr; }

610 bool isEscapedReadOnly() const { return PointerEscapingInstrReadOnly; }

611

612

613

615 using range = iterator_range;

616

617 iterator begin() { return Slices.begin(); }

618 iterator end() { return Slices.end(); }

619

621 using const_range = iterator_range<const_iterator>;

622

623 const_iterator begin() const { return Slices.begin(); }

624 const_iterator end() const { return Slices.end(); }

625

626

627

628 void erase(iterator Start, iterator Stop) { Slices.erase(Start, Stop); }

629

630

631

632

633

634

636 int OldSize = Slices.size();

637 Slices.append(NewSlices.begin(), NewSlices.end());

638 auto SliceI = Slices.begin() + OldSize;

639 std::stable_sort(SliceI, Slices.end());

640 std::inplace_merge(Slices.begin(), SliceI, Slices.end());

641 }

642

643

644

647

648

650

651

652

654

655

657 return DeadUseIfPromotable;

658 }

659

660

661

662

663

664

665

666 ArrayRef<Use *> getDeadOperands() const { return DeadOperands; }

667

668#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

669 void print(raw_ostream &OS, const_iterator I, StringRef Indent = " ") const;

670 void printSlice(raw_ostream &OS, const_iterator I,

671 StringRef Indent = " ") const;

672 void printUse(raw_ostream &OS, const_iterator I,

673 StringRef Indent = " ") const;

674 void print(raw_ostream &OS) const;

675 void dump(const_iterator I) const;

676 void dump() const;

677#endif

678

679private:

680 template <typename DerivedT, typename RetT = void> class BuilderBase;

682

683 friend class AllocaSlices::SliceBuilder;

684

685#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

686

687 AllocaInst &AI;

688#endif

689

690

691

692

693

694

695

697 Instruction *PointerEscapingInstrReadOnly;

698

699

700

701

702

703

704

706

707

708

709

710

711

712

713 SmallVector<Instruction *, 8> DeadUsers;

714

715

716

718

719

721

722

723

724

725

726

727

728

729

731};

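/// A contiguous, rewritable partition of the alloca: the byte range
/// [BeginOffset, EndOffset), the sub-range of slices [SI, SJ) overlapping it,
/// and the tails of split slices that extend across it.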

class Partition {
private:
  friend class AllocaSlices;
  friend class AllocaSlices::partition_iterator;

  using iterator = AllocaSlices::iterator;

  uint64_t BeginOffset = 0, EndOffset = 0;

  iterator SI, SJ;

  // Tails of split slices that overlap this partition. (Declaration
  // reconstructed from its uses; the inline element count is an assumption.)
  SmallVector<Slice *, 4> SplitTails;

  Partition(iterator SI) : SI(SI), SJ(SI) {}

public:
  uint64_t beginOffset() const { return BeginOffset; }

  uint64_t endOffset() const { return EndOffset; }

  uint64_t size() const {
    assert(BeginOffset < EndOffset && "Partitions must span some bytes!");
    return EndOffset - BeginOffset;
  }

  bool empty() const { return SI == SJ; }

  iterator begin() const { return SI; }
  iterator end() const { return SJ; }

  ArrayRef<Slice *> splitSliceTails() const { return SplitTails; }
};

} // end anonymous namespace

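/// Forward iterator that lazily forms maximal partitions while walking the
/// sorted slice list, carrying along the tails of split slices that extend
/// past the current partition's end offset.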

class AllocaSlices::partition_iterator
    : public iterator_facade_base<partition_iterator, std::forward_iterator_tag,
                                  Partition> {

823

824

825 Partition P;

826

827

828 AllocaSlices::iterator SE;

829

830

831

832 uint64_t MaxSplitSliceEndOffset = 0;

833

834

835

836 partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE)

837 : P(SI), SE(SE) {

838

839

840 if (SI != SE)

841 advance();

842 }

843

844

845

846

847 void advance() {

848 assert((P.SI != SE || P.SplitTails.empty()) &&

849 "Cannot advance past the end of the slices!");

850

851

852 if (P.SplitTails.empty()) {

853 if (P.EndOffset >= MaxSplitSliceEndOffset) {

854

855 P.SplitTails.clear();

856 MaxSplitSliceEndOffset = 0;

857 } else {

858

859

860

862 [&](Slice *S) { return S->endOffset() <= P.EndOffset; });

864 [&](Slice *S) {

865 return S->endOffset() == MaxSplitSliceEndOffset;

866 }) &&

867 "Could not find the current max split slice offset!");

869 [&](Slice *S) {

870 return S->endOffset() <= MaxSplitSliceEndOffset;

871 }) &&

872 "Max split slice end offset is not actually the max!");

873 }

874 }

875

876

877

878 if (P.SI == SE) {

879 assert(P.SplitTails.empty() && "Failed to clear the split slices!");

880 return;

881 }

882

883

884

885 if (P.SI != P.SJ) {

886

887

888 for (Slice &S : P)

889 if (S.isSplittable() && S.endOffset() > P.EndOffset) {

890 P.SplitTails.push_back(&S);

891 MaxSplitSliceEndOffset =

892 std::max(S.endOffset(), MaxSplitSliceEndOffset);

893 }

894

895

896 P.SI = P.SJ;

897

898

899 if (P.SI == SE) {

900 P.BeginOffset = P.EndOffset;

901 P.EndOffset = MaxSplitSliceEndOffset;

902 return;

903 }

904

905

906

907

908 if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset &&

909 !P.SI->isSplittable()) {

910 P.BeginOffset = P.EndOffset;

911 P.EndOffset = P.SI->beginOffset();

912 return;

913 }

914 }

915

916

917

918

919

920

921 P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;

922 P.EndOffset = P.SI->endOffset();

923 ++P.SJ;

924

925

926

927 if (!P.SI->isSplittable()) {

928

929

930 assert(P.BeginOffset == P.SI->beginOffset());

931

932

933

934 while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {

935 if (!P.SJ->isSplittable())

936 P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());

937 ++P.SJ;

938 }

939

940

941

942 return;

943 }

944

945

946

947

948 assert(P.SI->isSplittable() && "Forming a splittable partition!");

949

950

951 while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset &&

952 P.SJ->isSplittable()) {

953 P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());

954 ++P.SJ;

955 }

956

957

958

959

960 if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {

961 assert(!P.SJ->isSplittable());

962 P.EndOffset = P.SJ->beginOffset();

963 }

964 }

965

966public:

967 bool operator==(const partition_iterator &RHS) const {

968 assert(SE == RHS.SE &&

969 "End iterators don't match between compared partition iterators!");

970

971

972

973

974

975

976 if (P.SI == RHS.P.SI && P.SplitTails.empty() == RHS.P.SplitTails.empty()) {

977 assert(P.SJ == RHS.P.SJ &&

978 "Same set of slices formed two different sized partitions!");

979 assert(P.SplitTails.size() == RHS.P.SplitTails.size() &&

980 "Same slice position with differently sized non-empty split "

981 "slice tails!");

982 return true;

983 }

984 return false;

985 }

986

988 advance();

989 return *this;

990 }

991

993};

994

995

996

997

998

999

1000

1001

1003 return make_range(partition_iterator(begin(), end()),

1004 partition_iterator(end(), end()));

1005}

1006

1008

1009

1010

1012 return SI.getOperand(1 + CI->isZero());

1013 if (SI.getOperand(1) == SI.getOperand(2))

1014 return SI.getOperand(1);

1015

1016 return nullptr;

1017}

1018

1019

1022

1023 return PN->hasConstantValue();

1024 }

1026}

1027

1028

1029

1030

1031

1035
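// SliceBuilder walks every (transitive) use of the alloca pointer, records
// each memory access as a Slice, marks unreachable or zero-size uses as
// dead, and aborts the analysis on uses it cannot reason about.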

1037

1039 AllocaSlices &AS;

1040

1043

1044

1046

1047

1048

1049 Value *ProtectedFieldDisc = nullptr;

1050

1051public:

1054 AllocSize(DL.getTypeAllocSize(AI.getAllocatedType()).getFixedValue()),

1055 AS(AS) {}

1056

1057private:

1059 if (VisitedDeadInsts.insert(&I).second)

1061 }

1062

1064 bool IsSplittable = false) {

1065

1066

1067 if (Size == 0 || Offset.uge(AllocSize)) {

1070 << " which has zero size or starts outside of the "

1071 << AllocSize << " byte alloca:\n"

1072 << " alloca: " << AS.AI << "\n"

1073 << " use: " << I << "\n");

1074 return markAsDead(I);

1075 }

1076

1077 uint64_t BeginOffset = Offset.getZExtValue();

1078 uint64_t EndOffset = BeginOffset + Size;

1079

1080

1081

1082

1083

1084

1085

1086 assert(AllocSize >= BeginOffset);

1087 if (Size > AllocSize - BeginOffset) {

1088 LLVM_DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @"

1089 << Offset << " to remain within the " << AllocSize

1090 << " byte alloca:\n"

1091 << " alloca: " << AS.AI << "\n"

1092 << " use: " << I << "\n");

1093 EndOffset = AllocSize;

1094 }

1095

1096 AS.Slices.push_back(

1097 Slice(BeginOffset, EndOffset, U, IsSplittable, ProtectedFieldDisc));

1098 }

1099

1100 void visitBitCastInst(BitCastInst &BC) {

1102 return markAsDead(BC);

1103

1104 return Base::visitBitCastInst(BC);

1105 }

1106

1107 void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {

1109 return markAsDead(ASC);

1110

1111 return Base::visitAddrSpaceCastInst(ASC);

1112 }

1113

1114 void visitGetElementPtrInst(GetElementPtrInst &GEPI) {

1116 return markAsDead(GEPI);

1117

1118 return Base::visitGetElementPtrInst(GEPI);

1119 }

1120

1121 void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset,

1122 uint64_t Size, bool IsVolatile) {

1123

1124

1125

1126 bool IsSplittable =

1128

1129 insertUse(I, Offset, Size, IsSplittable);

1130 }

1131

1132 void visitLoadInst(LoadInst &LI) {

1134 "All simple FCA loads should have been pre-split");

1135

1136

1137

1138 if (!IsOffsetKnown)

1139 return PI.setEscapedReadOnly(&LI);

1140

1141 TypeSize Size = DL.getTypeStoreSize(LI.getType());

1142 if (Size.isScalable()) {

1144 if (!VScale)

1145 return PI.setAborted(&LI);

1146

1148 }

1149

1150 return handleLoadOrStore(LI.getType(), LI, Offset, Size.getFixedValue(),

1152 }

1153

1154 void visitStoreInst(StoreInst &SI) {

1155 Value *ValOp = SI.getValueOperand();

1156 if (ValOp == *U)

1157 return PI.setEscapedAndAborted(&SI);

1158 if (!IsOffsetKnown)

1159 return PI.setAborted(&SI);

1160

1161 TypeSize StoreSize = DL.getTypeStoreSize(ValOp->getType());

1163 unsigned VScale = SI.getFunction()->getVScaleValue();

1164 if (!VScale)

1165 return PI.setAborted(&SI);

1166

1168 }

1169

1171

1172

1173

1174

1175

1176

1177

1178

1179 if (Size > AllocSize || Offset.ugt(AllocSize - Size)) {

1180 LLVM_DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @"

1181 << Offset << " which extends past the end of the "

1182 << AllocSize << " byte alloca:\n"

1183 << " alloca: " << AS.AI << "\n"

1184 << " use: " << SI << "\n");

1185 return markAsDead(SI);

1186 }

1187

1189 "All simple FCA stores should have been pre-split");

1191 }

1192

1193 void visitMemSetInst(MemSetInst &II) {

1194 assert(II.getRawDest() == *U && "Pointer use is not the destination?");

1197 (IsOffsetKnown && Offset.uge(AllocSize)))

1198

1199 return markAsDead(II);

1200

1201 if (!IsOffsetKnown)

1202 return PI.setAborted(&II);

1203

1206 : AllocSize - Offset.getLimitedValue(),

1208 }

1209

1210 void visitMemTransferInst(MemTransferInst &II) {

1213

1214 return markAsDead(II);

1215

1216

1217

1218 if (VisitedDeadInsts.count(&II))

1219 return;

1220

1221 if (!IsOffsetKnown)

1222 return PI.setAborted(&II);

1223

1224

1225

1226

1227

1228

1229 if (Offset.uge(AllocSize)) {

1230 SmallDenseMap<Instruction *, unsigned>::iterator MTPI =

1231 MemTransferSliceMap.find(&II);

1232 if (MTPI != MemTransferSliceMap.end())

1233 AS.Slices[MTPI->second].kill();

1234 return markAsDead(II);

1235 }

1236

1237 uint64_t RawOffset = Offset.getLimitedValue();

1238 uint64_t Size = Length ? Length->getLimitedValue() : AllocSize - RawOffset;

1239

1240

1241

1242 if (*U == II.getRawDest() && *U == II.getRawSource()) {

1243

1244 if (II.isVolatile())

1245 return markAsDead(II);

1246

1247 return insertUse(II, Offset, Size, false);

1248 }

1249

1250

1251

1253 SmallDenseMap<Instruction *, unsigned>::iterator MTPI;

1254 std::tie(MTPI, Inserted) =

1255 MemTransferSliceMap.insert(std::make_pair(&II, AS.Slices.size()));

1256 unsigned PrevIdx = MTPI->second;

1257 if (!Inserted) {

1258 Slice &PrevP = AS.Slices[PrevIdx];

1259

1260

1261

1262 if (II.isVolatile() && PrevP.beginOffset() == RawOffset) {

1263 PrevP.kill();

1264 return markAsDead(II);

1265 }

1266

1267

1268

1269 PrevP.makeUnsplittable();

1270 }

1271

1272

1274

1275

1276 assert(AS.Slices[PrevIdx].getUse()->getUser() == &II &&

1277 "Map index doesn't point back to a slice with this user.");

1278 }

1279

1280

1281

1282

1283 void visitIntrinsicInst(IntrinsicInst &II) {

1284 if (II.isDroppable()) {

1285 AS.DeadUseIfPromotable.push_back(U);

1286 return;

1287 }

1288

1289 if (!IsOffsetKnown)

1290 return PI.setAborted(&II);

1291

1292 if (II.isLifetimeStartOrEnd()) {

1293 insertUse(II, Offset, AllocSize, true);

1294 return;

1295 }

1296

1297 if (II.getIntrinsicID() == Intrinsic::protected_field_ptr) {

1298

1299

1300

1301 AS.PFPUsers.push_back(&II);

1302 ProtectedFieldDisc = II.getArgOperand(1);

1303 for (Use &U : II.uses()) {

1304 this->U = &U;

1306 visitLoadInst(*LI);

1308 visitStoreInst(*SI);

1309 else

1310 PI.setAborted(&II);

1311 if (PI.isAborted())

1312 break;

1313 }

1314 ProtectedFieldDisc = nullptr;

1315 return;

1316 }

1317

1318 Base::visitIntrinsicInst(II);

1319 }

1320

1321 Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &Size) {

1322

1323

1324

1325

1326 SmallPtrSet<Instruction *, 4> Visited;

1328 Visited.insert(Root);

1331

1332

1334 do {

1336 std::tie(UsedI, I) = Uses.pop_back_val();

1337

1339 TypeSize LoadSize = DL.getTypeStoreSize(LI->getType());

1341 PI.setAborted(LI);

1342 return nullptr;

1343 }

1345 continue;

1346 }

1349 if (Op == UsedI)

1350 return SI;

1351 TypeSize StoreSize = DL.getTypeStoreSize(Op->getType());

1353 PI.setAborted(SI);

1354 return nullptr;

1355 }

1357 continue;

1358 }

1359

1361 if (GEP->hasAllZeroIndices())

1362 return GEP;

1365 return I;

1366 }

1367

1368 for (User *U : I->users())

1371 } while (Uses.empty());

1372

1373 return nullptr;

1374 }

1375

1376 void visitPHINodeOrSelectInst(Instruction &I) {

1378 if (I.use_empty())

1379 return markAsDead(I);

1380

1381

1382

1383

1385 I.getParent()->getFirstInsertionPt() == I.getParent()->end())

1386 return PI.setAborted(&I);

1387

1388

1389

1390

1391

1392

1393

1394

1395

1397 if (Result == *U)

1398

1399

1400 enqueueUsers(I);

1401 else

1402

1403

1404 AS.DeadOperands.push_back(U);

1405

1406 return;

1407 }

1408

1409 if (!IsOffsetKnown)

1410 return PI.setAborted(&I);

1411

1412

1413 uint64_t &Size = PHIOrSelectSizes[&I];

1414 if (Size) {

1415

1416 if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&I, Size))

1417 return PI.setAborted(UnsafeI);

1418 }

1419

1420

1421

1422

1423

1424

1425

1426 if (Offset.uge(AllocSize)) {

1427 AS.DeadOperands.push_back(U);

1428 return;

1429 }

1430

1432 }

1433

1434 void visitPHINode(PHINode &PN) { visitPHINodeOrSelectInst(PN); }

1435

1436 void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); }

1437

1438

1439 void visitInstruction(Instruction &I) { PI.setAborted(&I); }

1440

1441 void visitCallBase(CallBase &CB) {

1442

1443

1447 PI.setEscapedReadOnly(&CB);

1448 return;

1449 }

1450

1451 Base::visitCallBase(CB);

1452 }

1453};

1454

1455AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)

1456 :

1457#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

1458 AI(AI),

1459#endif

1460 PointerEscapingInstr(nullptr), PointerEscapingInstrReadOnly(nullptr) {

1462 SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);

1463 if (PtrI.isEscaped() || PtrI.isAborted()) {

1464

1465

1466 PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()

1467 : PtrI.getAbortingInst();

1468 assert(PointerEscapingInstr && "Did not track a bad instruction");

1469 return;

1470 }

1471 PointerEscapingInstrReadOnly = PtrI.getEscapedReadOnlyInst();

1472

1473 llvm::erase_if(Slices, [](const Slice &S) { return S.isDead(); });

1474

1475

1476

1478}

1479

1480#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

1481

1482void AllocaSlices::print(raw_ostream &OS, const_iterator I,

1483 StringRef Indent) const {

1484 printSlice(OS, I, Indent);

1485 OS << "\n";

1486 printUse(OS, I, Indent);

1487}

1488

1489void AllocaSlices::printSlice(raw_ostream &OS, const_iterator I,

1490 StringRef Indent) const {

1491 OS << Indent << "[" << I->beginOffset() << "," << I->endOffset() << ")"

1492 << " slice #" << (I - begin())

1493 << (I->isSplittable() ? " (splittable)" : "");

1494}

1495

1496void AllocaSlices::printUse(raw_ostream &OS, const_iterator I,

1497 StringRef Indent) const {

1498 OS << Indent << " used by: " << *I->getUse()->getUser() << "\n";

1499}

1500

1501void AllocaSlices::print(raw_ostream &OS) const {

1502 if (PointerEscapingInstr) {

1503 OS << "Can't analyze slices for alloca: " << AI << "\n"

1504 << " A pointer to this alloca escaped by:\n"

1505 << " " << *PointerEscapingInstr << "\n";

1506 return;

1507 }

1508

1509 if (PointerEscapingInstrReadOnly)

1510 OS << "Escapes into ReadOnly: " << *PointerEscapingInstrReadOnly << "\n";

1511

1512 OS << "Slices of alloca: " << AI << "\n";

1513 for (const_iterator I = begin(), E = end(); I != E; ++I)

1515}

1516

1517LLVM_DUMP_METHOD void AllocaSlices::dump(const_iterator I) const {

1519}

1521

1522#endif

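/// Walk the slices in [B, E) that span the whole range and determine both a
/// type common to every load and store (if any) and the widest integer type
/// used; returns the pair {CommonUseTy, WidestIntegerTy}.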

1526static std::pair<Type *, IntegerType *>

1527findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E,

1529 Type *Ty = nullptr;

1530 bool TyIsCommon = true;

1532

1533

1534

1535 for (AllocaSlices::const_iterator I = B; I != E; ++I) {

1536 Use *U = I->getUse();

1538 continue;

1539 if (I->beginOffset() != B->beginOffset() || I->endOffset() != EndOffset)

1540 continue;

1541

1542 Type *UserTy = nullptr;

1546 UserTy = SI->getValueOperand()->getType();

1547 }

1548

1550

1551

1552

1553

1554 if (UserITy->getBitWidth() % 8 != 0 ||

1555 UserITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))

1556 continue;

1557

1558

1559

1560 if (!ITy || ITy->getBitWidth() < UserITy->getBitWidth())

1561 ITy = UserITy;

1562 }

1563

1564

1565

1566 if (!UserTy || (Ty && Ty != UserTy))

1567 TyIsCommon = false;

1568 else

1569 Ty = UserTy;

1570 }

1571

1572 return {TyIsCommon ? Ty : nullptr, ITy};

1573}

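// Predicate deciding whether the loads of a pointer PHI can be speculated
// into the PHI's predecessor blocks: every user must be a simple load of one
// common type, nothing between the PHI and its loads may write to memory,
// and each incoming pointer is then checked per predecessor.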

1603 Type *LoadType = nullptr;

1607 return false;

1608

1609

1610

1611

1613 return false;

1614

1615 if (LoadType) {

1616 if (LoadType != LI->getType())

1617 return false;

1618 } else {

1619 LoadType = LI->getType();

1620 }

1621

1622

1623

1625 if (BBI->mayWriteToMemory())

1626 return false;

1627

1628 MaxAlign = std::max(MaxAlign, LI->getAlign());

1629 }

1630

1631 if (!LoadType)

1632 return false;

1633

1634 APInt LoadSize =

1635 APInt(APWidth, DL.getTypeStoreSize(LoadType).getFixedValue());

1636

1637

1638

1639

1640 for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {

1643

1644

1645

1646

1648 return false;

1649

1650

1651

1653 continue;

1654

1655

1656

1657

1659 continue;

1660

1661 return false;

1662 }

1663

1664 return true;

1665}

1666

1669

1672 IRB.SetInsertPoint(&PN);

1674 PN.getName() + ".sroa.speculated");

1675

1676

1677

1680

1681

1686 }

1687

1688

1690 for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {

1693

1694

1695

1696

1697

1698 if (Value *V = InjectedLoads.lookup(Pred)) {

1700 continue;

1701 }

1702

1703 Instruction *TI = Pred->getTerminator();

1704 IRB.SetInsertPoint(TI);

1705

1706 LoadInst *Load = IRB.CreateAlignedLoad(

1707 LoadTy, InVal, Alignment,

1708 (PN.getName() + ".sroa.speculate.load." + Pred->getName()));

1709 ++NumLoadsSpeculated;

1710 if (AATags)

1711 Load->setAAMetadata(AATags);

1713 InjectedLoads[Pred] = Load;

1714 }

1715

1716 LLVM_DEBUG(dbgs() << " speculated to: " << *NewPN << "\n");

1718}

1719

1720SelectHandSpeculativity &

1721SelectHandSpeculativity::setAsSpeculatable(bool isTrueVal) {

1722 if (isTrueVal)

1724 else

1726 return *this;

1727}

1728

1729bool SelectHandSpeculativity::isSpeculatable(bool isTrueVal) const {

1732}

1733

1734bool SelectHandSpeculativity::areAllSpeculatable() const {

1735 return isSpeculatable(true) &&

1736 isSpeculatable(false);

1737}

1738

1739bool SelectHandSpeculativity::areAnySpeculatable() const {

1740 return isSpeculatable(true) ||

1741 isSpeculatable(false);

1742}

1743bool SelectHandSpeculativity::areNoneSpeculatable() const {

1744 return !areAnySpeculatable();

1745}

1746

1747static SelectHandSpeculativity

1750 SelectHandSpeculativity Spec;

1751

1753 for (Value *Value : {SI.getTrueValue(), SI.getFalseValue()})

1755 &LI))

1756 Spec.setAsSpeculatable(Value == SI.getTrueValue());

1758 return Spec;

1759

1760 return Spec;

1761}

1762

1763std::optional

1764SROA::isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG) {

1765 RewriteableMemOps Ops;

1766

1767 for (User *U : SI.users()) {

1770

1772

1773

1774

1776 return {};

1777 Ops.emplace_back(Store);

1778 continue;

1779 }

1780

1782

1783

1784

1786 return {};

1787

1788 PossiblySpeculatableLoad Load(LI);

1790

1791

1793 return {};

1794 Ops.emplace_back(Load);

1795 continue;

1796 }

1797

1798 SelectHandSpeculativity Spec =

1800 if (PreserveCFG && !Spec.areAllSpeculatable())

1801 return {};

1802

1803 Load.setInt(Spec);

1804 Ops.emplace_back(Load);

1805 }

1806

1807 return Ops;

1808}

1809

1811 IRBuilderTy &IRB) {

1813

1814 Value *TV = SI.getTrueValue();

1815 Value *FV = SI.getFalseValue();

1816

1817

1818 assert(LI.isSimple() && "We only speculate simple loads");

1819

1820 IRB.SetInsertPoint(&LI);

1821

1824 LI.getName() + ".sroa.speculate.load.true");

1827 LI.getName() + ".sroa.speculate.load.false");

1828 NumLoadsSpeculated += 2;

1829

1830

1833

1835 if (Tags) {

1838 }

1839

1840 Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,

1841 LI.getName() + ".sroa.speculated",

1843

1844 LLVM_DEBUG(dbgs() << " speculated to: " << *V << "\n");

1846}

1847

1848template

1850 SelectHandSpeculativity Spec,

1853 LLVM_DEBUG(dbgs() << " original mem op: " << I << "\n");

1857 if (Spec.areNoneSpeculatable())

1859 SI.getMetadata(LLVMContext::MD_prof), &DTU);

1860 else {

1862 SI.getMetadata(LLVMContext::MD_prof), &DTU,

1863 nullptr, nullptr);

1864 if (Spec.isSpeculatable(true))

1866 }

1868 Spec = {};

1870 Tail->setName(Head->getName() + ".cont");

1875 bool IsThen = SuccBB == HeadBI->getSuccessor(0);

1876 int SuccIdx = IsThen ? 0 : 1;

1877 auto *NewMemOpBB = SuccBB == Tail ? Head : SuccBB;

1878 auto &CondMemOp = cast(*I.clone());

1879 if (NewMemOpBB != Head) {

1880 NewMemOpBB->setName(Head->getName() + (IsThen ? ".then" : ".else"));

1882 ++NumLoadsPredicated;

1883 else

1884 ++NumStoresPredicated;

1885 } else {

1886 CondMemOp.dropUBImplyingAttrsAndMetadata();

1887 ++NumLoadsSpeculated;

1888 }

1889 CondMemOp.insertBefore(NewMemOpBB->getTerminator()->getIterator());

1890 Value *Ptr = SI.getOperand(1 + SuccIdx);

1891 CondMemOp.setOperand(I.getPointerOperandIndex(), Ptr);

1893 CondMemOp.setName(I.getName() + (IsThen ? ".then" : ".else") + ".val");

1894 PN->addIncoming(&CondMemOp, NewMemOpBB);

1895 } else

1897 }

1901 I.replaceAllUsesWith(PN);

1902 }

1903}

1904

1906 SelectHandSpeculativity Spec,

1912 else

1914}

1915

1917 const RewriteableMemOps &Ops,

1919 bool CFGChanged = false;

1921

1922 for (const RewriteableMemOp &Op : Ops) {

1923 SelectHandSpeculativity Spec;

1925 if (auto *const *US = std::get_if(&Op)) {

1926 I = *US;

1927 } else {

1928 auto PSL = std::get(Op);

1929 I = PSL.getPointer();

1930 Spec = PSL.getInt();

1931 }

1932 if (Spec.areAllSpeculatable()) {

1934 } else {

1935 assert(DTU && "Should not get here when not allowed to modify the CFG!");

1937 CFGChanged = true;

1938 }

1939 I->eraseFromParent();

1940 }

1941

1944 SI.eraseFromParent();

1945 return CFGChanged;

1946}

1947

1948

1949

1952 const Twine &NamePrefix) {

1954 Ptr = IRB.CreateInBoundsPtrAdd(Ptr, IRB.getInt(Offset),

1955 NamePrefix + "sroa_idx");

1956 return IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr, PointerTy,

1957 NamePrefix + "sroa_cast");

1958}

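// Tests whether a value of type OldTy can be converted to type NewTy without
// changing any bits: the sizes must match, pointer/integer mixes are compared
// through the target's corresponding integer pointer type, and conversions
// involving non-integral pointer address spaces are rejected.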

1972 unsigned VScale = 0) {

1973 if (OldTy == NewTy)

1974 return true;

1975

1976

1977

1978

1982 "We can't have the same bitwidth for different int types");

1983 return false;

1984 }

1985

1986 TypeSize NewSize = DL.getTypeSizeInBits(NewTy);

1987 TypeSize OldSize = DL.getTypeSizeInBits(OldTy);

1988

1991

1992 if (!VScale)

1993 return false;

1994

1995

1996

1997

1998 auto OldVTy = OldTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(OldTy) : OldTy;

1999 auto NewVTy = NewTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(NewTy) : NewTy;

2000

2003 return false;

2004

2006 } else {

2008 return false;

2009

2011 }

2012 }

2013

2014 if (NewSize != OldSize)

2015 return false;

2017 return false;

2018

2019

2020

2027

2028

2029

2030 return OldAS == NewAS ||

2031 (DL.isNonIntegralAddressSpace(OldAS) &&

2032 DL.isNonIntegralAddressSpace(NewAS) &&

2033 DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));

2034 }

2035

2036

2037

2039 return DL.isNonIntegralPointerType(NewTy);

2040

2041

2042

2043 if (DL.isNonIntegralPointerType(OldTy))

2045

2046 return false;

2047 }

2048

2050 return false;

2051

2052 return true;

2053}

2054

2055

2056

2057

2058

2059

2060

2062 Type *NewTy) {

2063 Type *OldTy = V->getType();

2064

2065#ifndef NDEBUG

2066 BasicBlock *BB = IRB.GetInsertBlock();

2070 "Value not convertable to type");

2071#endif

2072

2073 if (OldTy == NewTy)

2074 return V;

2075

2077 "Integer types must be the exact same to convert.");

2078

2079

2080

2081 auto CreateBitCastLike = [&IRB](Value *In, Type *Ty) -> Value * {

2082 Type *InTy = In->getType();

2083 if (InTy == Ty)

2084 return In;

2085

2087

2088

2090 return IRB.CreateBitCast(IRB.CreateInsertVector(VTy,

2092 IRB.getInt64(0)),

2093 Ty);

2094 }

2095

2097

2098

2100 return IRB.CreateExtractVector(Ty, IRB.CreateBitCast(In, VTy),

2101 IRB.getInt64(0));

2102 }

2103

2104 return IRB.CreateBitCast(In, Ty);

2105 };

2106

2107

2109

2110

2111

2112

2113 return IRB.CreateIntToPtr(CreateBitCastLike(V, DL.getIntPtrType(NewTy)),

2114 NewTy);

2115 }

2116

2117

2119

2120

2121

2122

2123 return CreateBitCastLike(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),

2124 NewTy);

2125 }

2126

2130

2131

2132

2133

2134

2135

2136 if (OldAS != NewAS) {

2137 assert(DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));

2138 return IRB.CreateIntToPtr(

2139 CreateBitCastLike(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),

2140 DL.getIntPtrType(NewTy)),

2141 NewTy);

2142 }

2143 }

2144

2145 return CreateBitCastLike(V, NewTy);

2146}

2147

2148

2149

2150

2151

2156 unsigned VScale) {

2157

2159 std::max(S.beginOffset(), P.beginOffset()) - P.beginOffset();

2160 uint64_t BeginIndex = BeginOffset / ElementSize;

2161 if (BeginIndex * ElementSize != BeginOffset ||

2163 return false;

2164 uint64_t EndOffset = std::min(S.endOffset(), P.endOffset()) - P.beginOffset();

2165 uint64_t EndIndex = EndOffset / ElementSize;

2166 if (EndIndex * ElementSize != EndOffset ||

2168 return false;

2169

2170 assert(EndIndex > BeginIndex && "Empty vector!");

2171 uint64_t NumElements = EndIndex - BeginIndex;

2172 Type *SliceTy = (NumElements == 1)

2173 ? Ty->getElementType()

2175

2176 Type *SplitIntTy =

2177 Type::getIntNTy(Ty->getContext(), NumElements * ElementSize * 8);

2178

2179 Use *U = S.getUse();

2180

2182 if (MI->isVolatile())

2183 return false;

2184 if (!S.isSplittable())

2185 return false;

2187 if (II->isLifetimeStartOrEnd() && II->isDroppable())

2188 return false;

2191 return false;

2193

2194 if (LTy->isStructTy())

2195 return false;

2196 if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {

2197 assert(LTy->isIntegerTy());

2198 LTy = SplitIntTy;

2199 }

2201 return false;

2203 if (SI->isVolatile())

2204 return false;

2205 Type *STy = SI->getValueOperand()->getType();

2206

2208 return false;

2209 if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {

2211 STy = SplitIntTy;

2212 }

2214 return false;

2215 } else {

2216 return false;

2217 }

2218

2219 return true;

2220}

2221

2222

2223

2224

2225

2229 bool HaveCommonEltTy, Type *CommonEltTy,

2230 bool HaveVecPtrTy, bool HaveCommonVecPtrTy,

2231 VectorType *CommonVecPtrTy, unsigned VScale) {

2232

2233 if (CandidateTys.empty())

2234 return nullptr;

2235

2236

2237

2238

2239

2240 if (HaveVecPtrTy && !HaveCommonVecPtrTy)

2241 return nullptr;

2242

2243

2244 if (!HaveCommonEltTy && HaveVecPtrTy) {

2245

2246 CandidateTys.clear();

2247 CandidateTys.push_back(CommonVecPtrTy);

2248 } else if (!HaveCommonEltTy && !HaveVecPtrTy) {

2249

2250 for (VectorType *&VTy : CandidateTys) {

2251 if (!VTy->getElementType()->isIntegerTy())

2253 VTy->getContext(), VTy->getScalarSizeInBits())));

2254 }

2255

2256

2257

2259 (void)DL;

2260 assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==

2261 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&

2262 "Cannot have vector types of different sizes!");

2263 assert(RHSTy->getElementType()->isIntegerTy() &&

2264 "All non-integer types eliminated!");

2265 assert(LHSTy->getElementType()->isIntegerTy() &&

2266 "All non-integer types eliminated!");

2269 };

2271 (void)DL;

2272 assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==

2273 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&

2274 "Cannot have vector types of different sizes!");

2275 assert(RHSTy->getElementType()->isIntegerTy() &&

2276 "All non-integer types eliminated!");

2277 assert(LHSTy->getElementType()->isIntegerTy() &&

2278 "All non-integer types eliminated!");

2281 };

2282 llvm::sort(CandidateTys, RankVectorTypesComp);

2283 CandidateTys.erase(llvm::unique(CandidateTys, RankVectorTypesEq),

2284 CandidateTys.end());

2285 } else {

2286

2287

2288#ifndef NDEBUG

2289 for (VectorType *VTy : CandidateTys) {

2290 assert(VTy->getElementType() == CommonEltTy &&

2291 "Unaccounted for element type!");

2292 assert(VTy == CandidateTys[0] &&

2293 "Different vector types with the same element type!");

2294 }

2295#endif

2296 CandidateTys.resize(1);

2297 }

2298

2299

2300

2303 std::numeric_limits::max();

2304 });

2305

2306

2309 DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();

2310

2311

2312

2313 if (ElementSize % 8)

2314 return false;

2315 assert((DL.getTypeSizeInBits(VTy).getFixedValue() % 8) == 0 &&

2316 "vector size not a multiple of element size?");

2317 ElementSize /= 8;

2318

2319 for (const Slice &S : P)

2321 return false;

2322

2323 for (const Slice *S : P.splitSliceTails())

2325 return false;

2326

2327 return true;

2328 });

2329 return VTy != CandidateTys.end() ? *VTy : nullptr;

2330}

2331

2336 bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy,

2337 bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy, unsigned VScale) {

2338 [[maybe_unused]] VectorType *OriginalElt =

2339 CandidateTysCopy.size() ? CandidateTysCopy[0] : nullptr;

2340

2341

2342 for (Type *Ty : OtherTys) {

2344 continue;

2345 unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();

2346

2347

2348 for (VectorType *const VTy : CandidateTysCopy) {

2349

2350 assert(CandidateTysCopy[0] == OriginalElt && "Different Element");

2351 unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();

2352 unsigned ElementSize =

2353 DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();

2355 VectorSize % TypeSize == 0) {

2357 CheckCandidateType(NewVTy);

2358 }

2359 }

2360 }

2361

2363 P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,

2364 HaveCommonVecPtrTy, CommonVecPtrTy, VScale);

2365}

2366

2367

2368

2369

2370

2371

2372

2373

2374

2375

2377 unsigned VScale) {

2378

2379

2383 Type *CommonEltTy = nullptr;

2384 VectorType *CommonVecPtrTy = nullptr;

2385 bool HaveVecPtrTy = false;

2386 bool HaveCommonEltTy = true;

2387 bool HaveCommonVecPtrTy = true;

2388 auto CheckCandidateType = [&](Type *Ty) {

2390

2391 if (!CandidateTys.empty()) {

2393 if (DL.getTypeSizeInBits(VTy).getFixedValue() !=

2394 DL.getTypeSizeInBits(V).getFixedValue()) {

2395 CandidateTys.clear();

2396 return;

2397 }

2398 }

2400 Type *EltTy = VTy->getElementType();

2401

2402 if (!CommonEltTy)

2403 CommonEltTy = EltTy;

2404 else if (CommonEltTy != EltTy)

2405 HaveCommonEltTy = false;

2406

2408 HaveVecPtrTy = true;

2409 if (!CommonVecPtrTy)

2410 CommonVecPtrTy = VTy;

2411 else if (CommonVecPtrTy != VTy)

2412 HaveCommonVecPtrTy = false;

2413 }

2414 }

2415 };

2416

2417

2418 for (const Slice &S : P) {

2423 Ty = SI->getValueOperand()->getType();

2424 else

2425 continue;

2426

2427 auto CandTy = Ty->getScalarType();

2428 if (CandTy->isPointerTy() && (S.beginOffset() != P.beginOffset() ||

2429 S.endOffset() != P.endOffset())) {

2430 DeferredTys.insert(Ty);

2431 continue;

2432 }

2433

2434 LoadStoreTys.insert(Ty);

2435

2436 if (S.beginOffset() == P.beginOffset() && S.endOffset() == P.endOffset())

2437 CheckCandidateType(Ty);

2438 }

2439

2442 LoadStoreTys, CandidateTysCopy, CheckCandidateType, P, DL,

2443 CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,

2444 HaveCommonVecPtrTy, CommonVecPtrTy, VScale))

2445 return VTy;

2446

2447 CandidateTys.clear();

2449 DeferredTys, CandidateTysCopy, CheckCandidateType, P, DL, CandidateTys,

2450 HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy,

2451 CommonVecPtrTy, VScale);

2452}

2453

2454

2455

2456

2457

2460 Type *AllocaTy,

2462 bool &WholeAllocaOp) {

2463 uint64_t Size = DL.getTypeStoreSize(AllocaTy).getFixedValue();

2464

2465 uint64_t RelBegin = S.beginOffset() - AllocBeginOffset;

2466 uint64_t RelEnd = S.endOffset() - AllocBeginOffset;

2467

2468 Use *U = S.getUse();

2469

2470

2471

2472

2473

2475 if (II->isLifetimeStartOrEnd() || II->isDroppable())

2476 return true;

2477 }

2478

2479

2480

2481 if (RelEnd > Size)

2482 return false;

2483

2486 return false;

2487

2490 return false;

2491

2492

2493 if (S.beginOffset() < AllocBeginOffset)

2494 return false;

2495

2496

2497

2499 WholeAllocaOp = true;

2501 if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedValue())

2502 return false;

2503 } else if (RelBegin != 0 || RelEnd != Size ||

2505

2506

2507 return false;

2508 }

2510 Type *ValueTy = SI->getValueOperand()->getType();

2511 if (SI->isVolatile())

2512 return false;

2513

2514 TypeSize StoreSize = DL.getTypeStoreSize(ValueTy);

2516 return false;

2517

2518

2519 if (S.beginOffset() < AllocBeginOffset)

2520 return false;

2521

2522

2523

2525 WholeAllocaOp = true;

2527 if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedValue())

2528 return false;

2529 } else if (RelBegin != 0 || RelEnd != Size ||

2531

2532

2533 return false;

2534 }

2537 return false;

2538 if (!S.isSplittable())

2539 return false;

2540 } else {

2541 return false;

2542 }

2543

2544 return true;

2545}

2546

2547

2548

2549

2550

2551

2552

2555 uint64_t SizeInBits = DL.getTypeSizeInBits(AllocaTy).getFixedValue();

2556

2558 return false;

2559

2560

2561 if (SizeInBits != DL.getTypeStoreSizeInBits(AllocaTy).getFixedValue())

2562 return false;

2563

2564

2565

2566

2570 return false;

2571

2572

2573

2574

2575

2576

2577

2578

2579 bool WholeAllocaOp = P.empty() && DL.isLegalInteger(SizeInBits);

2580

2581 for (const Slice &S : P)

2583 WholeAllocaOp))

2584 return false;

2585

2586 for (const Slice *S : P.splitSliceTails())

2588 WholeAllocaOp))

2589 return false;

2590

2591 return WholeAllocaOp;

2592}

2593

2596 const Twine &Name) {

2599 assert(DL.getTypeStoreSize(Ty).getFixedValue() + Offset <=

2600 DL.getTypeStoreSize(IntTy).getFixedValue() &&

2601 "Element extends past full value");

2603 if (DL.isBigEndian())

2604 ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -

2605 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);

2606 if (ShAmt) {

2607 V = IRB.CreateLShr(V, ShAmt, Name + ".shift");

2609 }

2610 assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&

2611 "Cannot extract to a larger integer!");

2612 if (Ty != IntTy) {

2613 V = IRB.CreateTrunc(V, Ty, Name + ".trunc");

2615 }

2616 return V;

2617}

2618

2623 assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&

2624 "Cannot insert a larger integer!");

2626 if (Ty != IntTy) {

2627 V = IRB.CreateZExt(V, IntTy, Name + ".ext");

2629 }

2630 assert(DL.getTypeStoreSize(Ty).getFixedValue() + Offset <=

2631 DL.getTypeStoreSize(IntTy).getFixedValue() &&

2632 "Element store outside of alloca store");

2634 if (DL.isBigEndian())

2635 ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -

2636 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);

2637 if (ShAmt) {

2638 V = IRB.CreateShl(V, ShAmt, Name + ".shift");

2640 }

2641

2642 if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {

2643 APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);

2644 Old = IRB.CreateAnd(Old, Mask, Name + ".mask");

2646 V = IRB.CreateOr(Old, V, Name + ".insert");

2648 }

2649 return V;

2650}

2651

2653 unsigned EndIndex, const Twine &Name) {

2655 unsigned NumElements = EndIndex - BeginIndex;

2657

2658 if (NumElements == VecTy->getNumElements())

2659 return V;

2660

2661 if (NumElements == 1) {

2662 V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),

2663 Name + ".extract");

2665 return V;

2666 }

2667

2669 V = IRB.CreateShuffleVector(V, Mask, Name + ".extract");

2671 return V;

2672}

2673

2675 unsigned BeginIndex, const Twine &Name) {

2677 assert(VecTy && "Can only insert a vector into a vector");

2678

2680 if (!Ty) {

2681

2682 V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),

2683 Name + ".insert");

2685 return V;

2686 }

2687

2690 "Too many elements!");

2693 assert(V->getType() == VecTy && "Vector type mismatch");

2694 return V;

2695 }

2697

2698

2699

2700

2701

2705 if (i >= BeginIndex && i < EndIndex)

2706 Mask.push_back(i - BeginIndex);

2707 else

2708 Mask.push_back(-1);

2709 V = IRB.CreateShuffleVector(V, Mask, Name + ".expand");

2711

2715 Mask2.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));

2716

2717

2718 V = IRB.CreateSelectWithUnknownProfile(ConstantVector::get(Mask2), V, Old,

2720

2722 return V;

2723}

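// Helper used by the tree-structured store merge below: merges two vector
// values into one wider vector. Each input is bitcast to the new alloca's
// element type if needed, the narrower input is widened with a shufflevector
// so the lane counts line up, and the results are concatenated with a final
// shufflevector.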

2750

2751

2752

2755

2756

2757

2759 const char *DebugName) {

2760 Type *EltType = VecType->getElementType();

2761 if (EltType != NewAIEltTy) {

2762

2763 unsigned TotalBits =

2764 VecType->getNumElements() * DL.getTypeSizeInBits(EltType);

2765 unsigned NewNumElts = TotalBits / DL.getTypeSizeInBits(NewAIEltTy);

2766

2768 V = Builder.CreateBitCast(V, NewVecType);

2769 VecType = NewVecType;

2770 LLVM_DEBUG(dbgs() << " bitcast " << DebugName << ": " << *V << "\n");

2771 }

2772 };

2773

2774 BitcastIfNeeded(V0, VecType0, "V0");

2775 BitcastIfNeeded(V1, VecType1, "V1");

2776

2777 unsigned NumElts0 = VecType0->getNumElements();

2778 unsigned NumElts1 = VecType1->getNumElements();

2779

2781

2782 if (NumElts0 == NumElts1) {

2783 for (unsigned i = 0; i < NumElts0 + NumElts1; ++i)

2784 ShuffleMask.push_back(i);

2785 } else {

2786

2787

2788 unsigned SmallSize = std::min(NumElts0, NumElts1);

2789 unsigned LargeSize = std::max(NumElts0, NumElts1);

2790 bool IsV0Smaller = NumElts0 < NumElts1;

2791 Value *&ExtendedVec = IsV0Smaller ? V0 : V1;

2793 for (unsigned i = 0; i < SmallSize; ++i)

2795 for (unsigned i = SmallSize; i < LargeSize; ++i)

2797 ExtendedVec = Builder.CreateShuffleVector(

2799 LLVM_DEBUG(dbgs() << " shufflevector: " << *ExtendedVec << "\n");

2800 for (unsigned i = 0; i < NumElts0; ++i)

2801 ShuffleMask.push_back(i);

2802 for (unsigned i = 0; i < NumElts1; ++i)

2803 ShuffleMask.push_back(LargeSize + i);

2804 }

2805

2806 return Builder.CreateShuffleVector(V0, V1, ShuffleMask);

2807}

2808

namespace {
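/// Visitor that rewrites the instructions using one partition of an alloca to
/// target a new, smaller alloca: loads and stores are re-pointed (widened,
/// narrowed, or turned into vector-element accesses as needed), and PHIs and
/// selects that block promotion are queued for later speculation.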

2817class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {

2818

2819 friend class InstVisitor<AllocaSliceRewriter, bool>;

2820

2821 using Base = InstVisitor<AllocaSliceRewriter, bool>;

2822

2823 const DataLayout &DL;

2824 AllocaSlices &AS;

2826 AllocaInst &OldAI, &NewAI;

2827 const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;

2828 Type *NewAllocaTy;

2829

2830

2831

2832

2833

2834 IntegerType *IntTy;

2835

2836

2837

2838

2839

2840

2841

2842

2843

2844

2846 Type *ElementTy;

2847 uint64_t ElementSize;

2848

2849

2850

2851 uint64_t BeginOffset = 0;

2852 uint64_t EndOffset = 0;

2853

2854

2855

2856 uint64_t NewBeginOffset = 0, NewEndOffset = 0;

2857

2858 uint64_t SliceSize = 0;

2859 bool IsSplittable = false;

2860 bool IsSplit = false;

2861 Use *OldUse = nullptr;

2863

2864

2865 SmallSetVector<PHINode *, 8> &PHIUsers;

2866 SmallSetVector<SelectInst *, 8> &SelectUsers;

2867

2868

2869

2870 IRBuilderTy IRB;

2871

2872

2873

2874 Value *getPtrToNewAI(unsigned AddrSpace, bool IsVolatile) {

2876 return &NewAI;

2877

2878 Type *AccessTy = IRB.getPtrTy(AddrSpace);

2879 return IRB.CreateAddrSpaceCast(&NewAI, AccessTy);

2880 }

2881

2882public:

2883 AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROA &Pass,

2884 AllocaInst &OldAI, AllocaInst &NewAI,

2885 uint64_t NewAllocaBeginOffset,

2886 uint64_t NewAllocaEndOffset, bool IsIntegerPromotable,

2887 VectorType *PromotableVecTy,

2888 SmallSetVector<PHINode *, 8> &PHIUsers,

2889 SmallSetVector<SelectInst *, 8> &SelectUsers)

2890 : DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI),

2891 NewAllocaBeginOffset(NewAllocaBeginOffset),

2892 NewAllocaEndOffset(NewAllocaEndOffset),

2893 NewAllocaTy(NewAI.getAllocatedType()),

2894 IntTy(

2895 IsIntegerPromotable

2897 DL.getTypeSizeInBits(NewAI.getAllocatedType())

2898 .getFixedValue())

2899 : nullptr),

2900 VecTy(PromotableVecTy),

2901 ElementTy(VecTy ? VecTy->getElementType() : nullptr),

2902 ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8

2903 : 0),

2904 PHIUsers(PHIUsers), SelectUsers(SelectUsers),

2905 IRB(NewAI.getContext(), ConstantFolder()) {

2906 if (VecTy) {

2907 assert((DL.getTypeSizeInBits(ElementTy).getFixedValue() % 8) == 0 &&

2908 "Only multiple-of-8 sized vector elements are viable");

2909 ++NumVectorized;

2910 }

2911 assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy));

2912 }

2913

2914 bool visit(AllocaSlices::const_iterator I) {

2915 bool CanSROA = true;

2916 BeginOffset = I->beginOffset();

2917 EndOffset = I->endOffset();

2918 IsSplittable = I->isSplittable();

2919 IsSplit =

2920 BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;

2921 LLVM_DEBUG(dbgs() << " rewriting " << (IsSplit ? "split " : ""));

2924

2925

2926 assert(BeginOffset < NewAllocaEndOffset);

2927 assert(EndOffset > NewAllocaBeginOffset);

2928 NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);

2929 NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);

2930

2931 SliceSize = NewEndOffset - NewBeginOffset;

2932 LLVM_DEBUG(dbgs() << " Begin:(" << BeginOffset << ", " << EndOffset

2933 << ") NewBegin:(" << NewBeginOffset << ", "

2934 << NewEndOffset << ") NewAllocaBegin:("

2935 << NewAllocaBeginOffset << ", " << NewAllocaEndOffset

2936 << ")\n");

2937 assert(IsSplit || NewBeginOffset == BeginOffset);

2938 OldUse = I->getUse();

2940

2942 IRB.SetInsertPoint(OldUserI);

2943 IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc());

2944 IRB.getInserter().SetNamePrefix(Twine(NewAI.getName()) + "." +

2945 Twine(BeginOffset) + ".");

2946

2948 if (VecTy || IntTy)

2950 return CanSROA;

2951 }

2952

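/// Attempt to rewrite a partition whose slices are contiguous,
/// non-overlapping whole-vector stores followed by a single full-width load:
/// the stored vectors are merged pairwise with shufflevectors into one value
/// that is stored once and reloaded, and the original stores and load are
/// returned for deletion. Returns std::nullopt when the pattern does not
/// match.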

2991 std::optional<SmallVector<Value *, 4>>

2992 rewriteTreeStructuredMerge(Partition &P) {

2993

2994 if (P.splitSliceTails().size() > 0)

2995 return std::nullopt;

2996

2998 LoadInst *TheLoad = nullptr;

2999

3000

3001 struct StoreInfo {

3002 StoreInst *Store;

3003 uint64_t BeginOffset;

3004 uint64_t EndOffset;

3005 Value *StoredValue;

3006 StoreInfo(StoreInst *SI, uint64_t Begin, uint64_t End, Value *Val)

3007 : Store(SI), BeginOffset(Begin), EndOffset(End), StoredValue(Val) {}

3008 };

3009

3011

3012

3013

3014 Type *AllocatedEltTy =

3017 : Type::getInt8Ty(NewAI.getContext());

3018 unsigned AllocatedEltTySize = DL.getTypeSizeInBits(AllocatedEltTy);

3019

3020

3021

3022

3023

3024

3025 auto IsTypeValidForTreeStructuredMerge = [&](Type *Ty) -> bool {

3027 return FixedVecTy &&

3028 DL.getTypeSizeInBits(FixedVecTy->getElementType()) % 8 == 0 &&

3029 !FixedVecTy->getElementType()->isPointerTy();

3030 };

3031

3032 for (Slice &S : P) {

3035

3036

3037

3038

3039

3040 if (TheLoad || !IsTypeValidForTreeStructuredMerge(LI->getType()) ||

3041 S.beginOffset() != NewAllocaBeginOffset ||

3042 S.endOffset() != NewAllocaEndOffset || LI->isVolatile())

3043 return std::nullopt;

3044 TheLoad = LI;

3046

3047

3048

3049

3050

3051 if (!IsTypeValidForTreeStructuredMerge(

3052 SI->getValueOperand()->getType()) ||

3053 SI->isVolatile())

3054 return std::nullopt;

3056 unsigned NumElts = VecTy->getNumElements();

3057 unsigned EltSize = DL.getTypeSizeInBits(VecTy->getElementType());

3058 if (NumElts * EltSize % AllocatedEltTySize != 0)

3059 return std::nullopt;

3060 StoreInfos.emplace_back(SI, S.beginOffset(), S.endOffset(),

3061 SI->getValueOperand());

3062 } else {

3063

3064

3065 return std::nullopt;

3066 }

3067 }

3068

3069 if (!TheLoad)

3070 return std::nullopt;

3071

3072

3073 if (StoreInfos.size() < 2)

3074 return std::nullopt;

3075

3076

3077

3078 llvm::sort(StoreInfos, [](const StoreInfo &A, const StoreInfo &B) {

3079 return A.BeginOffset < B.BeginOffset;

3080 });

3081

3082

3083 uint64_t ExpectedStart = NewAllocaBeginOffset;

3084 for (auto &StoreInfo : StoreInfos) {

3085 uint64_t BeginOff = StoreInfo.BeginOffset;

3086 uint64_t EndOff = StoreInfo.EndOffset;

3087

3088

3089 if (BeginOff != ExpectedStart)

3090 return std::nullopt;

3091

3092 ExpectedStart = EndOff;

3093 }

3094

3095 if (ExpectedStart != NewAllocaEndOffset)

3096 return std::nullopt;

3097

3098

3099

3100

3101

3102

3103

3104

3105

3107 BasicBlock *StoreBB = StoreInfos[0].Store->getParent();

3108

3109 for (auto &StoreInfo : StoreInfos) {

3110 if (StoreInfo.Store->getParent() != StoreBB)

3111 return std::nullopt;

3112 if (LoadBB == StoreBB && !StoreInfo.Store->comesBefore(TheLoad))

3113 return std::nullopt;

3114 }

3115

3116

3117

3119 dbgs() << "Tree structured merge rewrite:\n Load: " << *TheLoad

3120 << "\n Ordered stores:\n";

3122 dbgs() << " [" << i << "] Range[" << Info.BeginOffset << ", "

3123 << Info.EndOffset << ") \tStore: " << *Info.Store

3124 << "\tValue: " << *Info.StoredValue << "\n";

3125 });

3126

3127

3128

3129 std::queue<Value *> VecElements;

3130 IRBuilder<> Builder(StoreInfos.back().Store);

3131 for (const auto &Info : StoreInfos) {

3133 VecElements.push(Info.StoredValue);

3134 }

3135

3136 LLVM_DEBUG(dbgs() << " Rewrite stores into shufflevectors:\n");

3137 while (VecElements.size() > 1) {

3138 const auto NumElts = VecElements.size();

3139 for ([[maybe_unused]] const auto _ : llvm::seq(NumElts / 2)) {

3140 Value *V0 = VecElements.front();

3141 VecElements.pop();

3142 Value *V1 = VecElements.front();

3143 VecElements.pop();

3145 LLVM_DEBUG(dbgs() << " shufflevector: " << *Merged << "\n");

3146 VecElements.push(Merged);

3147 }

3148 if (NumElts % 2 == 1) {

3149 Value *V = VecElements.front();

3150 VecElements.pop();

3151 VecElements.push(V);

3152 }

3153 }

3154

3155

3156 Value *MergedValue = VecElements.front();

3157 Builder.CreateAlignedStore(MergedValue, &NewAI, getSliceAlign());

3158

3161 TheLoad->getType(), &NewAI, getSliceAlign(), TheLoad->isVolatile(),

3162 TheLoad->getName() + ".sroa.new.load"));

3163 DeletedValues.push_back(TheLoad);

3164

3165 return DeletedValues;

3166 }
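
// Illustrative sketch of the tree-structured merge above (IR names and types
// are hypothetical, not taken from this file): four <2 x float> stores that
// exactly tile the new alloca, followed by one full-width load,
//
//   store <2 x float> %a, ptr %p        ; bytes [ 0,  8)
//   store <2 x float> %b, ptr %p.8      ; bytes [ 8, 16)
//   store <2 x float> %c, ptr %p.16     ; bytes [16, 24)
//   store <2 x float> %d, ptr %p.24     ; bytes [24, 32)
//   %v = load <8 x float>, ptr %p
//
// are merged pairwise with shufflevectors and replaced by a single wide store
// plus a reload of the whole alloca, roughly:
//
//   %ab   = shufflevector <2 x float> %a, <2 x float> %b,
//                         <4 x i32> <i32 0, i32 1, i32 2, i32 3>
//   %cd   = shufflevector <2 x float> %c, <2 x float> %d,
//                         <4 x i32> <i32 0, i32 1, i32 2, i32 3>
//   %abcd = shufflevector <4 x float> %ab, <4 x float> %cd,
//                         <8 x i32> <i32 0, i32 1, i32 2, i32 3,
//                                    i32 4, i32 5, i32 6, i32 7>
//   store <8 x float> %abcd, ptr %NewAI
//   %v    = load <8 x float>, ptr %NewAI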

3167

3168private:

3169

3170 using Base::visit;

3171

3172

3173 bool visitInstruction(Instruction &I) {

3174 LLVM_DEBUG(dbgs() << " !!!! Cannot rewrite: " << I << "\n");

3176 }

3177

3179

3180

3181 assert(IsSplit || BeginOffset == NewBeginOffset);

3182 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;

3183

3184 StringRef OldName = OldPtr->getName();

3185

3186 size_t LastSROAPrefix = OldName.rfind(".sroa.");

3188 OldName = OldName.substr(LastSROAPrefix + strlen(".sroa."));

3189

3191 if (IndexEnd != StringRef::npos && OldName[IndexEnd] == '.') {

3192

3193 OldName = OldName.substr(IndexEnd + 1);

3195 if (OffsetEnd != StringRef::npos && OldName[OffsetEnd] == '.')

3196

3197 OldName = OldName.substr(OffsetEnd + 1);

3198 }

3199 }

3200

3201 OldName = OldName.substr(0, OldName.find(".sroa_"));

3202

3205 PointerTy, Twine(OldName) + ".");

3206 }

3207

3208

3209

3210

3211

3212

3213 Align getSliceAlign() {

3215 NewBeginOffset - NewAllocaBeginOffset);

3216 }

3217

3218 unsigned getIndex(uint64_t Offset) {

3219 assert(VecTy && "Can only call getIndex when rewriting a vector");

3220 uint64_t RelOffset = Offset - NewAllocaBeginOffset;

3221 assert(RelOffset / ElementSize < UINT32_MAX && "Index out of bounds");

3222 uint32_t Index = RelOffset / ElementSize;

3223 assert(Index * ElementSize == RelOffset);

3225 }
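
// Worked example for getIndex (numbers illustrative): with ElementSize == 4
// (say a <4 x i32> slice) and NewAllocaBeginOffset == 16, an access at byte
// offset 24 gives RelOffset == 8 and Index == 2, i.e. the third element.
// Offsets that are not element-aligned trip the assertion above.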

3226

3227 void deleteIfTriviallyDead(Value *V) {

3230 Pass.DeadInsts.push_back(I);

3231 }

3232

3233 Value *rewriteVectorizedLoadInst(LoadInst &LI) {

3234 unsigned BeginIndex = getIndex(NewBeginOffset);

3235 unsigned EndIndex = getIndex(NewEndOffset);

3236 assert(EndIndex > BeginIndex && "Empty vector!");

3237

3240

3241 Load->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,

3242 LLVMContext::MD_access_group});

3243 return extractVector(IRB, Load, BeginIndex, EndIndex, "vec");

3244 }

3245

3246 Value *rewriteIntegerLoad(LoadInst &LI) {

3247 assert(IntTy && "We cannot insert an integer to the alloca");

3252 assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");

3253 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;

3254 if (Offset > 0 || NewEndOffset < NewAllocaEndOffset) {

3255 IntegerType *ExtractTy = Type::getIntNTy(LI.getContext(), SliceSize * 8);

3257 }

3258

3259

3260

3261

3262

3264 "Can only handle an extract for an overly wide load");

3266 V = IRB.CreateZExt(V, LI.getType());

3267 return V;

3268 }

3269

3270 bool visitLoadInst(LoadInst &LI) {

3273 assert(OldOp == OldPtr);

3274

3276

3278

3279 Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), SliceSize * 8)

3281 bool IsPtrAdjusted = false;

3283 if (VecTy) {

3284 V = rewriteVectorizedLoadInst(LI);

3286 V = rewriteIntegerLoad(LI);

3287 } else if (NewBeginOffset == NewAllocaBeginOffset &&

3288 NewEndOffset == NewAllocaEndOffset &&

3291 DL.getTypeStoreSize(TargetTy).getFixedValue() > SliceSize &&

3293 Value *NewPtr =

3294 getPtrToNewAI(LI.getPointerAddressSpace(), LI.isVolatile());

3295 LoadInst *NewLI = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), NewPtr,

3296 NewAI.getAlign(), LI.isVolatile(),

3297 LI.getName());

3298 if (LI.isVolatile())

3299 NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());

3300 if (NewLI->isAtomic())

3301 NewLI->setAlignment(LI.getAlign());

3302

3303

3304

3305

3306 copyMetadataForLoad(*NewLI, LI);

3307

3308

3309 if (AATags)

3310 NewLI->setAAMetadata(AATags.adjustForAccess(

3311 NewBeginOffset - BeginOffset, NewLI->getType(), DL));

3312

3313

3314 V = NewLI;

3315

3316

3317

3318

3319 if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))

3320 if (auto *TITy = dyn_cast<IntegerType>(TargetTy))

3321 if (AITy->getBitWidth() < TITy->getBitWidth()) {

3322 V = IRB.CreateZExt(V, TITy, "load.ext");

3323 if (DL.isBigEndian())

3324 V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),

3325 "endian_shift");

3326 }

3327 } else {

3328 Type *LTy = IRB.getPtrTy(AS);

3329 LoadInst *NewLI =

3330 IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),

3332

3333 if (AATags)

3335 NewBeginOffset - BeginOffset, NewLI->getType(), DL));

3336

3339 NewLI->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,

3340 LLVMContext::MD_access_group});

3341

3342 V = NewLI;

3343 IsPtrAdjusted = true;

3344 }

3346

3347 if (IsSplit) {

3350 "Only integer type loads and stores are split");

3351 assert(SliceSize < DL.getTypeStoreSize(LI.getType()).getFixedValue() &&

3352 "Split load isn't smaller than original load");

3354 "Non-byte-multiple bit width");

3355

3357

3358

3359

3360 LIIt.setHeadBit(true);

3361 IRB.SetInsertPoint(LI.getParent(), LIIt);

3362

3363

3364

3365

3366 Value *Placeholder =

3368 false, Align(1));

3369 V = insertInteger(DL, IRB, Placeholder, V, NewBeginOffset - BeginOffset,

3370 "insert");

3372 Placeholder->replaceAllUsesWith(&LI);

3373 Placeholder->deleteValue();

3374 } else {

3376 }

3377

3378 Pass.DeadInsts.push_back(&LI);

3379 deleteIfTriviallyDead(OldOp);

3381 return !LI.isVolatile() && !IsPtrAdjusted;

3382 }

3383

3384 bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp,

3385 AAMDNodes AATags) {

3386

3387

3389 if (V->getType() != VecTy) {

3390 unsigned BeginIndex = getIndex(NewBeginOffset);

3391 unsigned EndIndex = getIndex(NewEndOffset);

3392 assert(EndIndex > BeginIndex && "Empty vector!");

3393 unsigned NumElements = EndIndex - BeginIndex;

3395 "Too many elements!");

3396 Type *SliceTy = (NumElements == 1)

3397 ? ElementTy

3398 : FixedVectorType::get(ElementTy, NumElements);

3399 if (V->getType() != SliceTy)

3401

3402

3405 V = insertVector(IRB, Old, V, BeginIndex, "vec");

3406 }

3407 StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());

3408 Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,

3409 LLVMContext::MD_access_group});

3410 if (AATags)

3412 V->getType(), DL));

3413 Pass.DeadInsts.push_back(&SI);

3414

3415

3416 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,

3417 Store, Store->getPointerOperand(), OrigV, DL);

3419 return true;

3420 }

3421

3422 bool rewriteIntegerStore(Value *V, StoreInst &SI, AAMDNodes AATags) {

3423 assert(IntTy && "We cannot extract an integer from the alloca");

3425 if (DL.getTypeSizeInBits(V->getType()).getFixedValue() !=

3428 NewAI.getAlign(), "oldload");

3430 assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");

3431 uint64_t Offset = BeginOffset - NewAllocaBeginOffset;

3433 }

3435 StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());

3436 Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,

3437 LLVMContext::MD_access_group});

3438 if (AATags)

3440 V->getType(), DL));

3441

3442 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,

3443 Store, Store->getPointerOperand(),

3444 Store->getValueOperand(), DL);

3445

3446 Pass.DeadInsts.push_back(&SI);

3448 return true;

3449 }

3450

3451 bool visitStoreInst(StoreInst &SI) {

3453 Value *OldOp = SI.getOperand(1);

3454 assert(OldOp == OldPtr);

3455

3456 AAMDNodes AATags = SI.getAAMetadata();

3457 Value *V = SI.getValueOperand();

3458

3459

3460

3461 if (V->getType()->isPointerTy())

3463 Pass.PostPromotionWorklist.insert(AI);

3464

3465 TypeSize StoreSize = DL.getTypeStoreSize(V->getType());

3468 assert(V->getType()->isIntegerTy() &&

3469 "Only integer type loads and stores are split");

3470 assert(DL.typeSizeEqualsStoreSize(V->getType()) &&

3471 "Non-byte-multiple bit width");

3472 IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), SliceSize * 8);

3473 V = extractInteger(DL, IRB, V, NarrowTy, NewBeginOffset - BeginOffset,

3474 "extract");

3475 }

3476

3477 if (VecTy)

3478 return rewriteVectorizedStoreInst(V, SI, OldOp, AATags);

3479 if (IntTy && V->getType()->isIntegerTy())

3480 return rewriteIntegerStore(V, SI, AATags);

3481

3482 StoreInst *NewSI;

3483 if (NewBeginOffset == NewAllocaBeginOffset &&

3484 NewEndOffset == NewAllocaEndOffset &&

3488 getPtrToNewAI(SI.getPointerAddressSpace(), SI.isVolatile());

3489

3490 NewSI =

3491 IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), SI.isVolatile());

3492 } else {

3493 unsigned AS = SI.getPointerAddressSpace();

3494 Value *NewPtr = getNewAllocaSlicePtr(IRB, IRB.getPtrTy(AS));

3495 NewSI =

3496 IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(), SI.isVolatile());

3497 }

3498 NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,

3499 LLVMContext::MD_access_group});

3500 if (AATags)

3502 V->getType(), DL));

3503 if (SI.isVolatile())

3504 NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID());

3507

3508 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,

3511

3512 Pass.DeadInsts.push_back(&SI);

3513 deleteIfTriviallyDead(OldOp);

3514

3518 SI.isVolatile();

3519 }

3520

3521

3522

3523

3524

3525

3526

3527

3528

3529

3531 assert(Size > 0 && "Expected a positive number of bytes.");

3533 assert(VTy->getBitWidth() == 8 && "Expected an i8 value for the byte");

3534 if (Size == 1)

3535 return V;

3536

3538 V = IRB.CreateMul(

3539 IRB.CreateZExt(V, SplatIntTy, "zext"),

3542 SplatIntTy)),

3543 "isplat");

3544 return V;

3545 }

3546

3547

3549 V = IRB.CreateVectorSplat(NumElements, V, "vsplat");

3551 return V;

3552 }
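
// Worked example for the splat helpers (values illustrative): for a memset
// byte of 0xAB and Size == 4, getIntegerSplat zero-extends the i8 to i32 and
// multiplies by the 0x01010101 pattern, yielding 0xABABABAB; getVectorSplat
// instead broadcasts the byte with an IR-level vector splat ("vsplat").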

3553

3554 bool visitMemSetInst(MemSetInst &II) {

3556 assert(II.getRawDest() == OldPtr);

3557

3558 AAMDNodes AATags = II.getAAMetadata();

3559

3560

3561

3564 assert(NewBeginOffset == BeginOffset);

3565 II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType()));

3566 II.setDestAlignment(getSliceAlign());

3567

3568

3569

3571 "AT: Unexpected link to non-const GEP");

3572 deleteIfTriviallyDead(OldPtr);

3573 return false;

3574 }

3575

3576

3577 Pass.DeadInsts.push_back(&II);

3578

3581

3582 const bool CanContinue = [&]() {

3583 if (VecTy || IntTy)

3584 return true;

3585 if (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset)

3586 return false;

3587

3589 const uint64_t Len = C->getLimitedValue();

3590 if (Len > std::numeric_limits::max())

3591 return false;

3592 auto *Int8Ty = IntegerType::getInt8Ty(NewAI.getContext());

3595 DL.isLegalInteger(DL.getTypeSizeInBits(ScalarTy).getFixedValue());

3596 }();

3597

3598

3599

3600 if (!CanContinue) {

3601 Type *SizeTy = II.getLength()->getType();

3602 unsigned Sz = NewEndOffset - NewBeginOffset;

3603 Constant *Size = ConstantInt::get(SizeTy, Sz);

3605 getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,

3606 MaybeAlign(getSliceAlign()), II.isVolatile()));

3607 if (AATags)

3608 New->setAAMetadata(

3609 AATags.adjustForAccess(NewBeginOffset - BeginOffset, Sz));

3610

3611 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,

3612 New, New->getRawDest(), nullptr, DL);

3613

3615 return false;

3616 }

3617

3618

3619

3620

3621

3622

3624

3625 if (VecTy) {

3626

3627 assert(ElementTy == ScalarTy);

3628

3629 unsigned BeginIndex = getIndex(NewBeginOffset);

3630 unsigned EndIndex = getIndex(NewEndOffset);

3631 assert(EndIndex > BeginIndex && "Empty vector!");

3632 unsigned NumElements = EndIndex - BeginIndex;

3634 "Too many elements!");

3635

3637 II.getValue(), DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8);

3639 if (NumElements > 1)

3641

3643 NewAI.getAlign(), "oldload");

3645 } else if (IntTy) {

3646

3647

3649

3650 uint64_t Size = NewEndOffset - NewBeginOffset;

3651 V = getIntegerSplat(II.getValue(), Size);

3652

3653 if (IntTy && (BeginOffset != NewAllocaBeginOffset ||

3654 EndOffset != NewAllocaBeginOffset)) {

3656 NewAI.getAlign(), "oldload");

3658 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;

3660 } else {

3661 assert(V->getType() == IntTy &&

3662 "Wrong type for an alloca wide integer!");

3663 }

3665 } else {

3666

3667 assert(NewBeginOffset == NewAllocaBeginOffset);

3668 assert(NewEndOffset == NewAllocaEndOffset);

3669

3670 V = getIntegerSplat(II.getValue(),

3671 DL.getTypeSizeInBits(ScalarTy).getFixedValue() / 8);

3675

3677 }

3678

3679 Value *NewPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());

3680 StoreInst *New =

3681 IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), II.isVolatile());

3682 New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,

3683 LLVMContext::MD_access_group});

3684 if (AATags)

3685 New->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,

3686 V->getType(), DL));

3687

3688 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,

3689 New, New->getPointerOperand(), V, DL);

3690

3692 return II.isVolatile();

3693 }

3694

3695 bool visitMemTransferInst(MemTransferInst &II) {

3696

3697

3698

3700

3701 AAMDNodes AATags = II.getAAMetadata();

3702

3703 bool IsDest = &II.getRawDestUse() == OldUse;

3704 assert((IsDest && II.getRawDest() == OldPtr) ||

3705 (!IsDest && II.getRawSource() == OldPtr));

3706

3707 Align SliceAlign = getSliceAlign();

3708

3709

3710

3711

3712

3713

3714

3715 if (!IsSplittable) {

3716 Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());

3717 if (IsDest) {

3718

3721 DbgAssign->getAddress() == II.getDest())

3722 DbgAssign->replaceVariableLocationOp(II.getDest(), AdjustedPtr);

3723 }

3724 II.setDest(AdjustedPtr);

3725 II.setDestAlignment(SliceAlign);

3726 } else {

3727 II.setSource(AdjustedPtr);

3728 II.setSourceAlignment(SliceAlign);

3729 }

3730

3732 deleteIfTriviallyDead(OldPtr);

3733 return false;

3734 }

3735

3736

3737

3738

3739

3740

3741

3742

3743 bool EmitMemCpy =

3744 !VecTy && !IntTy &&

3745 (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||

3746 SliceSize !=

3750

3751

3752

3753

3754 if (EmitMemCpy && &OldAI == &NewAI) {

3755

3756 assert(NewBeginOffset == BeginOffset);

3757

3758

3759 if (NewEndOffset != EndOffset)

3760 II.setLength(NewEndOffset - NewBeginOffset);

3761 return false;

3762 }

3763

3764 Pass.DeadInsts.push_back(&II);

3765

3766

3767

3768 Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest();

3769 if (AllocaInst *AI =

3771 assert(AI != &OldAI && AI != &NewAI &&

3772 "Splittable transfers cannot reach the same alloca on both ends.");

3773 Pass.Worklist.insert(AI);

3774 }

3775

3778

3779

3780 unsigned OffsetWidth = DL.getIndexSizeInBits(OtherAS);

3781 APInt OtherOffset(OffsetWidth, NewBeginOffset - BeginOffset);

3782 Align OtherAlign =

3783 (IsDest ? II.getSourceAlign() : II.getDestAlign()).valueOrOne();

3784 OtherAlign =

3785 commonAlignment(OtherAlign, OtherOffset.zextOrTrunc(64).getZExtValue());

3786

3787 if (EmitMemCpy) {

3788

3789

3790 OtherPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,

3791 OtherPtr->getName() + ".");

3792

3793 Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());

3794 Type *SizeTy = II.getLength()->getType();

3795 Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);

3796

3797 Value *DestPtr, *SrcPtr;

3798 MaybeAlign DestAlign, SrcAlign;

3799

3800 if (IsDest) {

3801 DestPtr = OurPtr;

3802 DestAlign = SliceAlign;

3803 SrcPtr = OtherPtr;

3804 SrcAlign = OtherAlign;

3805 } else {

3806 DestPtr = OtherPtr;

3807 DestAlign = OtherAlign;

3808 SrcPtr = OurPtr;

3809 SrcAlign = SliceAlign;

3810 }

3811 CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,

3812 Size, II.isVolatile());

3813 if (AATags)

3814 New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));

3815

3816 APInt Offset(DL.getIndexTypeSizeInBits(DestPtr->getType()), 0);

3817 if (IsDest) {

3818 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8,

3819 &II, New, DestPtr, nullptr, DL);

3822 DL, Offset, true))) {

3824 SliceSize * 8, &II, New, DestPtr, nullptr, DL);

3825 }

3827 return false;

3828 }

3829

3830 bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset &&

3831 NewEndOffset == NewAllocaEndOffset;

3832 uint64_t Size = NewEndOffset - NewBeginOffset;

3833 unsigned BeginIndex = VecTy ? getIndex(NewBeginOffset) : 0;

3834 unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;

3835 unsigned NumElements = EndIndex - BeginIndex;

3836 IntegerType *SubIntTy =

3837 IntTy ? Type::getIntNTy(IntTy->getContext(), Size * 8) : nullptr;

3838

3839

3840

3841 Type *OtherTy;

3842 if (VecTy && !IsWholeAlloca) {

3843 if (NumElements == 1)

3844 OtherTy = VecTy->getElementType();

3845 else

3847 } else if (IntTy && !IsWholeAlloca) {

3848 OtherTy = SubIntTy;

3849 } else {

3850 OtherTy = NewAllocaTy;

3851 }

3852

3854 OtherPtr->getName() + ".");

3855 MaybeAlign SrcAlign = OtherAlign;

3856 MaybeAlign DstAlign = SliceAlign;

3857 if (!IsDest)

3859

3862

3863 if (IsDest) {

3864 DstPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());

3865 SrcPtr = AdjPtr;

3866 } else {

3867 DstPtr = AdjPtr;

3868 SrcPtr = getPtrToNewAI(II.getSourceAddressSpace(), II.isVolatile());

3869 }

3870

3872 if (VecTy && !IsWholeAlloca && !IsDest) {

3873 Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,

3875 Src = extractVector(IRB, Src, BeginIndex, EndIndex, "vec");

3876 } else if (IntTy && !IsWholeAlloca && !IsDest) {

3877 Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,

3880 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;

3882 } else {

3883 LoadInst *Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,

3884 II.isVolatile(), "copyload");

3885 Load->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,

3886 LLVMContext::MD_access_group});

3887 if (AATags)

3888 Load->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,

3889 Load->getType(), DL));

3891 }

3892

3893 if (VecTy && !IsWholeAlloca && IsDest) {

3895 NewAI.getAlign(), "oldload");

3896 Src = insertVector(IRB, Old, Src, BeginIndex, "vec");

3897 } else if (IntTy && !IsWholeAlloca && IsDest) {

3899 NewAI.getAlign(), "oldload");

3901 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;

3904 }

3905

3907 IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));

3908 Store->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,

3909 LLVMContext::MD_access_group});

3910 if (AATags)

3912 Src->getType(), DL));

3913

3914 APInt Offset(DL.getIndexTypeSizeInBits(DstPtr->getType()), 0);

3915 if (IsDest) {

3916

3917 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,

3918 Store, DstPtr, Src, DL);

3921 DL, Offset, true))) {

3923 &II, Store, DstPtr, Src, DL);

3924 }

3925

3927 return II.isVolatile();

3928 }

3929

3930 bool visitIntrinsicInst(IntrinsicInst &II) {

3931 assert((II.isLifetimeStartOrEnd() || II.isDroppable()) &&

3932 "Unexpected intrinsic!");

3934

3935

3936 Pass.DeadInsts.push_back(&II);

3937

3938 if (II.isDroppable()) {

3939 assert(II.getIntrinsicID() == Intrinsic::assume && "Expected assume");

3940

3942 return true;

3943 }

3944

3945 assert(II.getArgOperand(0) == OldPtr);

3949 if (II.getIntrinsicID() == Intrinsic::lifetime_start)

3950 New = IRB.CreateLifetimeStart(Ptr);

3951 else

3952 New = IRB.CreateLifetimeEnd(Ptr);

3953

3954 (void)New;

3956

3957 return true;

3958 }

3959

3960 void fixLoadStoreAlign(Instruction &Root) {

3961

3962

3963

3964 SmallPtrSet<Instruction *, 4> Visited;

3965 SmallVector<Instruction *, 4> Uses;

3966 Visited.insert(&Root);

3967 Uses.push_back(&Root);

3968 do {

3970

3973 continue;

3974 }

3976 SI->setAlignment(std::min(SI->getAlign(), getSliceAlign()));

3977 continue;

3978 }

3979

3983 for (User *U : I->users())

3986 } while (!Uses.empty());

3987 }

3988

3989 bool visitPHINode(PHINode &PN) {

3991 assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable");

3992 assert(EndOffset <= NewAllocaEndOffset && "PHIs are unsplittable");

3993

3994

3995

3996

3997

3998 IRBuilderBase::InsertPointGuard Guard(IRB);

4000 IRB.SetInsertPoint(OldPtr->getParent(),

4001 OldPtr->getParent()->getFirstInsertionPt());

4002 else

4003 IRB.SetInsertPoint(OldPtr);

4004 IRB.SetCurrentDebugLocation(OldPtr->getDebugLoc());

4005

4006 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());

4007

4009

4011 deleteIfTriviallyDead(OldPtr);

4012

4013

4014 fixLoadStoreAlign(PN);

4015

4016

4017

4018

4019 PHIUsers.insert(&PN);

4020 return true;

4021 }

4022

4023 bool visitSelectInst(SelectInst &SI) {

4025 assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) &&

4026 "Pointer isn't an operand!");

4027 assert(BeginOffset >= NewAllocaBeginOffset && "Selects are unsplittable");

4028 assert(EndOffset <= NewAllocaEndOffset && "Selects are unsplittable");

4029

4030 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());

4031

4032 if (SI.getOperand(1) == OldPtr)

4033 SI.setOperand(1, NewPtr);

4034 if (SI.getOperand(2) == OldPtr)

4035 SI.setOperand(2, NewPtr);

4036

4038 deleteIfTriviallyDead(OldPtr);

4039

4040

4041 fixLoadStoreAlign(SI);

4042

4043

4044

4045

4046 SelectUsers.insert(&SI);

4047 return true;

4048 }

4049};

4050

4051

4052

4053

4054

4055

4056class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {

4057

4058 friend class InstVisitor<AggLoadStoreRewriter, bool>;

4059

4060

4062

4063

4064 SmallPtrSet<User *, 8> Visited;

4065

4066

4067

4068 Use *U = nullptr;

4069

4070

4071 const DataLayout &DL;

4072

4073 IRBuilderTy &IRB;

4074

4075public:

4076 AggLoadStoreRewriter(const DataLayout &DL, IRBuilderTy &IRB)

4077 : DL(DL), IRB(IRB) {}

4078

4079

4080

4081 bool rewrite(Instruction &I) {

4082 LLVM_DEBUG(dbgs() << " Rewriting FCA loads and stores...\n");

4083 enqueueUsers(I);

4085 while (!Queue.empty()) {

4086 U = Queue.pop_back_val();

4088 }

4090 }

4091

4092private:

4093

4094

4095 void enqueueUsers(Instruction &I) {

4096 for (Use &U : I.uses())

4097 if (Visited.insert(U.getUser()).second)

4098 Queue.push_back(&U);

4099 }

4100

4101

4102 bool visitInstruction(Instruction &I) { return false; }

4103

4104

4105 template <typename Derived> class OpSplitter {

4106 protected:

4107

4108 IRBuilderTy &IRB;

4109

4110

4111

4112 SmallVector<unsigned, 4> Indices;

4113

4114

4115

4117

4118

4119

4121

4122

4123 Type *BaseTy;

4124

4125

4126 Align BaseAlign;

4127

4128

4129

4130 const DataLayout &DL;

4131

4132

4133

4134 OpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,

4135 Align BaseAlign, const DataLayout &DL, IRBuilderTy &IRB)

4136 : IRB(IRB), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr), BaseTy(BaseTy),

4137 BaseAlign(BaseAlign), DL(DL) {

4138 IRB.SetInsertPoint(InsertionPoint);

4139 }

4140

4141 public:

4142

4143

4144

4145

4146

4147

4148

4149

4150

4151

4152

4153

4154

4155 void emitSplitOps(Type *Ty, Value *&Agg, const Twine &Name) {

4157 unsigned Offset = DL.getIndexedOffsetInType(BaseTy, GEPIndices);

4158 return static_cast<Derived *>(this)->emitFunc(

4160 }

4161

4163 unsigned OldSize = Indices.size();

4164 (void)OldSize;

4165 for (unsigned Idx = 0, Size = ATy->getNumElements(); Idx != Size;

4166 ++Idx) {

4167 assert(Indices.size() == OldSize && "Did not return to the old size");

4169 GEPIndices.push_back(IRB.getInt32(Idx));

4170 emitSplitOps(ATy->getElementType(), Agg, Name + "." + Twine(Idx));

4173 }

4174 return;

4175 }

4176

4178 unsigned OldSize = Indices.size();

4179 (void)OldSize;

4180 for (unsigned Idx = 0, Size = STy->getNumElements(); Idx != Size;

4181 ++Idx) {

4182 assert(Indices.size() == OldSize && "Did not return to the old size");

4184 GEPIndices.push_back(IRB.getInt32(Idx));

4185 emitSplitOps(STy->getElementType(Idx), Agg, Name + "." + Twine(Idx));

4188 }

4189 return;

4190 }

4191

4192 llvm_unreachable("Only arrays and structs are aggregate loadable types");

4193 }

4194 };

4195

4196 struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> {

4197 AAMDNodes AATags;

4198

4199

4201

4202

4204

4205 LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,

4206 AAMDNodes AATags, Align BaseAlign, const DataLayout &DL,

4207 IRBuilderTy &IRB)

4208 : OpSplitter(InsertionPoint, Ptr, BaseTy, BaseAlign, DL,

4209 IRB),

4210 AATags(AATags) {}

4211

4212

4213

4214 void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {

4216

4218 IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");

4219 LoadInst *Load =

4220 IRB.CreateAlignedLoad(Ty, GEP, Alignment, Name + ".load");

4221

4224 if (AATags &&

4226 Load->setAAMetadata(

4228

4229

4231

4232 Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");

4234 }

4235

4236

4237 void recordFakeUses(LoadInst &LI) {

4238 for (Use &U : LI.uses())

4240 if (II->getIntrinsicID() == Intrinsic::fake_use)

4242 }

4243

4244

4245

4246 void emitFakeUses() {

4247 for (Instruction *I : FakeUses) {

4248 IRB.SetInsertPoint(I);

4249 for (auto *V : Components)

4250 IRB.CreateIntrinsic(Intrinsic::fake_use, {V});

4251 I->eraseFromParent();

4252 }

4253 }

4254 };

4255

4256 bool visitLoadInst(LoadInst &LI) {

4259 return false;

4260

4261

4265 Splitter.recordFakeUses(LI);

4267 Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca");

4268 Splitter.emitFakeUses();

4269 Visited.erase(&LI);

4272 return true;

4273 }

4274

4275 struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> {

4276 StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,

4277 AAMDNodes AATags, StoreInst *AggStore, Align BaseAlign,

4278 const DataLayout &DL, IRBuilderTy &IRB)

4279 : OpSplitter(InsertionPoint, Ptr, BaseTy, BaseAlign,

4280 DL, IRB),

4281 AATags(AATags), AggStore(AggStore) {}

4282 AAMDNodes AATags;

4283 StoreInst *AggStore;

4284

4285

4286 void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {

4288

4289

4290

4291

4292 Value *ExtractValue =

4293 IRB.CreateExtractValue(Agg, Indices, Name + ".extract");

4294 Value *InBoundsGEP =

4295 IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");

4296 StoreInst *Store =

4297 IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment);

4298

4302 if (AATags) {

4305 }

4306

4307

4308

4309

4310

4313 uint64_t SizeInBits =

4314 DL.getTypeSizeInBits(Store->getValueOperand()->getType());

4316 SizeInBits, AggStore, Store,

4317 Store->getPointerOperand(), Store->getValueOperand(),

4318 DL);

4319 } else {

4321 "AT: unexpected debug.assign linked to store through "

4322 "unbounded GEP");

4323 }

4325 }

4326 };

4327

4328 bool visitStoreInst(StoreInst &SI) {

4329 if (!SI.isSimple() || SI.getPointerOperand() != *U)

4330 return false;

4331 Value *V = SI.getValueOperand();

4332 if (V->getType()->isSingleValueType())

4333 return false;

4334

4335

4337 StoreOpSplitter Splitter(&SI, *U, V->getType(), SI.getAAMetadata(), &SI,

4339 Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca");

4340 Visited.erase(&SI);

4341

4342

4344 SI.eraseFromParent();

4345 return true;

4346 }
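
// Illustrative effect of the aggregate load/store splitters above (names
// hypothetical): a first-class-aggregate store such as
//
//   store { i32, float } %agg, ptr %p
//
// is rewritten field by field, along these lines:
//
//   %agg.fca.0.extract = extractvalue { i32, float } %agg, 0
//   %agg.fca.0.gep     = getelementptr inbounds { i32, float }, ptr %p, i32 0, i32 0
//   store i32 %agg.fca.0.extract, ptr %agg.fca.0.gep
//   %agg.fca.1.extract = extractvalue { i32, float } %agg, 1
//   %agg.fca.1.gep     = getelementptr inbounds { i32, float }, ptr %p, i32 0, i32 1
//   store float %agg.fca.1.extract, ptr %agg.fca.1.gep
//
// Aggregate loads are handled symmetrically: one load per leaf field feeding
// a chain of insertvalue instructions.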

4347

4348 bool visitBitCastInst(BitCastInst &BC) {

4349 enqueueUsers(BC);

4350 return false;

4351 }

4352

4353 bool visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {

4354 enqueueUsers(ASC);

4355 return false;

4356 }

4357

4358

4359

4360

4361

4362

4363 bool unfoldGEPSelect(GetElementPtrInst &GEPI) {

4364

4365

4369 if (Sel)

4370 return false;

4371

4372 Sel = SI;

4375 return false;

4376 continue;

4377 }

4379 if (Sel)

4380 return false;

4381 Sel = ZI;

4382 if (!ZI->getSrcTy()->isIntegerTy(1))

4383 return false;

4384 continue;

4385 }

4386

4388 return false;

4389 }

4390

4391 if (!Sel)

4392 return false;

4393

4394 LLVM_DEBUG(dbgs() << " Rewriting gep(select) -> select(gep):\n";

4395 dbgs() << " original: " << *Sel << "\n";

4396 dbgs() << " " << GEPI << "\n";);

4397

4398 auto GetNewOps = [&](Value *SelOp) {

4401 if (Op == Sel)

4403 else

4405 return NewOps;

4406 };

4407

4411 Cond = SI->getCondition();

4412 True = SI->getTrueValue();

4413 False = SI->getFalseValue();

4415 MDFrom = SI;

4416 } else {

4417 Cond = Sel->getOperand(0);

4418 True = ConstantInt::get(Sel->getType(), 1);

4419 False = ConstantInt::get(Sel->getType(), 0);

4420 }

4423

4424 IRB.SetInsertPoint(&GEPI);

4426

4428 Value *NTrue = IRB.CreateGEP(Ty, TrueOps[0], ArrayRef(TrueOps).drop_front(),

4429 True->getName() + ".sroa.gep", NW);

4430

4432 IRB.CreateGEP(Ty, FalseOps[0], ArrayRef(FalseOps).drop_front(),

4433 False->getName() + ".sroa.gep", NW);

4434

4435 Value *NSel = MDFrom

4436 ? IRB.CreateSelect(Cond, NTrue, NFalse,

4437 Sel->getName() + ".sroa.sel", MDFrom)

4438 : IRB.CreateSelectWithUnknownProfile(

4440 Sel->getName() + ".sroa.sel");

4441 Visited.erase(&GEPI);

4445 Visited.insert(NSelI);

4446 enqueueUsers(*NSelI);

4447

4449 dbgs() << " " << *NFalse << "\n";

4450 dbgs() << " " << *NSel << "\n";);

4451

4452 return true;

4453 }
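
// Illustrative rewrite performed by unfoldGEPSelect (names hypothetical):
//
//   %ptr = select i1 %cond, ptr %a, ptr %b
//   %elt = getelementptr i32, ptr %ptr, i64 1
//
// becomes
//
//   %a.sroa.gep   = getelementptr i32, ptr %a, i64 1
//   %b.sroa.gep   = getelementptr i32, ptr %b, i64 1
//   %ptr.sroa.sel = select i1 %cond, ptr %a.sroa.gep, ptr %b.sroa.gep
//
// with the original GEP's users redirected to the new select, so slice
// analysis sees direct pointer uses on each arm.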

4454

4455

4456

4457

4458

4459 bool unfoldGEPPhi(GetElementPtrInst &GEPI) {

4460

4461

4462

4464 auto IsInvalidPointerOperand = [](Value *V) {

4466 return false;

4468 return !AI->isStaticAlloca();

4469 return true;

4470 };

4471 if (Phi) {

4472 if (any_of(Phi->operands(), IsInvalidPointerOperand))

4473 return false;

4474 } else {

4476 return false;

4477 }

4478

4479

4482 if (Phi)

4483 return false;

4484

4486 if (!all_of(Phi->incoming_values(),

4487 [](Value *V) { return isa<ConstantInt>(V); }))

4488 return false;

4489 continue;

4490 }

4491

4493 return false;

4494 }

4495

4496 if (!Phi)

4497 return false;

4498

4499 LLVM_DEBUG(dbgs() << " Rewriting gep(phi) -> phi(gep):\n";

4500 dbgs() << " original: " << *Phi << "\n";

4501 dbgs() << " " << GEPI << "\n";);

4502

4503 auto GetNewOps = [&](Value *PhiOp) {

4506 if (Op == Phi)

4508 else

4510 return NewOps;

4511 };

4512

4513 IRB.SetInsertPoint(Phi);

4514 PHINode *NewPhi = IRB.CreatePHI(GEPI.getType(), Phi->getNumIncomingValues(),

4515 Phi->getName() + ".sroa.phi");

4516

4518

4519

4521 for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {

4527 } else {

4529 NewGEP =

4530 IRB.CreateGEP(SourceTy, NewOps[0], ArrayRef(NewOps).drop_front(),

4532 }

4534 }

4535

4536 Visited.erase(&GEPI);

4539 Visited.insert(NewPhi);

4540 enqueueUsers(*NewPhi);

4541

4545 << "\n " << *In;

4546 dbgs() << "\n " << *NewPhi << '\n');

4547

4548 return true;

4549 }
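
// unfoldGEPPhi applies the analogous gep(phi) -> phi(gep) rewrite: the GEP is
// recreated on each incoming path and a new phi named "<phi>.sroa.phi" merges
// the per-edge results, again so SROA sees direct pointer uses rather than a
// GEP of a phi. (Sketch only; exact placement and naming follow the code
// above.)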

4550

4551 bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {

4552 if (unfoldGEPSelect(GEPI))

4553 return true;

4554

4555 if (unfoldGEPPhi(GEPI))

4556 return true;

4557

4558 enqueueUsers(GEPI);

4559 return false;

4560 }

4561

4562 bool visitPHINode(PHINode &PN) {

4563 enqueueUsers(PN);

4564 return false;

4565 }

4566

4567 bool visitSelectInst(SelectInst &SI) {

4568 enqueueUsers(SI);

4569 return false;

4570 }

4571};

4572

4573}

4574

4575

4576

4577

4578

4579

4581 if (Ty->isSingleValueType())

4582 return Ty;

4583

4584 uint64_t AllocSize = DL.getTypeAllocSize(Ty).getFixedValue();

4586

4587 Type *InnerTy;

4589 InnerTy = ArrTy->getElementType();

4593 InnerTy = STy->getElementType(Index);

4594 } else {

4595 return Ty;

4596 }

4597

4598 if (AllocSize > DL.getTypeAllocSize(InnerTy).getFixedValue() ||

4599 TypeSize > DL.getTypeSizeInBits(InnerTy).getFixedValue())

4600 return Ty;

4601

4603}

4604

4605

4606

4607

4608

4609

4610

4611

4612

4613

4614

4615

4616

4617

4620 if (Offset == 0 && DL.getTypeAllocSize(Ty).getFixedValue() == Size)

4622 if (Offset > DL.getTypeAllocSize(Ty).getFixedValue() ||

4623 (DL.getTypeAllocSize(Ty).getFixedValue() - Offset) < Size)

4624 return nullptr;

4625

4627 Type *ElementTy;

4630 ElementTy = AT->getElementType();

4631 TyNumElements = AT->getNumElements();

4632 } else {

4633

4634

4636 ElementTy = VT->getElementType();

4637 TyNumElements = VT->getNumElements();

4638 }

4639 uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();

4641 if (NumSkippedElements >= TyNumElements)

4642 return nullptr;

4643 Offset -= NumSkippedElements * ElementSize;

4644

4645

4646 if (Offset > 0 || Size < ElementSize) {

4647

4649 return nullptr;

4650

4652 }

4654

4655 if (Size == ElementSize)

4659 if (NumElements * ElementSize != Size)

4660 return nullptr;

4662 }

4663

4665 if (!STy)

4666 return nullptr;

4667

4669

4671 return nullptr;

4672

4674 return nullptr;

4677 return nullptr;

4678

4681

4683 uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();

4684 if (Offset >= ElementSize)

4685 return nullptr;

4686

4687

4688 if (Offset > 0 || Size < ElementSize) {

4690 return nullptr;

4692 }

4694

4695 if (Size == ElementSize)

4697

4702 if (Index == EndIndex)

4703 return nullptr;

4704

4705

4706

4707

4708

4710 return nullptr;

4711

4712 assert(Index < EndIndex);

4714 }

4715

4716

4719 const StructLayout *SubSL = DL.getStructLayout(SubTy);

4721 return nullptr;

4722

4723 return SubTy;

4724}

4725

4726

4727

4728

4729

4730

4731

4732

4733

4734

4735

4736

4737

4738

4739

4740

4741

4742

4743

4744

4745

4746

4747

4748

4749

4750

4751bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {

4752 LLVM_DEBUG(dbgs() << "Pre-splitting loads and stores\n");

4753

4754

4755

4756

4757

4760

4761

4762

4763

4764

4765 struct SplitOffsets {

4766 Slice *S;

4767 std::vector<uint64_t> Splits;

4768 };

4769 SmallDenseMap<Instruction *, SplitOffsets, 8> SplitOffsetsMap;

4770

4771

4772

4773

4774

4775

4776

4777

4778

4779

4780

4781

4782 SmallPtrSet<LoadInst *, 8> UnsplittableLoads;

4783

4784 LLVM_DEBUG(dbgs() << " Searching for candidate loads and stores\n");

4785 for (auto &P : AS.partitions()) {

4786 for (Slice &S : P) {

4788 if (!S.isSplittable() || S.endOffset() <= P.endOffset()) {

4789

4790

4791

4793 UnsplittableLoads.insert(LI);

4796 UnsplittableLoads.insert(LI);

4797 continue;

4798 }

4799 assert(P.endOffset() > S.beginOffset() &&

4800 "Empty or backwards partition!");

4801

4802

4804 assert(!LI->isVolatile() && "Cannot split volatile loads!");

4805

4806

4807

4808

4809 auto IsLoadSimplyStored = [](LoadInst *LI) {

4810 for (User *LU : LI->users()) {

4812 if (!SI || !SI->isSimple())

4813 return false;

4814 }

4815 return true;

4816 };

4817 if (!IsLoadSimplyStored(LI)) {

4818 UnsplittableLoads.insert(LI);

4819 continue;

4820 }

4821

4824 if (S.getUse() != &SI->getOperandUse(SI->getPointerOperandIndex()))

4825

4826 continue;

4828 if (!StoredLoad || !StoredLoad->isSimple())

4829 continue;

4830 assert(!SI->isVolatile() && "Cannot split volatile stores!");

4831

4833 } else {

4834

4835 continue;

4836 }

4837

4838

4840 auto &Offsets = SplitOffsetsMap[I];

4842 "Should not have splits the first time we see an instruction!");

4844 Offsets.Splits.push_back(P.endOffset() - S.beginOffset());

4845 }

4846

4847

4848

4849 for (Slice *S : P.splitSliceTails()) {

4850 auto SplitOffsetsMapI =

4852 if (SplitOffsetsMapI == SplitOffsetsMap.end())

4853 continue;

4854 auto &Offsets = SplitOffsetsMapI->second;

4855

4856 assert(Offsets.S == S && "Found a mismatched slice!");

4858 "Cannot have an empty set of splits on the second partition!");

4860 P.beginOffset() - Offsets.S->beginOffset() &&

4861 "Previous split does not end where this one begins!");

4862

4863

4864

4865 if (S->endOffset() > P.endOffset())

4866 Offsets.Splits.push_back(P.endOffset() - Offsets.S->beginOffset());

4867 }

4868 }

4869

4870

4871

4872

4873

4874 llvm::erase_if(Stores, [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {

4875

4876

4878

4879

4880 if (UnsplittableLoads.count(LI))

4881 return true;

4882

4883 auto LoadOffsetsI = SplitOffsetsMap.find(LI);

4884 if (LoadOffsetsI == SplitOffsetsMap.end())

4885 return false;

4886 auto &LoadOffsets = LoadOffsetsI->second;

4887

4888

4889 auto &StoreOffsets = SplitOffsetsMap[SI];

4890

4891

4892

4893

4894 if (LoadOffsets.Splits == StoreOffsets.Splits)

4895 return false;

4896

4897 LLVM_DEBUG(dbgs() << " Mismatched splits for load and store:\n"

4898 << " " << *LI << "\n"

4899 << " " << *SI << "\n");

4900

4901

4902

4903

4904

4905 UnsplittableLoads.insert(LI);

4906 return true;

4907 });

4908

4909

4910

4911

4912 llvm::erase_if(Stores, [&UnsplittableLoads](StoreInst *SI) {

4914 return UnsplittableLoads.count(LI);

4915 });

4916

4917

4918 llvm::erase_if(Loads, [&UnsplittableLoads](LoadInst *LI) {

4919 return UnsplittableLoads.count(LI);

4920 });

4921

4922

4923

4924 if (Loads.empty() && Stores.empty())

4925 return false;

4926

4927

4928

4929 IRBuilderTy IRB(&AI);

4930

4931

4933

4934

4935

4936 SmallPtrSet<AllocaInst *, 4> ResplitPromotableAllocas;

4937

4938

4939

4940

4941

4942

4943

4944

4945

4946 SmallDenseMap<LoadInst *, std::vector<LoadInst *>, 1> SplitLoadsMap;

4947 std::vector<LoadInst *> SplitLoads;

4948 const DataLayout &DL = AI.getDataLayout();

4949 for (LoadInst *LI : Loads) {

4950 SplitLoads.clear();

4951

4952 auto &Offsets = SplitOffsetsMap[LI];

4953 unsigned SliceSize = Offsets.S->endOffset() - Offsets.S->beginOffset();

4955 "Load must have type size equal to store size");

4957 "Load must be >= slice size");

4958

4959 uint64_t BaseOffset = Offsets.S->beginOffset();

4960 assert(BaseOffset + SliceSize > BaseOffset &&

4961 "Cannot represent alloca access size using 64-bit integers!");

4962

4964 IRB.SetInsertPoint(LI);

4965

4966 LLVM_DEBUG(dbgs() << " Splitting load: " << *LI << "\n");

4967

4968 uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();

4969 int Idx = 0, Size = Offsets.Splits.size();

4970 for (;;) {

4971 auto *PartTy = Type::getIntNTy(LI->getContext(), PartSize * 8);

4974 LoadInst *PLoad = IRB.CreateAlignedLoad(

4975 PartTy,

4977 APInt(DL.getIndexSizeInBits(AS), PartOffset),

4978 PartPtrTy, BasePtr->getName() + "."),

4980 false, LI->getName());

4981 PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,

4982 LLVMContext::MD_access_group});

4983

4984

4985

4986 SplitLoads.push_back(PLoad);

4987

4988

4990 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,

4992 false, nullptr));

4993 LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()

4994 << ", " << NewSlices.back().endOffset()

4995 << "): " << *PLoad << "\n");

4996

4997

4998 if (Idx >= Size)

4999 break;

5000

5001

5002 PartOffset = Offsets.Splits[Idx];

5003 ++Idx;

5004 PartSize = (Idx < Size ? Offsets.Splits[Idx] : SliceSize) - PartOffset;

5005 }

5006

5007

5008

5009

5010 bool DeferredStores = false;

5011 for (User *LU : LI->users()) {

5013 if (!Stores.empty() && SplitOffsetsMap.count(SI)) {

5014 DeferredStores = true;

5015 LLVM_DEBUG(dbgs() << " Deferred splitting of store: " << *SI

5016 << "\n");

5017 continue;

5018 }

5019

5020 Value *StoreBasePtr = SI->getPointerOperand();

5021 IRB.SetInsertPoint(SI);

5022 AAMDNodes AATags = SI->getAAMetadata();

5023

5024 LLVM_DEBUG(dbgs() << " Splitting store of load: " << *SI << "\n");

5025

5026 for (int Idx = 0, Size = SplitLoads.size(); Idx < Size; ++Idx) {

5027 LoadInst *PLoad = SplitLoads[Idx];

5028 uint64_t PartOffset = Idx == 0 ? 0 : Offsets.Splits[Idx - 1];

5029 auto *PartPtrTy = SI->getPointerOperandType();

5030

5031 auto AS = SI->getPointerAddressSpace();

5032 StoreInst *PStore = IRB.CreateAlignedStore(

5033 PLoad,

5035 APInt(DL.getIndexSizeInBits(AS), PartOffset),

5036 PartPtrTy, StoreBasePtr->getName() + "."),

5038 false);

5039 PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,

5040 LLVMContext::MD_access_group,

5041 LLVMContext::MD_DIAssignID});

5042

5043 if (AATags)

5046 LLVM_DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n");

5047 }

5048

5049

5050

5051

5052

5054 ResplitPromotableAllocas.insert(OtherAI);

5055 Worklist.insert(OtherAI);

5058 Worklist.insert(OtherAI);

5059 }

5060

5061

5062 DeadInsts.push_back(SI);

5063 }

5064

5065

5066 if (DeferredStores)

5067 SplitLoadsMap.insert(std::make_pair(LI, std::move(SplitLoads)));

5068

5069

5070 DeadInsts.push_back(LI);

5072 }

5073

5074

5075

5076

5077

5078

5079 for (StoreInst *SI : Stores) {

5083 uint64_t StoreSize = Ty->getBitWidth() / 8;

5084 assert(StoreSize > 0 && "Cannot have a zero-sized integer store!");

5085

5086 auto &Offsets = SplitOffsetsMap[SI];

5088 "Slice size should always match load size exactly!");

5089 uint64_t BaseOffset = Offsets.S->beginOffset();

5090 assert(BaseOffset + StoreSize > BaseOffset &&

5091 "Cannot represent alloca access size using 64-bit integers!");

5092

5095

5096 LLVM_DEBUG(dbgs() << " Splitting store: " << *SI << "\n");

5097

5098

5099 auto SplitLoadsMapI = SplitLoadsMap.find(LI);

5100 std::vector<LoadInst *> *SplitLoads = nullptr;

5101 if (SplitLoadsMapI != SplitLoadsMap.end()) {

5102 SplitLoads = &SplitLoadsMapI->second;

5103 assert(SplitLoads->size() == Offsets.Splits.size() + 1 &&

5104 "Too few split loads for the number of splits in the store!");

5105 } else {

5107 }

5108

5109 uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();

5110 int Idx = 0, Size = Offsets.Splits.size();

5111 for (;;) {

5112 auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);

5114 auto *StorePartPtrTy = SI->getPointerOperandType();

5115

5116

5117 LoadInst *PLoad;

5118 if (SplitLoads) {

5119 PLoad = (*SplitLoads)[Idx];

5120 } else {

5121 IRB.SetInsertPoint(LI);

5123 PLoad = IRB.CreateAlignedLoad(

5124 PartTy,

5126 APInt(DL.getIndexSizeInBits(AS), PartOffset),

5127 LoadPartPtrTy, LoadBasePtr->getName() + "."),

5129 false, LI->getName());

5130 PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,

5131 LLVMContext::MD_access_group});

5132 }

5133

5134

5135 IRB.SetInsertPoint(SI);

5136 auto AS = SI->getPointerAddressSpace();

5137 StoreInst *PStore = IRB.CreateAlignedStore(

5138 PLoad,

5140 APInt(DL.getIndexSizeInBits(AS), PartOffset),

5141 StorePartPtrTy, StoreBasePtr->getName() + "."),

5143 false);

5144 PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,

5145 LLVMContext::MD_access_group});

5146

5147

5148

5149

5151 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,

5153 false, nullptr));

5154 LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()

5155 << ", " << NewSlices.back().endOffset()

5156 << "): " << *PStore << "\n");

5157 if (!SplitLoads) {

5158 LLVM_DEBUG(dbgs() << " of split load: " << *PLoad << "\n");

5159 }

5160

5161

5162 if (Idx >= Size)

5163 break;

5164

5165

5166 PartOffset = Offsets.Splits[Idx];

5167 ++Idx;

5168 PartSize = (Idx < Size ? Offsets.Splits[Idx] : StoreSize) - PartOffset;

5169 }

5170

5171

5172

5173

5174

5175

5176 if (!SplitLoads) {

5178 assert(OtherAI != &AI && "We can't re-split our own alloca!");

5179 ResplitPromotableAllocas.insert(OtherAI);

5180 Worklist.insert(OtherAI);

5183 assert(OtherAI != &AI && "We can't re-split our own alloca!");

5184 Worklist.insert(OtherAI);

5185 }

5186 }

5187

5188

5189

5190

5191

5192

5193

5194

5195

5196

5198 assert(*LI->user_begin() == SI && "Single use isn't this store!");

5199 DeadInsts.push_back(LI);

5200 }

5201 DeadInsts.push_back(SI);

5203 }

5204

5205

5206 llvm::erase_if(AS, [](const Slice &S) { return S.isDead(); });

5207

5208

5209

5210 AS.insert(NewSlices);

5211

5213#ifndef NDEBUG

5214 for (auto I = AS.begin(), E = AS.end(); I != E; ++I)

5216#endif

5217

5218

5219

5220 PromotableAllocas.set_subtract(ResplitPromotableAllocas);

5221

5222 return true;

5223}
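
// Illustrative sketch of pre-splitting (offsets and names hypothetical): a
// splittable i64 load whose only use is a store, where slice analysis splits
// the covered bytes into two partitions at offset 4,
//
//   %v = load i64, ptr %src
//   store i64 %v, ptr %dst
//
// is pre-split on the partition boundary into matching i32 halves,
//
//   %v.0 = load i32, ptr %src
//   %v.4 = load i32, ptr %src.4
//   store i32 %v.0, ptr %dst
//   store i32 %v.4, ptr %dst.4
//
// so that every partition only ever sees loads and stores it fully contains.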

5224

5225

5226

5227

5228

5229

5230

5231

5232

5233

5234

5235AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,

5236 Partition &P) {

5237

5238

5239

5240 Type *SliceTy = nullptr;

5241 const DataLayout &DL = AI.getDataLayout();

5242 unsigned VScale = AI.getFunction()->getVScaleValue();

5243

5244 std::pair<Type *, IntegerType *> CommonUseTy =

5246

5247 if (CommonUseTy.first) {

5248 TypeSize CommonUseSize = DL.getTypeAllocSize(CommonUseTy.first);

5250 SliceTy = CommonUseTy.first;

5251 }

5252

5253 if (!SliceTy)

5255 P.beginOffset(), P.size()))

5256 SliceTy = TypePartitionTy;

5257

5258

5259 if (!SliceTy && CommonUseTy.second)

5260 if (DL.getTypeAllocSize(CommonUseTy.second).getFixedValue() >= P.size())

5261 SliceTy = CommonUseTy.second;

5262 if ((!SliceTy || (SliceTy->isArrayTy() &&

5264 DL.isLegalInteger(P.size() * 8)) {

5265 SliceTy = Type::getIntNTy(*C, P.size() * 8);

5266 }

5267

5268 if (!SliceTy)

5269 SliceTy = ArrayType::get(Type::getInt8Ty(*C), P.size());

5270 assert(DL.getTypeAllocSize(SliceTy).getFixedValue() >= P.size());

5271

5273

5276 if (VecTy)

5277 SliceTy = VecTy;

5278

5279

5280

5281

5282

5283

5284

5285 AllocaInst *NewAI;

5286 if (SliceTy == AI.getAllocatedType() && P.beginOffset() == 0) {

5287 NewAI = &AI;

5288

5289

5290

5291 } else {

5292

5294

5295

5296 const bool IsUnconstrained = Alignment <= DL.getABITypeAlign(SliceTy);

5297 NewAI = new AllocaInst(

5298 SliceTy, AI.getAddressSpace(), nullptr,

5299 IsUnconstrained ? DL.getPrefTypeAlign(SliceTy) : Alignment,

5300 AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()),

5301 AI.getIterator());

5302

5304 ++NumNewAllocas;

5305 }

5306

5307 LLVM_DEBUG(dbgs() << "Rewriting alloca partition " << "[" << P.beginOffset()

5308 << "," << P.endOffset() << ") to: " << *NewAI << "\n");

5309

5310

5311

5312

5313 unsigned PPWOldSize = PostPromotionWorklist.size();

5314 unsigned NumUses = 0;

5315 SmallSetVector<PHINode *, 8> PHIUsers;

5316 SmallSetVector<SelectInst *, 8> SelectUsers;

5317

5318 AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(),

5319 P.endOffset(), IsIntegerPromotable, VecTy,

5320 PHIUsers, SelectUsers);

5321 bool Promotable = true;

5322

5323 if (auto DeletedValues = Rewriter.rewriteTreeStructuredMerge(P)) {

5324 NumUses += DeletedValues->size() + 1;

5325 for (Value *V : *DeletedValues)

5326 DeadInsts.push_back(V);

5327 } else {

5328 for (Slice *S : P.splitSliceTails()) {

5329 Promotable &= Rewriter.visit(S);

5330 ++NumUses;

5331 }

5332 for (Slice &S : P) {

5333 Promotable &= Rewriter.visit(&S);

5334 ++NumUses;

5335 }

5336 }

5337

5338 NumAllocaPartitionUses += NumUses;

5339 MaxUsesPerAllocaPartition.updateMax(NumUses);

5340

5341

5342

5343 for (PHINode *PHI : PHIUsers)

5345 Promotable = false;

5346 PHIUsers.clear();

5347 SelectUsers.clear();

5348 break;

5349 }

5350

5352 NewSelectsToRewrite;

5353 NewSelectsToRewrite.reserve(SelectUsers.size());

5354 for (SelectInst *Sel : SelectUsers) {

5355 std::optional Ops =

5356 isSafeSelectToSpeculate(*Sel, PreserveCFG);

5357 if (!Ops) {

5358 Promotable = false;

5359 PHIUsers.clear();

5360 SelectUsers.clear();

5361 NewSelectsToRewrite.clear();

5362 break;

5363 }

5364 NewSelectsToRewrite.emplace_back(std::make_pair(Sel, *Ops));

5365 }

5366

5367 if (Promotable) {

5368 for (Use *U : AS.getDeadUsesIfPromotable()) {

5370 Value::dropDroppableUse(*U);

5371 if (OldInst)

5373 DeadInsts.push_back(OldInst);

5374 }

5375 if (PHIUsers.empty() && SelectUsers.empty()) {

5376

5377 PromotableAllocas.insert(NewAI);

5378 } else {

5379

5380

5381

5382 SpeculatablePHIs.insert_range(PHIUsers);

5383 SelectsToRewrite.reserve(SelectsToRewrite.size() +

5384 NewSelectsToRewrite.size());

5386 std::make_move_iterator(NewSelectsToRewrite.begin()),

5387 std::make_move_iterator(NewSelectsToRewrite.end())))

5388 SelectsToRewrite.insert(std::move(KV));

5389 Worklist.insert(NewAI);

5390 }

5391 } else {

5392

5393 while (PostPromotionWorklist.size() > PPWOldSize)

5394 PostPromotionWorklist.pop_back();

5395

5396

5397

5398 if (NewAI == &AI)

5399 return nullptr;

5400

5401

5402

5403

5404 Worklist.insert(NewAI);

5405 }

5406

5407 return NewAI;

5408}
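
// Example of the overall effect (hypothetical IR): given
//
//   %x = alloca { i64, i32 }
//
// with disjoint partitions covering bytes [0, 8) and [8, 12), rewritePartition
// produces one new alloca per partition, e.g. "%x.sroa.0" and "%x.sroa.<n>"
// (the suffix is the index of the partition's first slice), and each new
// alloca is queued for promotion to SSA form when all rewritten uses allow it.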

5409

5410

5411

5417

5423

5424

5425

5426

5427

5428

5429

5430

5431

5432

5433

5434

5435

5436

5437

5438

5439

5440

5441

5442

5443

5444

5445

5446

5447

5448

5451 int64_t BitExtractOffset) {

5453 bool HasFragment = false;

5454 bool HasBitExtract = false;

5455

5458 HasFragment = true;

5459 continue;

5460 }

5463 HasBitExtract = true;

5464 int64_t ExtractOffsetInBits = Op.getArg(0);

5465 int64_t ExtractSizeInBits = Op.getArg(1);

5466

5467

5468

5469

5470

5472 return nullptr;

5473

5474 assert(BitExtractOffset <= 0);

5475 int64_t AdjustedOffset = ExtractOffsetInBits + BitExtractOffset;

5476

5477

5478

5479

5480

5481 if (AdjustedOffset < 0)

5482 return nullptr;

5483

5484 Ops.push_back(Op.getOp());

5485 Ops.push_back(std::max<int64_t>(0, AdjustedOffset));

5486 Ops.push_back(ExtractSizeInBits);

5487 continue;

5488 }

5489 Op.appendToVector(Ops);

5490 }

5491

5492

5493

5494 if (HasFragment && HasBitExtract)

5495 return nullptr;

5496

5497 if (!HasBitExtract) {

5501 }

5503}

5504

5505

5506

5507

5508

5509

5510

5511

5512

5513static void

5516 std::optionalDIExpression::FragmentInfo NewFragment,

5517 int64_t BitExtractAdjustment) {

5518 (void)DIB;

5519

5520

5521

5522

5525 if (NewFragment)

5527 BitExtractAdjustment);

5528 if (!NewFragmentExpr)

5529 return;

5530

5534 BeforeInst->getParent()->insertDbgRecordBefore(DVR,

5536 return;

5537 }

5538

5542

5543

5544

5547 BeforeInst->getParent()->insertDbgRecordBefore(DVR,

5549 return;

5550 }

5551

5552

5553 if (!NewAddr->hasMetadata(LLVMContext::MD_DIAssignID)) {

5554 NewAddr->setMetadata(LLVMContext::MD_DIAssignID,

5556 }

5557

5559 NewAddr, Orig->getValue(), Orig->getVariable(), NewFragmentExpr, NewAddr,

5561 LLVM_DEBUG(dbgs() << "Created new DVRAssign: " << *NewAssign << "\n");

5562 (void)NewAssign;

5563}

5564

5565

5566

5567bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {

5568 if (AS.begin() == AS.end())

5569 return false;

5570

5571 unsigned NumPartitions = 0;

5573 const DataLayout &DL = AI.getModule()->getDataLayout();

5574

5575

5576 Changed |= presplitLoadsAndStores(AI, AS);

5577

5578

5579

5580

5581

5582

5583

5584 bool IsSorted = true;

5585

5586 uint64_t AllocaSize =

5587 DL.getTypeAllocSize(AI.getAllocatedType()).getFixedValue();

5588 const uint64_t MaxBitVectorSize = 1024;

5589 if (AllocaSize <= MaxBitVectorSize) {

5590

5591

5592 SmallBitVector SplittableOffset(AllocaSize + 1, true);

5593 for (Slice &S : AS)

5594 for (unsigned O = S.beginOffset() + 1;

5595 O < S.endOffset() && O < AllocaSize; O++)

5596 SplittableOffset.reset(O);

5597

5598 for (Slice &S : AS) {

5599 if (!S.isSplittable())

5600 continue;

5601

5602 if ((S.beginOffset() > AllocaSize || SplittableOffset[S.beginOffset()]) &&

5603 (S.endOffset() > AllocaSize || SplittableOffset[S.endOffset()]))

5604 continue;

5605

5608 S.makeUnsplittable();

5609 IsSorted = false;

5610 }

5611 }

5612 } else {

5613

5614

5615 for (Slice &S : AS) {

5616 if (!S.isSplittable())

5617 continue;

5618

5619 if (S.beginOffset() == 0 && S.endOffset() >= AllocaSize)

5620 continue;

5621

5624 S.makeUnsplittable();

5625 IsSorted = false;

5626 }

5627 }

5628 }

5629

5630 if (!IsSorted)

5632

5633

5634

5635 struct Fragment {

5636 AllocaInst *Alloca;

5638 uint64_t Size;

5639 Fragment(AllocaInst *AI, uint64_t O, uint64_t S)

5641 };

5643

5644

5645 for (auto &P : AS.partitions()) {

5646 if (AllocaInst *NewAI = rewritePartition(AI, AS, P)) {

5647 Changed = true;

5648 if (NewAI != &AI) {

5649 uint64_t SizeOfByte = 8;

5650 uint64_t AllocaSize =

5651 DL.getTypeSizeInBits(NewAI->getAllocatedType()).getFixedValue();

5652

5653 uint64_t Size = std::min(AllocaSize, P.size() * SizeOfByte);

5654 Fragments.push_back(

5655 Fragment(NewAI, P.beginOffset() * SizeOfByte, Size));

5656 }

5657 }

5658 ++NumPartitions;

5659 }

5660

5661 NumAllocaPartitions += NumPartitions;

5662 MaxPartitionsPerAlloca.updateMax(NumPartitions);

5663

5664

5665

5666 auto MigrateOne = [&](DbgVariableRecord *DbgVariable) {

5667

5669 return;

5670

5671 const Value *DbgPtr = DbgVariable->getAddress();

5673 DbgVariable->getFragmentOrEntireVariable();

5674

5675

5676 int64_t CurrentExprOffsetInBytes = 0;

5677 SmallVector<uint64_t> PostOffsetOps;

5679 ->extractLeadingOffset(CurrentExprOffsetInBytes, PostOffsetOps))

5680 return;

5681

5682

5683 int64_t ExtractOffsetInBits = 0;

5687 ExtractOffsetInBits = Op.getArg(0);

5688 break;

5689 }

5690 }

5691

5692 DIBuilder DIB(*AI.getModule(), false);

5693 for (auto Fragment : Fragments) {

5694 int64_t OffsetFromLocationInBits;

5695 std::optionalDIExpression::FragmentInfo NewDbgFragment;

5696

5697

5698

5700 DL, &AI, Fragment.Offset, Fragment.Size, DbgPtr,

5701 CurrentExprOffsetInBytes * 8, ExtractOffsetInBits, VarFrag,

5702 NewDbgFragment, OffsetFromLocationInBits))

5703 continue;

5704

5705

5706

5707

5708 if (NewDbgFragment && !NewDbgFragment->SizeInBits)

5709 continue;

5710

5711

5712

5713 if (!NewDbgFragment)

5714 NewDbgFragment = DbgVariable->getFragment();

5715

5716

5717

5718 int64_t OffestFromNewAllocaInBits =

5719 OffsetFromLocationInBits - ExtractOffsetInBits;

5720

5721

5722 int64_t BitExtractOffset =

5723 std::min<int64_t>(0, OffestFromNewAllocaInBits);

5724

5725

5726

5727

5728 OffestFromNewAllocaInBits =

5729 std::max(int64_t(0), OffestFromNewAllocaInBits);

5730

5731

5732

5733

5734

5735 DIExpression *NewExpr = DIExpression::get(AI.getContext(), PostOffsetOps);

5736 if (OffestFromNewAllocaInBits > 0) {

5737 int64_t OffsetInBytes = (OffestFromNewAllocaInBits + 7) / 8;

5739 }

5740

5741

5742

5743 auto RemoveOne = [DbgVariable](auto *OldDII) {

5744 auto SameVariableFragment = [](const auto *LHS, const auto *RHS) {

5745 return LHS->getVariable() == RHS->getVariable() &&

5746 LHS->getDebugLoc()->getInlinedAt() ==

5747 RHS->getDebugLoc()->getInlinedAt();

5748 };

5749 if (SameVariableFragment(OldDII, DbgVariable))

5750 OldDII->eraseFromParent();

5751 };

5754 insertNewDbgInst(DIB, DbgVariable, Fragment.Alloca, NewExpr, &AI,

5755 NewDbgFragment, BitExtractOffset);

5756 }

5757 };

5758

5759

5760

5764

5766}

5767

5768

5769void SROA::clobberUse(Use &U) {

5771

5773

5774

5775

5776

5779 DeadInsts.push_back(OldI);

5780 }

5781}

5782

5783

5785public:

5792

5796

5797private:

5798 Type *ZeroType;

5799};

5800

5801bool SROA::propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS) {

5802

5803

5804

5805

5806 LLVM_DEBUG(dbgs() << "Attempting to propagate values on " << AI << "\n");

5807 bool AllSameAndValid = true;

5808 Type *PartitionType = nullptr;

5810 uint64_t BeginOffset = 0;

5811 uint64_t EndOffset = 0;

5812

5813 auto Flush = [&]() {

5814 if (AllSameAndValid && !Insts.empty()) {

5815 LLVM_DEBUG(dbgs() << "Propagate values on slice [" << BeginOffset << ", "

5816 << EndOffset << ")\n");

5818 SSAUpdater SSA(&NewPHIs);

5820 BasicLoadAndStorePromoter Promoter(Insts, SSA, PartitionType);

5821 Promoter.run(Insts);

5822 }

5823 AllSameAndValid = true;

5824 PartitionType = nullptr;

5826 };

5827

5828 for (Slice &S : AS) {

5832 dbgs() << "Ignoring slice: ";

5833 AS.print(dbgs(), &S);

5834 });

5835 continue;

5836 }

5837 if (S.beginOffset() >= EndOffset) {

5838 Flush();

5839 BeginOffset = S.beginOffset();

5840 EndOffset = S.endOffset();

5841 } else if (S.beginOffset() != BeginOffset || S.endOffset() != EndOffset) {

5842 if (AllSameAndValid) {

5844 dbgs() << "Slice does not match range [" << BeginOffset << ", "

5845 << EndOffset << ")";

5846 AS.print(dbgs(), &S);

5847 });

5848 AllSameAndValid = false;

5849 }

5850 EndOffset = std::max(EndOffset, S.endOffset());

5851 continue;

5852 }

5853
5854 auto *User = S.getUse()->getUser();
5855 if (auto *LI = dyn_cast<LoadInst>(User)) {
5856 Type *UserTy = LI->getType();
5857 if (!LI->isSimple() || (PartitionType && UserTy != PartitionType))
5858 AllSameAndValid = false;
5859 PartitionType = UserTy;
5860 Insts.push_back(LI);
5861 } else if (auto *SI = dyn_cast<StoreInst>(User)) {
5862 Type *UserTy = SI->getValueOperand()->getType();
5863 if (!SI->isSimple() || (PartitionType && UserTy != PartitionType))
5864 AllSameAndValid = false;
5865 PartitionType = UserTy;
5866 Insts.push_back(SI);
5867 } else {
5868 AllSameAndValid = false;
5869 }
5870 }

5871

5872 Flush();

5873 return true;

5874}
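// Illustrative case for the read-only-escape path handled by this routine
// (hypothetical IR): the alloca cannot be removed because its address is
// passed to a callee, but since every escaping use only reads the memory the
// loads can still be rewritten to use the stored SSA value directly:
//   %buf = alloca i32
//   store i32 %v, ptr %buf
//   call void @inspect(ptr readonly %buf)
//   %r = load i32, ptr %buf   ; %r can simply become %v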

5875

5876

5877

5878

5879

5880

5881std::pair<bool /*Changed*/, bool /*CFGChanged*/>
5882SROA::runOnAlloca(AllocaInst &AI) {
5883 bool Changed = false;
5884 bool CFGChanged = false;

5885

5886 LLVM_DEBUG(dbgs() << "SROA alloca: " << AI << "\n");

5887 ++NumAllocasAnalyzed;

5888

5889

5890 if (AI.use_empty()) {

5891 AI.eraseFromParent();

5892 Changed = true;
5893 return {Changed, CFGChanged};

5894 }

5895 const DataLayout &DL = AI.getDataLayout();

5896

5897

5898 auto *AT = AI.getAllocatedType();

5899 TypeSize Size = DL.getTypeAllocSize(AT);

5900 if (AI.isArrayAllocation() || !AT->isSized() || Size.isScalable() ||

5901 Size.getFixedValue() == 0)

5902 return {Changed, CFGChanged};

5903

5904

5905

5906 IRBuilderTy IRB(&AI);

5907 AggLoadStoreRewriter AggRewriter(DL, IRB);

5908 Changed |= AggRewriter.rewrite(AI);

5909

5910

5911 AllocaSlices AS(DL, AI);

5913 if (AS.isEscaped())

5914 return {Changed, CFGChanged};

5915

5916 if (AS.isEscapedReadOnly()) {

5917 Changed |= propagateStoredValuesToLoads(AI, AS);

5918 return {Changed, CFGChanged};

5919 }

5920

5921 for (auto &P : AS.partitions()) {

5922

5923

5924

5925

5926

5927 std::optional<Value *> ProtectedFieldDisc;

5928 auto SliceHasMismatch = [&](Slice &S) {

5929 if (auto *II = dyn_cast<IntrinsicInst>(S.getUse()->getUser()))
5930 if (II->getIntrinsicID() == Intrinsic::lifetime_start ||

5931 II->getIntrinsicID() == Intrinsic::lifetime_end)

5932 return false;

5933 if (!ProtectedFieldDisc)

5934 ProtectedFieldDisc = S.ProtectedFieldDisc;

5935 return *ProtectedFieldDisc != S.ProtectedFieldDisc;

5936 };

5937 for (Slice &S : P)

5938 if (SliceHasMismatch(S))

5939 return {Changed, CFGChanged};

5940 for (Slice *S : P.splitSliceTails())

5941 if (SliceHasMismatch(*S))

5942 return {Changed, CFGChanged};

5943 }

5944

5945

5946 for (Instruction *DeadUser : AS.getDeadUsers()) {

5947

5948 for (Use &DeadOp : DeadUser->operands())

5949 clobberUse(DeadOp);

5950

5951

5952 DeadUser->replaceAllUsesWith(PoisonValue::get(DeadUser->getType()));

5953

5954

5955 DeadInsts.push_back(DeadUser);
5956 Changed = true;
5957 }

5958 for (Use *DeadOp : AS.getDeadOperands()) {

5959 clobberUse(*DeadOp);
5960 Changed = true;
5961 }

5962 for (IntrinsicInst *PFPUser : AS.getPFPUsers()) {

5963 PFPUser->replaceAllUsesWith(PFPUser->getArgOperand(0));

5964

5965 DeadInsts.push_back(PFPUser);
5966 Changed = true;
5967 }

5968

5969

5970 if (AS.begin() == AS.end())

5971 return {Changed, CFGChanged};

5972

5973 Changed |= splitAlloca(AI, AS);

5974

5976 while (!SpeculatablePHIs.empty())

5977 speculatePHINodeLoads(IRB, *SpeculatablePHIs.pop_back_val());
5978

5980 auto RemainingSelectsToRewrite = SelectsToRewrite.takeVector();

5981 while (!RemainingSelectsToRewrite.empty()) {

5982 const auto [K, V] = RemainingSelectsToRewrite.pop_back_val();

5983 CFGChanged |=
5984 rewriteSelectInstMemOps(*K, V, IRB, PreserveCFG ? nullptr : DTU);
5985 }

5986

5987 return {Changed, CFGChanged};

5988}
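// End-to-end sketch of a successful run on a small aggregate (hypothetical
// IR, names illustrative):
//   %pair = alloca { i32, i32 }
//   %f1 = getelementptr inbounds { i32, i32 }, ptr %pair, i32 0, i32 1
//   store i32 1, ptr %pair
//   store i32 2, ptr %f1
//   %x = load i32, ptr %pair
// The slices form two disjoint i32 partitions, each partition gets its own
// alloca, and once promoteAllocas() runs mem2reg over them %x is simply the
// stored value 1 with no memory traffic left behind.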

5989

5990

5991

5992

5993

5994

5995

5996

5997

5998

5999bool SROA::deleteDeadInstructions(
6000 SmallPtrSetImpl<AllocaInst *> &DeletedAllocas) {
6001 bool Changed = false;
6002 while (!DeadInsts.empty()) {
6003 Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());
6004 if (!I)
6005 continue;
6006 LLVM_DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n");
6007
6008 // If the instruction is an alloca, find the possible dbg.declare records
6009 // connected to it and remove them too; this must happen before RAUW or
6010 // they can no longer be found.
6011 if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
6012 DeletedAllocas.insert(AI);
6013 for (DbgVariableRecord *OldDII : findDVRDeclares(AI))
6014 OldDII->eraseFromParent();
6015 }
6016
6017 at::deleteAssignmentMarkers(I);
6018 I->replaceAllUsesWith(UndefValue::get(I->getType()));
6019
6020 for (Use &Operand : I->operands())
6021 if (Instruction *U = dyn_cast<Instruction>(Operand)) {
6022 // Zero out the operand and see if it becomes trivially dead.
6023 Operand = nullptr;
6024 if (isInstructionTriviallyDead(U))
6025 DeadInsts.push_back(U);
6026 }
6027
6028 ++NumDeleted;
6029 I->eraseFromParent();
6030 Changed = true;
6031 }
6032 return Changed;
6033}
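// Worked example of the transitive clean-up above (illustrative IR): if the
// rewriter queues this store on DeadInsts,
//   %f1 = getelementptr inbounds { i32, i32 }, ptr %pair, i32 0, i32 1
//   store i32 2, ptr %f1
// erasing the store nulls out its %f1 operand; the now user-less GEP becomes
// trivially dead, is pushed onto DeadInsts, and is erased on a later
// iteration of the same loop.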

6034

6035

6036

6037

6038

6039bool SROA::promoteAllocas() {

6040 if (PromotableAllocas.empty())

6041 return false;

6042

6043 if (SROASkipMem2Reg) {
6044 LLVM_DEBUG(dbgs() << "Not promoting allocas with mem2reg!\n");

6045 } else {

6046 LLVM_DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");

6047 NumPromoted += PromotableAllocas.size();

6048 PromoteMemToReg(PromotableAllocas.getArrayRef(), DTU->getDomTree(), AC);

6049 }

6050

6051 PromotableAllocas.clear();

6052 return true;

6053}
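// For debugging, promotion can be suppressed so the rewritten but still
// memory-based allocas stay visible in the output, e.g. (usage sketch,
// assuming an opt build where the hidden flag is accepted):
//   opt -passes=sroa -sroa-skip-mem2reg -S input.ll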

6054

6055std::pair<bool /*Changed*/, bool /*CFGChanged*/> SROA::runSROA(Function &F) {

6056 LLVM_DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");

6057

6058 const DataLayout &DL = F.getDataLayout();

6059 BasicBlock &EntryBB = F.getEntryBlock();

6060 for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end());
6061 I != E; ++I) {

6062 if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
6063 if (DL.getTypeAllocSize(AI->getAllocatedType()).isScalable() &&
6064 isAllocaPromotable(AI))

6065 PromotableAllocas.insert(AI);

6066 else

6067 Worklist.insert(AI);

6068 }

6069 }

6070

6071 bool Changed = false;
6072 bool CFGChanged = false;

6073

6074

6075 SmallPtrSet<AllocaInst *, 4> DeletedAllocas;

6076

6077 do {

6078 while (!Worklist.empty()) {

6079 auto [IterationChanged, IterationCFGChanged] =

6080 runOnAlloca(*Worklist.pop_back_val());

6081 Changed |= IterationChanged;

6082 CFGChanged |= IterationCFGChanged;

6083

6084 Changed |= deleteDeadInstructions(DeletedAllocas);

6085

6086

6087

6088 if (!DeletedAllocas.empty()) {

6089 Worklist.set_subtract(DeletedAllocas);

6090 PostPromotionWorklist.set_subtract(DeletedAllocas);

6091 PromotableAllocas.set_subtract(DeletedAllocas);

6092 DeletedAllocas.clear();

6093 }

6094 }

6095

6096 Changed |= promoteAllocas();

6097

6098 Worklist = PostPromotionWorklist;

6099 PostPromotionWorklist.clear();

6100 } while (!Worklist.empty());

6101

6102 assert((!CFGChanged || Changed) && "Can not only modify the CFG.");

6104 "Should not have modified the CFG when told to preserve it.");

6105

6107 for (auto &BB : F) {
6108 RemoveRedundantDbgInstrs(&BB);

6109 }

6110 }

6111

6112 return {Changed, CFGChanged};

6113}

6114

6115PreservedAnalyses SROAPass::run(Function &F, FunctionAnalysisManager &AM) {
6116 DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
6117 AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
6118 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
6119 auto [Changed, CFGChanged] =
6120 SROA(&F.getContext(), &DTU, &AC, PreserveCFG).runSROA(F);
6121 if (!Changed)
6122 return PreservedAnalyses::all();
6123 PreservedAnalyses PA;
6124 if (!CFGChanged)
6125 PA.preserveSet<CFGAnalyses>();
6126 PA.preserve<DominatorTreeAnalysis>();
6127 return PA;
6128}
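// Minimal sketch of driving this pass programmatically through the new pass
// manager (analysis registration shortened, error handling omitted):
//   FunctionAnalysisManager FAM;
//   PassBuilder PB;
//   PB.registerFunctionAnalyses(FAM);
//   FunctionPassManager FPM;
//   FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
//   PreservedAnalyses PA = FPM.run(F, FAM);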

6129

6130void SROAPass::printPipeline(
6131 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
6132 static_cast<PassInfoMixin<SROAPass> *>(this)->printPipeline(
6133 OS, MapClassName2PassName);
6134 OS << (PreserveCFG == SROAOptions::PreserveCFG ? "<preserve-cfg>"
6135 : "<modify-cfg>");
6136}
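// The string printed above round-trips through the pipeline parser, so either
// configuration can be requested textually (usage sketch):
//   opt -passes='sroa<preserve-cfg>' -S input.ll
//   opt -passes='sroa<modify-cfg>' -S input.ll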

6137

6139

6140namespace {

6141

6142

6143class SROALegacyPass : public FunctionPass {
6144 SROAOptions PreserveCFG;
6145

6146public:

6147 static char ID;

6148

6152 }

6153

6154 bool runOnFunction(Function &F) override {
6155 if (skipFunction(F))

6156 return false;

6157

6158 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
6159 AssumptionCache &AC =
6160 getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);

6161 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);

6162 auto [Changed, _] =
6163 SROA(&F.getContext(), &DTU, &AC, PreserveCFG).runSROA(F);
6164 return Changed;
6165 }

6166

6167 void getAnalysisUsage(AnalysisUsage &AU) const override {

6168 AU.addRequired<AssumptionCacheTracker>();
6169 AU.addRequired<DominatorTreeWrapperPass>();
6171 AU.addPreserved<GlobalsAAWrapperPass>();

6172 }

6173

6174 StringRef getPassName() const override { return "SROA"; }

6175};

6176

6177}

6178

6179char SROALegacyPass::ID = 0;

6180

6181FunctionPass *llvm::createSROAPass(bool PreserveCFG) {
6182 return new SROALegacyPass(PreserveCFG ? SROAOptions::PreserveCFG
6183 : SROAOptions::ModifyCFG);
6184}
6185

6187 "Scalar Replacement Of Aggregates", false, false)
