LLVM: lib/CodeGen/InterleavedLoadCombinePass.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

42

43#include

44#include

45#include

46

47using namespace llvm;

48

49#define DEBUG_TYPE "interleaved-load-combine"

50

51namespace {

52

53

54STATISTIC(NumInterleavedLoadCombine, "Number of combined loads");

55

56

57static cl::opt DisableInterleavedLoadCombine(

59 cl::desc("Disable combining of interleaved loads"));

60

61struct VectorInfo;

62

63struct InterleavedLoadCombineImpl {

64public:

68 : F(F), DT(DT), MSSA(MSSA),

70

71

72

73 bool run();

74

75private:

76

78

79

81

82

84

85

87

88

90

91

92

93 LoadInst *findFirstLoad(const std::set<LoadInst *> &LIs);

94

95

96

97

98 bool combine(std::list &InterleavedLoad,

100

101

102

103 bool findPattern(std::list &Candidates,

104 std::list &InterleavedLoad, unsigned Factor,

106};

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164class Polynomial {

165

166 enum BOps {

167 LShr,

169 SExt,

170 Trunc,

171 };

172

173

174 unsigned ErrorMSBs = (unsigned)-1;

175

176

177 Value *V = nullptr;

178

179

181

182

184

185public:

/// Construct a polynomial whose variable part is the value \p V.
/// The member initializer stores \p V. When `Ty` is non-null the polynomial
/// becomes defined (ErrorMSBs = 0) and its constant part A is a zero of
/// Ty->getBitWidth() bits; otherwise ErrorMSBs keeps its in-class default of
/// (unsigned)-1.
// NOTE(review): the declaration of `Ty` is not present in this listing -
// presumably the integer type of V; confirm against the original file.
186 Polynomial(Value *V) : V(V) {

188 if (Ty) {

189 ErrorMSBs = 0;

// Redundant with the member initializer above; kept as in the original.
190 this->V = V;

191 A = APInt(Ty->getBitWidth(), 0);

192 }

193 }

194

/// Construct a constant polynomial with value \p A and \p ErrorMSBs
/// possibly-erroneous most significant bits (0 by default). V keeps its
/// in-class default of nullptr, so isFirstOrder() is false.
195 Polynomial(const APInt &A, unsigned ErrorMSBs = 0)

196 : ErrorMSBs(ErrorMSBs), A(A) {}

197

/// Construct a constant polynomial from the integer \p A represented with
/// \p BitWidth bits, with \p ErrorMSBs possibly-erroneous most significant
/// bits (0 by default). V keeps its in-class default of nullptr.
198 Polynomial(unsigned BitWidth, uint64_t A, unsigned ErrorMSBs = 0)

199 : ErrorMSBs(ErrorMSBs), A(BitWidth, A) {}

200

/// Default-construct an undefined polynomial: the in-class initializers
/// leave ErrorMSBs at (unsigned)-1 and V at nullptr.
201 Polynomial() = default;

202

203

204 void incErrorMSBs(unsigned amt) {

205 if (ErrorMSBs == (unsigned)-1)

206 return;

207

208 ErrorMSBs += amt;

209 if (ErrorMSBs > A.getBitWidth())

210 ErrorMSBs = A.getBitWidth();

211 }

212

213

214 void decErrorMSBs(unsigned amt) {

215 if (ErrorMSBs == (unsigned)-1)

216 return;

217

218 if (ErrorMSBs > amt)

219 ErrorMSBs -= amt;

220 else

221 ErrorMSBs = 0;

222 }

223

224

225 Polynomial &add(const APInt &C) {

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242 if (C.getBitWidth() != A.getBitWidth()) {

244 return *this;

245 }

246

247 A += C;

248 return *this;

249 }

250

251

252 Polynomial &mul(const APInt &C) {

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303 if (C.getBitWidth() != A.getBitWidth()) {

305 return *this;

306 }

307

308

309 if (C.isOne()) {

310 return *this;

311 }

312

313

314 if (C.isZero()) {

315 ErrorMSBs = 0;

316 deleteB();

317 }

318

319

320

321 decErrorMSBs(C.countr_zero());

322

323 A *= C;

324 pushBOperation(Mul, C);

325 return *this;

326 }

327

328

329 Polynomial &lshr(const APInt &C) {

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

401

402

403

404

405

406

407

408

409

410

411

412

413

414

415

416

417

418

419

420

421

422

423

424

425

426

427

428

429

430

431

432

433

434

435

436

437

438

439

440

441

442

443

444

445

446

447

448

449

450

451

452

453

454

455

456

457

458

459

460 if (C.getBitWidth() != A.getBitWidth()) {

462 return *this;

463 }

464

465 if (C.isZero())

466 return *this;

467

468

469 unsigned shiftAmt = C.getZExtValue();

470 if (shiftAmt >= C.getBitWidth())

471 return mul(APInt(C.getBitWidth(), 0));

472

473

474

475

476

477

478 if (A.countr_zero() < shiftAmt)

479 ErrorMSBs = A.getBitWidth();

480 else

481 incErrorMSBs(shiftAmt);

482

483

484 pushBOperation(LShr, C);

485 A = A.lshr(shiftAmt);

486

487 return *this;

488 }

489

490

491 Polynomial &sextOrTrunc(unsigned n) {

492 if (n < A.getBitWidth()) {

493

494

495 decErrorMSBs(A.getBitWidth() - n);

496 A = A.trunc(n);

497 pushBOperation(Trunc, APInt(sizeof(n) * 8, n));

498 }

499 if (n > A.getBitWidth()) {

500

501

502 incErrorMSBs(n - A.getBitWidth());

503 A = A.sext(n);

504 pushBOperation(SExt, APInt(sizeof(n) * 8, n));

505 }

506

507 return *this;

508 }

509

510

/// Returns true when the polynomial has a variable part, i.e. V is non-null.
511 bool isFirstOrder() const { return V != nullptr; }

512

513

514 bool isCompatibleTo(const Polynomial &o) const {

515

516 if (A.getBitWidth() != o.A.getBitWidth())

517 return false;

518

519

520 if (!isFirstOrder() && !o.isFirstOrder())

521 return true;

522

523

524 if (V != o.V)

525 return false;

526

527

528 if (B.size() != o.B.size())

529 return false;

530

531 auto *ob = o.B.begin();

532 for (const auto &b : B) {

533 if (b != *ob)

534 return false;

535 ob++;

536 }

537

538 return true;

539 }

540

541

542

543 Polynomial operator-(const Polynomial &o) const {

544

545 if (!isCompatibleTo(o))

546 return Polynomial();

547

548

549

550

551 return Polynomial(A - o.A, std::max(ErrorMSBs, o.ErrorMSBs));

552 }

553

554

556 Polynomial Result(*this);

557 Result.A -= C;

558 return Result;

559 }

560

561

563 Polynomial Result(*this);

564 Result.A += C;

565 return Result;

566 }

567

568

569 bool isProvenEqualTo(const Polynomial &o) {

570

571 Polynomial r = *this - o;

572 return (r.ErrorMSBs == 0) && (!r.isFirstOrder()) && (r.A.isZero());

573 }

574

575

577 OS << "[{#ErrBits:" << ErrorMSBs << "} ";

578

579 if (V) {

580 for (auto b : B)

581 OS << "(";

582 OS << "(" << *V << ") ";

583

584 for (auto b : B) {

585 switch (b.first) {

586 case LShr:

587 OS << "LShr ";

588 break;

589 case Mul:

590 OS << "Mul ";

591 break;

592 case SExt:

593 OS << "SExt ";

594 break;

595 case Trunc:

596 OS << "Trunc ";

597 break;

598 }

599

600 OS << b.second << ") ";

601 }

602 }

603

604 OS << "+ " << A << "]";

605 }

606

607private:

608 void deleteB() {

609 V = nullptr;

610 B.clear();

611 }

612

613 void pushBOperation(const BOps Op, const APInt &C) {

614 if (isFirstOrder()) {

615 B.push_back(std::make_pair(Op, C));

616 return;

617 }

618 }

619};

620

621#ifndef NDEBUG

623 S.print(OS);

624 return OS;

625}

626#endif

627

628

629

630

631

632

633

634

635struct VectorInfo {

636private:

637 VectorInfo(const VectorInfo &c) : VTy(c.VTy) {

639 "Copying VectorInfo is neither implemented nor necessary,");

640 }

641

642public:

643

644 struct ElementInfo {

645

646 Polynomial Ofs;

647

648

649

651

652 ElementInfo(Polynomial Offset = Polynomial(), LoadInst *LI = nullptr)

653 : Ofs(Offset), LI(LI) {}

654 };

655

656

658

659

660 Value *PV = nullptr;

661

662

663 std::set<LoadInst *> LIs;

664

665

666 std::set<Instruction *> Is;

667

668

670

671

672 ElementInfo *EI;

673

674

676

679 }

680

681 VectorInfo &operator=(const VectorInfo &other) = delete;

682

683 virtual ~VectorInfo() { delete[] EI; }

684

685 unsigned getDimension() const { return VTy->getNumElements(); }

686

687

688

689

690

691

692

693

/// Returns true when, for every element i >= 1, the offset EI[i].Ofs is
/// proven equal to EI[0].Ofs + i * Factor * Size; returns false at the
/// first element for which that cannot be proven.
// NOTE(review): `Size` is not declared in the visible lines - presumably
// the element size obtained from DL; confirm against the original file.
694 bool isInterleaved(unsigned Factor, const DataLayout &DL) const {

696 for (unsigned i = 1; i < getDimension(); i++) {

697 if (!EI[i].Ofs.isProvenEqualTo(EI[0].Ofs + i * Factor * Size)) {

698 return false;

699 }

700 }

701 return true;

702 }

703

704

705

706

707

708

709

710

711

/// Fill \p Result for the value \p V by dispatching to computeFromSVI,
/// computeFromLI or computeFromBCI, whichever of SVI / LI / BCI is non-null
/// (tested in that order). Returns the helper's result, or false when none
/// of the three applies.
// NOTE(review): the declarations of SVI, LI and BCI are not present in this
// listing - presumably dynamic casts of V to the respective instruction
// classes; confirm against the original file.
712 static bool compute(Value *V, VectorInfo &Result, const DataLayout &DL) {

714 if (SVI)

715 return computeFromSVI(SVI, Result, DL);

717 if (LI)

718 return computeFromLI(LI, Result, DL);

720 if (BCI)

721 return computeFromBCI(BCI, Result, DL);

722 return false;

723 }

724

725

726

727

728

729

730

/// Derive the element information of the bitcast \p BCI from the element
/// information of its operand.
///
/// Returns false when there is no operand to analyse, when the operand has
/// no usable vector type, when the element counts or element sizes of the
/// two vector types do not divide evenly, or when compute() fails on the
/// operand. On success each operand element is split into `Factor` result
/// elements at consecutive offsets, and BB, PV, LIs and Is are taken over
/// from the operand (plus BCI itself in Is).
// NOTE(review): the rest of the parameter list and the declarations of `Op`
// and `VTy` are missing from this listing; the lines below are otherwise
// kept verbatim.
731 static bool computeFromBCI(BitCastInst *BCI, VectorInfo &Result,

734

// Op is handed to compute() further down, so the function has to give up
// when Op is null - not when it is set.
735 if (!Op)

736 return false;

737

739 if (!VTy)

740 return false;

741

742

// The result must have a whole multiple of the operand's element count.
743 if (Result.VTy->getNumElements() % VTy->getNumElements())

744 return false;

745

746 unsigned Factor = Result.VTy->getNumElements() / VTy->getNumElements();

747 unsigned NewSize = DL.getTypeAllocSize(Result.VTy->getElementType());

748 unsigned OldSize = DL.getTypeAllocSize(VTy->getElementType());

749

// Factor new elements must exactly tile one old element.
750 if (NewSize * Factor != OldSize)

751 return false;

752

753 VectorInfo Old(VTy);

754 if (!compute(Op, Old, DL))

755 return false;

756

// Sub-element j of old element i/Factor sits j * NewSize bytes after it;
// only the first sub-element keeps the originating load.
757 for (unsigned i = 0; i < Result.VTy->getNumElements(); i += Factor) {

758 for (unsigned j = 0; j < Factor; j++) {

759 Result.EI[i + j] =

760 ElementInfo(Old.EI[i / Factor].Ofs + j * NewSize,

761 j == 0 ? Old.EI[i / Factor].LI : nullptr);

762 }

763 }

764

765 Result.BB = Old.BB;

766 Result.PV = Old.PV;

767 Result.LIs.insert(Old.LIs.begin(), Old.LIs.end());

768 Result.Is.insert(Old.Is.begin(), Old.Is.end());

769 Result.Is.insert(BCI);

770 Result.SVI = nullptr;

771

772 return true;

773 }

774

775

776

777

778

779

780

781

782

783

784

785

/// Derive the element information of the shuffle \p SVI from the element
/// information of its two operand vectors.
///
/// A non-null BB marks an operand whose information is usable (see the
/// `if (LHS.BB)` / `if (RHS.BB)` blocks that merge LIs and Is). The
/// function fails when neither operand is usable, or when both are usable
/// but disagree on BB or PV. Otherwise Result takes BB/PV from a usable
/// operand and each result element is copied from the operand element the
/// shuffle selects, or left default-constructed where that is unavailable.
// NOTE(review): several lines are missing from this listing - the rest of
// the parameter list, the declaration of `ArgTy`, the guards in front of
// both `*.BB = nullptr;` statements (presumably a failed compute() on the
// respective operand), the mask loop header that declares `i`, the start
// of the assert, and the LHS/RHS index tests. Everything else is kept
// verbatim.
786 static bool computeFromSVI(ShuffleVectorInst *SVI, VectorInfo &Result,

790

791

792 VectorInfo LHS(ArgTy);

794 LHS.BB = nullptr;

795

796

797 VectorInfo RHS(ArgTy);

799 RHS.BB = nullptr;

800

801

// Neither operand is usable.
802 if (!LHS.BB && !RHS.BB)

803 return false;

804

// Only RHS is usable: take its block and base pointer.
805 else if (!LHS.BB) {

806 Result.BB = RHS.BB;

807 Result.PV = RHS.PV;

808 }

809

// Only LHS is usable: take its block and base pointer.
810 else if (!RHS.BB) {

811 Result.BB = LHS.BB;

812 Result.PV = LHS.PV;

813 }

814

// Both are usable and agree.
815 else if ((LHS.BB == RHS.BB) && (LHS.PV == RHS.PV)) {

816 Result.BB = LHS.BB;

817 Result.PV = LHS.PV;

818 }

819

// Both are usable but refer to different blocks or base pointers.
820 else {

821 return false;

822 }

823

824

825 if (LHS.BB) {

826 Result.LIs.insert(LHS.LIs.begin(), LHS.LIs.end());

827 Result.Is.insert(LHS.Is.begin(), LHS.Is.end());

828 }

829 if (RHS.BB) {

830 Result.LIs.insert(RHS.LIs.begin(), RHS.LIs.end());

831 Result.Is.insert(RHS.Is.begin(), RHS.Is.end());

832 }

833 Result.Is.insert(SVI);

834 Result.SVI = SVI;

835

836 int j = 0;

839 "Invalid ShuffleVectorInst (index out of bounds)");

840

// A negative mask index yields an element without information.
841 if (i < 0)

842 Result.EI[j] = ElementInfo();

844 if (LHS.BB)

845 Result.EI[j] = LHS.EI[i];

846 else

847 Result.EI[j] = ElementInfo();

848 } else {

849 if (RHS.BB)

851 else

852 Result.EI[j] = ElementInfo();

853 }

854 j++;

855 }

856

857 return true;

858 }

859

860

861

862

863

864

865

866

867

/// Fill \p Result for a vector loaded by \p LI: record the base pointer and
/// the load, and give every element the load's offset plus the element's
/// byte offset inside the vector type. Only element 0 keeps the LoadInst.
// NOTE(review): the rest of the parameter list, the two conditions that
// guard the first `return false;` statements, the declarations of
// `BasePtr` and `Offset`, the assignment on source line 885 and the
// contents of `Idx` are missing from this listing; the remaining lines are
// kept verbatim.
868 static bool computeFromLI(LoadInst *LI, VectorInfo &Result,

872

874 return false;

875

877 return false;

878

// Element offsets below are byte offsets, so element types whose size
// differs from their store size (padded elements) are rejected; element
// types without padding must be accepted.
879 if (!DL.typeSizeEqualsStoreSize(Result.VTy->getElementType()))

880 return false;

881

882

884

886 Result.PV = BasePtr;

887 Result.LIs.insert(LI);

888 Result.Is.insert(LI);

889

890 for (unsigned i = 0; i < Result.getDimension(); i++) {

891 Value *Idx[2] = {

894 };

895 int64_t Ofs = DL.getIndexedOffsetInType(Result.VTy, Idx);

896 Result.EI[i] = ElementInfo(Offset + Ofs, i == 0 ? LI : nullptr);

897 }

898

899 return true;

900 }

901

902

903

904

905

/// Compute the polynomial of the binary operator \p BO into \p Result.
///
/// `add` and `lshr` with a constant operand C are folded: the polynomial
/// of the other operand is computed and C is applied via Polynomial::add /
/// Polynomial::lshr. Every other case falls through to treating \p BO
/// itself as an opaque value, Polynomial(&BO).
// NOTE(review): the declarations of `LHS`, `RHS` and `C`, the statement
// guarded by the first `if (C)`, and the `switch` header are missing from
// this listing; the remaining lines are kept verbatim.
906 static void computePolynomialBinOp(BinaryOperator &BO, Polynomial &Result) {

909

910

914 if (C)

916 }

917

919 case Instruction::Add:

// Without a constant operand there is nothing to fold; C is dereferenced
// below, so it must be non-null past this point.
920 if (!C)

921 break;

922

923 computePolynomial(*LHS, Result);

924 Result.add(C->getValue());

925 return;

926

927 case Instruction::LShr:

// Same guard as for Add: only a constant shift amount can be folded.
928 if (!C)

929 break;

930

931 computePolynomial(*LHS, Result);

932 Result.lshr(C->getValue());

933 return;

934

935 default:

936 break;

937 }

938

// Fallback: the operator is used as an opaque variable.
939 Result = Polynomial(&BO);

940 }

941

942

943

944

945

/// Compute the polynomial of \p V into \p Result: either via
/// computePolynomialBinOp on `BO`, or by wrapping \p V itself as
/// Polynomial(&V).
// NOTE(review): the `if` line that declares and tests `BO` is missing from
// this listing - presumably a dynamic cast of V to BinaryOperator; confirm
// against the original file.
946 static void computePolynomial(Value &V, Polynomial &Result) {

948 computePolynomialBinOp(*BO, Result);

949 else

950 Result = Polynomial(&V);

951 }

952

953

954

955

956

957

958

/// Split the pointer \p Ptr into a base pointer (\p BasePtr) and an offset
/// polynomial (\p Result).
///
/// - If `PtrTy` is null, Result becomes undefined and BasePtr nullptr.
/// - A BitCast is looked through by recursing on its operand.
/// - A GEP with an all-constant offset yields that constant; a GEP whose
///   only non-constant index is the last one yields
///   index * element size + constant offset of the leading indices.
///   Other GEP shapes give an undefined Result and a null BasePtr.
/// - Everything else uses \p Ptr itself as base with a zero offset of
///   index width.
///
/// Both "use Ptr as base" paths now assign the zero polynomial to Result.
/// As originally written they built the Polynomial as a discarded
/// temporary, so Result silently kept whatever the caller had passed in.
// NOTE(review): the rest of the parameter list, the declaration of
// `PtrTy`, the cast / GEP tests with their `switch` header, and the body
// of the index loop (which declares `IDX` and fills `Indices`) are missing
// from this listing; the remaining lines are kept verbatim.
959 static void computePolynomialFromPointer(Value &Ptr, Polynomial &Result,

962

964 if (!PtrTy) {

965 Result = Polynomial();

966 BasePtr = nullptr;

967 return;

968 }

969 unsigned PointerBits =

970 DL.getIndexSizeInBits(PtrTy->getPointerAddressSpace());

971

972

976 case Instruction::BitCast:

977 computePolynomialFromPointer(*CI.getOperand(0), Result, BasePtr, DL);

978 break;

979 default:

980 BasePtr = &Ptr;

// Zero offset relative to Ptr.
981 Result = Polynomial(PointerBits, 0);

982 break;

983 }

984 }

985

988

989 APInt BaseOffset(PointerBits, 0);

990

991

// All indices constant: the offset is a plain constant.
992 if (GEP.accumulateConstantOffset(DL, BaseOffset)) {

993 Result = Polynomial(BaseOffset);

994 BasePtr = GEP.getPointerOperand();

995 return;

996 } else {

997

998

999 unsigned idxOperand, e;

1001 for (idxOperand = 1, e = GEP.getNumOperands(); idxOperand < e;

1002 idxOperand++) {

1004 if (!IDX)

1005 break;

1007 }

1008

1009

// Only the last index may be non-constant.
1010 if (idxOperand + 1 != e) {

1011 Result = Polynomial();

1012 BasePtr = nullptr;

1013 return;

1014 }

1015

1016

1017 computePolynomial(*GEP.getOperand(idxOperand), Result);

1018

1019

1020

1021 BaseOffset =

1022 DL.getIndexedOffsetInType(GEP.getSourceElementType(), Indices);

1023

1024

// Result = index (resized to index width) * element size + BaseOffset.
1025 unsigned ResultSize = DL.getTypeAllocSize(GEP.getResultElementType());

1026 Result.sextOrTrunc(PointerBits);

1027 Result.mul(APInt(PointerBits, ResultSize));

1028 Result.add(BaseOffset);

1029 BasePtr = GEP.getPointerOperand();

1030 }

1031 }

1032

1033

1034 else {

1035 BasePtr = &Ptr;

// Zero offset relative to Ptr.
1036 Result = Polynomial(DL.getIndexSizeInBits(PtrTy->getPointerAddressSpace()), 0);

1037 }

1038 }

1039

1040#ifndef NDEBUG

1042 if (PV)

1043 OS << *PV;

1044 else

1045 OS << "(none)";

1046 OS << " + ";

1047 for (unsigned i = 0; i < getDimension(); i++)

1048 OS << ((i == 0) ? "[" : ", ") << EI[i].Ofs;

1049 OS << "]";

1050 }

1051#endif

1052};

1053

1054}

1055

/// Search \p Candidates for a group of `Factor` vectors that share vector
/// type, basic block and base pointer, and whose first-element offsets are
/// proven to be C0 + i * Size for i = 1 .. Factor-1, where C0 is the group
/// leader. On success the group members are moved (spliced) from
/// \p Candidates to the end of \p InterleavedLoad, leader first, and true
/// is returned. Returns false when no candidate can lead a complete group.
// NOTE(review): the end of the parameter list (source line 1058, which
// presumably declares `Factor` and `DL`) and the element types of the
// std::list / iterator templates are missing from this listing.
1056bool InterleavedLoadCombineImpl::findPattern(

1057 std::list &Candidates, std::list &InterleavedLoad,

// Try every candidate as the leader (index 0) of a group.
1059 for (auto C0 = Candidates.begin(), E0 = Candidates.end(); C0 != E0; ++C0) {

1060 unsigned i;

1061

1062 unsigned Size = DL.getTypeAllocSize(C0->VTy->getElementType());

1063

1064

// Res[i] is the candidate found for position i; Candidates.end() means
// "not found yet".
1065 std::vector<std::list::iterator> Res(Factor, Candidates.end());

1066

1067 for (auto C = Candidates.begin(), E = Candidates.end(); C != E; C++) {

// Group members must match the leader in type, block and base pointer.
1068 if (C->VTy != C0->VTy)

1069 continue;

1070 if (C->BB != C0->BB)

1071 continue;

1072 if (C->PV != C0->PV)

1073 continue;

1074

1075

// Record C at the position whose expected offset it provably matches.
1076 for (i = 1; i < Factor; i++) {

1077 if (C->EI[0].Ofs.isProvenEqualTo(C0->EI[0].Ofs + i * Size)) {

1078 Res[i] = C;

1079 }

1080 }

1081

// Stop scanning as soon as positions 1 .. Factor-1 are all filled; only
// then is the leader entered at position 0.
1082 for (i = 1; i < Factor; i++) {

1083 if (Res[i] == Candidates.end())

1084 break;

1085 }

1086 if (i == Factor) {

1087 Res[0] = C0;

1088 break;

1089 }

1090 }

1091

// Res[0] is only set for a complete group.
1092 if (Res[0] != Candidates.end()) {

1093

1094 for (unsigned i = 0; i < Factor; i++) {

1095 InterleavedLoad.splice(InterleavedLoad.end(), Candidates, Res[i]);

1096 }

1097

1098 return true;

1099 }

1100 }

1101 return false;

1102}

1103

1104LoadInst *

1105InterleavedLoadCombineImpl::findFirstLoad(const std::set<LoadInst *> &LIs) {

1106 assert(!LIs.empty() && "No load instructions given.");

1107

1108

1109 BasicBlock *BB = (*LIs.begin())->getParent();

1111 *BB, [&LIs](Instruction &I) -> bool { return is_contained(LIs, &I); });

1113

1115}

1116

1117bool InterleavedLoadCombineImpl::combine(std::list &InterleavedLoad,

1118 OptimizationRemarkEmitter &ORE) {

1119 LLVM_DEBUG(dbgs() << "Checking interleaved load\n");

1120

1121

1122

1123

1124 LoadInst *InsertionPoint = InterleavedLoad.front().EI[0].LI;

1125

1126

1127 if (!InsertionPoint)

1128 return false;

1129

1130 std::set<LoadInst *> LIs;

1131 std::set<Instruction *> Is;

1132 std::set<Instruction *> SVIs;

1133

1137

1138

1139 unsigned Factor = InterleavedLoad.size();

1140

1141

1142 for (auto &VI : InterleavedLoad) {

1143

1144 LIs.insert(VI.LIs.begin(), VI.LIs.end());

1145

1146

1147

1148

1149 Is.insert(VI.Is.begin(), VI.Is.end());

1150

1151

1152 SVIs.insert(VI.SVI);

1153 }

1154

1155

1156 if (LIs.size() < 2)

1157 return false;

1158

1159

1160

1161

1162 for (const auto &I : Is) {

1163

1165

1166

1167 if (SVIs.find(I) != SVIs.end())

1168 continue;

1169

1170

1171

1172 for (auto *U : I->users()) {

1174 return false;

1175 }

1176 }

1177

1178

1180 return false;

1181

1182

1183

1184 LoadInst *First = findFirstLoad(LIs);

1185

1186

1187

1188

1190 for (auto *LI : LIs) {

1192 if (!MSSA.dominates(MADef, FMA))

1193 return false;

1194 }

1195 assert(!LIs.empty() && "There are no LoadInst to combine");

1196

1197

1198 for (auto &VI : InterleavedLoad) {

1199 if (!DT.dominates(InsertionPoint, VI.SVI))

1200 return false;

1201 }

1202

1203

1204

1206 Type *ETy = InterleavedLoad.front().SVI->getType()->getElementType();

1207 unsigned ElementsPerSVI =

1209 ->getNumElements();

1211

1214 Instruction::Load, ILTy, Factor, Indices, InsertionPoint->getAlign(),

1216

1218 return false;

1219 }

1220

1221

1223 auto LI = Builder.CreateAlignedLoad(ILTy, Ptr, InsertionPoint->getAlign(),

1224 "interleaved.wide.load");

1225 auto MSSAU = MemorySSAUpdater(&MSSA);

1226 MemoryUse *MSSALoad = cast(MSSAU.createMemoryAccessBefore(

1228 MSSAU.insertUse(MSSALoad, true);

1229

1230

1231 int i = 0;

1232 for (auto &VI : InterleavedLoad) {

1233 SmallVector<int, 4> Mask;

1234 for (unsigned j = 0; j < ElementsPerSVI; j++)

1235 Mask.push_back(i + j * Factor);

1236

1237 Builder.SetInsertPoint(VI.SVI);

1238 auto SVI = Builder.CreateShuffleVector(LI, Mask, "interleaved.shuffle");

1239 VI.SVI->replaceAllUsesWith(SVI);

1240 i++;

1241 }

1242

1243 NumInterleavedLoadCombine++;

1244 ORE.emit([&]() {

1245 return OptimizationRemark(DEBUG_TYPE, "Combined Interleaved Load", LI)

1246 << "Load interleaved combined with factor "

1247 << ore::NV("Factor", Factor);

1248 });

1249

1250 return true;

1251}

1252

1253bool InterleavedLoadCombineImpl::run() {

1254 OptimizationRemarkEmitter ORE(&F);

1255 bool changed = false;

1257

1258 auto &DL = F.getDataLayout();

1259

1260

1261 for (unsigned Factor = MaxFactor; Factor >= 2; Factor--) {

1262 std::list Candidates;

1263

1264 for (BasicBlock &BB : F) {

1265 for (Instruction &I : BB) {

1267

1269 continue;

1270

1272

1273 if (!VectorInfo::computeFromSVI(SVI, Candidates.back(), DL)) {

1274 Candidates.pop_back();

1275 continue;

1276 }

1277

1278 if (!Candidates.back().isInterleaved(Factor, DL)) {

1279 Candidates.pop_back();

1280 }

1281 }

1282 }

1283 }

1284

1285 std::list InterleavedLoad;

1286 while (findPattern(Candidates, InterleavedLoad, Factor, DL)) {

1287 if (combine(InterleavedLoad, ORE)) {

1288 changed = true;

1289 } else {

1290

1291

1292 Candidates.splice(Candidates.begin(), InterleavedLoad,

1293 std::next(InterleavedLoad.begin()),

1294 InterleavedLoad.end());

1295 }

1296 InterleavedLoad.clear();

1297 }

1298 }

1299

1300 return changed;

1301}

1302

1303namespace {

1304

1305

1306struct InterleavedLoadCombine : public FunctionPass {

1307 static char ID;

1308

1309 InterleavedLoadCombine() : FunctionPass(ID) {

1311 }

1312

1313 StringRef getPassName() const override {

1314 return "Interleaved Load Combine Pass";

1315 }

1316

1318 if (DisableInterleavedLoadCombine)

1319 return false;

1320

1321 auto *TPC = getAnalysisIfAvailable();

1322 if (!TPC)

1323 return false;

1324

1325 LLVM_DEBUG(dbgs() << "*** " << getPassName() << ": " << F.getName()

1326 << "\n");

1327

1328 return InterleavedLoadCombineImpl(

1329 F, getAnalysis().getDomTree(),

1330 getAnalysis().getMSSA(),

1331 getAnalysis().getTTI(F),

1332 TPC->getTM())

1333 .run();

1334 }

1335

1336 void getAnalysisUsage(AnalysisUsage &AU) const override {

1338 AU.addRequired();

1339 AU.addRequired();

1340 FunctionPass::getAnalysisUsage(AU);

1341 }

1342

1343private:

1344};

1345}

1346

1347PreservedAnalyses

1349

1353 bool Changed = InterleavedLoadCombineImpl(F, DT, MemSSA, TTI, *TM).run();

1355}

1356

1357char InterleavedLoadCombine::ID = 0;

1358

1361 "Combine interleaved loads into wide loads and shufflevector instructions",

1362 false, false)

1368 "Combine interleaved loads into wide loads and shufflevector instructions",

1370

1373 auto P = new InterleavedLoadCombine();

1374 return P;

1375}

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL

static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))

static bool runOnFunction(Function &F, bool PostInlining)

This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...

FunctionAnalysisManager FAM

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...

#define STATISTIC(VARNAME, DESC)

This file describes how to lower LLVM code to machine code.

Target-Independent Code Generator Pass Configuration Options pass.

This pass exposes codegen information to IR-level passes.

Class for arbitrary precision integers.

bool isZero() const

Determine if this value is zero, i.e. all bits are clear.

AnalysisUsage & addRequired()

LLVM Basic Block Representation.

InstListType::iterator iterator

Instruction iterators...

BinaryOps getOpcode() const

This class represents a no-op cast from one type to another.

This is the base class for all instructions that perform data casts.

Instruction::CastOps getOpcode() const

Return the opcode of this CastInst.

This is the shared class of boolean and integer constants.

A parsed version of the target data layout string in and methods for querying it.

Analysis pass which computes a DominatorTree.

Legacy analysis pass which computes a DominatorTree.

Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.

LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const

Return true if the (end of the) basic block BB dominates the use U.

Class to represent fixed width SIMD vectors.

unsigned getNumElements() const

static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)

FunctionPass class - This class is used to implement most global optimizations.

an instruction for type-safe pointer arithmetic to access elements of arrays and structs

LLVM_ABI bool isCommutative() const LLVM_READONLY

Return true if the instruction is commutative:

LLVM_ABI bool isAtomic() const LLVM_READONLY

Return true if this instruction has an AtomicOrdering of unordered or higher.

Class to represent integer types.

PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)

Definition InterleavedLoadCombinePass.cpp:1348

An instruction for reading from memory.

unsigned getPointerAddressSpace() const

Returns the address space of the pointer operand.

Value * getPointerOperand()

bool isVolatile() const

Return true if this is a load from a volatile memory location.

Align getAlign() const

Return the alignment of the access that is being performed.

An analysis that produces MemorySSA for a function.

Legacy analysis pass which computes MemorySSA.

Encapsulates MemorySSA, including all data associated with memory accesses.

LLVM_ABI bool dominates(const MemoryAccess *A, const MemoryAccess *B) const

Given two memory accesses in potentially different blocks, determine whether MemoryAccess A dominates...

MemoryUseOrDef * getMemoryAccess(const Instruction *I) const

Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.

MemoryAccess * getDefiningAccess() const

Get the access that produces the memory state used by this Use.

static LLVM_ABI PassRegistry * getPassRegistry()

getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...

static PreservedAnalyses none()

Convenience factory function for the empty preserved set.

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

This instruction constructs a fixed permutation of two input vectors.

static LLVM_ABI void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)

Convert the input shuffle mask operand to a vector of integers.

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

Analysis pass providing the TargetTransformInfo.

virtual unsigned getMaxSupportedInterleaveFactor() const

Get the maximum supported factor for interleaved memory accesses.

This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...

Primary interface to the complete machine description for the target machine.

virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const

Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...

virtual const TargetLowering * getTargetLowering() const

Wrapper pass for TargetTransformInfo.

This pass provides access to the codegen interfaces that are needed for IR-level transformations.

LLVM_ABI InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, bool UseMaskForCond=false, bool UseMaskForGaps=false) const

TargetCostKind

The kind of cost model.

@ TCK_SizeAndLatency

The weighted sum of size and latency.

LLVM_ABI InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const

Estimate the cost of a given IR user when lowered.

static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)

Value * getOperand(unsigned i) const

LLVM Value Representation.

Type * getType() const

All values are typed, get the type of this value.

LLVM_ABI LLVMContext & getContext() const

All values hold a context through their type.

Type * getElementType() const

const ParentTy * getParent() const

This class implements an extremely fast bulk output stream that can only output to a stream.

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

constexpr std::underlying_type_t< E > Mask()

Get a bitmask with 1s in all places up to the high-order bit of E's largest value.

unsigned ID

LLVM IR allows to use arbitrary numbers as calling convention identifiers.

@ C

The default llvm calling convention, compatible with C.

@ FMA

FMA - Perform a * b + c with no intermediate rounding step.

@ BasicBlock

Various leaf nodes.

initializer< Ty > init(const Ty &Val)

DiagnosticInfoOptimizationBase::Argument NV

This is an optimization pass for GlobalISel generic memory operations.

Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

LLVM_ABI void initializeInterleavedLoadCombinePass(PassRegistry &)

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)

Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...

bool isa(const From &Val)

isa - Return true if the parameter to the template is an instance of one of the template type argu...

@ First

Helpers to iterate all locations in the MemoryEffectsBase class.

IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >

DWARFExpression::Operation Op

raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)

constexpr unsigned BitWidth

decltype(auto) cast(const From &Val)

cast - Return the argument parameter cast to the specified type.

auto find_if(R &&Range, UnaryPredicate P)

Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.

bool is_contained(R &&Range, const E &Element)

Returns true if Element is found in Range.

APInt operator+(APInt a, const APInt &b)

auto seq(T Begin, T End)

Iterate over an integral type from Begin up to - but not including - End.

AnalysisManager< Function > FunctionAnalysisManager

Convenience typedef for the Function analysis manager.

LLVM_ABI FunctionPass * createInterleavedLoadCombinePass()

InterleavedLoadCombines Pass - This pass identifies interleaved loads and combines them into wide loa...

Definition InterleavedLoadCombinePass.cpp:1372

void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)

Implement std::swap in terms of BitVector swap.