MLIR: lib/Dialect/Affine/Transforms/LoopFusion.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

14

26 #include "llvm/ADT/DenseMap.h"

27 #include "llvm/ADT/DenseSet.h"

28 #include "llvm/ADT/STLExtras.h"

29 #include "llvm/Support/CommandLine.h"

30 #include "llvm/Support/Debug.h"

31 #include "llvm/Support/raw_ostream.h"

32 #include

33 #include

34 #include

35

36 namespace mlir {

37 namespace affine {

38 #define GEN_PASS_DEF_AFFINELOOPFUSION

39 #include "mlir/Dialect/Affine/Passes.h.inc"

40 }

41 }

42

43 #define DEBUG_TYPE "affine-fusion"

44

45 using namespace mlir;

47

48 namespace {

49

50

51

52

53

54 struct LoopFusion : public affine::impl::AffineLoopFusionBase {

55 LoopFusion() = default;

56 LoopFusion(unsigned fastMemorySpace, uint64_t localBufSizeThresholdBytes,

57 bool maximalFusion, enum FusionMode affineFusionMode) {

58 this->fastMemorySpace = fastMemorySpace;

59 this->localBufSizeThreshold = localBufSizeThresholdBytes / 1024;

60 this->maximalFusion = maximalFusion;

61 this->affineFusionMode = affineFusionMode;

62 }

63

64 void runOnBlock(Block *block);

65 void runOnOperation() override;

66 };

67

68 }

69

70

71

72

73

74

75

76

81

83 bool hasOutDepsAfterFusion = false;

84

85 for (auto &outEdge : mdg.outEdges.lookup(srcId)) {

87

88 if (depNodeOp == dstNodeOp)

89 continue;

90

91

92

94 return false;

95

96

97

98 if (fusedLoopInsPoint != depNodeOp &&

100 LLVM_DEBUG(llvm::dbgs() << "Src loop can't be removed: dst loop doesn't "

101 "dominate dependence\n");

102 return false;

103 }

104

105 hasOutDepsAfterFusion = true;

106 }

107

108

109

110

111 if (hasOutDepsAfterFusion || !escapingMemRefs.empty()) {

112 std::optional isMaximal = fusionSlice.isMaximal();

113 if (!isMaximal) {

114 LLVM_DEBUG(llvm::dbgs() << "Src loop can't be removed: can't determine "

115 "if fusion is maximal\n");

116 return false;

117 }

118

119 if (!*isMaximal) {

120 LLVM_DEBUG(llvm::dbgs()

121 << "Src loop can't be removed: fusion is not maximal\n");

122 return false;

123 }

124 }

125

126 return true;

127 }

128

129

130

131

132

133

134

135

139

140 if (mdg.inEdges.count(dstId) == 0)

141 return;

142

143

144 auto *dstNode = mdg.getNode(dstId);

146 for (Operation *load : dstNode->loads)

147 consumedMemrefs.insert(cast(load).getMemRef());

148

149

150

151 for (const auto &srcEdge : mdg.inEdges.lookup(dstId)) {

152 const auto *srcNode = mdg.getNode(srcEdge.id);

153

154 if (!isa(srcNode->op))

155 continue;

156

157 if (any_of(srcNode->stores, [&](Operation *op) {

158 auto storeOp = cast(op);

159 return consumedMemrefs.count(storeOp.getMemRef()) > 0;

160 }))

161 srcIdCandidates.push_back(srcNode->id);

162 }

163

164 llvm::sort(srcIdCandidates);

165 srcIdCandidates.erase(llvm::unique(srcIdCandidates), srcIdCandidates.end());

166 }

167

168

169

170 static void

174 auto *dstNode = mdg.getNode(dstId);

175 auto *srcNode = mdg.getNode(srcId);

177 producerConsumerMemrefs);

178 }

179

180

181

182

183

184

185

186

189

190 if (!defOp)

191 return true;

192

193

194 if (auto viewOp = dyn_castmlir::ViewLikeOpInterface(defOp))

196 return true;

197

198

199 if (!hasSingleEffectmlir::MemoryEffects::Allocate(defOp, memref))

200 return true;

201

202

203

205

206 Operation *ancestorOp = block->getParent()->findAncestorOpInRegion(*user);

207 if (!ancestorOp)

208 return true;

209 if (ancestorOp->getBlock() != block)

210 return false;

211 return !isa(*user);

212 });

213 }

214

215

216

219 auto *node = mdg.getNode(id);

220 for (Operation *storeOp : node->stores) {

221 auto memref = cast(storeOp).getMemRef();

222 if (escapingMemRefs.count(memref))

223 continue;

225 escapingMemRefs.insert(memref);

226 }

227 }

228

229

230

231

232

233

235 assert(isa(node->op));

237 node->op = newRootForOp;

238 }

239

240

241

242

243

247 assert(!producerStores.empty() && "expected producer store");

248

249

250

251

252

253 Block *commonBlock = nullptr;

254

255 for (Operation *store : producerStores) {

257 !commonBlock ? &*sliceInsertionBlock->begin() : &*commonBlock->begin();

259 }

260 assert(commonBlock &&

261 "common block of producer stores and slice should exist");

262

263

264

265 Operation *firstAncestor = nullptr;

266 for (Operation *store : producerStores) {

268 assert(ancestor && "producer store should be contained in common block");

269 firstAncestor = !firstAncestor || ancestor->isBeforeInBlock(firstAncestor)

270 ? ancestor

271 : firstAncestor;

272 }

273 return firstAncestor;

274 }

275

276

277

278

279

281 AffineForOp srcForOp, AffineForOp dstForOp, unsigned depth,

283 int64_t &fusedLoopNestComputeCost) {

284 LLVM_DEBUG(llvm::dbgs() << "Determining additional compute fraction...\n";);

285

286

289 LLVM_DEBUG(llvm::dbgs() << "Failed to get source loop nest stats.\n");

290 return std::nullopt;

291 }

292

293

296 LLVM_DEBUG(llvm::dbgs() << "Failed to get destination loop nest stats.\n");

297 return std::nullopt;

298 }

299

300

301 uint64_t srcLoopNestCost = getComputeCost(srcForOp, srcLoopNestStats);

302

303

304 uint64_t dstLoopNestCost = getComputeCost(dstForOp, dstLoopNestStats);

305

307

309 LLVM_DEBUG(llvm::dbgs() << "Slice wasn't computed.\n");

310 return std::nullopt;

311 }

312

314 dstLoopNestStats, slice,

315 &fusedLoopNestComputeCost)) {

316 LLVM_DEBUG(llvm::dbgs() << "Unable to compute fusion compute cost\n");

317 return std::nullopt;

318 }

319

320 double additionalComputeFraction =

321 fusedLoopNestComputeCost /

322 (static_cast<double>(srcLoopNestCost) + dstLoopNestCost) -

323 1;

324

325 return additionalComputeFraction;

326 }

327

328

329

330

331

332

333

336 unsigned dstLoopDepth,

337 std::optional fastMemorySpace,

338 Block *sliceInsertionBlock,

339 uint64_t localBufSizeThreshold) {

340 assert(!storeOps.empty() && "no source stores supplied");

341

342

343

344

345

346 if (storeOps.size() > 1 &&

347 !std::equal(std::next(storeOps.begin()), storeOps.end(), storeOps.begin(),

349 MemRefAccess aM(cast(a));

350 MemRefAccess bM(cast(b));

351 return aM == bM;

352 })) {

353 LLVM_DEBUG(llvm::dbgs()

354 << "Private memref creation unsupported for multiple producer "

355 "stores with different access functions.\n");

356 return nullptr;

357 }

358

359 Operation *srcStoreOp = storeOps[0];

360

361

363

364 OpBuilder top(forOp->getParentRegion());

365

366 auto oldMemRef = cast(srcStoreOp).getMemRef();

367 auto oldMemRefType = cast(oldMemRef.getType());

368 unsigned rank = oldMemRefType.getRank();

369

370

372 bool validRegion = succeeded(

373 region.compute(srcStoreOp, dstLoopDepth, nullptr,

374 true, false));

375

376 (void)validRegion;

377 assert(validRegion && "unexpected memref region failure");

380 lbs.reserve(rank);

381

382

383 std::optional<int64_t> numElements =

385 assert(numElements && "non-constant number of elts in local buffer");

386

388

389

390

393

394

396 offsets.reserve(rank);

397

398

399

401 for (unsigned j = 0, e = lbs[0].getNumSymbols(); j < e; ++j)

403 for (unsigned d = 0; d < rank; ++d) {

404 assert(lbs[d].getNumResults() == 1 &&

405 "invalid private memref bound calculation");

406 offsets.push_back(lbs[d].getResult(0).replaceSymbols(replacements));

407 }

408

409

410

412 assert(eltSize && "memrefs with size elt types expected");

413 uint64_t bufSize = *eltSize * *numElements;

415 if (bufSize <= localBufSizeThreshold && fastMemorySpace.has_value()) {

416 newMemSpace = b.getI64IntegerAttr(*fastMemorySpace);

417 } else {

418 newMemSpace = oldMemRefType.getMemorySpace();

419 }

420 auto newMemRefType = MemRefType::get(newShape, oldMemRefType.getElementType(),

421 AffineMap(), newMemSpace);

422

423

424

425

426

427

428

429 Value newMemRef = top.creatememref::AllocOp(forOp.getLoc(), newMemRefType);

430

431

433 remapExprs.reserve(rank);

434 for (unsigned i = 0; i < rank; i++) {

435 auto dimExpr = b.getAffineDimExpr(outerIVs.size() + i);

436

437 auto remapExpr =

439 remapExprs.push_back(remapExpr);

440 }

441

442 auto indexRemap =

443 AffineMap::get(outerIVs.size() + rank, 0, remapExprs, forOp.getContext());

444

445

449 oldMemRef, newMemRef, {}, indexRemap,

450 outerIVs,

451 {}, domFilter);

452 assert(succeeded(res) &&

453 "replaceAllMemrefUsesWith should always succeed here");

454 (void)res;

455 LLVM_DEBUG(llvm::dbgs() << "Created private memref of type: " << newMemRefType

456 << '\n');

457 return newMemRef;

458 }

459

460

461

462

463

464

465

466

467

468

469

470

471

472

473

474

475

476

477

478

479

480

481

482

483

484

485

486

487

488

489

490

491

492

493

494

495

496

497

500 AffineForOp dstForOp,

502 unsigned maxLegalFusionDepth,

503 unsigned *dstLoopDepth,

504 double computeToleranceThreshold) {

505 LLVM_DEBUG({

506 llvm::dbgs()

507 << "Checking whether fusion is profitable between source nest:\n";

508 llvm::dbgs() << ' ' << srcForOp << " and destination nest:\n";

509 llvm::dbgs() << dstForOp << "\n";

510 });

511

512 if (maxLegalFusionDepth == 0) {

513 LLVM_DEBUG(llvm::dbgs() << "Can't fuse: maxLegalFusionDepth is 0\n");

514 return false;

515 }

516

517

518

519

522 return false;

523

524

527 return false;

528

529

530

531

532

533

534

535

536 if (producerStores.size() > 1) {

537 LLVM_DEBUG(llvm::dbgs() << "Limited profitability analysis. Not "

538 "supported for multiple producer store case.\n");

539 int64_t sliceCost;

540 int64_t fusedLoopNestComputeCost;

541

542

544 srcForOp, dstForOp, maxLegalFusionDepth, depthSliceUnions, sliceCost,

545 fusedLoopNestComputeCost);

546 if (!fraction || fraction > computeToleranceThreshold) {

547 LLVM_DEBUG(llvm::dbgs() << "Additional computation exceeds "

548 "compute tolerance. Not fusing.\n");

549 return false;

550 }

551 LLVM_DEBUG(llvm::dbgs()

552 << "Considering fusion profitable at max legal depth.\n");

553 return true;

554 }

555

556 Operation *srcStoreOp = producerStores.front();

557

558

559

560

561

562

563

565 double maxStorageReduction = 0.0;

566 std::optional<uint64_t> sliceMemEstimate;

567

568

569 std::optional bestDstLoopDepth;

570

571

573 if (failed(srcWriteRegion.compute(srcStoreOp, 0))) {

574 LLVM_DEBUG(llvm::dbgs()

575 << "Unable to compute MemRefRegion for source operation\n");

576 return false;

577 }

578

579 std::optional<int64_t> maybeSrcWriteRegionSizeBytes =

581 if (!maybeSrcWriteRegionSizeBytes.has_value())

582 return false;

583 int64_t srcWriteRegionSizeBytes = *maybeSrcWriteRegionSizeBytes;

584

585

586 uint64_t srcLoopNestCost = getComputeCost(srcForOp, srcLoopNestStats);

587

588

589 uint64_t dstLoopNestCost = getComputeCost(dstForOp, dstLoopNestStats);

590

591

592

593 for (unsigned i = maxLegalFusionDepth; i >= 1; --i) {

595

597 continue;

598

599

600

601 int64_t sliceCost;

602

603 int64_t fusedLoopNestComputeCost;

604

605 auto mayAdditionalComputeFraction =

607 sliceCost, fusedLoopNestComputeCost);

608 if (!mayAdditionalComputeFraction) {

609 LLVM_DEBUG(llvm::dbgs()

610 << "Can't determine additional compute fraction.\n");

611 continue;

612 }

613 double additionalComputeFraction = *mayAdditionalComputeFraction;

614

615

616

617

619 if (failed(sliceWriteRegion.compute(srcStoreOp, 0, &slice))) {

620 LLVM_DEBUG(llvm::dbgs()

621 << "Failed to compute slice write region at loopDepth: " << i

622 << "\n");

623 continue;

624 }

625

626 std::optional<int64_t> maybeSliceWriteRegionSizeBytes =

628 if (!maybeSliceWriteRegionSizeBytes.has_value() ||

629 *maybeSliceWriteRegionSizeBytes == 0) {

630 LLVM_DEBUG(llvm::dbgs()

631 << "Failed to get slice write region size at loopDepth: " << i

632 << "\n");

633 continue;

634 }

635 int64_t sliceWriteRegionSizeBytes = *maybeSliceWriteRegionSizeBytes;

636

637 double storageReduction = static_cast<double>(srcWriteRegionSizeBytes) /

638 static_cast<double>(sliceWriteRegionSizeBytes);

639

640 LLVM_DEBUG({

641 std::stringstream msg;

642 msg << " evaluating fusion profitability at depth : " << i << "\n"

643 << std::fixed << std::setprecision(2)

644 << " additional compute fraction: "

645 << 100.0 * additionalComputeFraction << "%\n"

646 << " storage reduction factor: " << storageReduction << "x\n"

647 << " fused nest cost: " << fusedLoopNestComputeCost << "\n"

648 << " src write region size: " << srcWriteRegionSizeBytes << "\n"

649 << " slice write region size: " << sliceWriteRegionSizeBytes

650 << "\n";

651 llvm::dbgs() << msg.str();

652 });

653

654

655

656

657

658 if ((storageReduction > maxStorageReduction) &&

659 (additionalComputeFraction <= computeToleranceThreshold)) {

660 maxStorageReduction = storageReduction;

661 bestDstLoopDepth = i;

662 minFusedLoopNestComputeCost = fusedLoopNestComputeCost;

663 sliceMemEstimate = sliceWriteRegionSizeBytes;

664 }

665 }

666

667

668

669 if (!bestDstLoopDepth) {

670 LLVM_DEBUG(

671 llvm::dbgs()

672 << "All fusion choices involve more than the threshold amount of "

673 "redundant computation; NOT fusing.\n");

674 return false;

675 }

676

677 if (!bestDstLoopDepth) {

678 LLVM_DEBUG(llvm::dbgs() << "no fusion depth could be evaluated.\n");

679 return false;

680 }

681

682

683 *dstLoopDepth = *bestDstLoopDepth;

684

685 LLVM_DEBUG(

686 llvm::dbgs() << " LoopFusion fusion stats:"

687 << "\n best loop depth: " << bestDstLoopDepth

688 << "\n src loop nest compute cost: " << srcLoopNestCost

689 << "\n dst loop nest compute cost: " << dstLoopNestCost

690 << "\n fused loop nest compute cost: "

691 << minFusedLoopNestComputeCost << "\n");

692

695

696 std::optional storageReduction;

697

698 if (!dstMemSize || !srcMemSize) {

699 LLVM_DEBUG(llvm::dbgs()

700 << " fusion memory benefit cannot be evaluated; NOT fusing.\n");

701 return false;

702 }

703

704 auto srcMemSizeVal = *srcMemSize;

705 auto dstMemSizeVal = *dstMemSize;

706

707 assert(sliceMemEstimate && "expected value");

708 auto fusedMem = dstMemSizeVal + *sliceMemEstimate;

709

710 LLVM_DEBUG(llvm::dbgs() << " src mem: " << srcMemSizeVal << "\n"

711 << " dst mem: " << dstMemSizeVal << "\n"

712 << " fused mem: " << fusedMem << "\n"

713 << " slice mem: " << sliceMemEstimate << "\n");

714

715 if (static_cast<long>(fusedMem) > srcMemSizeVal + dstMemSizeVal) {

716 LLVM_DEBUG(llvm::dbgs() << "Fusion is not profitable; NOT fusing.\n");

717 return false;

718 }

719 storageReduction =

720 100.0 *

721 (1.0 - fusedMem / (static_cast<double>(srcMemSizeVal) + dstMemSizeVal));

722

723 double additionalComputeFraction =

724 100.0 * (minFusedLoopNestComputeCost /

725 (static_cast<double>(srcLoopNestCost) + dstLoopNestCost) -

726 1);

727 (void)additionalComputeFraction;

728 LLVM_DEBUG({

729 std::stringstream msg;

730 msg << " fusion is most profitable at depth " << *dstLoopDepth << " with "

731 << std::setprecision(2) << additionalComputeFraction

732 << "% redundant computation and a ";

733 msg << (storageReduction ? std::to_string(*storageReduction) : "");

734 msg << "% storage reduction.\n";

735 llvm::dbgs() << msg.str();

736 });

737

738 return true;

739 }

740

741 namespace {

742

743

744

745

746

747

748

749

750

751

752

753

754

755

756

757

758

759

760

761

762

763

764

765

766

767

768

769

770

771

772

773

774

775

776

777

778

779

780

781

782

783

784

785

786

787

788

789 struct GreedyFusion {

790 public:

791

793

795

796 unsigned localBufSizeThreshold;

797

798 std::optional fastMemorySpace;

799

800

801 bool maximalFusion;

802

803

804 double computeToleranceThreshold;

805

807

809 std::optional fastMemorySpace, bool maximalFusion,

810 double computeToleranceThreshold)

811 : mdg(mdg), localBufSizeThreshold(localBufSizeThreshold),

812 fastMemorySpace(fastMemorySpace), maximalFusion(maximalFusion),

813 computeToleranceThreshold(computeToleranceThreshold) {}

814

815

816 void init() {

817

818

819 worklist.clear();

820 for (auto &idAndNode : mdg->nodes) {

821 const Node &node = idAndNode.second;

822 worklist.push_back(node.id);

823 }

824 }

825

826 void runSiblingFusionOnly() {

827 fuseSiblingNodes();

828 eraseUnusedMemRefAllocations();

829 }

830

831

832 void runProducerConsumerFusionOnly() {

833 fuseProducerConsumerNodes(

835 eraseUnusedMemRefAllocations();

836 }

837

838

839

840

841

842

843 void runGreedyFusion() {

844

845 fuseProducerConsumerNodes(1);

846 fuseSiblingNodes();

847 fuseProducerConsumerNodes(

849 eraseUnusedMemRefAllocations();

850 }

851

852

853

854 bool canCreatePrivateMemRef(Value memref,

856 unsigned producerId, unsigned consumerId,

857 bool removeSrcNode) {

858

860 return false;

861 const Node *consumerNode = mdg->getNode(consumerId);

862

863

864

865

866

867

868

869 if (srcEscapingMemRefs.count(memref) > 0 &&

870 (removeSrcNode || consumerNode->getStoreOpCount(memref) > 0))

871 return false;

872

873

874

877 return false;

878

879

880

881

882 if (removeSrcNode &&

883 any_of(mdg->outEdges[producerId], [&](const auto &edge) {

884 return edge.value == memref && edge.id != consumerId;

885 }))

886 return false;

887

888 return true;

889 }

890

891

892

893

894 void performFusionsIntoDest(unsigned dstId, unsigned maxSrcUserCount) {

895 LLVM_DEBUG(llvm::dbgs() << "Evaluating dst loop " << dstId << "\n");

896

897 if (mdg->nodes.count(dstId) == 0)

898 return;

899

900 auto *dstNode = mdg->getNode(dstId);

901

902 if (!isa(dstNode->op))

903 return;

904

905

906 if (dstNode->op->getNumResults() > 0)

907 return;

908

909 LLVM_DEBUG(llvm::dbgs() << "Evaluating dst loop " << dstId << "\n");

910

911

912

913

914

916 auto dstAffineForOp = cast(dstNode->op);

917

918

919

920 bool dstNodeChanged;

921 do {

922

923

924

925

926

927 dstNodeChanged = false;

930

931 for (unsigned srcId : llvm::reverse(srcIdCandidates)) {

932

933 auto *srcNode = mdg->getNode(srcId);

934 auto srcAffineForOp = cast(srcNode->op);

935

936 LLVM_DEBUG(llvm::dbgs()

937 << "Trying to fuse producer loop nest " << srcId

938 << " with consumer loop nest " << dstId << "\n");

939 LLVM_DEBUG(llvm::dbgs() << "Compute tolerance threshold: "

940 << computeToleranceThreshold << '\n');

941 LLVM_DEBUG(llvm::dbgs()

942 << "Producer loop nest:\n"

943 << *srcNode->op << "\n and consumer loop nest:\n"

944 << *dstNode->op << '\n');

945

946 LLVM_DEBUG(llvm::dbgs() << "Evaluating src loop " << srcId

947 << " for dst loop " << dstId << "\n");

948

949

950

951 if (isa(srcNode->op) && srcNode->op->getNumResults() > 0)

952 continue;

953

956 producerConsumerMemrefs);

957

958

959

960 if (any_of(producerConsumerMemrefs, [&](Value memref) {

962 maxSrcUserCount;

963 }))

964 continue;

965

966

967

968

971

972

973

976 if (fusedLoopInsPoint == nullptr)

977 continue;

978

979

980

981

982

983

984

987 unsigned numSurroundingLoops = surroundingLoops.size();

988

989

990

992 for (Operation *op : dstNode->loads)

993 if (producerConsumerMemrefs.count(

994 cast(op).getMemRef()) > 0)

995 dstMemrefOps.push_back(op);

996 for (Operation *op : dstNode->stores)

997 if (producerConsumerMemrefs.count(

998 cast(op).getMemRef()))

999 dstMemrefOps.push_back(op);

1000 unsigned dstLoopDepthTest =

1002

1003

1004

1005 unsigned maxLegalFusionDepth = 0;

1007 depthSliceUnions.resize(dstLoopDepthTest);

1009 for (unsigned i = 1; i <= dstLoopDepthTest; ++i) {

1012 i + numSurroundingLoops,

1013 &depthSliceUnions[i - 1], strategy);

1015 maxLegalFusionDepth = i;

1016 LLVM_DEBUG(llvm::dbgs()

1017 << "Found valid slice for depth: " << i << '\n');

1018 }

1019 }

1020

1021 if (maxLegalFusionDepth == 0) {

1022 LLVM_DEBUG(llvm::dbgs()

1023 << "Can't fuse: fusion is not legal at any depth\n");

1024 continue;

1025 }

1026

1027 LLVM_DEBUG(llvm::dbgs() << "Max legal depth for fusion: "

1028 << maxLegalFusionDepth << '\n');

1029

1030 double computeToleranceThresholdToUse = computeToleranceThreshold;

1031

1032

1033

1034

1035

1036

1038 LLVM_DEBUG(llvm::dbgs() << "Source nest has a cyclic dependence.\n");

1039

1040

1041

1042 if (maximalFusion) {

1043 auto srcForOp = cast(srcNode->op);

1044 auto dstForOp = cast(dstNode->op);

1045 int64_t sliceCost;

1046 int64_t fusedLoopNestComputeCost;

1048 srcForOp, dstForOp, maxLegalFusionDepth, depthSliceUnions,

1049 sliceCost, fusedLoopNestComputeCost);

1050 if (!fraction || fraction > 0) {

1051 LLVM_DEBUG(

1052 llvm::dbgs()

1053 << "Can't perform maximal fusion with a cyclic dependence "

1054 "and non-zero additional compute.\n");

1055 return;

1056 }

1057 } else {

1058

1059

1060 LLVM_DEBUG(llvm::dbgs()

1061 << "Setting compute tolerance to zero since "

1062 "source has a cylic dependence.\n");

1063 computeToleranceThresholdToUse = 0;

1064 }

1065 }

1066

1067

1068

1069

1070 unsigned bestDstLoopDepth = maxLegalFusionDepth;

1071 if (!maximalFusion) {

1072

1074 for (Operation *op : srcNode->stores)

1075 if (producerConsumerMemrefs.count(

1076 cast(op).getMemRef()))

1077 producerStores.push_back(op);

1078

1079 assert(!producerStores.empty() && "Expected producer store");

1081 dstAffineForOp, depthSliceUnions,

1082 maxLegalFusionDepth, &bestDstLoopDepth,

1083 computeToleranceThresholdToUse)) {

1084 continue;

1085 }

1086 }

1087

1088 assert(bestDstLoopDepth > 0 && "Unexpected loop fusion depth");

1090 depthSliceUnions[bestDstLoopDepth - 1];

1091 assert(!bestSlice.isEmpty() && "Missing slice union for depth");

1092

1093

1094

1095

1097 srcId, dstId, bestSlice, fusedLoopInsPoint, srcEscapingMemRefs,

1098 *mdg);

1099

1101 for (Value memref : producerConsumerMemrefs) {

1102 if (canCreatePrivateMemRef(memref, srcEscapingMemRefs, srcId, dstId,

1103 removeSrcNode)) {

1104

1105 LLVM_DEBUG(llvm::dbgs()

1106 << "Creating private memref for " << memref << '\n');

1107

1108 privateMemrefs.insert(memref);

1109 }

1110 }

1111

1112

1113 fuseLoops(srcAffineForOp, dstAffineForOp, bestSlice);

1114 dstNodeChanged = true;

1115

1116 LLVM_DEBUG(llvm::dbgs()

1117 << "Fused src loop " << srcId << " into dst loop " << dstId

1118 << " at depth " << bestDstLoopDepth << ":\n"

1119 << dstAffineForOp << "\n");

1120

1121

1122 if (fusedLoopInsPoint != dstAffineForOp)

1123 dstAffineForOp->moveBefore(fusedLoopInsPoint);

1124

1125

1126 mdg->updateEdges(srcNode->id, dstNode->id, privateMemrefs,

1127 removeSrcNode);

1128

1129

1130 if (!privateMemrefs.empty()) {

1131

1132

1133 Block *sliceInsertionBlock = bestSlice.insertPoint->getBlock();

1134

1135

1137 dstAffineForOp.walk([&](AffineWriteOpInterface storeOp) {

1138 Value storeMemRef = storeOp.getMemRef();

1139 if (privateMemrefs.count(storeMemRef) > 0)

1140 privateMemRefToStores[storeMemRef].push_back(storeOp);

1141 });

1142

1143

1144

1145

1146

1147 for (auto &memrefToStoresPair : privateMemRefToStores) {

1150 dstAffineForOp, storesForMemref, bestDstLoopDepth,

1151 fastMemorySpace, sliceInsertionBlock, localBufSizeThreshold);

1152 if (!newMemRef)

1153 continue;

1154

1156

1157 mdg->addEdge(newMemRefNodeId, dstId, newMemRef);

1158 }

1159

1160

1161

1162 dstNode = mdg->getNode(dstId);

1163 }

1164

1165

1167 dstLoopCollector.collect(dstAffineForOp);

1168

1169

1175

1176 if (removeSrcNode) {

1177 LLVM_DEBUG(llvm::dbgs()

1178 << "Removing src loop " << srcId << " after fusion\n");

1179

1180 srcAffineForOp.erase();

1182 srcNode = nullptr;

1183 }

1184 }

1185 } while (dstNodeChanged);

1186 }

1187

1188

1189

1190

1191

1192 void fuseProducerConsumerNodes(unsigned maxSrcUserCount) {

1193 LLVM_DEBUG(llvm::dbgs() << "--- Producer/Consumer Fusion ---\n");

1194 init();

1195 while (!worklist.empty()) {

1196 unsigned dstId = worklist.back();

1197 worklist.pop_back();

1198 performFusionsIntoDest(dstId, maxSrcUserCount);

1199 }

1200 }

1201

1202

1203

1204 void fuseSiblingNodes() {

1205 LLVM_DEBUG(llvm::dbgs() << "--- Sibling Fusion ---\n");

1206 init();

1207 while (!worklist.empty()) {

1208 unsigned dstId = worklist.back();

1209 worklist.pop_back();

1210

1211

1212 if (mdg->nodes.count(dstId) == 0)

1213 continue;

1214

1215 auto *dstNode = mdg->getNode(dstId);

1216

1217 if (!isa(dstNode->op))

1218 continue;

1219

1220 fuseWithSiblingNodes(dstNode);

1221 }

1222 }

1223

1224

1225 void fuseWithSiblingNodes(Node *dstNode) {

1227 std::pair<unsigned, Value> idAndMemref;

1228 auto dstAffineForOp = cast(dstNode->op);

1229

1230 while (findSiblingNodeToFuse(dstNode, &visitedSibNodeIds, &idAndMemref)) {

1231 unsigned sibId = idAndMemref.first;

1232 Value memref = idAndMemref.second;

1233

1234

1235 auto *sibNode = mdg->getNode(sibId);

1236

1237

1238 assert(sibNode->op->getBlock() == dstNode->op->getBlock());

1243 if (insertPointInst == nullptr)

1244 continue;

1245

1246

1247

1248

1250 sibNode->getLoadOpsForMemref(memref, &sibLoadOpInsts);

1251

1252 Operation *sibLoadOpInst = llvm::getSingleElement(sibLoadOpInsts);

1253

1254

1256 dstNode->getLoadOpsForMemref(memref, &dstLoadOpInsts);

1257

1258

1259

1260

1261

1262

1263

1266 unsigned numSurroundingLoops = surroundingLoops.size();

1269 unsigned dstLoopDepthTest = dstLoopIVs.size() - numSurroundingLoops;

1270 auto sibAffineForOp = cast(sibNode->op);

1271

1272

1274 depthSliceUnions.resize(dstLoopDepthTest);

1275 unsigned maxLegalFusionDepth = 0;

1277 for (unsigned i = 1; i <= dstLoopDepthTest; ++i) {

1280 i + numSurroundingLoops,

1281 &depthSliceUnions[i - 1], strategy);

1282

1284 maxLegalFusionDepth = i;

1285 }

1286

1287 LLVM_DEBUG(llvm::dbgs() << "Max legal depth for fusion: "

1288 << maxLegalFusionDepth << '\n');

1289

1290

1291 if (maxLegalFusionDepth == 0)

1292 continue;

1293

1294 double computeToleranceThresholdToUse = computeToleranceThreshold;

1295

1296

1297

1298

1299

1300

1302 LLVM_DEBUG(llvm::dbgs() << "Source nest has a cyclic dependence.\n");

1303

1304

1305

1306 if (maximalFusion) {

1307 auto dstForOp = cast(dstNode->op);

1308 int64_t sliceCost;

1309 int64_t fusedLoopNestComputeCost;

1311 sibAffineForOp, dstForOp, maxLegalFusionDepth, depthSliceUnions,

1312 sliceCost, fusedLoopNestComputeCost);

1313 if (!fraction || fraction > 0) {

1314 LLVM_DEBUG(

1315 llvm::dbgs()

1316 << "Can't perform maximal fusion with a cyclic dependence "

1317 "and non-zero additional compute.\n");

1318 return;

1319 }

1320 } else {

1321

1322

1323 LLVM_DEBUG(llvm::dbgs() << "Setting compute tolerance to zero since "

1324 "source has a cyclic dependence.\n");

1325 computeToleranceThresholdToUse = 0.0;

1326 }

1327 }

1328

1329 unsigned bestDstLoopDepth = maxLegalFusionDepth;

1330 if (!maximalFusion) {

1331

1332

1333

1334

1335 if (isFusionProfitable(sibAffineForOp, sibLoadOpInst, dstAffineForOp,

1336 depthSliceUnions, maxLegalFusionDepth,

1337 &bestDstLoopDepth,

1338 computeToleranceThresholdToUse))

1339 continue;

1340 }

1341

1342 assert(bestDstLoopDepth > 0 && "Unexpected loop fusion depth");

1343

1345 depthSliceUnions[bestDstLoopDepth - 1];

1346 assert(!bestSlice.isEmpty() &&

1347 "Fusion depth has no computed slice union");

1348

1349

1350

1351

1352 auto isMaximal = bestSlice.isMaximal();

1353 if (!isMaximal.value_or(false)) {

1354 LLVM_DEBUG(llvm::dbgs()

1355 << "Slice isn't maximal; not performing sibling fusion.\n");

1356 continue;

1357 }

1358

1359

1360

1361

1362 bool isInnermostInsertion = (bestDstLoopDepth == dstLoopDepthTest);

1363

1365 isInnermostInsertion);

1366

1367 auto dstForInst = cast(dstNode->op);

1368

1369 if (insertPointInst != dstForInst)

1370 dstForInst->moveBefore(insertPointInst);

1371

1372 LLVM_DEBUG(llvm::dbgs()

1373 << "Fused sibling nest " << sibId << " into destination nest "

1374 << dstNode->id << " at depth " << bestDstLoopDepth << ":\n"

1375 << dstAffineForOp << "\n");

1376

1377

1378 updateStateAfterSiblingFusion(sibNode, dstNode);

1379

1380

1381

1385 }

1386 }

1387

1388

1389

1390

1391

1392 bool findSiblingNodeToFuse(Node *dstNode,

1394 std::pair<unsigned, Value> *idAndMemrefToFuse) {

1395

1396

1397 auto canFuseWithSibNode = [&](Node *sibNode, Value memref) {

1398

1399

1400 if (sibNode->getLoadOpCount(memref) != 1)

1401 return false;

1402

1403

1406 return false;

1407

1408

1410 sibNode->getLoadAndStoreMemrefSet(&loadAndStoreMemrefSet);

1411 if (llvm::any_of(loadAndStoreMemrefSet, [=](Value memref) {

1413 }))

1414 return false;

1415

1416

1418 for (auto *storeOpInst : sibNode->stores) {

1419 storeMemrefs.insert(

1420 cast(storeOpInst).getMemRef());

1421 }

1422 return storeMemrefs.size() <= 1;

1423 };

1424

1425

1426 Block *block = dstNode->op->getBlock();

1427 for (unsigned i = 0, e = block->getNumArguments(); i != e; ++i) {

1429 auto loadOp = dyn_cast(user);

1430 if (!loadOp)

1431 continue;

1432

1435

1436

1437

1438 auto *it = llvm::find_if(loops, [&](AffineForOp loop) {

1439 return loop->getBlock() == &mdg->block;

1440 });

1441

1442 if (it == loops.end())

1443 continue;

1445 assert(sibNode != nullptr);

1446

1447 if (sibNode->id == dstNode->id)

1448 continue;

1449

1450 if (visitedSibNodeIds->count(sibNode->id) > 0)

1451 continue;

1452

1453 auto memref = loadOp.getMemRef();

1454 if (dstNode->getLoadOpCount(memref) == 0)

1455 continue;

1456

1457 if (canFuseWithSibNode(sibNode, memref)) {

1458 visitedSibNodeIds->insert(sibNode->id);

1459 idAndMemrefToFuse->first = sibNode->id;

1460 idAndMemrefToFuse->second = memref;

1461 return true;

1462 }

1463 }

1464 }

1465

1466

1467

1471

1472 if (dstNode->getLoadOpCount(inEdge.value) > 0 &&

1473 mdg->getNode(inEdge.id)->getStoreOpCount(inEdge.value) > 0)

1474 inEdges.push_back(inEdge);

1475 });

1476

1477

1478

1479 for (auto &inEdge : inEdges) {

1480

1484 unsigned sibNodeId = outEdge.id;

1485 if (visitedSibNodeIds->count(sibNodeId) > 0)

1486 return;

1487

1488 if (outEdge.id == dstNode->id || outEdge.value != inEdge.value)

1489 return;

1490 auto *sibNode = mdg->getNode(sibNodeId);

1491 if (!isa(sibNode->op))

1492 return;

1493

1494 if (canFuseWithSibNode(sibNode, outEdge.value)) {

1495

1496 outEdges.push_back(outEdge);

1497 }

1498 });

1499

1500

1501 if (!outEdges.empty()) {

1502 visitedSibNodeIds->insert(outEdges[0].id);

1503 idAndMemrefToFuse->first = outEdges[0].id;

1504 idAndMemrefToFuse->second = outEdges[0].value;

1505 return true;

1506 }

1507 }

1508 return false;

1509 }

1510

1511

1512

1513 void updateStateAfterSiblingFusion(Node *sibNode, Node *dstNode) {

1514

1515 mdg->updateEdges(sibNode->id, dstNode->id);

1516

1517

1518 auto dstForInst = cast(dstNode->op);

1520 dstLoopCollector.collect(dstForInst);

1521

1526 }

1527

1528

1529 void eraseUnusedMemRefAllocations() {

1531 if (pair.second > 0)

1532 continue;

1533 auto memref = pair.first;

1534

1536 continue;

1537

1539 if (isa_and_nonnullmemref::AllocOp(op))

1541 }

1542 }

1543 };

1544

1545 }

1546

1547

1548 void LoopFusion::runOnBlock(Block *block) {

1550 if (!g.init()) {

1551 LLVM_DEBUG(llvm::dbgs() << "MDG init failed\n");

1552 return;

1553 }

1554

1555 std::optional fastMemorySpaceOpt;

1556 if (fastMemorySpace.hasValue())

1557 fastMemorySpaceOpt = fastMemorySpace;

1558 unsigned localBufSizeThresholdBytes = localBufSizeThreshold * 1024;

1559 GreedyFusion fusion(&g, localBufSizeThresholdBytes, fastMemorySpaceOpt,

1560 maximalFusion, computeToleranceThreshold);

1561

1563 fusion.runProducerConsumerFusionOnly();

1565 fusion.runSiblingFusionOnly();

1566 else

1567 fusion.runGreedyFusion();

1568 }

1569

1570 void LoopFusion::runOnOperation() {

1571

1572

1573 getOperation()->walk([&](Operation *op) {

1575 for (Block &block : region.getBlocks()) {

1576 auto affineFors = block.getOps();

1577 if (!affineFors.empty() && !llvm::hasSingleElement(affineFors))

1578 runOnBlock(&block);

1579 }

1580 }

1581 });

1582 }

1583

1585 unsigned fastMemorySpace, uint64_t localBufSizeThreshold,

1586 bool maximalFusion, enum FusionMode affineFusionMode) {

1587 return std::make_unique(fastMemorySpace, localBufSizeThreshold,

1588 maximalFusion, affineFusionMode);

1589 }

MemRefDependenceGraph::Node Node

static Operation * getDominanceFilterForPrivateMemRefRepl(Block *sliceInsertionBlock, ArrayRef< Operation * > producerStores)

Get the operation that should act as a dominance filter while replacing memref uses with a private me...

static void getProducerCandidates(unsigned dstId, const MemRefDependenceGraph &mdg, SmallVectorImpl< unsigned > &srcIdCandidates)

Returns in 'srcIdCandidates' the producer fusion candidates for consumer 'dstId'.

static bool isFusionProfitable(AffineForOp srcForOp, ArrayRef< Operation * > producerStores, AffineForOp dstForOp, ArrayRef< ComputationSliceState > depthSliceUnions, unsigned maxLegalFusionDepth, unsigned *dstLoopDepth, double computeToleranceThreshold)

static bool isEscapingMemref(Value memref, Block *block)

A memref escapes in the context of the fusion pass if either:

static bool canRemoveSrcNodeAfterFusion(unsigned srcId, unsigned dstId, const ComputationSliceState &fusionSlice, Operation *fusedLoopInsPoint, const DenseSet< Value > &escapingMemRefs, const MemRefDependenceGraph &mdg)

Returns true if node 'srcId' can be removed after fusing it with node 'dstId'.

static Value createPrivateMemRef(AffineForOp forOp, ArrayRef< Operation * > storeOps, unsigned dstLoopDepth, std::optional< unsigned > fastMemorySpace, Block *sliceInsertionBlock, uint64_t localBufSizeThreshold)

static void gatherEscapingMemrefs(unsigned id, const MemRefDependenceGraph &mdg, DenseSet< Value > &escapingMemRefs)

Returns in 'escapingMemRefs' the memrefs from affine store ops in node 'id' that escape the block or ...

static std::optional< double > getAdditionalComputeFraction(AffineForOp srcForOp, AffineForOp dstForOp, unsigned depth, ArrayRef< ComputationSliceState > depthSliceUnions, int64_t &sliceCost, int64_t &fusedLoopNestComputeCost)

Returns the amount of additional (redundant) computation that will be done as a fraction of the total...

static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)

A multi-dimensional affine map Affine map's are immutable like Type's, and they are uniqued.

static AffineMap get(MLIRContext *context)

Returns a zero result affine map with no dimensions or symbols: () -> ().

Attributes are known-constant values of operations.

Block represents an ordered list of Operations.

Operation * findAncestorOpInBlock(Operation &op)

Returns 'op' if 'op' lies in this block, or otherwise finds the ancestor operation of 'op' that lies ...

BlockArgument getArgument(unsigned i)

unsigned getNumArguments()

void getValues(unsigned start, unsigned end, SmallVectorImpl< Value > *values) const

Returns the Values associated with variables in range [start, end).

This class helps build Operations.

Operation * create(const OperationState &state)

Creates an operation given the fields represented as an OperationState.

Operation is the basic unit of execution within MLIR.

bool isBeforeInBlock(Operation *other)

Given an operation 'other' that is within the same parent block, return whether the current operation...

Location getLoc()

The source location the operation was defined or derived from.

Block * getBlock()

Returns the operation block that contains this operation.

MutableArrayRef< Region > getRegions()

Returns the regions held by this operation.

void erase()

Remove this operation from its parent block and delete it.

This class contains a list of basic blocks and a link to the parent operation it is attached to.

This class represents an instance of an SSA value in the MLIR system, representing a computable value...

bool use_empty() const

Returns true if this value has no uses.

Type getType() const

Return the type of this value.

user_range getUsers() const

Operation * getDefiningOp() const

If this value is the result of an operation, return the operation that defines it.

FlatAffineValueConstraints is an extension of FlatLinearValueConstraints with helper functions for Af...

Describes the fusion strategy to be used in the Affine loop fusion utilities.

unsigned getNumDimAndSymbolVars() const

bool getFusionComputeCost(AffineForOp srcForOp, LoopNestStats &srcStats, AffineForOp dstForOp, LoopNestStats &dstStats, const ComputationSliceState &slice, int64_t *computeCost)

Computes and returns in 'computeCost', the total compute cost of fusing the 'slice' of the loop nest ...

void gatherProducerConsumerMemrefs(ArrayRef< Operation * > srcOps, ArrayRef< Operation * > dstOps, DenseSet< Value > &producerConsumerMemrefs)

Returns in 'producerConsumerMemrefs' the memrefs involved in a producer-consumer dependence between w...

int64_t getComputeCost(AffineForOp forOp, LoopNestStats &stats)

Computes the total cost of the loop nest rooted at 'forOp' using 'stats'.

void fuseLoops(AffineForOp srcForOp, AffineForOp dstForOp, const ComputationSliceState &srcSlice, bool isInnermostSiblingInsertionFusion=false)

Fuses 'srcForOp' into 'dstForOp' with destination loop block insertion point and source slice loop bo...

void getAffineForIVs(Operation &op, SmallVectorImpl< AffineForOp > *loops)

Populates 'loops' with IVs of the affine.for ops surrounding 'op' ordered from the outermost 'affine....

std::optional< int64_t > getMemoryFootprintBytes(AffineForOp forOp, int memorySpace=-1)

Gets the memory footprint of all data touched in the specified memory space in bytes; if the memory s...

std::unique_ptr< Pass > createLoopFusionPass(unsigned fastMemorySpace=0, uint64_t localBufSizeThreshold=0, bool maximalFusion=false, enum FusionMode fusionMode=FusionMode::Greedy)

Creates a loop fusion pass which fuses affine loop nests at the top-level of the operation the pass i...

FusionMode

Fusion mode to attempt.

unsigned getInnermostCommonLoopDepth(ArrayRef< Operation * > ops, SmallVectorImpl< AffineForOp > *surroundingLoops=nullptr)

Returns the innermost common loop depth for the set of operations in 'ops'.

bool getLoopNestStats(AffineForOp forOp, LoopNestStats *stats)

Collect loop nest statistics (eg.

AffineForOp sinkSequentialLoops(AffineForOp forOp)

bool hasCyclicDependence(AffineForOp root)

Returns true if the affine nest rooted at root has a cyclic dependence among its affine memory access...

mlir::Block * findInnermostCommonBlockInScope(mlir::Operation *a, mlir::Operation *b)

Find the innermost common Block of a and b in the affine scope that a and b are part of.

FusionResult canFuseLoops(AffineForOp srcForOp, AffineForOp dstForOp, unsigned dstLoopDepth, ComputationSliceState *srcSlice, FusionStrategy fusionStrategy=FusionStrategy::Generic)

Checks the feasibility of fusing the loop nest rooted at 'srcForOp' into the loop nest rooted at 'dst...

LogicalResult replaceAllMemRefUsesWith(Value oldMemRef, Value newMemRef, ArrayRef< Value > extraIndices={}, AffineMap indexRemap=AffineMap(), ArrayRef< Value > extraOperands={}, ArrayRef< Value > symbolOperands={}, Operation *domOpFilter=nullptr, Operation *postDomOpFilter=nullptr, bool allowNonDereferencingOps=false, bool replaceInDeallocOp=false)

Replaces all "dereferencing" uses of oldMemRef with newMemRef while optionally remapping the old memr...

std::optional< int64_t > getMemRefIntOrFloatEltSizeInBytes(MemRefType memRefType)

Returns the memref's element type's size in bytes where the elemental type is an int or float or a ve...

Include the generated interface declarations.

auto get(MLIRContext *context, Ts &&...params)

Helper method that injects context only if needed, this helps unify some of the attribute constructio...

AffineExpr simplifyAffineExpr(AffineExpr expr, unsigned numDims, unsigned numSymbols)

Simplify an affine expression by flattening and some amount of simple analysis.

AffineExpr getAffineDimExpr(unsigned position, MLIRContext *context)

These free functions allow clients of the API to not use classes in detail.

ComputationSliceState aggregates loop IVs, loop bound AffineMaps and their associated operands for a ...

bool isEmpty() const

Returns true if the computation slice is empty.

std::optional< bool > isMaximal() const

Returns true if the computation slice encloses all the iterations of the sliced loop nest.

Block::iterator insertPoint

enum mlir::affine::FusionResult::ResultEnum value

SmallVector< Operation *, 4 > memrefFrees

SmallVector< Operation *, 4 > loadOpInsts

SmallVector< Operation *, 4 > memrefStores

void collect(Operation *opToWalk)

SmallVector< Operation *, 4 > memrefLoads

SmallVector< Operation *, 4 > storeOpInsts

LoopNestStats aggregates various per-loop statistics (eg.

DenseMap< unsigned, SmallVector< Edge, 2 > > outEdges

Block & block

The block for which this graph is created to perform fusion.

unsigned addNode(Operation *op)

void addEdge(unsigned srcId, unsigned dstId, Value value)

DenseMap< unsigned, Node > nodes

bool hasDependencePath(unsigned srcId, unsigned dstId) const

void clearNodeLoadAndStores(unsigned id)

const Node * getForOpNode(AffineForOp forOp) const

Operation * getFusedLoopNestInsertionPoint(unsigned srcId, unsigned dstId) const

void updateEdges(unsigned srcId, unsigned dstId, const DenseSet< Value > &privateMemRefs, bool removeSrcId)

DenseMap< unsigned, SmallVector< Edge, 2 > > inEdges

void forEachMemRefInputEdge(unsigned id, const std::function< void(Edge)> &callback)

unsigned getOutEdgeCount(unsigned id, Value memref=nullptr) const

const Node * getNode(unsigned id) const

void removeNode(unsigned id)

void forEachMemRefOutputEdge(unsigned id, const std::function< void(Edge)> &callback)

void addToNode(unsigned id, ArrayRef< Operation * > loads, ArrayRef< Operation * > stores, ArrayRef< Operation * > memrefLoads, ArrayRef< Operation * > memrefStores, ArrayRef< Operation * > memrefFrees)

unsigned getIncomingMemRefAccesses(unsigned id, Value memref) const

DenseMap< Value, unsigned > memrefEdgeCount

A region of a memref's data space; this is typically constructed by analyzing load/store op's on this...

std::optional< int64_t > getConstantBoundingSizeAndShape(SmallVectorImpl< int64_t > *shape=nullptr, SmallVectorImpl< AffineMap > *lbs=nullptr) const

Returns a constant upper bound on the number of elements in this region if bounded by a known constan...

FlatAffineValueConstraints * getConstraints()

LogicalResult compute(Operation *op, unsigned loopDepth, const ComputationSliceState *sliceState=nullptr, bool addMemRefDimBounds=true, bool dropLocalVars=true, bool dropOuterIVs=true)

Computes the memory region accessed by this memref with the region represented as constraints symboli...

std::optional< int64_t > getRegionSize()

Returns the size of this MemRefRegion in bytes.

Eliminates variable at the specified position using Fourier-Motzkin variable elimination.