MLIR: lib/Dialect/Linalg/Transforms/Tiling.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
14
33 #include "llvm/ADT/STLExtras.h"
34 #include "llvm/Support/CommandLine.h"
35 #include
36
37 namespace mlir {
38 #define GEN_PASS_DEF_LINALGTILINGPASS
39 #include "mlir/Dialect/Linalg/Passes.h.inc"
40 }
41
42 using namespace mlir;
46
47 #define DEBUG_TYPE "linalg-tiling"
48
53 assert(allTileSizes.size() == map.getNumResults());
54
58
59
61 for (int idx = 0, e = tileSizes.size(), zerosCount = 0; idx < e; ++idx) {
63 static_cast<int64_t>(0)) {
64 shapeSizes.erase(shapeSizes.begin() + idx - zerosCount);
65 tileSizes.erase(tileSizes.begin() + idx - zerosCount);
66 ++zerosCount;
67 continue;
68 }
69 loopIndexToRangeIndex[idx] = idx - zerosCount;
70 }
71
72
74 for (unsigned idx = 0, e = tileSizes.size(); idx < e; ++idx)
75 res.push_back(Range{b.getIndexAttr(0), shapeSizes[idx], tileSizes[idx]});
76 return std::make_tuple(res, loopIndexToRangeIndex);
77 }
78
83 for (auto en : enumerate(allIvs)) {
84 auto rangeIndex = loopIndexToRangeIndex.find(en.index());
85 if (rangeIndex == loopIndexToRangeIndex.end())
86 continue;
87 en.value() = ivs[rangeIndex->second];
88 }
90 }
91
92
93
94
97 if (auto attr = llvm::dyn_cast_if_present(value)) {
98 assert(cast(attr).getValue().isStrictlyPositive() &&
99 "expected strictly positive tile size and divisor");
100 return;
101 }
102
103 Value zero = b.createarith::ConstantIndexOp(0);
104 Value condition = b.createarith::CmpIOp(arith::CmpIPredicate::sgt,
105 cast(value), zero);
106 b.createcf::AssertOp(
107 condition,
108 b.getStringAttr("expected strictly positive tile size and divisor"));
109 }
110
111 FailureOr
113 unsigned dimension,
114 unsigned targetSize) {
115
116 assert(!op.hasDynamicShape() &&
117 "cannot compute static multi-tile sizes for an op with dynamic shape");
118 assert(targetSize > 0 && "target size must be non-negative");
119 assert(dimension < op.getNumLoops() && "dimension overflow");
120
122 int64_t loopRange = op.getStaticLoopRanges()[dimension];
123 int64_t tripCount = loopRange / targetSize;
124
125 unsigned tileSize = targetSize;
126
127 spec.tileSizes.push_back(tileSize);
128 spec.tripCounts.push_back(tripCount);
129
130 int64_t remainderChunk = loopRange % targetSize;
131
132 while (tileSize > 1 && remainderChunk != 0) {
133
134 uint64_t maxPower = llvm::bit_floor(tileSize);
135 tileSize = maxPower == tileSize ? maxPower >> 1 : maxPower;
136
137 tripCount = remainderChunk / tileSize;
138
139 if (tripCount > 0) {
140 spec.tileSizes.push_back(tileSize);
141 spec.tripCounts.push_back(tripCount);
142 }
143
144 remainderChunk = remainderChunk % tileSize;
145 }
146
149 int64_t range) -> bool {
150 int64_t computedRange = 0;
151 for (auto [tileSize, tripCount] : llvm::zip(tileSizes, tripCounts))
152 computedRange += tileSize * tripCount;
153 return range == computedRange;
154 };
155
157 return failure();
158
159 return spec;
160 }
161
162 FailureOr
164 unsigned dimension,
166 bool emitAssertions) {
167
169 unsigned numLoops = loopRanges.size();
170
171
172 if (dimension >= numLoops)
173 return failure();
174
175
178 if (emitAssertions) {
180 }
181 Value targetSizeValue =
183
184
185
187 loopRanges[dimension].size);
189
190
195 };
196
197 Value tripCountValue = apply(s0.floorDiv(s1), {loopRange, targetSizeValue});
198 Value remainderChunkValue = apply(s0 % s1, {loopRange, targetSizeValue});
199
201 b, b.getLoc(), s0.floorDiv(s1), {loopRange, targetSizeValue});
202
203
204
206
207 assert(tileSizeInt > 0 && "target size must be non-negative");
208
209 spec.tileSizes.push_back(targetSizeValue);
210 spec.tripCounts.push_back(tripCountValue);
211
212 while (tileSizeInt > 1) {
213 uint64_t maxPower = llvm::bit_floor(tileSizeInt);
214 tileSizeInt = maxPower == tileSizeInt ? maxPower >> 1 : maxPower;
215 auto constStepOp =
217 tripCountValue = apply(s0.floorDiv(s1), {remainderChunkValue, constStepOp});
218
220 b, b.getLoc(), s0.floorDiv(s1), {remainderChunkValue, constStepOp});
221
222
223 if (Attribute attr = llvm::dyn_cast_if_present(tripCountSize)) {
224 auto intAttr = cast(attr);
225 bool isTripCountZero = intAttr.getValue().isZero();
226
227 if (!isTripCountZero) {
228 spec.tileSizes.push_back(constStepOp);
229 spec.tripCounts.push_back(tripCountValue);
230 }
231 } else {
232 spec.tileSizes.push_back(constStepOp);
233 spec.tripCounts.push_back(tripCountValue);
234 }
235
236 remainderChunkValue = apply(s0 % s1, {remainderChunkValue, constStepOp});
237 }
238
239 return spec;
240 }
241
242 FailureOr
244 int64_t targetSize, int64_t divisor) {
245 assert(!op.hasDynamicShape() &&
246 "cannot compute static multi-tile sizes for an op with dynamic shape");
247 assert(targetSize > 0 && "target size must be non-negative");
248 assert(divisor > 0 && "divisor must be non-negative");
249 assert(dimension < op.getNumLoops() && "dimension overflow");
250
252 int64_t tripCount = op.getStaticLoopRanges()[dimension];
253 int64_t a = tripCount / divisor;
254 int64_t t = (targetSize + divisor - 1) / divisor;
255 int64_t totalTripCount = (a + t - 1) / t;
256 spec.lowTileSize = (a / totalTripCount) * divisor;
262 tripCount) {
263 return failure();
264 }
265 return spec;
266 }
267
268 FailureOr
272
273 if (dimension >= op.getNumLoops())
274 return failure();
275
276
279 if (emitAssertions) {
282 }
283 Value targetSizeValue =
286
287
288
290 op.createFlatListOfOperandDims(b, b.getLoc());
291 AffineMap shapesToLoops = op.getShapesToLoopsMap();
294 allShapes);
295 Value tripCount =
297
298
304 };
305 Value a = apply(s0.floorDiv(s1), {tripCount, divisorValue});
306 Value t = apply((s0 + s1 - 1).floorDiv(s1), {targetSizeValue, divisorValue});
307 Value d = apply((s0 + s1 - 1).floorDiv(s1), {a, t});
308 Value s = apply(s0.floorDiv(s1) * s2, {a, d, divisorValue});
309 Value v = apply(s0 % s1, {a, d});
310 Value u = apply(s0 - s1, {d, v});
311
314 spec.highTileSize = apply(s0 + s1, {s, divisorValue});
315 spec.lowTripCount = u;
316 spec.highTripCount = v;
317
318
319
320
321
322 if (emitAssertions) {
324 Value coveredSize =
325 apply(s0 * s1 + s2 * s3, {spec.lowTileSize, spec.lowTripCount,
326 spec.highTileSize, spec.highTripCount});
327 Value equals = b.createarith::CmpIOp(arith::CmpIPredicate::eq,
328 coveredSize, tripCount);
329 b.createcf::AssertOp(
331 "could not compute dynamic multi-size tile shapes"));
332 }
333
334 return spec;
335 }
336
337
338
343 std::optional<int64_t> numThreadsConst = getConstantIntValue(numThreads);
344 std::optional<int64_t> iterSizeConst = getConstantIntValue(iterationSize);
345 if (!tileSizeConst || !numThreadsConst || !iterSizeConst)
346 return false;
347 return *tileSizeConst * (*numThreadsConst - 1) < *iterSizeConst;
348 }
349
350
355 vals);
356 }
357
358
363 vals);
364 }
365
366
367
371 bool omitTileOffsetBoundsCheck,
377
381 int64_t nLoops = loopRanges.size();
382 tiledOffsets.reserve(nLoops);
383 tiledSizes.reserve(nLoops);
384 for (unsigned loopIdx = 0, threadIdIdx = 0; loopIdx < nLoops; ++loopIdx) {
385 bool overflow = loopIdx >= numThreads.size();
386 bool isZero = !overflow && isZeroInteger(numThreads[loopIdx]);
387
388 if (overflow || isZero) {
389 tiledOffsets.push_back(loopRanges[loopIdx].offset);
390 tiledSizes.push_back(loopRanges[loopIdx].size);
391 continue;
392 }
393
394
398 OpFoldResult size = loopRanges[loopIdx].size;
399 OpFoldResult offset = loopRanges[loopIdx].offset;
400 OpFoldResult threadId = threadIds[threadIdIdx];
401
402
404 nominalTileSizes.has_value()
405 ? (*nominalTileSizes)[loopIdx]
407 b, loc, m.ceilDiv(n),
409
410
412 b, loc, i + j * m, {offset, threadId, tileSizePerThread});
413
415 b, loc, i + j * m - n,
416 {offset, nonZeroNumThreads[threadIdIdx], tileSizePerThread, size});
419 b, loc, -i + m, {offsetPerThread, size});
420 tileSizePerThread =
421 buildMin(b, loc, {sizeMinusOffsetPerThread, tileSizePerThread});
422 }
423
424 tiledOffsets.push_back(offsetPerThread);
425
426 if (!omitTileOffsetBoundsCheck &&
428 nonZeroNumThreads[threadIdIdx], size))
429 tileSizePerThread =
431
432 tiledSizes.push_back(tileSizePerThread);
433 ++threadIdIdx;
434 }
435 }
436
437 template
438 static FailureOr
442
443 auto nLoops = op.getNumLoops();
444
445 tileSizes = tileSizes.take_front(nLoops);
446
447 if (llvm::all_of(tileSizes, [](OpFoldResult ofr) {
449 })) {
451 tiledOp.op = cast(b.clone(*op.getOperation()));
453 tiledOp.op->result_end());
454 return tiledOp;
455 }
456
457
459 op.createFlatListOfOperandDims(b, op.getLoc());
460 AffineMap shapeSizesToLoopsMap = op.getShapesToLoopsMap();
461 if (!shapeSizesToLoopsMap)
462 return failure();
463
465 b, op.getLoc(), shapeSizesToLoopsMap, allShapeSizes, tileSizes);
466
468 for (const auto &attr : enumerate(op.getIteratorTypesArray())) {
469 if (loopIndexToRangeIndex.count(attr.index()))
470 iteratorTypes.push_back(attr.value());
471 }
472
473
474 auto invPermutationMap =
476 if (.interchangeVector.empty()) {
477
478
480 interchangeVector.reserve(options.interchangeVector.size());
481 for (auto pos : options.interchangeVector) {
482 auto it = loopIndexToRangeIndex.find(pos);
483 if (it == loopIndexToRangeIndex.end())
484 continue;
485 interchangeVector.push_back(it->second);
486 }
487
488
491 assert(invPermutationMap);
493 interchangeVector.end());
496 }
497
498
499
501 if (options.distribution) {
502 procInfo.resize(
503 iteratorTypes.size(),
504 linalg::ProcInfo{nullptr, nullptr, linalg::DistributionMethod::None});
505
507 for (const auto &iteratorType : llvm::enumerate(iteratorTypes)) {
509 break;
510 parallelLoopRanges.push_back(loopRanges[iteratorType.index()]);
511 }
512 auto returnedProcInfo =
513 options.distribution->procInfo(b, op.getLoc(), parallelLoopRanges);
514 unsigned procIdIdx = 0;
515
516 for (const auto &iteratorType : llvm::enumerate(iteratorTypes)) {
518 break;
519 procInfo[iteratorType.index()] = returnedProcInfo[procIdIdx++];
520 }
521 }
522
523
524 LinalgOp res = op;
526 auto tiledLoopBodyBuilder =
529 ivs.assign(localIvs.begin(), localIvs.end());
530
531
532
533
535 if (.interchangeVector.empty()) {
536 for (AffineExpr result : invPermutationMap.getResults())
537 interchangedIvs.push_back(
538 ivs[cast(result).getPosition()]);
539 } else {
540 interchangedIvs.assign(ivs.begin(), ivs.end());
541 }
542
543
544
545 assert(operandValuesToUse.size() ==
546 static_cast<size_t>(op->getNumOperands()) &&
547 "expect the number of operands and inputs and outputs to match");
551 allShapeSizes);
553 b, loc, op, valuesToTile, getAsOpFoldResult(interchangedIvs), tileSizes,
554 sizeBounds,
555 false);
556
559 res = clone(b, op, resultTensorTypes, tiledOperands);
560 tensorResults =
561 insertSlicesBack(builder, loc, op, tiledOperands, res->getResults());
562 return scf::ValueVector(tensorResults.begin(), tensorResults.end());
563 };
565 tiledLoopBodyBuilder, procInfo);
566
567
569
570
572 loops.reserve(ivs.size());
573 for (auto iv : ivs) {
574 if (isa(iv)) {
575 loops.push_back(cast(iv).getOwner()->getParentOp());
576 assert(loops.back() && "no owner found for induction variable!");
577 } else {
578
579
580 loops.push_back(nullptr);
581 }
582 }
583
584
585
586 Operation *outermostLoop = nullptr;
588 if ((outermostLoop = loop))
589 break;
590
592 res, loops, outermostLoop ? outermostLoop->getResults() : tensorResults};
593 }
594
596 RewriterBase &b, PartialReductionOpInterface op,
598 std::optional mapping) {
601
602
603
604
605 auto tilingInterfaceOp = cast(op.getOperation());
606
607
608
609
610
611 auto destinationStyleOp =
612 dyn_cast(op.getOperation());
613 if (!destinationStyleOp)
615
616
617 auto linalgOp = dyn_castlinalg::LinalgOp(op.getOperation());
618 if (!linalgOp)
620
621 SmallVector iterationDomain = tilingInterfaceOp.getIterationDomain(b);
622 if (op->getNumResults() != 1)
624 op, "don't support ops with multiple results for now");
625
627 tilingInterfaceOp.getLoopIteratorTypes();
629 linalgOp.getReductionDims(redDims);
630 if (redDims.size() != 1)
632 op, "only support ops with one reduction dimension.");
633 if (!tileSizes.empty() && tileSizes.size() != numThreads.size())
634 return b.notifyMatchFailure(op, "if tile sizes are present it must have as "
635 "many elements as number of threads");
636 int reductionDim = static_cast<int>(redDims.front());
637
638 if (redDims.front() >= numThreads.size())
640 op, "reduction dimension must be mapped to threads");
641
642
643 FailureOr<SmallVector> maybeInitTensors =
644 op.generateInitialTensorForPartialReduction(b, loc, numThreads,
645 reductionDim);
646 if (failed(maybeInitTensors))
648 op, "Failed to create inital tensors for partial reduction");
650
651
654 return b.notifyMatchFailure(op, "failed to get destination tensors");
655
657
662
663
664 scf::ForallOp forallOp = b.createscf::ForallOp(
665 loc, getAsOpFoldResult(materializedNonZeroNumThreads), initTensors,
666 mapping);
667
668
669
672 false,
673 std::nullopt, tiledOffsets,
674 tiledSizes);
675
676
677
680 {
681
684
686 for (Value initOperand : destinationStyleOp.getDpsInits()) {
687 auto *it = llvm::find(dest, initOperand);
688 assert(it != dest.end() && "dest operand not found in dest");
689 unsigned destNum = std::distance(dest.begin(), it);
695 outOffsets[reductionDim] = forallOp.getInductionVars()[0];
696
697 tiledDpsInitOperands.push_back(b.createtensor::ExtractSliceOp(
698 loc, cast(initOperand.getType()),
699 destBbArgs[destNum], outOffsets, sizes, strides));
700 }
701
702
703
704
707 for (auto [initOperandPtr, tiledInitValue] : llvm::zip_equal(
708 cast(clonedOp).getDpsInitsMutable(),
709 tiledDpsInitOperands)) {
710 initOperandPtr.set(tiledInitValue);
711 }
712 });
713
714
715 if (tileSizes.empty()) {
716 FailureOr tilingResult =
717 cast(clonedOp).getTiledImplementation(
718 b, tiledOffsets, tiledSizes);
719 if (failed(tilingResult))
720 return clonedOp->emitError("Failed to tile op: ");
721 if (tilingResult->tiledOps.size() != 1) {
722 return clonedOp->emitError("expected a single produced tiled op, got ")
723 << tilingResult->tiledOps.size();
724 }
725 tiledOp = tilingResult->tiledOps.front();
726 tilingResults = tilingResult->tiledValues;
727 } else {
729 FailureOr maybeTiled = tileLinalgOpImplscf::ForOp(
730 b, cast(clonedOp), tileSizes, options);
731 if (failed(maybeTiled))
733
736 materializedNonZeroNumThreads);
737 if (maybeTiled->loops.size() != 1) {
738 return clonedOp->emitError("expected a single produced loop");
739 }
740 tiledOp = maybeTiled->op;
741 tilingResults = maybeTiled->loops.front()->getResults();
742 }
743
745 }
746
747
748 for (auto [index, result, bbArg] : llvm::zip(
749 llvm::seq(0, dest.size()), tilingResults, destBbArgs)) {
750
753
755 if (failed(tilingInterfaceOp.getResultTilePosition(
756 b, index, tiledOffsets, tiledSizes, resultOffsets, resultSizes)))
757 return op->emitOpError("output offsets couldn't be calculated");
759 int64_t offIdx = 0;
760 int64_t sizeIdx = 0;
761 for (int64_t i = 0, e = numThreads.size(); i < e; ++i) {
762 if (i == reductionDim) {
763 resultOffsetsRank.push_back(forallOp.getInductionVars()[0]);
764 resultSizesRank.push_back(b.getIndexAttr(1));
765 continue;
766 }
767 resultOffsetsRank.push_back(resultOffsets[offIdx++]);
768 resultSizesRank.push_back(resultSizes[sizeIdx++]);
769 }
772
773
774
776 b.createtensor::ParallelInsertSliceOp(
777 loc, result, bbArg, resultOffsetsRank, resultSizesRank, strides);
778 }
779
780
782 FailureOr mergeResult =
783 op.mergeReductions(b, loc, forallOp->getResults(), reductionDim);
784 if (failed(mergeResult)) {
785 return failure();
786 }
787 b.replaceOp(op, mergeResult->replacements);
788
789
792 results.loops = forallOp;
794 results.mergeOps.append(mergeResult->mergeOps);
795 return results;
796 }
797
798 template
803
804 if (.tileSizeComputationFunction)
805 return failure();
806
807
808
809
810 auto nLoops = op.getNumLoops();
813 if (tileSizeVector.size() < nLoops) {
814 tileSizeVector.append(nLoops - tileSizeVector.size(), b.getIndexAttr(0));
815 }
816
817 return tileLinalgOpImpl(b, op, tileSizeVector, options);
818 }
819
820 FailureOr
823 switch (options.loopType) {
825 return tileLinalgOpImplscf::ForOp(b, op, options);
826 case LinalgTilingLoopType::ParallelLoops:
827 return tileLinalgOpImplscf::ParallelOp(b, op, options);
828 default:;
829 }
830 return failure();
831 }
832
833 namespace {
834
835 template <typename... OpTypes>
836 class CanonicalizationPatternList;
837
838 template <>
839 class CanonicalizationPatternList<> {
840 public:
842 };
843
844 template <typename OpTy, typename... OpTypes>
845 class CanonicalizationPatternList<OpTy, OpTypes...> {
846 public:
848 OpTy::getCanonicalizationPatterns(patterns, patterns.getContext());
849 CanonicalizationPatternList<OpTypes...>::insert(patterns);
850 }
851 };
852 }
853
859 }
860
863 auto *ctx = patterns.getContext();
864 affine::AffineApplyOp::getCanonicalizationPatterns(patterns, ctx);
865 affine::AffineForOp::getCanonicalizationPatterns(patterns, ctx);
866 affine::AffineMinOp::getCanonicalizationPatterns(patterns, ctx);
867 affine::AffineMaxOp::getCanonicalizationPatterns(patterns, ctx);
868 arith::ConstantIndexOp::getCanonicalizationPatterns(patterns, ctx);
869
870 memref::SubViewOp::getCanonicalizationPatterns(patterns, ctx);
871 memref::ViewOp::getCanonicalizationPatterns(patterns, ctx);
872
873 scf::ForOp::getCanonicalizationPatterns(patterns, ctx);
874 scf::ParallelOp::getCanonicalizationPatterns(patterns, ctx);
875
876 tensor::CastOp::getCanonicalizationPatterns(patterns, ctx);
877 tensor::EmptyOp::getCanonicalizationPatterns(patterns, ctx);
878 tensor::ExtractSliceOp::getCanonicalizationPatterns(patterns, ctx);
879 tensor::InsertSliceOp::getCanonicalizationPatterns(patterns, ctx);
880 tensor::PadOp::getCanonicalizationPatterns(patterns, ctx);
881 ctx->getLoadedDialect()->getCanonicalizationPatterns(patterns);
882
883 CanonicalizationPatternList<
884 #define GET_OP_LIST
885 #include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc"
887 }
DiagnosedSilenceableFailure doit(RewriterBase &rewriter, OpTy target, transform::ApplyToEachResultList &results, transform::TransformState &state)
static llvm::ManagedStatic< PassManagerOptions > options
static bool canOmitTileOffsetInBoundsCheck(OpFoldResult tileSize, OpFoldResult numThreads, OpFoldResult iterationSize)
Returns true if the maximum tile offset, tileSize * (numThreads - 1), is less than iterationSize.
static void emitIsPositiveIndexAssertion(ImplicitLocOpBuilder &b, OpFoldResult value)
Asserts that the given index-typed value is strictly positive.
static OpFoldResult buildMax(OpBuilder &b, Location loc, ArrayRef< OpFoldResult > vals)
Build an affine_max of all the vals.
static void calculateTileOffsetsAndSizes(RewriterBase &b, Location loc, scf::ForallOp forallOp, ArrayRef< OpFoldResult > numThreads, SmallVector< Range > loopRanges, bool omitTileOffsetBoundsCheck, std::optional< ArrayRef< OpFoldResult >> nominalTileSizes, SmallVector< OpFoldResult > &tiledOffsets, SmallVector< OpFoldResult > &tiledSizes)
Fill out the tiledOffsets and tiledSizes to be used to tile to a given number of threads.
static FailureOr< TiledLinalgOp > tileLinalgOpImpl(RewriterBase &b, LinalgOp op, ArrayRef< OpFoldResult > tileSizes, const LinalgTilingOptions &options)
static OpFoldResult buildMin(OpBuilder &b, Location loc, ArrayRef< OpFoldResult > vals)
Build an affine_min of all the vals.
Base type for affine expression.
AffineExpr floorDiv(uint64_t v) const
A multi-dimensional affine map. Affine maps are immutable like Types, and they are uniqued.
static AffineMap getMultiDimIdentityMap(unsigned numDims, MLIRContext *context)
Returns an AffineMap with 'numDims' identity result dim exprs.
unsigned getNumResults() const
static AffineMap getPermutationMap(ArrayRef< unsigned > permutation, MLIRContext *context)
Returns an AffineMap representing a permutation.
Attributes are known-constant values of operations.
IntegerAttr getIndexAttr(int64_t value)
AffineExpr getAffineSymbolExpr(unsigned position)
StringAttr getStringAttr(const Twine &bytes)
MLIRContext * getContext() const
ImplicitLocOpBuilder maintains a 'current location', allowing use of the create<> method without spec...
Location getLoc() const
Accessors for the implied location.
OpTy create(Args &&...args)
Create an operation of specific op type at the current insertion point and location.
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
MLIRContext * getContext() const
Return the context this location is uniqued in.
MLIRContext is the top-level object for a collection of MLIR operations.
RAII guard to reset the insertion point of the builder when destroyed.
This class helps build Operations.
Operation * clone(Operation &op, IRMapping &mapper)
Creates a deep copy of the specified operation, remapping any operands that use values outside of the...
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
void setInsertionPointToEnd(Block *block)
Sets the insertion point to the end of the specified block.
void createOrFold(SmallVectorImpl< Value > &results, Location location, Args &&...args)
Create an operation of specific op type at the current insertion point, and immediately try to fold i...
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
void setInsertionPointAfter(Operation *op)
Sets the insertion point to the node after the specified operation, which will cause subsequent inser...
This class represents a single result from folding an operation.
Operation is the basic unit of execution within MLIR.
InFlightDiagnostic emitError(const Twine &message={})
Emit an error about fatal conditions with this operation, reporting up to any diagnostic handlers tha...
result_range getResults()
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
std::enable_if_t<!std::is_convertible< CallbackT, Twine >::value, LogicalResult > notifyMatchFailure(Location loc, CallbackT &&reasonCallback)
Used to notify the listener that the IR failed to be rewritten because of a match failure,...
virtual void replaceOp(Operation *op, ValueRange newValues)
Replace the results of the given (original) operation with the specified list of values (replacements...
virtual void eraseOp(Operation *op)
This method erases an operation that is known to have no uses.
void modifyOpInPlace(Operation *root, CallableT &&callable)
This method is a utility wrapper around an in-place modification of an operation.
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Specialization of arith.constant op that returns an integer of index type.
SmallVector< OpFoldResult > makeComposedFoldedMultiResultAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Variant of makeComposedFoldedAffineApply suitable for multi-result maps.
AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Returns a composed AffineApplyOp by composing map and operands with other AffineApplyOps supplying th...
OpFoldResult makeComposedFoldedAffineMax(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Constructs an AffineMaxOp that computes a maximum across the results of applying map to operands,...
OpFoldResult makeComposedFoldedAffineMin(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Constructs an AffineMinOp that computes a minimum across the results of applying map to operands,...
OpFoldResult makeComposedFoldedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Constructs an AffineApplyOp that applies map to operands after composing the map with the maps of any...
void mapLoopToProcessorIds(scf::ForOp forOp, ArrayRef< Value > processorId, ArrayRef< Value > numProcessors)
Maps forOp for execution on a parallel grid of virtual processorIds of size given by numProcessors.
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
SmallVector< Value > makeTiledShapes(OpBuilder &builder, Location loc, LinalgOp linalgOp, ValueRange valuesToTile, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds, bool omitPartialTileCheck)
Creates extract_slice/subview ops for all valuesToTile of the given linalgOp with builder,...
void transformIndexOps(RewriterBase &b, LinalgOp op, SmallVectorImpl< Value > &ivs, const LoopIndexToRangeIndexMap &loopIndexToRangeIndex)
All indices returned by IndexOp should be invariant with respect to tiling.
bool isParallelIterator(utils::IteratorType iteratorType)
Check if iterator type has "parallel" semantics.
void populateLinalgTilingCanonicalizationPatterns(RewritePatternSet &patterns)
SmallVector< Value > insertSlicesBack(OpBuilder &builder, Location loc, LinalgOp op, ValueRange operands, ValueRange results)
Creates insert_slice ops that insert results back into larger tensors they were originally extracted ...
std::tuple< SmallVector< Range, 4 >, LoopIndexToRangeIndexMap > makeTiledLoopRanges(RewriterBase &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > allShapeSizes, ArrayRef< OpFoldResult > allTileSizes)
void offsetIndices(OpBuilder &b, LinalgOp linalgOp, ArrayRef< OpFoldResult > offests)
Add the specified offsets to any linalg.index ops contained in the given linalgOp.
FailureOr< StaticMultiSizeSpecification > computeStaticMultiTileSizes(LinalgOp op, unsigned dimension, int64_t targetSize, int64_t divisor)
FailureOr< ContinuousTileSizeSpecification > computeContinuousTileSizes(OpBuilder &builder, TilingInterface op, unsigned dimension, OpFoldResult targetSize, bool emitAssertions)
FailureOr< StaticContinuousTileSizeSpecification > computeStaticContinuousTileSizes(LinalgOp op, unsigned dimension, unsigned targetSize)
FailureOr< ForallReductionTilingResult > tileReductionUsingForall(RewriterBase &b, PartialReductionOpInterface op, ArrayRef< OpFoldResult > numThreads, ArrayRef< OpFoldResult > tileSizes={}, std::optional< ArrayAttr > mapping=std::nullopt)
Method to tile a reduction to parallel iterations computing partial reductions.
FailureOr< TiledLinalgOp > tileLinalgOp(RewriterBase &b, LinalgOp op, const LinalgTilingOptions &options)
RewritePatternSet getLinalgTilingCanonicalizationPatterns(MLIRContext *ctx)
Canonicalization patterns relevant to apply after tiling patterns.
SmallVector< Type > getTensorOutputTypes(LinalgOp op, ValueRange operands)
Returns the list of tensor output types produced when the given structured operation op is applied to...
FailureOr< MultiSizeSpecification > computeMultiTileSizes(OpBuilder &builder, LinalgOp op, unsigned dimension, OpFoldResult targetSize, OpFoldResult divisor, bool emitAssertions=true)
Emits the IR computing the multi-sized tiling specification with two tile sizes not exceeding targetS...
SmallVector< Value > ValueVector
An owning vector of values, handy to return from functions.
LogicalResult getOrCreateDestinations(OpBuilder &b, Location loc, Operation *op, SmallVector< Value > &result)
This is a helper function for DestinationStyleOpInterface.
Include the generated interface declarations.
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
void bindDims(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to DimExpr at positions: [0 .. sizeof...(exprs)].
AffineMap inversePermutation(AffineMap map)
Returns a map of codomain to domain dimensions such that the first codomain dimension for a particula...
const FrozenRewritePatternSet & patterns
bool isZeroInteger(OpFoldResult v)
Return true if v is an IntegerAttr with value 0.
void bindSymbols(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to SymbolExpr at positions: [0 .. sizeof...(exprs)].
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
Operation * clone(OpBuilder &b, Operation *op, TypeRange newResultTypes, ValueRange newOperands)
OpFoldResult getAsOpFoldResult(Value val)
Given a value, try to extract a constant Attribute.
SmallVector< scf::ForOp, 8 > Loops
Tile a nest of standard for loops rooted at rootForOp by finding such parametric tile sizes that the ...
void applyPermutationToVector(SmallVector< T, N > &inVec, ArrayRef< int64_t > permutation)
Apply the permutation defined by permutation to inVec.
Represents a range (offset, size, and stride) where each element of the triple may be dynamic or stat...
Transformation information returned after reduction tiling.
SmallVector< Operation * > mergeOps
The final reduction operation merging all the partial reductions.
SmallVector< Value > initialValues
Initial values used for partial reductions.
scf::ForallOp loops
The scf.forall operation that iterates over the tiles.
SmallVector< Operation * > parallelTiledOps
The partial reduction tiled op generated.
A description of a multi-size tiling comprising tile sizes and numbers of tiles, expressed as Values ...
Callback function type used to get processor ID, and number of processors used for distribution for a...
Perform standalone tiling of a single LinalgOp by tileSizes.
SmallVector< Value, 4 > tensorResults
SmallVector< T > tileSizes
Tile sizes.
SmallVector< T > tripCounts
Number of tiles associated with each size.
T lowTripCount
Number of tiles associated with each size.
Eliminates variable at the specified position using Fourier-Motzkin variable elimination.