MLIR: lib/Dialect/SCF/Transforms/TileUsingInterface.cpp File Reference (original) (raw)

Go to the source code of this file.

Functions
static SmallVector< int64_t > fillInterchangeVector (ArrayRef< int64_t > interchangeVector, size_t iterationDomainSize)
Helper method to adjust the interchange vector to match the iteration domain. More...
static LogicalResult verifyTileSizeOptions (RewriterBase &rewriter, Location loc, const scf::SCFTilingOptions &options)
Verify the tile size options are set in a consistent manner. More...
static std::tuple< SmallVector< OpFoldResult >, SmallVector< OpFoldResult > > getUserTileSizesAndNumThreads (RewriterBase &rewriter, TilingInterface op, ArrayRef< Range > iterationDomain, const scf::SCFTilingOptions &options)
Method to instantiate the tile sizes and/or number of threads specified by the user. More...
static void checkSafeToTileToForall (TilingInterface op, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > numThreads)
Checks if any of the tiled loops are not parallel. More...
static bool tileDividesIterationDomain (Range loopRange)
Check if stride evenly divides the trip count size - offset. More...
static OpFoldResult getBoundedTileSize (OpBuilder &b, Location loc, Range loopRange, OpFoldResult offset, OpFoldResult tileSize)
Returns the bounded tile size given the current offset, loopRange and tileSize, i.e., min(tileSize, range.end() - offset). More...
static bool canOmitTileOffsetInBoundsCheck (OpFoldResult tileSize, OpFoldResult numThreads, OpFoldResult iterationSize)
Returns true if the maximum tile offset, tileSize * (numThreads - 1), is less than iterationSize. More...
static std::tuple< SmallVector< OpFoldResult >, SmallVector< OpFoldResult > > getTileOffsetAndSizes (RewriterBase &rewriter, Location loc, ValueRange ivs, ArrayRef< Range > iterationDomain, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > numThreads)
Compute the OpFoldResults that represents the multi-dimensional offsets and sizes of the tile of the iteration space that the innermost loop body of the generated tiled loops corresponds to. More...
static std::tuple< SmallVector< OpFoldResult >, SmallVector< OpFoldResult >, SmallVector< OpFoldResult > > getLoopBounds (RewriterBase &rewriter, Location loc, ArrayRef< Range > loopRanges, ArrayRef< OpFoldResult > tileSizes)
Function to return the bounds of the loops to be generated. More...
static Operation * cloneOpAndUpdateDestinationArgs (RewriterBase &rewriter, Operation *op, ValueRange newDestArgs)
Clones the operation and updates the destination if the operation implements the DestinationStyleOpInterface. More...
static LogicalResult generateLoopNestUsingForOp (RewriterBase &rewriter, Location loc, ArrayRef< Range > loopRanges, ArrayRef< OpFoldResult > tileSizes, ValueRange destinationTensors, YieldTiledValuesFn yieldTiledValuesFn, SmallVector< LoopLikeOpInterface > &loops)
Generate the tile-loop nest using scf.for operation. More...
static LogicalResult generateLoopNestUsingForallOp (RewriterBase &rewriter, Location loc, ArrayRef< Range > loopRanges, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > numThreads, ArrayRef< Attribute > mappingVector, ValueRange destinationTensors, YieldTiledValuesFn tiledBodyFn, SmallVector< LoopLikeOpInterface > &loops)
Generate the tile-loop nest using scf.forall operation. More...
static LogicalResult generateLoopNest (RewriterBase &rewriter, Location loc, const scf::SCFTilingOptions &options, ArrayRef< Range > loopRanges, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > numThreads, ValueRange destinationTensors, YieldTiledValuesFn tiledBodyFn, SmallVector< LoopLikeOpInterface > &loops)
Generate the tile-loop nest using the loop construct specified in options. More...
static FailureOr< SmallVector< Value > > createInitialTensorsForTiling (RewriterBase &rewriter, TilingInterface op, ArrayRef< OpFoldResult > tileSizes, const scf::SCFTilingOptions &options)
static FailureOr< TilingResult > getTiledImplementation (RewriterBase &rewriter, TilingInterface op, ValueRange regionIterArg, ArrayRef< OpFoldResult > offsets, ArrayRef< OpFoldResult > sizes, const scf::SCFTilingOptions &options)
static LogicalResult getResultTilePosition (RewriterBase &rewriter, int64_t index, Value tiledResult, TilingInterface op, ArrayRef< OpFoldResult > offsets, ArrayRef< OpFoldResult > sizes, SmallVector< OpFoldResult > &resultOffset, SmallVector< OpFoldResult > &resultSize, const scf::SCFTilingOptions &options)
static FailureOr< MergeResult > mergeTilingResults (RewriterBase &rewriter, TilingInterface op, ValueRange partialResults, const scf::SCFTilingOptions &options)
template<typename LoopType>
FailureOr< LoopLikeOpInterface > yieldTiledValuesAndReplaceLoop (LoopType loopOp, RewriterBase &rewriter, ValueRange newInitOperands, YieldTiledValuesFn yieldTiledValuesFn)
Append the specified additional newInitOperands operands to the loop's existing init operands (or similar), and replace loopOp with the new loop that has the additional init operands. More...
template<>
FailureOr< LoopLikeOpInterface > yieldTiledValuesAndReplaceLoop< scf::ForOp > (scf::ForOp loopOp, RewriterBase &rewriter, ValueRange newInitOperands, YieldTiledValuesFn yieldTiledValuesFn)
Implementation of yieldTiledValuesAndReplaceLoop for scf.for. More...
template<>
FailureOr< LoopLikeOpInterface > yieldTiledValuesAndReplaceLoop< scf::ForallOp > (scf::ForallOp loopOp, RewriterBase &rewriter, ValueRange newInitOperands, YieldTiledValuesFn yieldTiledValuesFn)
Implementation of yieldTiledValuesAndReplaceLoop for scf.forall. More...
FailureOr< LoopLikeOpInterface > yieldTiledValuesAndReplaceLoop (LoopLikeOpInterface loopLikeOp, RewriterBase &rewriter, ValueRange newInitOperands, YieldTiledValuesFn yieldTiledValuesFn)
Implementation of yieldTiledValuesAndReplaceLoop for LoopLikeOpInterface, that just dispatches to the implementation for each supported loop type. More...
static LogicalResult addInitOperandsToLoopNest (RewriterBase &rewriter, MutableArrayRef< LoopLikeOpInterface > loops, ValueRange newInitValues, YieldTiledValuesFn getNewTiledYieldsFn)
Method to add new init values to a loop nest. More...
static std::tuple< OpResult, std::optional< OpOperand * > > getUntiledProducerFromSliceSource (OpOperand *source, ArrayRef< LoopLikeOpInterface > loops)
Return the untiled producer whose slice is used in a tiled consumer. More...
static LogicalResult checkAssumptionForFusingConsumer (tensor::InsertSliceOp candidateSliceOp)
A utility function that checks whether the only use of the result of a tensor.insert_slice op is in a scf.yield op. More...
static FailureOr< Operation * > getFirstUserOfLoop (Operation *loopOp)
A utility to get the first user of the given loopOp. More...
static FailureOr< llvm::SetVector< Operation * > > checkAssumptionForLoop (Operation *loopOp, Operation *consumerOp, bool reorderOperations)
This utility currently checks whether the first userOp of loop is NOT before the last defineOp of consumer operand. More...
static FailureOr< OpOperand * > getConsumerFromLoopUses (RewriterBase &rewriter, Operation *loopOp, unsigned resultNumber)
Fetches the OpOperand of the first valid user (and use) of the value val which implements TilingInterface and DestinationStyleOpInterface. More...
static bool isPerfectlyNestedForLoops (MutableArrayRef< LoopLikeOpInterface > loops)
Check that the loop is perfectly nested. More...
static FailureOr< OpOperand * > getUntiledConsumerFromSlice (RewriterBase &rewriter, tensor::InsertSliceOp candidateSliceOp, MutableArrayRef< LoopLikeOpInterface > loops)
Fetch the untiled consumer of the outermost scf.for's result which is yielded by a tensor.insert_slice from the innermost scf.for. More...
static FailureOr< OpOperand * > getUntiledConsumerFromSlice (RewriterBase &rewriter, tensor::ParallelInsertSliceOp candidateSliceOp, MutableArrayRef< LoopLikeOpInterface > loops)
Fetch the first untiled consumer of a scf.forall's result which is yielded by a tensor.parallel_insert_slice. More...
static FailureOr< OpOperand * > getUntiledConsumerFromSlice (RewriterBase &rewriter, Operation *sliceOp, MutableArrayRef< LoopLikeOpInterface > loops)
A utility to fetch an untiled consumer of tensor.insert_slice/tensor.parallel_insert_slice. More...

DEBUG_TYPE

#define DEBUG_TYPE "tile-using-interface"

YieldTiledValuesFn

A function that allows returning additional yielded values during yieldTiledValuesAndReplaceLoop.

Definition at line 368 of file TileUsingInterface.cpp.

addInitOperandsToLoopNest()

canOmitTileOffsetInBoundsCheck()

checkAssumptionForFusingConsumer()

static LogicalResult checkAssumptionForFusingConsumer ( tensor::InsertSliceOp candidateSliceOp) static

checkAssumptionForLoop()

This utility currently checks whether the first userOp of loop is NOT before the last defineOp of consumer operand.

This is required because the whole loop structure needs to be moved right before the firstUserOfLoop. This utility thus helps ensure that no invalid IR is formed, i.e. that no operation in the backward slice of consumerOp is dominated by the firstUserOfLoop. For example:

%0 = scf.for() {

...

}

...

%1 = firstUserOfLoop(%0)

...

%2 = lastDefOfConsumerOperand

...

%3 = consumerOp(%2)

If the firstUserOfLoop is before lastDefOfConsumerOperand, then it would be invalid to move the loopOp right before the firstUserOfLoop, because doing so would violate the use-def chain:

%0:2 = scf.for() {

%3 = tiledConsumerOp(%2)

}

%1 = firstUserOfLoop(%0)

...

%2 = lastDefOfConsumerOperand

Parameters

loopOp loop operation
consumerOp consumer operation
reorderOperations flag that controls whether to reorder the backward slice w.r.t. the defineOp of consumerOp operands.

Returns

The computed backward slice of consumerOp, excluding those operations that already dominate firstUserOfLoop.

Definition at line 1759 of file TileUsingInterface.cpp.

References mlir::getBackwardSlice(), getFirstUserOfLoop(), mlir::Operation::getOperands(), options, and mlir::DominanceInfo::properlyDominates().

Referenced by getConsumerFromLoopUses(), and mlir::scf::tileAndFuseConsumerOfSlice().

checkSafeToTileToForall()

cloneOpAndUpdateDestinationArgs()

createInitialTensorsForTiling()

fillInterchangeVector()

static SmallVector<int64_t> fillInterchangeVector ( ArrayRef< int64_t > interchangeVector, size_t iterationDomainSize ) static

generateLoopNest()

generateLoopNestUsingForallOp()

generateLoopNestUsingForOp()

getBoundedTileSize()

Returns the bounded tile size given the current offset, loopRange and tileSize, i.e., min(tileSize, range.end() - offset).

Definition at line 208 of file TileUsingInterface.cpp.

References mlir::bindDims(), mlir::bindSymbols(), mlir::AffineMap::get(), mlir::getConstantIntValue(), mlir::Builder::getContext(), mlir::getValueOrCreateConstantIndexOp(), mlir::affine::makeComposedFoldedAffineMin(), mlir::Range::offset, mlir::Range::size, and tileDividesIterationDomain().

Referenced by getTileOffsetAndSizes().

getConsumerFromLoopUses()

getFirstUserOfLoop()

getLoopBounds()

getResultTilePosition()

getTiledImplementation()

getTileOffsetAndSizes()

Compute the OpFoldResults that represents the multi-dimensional offsets and sizes of the tile of the iteration space that the innermost loop body of the generated tiled loops corresponds to.

Definition at line 248 of file TileUsingInterface.cpp.

References mlir::bindDims(), mlir::bindSymbols(), canOmitTileOffsetInBoundsCheck(), mlir::getAsOpFoldResult(), getBoundedTileSize(), mlir::Builder::getContext(), mlir::Builder::getIndexAttr(), mlir::AffineMap::getMultiDimIdentityMap(), mlir::isZeroInteger(), mlir::affine::makeComposedFoldedAffineApply(), mlir::affine::makeComposedFoldedAffineMax(), and mlir::affine::makeComposedFoldedAffineMin().

Referenced by mlir::scf::tileUsingSCF().

getUntiledConsumerFromSlice() [1/3]

getUntiledConsumerFromSlice() [2/3]

getUntiledConsumerFromSlice() [3/3]

static FailureOr<OpOperand *> getUntiledConsumerFromSlice ( RewriterBase & rewriter, tensor::ParallelInsertSliceOp candidateSliceOp, MutableArrayRef< LoopLikeOpInterface > loops ) static

getUntiledProducerFromSliceSource()

getUserTileSizesAndNumThreads()

isPerfectlyNestedForLoops()

static bool isPerfectlyNestedForLoops ( MutableArrayRef< LoopLikeOpInterface > loops) static

Check that the loop is perfectly nested.

The loops are expected to be ordered from outermost to innermost. For example:

%0 = scf.for()

%1 = scf.for()

%2 = scf.for()

%3 = ...

yield %3

yield %2

yield %1

Here loops should be [%0, %1].

Definition at line 1864 of file TileUsingInterface.cpp.

Referenced by getUntiledConsumerFromSlice().

mergeTilingResults()

tileDividesIterationDomain()

static bool tileDividesIterationDomain ( Range loopRange) static

verifyTileSizeOptions()

yieldTiledValuesAndReplaceLoop() [1/2]

yieldTiledValuesAndReplaceLoop() [2/2]

template<typename LoopType>

FailureOr< LoopLikeOpInterface > yieldTiledValuesAndReplaceLoop ( LoopType loopOp,
RewriterBase & rewriter,
ValueRange newInitOperands,
YieldTiledValuesFn yieldTiledValuesFn
)

Append the specified additional newInitOperands operands to the loop's existing init operands (or similar), and replace loopOp with the new loop that has the additional init operands.

The loop body of this loop is moved over to the new loop. yieldTiledValuesFn is called to get the new tiled values returned, and the offset and sizes at which the tiled value is inserted into the new region iter_args that correspond to the newly added init operands.

Definition at line 727 of file TileUsingInterface.cpp.

References mlir::RewriterBase::notifyMatchFailure().

Referenced by addInitOperandsToLoopNest(), and yieldTiledValuesAndReplaceLoop().

yieldTiledValuesAndReplaceLoop< scf::ForallOp >()

yieldTiledValuesAndReplaceLoop< scf::ForOp >()