MLIR: lib/Dialect/Affine/Utils/LoopUtils.cpp File Reference (original) (raw)

Go to the source code of this file.

Macros
#define DEBUG_TYPE "loop-utils"
Functions
static void getCleanupLoopLowerBound (AffineForOp forOp, unsigned unrollFactor, AffineMap &cleanupLbMap, SmallVectorImpl< Value > &cleanupLbOperands)
Computes the cleanup loop lower bound of the loop being unrolled with the specified unroll factor; this bound will also be the upper bound of the main part of the unrolled loop.
static void replaceIterArgsAndYieldResults (AffineForOp forOp)
Helper to replace uses of loop-carried values (iter_args) and loop yield values while promoting single-iteration affine.for ops.
static AffineForOp generateShiftedLoop (AffineMap lbMap, AffineMap ubMap, const std::vector< std::pair< uint64_t, ArrayRef< Operation * > > > &opGroupQueue, unsigned offset, AffineForOp srcForOp, OpBuilder b)
Generates an affine.for op with the specified lower and upper bounds while generating the right IV remappings to realize shifts for operations in its body.
static LogicalResult checkIfHyperRectangular (MutableArrayRef< AffineForOp > input)
Checks whether a loop nest is hyper-rectangular or not.
template<typename t>
static LogicalResult performPreTilingChecks (MutableArrayRef< AffineForOp > input, ArrayRef< t > tileSizes)
Check if the input nest is supported for tiling and whether tiling would be legal or not.
static void moveLoopBodyImpl (AffineForOp src, AffineForOp dest, Block::iterator loc)
Move the loop body of AffineForOp 'src' from 'src' into the specified location in destination's body, ignoring the terminator.
static void moveLoopBody (AffineForOp src, AffineForOp dest)
Move the loop body of AffineForOp 'src' from 'src' to the start of dest body.
static void constructTiledLoopNest (MutableArrayRef< AffineForOp > origLoops, AffineForOp rootAffineForOp, unsigned width, MutableArrayRef< AffineForOp > tiledLoops)
Constructs the tiled loop nest without setting the loop bounds, and moves the body of the original loop nest to the tiled loop nest.
static void setIntraTileBoundsParametric (OpBuilder &b, AffineForOp origLoop, AffineForOp newInterTileLoop, AffineForOp newIntraTileLoop, Value tileSize)
Set lower and upper bounds of intra-tile loops for parametric tiling.
static void setInterTileBoundsParametric (OpBuilder &b, AffineForOp origLoop, AffineForOp newLoop, Value tileSize)
Set lower and upper bounds of inter-tile loops for parametric tiling.
static void constructParametricallyTiledIndexSetHyperRect (MutableArrayRef< AffineForOp > origLoops, MutableArrayRef< AffineForOp > newLoops, ArrayRef< Value > tileSizes)
Constructs and sets new loop bounds after tiling for the case of hyper-rectangular index sets, where the bounds of one dimension do not depend on other dimensions and tiling parameters are captured from SSA values.
static void constructTiledIndexSetHyperRect (MutableArrayRef< AffineForOp > origLoops, MutableArrayRef< AffineForOp > newLoops, ArrayRef< unsigned > tileSizes)
Constructs and sets new loop bounds after tiling for the case of hyper-rectangular index sets, where the bounds of one dimension do not depend on other dimensions.
static void generateUnrolledLoop (Block *loopBodyBlock, Value forOpIV, uint64_t unrollFactor, function_ref< Value(unsigned, Value, OpBuilder)> ivRemapFn, function_ref< void(unsigned, Operation *, OpBuilder)> annotateFn, ValueRange iterArgs, ValueRange yieldedValues)
Generates unrolled copies of AffineForOp 'loopBodyBlock', with associated 'forOpIV' by 'unrollFactor', calling 'ivRemapFn' to remap 'forOpIV' for each unrolled body.
static LogicalResult generateCleanupLoopForUnroll (AffineForOp forOp, uint64_t unrollFactor)
Helper to generate cleanup loop for unroll or unroll-and-jam when the trip count is not a multiple of unrollFactor.
static bool areInnerBoundsInvariant (AffineForOp forOp)
Check if all control operands of all loops are defined outside of forOp and return false if not.
static bool checkLoopInterchangeDependences (const std::vector< SmallVector< DependenceComponent, 2 > > &depCompsVec, ArrayRef< AffineForOp > loops, ArrayRef< unsigned > loopPermMap)
static void augmentMapAndBounds (OpBuilder &b, Value iv, AffineMap *map, SmallVector< Value, 4 > *operands, int64_t offset=0)
static SmallVector< AffineForOp, 8 > stripmineSink (AffineForOp forOp, uint64_t factor, ArrayRef< AffineForOp > targets)
template<typename SizeType>
static AffineForOp stripmineSink (AffineForOp forOp, SizeType factor, AffineForOp target)
static void findHighestBlockForPlacement (const MemRefRegion &region, Block &block, Block::iterator &begin, Block::iterator &end, Block **copyPlacementBlock, Block::iterator *copyInPlacementStart, Block::iterator *copyOutPlacementStart)
Given a memref region, determine the lowest depth at which transfers can be placed for it, and return the corresponding block, start and end positions in the block for placing incoming (read) and outgoing (write) copies respectively.
static void getMultiLevelStrides (const MemRefRegion &region, ArrayRef< int64_t > bufferShape, SmallVectorImpl< StrideInfo > *strideInfos)
Returns striding information for a copy/transfer of this region with potentially multiple striding levels from outermost to innermost.
b getContext ())
for (auto applyOp :mayBeDeadApplys) if(applyOp.use_empty()) applyOp.erase()
if (!isCopyOut)
static InFlightDiagnostic emitRemarkForBlock (Block &block)
*if copies could not be generated due to yet unimplemented cases *copyInPlacementStart and copyOutPlacementStart in copyPlacementBlock *specify the insertion points where the incoming copies and outgoing should be inserted (the insertion happens right before the *insertion point). Since `begin` can itself be invalidated due to the memref *rewriting done from this method
*if copies could not be generated due to yet unimplemented cases *copyInPlacementStart and copyOutPlacementStart in copyPlacementBlock *specify the insertion points where the incoming copies and outgoing should be the output argument nBegin is set to its * replacement (set to `begin` if no invalidation happens). Since outgoing *copies could have been inserted at `end`
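static LogicalResult generateCopy (const MemRefRegion &region, Block *block, Block::iterator begin, Block::iterator end, Block *copyPlacementBlock, Block::iterator copyInPlacementStart, Block::iterator copyOutPlacementStart, const AffineCopyOptions &copyOptions, DenseMap< Value, Value > &fastBufferMap, DenseSet< Operation * > &copyNests, uint64_t *sizeInBytes, Block::iterator *nBegin, Block::iterator *nEnd)
Creates a buffer in the faster memory space for the specified memref region (memref has to be non-zero ranked); generates a copy from the lower memory space to this one, and replaces all loads/stores in the block range [`begin`, `end`) of `block` to load/store from that buffer. Returns failure if copies could not be generated due to yet unimplemented cases. `copyInPlacementStart` and `copyOutPlacementStart` in `copyPlacementBlock` specify the insertion points where the incoming copies and outgoing copies, respectively, should be inserted (the insertion happens right before the insertion point). Since `begin` can itself be invalidated due to the memref rewriting done from this method, the output argument `nBegin` is set to its replacement (set to `begin` if no invalidation happens). Since outgoing copies could have been inserted at `end`, the output argument `nEnd` is set to the new end. `sizeInBytes` is set to the size of the fast buffer allocated.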
static bool getFullMemRefAsRegion (Operation *op, unsigned numParamLoopIVs, MemRefRegion *region)
Construct the memref region to just include the entire memref.
static void gatherLoopsInBlock (Block *block, unsigned currLoopDepth, std::vector< SmallVector< AffineForOp, 2 > > &depthToLoops)
Gathers all AffineForOps in 'block' at 'currLoopDepth' in 'depthToLoops'.
static AffineIfOp createSeparationCondition (MutableArrayRef< AffineForOp > loops, OpBuilder b)
Creates an AffineIfOp that encodes the conditional to choose between the constant trip count version and an unknown trip count version of this nest of loops.
static LogicalResult createFullTiles (MutableArrayRef< AffineForOp > inputNest, SmallVectorImpl< AffineForOp > &fullTileLoops, OpBuilder b)
Create the full tile loop nest (along with its body).
Variables
fastBufExprs
fullyComposeAffineMapAndOperands & fastBufMap = simplifyAffineMap(fastBufMap)
auto load
return copyNestRoot
*if copies could not be generated due to yet unimplemented cases *copyInPlacementStart and copyOutPlacementStart in copyPlacementBlock *specify the insertion points where the incoming copies and outgoing copies
Creates a buffer in the faster memory space for the specified memref region (memref has to be non-zero ranked); generates a copy from the lower memory space to this one, and replaces all loads/stores in the block range [‘begin’, ‘end’) of ‘block’ to load/store from that buffer. Returns failure if copies could not be generated due to yet unimplemented cases.
*if copies could not be generated due to yet unimplemented cases *copyInPlacementStart and copyOutPlacementStart in copyPlacementBlock *specify the insertion points where the incoming copies and outgoing * respectively

DEBUG_TYPE

#define DEBUG_TYPE "loop-utils"

areInnerBoundsInvariant()

bool areInnerBoundsInvariant ( AffineForOp forOp) static

augmentMapAndBounds()

checkIfHyperRectangular()

LogicalResult checkIfHyperRectangular ( MutableArrayRef< AffineForOp > input) static

checkLoopInterchangeDependences()

constructParametricallyTiledIndexSetHyperRect()

constructTiledIndexSetHyperRect()

constructTiledLoopNest()

createFullTiles()

createSeparationCondition()

Creates an AffineIfOp that encodes the conditional to choose between the constant trip count version and an unknown trip count version of this nest of loops.

This is used to separate partial and full tiles if `loops` has the intra-tile loops. The affine.if op is inserted at the builder insertion point of `b`.

Definition at line 2584 of file LoopUtils.cpp.

References mlir::presburger::IntegerRelation::atIneq(), b, mlir::affine::canonicalizeSetAndOperands(), mlir::FlatLinearConstraints::getAsIntegerSet(), mlir::affine::getIndexSet(), mlir::presburger::IntegerRelation::getInequality(), mlir::presburger::IntegerRelation::getLowerAndUpperBoundIndices(), mlir::presburger::IntegerRelation::getNumCols(), mlir::presburger::IntegerRelation::getNumDimAndSymbolVars(), mlir::FlatLinearValueConstraints::getValues(), mlir::presburger::IntegerRelation::removeIndependentConstraints(), mlir::presburger::IntegerRelation::removeTrivialRedundancy(), mlir::presburger::IntegerRelation::removeVar(), and mlir::presburger::IntegerRelation::setDimSymbolSeparation().

emitRemarkForBlock()

findHighestBlockForPlacement()

for()

for ( auto applyOp :mayBeDeadApplys )

gatherLoopsInBlock()

generateCleanupLoopForUnroll()

LogicalResult generateCleanupLoopForUnroll ( AffineForOp forOp, uint64_t unrollFactor ) static

generateCopy()

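LogicalResult generateCopy ( const MemRefRegion & region, Block * block, Block::iterator begin, Block::iterator end, Block * copyPlacementBlock, Block::iterator copyInPlacementStart, Block::iterator copyOutPlacementStart, const AffineCopyOptions & copyOptions, DenseMap< Value, Value > & fastBufferMap, DenseSet< Operation * > & copyNests, uint64_t * sizeInBytes, Block::iterator * nBegin, Block::iterator * nEnd ) static

Creates a buffer in the faster memory space for the specified memref region (memref has to be non-zero ranked); generates a copy from the lower memory space to this one, and replaces all loads/stores in the block range [`begin`, `end`) of `block` to load/store from that buffer. Returns failure if copies could not be generated due to yet unimplemented cases. `copyInPlacementStart` and `copyOutPlacementStart` in `copyPlacementBlock` specify the insertion points where the incoming copies and outgoing copies, respectively, should be inserted (the insertion happens right before the insertion point). Since `begin` can itself be invalidated due to the memref rewriting done from this method, the output argument `nBegin` is set to its replacement (set to `begin` if no invalidation happens). Since outgoing copies could have been inserted at `end`, the output argument `nEnd` is set to the new end. `sizeInBytes` is set to the size of the fast buffer allocated.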

Definition at line 1947 of file LoopUtils.cpp.

References b, mlir::Block::begin(), mlir::affine::AffineDmaStartOp::create(), mlir::affine::AffineDmaWaitOp::create(), mlir::arith::ConstantIndexOp::create(), emitRemarkForBlock(), mlir::affine::AffineCopyOptions::fastMemorySpace, mlir::Block::findAncestorOpInBlock(), mlir::affine::fullyComposeAffineMapAndOperands(), mlir::affine::AffineCopyOptions::generateDma, mlir::AffineMap::get(), mlir::affine::MemRefRegion::getConstantBoundingSizeAndShape(), mlir::affine::MemRefRegion::getConstraints(), mlir::affine::getIntOrFloatMemRefSizeInBytes(), mlir::affine::MemRefRegion::getLowerAndUpperBound(), getMultiLevelStrides(), mlir::presburger::IntegerRelation::getNumCols(), mlir::AffineMap::getNumResults(), mlir::presburger::IntegerRelation::getNumVars(), mlir::Block::getParent(), mlir::Region::getParentOfType(), mlir::FlatLinearValueConstraints::getValues(), mlir::Operation::isBeforeInBlock(), mlir::affine::MemRefRegion::isWrite(), mlir::affine::MemRefRegion::loc, mlir::affine::MemRefRegion::memref, success(), and mlir::affine::AffineCopyOptions::tagMemorySpace.

generateShiftedLoop()

generateUnrolledLoop()

void generateUnrolledLoop ( Block * loopBodyBlock, Value forOpIV, uint64_t unrollFactor, function_ref< Value(unsigned, Value, OpBuilder)> ivRemapFn, function_ref< void(unsigned, Operation *, OpBuilder)> annotateFn, ValueRange iterArgs, ValueRange yieldedValues ) static

Generates unrolled copies of AffineForOp 'loopBodyBlock', with associated 'forOpIV' by 'unrollFactor', calling 'ivRemapFn' to remap 'forOpIV' for each unrolled body.

If specified, annotates the Ops in each unrolled iteration using annotateFn.

Definition at line 899 of file LoopUtils.cpp.

References mlir::OpBuilder::atBlockTerminator(), mlir::Block::begin(), mlir::Operation::clone(), mlir::Block::end(), mlir::Operation::getBlock(), mlir::Block::getTerminator(), mlir::IRMapping::lookup(), mlir::IRMapping::map(), mlir::Operation::setOperands(), and mlir::Value::use_empty().

getCleanupLoopLowerBound()

getContext()

Referenced by mlir::detail::AsmStateImpl::AsmStateImpl(), mlir::computeLinearIndex(), mlir::affine::computeSliceUnion(), constifyIndexValues(), mlir::detail::ConversionPatternRewriterImpl::ConversionPatternRewriterImpl(), mlir::ConvertOpInterfaceToLLVMPattern< SourceOp >::ConvertOpInterfaceToLLVMPattern(), mlir::OneToNOpAdaptor< SelectOp >::ConvertOpToLLVMPattern(), mlir::linalg::ConvMatcherBuilder::ConvMatcherBuilder(), mlir::ConvertToLLVMPattern::copyUnrankedDescriptor(), mlir::LLVM::detail::DebugTranslation::DebugTranslation(), mlir::PassManager::enableCrashReproducerGeneration(), mlir::spirv::ImageType::get(), mlir::sparse_tensor::SparseTensorType::getCOOType(), mlir::SPIRVTypeConverter::getIndexType(), mlir::ConvertToLLVMPattern::getIntPtrType(), mlir::dataflow::CFGEdge::getLoc(), mlir::ConvertToLLVMPattern::getPtrType(), mlir::getSymbolLessAffineMaps(), mlir::ConvertToLLVMPattern::getVoidType(), L0RTContextWrapper::L0RTContextWrapper(), mlirAffineExprGetContext(), mlirAffineMapGetContext(), mlirAttributeGetContext(), mlirDialectGetContext(), mlirIdentifierGetContext(), mlirIntegerSetGetContext(), mlirLocationGetContext(), mlirModuleGetContext(), mlirOperationGetContext(), mlirOperationSetInherentAttributeByName(), mlirRewriterBaseGetContext(), mlirTypeGetContext(), mlirValueGetContext(), mlir::LLVM::ModuleImport::ModuleImport(), mlir::detail::StorageUserBase< ConcreteType, BaseType, StorageType, detail::AttributeUniquer, Traits... 
>::mutate(), mlir::detail::ParserState::ParserState(), mlir::AffineExpr::print(), mlir::AffineMap::print(), mlir::Attribute::print(), mlir::IntegerSet::print(), mlir::Type::print(), mlir::Attribute::printStripped(), mlir::BytecodeReader::Impl::read(), mlir::detail::RecoveryReproducerContext::RecoveryReproducerContext(), mlir::SymbolTable::rename(), mlir::SymbolTable::rename(), AmdgpuMaskedloadToLoadPass::runOnOperation(), ConvertMathToROCDLPass::runOnOperation(), mlir::amdgpu::AmdgpuFoldMemRefOpsPass::runOnOperation(), mlir::detail::OpToOpPassAdaptor::runOnOperation(), TargetToDataLayoutPass::runOnOperation(), TargetToTargetFeaturesPass::runOnOperation(), mlir::spirv::Serializer::Serializer(), and mlir::StructuredGenerator< StructuredOpInterface, IteratorTypeT >::StructuredGenerator().

getFullMemRefAsRegion()

getMultiLevelStrides()

if()

Definition at line 1914 of file LoopUtils.cpp.

References b, copyNestRoot, fastBufMap, and load.

Referenced by mlir::tblgen::Constraint::Constraint(), mlir::FrozenRewritePatternSet::FrozenRewritePatternSet(), mlir::shard::getResultShardings(), mlir::AsmParser::KeywordSwitch< ResultT >::KeywordSwitch(), mlir::OpPrintingFlags::OpPrintingFlags(), parseBindName(), mlir::remark::detail::RemarkEngine::RemarkEngine(), sortMapIndices(), and mlir::SuccessorRange::SuccessorRange().

inserted()

*if copies could not be generated due to yet unimplemented cases *copyInPlacementStart and copyOutPlacementStart in copyPlacementBlock *specify the insertion points where the incoming copies and outgoing should be inserted ( the insertion happens right before the *insertion point )

Referenced by mlir::AsmParserState::addAttrAliasDefinition(), mlir::AsmParserState::addDefinition(), mlir::AnalysisState::addDependency(), mlir::AsmParserState::addTypeAliasDefinition(), mlir::AsmParserState::addUses(), mlir::tblgen::SymbolInfoMap::bindAttr(), mlir::tblgen::SymbolInfoMap::bindMultipleValues(), mlir::tblgen::SymbolInfoMap::bindOpResult(), mlir::tblgen::SymbolInfoMap::bindProp(), mlir::tblgen::SymbolInfoMap::bindValue(), buildPackingLoopNestImpl(), checkCorrectAnyOf(), createIndex(), createSplitPart(), mlir::LLVM::detail::LoopAnnotationTranslation::getAccessGroup(), getConstraintPredicates(), mlir::tblgen::DagNode::getDialectOp(), getMemoryFootprintBytes(), getOrCreateBlockIndices(), mlir::DialectRegistry::insert(), mlir::linalg::insertSlicesBack(), mlir::sparse_tensor::isBlockSparsity(), mlir::CyclicReplacerCache< InT, OutT >::lookupOrInit(), mlir::GPUFuncOpLowering::matchAndRewrite(), mlir::Namespace::newName(), mlir::Namespace::newName(), remapInlinedLocations(), mlir::linalg::DownscaleConv2DOp::returningMatchAndRewrite(), mlir::linalg::DownscaleDepthwiseConv2DNhwcHwcOp::returningMatchAndRewrite(), mlir::linalg::DownscaleSizeOneWindowed2DConvolution< Conv2DOp, Conv1DOp >::returningMatchAndRewrite(), mlir::SymbolTable::SymbolTable(), mlir::transformCFGToSCF(), transformToReduceLoop(), and visitUpward().

moveLoopBody()

void moveLoopBody ( AffineForOp src, AffineForOp dest ) static

moveLoopBodyImpl()

Move the loop body of AffineForOp 'src' from 'src' into the specified location in destination's body, ignoring the terminator.

Definition at line 408 of file LoopUtils.cpp.

Referenced by moveLoopBody().

performPreTilingChecks()

replaceIterArgsAndYieldResults()

void replaceIterArgsAndYieldResults ( AffineForOp forOp) static

replacement()

*if copies could not be generated due to yet unimplemented cases *copyInPlacementStart and copyOutPlacementStart in copyPlacementBlock *specify the insertion points where the incoming copies and outgoing should be the output argument nBegin is set to its * replacement ( set to `begin` if no invalidation happens )

Referenced by mlir::alignAffineMapWithValues(), applyTilingToAll(), mlir::bufferization::func_ext::CallOpInterface::bufferize(), mlir::bufferization::buildSubsetExtraction(), castSameSizedTypes(), mlir::async::cloneConstantsIntoTheRegion(), createExtractAndCast(), mlir::bufferization::eliminateEmptyTensors(), mlir::bufferization::foldToBufferToTensorPair(), generateFusedElementwiseOpRegion(), mlir::LLVM::ModuleTranslation::getOrCreateAliasScope(), mlir::CyclicReplacerCache< void *, const void * >::lookupOrInit(), DropUnitDimsFromScfForOp::matchAndRewrite(), mlir::linalg::FoldTensorCastPackOp::matchAndRewrite(), mlir::linalg::FoldTensorCastUnPackOp::matchAndRewrite(), mlirAffineMapReplace(), mlir::RewriterBase::ForwardingListener::notifyOperationReplaced(), mlir::RewriterBase::Listener::notifyOperationReplaced(), mlir::RewriterBase::Listener::notifyOperationReplaced(), mlir::RewriterBase::PatternLoggingListener::notifyOperationReplaced(), mlir::transform::TransformRewriter::notifyPayloadOperationReplaced(), SliceCanonicalizer::operator()(), pruneRedundantArguments(), mlir::irdl::detail::Template::render(), mlir::AffineExpr::replace(), mlir::AffineMap::replace(), mlir::replaceAllUsesInRegionWith(), mlir::scf::replaceAndCastForOpIterArg(), mlir::detail::AttrTypeReplacerBase< Concrete >::replaceElementsIn(), mlir::transform::TransformState::Extension::replacePayloadOp(), mlir::transform::TransformState::Extension::replacePayloadValue(), replaceUnitMappingIdsHelper(), mlir::memref::replaceWithIndependentOp(), mlir::LLVM::DIExpressionRewriter::simplify(), and mlir::transform::TransformRewriter::TransformState.

setInterTileBoundsParametric()

void setInterTileBoundsParametric ( OpBuilder & b, AffineForOp origLoop, AffineForOp newLoop, Value tileSize ) static

setIntraTileBoundsParametric()

void setIntraTileBoundsParametric ( OpBuilder & b, AffineForOp origLoop, AffineForOp newInterTileLoop, AffineForOp newIntraTileLoop, Value tileSize ) static

stripmineSink() [1/2]

template<typename SizeType>

AffineForOp stripmineSink ( AffineForOp forOp, SizeType factor, AffineForOp target ) static

stripmineSink() [2/2]

SmallVector< AffineForOp, 8 > stripmineSink ( AffineForOp forOp, uint64_t factor, ArrayRef< AffineForOp > targets ) static

copies

* if copies could not be generated due to yet unimplemented cases* copyInPlacementStart and copyOutPlacementStart in copyPlacementBlock* specify the insertion points where the incoming copies and outgoing copies

Creates a buffer in the faster memory space for the specified memref region (memref has to be non-zero ranked); generates a copy from the lower memory space to this one, and replaces all loads/stores in the block range [‘begin’, ‘end’) of ‘block’ to load/store from that buffer. Returns failure if copies could not be generated due to yet unimplemented cases.

Definition at line 1939 of file LoopUtils.cpp.

copyNestRoot

fastBufExprs

fastBufMap

canonicalizeMapAndOperands & fastBufMap = simplifyAffineMap(fastBufMap)

load

Initial value:

=

AffineLoadOp::create(b, loc, fastMemRef, fastBufMap, fastBufMapOperands)

b

Return true if permutation is a valid permutation of the outer_dims_perm (case OuterOrInnerPerm::Oute...

fullyComposeAffineMapAndOperands & fastBufMap

Definition at line 1923 of file LoopUtils.cpp.

Referenced by mlir::affine::canFuseLoops(), convertTransferReadOp(), createVectorLoadForMaskedLoad(), mlir::sparse_tensor::genIndexLoad(), getProducerCandidates(), getValueLoadedFromGlobal(), haveNoReadsAfterWriteExceptSameIndex(), if(), mlir::affine::isVectorizableLoopBody(), isVectorizableLoopBodyWithOpCond(), packFunctionArguments(), and reshapeLoad().

respectively

* if copies could not be generated due to yet unimplemented cases* copyInPlacementStart and copyOutPlacementStart in copyPlacementBlock* specify the insertion points where the incoming copies and outgoing * respectively