MLIR: lib/Dialect/SCF/Utils/Utils.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
26 #include "llvm/ADT/STLExtras.h"
27 #include "llvm/ADT/SetVector.h"
28 #include "llvm/ADT/SmallPtrSet.h"
29 #include "llvm/ADT/SmallVector.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/MathExtras.h"
32 #include
33
34 using namespace mlir;
35
36 #define DEBUG_TYPE "scf-utils"
37 #define DBGS() (llvm::dbgs() << '[' << DEBUG_TYPE << "] ")
38 #define LDBG(X) LLVM_DEBUG(DBGS() << X << "\n")
39
43 bool replaceIterOperandsUsesInLoop) {
44 if (loopNest.empty())
45 return {};
46
47
48
49 assert(loopNest.size() <= 10 &&
50 "exceeded recursion limit when yielding value from loop nest");
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82 if (loopNest.size() == 1) {
83 auto innerMostLoop =
84 castscf::ForOp(*loopNest.back().replaceWithAdditionalYields(
85 rewriter, newIterOperands, replaceIterOperandsUsesInLoop,
86 newYieldValuesFn));
87 return {innerMostLoop};
88 }
89
90
91
97 innerNewBBArgs, newYieldValuesFn,
98 replaceIterOperandsUsesInLoop);
99 return llvm::to_vector(llvm::map_range(
100 newLoopNest.front().getResults().take_back(innerNewBBArgs.size()),
102 };
103 scf::ForOp outerMostLoop =
104 castscf::ForOp(*loopNest.front().replaceWithAdditionalYields(
105 rewriter, newIterOperands, replaceIterOperandsUsesInLoop, fn));
106 newLoopNest.insert(newLoopNest.begin(), outerMostLoop);
107 return newLoopNest;
108 }
109
110
111
112
113
114
115
116
117
118
122 StringRef funcName,
123 func::CallOp *callOp) {
124 assert(!funcName.empty() && "funcName cannot be empty");
126 return failure();
127
128 Block *originalBlock = &region.front();
130
131
134
137
138 ValueRange outlinedValues(captures.getArrayRef());
141
142
143
145 outlinedFuncArgTypes.push_back(arg.getType());
146 outlinedFuncArgLocs.push_back(arg.getLoc());
147 }
148 for (Value value : outlinedValues) {
149 outlinedFuncArgTypes.push_back(value.getType());
150 outlinedFuncArgLocs.push_back(value.getLoc());
151 }
152 FunctionType outlinedFuncType =
155 auto outlinedFunc =
156 rewriter.createfunc::FuncOp(loc, funcName, outlinedFuncType);
157 Block *outlinedFuncBody = outlinedFunc.addEntryBlock();
158
159
160
161 int64_t numOriginalBlockArguments = originalBlock->getNumArguments();
162 auto outlinedFuncBlockArgs = outlinedFuncBody->getArguments();
163 {
167 originalBlock, outlinedFuncBody,
168 outlinedFuncBlockArgs.take_front(numOriginalBlockArguments));
169
173 }
174
175
176
178 &region, region.begin(),
179 TypeRange{outlinedFuncArgTypes}.take_front(numOriginalBlockArguments),
181 .take_front(numOriginalBlockArguments));
182 {
186 llvm::append_range(callValues, newBlock->getArguments());
187 llvm::append_range(callValues, outlinedValues);
188 auto call = rewriter.createfunc::CallOp(loc, outlinedFunc, callValues);
189 if (callOp)
190 *callOp = call;
191
192
193
194
196 bvm.map(originalTerminator->getOperands(), call->getResults());
197 rewriter.clone(*originalTerminator, bvm);
198 rewriter.eraseOp(originalTerminator);
199 }
200
201
202
203 for (auto it : llvm::zip(outlinedValues, outlinedFuncBlockArgs.take_back(
204 outlinedValues.size()))) {
205 Value orig = std::get<0>(it);
206 Value repl = std::get<1>(it);
207 {
213 }
214 }
216 return outlinedFunc->isProperAncestor(opOperand.getOwner());
217 });
218 }
219
220 return outlinedFunc;
221 }
222
// Tail of a function that outlines the then/else regions of `ifOp` into
// functions. The first header line(s) and the two callee lines (230, 237) are
// not present in this listing, so the callee is not named here.
// Each branch is handled only when its out-pointer is non-null and the region
// is non-empty; the resulting function is written through the out-pointer.
// Returns failure as soon as either outlining step fails, success otherwise.
224 func::FuncOp *thenFn, StringRef thenFnName,
225 func::FuncOp *elseFn, StringRef elseFnName) {
227 Location loc = ifOp.getLoc();
// Reused to hold the result of both the "then" and the "else" outlining.
228 FailureOrfunc::FuncOp outlinedFuncOpOrFailure;
// "then" branch: skipped when the caller passed no `thenFn` or the region is
// empty.
229 if (thenFn && !ifOp.getThenRegion().empty()) {
231 rewriter, loc, ifOp.getThenRegion(), thenFnName);
232 if (failed(outlinedFuncOpOrFailure))
233 return failure();
234 *thenFn = *outlinedFuncOpOrFailure;
235 }
// "else" branch: same treatment, using `elseFn` / `elseFnName`.
236 if (elseFn && !ifOp.getElseRegion().empty()) {
238 rewriter, loc, ifOp.getElseRegion(), elseFnName);
239 if (failed(outlinedFuncOpOrFailure))
240 return failure();
241 *elseFn = *outlinedFuncOpOrFailure;
242 }
243 return success();
244 }
245
248 assert(rootOp != nullptr && "Root operation must not be a nullptr.");
249 bool rootEnclosesPloops = false;
251 for (Block &block : region.getBlocks()) {
254 rootEnclosesPloops |= enclosesPloops;
255 if (auto ploop = dyn_castscf::ParallelOp(op)) {
256 rootEnclosesPloops = true;
257
258
259 if (!enclosesPloops)
260 result.push_back(ploop);
261 }
262 }
263 }
264 }
265 return rootEnclosesPloops;
266 }
267
268
269
270
// Ceiling division of `dividend` by a compile-time positive `divisor`, emitted
// as IR: (dividend + divisorMinusOneCst) udiv divisorCst.
// The first header line and the attribute arguments of both constants (lines
// 271, 274, 278, 280) are missing from this listing; the constant values are
// presumably `divisor - 1` and `divisor`, as the variable names suggest.
272 int64_t divisor) {
// A non-positive divisor is a programming error.
273 assert(divisor > 0 && "expected positive divisor");
275 "expected integer or index-typed value");
276
277 Value divisorMinusOneCst = builder.createarith::ConstantOp(
279 Value divisorCst = builder.createarith::ConstantOp(
281 Value sum = builder.createarith::AddIOp(loc, dividend, divisorMinusOneCst);
// Unsigned division is used for the final quotient.
282 return builder.createarith::DivUIOp(loc, sum, divisorCst);
283 }
284
285
286
287
288
// Variant of the ceiling-division helper where `divisor` is itself an SSA
// value: emits (dividend + (divisor - cstOne)) udiv divisor.
// The header and the attribute argument of `cstOne` (line 294) are missing
// from this listing; `cstOne` is presumably the constant 1 of the operand type.
292 "expected integer or index-typed value");
293 Value cstOne = builder.createarith::ConstantOp(
295 Value divisorMinusOne = builder.createarith::SubIOp(loc, divisor, cstOne);
296 Value sum = builder.createarith::AddIOp(loc, dividend, divisorMinusOne);
// Unsigned division is used for the final quotient.
297 return builder.createarith::DivUIOp(loc, sum, divisor);
298 }
299
300
301
302
// Body of the constant trip-count helper for an scf.for `forOp`. The header
// line and the definition of `stepCstOp` (line 306) are missing from this
// listing; `stepCstOp` is presumably the constant value of the loop step.
// Returns an empty result unless lower bound, upper bound and step are all
// constant; otherwise returns ceil((ub - lb) / step).
304 std::optional<int64_t> lbCstOp = getConstantIntValue(forOp.getLowerBound());
305 std::optional<int64_t> ubCstOp = getConstantIntValue(forOp.getUpperBound());
// Any non-constant operand means the trip count cannot be computed statically.
307 if (!lbCstOp.has_value() || !ubCstOp.has_value() || !stepCstOp.has_value())
308 return {};
309
310
311 int64_t lbCst = lbCstOp.value();
312 int64_t ubCst = ubCstOp.value();
313 int64_t stepCst = stepCstOp.value();
// Precondition checked only in assert-enabled builds: non-negative bounds and
// a strictly positive step.
314 assert(lbCst >= 0 && ubCst >= 0 && stepCst > 0 &&
315 "expected positive loop bounds and step");
// Signed ceiling division of the iteration span by the step.
316 return llvm::divideCeilSigned(ubCst - lbCst, stepCst);
317 }
318
319
320
321
322
// Appends `unrollFactor - 1` extra copies of the loop body to `loopBodyBlock`.
// Several lines are missing from this listing (the header start, the
// remaining parameters, and the definitions of `builder`, `srcBlockEnd`,
// `lastYielded` and `operandMap`), so those names are used below without a
// visible declaration.
324 Block *loopBodyBlock, Value forOpIV, uint64_t unrollFactor,
328
329
331
// Fall back to a no-op annotation callback when the caller supplied none.
332 constexpr auto defaultAnnotateFn = [](unsigned, Operation *, OpBuilder) {};
333 if (!annotateFn)
334 annotateFn = defaultAnnotateFn;
335
336
337
339
340
342
// Copy index 0 is the original body, so cloning starts at i = 1.
343 for (unsigned i = 1; i < unrollFactor; i++) {
345
346
// Iter args of this copy read the values yielded by the previous copy.
347 operandMap.map(iterArgs, lastYielded);
348
349
350
// Remap the induction variable for copy `i` (the guarding condition on line
// 351 is missing from this listing).
352 Value ivUnroll = ivRemapFn(i, forOpIV, builder);
353 operandMap.map(forOpIV, ivUnroll);
354 }
355
356
// Clone the ops from the block start through `srcBlockEnd` inclusive, and
// annotate each clone with its copy index.
357 for (auto it = loopBodyBlock->begin(); it != std::next(srcBlockEnd); it++) {
358 Operation *clonedOp = builder.clone(*it, operandMap);
359 annotateFn(i, clonedOp, builder);
360 }
361
362
// Record what this copy yields so the next copy (or the caller) can use it.
363 for (unsigned i = 0, e = lastYielded.size(); i < e; i++)
364 lastYielded[i] = operandMap.lookupOrDefault(yieldedValues[i]);
365 }
366
367
368
// Annotate the original ops with copy index 0, after all clones are made.
369 for (auto it = loopBodyBlock->begin(); it != std::next(srcBlockEnd); it++)
370 annotateFn(0, &*it, builder);
371
372
374 }
375
376
377
379 scf::ForOp forOp, uint64_t unrollFactor,
381 assert(unrollFactor > 0 && "expected positive unroll factor");
382
383
384 if (llvm::hasSingleElement(forOp.getBody()->getOperations()))
386
387
388
390 IRRewriter rewriter(forOp.getContext());
391 auto loc = forOp.getLoc();
392 Value step = forOp.getStep();
393 Value upperBoundUnrolled;
394 Value stepUnrolled;
395 bool generateEpilogueLoop = true;
396
398 if (constTripCount) {
399
403 if (unrollFactor == 1) {
404 if (*constTripCount == 1 &&
405 failed(forOp.promoteIfSingleIteration(rewriter)))
406 return failure();
408 }
409
410 int64_t tripCountEvenMultiple =
411 *constTripCount - (*constTripCount % unrollFactor);
412 int64_t upperBoundUnrolledCst = lbCst + tripCountEvenMultiple * stepCst;
413 int64_t stepUnrolledCst = stepCst * unrollFactor;
414
415
416 generateEpilogueLoop = upperBoundUnrolledCst < ubCst;
417 if (generateEpilogueLoop)
418 upperBoundUnrolled = boundsBuilder.createarith::ConstantOp(
419 loc, boundsBuilder.getIntegerAttr(forOp.getUpperBound().getType(),
420 upperBoundUnrolledCst));
421 else
422 upperBoundUnrolled = forOp.getUpperBound();
423
424
425 stepUnrolled = stepCst == stepUnrolledCst
426 ? step
427 : boundsBuilder.createarith::ConstantOp(
429 step.getType(), stepUnrolledCst));
430 } else {
431
432
433
434 auto lowerBound = forOp.getLowerBound();
435 auto upperBound = forOp.getUpperBound();
437 boundsBuilder.createarith::SubIOp(loc, upperBound, lowerBound);
439 Value unrollFactorCst = boundsBuilder.createarith::ConstantOp(
440 loc, boundsBuilder.getIntegerAttr(tripCount.getType(), unrollFactor));
441 Value tripCountRem =
442 boundsBuilder.createarith::RemSIOp(loc, tripCount, unrollFactorCst);
443
444 Value tripCountEvenMultiple =
445 boundsBuilder.createarith::SubIOp(loc, tripCount, tripCountRem);
446
447 upperBoundUnrolled = boundsBuilder.createarith::AddIOp(
448 loc, lowerBound,
449 boundsBuilder.createarith::MulIOp(loc, tripCountEvenMultiple, step));
450
451 stepUnrolled =
452 boundsBuilder.createarith::MulIOp(loc, step, unrollFactorCst);
453 }
454
456
457
458 if (generateEpilogueLoop) {
459 OpBuilder epilogueBuilder(forOp->getContext());
461 auto epilogueForOp = castscf::ForOp(epilogueBuilder.clone(*forOp));
462 epilogueForOp.setLowerBound(upperBoundUnrolled);
463
464
465 auto results = forOp.getResults();
466 auto epilogueResults = epilogueForOp.getResults();
467
468 for (auto e : llvm::zip(results, epilogueResults)) {
469 std::get<0>(e).replaceAllUsesWith(std::get<1>(e));
470 }
471 epilogueForOp->setOperands(epilogueForOp.getNumControlOperands(),
472 epilogueForOp.getInitArgs().size(), results);
473 if (epilogueForOp.promoteIfSingleIteration(rewriter).failed())
475 }
476
477
478 forOp.setUpperBound(upperBoundUnrolled);
479 forOp.setStep(stepUnrolled);
480
481 auto iterArgs = ValueRange(forOp.getRegionIterArgs());
482 auto yieldedValues = forOp.getBody()->getTerminator()->getOperands();
483
485 forOp.getBody(), forOp.getInductionVar(), unrollFactor,
487
488 auto stride = b.createarith::MulIOp(
489 loc, step,
490 b.createarith::ConstantOp(loc,
491 b.getIntegerAttr(iv.getType(), i)));
492 return b.createarith::AddIOp(loc, iv, stride);
493 },
494 annotateFn, iterArgs, yieldedValues);
495
496 if (forOp.promoteIfSingleIteration(rewriter).failed())
498 return resultLoops;
499 }
500
501
// Body of a routine that acts on `forOp` according to its constant trip count
// (the header line and the final return on line 512 are missing from this
// listing).
503 IRRewriter rewriter(forOp.getContext());
504 std::optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
// Without a constant trip count nothing can be done here.
505 if (!mayBeConstantTripCount.has_value())
506 return failure();
507 uint64_t tripCount = *mayBeConstantTripCount;
// A zero-trip loop is reported as success without modifying anything here.
508 if (tripCount == 0)
509 return success();
// A single-iteration loop is handed to promoteIfSingleIteration.
510 if (tripCount == 1)
511 return forOp.promoteIfSingleIteration(rewriter);
// The handling of trip counts greater than one is on the missing line 512.
513 }
514
515
516
// Walks every scf::ForOp reached by `forOp.walk` and checks that its lower
// bound, upper bound and step are all defined outside of `forOp`.
// Returns true only if the walk was not interrupted. The two return
// statements inside the lambda (lines 522 and 524) are missing from this
// listing; presumably the first interrupts the walk and the second advances it.
518 auto walkResult = forOp.walk([&](scf::ForOp innerForOp) {
519 if (!forOp.isDefinedOutsideOfLoop(innerForOp.getLowerBound()) ||
520 !forOp.isDefinedOutsideOfLoop(innerForOp.getUpperBound()) ||
521 !forOp.isDefinedOutsideOfLoop(innerForOp.getStep()))
523
525 });
526 return !walkResult.wasInterrupted();
527 }
528
529
531 uint64_t unrollJamFactor) {
532 assert(unrollJamFactor > 0 && "unroll jam factor should be positive");
533
534 if (unrollJamFactor == 1)
535 return success();
536
537
538
540 LDBG("failed to unroll and jam: inner bounds are not invariant");
541 return failure();
542 }
543
544
545 if (forOp->getNumResults() > 0) {
546 LDBG("failed to unroll and jam: unsupported loop with results");
547 return failure();
548 }
549
550
551
553 if (!tripCount.has_value()) {
554
555 LDBG("failed to unroll and jam: trip count could not be determined");
556 return failure();
557 }
558 if (unrollJamFactor > *tripCount) {
559 LDBG("unroll and jam factor is greater than trip count, set factor to trip "
560 "count");
561 unrollJamFactor = *tripCount;
562 } else if (*tripCount % unrollJamFactor != 0) {
563 LDBG("failed to unroll and jam: unsupported trip count that is not a "
564 "multiple of unroll jam factor");
565 return failure();
566 }
567
568
569 if (llvm::hasSingleElement(forOp.getBody()->getOperations()))
570 return success();
571
572
574 jbg.walk(forOp);
576
577
579 forOp.walk([&](scf::ForOp innerForOp) { innerLoops.push_back(innerForOp); });
580
581
582
584
585
586
587
589 IRRewriter rewriter(forOp.getContext());
590 for (scf::ForOp oldForOp : innerLoops) {
592 ValueRange oldIterOperands = oldForOp.getInits();
593 ValueRange oldIterArgs = oldForOp.getRegionIterArgs();
595 castscf::YieldOp(oldForOp.getBody()->getTerminator()).getOperands();
596
597
598 for (unsigned i = unrollJamFactor - 1; i >= 1; --i) {
599 dupIterOperands.append(oldIterOperands.begin(), oldIterOperands.end());
600 dupYieldOperands.append(oldYieldOperands.begin(), oldYieldOperands.end());
601 }
602
603
604 bool forOpReplaced = oldForOp == forOp;
605 scf::ForOp newForOp =
606 castscf::ForOp(*oldForOp.replaceWithAdditionalYields(
607 rewriter, dupIterOperands, false,
609 return dupYieldOperands;
610 }));
611 newInnerLoops.push_back(newForOp);
612
613 if (forOpReplaced)
614 forOp = newForOp;
615
616 ValueRange newIterArgs = newForOp.getRegionIterArgs();
617 unsigned oldNumIterArgs = oldIterArgs.size();
618 ValueRange newResults = newForOp.getResults();
619 unsigned oldNumResults = newResults.size() / unrollJamFactor;
620 assert(oldNumIterArgs == oldNumResults &&
621 "oldNumIterArgs must be the same as oldNumResults");
622 for (unsigned i = unrollJamFactor - 1; i >= 1; --i) {
623 for (unsigned j = 0; j < oldNumIterArgs; ++j) {
624
625
626
627 operandMaps[i - 1].map(newIterArgs[j],
628 newIterArgs[i * oldNumIterArgs + j]);
629 operandMaps[i - 1].map(newResults[j],
630 newResults[i * oldNumResults + j]);
631 }
632 }
633 }
634
635
637 int64_t step = forOp.getConstantStep()->getSExtValue();
638 auto newStep = rewriter.createOrFoldarith::MulIOp(
639 forOp.getLoc(), forOp.getStep(),
641 forOp.getLoc(), rewriter.getIndexAttr(unrollJamFactor)));
642 forOp.setStep(newStep);
643 auto forOpIV = forOp.getInductionVar();
644
645
646 for (unsigned i = unrollJamFactor - 1; i >= 1; --i) {
647 for (auto &subBlock : subBlocks) {
648
649
650 OpBuilder builder(subBlock.first->getBlock(), std::next(subBlock.second));
651
652
653
655
656 auto ivTag = builder.createOrFoldarith::ConstantOp(
657 forOp.getLoc(), builder.getIndexAttr(step * i));
658 auto ivUnroll =
659 builder.createOrFoldarith::AddIOp(forOp.getLoc(), forOpIV, ivTag);
660 operandMaps[i - 1].map(forOpIV, ivUnroll);
661 }
662
663 for (auto it = subBlock.first; it != std::next(subBlock.second); ++it)
664 builder.clone(*it, operandMaps[i - 1]);
665 }
666
667 for (auto newForOp : newInnerLoops) {
668 unsigned oldNumIterOperands =
669 newForOp.getNumRegionIterArgs() / unrollJamFactor;
670 unsigned numControlOperands = newForOp.getNumControlOperands();
671 auto yieldOp = castscf::YieldOp(newForOp.getBody()->getTerminator());
672 unsigned oldNumYieldOperands = yieldOp.getNumOperands() / unrollJamFactor;
673 assert(oldNumIterOperands == oldNumYieldOperands &&
674 "oldNumIterOperands must be the same as oldNumYieldOperands");
675 for (unsigned j = 0; j < oldNumIterOperands; ++j) {
676
677
678
679 newForOp.setOperand(numControlOperands + i * oldNumIterOperands + j,
680 operandMaps[i - 1].lookupOrDefault(
681 newForOp.getOperand(numControlOperands + j)));
682 yieldOp.setOperand(
683 i * oldNumYieldOperands + j,
684 operandMaps[i - 1].lookupOrDefault(yieldOp.getOperand(j)));
685 }
686 }
687 }
688
689
690 (void)forOp.promoteIfSingleIteration(rewriter);
691 return success();
692 }
693
697 Range normalizedLoopBounds;
702 AffineExpr e = (s1 - s0).ceilDiv(s2);
703 normalizedLoopBounds.size =
705 return normalizedLoopBounds;
706 }
707
711 if (getType(lb).isIndex()) {
713 }
714
715
716
717 bool isZeroBased = false;
719 isZeroBased = lbCst.value() == 0;
720
721 bool isStepOne = false;
723 isStepOne = stepCst.value() == 1;
724
726 assert(rangeType == getType(ub) && rangeType == getType(step) &&
727 "expected matching types");
728
729
730
731
732 if (isZeroBased && isStepOne)
733 return {lb, ub, step};
734
736 if (!isZeroBased) {
740 }
742 if (!isStepOne) {
743 newUpperBound = rewriter.createOrFoldarith::CeilDivSIOp(
746 }
747
750
751 return {newLowerBound, newUpperBound, newStep};
752 }
753
756 Value normalizedIv,
765 Value denormalizedIvVal =
768
769
770
772 if (Operation *preservedUse = denormalizedIvVal.getDefiningOp()) {
773 preservedUses.insert(preservedUse);
774 }
775 }
776 rewriter.replaceAllUsesExcept(normalizedIv, denormalizedIvVal, preservedUses);
777 }
778
782 if (getType(origLb).isIndex()) {
784 origLb, origStep);
785 }
786 Value denormalizedIv;
790
791 Value scaled = normalizedIv;
792 if (!isStepOne) {
793 Value origStepValue =
795 scaled = rewriter.createarith::MulIOp(loc, normalizedIv, origStepValue);
797 }
798 denormalizedIv = scaled;
799 if (!isZeroBased) {
801 denormalizedIv = rewriter.createarith::AddIOp(loc, scaled, origLbValue);
803 }
804
806 }
807
810 assert(!values.empty() && "unexecpted empty array");
815 for (auto v : values) {
818 }
819 return products;
820 }
821
822
// Multiplies a non-empty sequence of values, emitting arith.muli ops.
// The header, an early block ending at line 830, the definition of `vOne`
// (line 833) and the constant's attribute (line 845) are missing from this
// listing.
825 assert(!values.empty() && "unexpected empty list");
830 }
// Stays unset until the first factor that is not a known constant 1 is seen.
831 std::optional productOf;
832 for (auto v : values) {
// Factors known to be the constant 1 do not contribute to the product.
834 if (vOne && vOne.value() == 1)
835 continue;
836 if (productOf)
837 productOf =
838 rewriter.createarith::MulIOp(loc, productOf.value(), v).getResult();
839 else
840 productOf = v;
841 }
// Every factor was skipped: materialize a constant instead (presumably 1; its
// attribute is on a missing line).
842 if (!productOf) {
843 productOf = rewriter
844 .createarith::ConstantOp(
846 .getResult();
847 }
848 return productOf.value();
849 }
850
851
852
853
854
855
856
857
858
862
// Recovers per-loop induction values from `linearizedIv` given the upper
// bounds `ubs`. Returns the recovered values together with a set of ops
// (`preservedUsers`). The header, the guard of the first branch and the
// declarations of `delinearizedIvs` / `preservedUsers` are missing from this
// listing.
//
// First path (its guard is not visible): a single affine delinearize op.
865 rewriter.createaffine::AffineDelinearizeIndexOp(loc, linearizedIv,
866 ubs);
867 auto resultVals = llvm::map_to_vector(
870 }
871
874
// Second path: explicit div/rem arithmetic. Mark which upper bounds are the
// constant 1 (the loop header and the `ubCst` definition are missing).
875 llvm::BitVector isUbOne(ubs.size());
878 if (ubCst && ubCst.value() == 1)
879 isUbOne.set(index);
880 }
881
882
// Each entry visited by this loop (its header is on the missing line 884) gets
// a zero constant while its bound is 1; the loop stops at the first bound
// that is not 1.
883 unsigned numLeadingOneUbs = 0;
885 if (!isUbOne.test(index)) {
886 break;
887 }
888 delinearizedIvs[index] = rewriter.createarith::ConstantOp(
889 loc, rewriter.getZeroAttr(ub.getType()));
890 numLeadingOneUbs++;
891 }
892
893 Value previous = linearizedIv;
894 for (unsigned i = numLeadingOneUbs, e = ubs.size(); i < e; ++i) {
// `idx` runs from the last position downwards.
895 unsigned idx = ubs.size() - (i - numLeadingOneUbs) - 1;
// After the first step, divide out the bound handled in the previous step;
// the division is skipped when that bound is 1.
896 if (i != numLeadingOneUbs && !isUbOne.test(idx + 1)) {
897 previous = rewriter.createarith::DivSIOp(loc, previous, ubs[idx + 1]);
898 preservedUsers.insert(previous.getDefiningOp());
899 }
900 Value iv = previous;
// On the final step the running quotient is used as-is; otherwise take the
// remainder modulo this position's bound, or a constant when that bound is 1.
901 if (i != e - 1) {
902 if (!isUbOne.test(idx)) {
903 iv = rewriter.createarith::RemSIOp(loc, previous, ubs[idx]);
905 } else {
906 iv = rewriter.createarith::ConstantOp(
908 }
909 }
910 delinearizedIvs[idx] = iv;
911 }
912 return {delinearizedIvs, preservedUsers};
913 }
914
917 if (loops.size() < 2)
918 return failure();
919
920 scf::ForOp innermost = loops.back();
921 scf::ForOp outermost = loops.front();
922
923
924
925 for (auto loop : loops) {
928 Value lb = loop.getLowerBound();
929 Value ub = loop.getUpperBound();
930 Value step = loop.getStep();
931 auto newLoopRange =
933
936 newLoopRange.offset));
938 newLoopRange.size));
940 newLoopRange.stride));
941 });
944 loop.getInductionVar(), lb, step);
945 }
946
947
948
951 Location loc = outermost.getLoc();
953 loops, [](auto loop) { return loop.getUpperBound(); });
955 outermost.setUpperBound(upperBound);
956
959 rewriter, loc, outermost.getInductionVar(), upperBounds);
960 rewriter.replaceAllUsesExcept(outermost.getInductionVar(), delinearizeIvs[0],
961 preservedUsers);
962
963 for (int i = loops.size() - 1; i > 0; --i) {
964 auto outerLoop = loops[i - 1];
965 auto innerLoop = loops[i];
966
967 Operation *innerTerminator = innerLoop.getBody()->getTerminator();
968 auto yieldedVals = llvm::to_vector(innerTerminator->getOperands());
969 assert(llvm::equal(outerLoop.getRegionIterArgs(), innerLoop.getInitArgs()));
970 for (Value &yieldedVal : yieldedVals) {
971
972
973 auto iter = llvm::find(innerLoop.getRegionIterArgs(), yieldedVal);
974 if (iter != innerLoop.getRegionIterArgs().end()) {
975 unsigned iterArgIndex = iter - innerLoop.getRegionIterArgs().begin();
976
977 assert(iterArgIndex < innerLoop.getInitArgs().size());
978 yieldedVal = innerLoop.getInitArgs()[iterArgIndex];
979 }
980 }
981 rewriter.eraseOp(innerTerminator);
982
984 innerBlockArgs.push_back(delinearizeIvs[i]);
985 llvm::append_range(innerBlockArgs, outerLoop.getRegionIterArgs());
986 rewriter.inlineBlockBefore(innerLoop.getBody(), outerLoop.getBody(),
988 rewriter.replaceOp(innerLoop, yieldedVals);
989 }
990 return success();
991 }
992
994 if (loops.empty()) {
995 return failure();
996 }
997 IRRewriter rewriter(loops.front().getContext());
999 }
1000
1002 LogicalResult result(failure());
1005
1006
1007
1008
1009
1010
1012 for (unsigned i = 0, e = loops.size(); i < e; ++i) {
1013 operandsDefinedAbove[i] = i;
1014 for (unsigned j = 0; j < i; ++j) {
1016 loops[i].getUpperBound(),
1017 loops[i].getStep()};
1019 operandsDefinedAbove[i] = j;
1020 break;
1021 }
1022 }
1023 }
1024
1025
1026
1027
1028
1030 iterArgChainStart[0] = 0;
1031 for (unsigned i = 1, e = loops.size(); i < e; ++i) {
1032
1033 iterArgChainStart[i] = i;
1034 auto outerloop = loops[i - 1];
1035 auto innerLoop = loops[i];
1036 if (outerloop.getNumRegionIterArgs() != innerLoop.getNumRegionIterArgs()) {
1037 continue;
1038 }
1039 if (!llvm::equal(outerloop.getRegionIterArgs(), innerLoop.getInitArgs())) {
1040 continue;
1041 }
1042 auto outerloopTerminator = outerloop.getBody()->getTerminator();
1043 if (!llvm::equal(outerloopTerminator->getOperands(),
1044 innerLoop.getResults())) {
1045 continue;
1046 }
1047 iterArgChainStart[i] = iterArgChainStart[i - 1];
1048 }
1049
1050
1051
1052
1053 for (unsigned end = loops.size(); end > 0; --end) {
1054 unsigned start = 0;
1055 for (; start < end - 1; ++start) {
1056 auto maxPos =
1057 *std::max_element(std::next(operandsDefinedAbove.begin(), start),
1058 std::next(operandsDefinedAbove.begin(), end));
1059 if (maxPos > start)
1060 continue;
1061 if (iterArgChainStart[end - 1] > start)
1062 continue;
1065 result = success();
1066 break;
1067 }
1068
1069
1070 if (start != end - 1)
1071 end = start + 1;
1072 }
1073 return result;
1074 }
1075
1077 RewriterBase &rewriter, scf::ParallelOp loops,
1078 ArrayRef<std::vector> combinedDimensions) {
1081 Location loc = loops.getLoc();
1082
1083
1084 auto sortedDimensions = llvm::to_vector<3>(combinedDimensions);
1085 for (auto &dims : sortedDimensions)
1086 llvm::sort(dims);
1087
1088
1090 for (unsigned i = 0, e = loops.getNumLoops(); i < e; ++i) {
1093 Value lb = loops.getLowerBound()[i];
1094 Value ub = loops.getUpperBound()[i];
1095 Value step = loops.getStep()[i];
1098 rewriter, loops.getLoc(), newLoopRange.size));
1099
1102 step);
1103 }
1104
1105
1109 for (auto &sortedDimension : sortedDimensions) {
1111 for (auto idx : sortedDimension) {
1112 newUpperBound = rewriter.createarith::MulIOp(
1113 loc, newUpperBound, normalizedUpperBounds[idx]);
1114 }
1115 lowerBounds.push_back(cst0);
1116 steps.push_back(cst1);
1117 upperBounds.push_back(newUpperBound);
1118 }
1119
1120
1121
1122
1123
1124
1125
1126 auto newPloop = rewriter.createscf::ParallelOp(
1127 loc, lowerBounds, upperBounds, steps,
1129 for (unsigned i = 0, e = combinedDimensions.size(); i < e; ++i) {
1130 Value previous = ploopIVs[i];
1131 unsigned numberCombinedDimensions = combinedDimensions[i].size();
1132
1133 for (unsigned j = numberCombinedDimensions - 1; j > 0; --j) {
1134 unsigned idx = combinedDimensions[i][j];
1135
1136
1137 Value iv = insideBuilder.createarith::RemSIOp(
1138 loc, previous, normalizedUpperBounds[idx]);
1140 loops.getRegion());
1141
1142
1143
1144 previous = insideBuilder.createarith::DivSIOp(
1145 loc, previous, normalizedUpperBounds[idx]);
1146 }
1147
1148
1149 unsigned idx = combinedDimensions[i][0];
1151 previous, loops.getRegion());
1152 }
1153 });
1154
1155
1156 loops.getBody()->back().erase();
1157 newPloop.getBody()->getOperations().splice(
1159 loops.getBody()->getOperations());
1160 loops.erase();
1161 }
1162
1163
1164
1165
1166
1167
// Moves operations that sit in `outer`'s body ahead of `inner` to just before
// `outer`. Returns failure if at least one such op could not be hoisted; the
// ops that can be hoisted are moved either way.
// The declarations of `forwardSlice` and `toHoist`, the filter lambda header
// and one condition (line 1196) are missing from this listing.
1168 static LogicalResult hoistOpsBetween(scf::ForOp outer, scf::ForOp inner) {
// Tail of a filter lambda that excludes `inner` itself.
1172 return op != inner.getOperation();
1173 };
1175 LogicalResult status = success();
1177 for (auto &op : outer.getBody()->without_terminator()) {
1178
// Only ops that precede `inner` are considered.
1179 if (&op == inner.getOperation())
1180 break;
1181
// Ops contained in `forwardSlice` are not hoisted; record the failure and
// keep scanning.
1182 if (forwardSlice.count(&op) > 0) {
1183 status = failure();
1184 continue;
1185 }
1186
// Other scf.for ops are left in place without affecting the status.
1187 if (isascf::ForOp(op))
1188 continue;
1189
// Ops that carry regions are not hoisted.
1190 if (op.getNumRegions() > 0) {
1191 status = failure();
1192 continue;
1193 }
1194
1195
// A further condition (line 1196, not visible) also blocks hoisting.
1197 status = failure();
1198 continue;
1199 }
1200 toHoist.push_back(&op);
1201 }
// Perform the moves after the scan so the block is not mutated while iterating.
1202 auto *outerForOp = outer.getOperation();
1203 for (auto *op : toHoist)
1204 op->moveBefore(outerForOp);
1205 return status;
1206 }
1207
1208
1209
1210
1211
// Body of tryIsolateBands (the header line is missing from this listing).
// `tileLoops` is a pair of loop lists: `first` is bound to `interTile` and
// `second` to `intraTile`. For each list, hoistOpsBetween is called between
// its first loop and every later loop.
1213 LogicalResult status = success();
1214 const Loops &interTile = tileLoops.first;
1215 const Loops &intraTile = tileLoops.second;
1216 auto size = interTile.size();
1217 assert(size == intraTile.size());
// With zero or one loop per band there is nothing between loops to hoist.
1218 if (size <= 1)
1219 return success();
// Intra-tile band first. Once a hoist fails, the remaining calls are skipped
// and the failure is carried through.
1220 for (unsigned s = 1; s < size; ++s)
1221 status = succeeded(status) ? hoistOpsBetween(intraTile[0], intraTile[s])
1222 : failure();
// Inter-tile band second, with the same short-circuit on failure.
1223 for (unsigned s = 1; s < size; ++s)
1224 status = succeeded(status) ? hoistOpsBetween(interTile[0], interTile[s])
1225 : failure();
1226 return status;
1227 }
1228
1229
1230
1231
1232
// Collects a chain of nested loops into `forOps`, starting at `rootForOp` and
// descending at most `maxLoops` levels. The template parameter list and the
// function header (lines 1234-1236) are missing from this listing.
1233 template
1237 for (unsigned i = 0; i < maxLoops; ++i) {
1238 forOps.push_back(rootForOp);
1239 Block &body = rootForOp.getRegion().front();
// Continue only when the body holds exactly two operations, i.e. its first op
// is also its second-to-last.
1240 if (body.begin() != std::prev(body.end(), 2))
1241 return;
1242
// Descend into the first op if it casts to the loop type (the dyn_cast target
// type was stripped from this listing); stop otherwise.
1243 rootForOp = dyn_cast(&body.front());
1244 if (!rootForOp)
1245 return;
1246 }
1247 }
1248
// Strip-mines `forOp` by `factor` and sinks a new inner loop into each loop in
// `targets`; returns the new inner loops. The header, the definitions of the
// builder `b` and of `ub`, and the start of the final statement in the loop
// (line 1274) are missing from this listing.
1251 auto originalStep = forOp.getStep();
1252 auto iv = forOp.getInductionVar();
1253
// The outer loop now advances by originalStep * factor.
1255 forOp.setStep(b.createarith::MulIOp(forOp.getLoc(), originalStep, factor));
1256
1257 Loops innerLoops;
1258 for (auto t : targets) {
1259
// Remember the current body extent of the target before new ops are added.
1260 auto begin = t.getBody()->begin();
1261 auto nOps = t.getBody()->getOperations().size();
1262
1263
// `forOp.getStep()` is read after the setStep above, so it is the scaled step:
// the candidate bound is iv + scaled step, and a signed minimum with the
// original upper bound is computed on the next line.
1265 Value stepped = b.createarith::AddIOp(t.getLoc(), iv, forOp.getStep());
1267 b.createarith::MinSIOp(t.getLoc(), forOp.getUpperBound(), stepped);
1268
1269
// The new inner loop runs from `iv` to `ub` with the original step, and takes
// over all but the last of the target's pre-existing ops (the last op is
// presumably the terminator).
1270 auto newForOp = b.createscf::ForOp(t.getLoc(), iv, ub, originalStep);
1271 newForOp.getBody()->getOperations().splice(
1272 newForOp.getBody()->getOperations().begin(),
1273 t.getBody()->getOperations(), begin, std::next(begin, nOps - 1));
1275 newForOp.getRegion());
1276
1277 innerLoops.push_back(newForOp);
1278 }
1279
1280 return innerLoops;
1281 }
1282
1283
1284
// Single-target convenience wrapper. The template parameter list was stripped
// and the line defining `res` (1292) is missing from this listing; `res` is
// presumably the result of a multi-target stripmineSink call made with
// `target` alone. Exactly one inner loop is expected and returned.
1285 template
1286 static scf::ForOp stripmineSink(scf::ForOp forOp, SizeType factor,
1287 scf::ForOp target) {
1288
1289
1290
1291
1293 assert(res.size() == 1 && "Expected 1 inner forOp");
1294 return res[0];
1295 }
1296
1302 for (auto it : llvm::zip(forOps, sizes)) {
1303 auto step = stripmineSink(std::get<0>(it), std::get<1>(it), currentTargets);
1304 res.push_back(step);
1305 currentTargets = step;
1306 }
1307 return res;
1308 }
1309
1311 scf::ForOp target) {
1314 res.push_back(llvm::getSingleElement(loops));
1315 return res;
1316 }
1317
1319
1320
1322 forOps.reserve(sizes.size());
1324 if (forOps.size() < sizes.size())
1325 sizes = sizes.take_front(forOps.size());
1326
1328 }
1329
1331 scf::ForOp root) {
1333 }
1334
1337
1338
1340 forOps.reserve(sizes.size());
1342 if (forOps.size() < sizes.size())
1343 sizes = sizes.take_front(forOps.size());
1344
1345
1346
1347
1348
1350 tileSizes.reserve(sizes.size());
1351 for (unsigned i = 0, e = sizes.size(); i < e; ++i) {
1352 assert(sizes[i] > 0 && "expected strictly positive size for strip-mining");
1353
1354 auto forOp = forOps[i];
1356 auto loc = forOp.getLoc();
1357 Value diff = builder.createarith::SubIOp(loc, forOp.getUpperBound(),
1358 forOp.getLowerBound());
1360 Value iterationsPerBlock =
1362 tileSizes.push_back(iterationsPerBlock);
1363 }
1364
1365
1366 auto intraTile = tile(forOps, tileSizes, forOps.back());
1367 TileLoops tileLoops = std::make_pair(forOps, intraTile);
1368
1369
1370
1371
1373
1374 return tileLoops;
1375 }
1376
// Fuses two scf.forall loops, `target` and `source`, into one new loop and
// replaces both originals with its results. The header start, the last
// parameter, and the declarations of `fusedOuts` and `mapping` are missing
// from this listing.
// The fused loop takes its location, bounds, steps and mapping from `source`;
// the target's outputs and results come first, the source's last.
1378 scf::ForallOp source,
1380 unsigned numTargetOuts = target.getNumResults();
1381 unsigned numSourceOuts = source.getNumResults();
1382
1383
// Combined outputs: target's outputs followed by source's.
1385 llvm::append_range(fusedOuts, target.getOutputs());
1386 llvm::append_range(fusedOuts, source.getOutputs());
1387
1388
1390 scf::ForallOp fusedLoop = rewriter.createscf::ForallOp(
1391 source.getLoc(), source.getMixedLowerBound(), source.getMixedUpperBound(),
1392 source.getMixedStep(), fusedOuts, source.getMapping());
1393
1394
// Both loops' induction variables map onto the fused loop's.
1396 mapping.map(target.getInductionVars(), fusedLoop.getInductionVars());
1397 mapping.map(source.getInductionVars(), fusedLoop.getInductionVars());
1398
1399
// Iter args: target's map to the leading fused iter args, source's to the
// trailing ones.
1400 mapping.map(target.getRegionIterArgs(),
1401 fusedLoop.getRegionIterArgs().take_front(numTargetOuts));
1402 mapping.map(source.getRegionIterArgs(),
1403 fusedLoop.getRegionIterArgs().take_back(numSourceOuts));
1404
1405
// Clone the bodies without their terminators, target first, then source (the
// insertion point is set on the missing line 1406).
1407 for (Operation &op : target.getBody()->without_terminator())
1408 rewriter.clone(op, mapping);
1409 for (Operation &op : source.getBody()->without_terminator())
1410 rewriter.clone(op, mapping);
1411
1412
// Clone the yielding ops of both terminators in the same order (the insertion
// point is set on the missing line 1416, presumably inside `fusedTerm`).
1413 scf::InParallelOp targetTerm = target.getTerminator();
1414 scf::InParallelOp sourceTerm = source.getTerminator();
1415 scf::InParallelOp fusedTerm = fusedLoop.getTerminator();
1417 for (Operation &op : targetTerm.getYieldingOps())
1418 rewriter.clone(op, mapping);
1419 for (Operation &op : sourceTerm.getYieldingOps())
1420 rewriter.clone(op, mapping);
1421
1422
// Replace each original loop with its slice of the fused results.
1423 rewriter.replaceOp(target, fusedLoop.getResults().take_front(numTargetOuts));
1424 rewriter.replaceOp(source, fusedLoop.getResults().take_back(numSourceOuts));
1425
1426 return fusedLoop;
1427 }
1428
// Fuses two scf.for loops, `target` and `source`, into one new loop and
// replaces both originals with its results. The header start, the last
// parameter, and the declarations of `fusedInitArgs`, `mapping` and
// `yieldResults` are missing from this listing.
// The fused loop takes its location, bounds and step from `source`; the
// target's init args and results come first, the source's last.
1430 scf::ForOp source,
1432 unsigned numTargetOuts = target.getNumResults();
1433 unsigned numSourceOuts = source.getNumResults();
1434
1435
// Combined init args: target's followed by source's.
1437 llvm::append_range(fusedInitArgs, target.getInitArgs());
1438 llvm::append_range(fusedInitArgs, source.getInitArgs());
1439
1440
1441
1443 scf::ForOp fusedLoop = rewriter.createscf::ForOp(
1444 source.getLoc(), source.getLowerBound(), source.getUpperBound(),
1445 source.getStep(), fusedInitArgs);
1446
1447
// Both induction variables map onto the fused one; iter args map to the
// leading (target) and trailing (source) fused iter args.
1449 mapping.map(target.getInductionVar(), fusedLoop.getInductionVar());
1450 mapping.map(target.getRegionIterArgs(),
1451 fusedLoop.getRegionIterArgs().take_front(numTargetOuts));
1452 mapping.map(source.getInductionVar(), fusedLoop.getInductionVar());
1453 mapping.map(source.getRegionIterArgs(),
1454 fusedLoop.getRegionIterArgs().take_back(numSourceOuts));
1455
1456
// Clone the bodies without their terminators, target first, then source (the
// insertion point is set on the missing line 1457).
1458 for (Operation &op : target.getBody()->without_terminator())
1459 rewriter.clone(op, mapping);
1460 for (Operation &op : source.getBody()->without_terminator())
1461 rewriter.clone(op, mapping);
1462
1463
// Gather the remapped yield operands in the same target-then-source order.
1465 for (Value operand : target.getBody()->getTerminator()->getOperands())
1466 yieldResults.push_back(mapping.lookupOrDefault(operand));
1467 for (Value operand : source.getBody()->getTerminator()->getOperands())
1468 yieldResults.push_back(mapping.lookupOrDefault(operand));
// A yield is created explicitly only when there is something to yield.
1469 if (!yieldResults.empty())
1470 rewriter.createscf::YieldOp(source.getLoc(), yieldResults);
1471
1472
// Replace each original loop with its slice of the fused results.
1473 rewriter.replaceOp(target, fusedLoop.getResults().take_front(numTargetOuts));
1474 rewriter.replaceOp(source, fusedLoop.getResults().take_back(numSourceOuts));
1475
1476 return fusedLoop;
1477 }
1478
1480 scf::ForallOp forallOp) {
1484
1485 if (forallOp.isNormalized())
1486 return forallOp;
1487
1489 auto loc = forallOp.getLoc();
1492 for (auto [lb, ub, step] : llvm::zip_equal(lbs, ubs, steps)) {
1493 Range normalizedLoopParams =
1495 newUbs.push_back(normalizedLoopParams.size);
1496 }
1498
1499
1500
1501 auto normalizedForallOp = rewriter.createscf::ForallOp(
1502 loc, newUbs, forallOp.getOutputs(), forallOp.getMapping(),
1504
1506 normalizedForallOp.getBodyRegion(),
1507 normalizedForallOp.getBodyRegion().begin());
1508
1509 rewriter.eraseBlock(&normalizedForallOp.getBodyRegion().back());
1510
1512
1513 for (auto [idx, iv] :
1514 llvm::enumerate(normalizedForallOp.getInductionVars())) {
1518 }
1519
1520 rewriter.replaceOp(forallOp, normalizedForallOp);
1521 return normalizedForallOp;
1522 }
static std::optional< int64_t > getConstantTripCount(scf::ForOp forOp)
Returns the trip count of forOp if its lower bound, upper bound and step are constants,...
static OpFoldResult getProductOfIndexes(RewriterBase &rewriter, Location loc, ArrayRef< OpFoldResult > values)
static LogicalResult tryIsolateBands(const TileLoops &tileLoops)
static void getPerfectlyNestedLoopsImpl(SmallVectorImpl< T > &forOps, T rootForOp, unsigned maxLoops=std::numeric_limits< unsigned >::max())
Collect perfectly nested loops starting from rootForOp.
static LogicalResult hoistOpsBetween(scf::ForOp outer, scf::ForOp inner)
static void generateUnrolledLoop(Block *loopBodyBlock, Value forOpIV, uint64_t unrollFactor, function_ref< Value(unsigned, Value, OpBuilder)> ivRemapFn, function_ref< void(unsigned, Operation *, OpBuilder)> annotateFn, ValueRange iterArgs, ValueRange yieldedValues)
Generates unrolled copies of scf::ForOp 'loopBodyBlock', with associated 'forOpIV' by 'unrollFactor',...
static Loops stripmineSink(scf::ForOp forOp, Value factor, ArrayRef< scf::ForOp > targets)
static std::pair< SmallVector< Value >, SmallPtrSet< Operation *, 2 > > delinearizeInductionVariable(RewriterBase &rewriter, Location loc, Value linearizedIv, ArrayRef< Value > ubs)
For each original loop, the value of the induction variable can be obtained by dividing the induction...
static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend, int64_t divisor)
static Value getProductOfIntsOrIndexes(RewriterBase &rewriter, Location loc, ArrayRef< Value > values)
Helper function to multiply a sequence of values.
static void denormalizeInductionVariableForIndexType(RewriterBase &rewriter, Location loc, Value normalizedIv, OpFoldResult origLb, OpFoldResult origStep)
Range emitNormalizedLoopBoundsForIndexType(RewriterBase &rewriter, Location loc, OpFoldResult lb, OpFoldResult ub, OpFoldResult step)
static bool areInnerBoundsInvariant(scf::ForOp forOp)
Check if bounds of all inner loops are defined outside of forOp and return false if not.
static int64_t product(ArrayRef< int64_t > vals)
static llvm::ManagedStatic< PassManagerOptions > options
static Value max(ImplicitLocOpBuilder &builder, Value value, Value bound)
Base type for affine expression.
This class represents an argument of a Block.
Block represents an ordered list of Operations.
OpListType::iterator iterator
unsigned getNumArguments()
Operation * getTerminator()
Get the terminator operation of this block.
BlockArgListType getArguments()
IntegerAttr getIndexAttr(int64_t value)
IntegerAttr getIntegerAttr(Type type, int64_t value)
TypedAttr getZeroAttr(Type type)
MLIRContext * getContext() const
TypedAttr getOneAttr(Type type)
This is a utility class for mapping one set of IR entities to another.
auto lookupOrDefault(T from) const
Lookup a mapped value within the map.
void map(Value from, Value to)
Inserts a new mapping for 'from' to 'to'.
This class coordinates rewriting a piece of IR outside of a pattern rewrite, providing a way to keep ...
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
RAII guard to reset the insertion point of the builder when destroyed.
This class helps build Operations.
Operation * clone(Operation &op, IRMapping &mapper)
Creates a deep copy of the specified operation, remapping any operands that use values outside of the...
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
static OpBuilder atBlockTerminator(Block *block, Listener *listener=nullptr)
Create a builder and set the insertion point to before the block terminator.
void setInsertionPointToEnd(Block *block)
Sets the insertion point to the end of the specified block.
Block * createBlock(Region *parent, Region::iterator insertPt={}, TypeRange argTypes=std::nullopt, ArrayRef< Location > locs=std::nullopt)
Add new block with 'argTypes' arguments and set the insertion point to the end of it.
void createOrFold(SmallVectorImpl< Value > &results, Location location, Args &&...args)
Create an operation of specific op type at the current insertion point, and immediately try to fold i...
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
void setInsertionPointAfter(Operation *op)
Sets the insertion point to the node after the specified operation, which will cause subsequent inser...
This class represents a single result from folding an operation.
This class represents an operand of an operation.
This is a value defined by a result of an operation.
Operation is the basic unit of execution within MLIR.
Operation * clone(IRMapping &mapper, CloneOptions options=CloneOptions::all())
Create a deep copy of this operation, remapping any operands that use values outside of the operation...
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
MutableArrayRef< Region > getRegions()
Returns the regions held by this operation.
operand_type_range getOperandTypes()
result_type_range getResultTypes()
operand_range getOperands()
Returns an iterator on the underlying Value's.
void setOperands(ValueRange operands)
Replace the current operands of this operation with the ones provided in 'operands'.
result_range getResults()
void erase()
Remove this operation from its parent block and delete it.
This class contains a list of basic blocks and a link to the parent operation it is attached to.
BlockArgListType getArguments()
ParentT getParentOfType()
Find the first parent operation of the given type, or nullptr if there is no ancestor operation.
bool hasOneBlock()
Return true if this region has exactly one block.
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
virtual void eraseBlock(Block *block)
This method erases all operations in a block.
virtual void replaceOp(Operation *op, ValueRange newValues)
Replace the results of the given (original) operation with the specified list of values (replacements...
void mergeBlocks(Block *source, Block *dest, ValueRange argValues=std::nullopt)
Inline the operations of block 'source' into the end of block 'dest'.
virtual void eraseOp(Operation *op)
This method erases an operation that is known to have no uses.
void replaceAllUsesExcept(Value from, Value to, Operation *exceptedUser)
Find uses of from and replace them with to except if the user is exceptedUser.
void modifyOpInPlace(Operation *root, CallableT &&callable)
This method is a utility wrapper around an in-place modification of an operation.
void inlineRegionBefore(Region &region, Region &parent, Region::iterator before)
Move the blocks that belong to "region" before the given position in another region "parent".
virtual void inlineBlockBefore(Block *source, Block *dest, Block::iterator before, ValueRange argValues=std::nullopt)
Inline the operations of block 'source' into block 'dest' before the given position.
This class provides an abstraction over the various different ranges of value types.
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
bool isIntOrIndex() const
Return true if this is an integer (of any signedness) or an index type.
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
bool use_empty() const
Returns true if this value has no uses.
void replaceUsesWithIf(Value newValue, function_ref< bool(OpOperand &)> shouldReplace)
Replace all uses of 'this' value with 'newValue' if the given callback returns true.
Type getType() const
Return the type of this value.
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
static WalkResult advance()
static WalkResult interrupt()
Specialization of arith.constant op that returns an integer of index type.
Operation * getOwner() const
Return the owner of this operand.
OpFoldResult makeComposedFoldedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Constructs an AffineApplyOp that applies map to operands after composing the map with the maps of any...
SmallVector< SmallVector< AffineForOp, 8 >, 8 > tile(ArrayRef< AffineForOp > forOps, ArrayRef< uint64_t > sizes, ArrayRef< AffineForOp > targets)
Performs tiling of imperfectly nested loops (with interchange) by strip-mining the forOps by sizes an...
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
Include the generated interface declarations.
void getPerfectlyNestedLoops(SmallVectorImpl< scf::ForOp > &nestedLoops, scf::ForOp root)
Get perfectly nested sequence of loops starting at root of loop nest (the first op being another Affi...
LogicalResult outlineIfOp(RewriterBase &b, scf::IfOp ifOp, func::FuncOp *thenFn, StringRef thenFnName, func::FuncOp *elseFn, StringRef elseFnName)
Outline the then and/or else regions of ifOp as follows:
void replaceAllUsesInRegionWith(Value orig, Value replacement, Region &region)
Replace all uses of orig within the given region with replacement.
SmallVector< scf::ForOp > replaceLoopNestWithNewYields(RewriterBase &rewriter, MutableArrayRef< scf::ForOp > loopNest, ValueRange newIterOperands, const NewYieldValuesFn &newYieldValuesFn, bool replaceIterOperandsUsesInLoop=true)
Update a perfectly nested loop nest to yield new values from the innermost loop and propagating it up...
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
Type getType(OpFoldResult ofr)
Returns the int type of the integer in ofr.
LogicalResult coalescePerfectlyNestedSCFForLoops(scf::ForOp op)
Walk an scf.for to find a band to coalesce.
void bindDims(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to DimExpr at positions: [0 .
std::pair< Loops, Loops > TileLoops
Value getValueOrCreateConstantIntOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
LogicalResult loopUnrollFull(scf::ForOp forOp)
Unrolls this loop completely.
void collapseParallelLoops(RewriterBase &rewriter, scf::ParallelOp loops, ArrayRef< std::vector< unsigned >> combinedDimensions)
Take the ParallelLoop and for each set of dimension indices, combine them into a single dimension.
bool isMemoryEffectFree(Operation *op)
Returns true if the given operation is free of memory effects.
std::function< SmallVector< Value >(OpBuilder &b, Location loc, ArrayRef< BlockArgument > newBbArgs)> NewYieldValuesFn
A function that returns the additional yielded values during replaceWithAdditionalYields.
Loops tilePerfectlyNested(scf::ForOp rootForOp, ArrayRef< Value > sizes)
Tile a nest of scf::ForOp loops rooted at rootForOp with the given (parametric) sizes.
FailureOr< UnrolledLoopInfo > loopUnrollByFactor(scf::ForOp forOp, uint64_t unrollFactor, function_ref< void(unsigned, Operation *, OpBuilder)> annotateFn=nullptr)
Unrolls this for operation by the specified unroll factor.
LogicalResult loopUnrollJamByFactor(scf::ForOp forOp, uint64_t unrollFactor)
Unrolls and jams this scf.for operation by the specified unroll factor.
bool getInnermostParallelLoops(Operation *rootOp, SmallVectorImpl< scf::ParallelOp > &result)
Get a list of innermost parallel loops contained in rootOp.
bool isZeroInteger(OpFoldResult v)
Return true if v is an IntegerAttr with value 0.
void bindSymbols(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to SymbolExpr at positions: [0 .
void getUsedValuesDefinedAbove(Region &region, Region &limit, SetVector< Value > &values)
Fill values with a list of values defined at the ancestors of the limit region and used within region...
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
SmallVector< Loops, 8 > tile(ArrayRef< scf::ForOp > forOps, ArrayRef< Value > sizes, ArrayRef< scf::ForOp > targets)
Performs tiling of imperfectly nested loops (with interchange) by strip-mining the forOps by sizes an...
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
FailureOr< func::FuncOp > outlineSingleBlockRegion(RewriterBase &rewriter, Location loc, Region &region, StringRef funcName, func::CallOp *callOp=nullptr)
Outline a region with a single block into a new FuncOp.
OpFoldResult getAsOpFoldResult(Value val)
Given a value, try to extract a constant Attribute.
bool areValuesDefinedAbove(Range values, Region &limit)
Check if all values in the provided range are defined above the limit region.
void denormalizeInductionVariable(RewriterBase &rewriter, Location loc, Value normalizedIv, OpFoldResult origLb, OpFoldResult origStep)
Get back the original induction variable values after loop normalization.
scf::ForallOp fuseIndependentSiblingForallLoops(scf::ForallOp target, scf::ForallOp source, RewriterBase &rewriter)
Given two scf.forall loops, target and source, fuses target into source.
LogicalResult coalesceLoops(MutableArrayRef< scf::ForOp > loops)
Replace a perfect nest of "for" loops with a single linearized loop.
scf::ForOp fuseIndependentSiblingForLoops(scf::ForOp target, scf::ForOp source, RewriterBase &rewriter)
Given two scf.for loops, target and source, fuses target into source.
TileLoops extractFixedOuterLoops(scf::ForOp rootFOrOp, ArrayRef< int64_t > sizes)
Range emitNormalizedLoopBounds(RewriterBase &rewriter, Location loc, OpFoldResult lb, OpFoldResult ub, OpFoldResult step)
Materialize bounds and step of a zero-based and unit-step loop derived by normalizing the specified b...
bool isOneInteger(OpFoldResult v)
Return true if v is an IntegerAttr with value 1.
LogicalResult foldDynamicIndexList(SmallVectorImpl< OpFoldResult > &ofrs, bool onlyNonNegative=false, bool onlyNonZero=false)
Returns "success" when any of the elements in ofrs is a constant value.
FailureOr< scf::ForallOp > normalizeForallOp(RewriterBase &rewriter, scf::ForallOp forallOp)
Normalize an scf.forall operation.
void getForwardSlice(Operation *op, SetVector< Operation * > *forwardSlice, const ForwardSliceOptions &options={})
Fills forwardSlice with the computed forward slice (i.e.
SmallVector< std::pair< Block::iterator, Block::iterator > > subBlocks
Represents a range (offset, size, and stride) where each element of the triple may be dynamic or stat...
std::optional< scf::ForOp > epilogueLoopOp
std::optional< scf::ForOp > mainLoopOp
Eliminates variable at the specified position using Fourier-Motzkin variable elimination.