MLIR: lib/Dialect/Linalg/Utils/Utils.cpp Source File
36 #include "llvm/ADT/TypeSwitch.h"
37 #include "llvm/Support/Debug.h"
38 #include <optional>
39
40 #define DEBUG_TYPE "linalg-utils"
41
42 using namespace mlir;
43 using namespace presburger;
47
48 namespace {
49
50
51
52
53
54
55
56
57
60
63 }
68 assert(cast<AffineConstantExpr>(expr.getRHS()).getValue() > 0 &&
69 "nonpositive multiplying coefficient");
70 }
73 };
74
75 }
76
78 if (!expr)
79 return false;
80 TileCheck t(tileSizes);
81 t.visit(expr);
82 return t.isTiled;
83 }
84
85
87 if (!map)
88 return false;
89 for (unsigned r = 0; r < map.getNumResults(); ++r)
91 return true;
92 return false;
93 }
94
95 std::optional<RegionMatcher::BinaryOpKind>
96 RegionMatcher::matchAsScalarBinaryOp(GenericOp op) {
97 auto ®ion = op.getRegion();
98 if (!llvm::hasSingleElement(region))
99 return std::nullopt;
100
105 return std::nullopt;
106
109 return std::nullopt;
110
114
115 auto addPattern = m_Op<linalg::YieldOp>(m_Op<arith::AddIOp>(a, b));
116 if (addPattern.match(&ops.back()))
117 return BinaryOpKind::IAdd;
118
119 return std::nullopt;
120 }
121
122
126
127
128
133 for (Range range : ranges) {
134 lbs.emplace_back(
137 steps.emplace_back(
139 }
140 }
141
142
143
144
145
146
147
148
149
150
151
155 PackingMetadata &packingMetadata) {
157 auto lastDims =
158 llvm::to_vector(llvm::seq<int64_t>(rank - numPackedDims, rank));
159 packingMetadata = computePackingMetadata(rank, innerDimsPos);
162
164 if (!outerPerm.empty())
168
171 return packInverseDestPermutation;
172 }
173
174 namespace mlir {
175 namespace linalg {
176
178
179 PackingMetadata pMetadata;
180 int64_t packedRank = packOp.getDestType().getRank();
185 return packInvDestPerm;
186 }
187
189 PackingMetadata metadata;
191 }
192
194 PackingMetadata &metadata) {
195 int64_t unpackRank = unpackOp.getSourceType().getRank();
200 return unpackInvSrcPerm;
201 }
202
204 return llvm::all_of(op.getIndexingMapsArray(), [](AffineMap m) {
205 return m.isProjectedPermutation(/*allowZeroInResults=*/true);
206 });
207 }
208
210 if (!llvm::hasSingleElement(r))
211 return false;
212 for (Operation &op : r.front()) {
213 if (!(isa<arith::ConstantOp, func::ConstantOp, tensor::ExtractOp,
214 linalg::YieldOp, linalg::IndexOp, AffineApplyOp>(op) ||
216 llvm::any_of(op.getResultTypes(),
217 [](Type type) { return !type.isIntOrIndexOrFloat(); }))
218 return false;
219 }
220 return true;
221 }
222
224 if (op.getNumLoops() != op.getNumParallelLoops())
225 return false;
226
228 return false;
229
230
231 for (OpOperand &opOperand : op.getDpsInitsMutable()) {
232 if (!op.getMatchingIndexingMap(&opOperand).isPermutation())
233 return false;
234 }
236 }
237
239 return iteratorType == utils::IteratorType::parallel;
240 }
241
243 return iteratorType == utils::IteratorType::reduction;
244 }
245
247 Value source, Value pad, bool nofold) {
248
249 auto sliceOp = source.getDefiningOp<tensor::ExtractSliceOp>();
250 if (!sliceOp)
252
253
254 Value current = sliceOp.getSource();
255 while (current) {
256 auto linalgOp = current.getDefiningOp<LinalgOp>();
257 if (!linalgOp)
258 break;
259 OpResult opResult = cast<OpResult>(current);
260 current = linalgOp.getDpsInitOperand(opResult.getResultNumber())->get();
261 }
262 auto padOp = current ? current.getDefiningOp<tensor::PadOp>() : nullptr;
263
264
265
266 if (!padOp)
268
269
270 if (sliceOp.getSource().getType() != type)
272
273
274 if (llvm::any_of(padOp.getMixedLowPad(), [](OpFoldResult ofr) {
275 return getConstantIntValue(ofr) != static_cast<int64_t>(0);
276 }))
278
279
280
281 auto padOpSliceOp = padOp.getSource().getDefiningOp<tensor::ExtractSliceOp>();
282 if (!padOpSliceOp ||
283 sliceOp.getMixedSizes().size() != padOpSliceOp.getMixedSizes().size())
285
286
287
288 if (llvm::any_of(
289 llvm::zip(sliceOp.getMixedSizes(), padOpSliceOp.getMixedSizes()),
290 [](std::tuple<OpFoldResult, OpFoldResult> it) {
291 return !isEqualConstantIntOrValue(std::get<0>(it), std::get<1>(it));
292 }))
294
295
297 Value padOpPad = padOp.getConstantPaddingValue();
301
302
303 return sliceOp.getSource();
304 }
305
307 auto memrefTypeTo = cast<MemRefType>(to.getType());
308 #ifndef NDEBUG
309 auto memrefTypeFrom = cast<MemRefType>(from.getType());
310 assert(memrefTypeFrom.getRank() == memrefTypeTo.getRank() &&
311 "`from` and `to` memref must have the same rank");
312 #endif
313
317 utils::IteratorType::parallel);
318 return b.create<linalg::GenericOp>(
319 loc,
320 from,
321 to,
323 iteratorTypes,
325 b.create<linalg::YieldOp>(loc, args.front());
326 });
327 }
328
329
330 template <>
336 bodyBuilderFn,
338 assert((procInfo.empty() || (procInfo.size() == loopRanges.size())) &&
339 "expected as many entries for proc info as number of loops, even if "
340 "they are null entries");
342 if (!linalgOp.hasPureBufferSemantics())
343 llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
345 unpackRanges(b, loc, loopRanges, lbs, ubs, steps);
347 b, loc, lbs, ubs, steps, iterArgInitValues,
349 assert(iterArgs.size() == iterArgInitValues.size() &&
350 "expect the number of output tensors and iter args to match");
352 if (!iterArgs.empty()) {
353 operandValuesToUse = linalgOp.getDpsInputs();
354 operandValuesToUse.append(iterArgs.begin(), iterArgs.end());
355 }
356 return bodyBuilderFn(b, loc, ivs, operandValuesToUse);
357 });
358
359 if (loopNest.loops.empty() || procInfo.empty())
360 return;
361
362
364 if (procInfo[loop.index()].distributionMethod ==
365 DistributionMethod::Cyclic) {
367 procInfo[loop.index()].nprocs);
368 }
369 }
370 }
371
372
373 template <>
379 bodyBuilderFn,
382 if (!linalgOp.hasPureBufferSemantics())
383 llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
384 assert(iterArgInitValues.empty() && "unexpected AffineForOp init values");
386 unpackRanges(b, loc, loopRanges, lbs, ubs, steps);
387
388
390 constantSteps.reserve(steps.size());
391 for (Value v : steps) {
393 assert(constVal.has_value() && "Affine loops require constant steps");
394 constantSteps.push_back(constVal.value());
395 }
396
399 bodyBuilderFn(b, loc, ivs,
400 linalgOp->getOperands());
401 });
402 }
403
404
411 lb =
414 }
415
416
417
418
419
420
421
422
423
424
425
432 assert(lbs.size() == ubs.size());
433 assert(lbs.size() == steps.size());
434 assert(lbs.size() == iteratorTypes.size());
435 assert(procInfo.empty() || (lbs.size() == procInfo.size()));
436
437
438
439 if (iteratorTypes.empty()) {
440 bodyBuilderFn(b, loc, ivStorage);
441 return;
442 }
443
444
445
448 b, loc, lbs.take_front(), ubs.take_front(), steps.take_front(),
450 ivStorage.append(ivs.begin(), ivs.end());
451 generateParallelLoopNest(
452 b, loc, lbs.drop_front(), ubs.drop_front(), steps.drop_front(),
453 iteratorTypes.drop_front(),
454 procInfo.empty() ? procInfo : procInfo.drop_front(),
455 bodyBuilderFn, ivStorage);
456 });
457 return;
458 }
459
460 unsigned nLoops = iteratorTypes.size();
461 unsigned numProcessed = 0;
463 if (procInfo.empty()) {
464 numProcessed = nLoops - iteratorTypes.drop_while(isParallelIterator).size();
465 } else {
466 distributionMethod = procInfo.front().distributionMethod;
467 numProcessed =
468 nLoops - procInfo
471 })
472 .size();
473 }
474
475 auto remainderProcInfo =
476 procInfo.empty() ? procInfo : procInfo.drop_front(numProcessed);
477 switch (distributionMethod) {
479
480
481 b.create<scf::ParallelOp>(
482 loc, lbs.take_front(numProcessed), ubs.take_front(numProcessed),
483 steps.take_front(numProcessed),
485 ivStorage.append(localIvs.begin(), localIvs.end());
487 nestedBuilder, nestedLoc, lbs.drop_front(numProcessed),
488 ubs.drop_front(numProcessed), steps.drop_front(numProcessed),
489 iteratorTypes.drop_front(numProcessed), remainderProcInfo,
490 bodyBuilderFn, ivStorage);
491 });
492 return;
493 }
494 case DistributionMethod::Cyclic: {
495
496
497 b.create<scf::ParallelOp>(
498 loc, lbs.take_front(numProcessed), ubs.take_front(numProcessed),
499 steps.take_front(numProcessed),
501 ivStorage.append(localIvs.begin(), localIvs.end());
503 nestedBuilder, nestedLoc, lbs.drop_front(numProcessed),
504 ubs.drop_front(numProcessed), steps.drop_front(numProcessed),
505 iteratorTypes.drop_front(numProcessed), remainderProcInfo,
506 bodyBuilderFn, ivStorage);
507 });
508 return;
509 }
510 case DistributionMethod::CyclicNumProcsGeNumIters: {
511
513 Value cond = ab.slt(lbs[0], ubs[0]);
514 for (unsigned i = 1; i < numProcessed; ++i)
515 cond = ab._and(cond, ab.slt(lbs[i], ubs[i]));
516 ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
519 ubs.drop_front(numProcessed),
520 steps.drop_front(numProcessed),
521 iteratorTypes.drop_front(numProcessed),
522 remainderProcInfo, bodyBuilderFn, ivStorage);
523 b.create<scf::YieldOp>(loc, ValueRange{});
524 });
525 return;
526 }
527 case DistributionMethod::CyclicNumProcsEqNumIters:
528
529
530 ivStorage.append(lbs.begin(), std::next(lbs.begin(), numProcessed));
532 b, loc, lbs.drop_front(numProcessed), ubs.drop_front(numProcessed),
533 steps.drop_front(numProcessed), iteratorTypes.drop_front(numProcessed),
534 remainderProcInfo, bodyBuilderFn, ivStorage);
535 return;
536 }
537 }
538
539
540 template <>
546 bodyBuilderFn,
549 if (!linalgOp.hasPureBufferSemantics())
550 llvm::append_range(iterArgInitValues, linalgOp.getDpsInits());
551 assert(iterArgInitValues.empty() && "unexpected ParallelOp init values");
552
553 assert(iteratorTypes.size() >= loopRanges.size() &&
554 "expected iterator type for all ranges");
555 assert((procInfo.empty() || (procInfo.size() == loopRanges.size())) &&
556 "expected proc information for all loops when present");
557 iteratorTypes = iteratorTypes.take_front(loopRanges.size());
559 unsigned numLoops = iteratorTypes.size();
560 ivs.reserve(numLoops);
561 lbsStorage.reserve(numLoops);
562 ubsStorage.reserve(numLoops);
563 stepsStorage.reserve(numLoops);
564
565
566 unpackRanges(b, loc, loopRanges, lbsStorage, ubsStorage, stepsStorage);
567
568
572 b, loc, it.value().procId, it.value().nprocs, lbsStorage[it.index()],
573 ubsStorage[it.index()], stepsStorage[it.index()]);
574 }
575 }
576 ValueRange lbs(lbsStorage), ubs(ubsStorage), steps(stepsStorage);
578 b, loc, lbs, ubs, steps, iteratorTypes, procInfo,
580 bodyBuilderFn(b, loc, ivs, linalgOp->getOperands());
581 },
582 ivs);
583
584 assert(ivs.size() == iteratorTypes.size() && "did not generate enough loops");
585 }
586
588 Value valueToTile,
590 auto shapedType = dyn_cast<ShapedType>(valueToTile.getType());
592 .Case([&](MemRefType) {
593 return builder.create<memref::SubViewOp>(
594 loc, valueToTile, sliceParams.offsets,
596 })
597 .Case([&](RankedTensorType) {
598 return builder.create<tensor::ExtractSliceOp>(
599 loc, valueToTile, sliceParams.offsets,
601 })
602 .Default([](ShapedType) -> Operation * {
603 llvm_unreachable("Unexpected shaped type");
604 });
605 return sliceOp;
606 }
607
613 bool omitPartialTileCheck) {
616 ubs, subShapeSizes, omitPartialTileCheck);
618 }
619
625 bool omitPartialTileCheck) {
626 auto shapedType = dyn_cast<ShapedType>(valueToTile.getType());
627 assert(shapedType && "only shaped types can be tiled");
629 int64_t rank = shapedType.getRank();
630
631
633 sliceParams.offsets.reserve(rank);
634 sliceParams.sizes.reserve(rank);
635 sliceParams.strides.reserve(rank);
636 for (unsigned r = 0; r < rank; ++r) {
637 LLVM_DEBUG(llvm::dbgs() << "computeSliceParameters: for dim#" << r);
641 sliceParams.sizes.push_back(dim);
643 LLVM_DEBUG(llvm::dbgs() << ": not tiled: use size: " << dim << "\n");
644 continue;
645 }
646 LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subsize...\n");
647
648
649
651 LLVM_DEBUG(llvm::dbgs() << "computeSliceParameters: submap: " << m << "\n");
653
656 [[maybe_unused]] auto res = m.constantFold(zeros, mAtZero);
657 assert(succeeded(res) && "affine_map must be evaluatable (not symbols)");
658 int64_t mAtZeroInt =
659 cast<IntegerAttr>(mAtZero[0]).getValue().getSExtValue();
661 rewriter, loc, m.getResult(0) - mAtZeroInt, lbs);
662 sliceParams.offsets.push_back(offset);
663
666
670 LLVM_DEBUG(llvm::dbgs()
671 << "computeSliceParameters: raw size: " << size << "\n");
672 LLVM_DEBUG(llvm::dbgs()
673 << "computeSliceParameters: new offset: " << offset << "\n");
675
676 if (omitPartialTileCheck) {
677
678
679 LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n");
680 sliceParams.sizes.push_back(size);
681 continue;
682 }
683
684
685
686
687
688
689 int64_t shapeSize = shape[r];
691 auto hasTileSizeOne = sizeCst && *sizeCst == 1;
692 auto dividesEvenly = sizeCst && !ShapedType::isDynamic(shapeSize) &&
693 ((shapeSize % *sizeCst) == 0);
694 if (!hasTileSizeOne && !dividesEvenly) {
695 LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: shapeSize=" << shapeSize
696 << ", size: " << size
697 << ": make sure in bound with affine.min\n");
698
701 bindDims(context, dim0, dim1, dim2);
702
703
704
705
706
707
708
711 .front();
714 .front();
716 llvm::to_vector(llvm::map_range(ubs, [&](OpFoldResult ub) {
718 {ub});
719 }));
724
725
728 .front();
729 size =
731 }
732 LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n");
733 sliceParams.sizes.push_back(size);
734 }
735 return sliceParams;
736 }
737
742 for (unsigned idx = 0, idxIvs = 0, e = tileSizes.size(); idx < e; ++idx) {
743 LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for loop#" << idx << "\n");
746 LLVM_DEBUG(llvm::dbgs()
747 << "computeTileOffsets: " << offsets.back() << "\n");
748 }
749 return offsets;
750 }
751
756 for (unsigned idx = 0, e = tileSizes.size(); idx < e; ++idx) {
758
763 LLVM_DEBUG(llvm::dbgs() << "computeTileSizes: " << sizes.back() << "\n");
764 }
765 return sizes;
766 }
767
769 if (op.hasPureBufferSemantics())
770 return {};
771 return llvm::to_vector(
772 llvm::map_range(op.getDpsInitsMutable(), [&](OpOperand &opOperand) {
773 return operands[opOperand.getOperandNumber()].getType();
774 }));
775 }
776
780 if (op.hasPureBufferSemantics())
781 return {};
783 tensorResults.reserve(results.size());
784
785 unsigned resultIdx = 0;
786 for (OpOperand &opOperand : op.getDpsInitsMutable()) {
787
788
789 Value outputTensor = operands[opOperand.getOperandNumber()];
790 if (auto sliceOp = outputTensor.getDefiningOp<tensor::ExtractSliceOp>()) {
791 Value inserted = builder.create<tensor::InsertSliceOp>(
792 loc, sliceOp.getSource().getType(), results[resultIdx],
793 sliceOp.getSource(), sliceOp.getOffsets(), sliceOp.getSizes(),
794 sliceOp.getStrides(), sliceOp.getStaticOffsets(),
795 sliceOp.getStaticSizes(), sliceOp.getStaticStrides());
796 tensorResults.push_back(inserted);
797 } else {
798 tensorResults.push_back(results[resultIdx]);
799 }
800 ++resultIdx;
801 }
802 return tensorResults;
803 }
804
810 bool omitPartialTileCheck) {
811 assert(ivs.size() == static_cast<size_t>(llvm::count_if(
812 llvm::make_range(tileSizes.begin(), tileSizes.end()),
813 [](OpFoldResult v) { return !isZeroInteger(v); })) &&
814 "expected as many ivs as non-zero sizes");
815
816
817
822
823 assert(static_cast<int64_t>(valuesToTile.size()) <=
824 linalgOp->getNumOperands() &&
825 "more value to tile than operands.");
827 allSliceParams.reserve(valuesToTile.size());
828 for (auto [opOperand, val] :
829 llvm::zip(linalgOp->getOpOperands(), valuesToTile)) {
830 Value shapedOp = val;
831 LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for operand " << shapedOp);
832 AffineMap map = linalgOp.getMatchingIndexingMap(&opOperand);
833
834
835
836
837
838
839 Type operandType = opOperand.get().getType();
840 if (!isTiled(map, tileSizes) && !(isa<RankedTensorType>(operandType) &&
841 linalgOp.isDpsInit(&opOperand))) {
842 allSliceParams.push_back(std::nullopt);
843 LLVM_DEBUG(llvm::dbgs()
844 << ": not tiled: use shape: " << operandType << "\n");
845 continue;
846 }
847 LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subshape...\n");
848
850 builder, loc, shapedOp, tileSizes, map, lbs, sizeBounds, subShapeSizes,
851 omitPartialTileCheck));
852 }
853
854 return allSliceParams;
855 }
856
858 LinalgOp linalgOp, ValueRange valuesToTile,
862 bool omitPartialTileCheck) {
865 tileSizes, sizeBounds, omitPartialTileCheck);
867 for (auto item : llvm::zip(valuesToTile, allSliceParameter)) {
868 Value valueToTile = std::get<0>(item);
869 std::optional<SliceParameters> sliceParams = std::get<1>(item);
870 tiledShapes.push_back(
871 sliceParams.has_value()
873 ->getResult(0)
874 : valueToTile);
875 }
876 return tiledShapes;
877 }
878
883 }
884
887 if (!linalgOp.hasIndexSemantics())
888 return;
889
890 for (IndexOp indexOp : linalgOp.getBlock()->getOps<IndexOp>()) {
891 if (indexOp.getDim() >= offsets.size() || !offsets[indexOp.getDim()])
892 continue;
898 b, indexOp.getLoc(), index + offset,
899 {getAsOpFoldResult(indexOp.getResult()), offsets[indexOp.getDim()]});
900 Value materialized =
904 });
905 }
906 }
907
908
909
910
911
912
913
914 std::optional<SmallVector<ReassociationIndices>>
919 auto dim = it.index();
920 auto size = it.value();
921 curr.push_back(dim);
922 auto attr = llvm::dyn_cast_if_present<Attribute>(size);
923 if (attr && cast<IntegerAttr>(attr).getInt() == 1)
924 continue;
926 std::swap(reassociation.back(), curr);
927 }
928
929
930
931 if (!curr.empty() && !reassociation.empty())
932 reassociation.back().append(curr.begin(), curr.end());
933 return reassociation;
934 }
935
936 }
937 }
static bool isTiled(AffineExpr expr, ArrayRef< OpFoldResult > tileSizes)
static void unpackRanges(OpBuilder &builder, Location loc, ArrayRef< Range > ranges, SmallVectorImpl< Value > &lbs, SmallVectorImpl< Value > &ubs, SmallVectorImpl< Value > &steps)
Given a list of subview ranges, extract individual values for lower, upper bounds and steps and put t...
static SmallVector< int64_t > computePackUnPackPerm(int64_t rank, ArrayRef< int64_t > &innerDimsPos, ArrayRef< int64_t > &outerPerm, PackingMetadata &packingMetadata)
The permutation can be obtained from two permutations: a) Compute the permutation vector to move the ...
static void visit(Operation *op, DenseSet< Operation * > &visited)
Visits all the pdl.operand(s), pdl.result(s), and pdl.operation(s) connected to the given operation.
SmallVector< int64_t > innerDimsPos
DiagnosedSilenceableFailure doit(RewriterBase &rewriter, OpTy target, transform::ApplyToEachResultList &results, transform::TransformState &state)
Affine binary operation expression.
AffineExpr getLHS() const
AffineExpr getRHS() const
A dimensional identifier appearing in an affine expression.
unsigned getPosition() const
See documentation for AffineExprVisitorBase.
Base type for affine expression.
AffineExprKind getKind() const
Return the classification for this type.
A multi-dimensional affine map. Affine maps are immutable like Types, and they are uniqued.
static AffineMap getMultiDimIdentityMap(unsigned numDims, MLIRContext *context)
Returns an AffineMap with 'numDims' identity result dim exprs.
unsigned getNumResults() const
AffineExpr getResult(unsigned idx) const
AffineMap getSubMap(ArrayRef< unsigned > resultPos) const
Returns the map consisting of the resultPos subset.
static SmallVector< AffineMap, 4 > inferFromExprList(ArrayRef< ArrayRef< AffineExpr >> exprsList, MLIRContext *context)
Returns a vector of AffineMaps; each with as many results as exprs.size(), as many dims as the larges...
Attributes are known-constant values of operations.
Block represents an ordered list of Operations.
BlockArgument getArgument(unsigned i)
unsigned getNumArguments()
OpListType & getOperations()
iterator_range< iterator > without_terminator()
Return an iterator range over the operations within this block excluding the terminator operation at t...
IntegerAttr getIndexAttr(int64_t value)
MLIRContext * getContext() const
This class coordinates rewriting a piece of IR outside of a pattern rewrite, providing a way to keep ...
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
MLIRContext is the top-level object for a collection of MLIR operations.
RAII guard to reset the insertion point of the builder when destroyed.
This class helps build Operations.
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
void setInsertionPointAfter(Operation *op)
Sets the insertion point to the node after the specified operation, which will cause subsequent inser...
This class represents a single result from folding an operation.
This class represents an operand of an operation.
This is a value defined by a result of an operation.
unsigned getResultNumber() const
Returns the number of this result.
Operation is the basic unit of execution within MLIR.
This class contains a list of basic blocks and a link to the parent operation it is attached to.
This class coordinates the application of a rewrite on a set of IR, providing a way for clients to tr...
void replaceUsesWithIf(Value from, Value to, function_ref< bool(OpOperand &)> functor, bool *allUsesReplaced=nullptr)
Find uses of from and replace them with to if the functor returns true.
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
bool isSignlessIntOrFloat() const
Return true if this is a signless integer or a float type.
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Type getType() const
Return the type of this value.
Operation * getDefiningOp() const
If this value is the result of an operation, return the operation that defines it.
Operation * getOwner() const
Return the owner of this operand.
bool hasElementwiseMappableTraits(Operation *op)
Together, Elementwise, Scalarizable, Vectorizable, and Tensorizable provide an easy way for scalar op...
void buildAffineLoopNest(OpBuilder &builder, Location loc, ArrayRef< int64_t > lbs, ArrayRef< int64_t > ubs, ArrayRef< int64_t > steps, function_ref< void(OpBuilder &, Location, ValueRange)> bodyBuilderFn=nullptr)
Builds a perfect nest of affine.for loops, i.e., each loop except the innermost one contains only ano...
AffineApplyOp makeComposedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Returns a composed AffineApplyOp by composing map and operands with other AffineApplyOps supplying th...
OpFoldResult makeComposedFoldedAffineMin(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Constructs an AffineMinOp that computes a minimum across the results of applying map to operands,...
OpFoldResult makeComposedFoldedAffineApply(OpBuilder &b, Location loc, AffineMap map, ArrayRef< OpFoldResult > operands)
Constructs an AffineApplyOp that applies map to operands after composing the map with the maps of any...
void mapLoopToProcessorIds(scf::ForOp forOp, ArrayRef< Value > processorId, ArrayRef< Value > numProcessors)
Maps forOp for execution on a parallel grid of virtual processorIds of size given by numProcessors.
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
SmallVector< Value > makeTiledShapes(OpBuilder &builder, Location loc, LinalgOp linalgOp, ValueRange valuesToTile, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds, bool omitPartialTileCheck)
Creates extract_slice/subview ops for all valuesToTile of the given linalgOp with builder,...
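A minimal usage sketch (not part of this file), assuming an OpBuilder b, a Location loc, a linalg::LinalgOp linalgOp, the loop induction variables ivs (one per non-zero tile size), and precomputed tileSizes/sizeBounds; the helpers are declared in mlir/Dialect/Linalg/Utils/Utils.h and in-tree tilings do additional bookkeeping around this call:
  SmallVector<Value> tiledOperands = linalg::makeTiledShapes(
      b, loc, linalgOp, /*valuesToTile=*/linalgOp->getOperands(), ivs,
      tileSizes, sizeBounds, /*omitPartialTileCheck=*/false);
  // Operands whose indexing map is unaffected by the tiled loops are returned
  // unchanged; all others become extract_slice/subview results.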
bool allIndexingsAreProjectedPermutation(LinalgOp op)
Check if all indexing maps are projected permutations.
bool isParallelIterator(utils::IteratorType iteratorType)
Check if iterator type has "parallel" semantics.
SmallVector< OpFoldResult > computeTileSizes(OpBuilder &b, Location loc, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds)
Computes tile sizes, given a list of tileSizes and dimension sizes (sizeBounds).
GenericOp makeMemRefCopyOp(OpBuilder &b, Location loc, Value from, Value to)
Returns GenericOp that copies an n-D memref.
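A sketch of the intended use, assuming b, loc, and two memref Values src and dst of equal rank are in scope:
  linalg::GenericOp copyOp =
      linalg::makeMemRefCopyOp(b, loc, /*from=*/src, /*to=*/dst);
  // The generated op reads src and writes dst with identity indexing maps,
  // all-parallel iterators, and a body that simply yields the input element.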
static void generateParallelLoopNest(OpBuilder &b, Location loc, ValueRange lbs, ValueRange ubs, ValueRange steps, ArrayRef< utils::IteratorType > iteratorTypes, ArrayRef< linalg::ProcInfo > procInfo, function_ref< void(OpBuilder &, Location, ValueRange)> bodyBuilderFn, SmallVectorImpl< Value > &ivStorage)
Generates a loop nest consisting of scf.parallel and scf.for, depending on the iteratorTypes.
SmallVector< OpFoldResult > computeTileOffsets(OpBuilder &b, Location loc, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes)
Computes tile offsets, given a list of loop ivs and tileSizes.
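Only tiled dimensions consume an induction variable; dimensions with tile size 0 get a constant zero offset. A sketch, assuming b, loc, ivs, and tileSizes are in scope:
  // E.g. (illustrative) tileSizes = [32, 0, 16] and ivs = [%i, %k]
  // produce offsets = [%i, 0, %k].
  SmallVector<OpFoldResult> tileOffsets =
      linalg::computeTileOffsets(b, loc, ivs, tileSizes);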
bool isReductionIterator(utils::IteratorType iteratorType)
Check if iterator type has "reduction" semantics.
bool hasOnlyScalarElementwiseOp(Region &r)
Detect whether r has only ConstantOp, ElementwiseMappable and YieldOp.
static Operation * materializeTiledShape(OpBuilder &builder, Location loc, Value valueToTile, const SliceParameters &sliceParams)
std::optional< SmallVector< ReassociationIndices > > getReassociationMapForFoldingUnitDims(ArrayRef< OpFoldResult > mixedSizes)
Get the reassociation maps to fold the result of a extract_slice (or source of a insert_slice) operat...
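A worked sketch, assuming a Builder b is in scope; each unit dimension is grouped with the next non-unit dimension, and trailing unit dimensions join the last group:
  SmallVector<OpFoldResult> mixedSizes = {b.getIndexAttr(1), b.getIndexAttr(128),
                                          b.getIndexAttr(1), b.getIndexAttr(32)};
  std::optional<SmallVector<ReassociationIndices>> reassoc =
      linalg::getReassociationMapForFoldingUnitDims(mixedSizes);
  // Expected reassociation: {{0, 1}, {2, 3}}, i.e. 1x128x1x32 collapses to 128x32.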
OpFoldResult createFoldedDimOp(OpBuilder &b, Location loc, Value val, int64_t dim)
Create one memref::DimOp or tensor::DimOp depending on the type of val.
DistributionMethod
Scheme used to distribute loops to processors.
SmallVector< Value > insertSlicesBack(OpBuilder &builder, Location loc, LinalgOp op, ValueRange operands, ValueRange results)
Creates insert_slice ops that insert results back into larger tensors they were originally extracted ...
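A sketch of the typical pairing with makeTiledShapes, assuming tiledOp is the clone of linalgOp computed on the extracted slices and tiledOperands are the values it was cloned with:
  SmallVector<Value> fullTensorResults = linalg::insertSlicesBack(
      b, loc, linalgOp, tiledOperands, tiledOp->getResults());
  // Results whose output operand came from an extract_slice are re-inserted
  // into the original destination tensor; all other results pass through.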
SmallVector< int64_t > getPackInverseDestPerm(PackOp packOp)
bool isElementwise(LinalgOp op)
Check if a LinalgOp is an element-wise operation.
void offsetIndices(OpBuilder &b, LinalgOp linalgOp, ArrayRef< OpFoldResult > offsets)
Add the specified offsets to any linalg.index ops contained in the given linalgOp.
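A sketch of the common use after tiling, assuming tiledOp is the tiled clone and tileOffsets are the per-loop offsets of the current tile:
  // linalg.index inside the tile yields tile-local values; add the tile
  // offsets back so downstream indexing remains global.
  linalg::offsetIndices(b, tiledOp, tileOffsets);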
SmallVector< std::optional< SliceParameters > > computeAllSliceParameters(OpBuilder &builder, Location loc, LinalgOp linalgOp, ValueRange valuesToTile, ArrayRef< OpFoldResult > ivs, ArrayRef< OpFoldResult > tileSizes, ArrayRef< OpFoldResult > sizeBounds, bool omitPartialTileCheck)
Computes SliceParameters for all valuesToTile of the given linalgOp, assuming linalgOp is being fused...
Operation * makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile, ArrayRef< OpFoldResult > tileSizes, AffineMap map, ArrayRef< OpFoldResult > lbs, ArrayRef< OpFoldResult > ubs, ArrayRef< OpFoldResult > subShapeSizes, bool omitPartialTileCheck)
Creates an extract_slice/subview op for a single valueToTile with builder.
Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type, Value source, Value pad, bool nofold)
Create a tensor::PadOp that pads source to the size of the statically sized type whose static sizes a...
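A usage sketch, assuming paddedType is the statically shaped result type and padValue the scalar padding value:
  Value padded = linalg::makeComposedPadHighOp(b, loc, paddedType, source,
                                               padValue, /*nofold=*/false);
  // If source is an extract_slice whose producer chain already pads with the
  // same value to the same sizes, that existing value is reused instead of
  // emitting a fresh tensor.pad.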
SmallVector< int64_t > getUnPackInverseSrcPerm(UnPackOp unpackOp, PackingMetadata &metadata)
void updateBoundsForCyclicDistribution(OpBuilder &builder, Location loc, Value procId, Value nprocs, Value &lb, Value &ub, Value &step)
Update the lb, ub and step to get per processor lb, ub and step.
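A sketch of the cyclic rewrite, assuming procId/nprocs identify the processor and lb/ub/step are the original loop bounds (all index-typed Values):
  // Conceptually: lb <- lb + procId * step and step <- step * nprocs, so
  // processor procId executes iterations procId, procId + nprocs, ...
  linalg::updateBoundsForCyclicDistribution(b, loc, procId, nprocs, lb, ub, step);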
SmallVector< Type > getTensorOutputTypes(LinalgOp op, ValueRange operands)
Returns the list of tensor output types produced when the given structured operation op is applied to...
SliceParameters computeSliceParameters(OpBuilder &builder, Location loc, Value valueToTile, ArrayRef< OpFoldResult > tileSizes, AffineMap map, ArrayRef< OpFoldResult > lbs, ArrayRef< OpFoldResult > ubs, ArrayRef< OpFoldResult > subShapeSizes, bool omitPartialTileCheck)
Computes SliceParameters for a single valueToTile assuming that its user is being tiled with the give...
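A sketch that computes the slice description first and materializes it by hand, assuming operand, its indexingMap, and the lbs/ubs/subShapeSizes lists are already available:
  linalg::SliceParameters params = linalg::computeSliceParameters(
      b, loc, operand, tileSizes, indexingMap, lbs, ubs, subShapeSizes,
      /*omitPartialTileCheck=*/false);
  auto slice = b.create<tensor::ExtractSliceOp>(
      loc, operand, params.offsets, params.sizes, params.strides);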
LoopNest buildLoopNest(OpBuilder &builder, Location loc, ValueRange lbs, ValueRange ubs, ValueRange steps, ValueRange iterArgs, function_ref< ValueVector(OpBuilder &, Location, ValueRange, ValueRange)> bodyBuilder=nullptr)
Creates a perfect nest of "for" loops, i.e.
SmallVector< Value > ValueVector
An owning vector of values, handy to return from functions.
PadOp createPadHighOp(RankedTensorType resType, Value source, Value pad, bool nofold, Location loc, OpBuilder &builder, SmallVector< Value > dynOutDims={})
Include the generated interface declarations.
bool matchPattern(Value value, const Pattern &pattern)
Entry point for matching a pattern over a Value.
std::optional< int64_t > getConstantIntValue(OpFoldResult ofr)
If ofr is a constant integer or an IntegerAttr, return the integer.
void bindDims(MLIRContext *ctx, AffineExprTy &...exprs)
Bind a list of AffineExpr references to DimExpr at positions: [0 .
@ Mul
RHS of mul is always a constant or a symbolic expression.
SmallVector< int64_t > computePermutationVector(int64_t permSize, ArrayRef< int64_t > positions, ArrayRef< int64_t > desiredPositions)
Return a permutation vector of size permSize that would result in moving positions into desiredPositi...
bool isZeroInteger(OpFoldResult v)
Return true if v is an IntegerAttr with value 0.
Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr)
Converts an OpFoldResult to a Value.
detail::constant_op_matcher m_Constant()
Matches a constant foldable operation.
void applyPermutationToVector(SmallVector< T, N > &inVec, ArrayRef< int64_t > permutation)
Apply the permutation defined by permutation to inVec.
AffineExpr getAffineDimExpr(unsigned position, MLIRContext *context)
These free functions allow clients of the API to not use classes in detail.
AffineExpr getAffineSymbolExpr(unsigned position, MLIRContext *context)
Helper struct to build simple arithmetic quantities with minimal type inference support.
Value _and(Value lhs, Value rhs)
Value slt(Value lhs, Value rhs)
Represents a range (offset, size, and stride) where each element of the triple may be dynamic or stat...
Utility class used to generate nested loops with ranges described by loopRanges and loop type describ...
Callback function type used to get processor ID, and number of processors used for distribution for a...
DistributionMethod distributionMethod
A struct containing offsets-sizes-strides arguments of the tiled shape.
SmallVector< OpFoldResult > strides
SmallVector< OpFoldResult > sizes
SmallVector< OpFoldResult > offsets