MLIR: lib/Conversion/GPUCommon/GPUOpsLowering.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
10
17 #include "llvm/ADT/SmallVectorExtras.h"
18 #include "llvm/ADT/StringSet.h"
19 #include "llvm/Support/FormatVariadic.h"
20
21 using namespace mlir;
22
25 StringRef name,
26 LLVM::LLVMFunctionType type) {
27 LLVM::LLVMFuncOp ret;
28 if (!(ret = moduleOp.template lookupSymbolLLVM::LLVMFuncOp(name))) {
31 ret = b.createLLVM::LLVMFuncOp(loc, name, type, LLVM::Linkage::External);
32 }
33 return ret;
34 }
35
37 StringRef prefix) {
38
39 unsigned stringNumber = 0;
41 do {
42 stringConstName.clear();
43 (prefix + Twine(stringNumber++)).toStringRef(stringConstName);
44 } while (moduleOp.lookupSymbol(stringConstName));
45 return stringConstName;
46 }
47
48 LLVM::GlobalOp
50 gpu::GPUModuleOp moduleOp, Type llvmI8,
51 StringRef namePrefix, StringRef str,
52 uint64_t alignment, unsigned addrSpace) {
54 nullTermStr.push_back('\0');
55 auto globalType =
58
59
60 for (auto globalOp : moduleOp.getOpsLLVM::GlobalOp())
61 if (globalOp.getGlobalType() == globalType && globalOp.getConstant() &&
62 globalOp.getValueAttr() == attr &&
63 globalOp.getAlignment().value_or(0) == alignment &&
64 globalOp.getAddrSpace() == addrSpace)
65 return globalOp;
66
67
71 return b.createLLVM::GlobalOp(loc, globalType,
72 true, LLVM::Linkage::Internal,
73 name, attr, alignment, addrSpace);
74 }
75
76 LogicalResult
79 Location loc = gpuFuncOp.getLoc();
80
82 if (encodeWorkgroupAttributionsAsArguments) {
83
84
85
87 gpuFuncOp.getWorkgroupAttributions();
88 size_t numAttributions = workgroupAttributions.size();
89
90
91 unsigned index = gpuFuncOp.getNumArguments();
93
94
95 Type workgroupPtrType =
96 rewriter.getTypeLLVM::LLVMPointerType(workgroupAddrSpace);
98
99
100 std::array attrs{
101 rewriter.getNamedAttr(LLVM::LLVMDialect::getNoAliasAttrName(),
104 getDialect().getWorkgroupAttributionAttrHelper().getName(),
106 };
108 for (BlockArgument attribution : workgroupAttributions) {
109 auto attributionType = cast(attribution.getType());
110 IntegerAttr numElements =
112 Type llvmElementType =
114 if (!llvmElementType)
115 return failure();
117 attrs.back().setValue(
118 rewriter.getAttrLLVM::WorkgroupAttributionAttr(numElements, type));
120 }
121
122
124
125
127 gpuFuncOp, [gpuFuncOp, &argIndices, &argTypes, &argAttrs, &argLocs]() {
128 LogicalResult inserted =
129 static_cast<FunctionOpInterface>(gpuFuncOp).insertArguments(
130 argIndices, argTypes, argAttrs, argLocs);
131 (void)inserted;
132 assert(succeeded(inserted) &&
133 "expected GPU funcs to support inserting any argument");
134 });
135 } else {
136 workgroupBuffers.reserve(gpuFuncOp.getNumWorkgroupAttributions());
137 for (auto [idx, attribution] :
139 auto type = dyn_cast(attribution.getType());
140 assert(type && type.hasStaticShape() && "unexpected type in attribution");
141
142 uint64_t numElements = type.getNumElements();
143
144 auto elementType =
147 std::string name =
148 std::string(llvm::formatv("__wg_{0}_{1}", gpuFuncOp.getName(), idx));
149 uint64_t alignment = 0;
150 if (auto alignAttr = dyn_cast_or_null(
151 gpuFuncOp.getWorkgroupAttributionAttr(
152 idx, LLVM::LLVMDialect::getAlignAttrName())))
153 alignment = alignAttr.getInt();
154 auto globalOp = rewriter.createLLVM::GlobalOp(
155 gpuFuncOp.getLoc(), arrayType, false,
156 LLVM::Linkage::Internal, name, Attribute(), alignment,
157 workgroupAddrSpace);
158 workgroupBuffers.push_back(globalOp);
159 }
160 }
161
162
164 gpuFuncOp.front().getNumArguments());
165
167 gpuFuncOp.getFunctionType(), false,
168 getTypeConverter()->getOptions().useBarePtrCallConv, signatureConversion);
169 if (!funcType) {
171 diag << "failed to convert function signature type for: "
172 << gpuFuncOp.getFunctionType();
173 });
174 }
175
176
177
179 ArrayAttr argAttrs;
180 for (const auto &attr : gpuFuncOp->getAttrs()) {
182 attr.getName() == gpuFuncOp.getFunctionTypeAttrName() ||
183 attr.getName() ==
184 gpu::GPUFuncOp::getNumWorkgroupAttributionsAttrName() ||
185 attr.getName() == gpuFuncOp.getWorkgroupAttribAttrsAttrName() ||
186 attr.getName() == gpuFuncOp.getPrivateAttribAttrsAttrName() ||
187 attr.getName() == gpuFuncOp.getKnownBlockSizeAttrName() ||
188 attr.getName() == gpuFuncOp.getKnownGridSizeAttrName())
189 continue;
190 if (attr.getName() == gpuFuncOp.getArgAttrsAttrName()) {
191 argAttrs = gpuFuncOp.getArgAttrsAttr();
192 continue;
193 }
194 attributes.push_back(attr);
195 }
196
197 DenseI32ArrayAttr knownBlockSize = gpuFuncOp.getKnownBlockSizeAttr();
198 DenseI32ArrayAttr knownGridSize = gpuFuncOp.getKnownGridSizeAttr();
199
200
201 auto *gpuDialect = castgpu::GPUDialect(gpuFuncOp->getDialect());
202 if (knownBlockSize)
203 attributes.emplace_back(gpuDialect->getKnownBlockSizeAttrHelper().getName(),
204 knownBlockSize);
205 if (knownGridSize)
206 attributes.emplace_back(gpuDialect->getKnownGridSizeAttrHelper().getName(),
207 knownGridSize);
208
209
210
211
212 if (gpuFuncOp.isKernel()) {
213 if (kernelAttributeName)
214 attributes.emplace_back(kernelAttributeName, rewriter.getUnitAttr());
215
216 if (kernelBlockSizeAttributeName && knownBlockSize) {
217 attributes.emplace_back(kernelBlockSizeAttributeName, knownBlockSize);
218 }
219 }
220 LLVM::CConv callingConvention = gpuFuncOp.isKernel()
221 ? kernelCallingConvention
222 : nonKernelCallingConvention;
223 auto llvmFuncOp = rewriter.createLLVM::LLVMFuncOp(
224 gpuFuncOp.getLoc(), gpuFuncOp.getName(), funcType,
225 LLVM::Linkage::External, false, callingConvention,
226 nullptr, attributes);
227
228 {
229
230
231
232
233
235
236
238 unsigned numProperArguments = gpuFuncOp.getNumArguments();
239
240 if (encodeWorkgroupAttributionsAsArguments) {
241
242
243 unsigned numAttributions = gpuFuncOp.getNumWorkgroupAttributions();
244 assert(numProperArguments >= numAttributions &&
245 "Expecting attributions to be encoded as arguments already");
246
247
248
250 gpuFuncOp.getArguments().slice(numProperArguments - numAttributions,
251 numAttributions);
253 gpuFuncOp.getWorkgroupAttributions(), attributionArguments))) {
254 auto [attribution, arg] = vals;
255 auto type = cast(attribution.getType());
256
257
258
261
262
263 signatureConversion.remapInput(numProperArguments + idx, descr);
264 }
265 } else {
266 for (const auto [idx, global] : llvm::enumerate(workgroupBuffers)) {
268 global.getAddrSpace());
269 Value address = rewriter.createLLVM::AddressOfOp(
270 loc, ptrType, global.getSymNameAttr());
272 rewriter.createLLVM::GEPOp(loc, ptrType, global.getType(),
274
275
276
277
278
279 Value attribution = gpuFuncOp.getWorkgroupAttributions()[idx];
280 auto type = cast(attribution.getType());
283 signatureConversion.remapInput(numProperArguments + idx, descr);
284 }
285 }
286
287
288 unsigned numWorkgroupAttributions = gpuFuncOp.getNumWorkgroupAttributions();
290 for (const auto [idx, attribution] :
292 auto type = cast(attribution.getType());
293 assert(type && type.hasStaticShape() && "unexpected type in attribution");
294
295
296
297
299 auto ptrType =
301 Value numElements = rewriter.createLLVM::ConstantOp(
302 gpuFuncOp.getLoc(), int64Ty, type.getNumElements());
303 uint64_t alignment = 0;
304 if (auto alignAttr =
305 dyn_cast_or_null(gpuFuncOp.getPrivateAttributionAttr(
306 idx, LLVM::LLVMDialect::getAlignAttrName())))
307 alignment = alignAttr.getInt();
308 Value allocated = rewriter.createLLVM::AllocaOp(
309 gpuFuncOp.getLoc(), ptrType, elementType, numElements, alignment);
313 numProperArguments + numWorkgroupAttributions + idx, descr);
314 }
315 }
316
317
318 rewriter.inlineRegionBefore(gpuFuncOp.getBody(), llvmFuncOp.getBody(),
319 llvmFuncOp.end());
321 &signatureConversion)))
322 return failure();
323
324
325
326 for (const auto [idx, argTy] :
328 auto remapping = signatureConversion.getInputMapping(idx);
330 argAttrs ? cast(argAttrs[idx]) : NamedAttrList();
331 auto copyAttribute = [&](StringRef attrName) {
333 if (!attr)
334 return;
335 for (size_t i = 0, e = remapping->size; i < e; ++i)
336 llvmFuncOp.setArgAttr(remapping->inputNo + i, attrName, attr);
337 };
338 auto copyPointerAttribute = [&](StringRef attrName) {
340
341 if (!attr)
342 return;
343 if (remapping->size > 1 &&
344 attrName == LLVM::LLVMDialect::getNoAliasAttrName()) {
346 "Cannot copy noalias with non-bare pointers.\n");
347 return;
348 }
349 for (size_t i = 0, e = remapping->size; i < e; ++i) {
350 if (isaLLVM::LLVMPointerType(
351 llvmFuncOp.getArgument(remapping->inputNo + i).getType())) {
352 llvmFuncOp.setArgAttr(remapping->inputNo + i, attrName, attr);
353 }
354 }
355 };
356
357 if (argAttr.empty())
358 continue;
359
360 copyAttribute(LLVM::LLVMDialect::getReturnedAttrName());
361 copyAttribute(LLVM::LLVMDialect::getNoUndefAttrName());
362 copyAttribute(LLVM::LLVMDialect::getInRegAttrName());
363 bool lowersToPointer = false;
364 for (size_t i = 0, e = remapping->size; i < e; ++i) {
365 lowersToPointer |= isaLLVM::LLVMPointerType(
366 llvmFuncOp.getArgument(remapping->inputNo + i).getType());
367 }
368
369 if (lowersToPointer) {
370 copyPointerAttribute(LLVM::LLVMDialect::getNoAliasAttrName());
371 copyPointerAttribute(LLVM::LLVMDialect::getNoCaptureAttrName());
372 copyPointerAttribute(LLVM::LLVMDialect::getNoFreeAttrName());
373 copyPointerAttribute(LLVM::LLVMDialect::getAlignAttrName());
374 copyPointerAttribute(LLVM::LLVMDialect::getReadonlyAttrName());
375 copyPointerAttribute(LLVM::LLVMDialect::getWriteOnlyAttrName());
376 copyPointerAttribute(LLVM::LLVMDialect::getReadnoneAttrName());
377 copyPointerAttribute(LLVM::LLVMDialect::getNonNullAttrName());
378 copyPointerAttribute(LLVM::LLVMDialect::getDereferenceableAttrName());
379 copyPointerAttribute(
380 LLVM::LLVMDialect::getDereferenceableOrNullAttrName());
381 copyPointerAttribute(
382 LLVM::LLVMDialect::WorkgroupAttributionAttrHelper::getNameStr());
383 }
384 }
385 rewriter.eraseOp(gpuFuncOp);
386 return success();
387 }
388
390 gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
392 Location loc = gpuPrintfOp->getLoc();
393
398
399
400
401 auto moduleOp = gpuPrintfOp->getParentOfTypegpu::GPUModuleOp();
402
403 auto ocklBegin =
406 LLVM::LLVMFuncOp ocklAppendArgs;
407 if (!adaptor.getArgs().empty()) {
409 moduleOp, loc, rewriter, "__ockl_printf_append_args",
411 llvmI64, {llvmI64, llvmI32, llvmI64, llvmI64, llvmI64,
412 llvmI64, llvmI64, llvmI64, llvmI64, llvmI32}));
413 }
415 moduleOp, loc, rewriter, "__ockl_printf_append_string_n",
417 llvmI64,
418 {llvmI64, ptrType, llvmI64, llvmI32}));
419
420
421 Value zeroI64 = rewriter.createLLVM::ConstantOp(loc, llvmI64, 0);
422 auto printfBeginCall = rewriter.createLLVM::CallOp(loc, ocklBegin, zeroI64);
423 Value printfDesc = printfBeginCall.getResult();
424
425
427 rewriter, loc, moduleOp, llvmI8, "printfFormat_", adaptor.getFormat());
428
429
430 Value globalPtr = rewriter.createLLVM::AddressOfOp(
431 loc,
433 global.getSymNameAttr());
434 Value stringStart =
435 rewriter.createLLVM::GEPOp(loc, ptrType, global.getGlobalType(),
437 Value stringLen = rewriter.createLLVM::ConstantOp(
438 loc, llvmI64, cast(global.getValueAttr()).size());
439
440 Value oneI32 = rewriter.createLLVM::ConstantOp(loc, llvmI32, 1);
441 Value zeroI32 = rewriter.createLLVM::ConstantOp(loc, llvmI32, 0);
442
443 auto appendFormatCall = rewriter.createLLVM::CallOp(
444 loc, ocklAppendStringN,
445 ValueRange{printfDesc, stringStart, stringLen,
446 adaptor.getArgs().empty() ? oneI32 : zeroI32});
447 printfDesc = appendFormatCall.getResult();
448
449
450 constexpr size_t argsPerAppend = 7;
451 size_t nArgs = adaptor.getArgs().size();
452 for (size_t group = 0; group < nArgs; group += argsPerAppend) {
453 size_t bound = std::min(group + argsPerAppend, nArgs);
454 size_t numArgsThisCall = bound - group;
455
457 arguments.push_back(printfDesc);
458 arguments.push_back(
459 rewriter.createLLVM::ConstantOp(loc, llvmI32, numArgsThisCall));
460 for (size_t i = group; i < bound; ++i) {
461 Value arg = adaptor.getArgs()[i];
462 if (auto floatType = dyn_cast(arg.getType())) {
463 if (!floatType.isF64())
464 arg = rewriter.createLLVM::FPExtOp(
466 arg = rewriter.createLLVM::BitcastOp(loc, llvmI64, arg);
467 }
469 arg = rewriter.createLLVM::ZExtOp(loc, llvmI64, arg);
470
471 arguments.push_back(arg);
472 }
473
474 for (size_t extra = numArgsThisCall; extra < argsPerAppend; ++extra) {
475 arguments.push_back(zeroI64);
476 }
477
478 auto isLast = (bound == nArgs) ? oneI32 : zeroI32;
479 arguments.push_back(isLast);
480 auto call = rewriter.createLLVM::CallOp(loc, ocklAppendArgs, arguments);
482 }
483 rewriter.eraseOp(gpuPrintfOp);
484 return success();
485 }
486
488 gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
490 Location loc = gpuPrintfOp->getLoc();
491
495
496
497
498
499 auto moduleOp = gpuPrintfOp->getParentOfTypegpu::GPUModuleOp();
500
501 auto printfType =
503 true);
504 LLVM::LLVMFuncOp printfDecl =
506
507
509 rewriter, loc, moduleOp, llvmI8, "printfFormat_", adaptor.getFormat(),
510 0, addressSpace);
511
512
513 Value globalPtr = rewriter.createLLVM::AddressOfOp(
514 loc,
516 global.getSymNameAttr());
517 Value stringStart =
518 rewriter.createLLVM::GEPOp(loc, ptrType, global.getGlobalType(),
520
521
522 auto argsRange = adaptor.getArgs();
524 printfArgs.reserve(argsRange.size() + 1);
525 printfArgs.push_back(stringStart);
526 printfArgs.append(argsRange.begin(), argsRange.end());
527
528 rewriter.createLLVM::CallOp(loc, printfDecl, printfArgs);
529 rewriter.eraseOp(gpuPrintfOp);
530 return success();
531 }
532
534 gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
536 Location loc = gpuPrintfOp->getLoc();
537
540
541
542
543
544 auto moduleOp = gpuPrintfOp->getParentOfTypegpu::GPUModuleOp();
545
546
547
548
550
551 auto vprintfType =
554 moduleOp, globalLoc, rewriter, "vprintf", vprintfType);
555
556
557 LLVM::GlobalOp global =
559 "printfFormat_", adaptor.getFormat());
560
561
562 Value globalPtr = rewriter.createLLVM::AddressOfOp(loc, global);
563 Value stringStart =
564 rewriter.createLLVM::GEPOp(loc, ptrType, global.getGlobalType(),
568
569 for (Value arg : adaptor.getArgs()) {
570 Type type = arg.getType();
571 Value promotedArg = arg;
573 if (isa(type)) {
575 promotedArg = rewriter.createLLVM::FPExtOp(loc, type, arg);
576 }
577 types.push_back(type);
578 args.push_back(promotedArg);
579 }
580 Type structType =
581 LLVM::LLVMStructType::getLiteral(gpuPrintfOp.getContext(), types);
584 Value tempAlloc =
585 rewriter.createLLVM::AllocaOp(loc, ptrType, structType, one,
586 0);
588 Value ptr = rewriter.createLLVM::GEPOp(
589 loc, ptrType, structType, tempAlloc,
591 rewriter.createLLVM::StoreOp(loc, arg, ptr);
592 }
593 std::array<Value, 2> printfArgs = {stringStart, tempAlloc};
594
595 rewriter.createLLVM::CallOp(loc, vprintfDecl, printfArgs);
596 rewriter.eraseOp(gpuPrintfOp);
597 return success();
598 }
599
600
601
602
604 Type llvm1DVectorTy,
607 TypeRange operandTypes(operands);
608 VectorType vectorType = cast(llvm1DVectorTy);
610 Value result = rewriter.createLLVM::PoisonOp(loc, vectorType);
613 Type elementType = vectorType.getElementType();
614
615 for (int64_t i = 0; i < vectorType.getNumElements(); ++i) {
616 Value index = rewriter.createLLVM::ConstantOp(loc, indexType, i);
617 auto extractElement = [&](Value operand) -> Value {
618 if (!isa(operand.getType()))
619 return operand;
620 return rewriter.createLLVM::ExtractElementOp(loc, operand, index);
621 };
622 auto scalarOperands = llvm::map_to_vector(operands, extractElement);
624 rewriter.create(loc, name, scalarOperands, elementType, op->getAttrs());
625 result = rewriter.createLLVM::InsertElementOp(
626 loc, result, scalarOp->getResult(0), index);
627 }
628 return result;
629 }
630
631
635 TypeRange operandTypes(operands);
636 if (llvm::any_of(operandTypes, llvm::IsaPred)) {
637 VectorType vectorType =
640 rewriter, converter));
641 return success();
642 }
643
644 if (llvm::any_of(operandTypes, llvm::IsaPredLLVM::LLVMArrayType)) {
646 op, operands, converter,
649 converter);
650 },
651 rewriter);
652 }
653
654 return rewriter.notifyMatchFailure(op, "no llvm.array or vector to unroll");
655 }
656
659 }
660
661
662
665 gpu::DynamicSharedMemoryOp op, const LLVMTypeConverter *typeConverter,
666 MemRefType memrefType, unsigned alignmentBit) {
667 uint64_t alignmentByte = alignmentBit / memrefType.getElementTypeBitWidth();
668
669 FailureOr addressSpace =
671 if (failed(addressSpace)) {
672 op->emitError() << "conversion of memref memory space "
673 << memrefType.getMemorySpace()
674 << " to integer address space "
675 "failed. Consider adding memory space conversions.";
676 }
677
678
679
681 for (auto globalOp : moduleOp.getBody()->getOpsLLVM::GlobalOp()) {
682 existingGlobalNames.insert(globalOp.getSymName());
683 if (auto arrayType = dyn_castLLVM::LLVMArrayType(globalOp.getType())) {
684 if (globalOp.getAddrSpace() == addressSpace.value() &&
685 arrayType.getNumElements() == 0 &&
686 globalOp.getAlignment().value_or(0) == alignmentByte) {
687 return globalOp;
688 }
689 }
690 }
691
692
693 unsigned uniquingCounter = 0;
694 SmallString<128> symName = SymbolTable::generateSymbolName<128>(
695 "__dynamic_shmem_",
696 [&](StringRef candidate) {
697 return existingGlobalNames.contains(candidate);
698 },
699 uniquingCounter);
700
701
704
706 typeConverter->convertType(memrefType.getElementType()), 0);
707
708 return rewriter.createLLVM::GlobalOp(
709 op->getLoc(), zeroSizedArrayType, false,
710 LLVM::Linkage::Internal, symName, Attribute(), alignmentByte,
711 addressSpace.value());
712 }
713
715 gpu::DynamicSharedMemoryOp op, OpAdaptor adaptor,
718 MemRefType memrefType = op.getResultMemref().getType();
720
721
722 MemRefLayoutAttrInterface layout = {};
723 auto memrefType0sz =
724 MemRefType::get({0}, elementType, layout, memrefType.getMemorySpace());
725
726
727
728 auto moduleOp = op->getParentOfTypegpu::GPUModuleOp();
730 rewriter, moduleOp, op, getTypeConverter(), memrefType0sz, alignmentBit);
731
732
735 auto basePtr = rewriter.createLLVM::AddressOfOp(loc, shmemOp);
736 Type baseType = basePtr->getResultTypes().front();
737
738
740 Value shmemPtr = rewriter.createLLVM::GEPOp(loc, baseType, elementType,
741 basePtr, gepArgs);
742
746 sizeBytes);
748 loc, memrefType0sz, shmemPtr, shmemPtr, shape, strides, rewriter);
749
750
751 rewriter.replaceOp(op, {memRefDescriptor});
752 return success();
753 }
754
756 gpu::ReturnOp op, OpAdaptor adaptor,
759 unsigned numArguments = op.getNumOperands();
761
763 if (useBarePtrCallConv) {
764
765
766 for (auto it : llvm::zip(op->getOperands(), adaptor.getOperands())) {
767 Type oldTy = std::get<0>(it).getType();
768 Value newOperand = std::get<1>(it);
769 if (isa(oldTy) && getTypeConverter()->canConvertToBarePtr(
770 cast(oldTy))) {
772 newOperand = memrefDesc.allocatedPtr(rewriter, loc);
773 } else if (isa(oldTy)) {
774
775
776 return failure();
777 }
778 updatedOperands.push_back(newOperand);
779 }
780 } else {
781 updatedOperands = llvm::to_vector<4>(adaptor.getOperands());
783 updatedOperands,
784 true);
785 }
786
787
788 if (numArguments <= 1) {
790 op, TypeRange(), updatedOperands, op->getAttrs());
791 return success();
792 }
793
794
795
797 op.getOperandTypes(), useBarePtrCallConv);
798 if (!packedType) {
799 return rewriter.notifyMatchFailure(op, "could not convert result types");
800 }
801
802 Value packed = rewriter.createLLVM::PoisonOp(loc, packedType);
803 for (auto [idx, operand] : llvm::enumerate(updatedOperands)) {
804 packed = rewriter.createLLVM::InsertValueOp(loc, packed, operand, idx);
805 }
807 op->getAttrs());
808 return success();
809 }
810
814 [mapping](BaseMemRefType type, gpu::AddressSpaceAttr memorySpaceAttr) {
815 gpu::AddressSpace memorySpace = memorySpaceAttr.getValue();
816 unsigned addressSpace = mapping(memorySpace);
818 addressSpace);
819 });
820 }
static IntegerAttr wrapNumericMemorySpace(MLIRContext *ctx, unsigned space)
static SmallString< 16 > getUniqueSymbolName(gpu::GPUModuleOp moduleOp, StringRef prefix)
static Value scalarizeVectorOpHelper(Operation *op, ValueRange operands, Type llvm1DVectorTy, ConversionPatternRewriter &rewriter, const LLVMTypeConverter &converter)
Helper for impl::scalarizeVectorOp.
LLVM::GlobalOp getDynamicSharedMemorySymbol(ConversionPatternRewriter &rewriter, gpu::GPUModuleOp moduleOp, gpu::DynamicSharedMemoryOp op, const LLVMTypeConverter *typeConverter, MemRefType memrefType, unsigned alignmentBit)
Generates a symbol with 0-sized array type for dynamic shared memory usage, or uses existing symbol.
static std::string diag(const llvm::Value &value)
static Value min(ImplicitLocOpBuilder &builder, Value value, Value bound)
Attributes are known-constant values of operations.
This class provides a shared interface for ranked and unranked memref types.
This class represents an argument of a Block.
IntegerAttr getIndexAttr(int64_t value)
IntegerAttr getI64IntegerAttr(int64_t value)
IntegerType getIntegerType(unsigned width)
Ty getType(Args &&...args)
Get or construct an instance of the type Ty with provided arguments.
StringAttr getStringAttr(const Twine &bytes)
MLIRContext * getContext() const
DictionaryAttr getDictionaryAttr(ArrayRef< NamedAttribute > value)
NamedAttribute getNamedAttr(StringRef name, Attribute val)
Attr getAttr(Args &&...args)
Get or construct an instance of the attribute Attr with provided arguments.
This class implements a pattern rewriter for use with ConversionPatterns.
void replaceOp(Operation *op, ValueRange newValues) override
Replace the given operation with the new values.
FailureOr< Block * > convertRegionTypes(Region *region, const TypeConverter &converter, TypeConverter::SignatureConversion *entryConversion=nullptr)
Apply a signature conversion to each block in the given region.
void eraseOp(Operation *op) override
PatternRewriter hook for erasing a dead operation.
const TypeConverter * typeConverter
An optional type converter for use by this pattern.
MemRefDescriptor createMemRefDescriptor(Location loc, MemRefType memRefType, Value allocatedPtr, Value alignedPtr, ArrayRef< Value > sizes, ArrayRef< Value > strides, ConversionPatternRewriter &rewriter) const
Creates and populates a canonical memref descriptor struct.
void getMemRefDescriptorSizes(Location loc, MemRefType memRefType, ValueRange dynamicSizes, ConversionPatternRewriter &rewriter, SmallVectorImpl< Value > &sizes, SmallVectorImpl< Value > &strides, Value &size, bool sizeInBytes=true) const
Computes sizes, strides and buffer size of memRefType with identity layout.
const LLVMTypeConverter * getTypeConverter() const
LLVM::LLVMDialect & getDialect() const
Returns the LLVM dialect.
LogicalResult copyUnrankedDescriptors(OpBuilder &builder, Location loc, TypeRange origTypes, SmallVectorImpl< Value > &operands, bool toDynamic) const
Copies the memory descriptor for any operands that were unranked descriptors originally to heap-alloc...
This class contains all of the information necessary to report a diagnostic to the DiagnosticEngine.
An instance of this location represents a tuple of file, line number, and column number.
Conversion from types to the LLVM IR dialect.
Type packFunctionResults(TypeRange types, bool useBarePointerCallConv=false) const
Convert a non-empty list of types to be returned from a function into an LLVM-compatible type.
const LowerToLLVMOptions & getOptions() const
Type convertFunctionSignature(FunctionType funcTy, bool isVariadic, bool useBarePtrCallConv, SignatureConversion &result) const
Convert a function type.
LogicalResult convertType(Type t, SmallVectorImpl< Type > &results) const
Convert the given type.
FailureOr< unsigned > getMemRefAddressSpace(BaseMemRefType type) const
Return the LLVM address space corresponding to the memory space of the memref type type or failure if...
LocationAttr findInstanceOfOrUnknown()
Return an instance of the given location type if one is nested under the current location else return...
This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around...
MLIRContext is the top-level object for a collection of MLIR operations.
Helper class to produce LLVM dialect operations extracting or inserting elements of a MemRef descript...
static MemRefDescriptor fromStaticShape(OpBuilder &builder, Location loc, const LLVMTypeConverter &typeConverter, MemRefType type, Value memory)
Builds IR creating a MemRef descriptor that represents type and populates it with static shape and st...
Value allocatedPtr(OpBuilder &builder, Location loc)
Builds IR extracting the allocated pointer from the descriptor.
NamedAttrList is an array of NamedAttributes that tracks whether it is sorted and does some basic work t...
Attribute erase(StringAttr name)
Erase the attribute with the given name from the list.
RAII guard to reset the insertion point of the builder when destroyed.
This class helps build Operations.
void setInsertionPointToStart(Block *block)
Sets the insertion point to the start of the specified block.
void setInsertionPoint(Block *block, Block::iterator insertPoint)
Set the insertion point to the specified location.
Operation * create(const OperationState &state)
Creates an operation given the fields represented as an OperationState.
StringAttr getIdentifier() const
Return the name of this operation as a StringAttr.
Operation is the basic unit of execution within MLIR.
OpResult getResult(unsigned idx)
Get the 'idx'th result of this operation.
Location getLoc()
The source location the operation was defined or derived from.
ArrayRef< NamedAttribute > getAttrs()
Return all of the attributes on this operation.
OperationName getName()
The name of an operation is the key identifier for it.
result_type_range getResultTypes()
std::enable_if_t<!std::is_convertible< CallbackT, Twine >::value, LogicalResult > notifyMatchFailure(Location loc, CallbackT &&reasonCallback)
Used to notify the listener that the IR failed to be rewritten because of a match failure,...
void modifyOpInPlace(Operation *root, CallableT &&callable)
This method is a utility wrapper around an in-place modification of an operation.
void inlineRegionBefore(Region &region, Region &parent, Region::iterator before)
Move the blocks that belong to "region" before the given position in another region "parent".
OpTy replaceOpWithNewOp(Operation *op, Args &&...args)
Replace the results of the given (original) op with a new op that is created without verification (re...
static StringRef getSymbolAttrName()
Return the name of the attribute used for symbol names.
This class provides all of the information necessary to convert a type signature.
std::optional< InputMapping > getInputMapping(unsigned input) const
Get the input mapping for the given argument.
void remapInput(unsigned origInputNo, ArrayRef< Value > replacements)
Remap an input of the original signature to replacements values.
LogicalResult convertType(Type t, SmallVectorImpl< Type > &results) const
Convert the given type.
void addTypeAttributeConversion(FnT &&callback)
Register a conversion function for attributes within types.
This class provides an abstraction over the various different ranges of value types.
Instances of the Type class are uniqued, have an immutable identifier and an optional mutable compone...
bool isIntOrFloat() const
Return true if this is an integer (of any signedness) or a float type.
unsigned getIntOrFloatBitWidth() const
Return the bit width of an integer or a float type, assert failure on other types.
This class provides an abstraction over the different types of ranges over Values.
This class represents an instance of an SSA value in the MLIR system, representing a computable value...
Type getType() const
Return the type of this value.
LogicalResult handleMultidimensionalVectors(Operation *op, ValueRange operands, const LLVMTypeConverter &typeConverter, std::function< Value(Type, ValueRange)> createOperand, ConversionPatternRewriter &rewriter)
constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)
LogicalResult scalarizeVectorOp(Operation *op, ValueRange operands, ConversionPatternRewriter &rewriter, const LLVMTypeConverter &converter)
Unrolls op to array/vector elements.
Include the generated interface declarations.
InFlightDiagnostic emitWarning(Location loc)
Utility method to emit a warning message using this location.
std::function< unsigned(gpu::AddressSpace)> MemorySpaceMapping
A function that maps a MemorySpace enum to a target-specific integer value.
void populateGpuMemorySpaceAttributeConversions(TypeConverter &typeConverter, const MemorySpaceMapping &mapping)
Populates memory space attribute conversion rules for lowering gpu.address_space to integer values.
auto get(MLIRContext *context, Ts &&...params)
Helper method that injects context only if needed, this helps unify some of the attribute constructio...
LLVM::LLVMFuncOp getOrDefineFunction(gpu::GPUModuleOp moduleOp, Location loc, OpBuilder &b, StringRef name, LLVM::LLVMFunctionType type)
Find or create an external function declaration in the given module.
LLVM::GlobalOp getOrCreateStringConstant(OpBuilder &b, Location loc, gpu::GPUModuleOp moduleOp, Type llvmI8, StringRef namePrefix, StringRef str, uint64_t alignment=0, unsigned addrSpace=0)
Create a global that contains the given string.
LogicalResult matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
Methods that operate on the SourceOp type.
LogicalResult matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
LogicalResult matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
LogicalResult matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
LogicalResult matchAndRewrite(gpu::ReturnOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override
Methods that operate on the SourceOp type.