MLIR: lib/Dialect/GPU/Transforms/MemoryPromotion.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

15

23

24 using namespace mlir;

26

27

28

29

30

31

33 auto memRefType = cast(from.getType());

34 auto rank = memRefType.getRank();

35

37 Value zero = b.createarith::ConstantIndexOp(0);

38 Value one = b.createarith::ConstantIndexOp(1);

39

40

41

42 if (rank < GPUDialect::getNumWorkgroupDimensions()) {

43 unsigned extraLoops = GPUDialect::getNumWorkgroupDimensions() - rank;

44 lbs.resize(extraLoops, zero);

45 ubs.resize(extraLoops, one);

46 steps.resize(extraLoops, one);

47 }

48

49

50 lbs.append(rank, zero);

51 ubs.reserve(lbs.size());

52 steps.reserve(lbs.size());

53 for (auto idx = 0; idx < rank; ++idx) {

54 ubs.push_back(b.createOrFoldmemref::DimOp(from, idx));

55 steps.push_back(one);

56 }

57

58

61 for (auto dim : {gpu::Dimension::x, gpu::Dimension::y, gpu::Dimension::z}) {

62 threadIds.push_back(b.creategpu::ThreadIdOp(indexType, dim));

63 blockDims.push_back(b.creategpu::BlockDimOp(indexType, dim));

64 }

65

66

69 b, b.getLoc(), lbs, ubs, steps,

71 ivs.assign(loopIvs.begin(), loopIvs.end());

72 auto activeIvs = llvm::ArrayRef(ivs).take_back(rank);

73 Value loaded = b.creatememref::LoadOp(loc, from, activeIvs);

74 b.creatememref::StoreOp(loc, loaded, to, activeIvs);

75 });

76

77

78 for (const auto &en :

80 GPUDialect::getNumWorkgroupDimensions())))) {

81 Value v = en.value();

84 {blockDims[en.index()]});

85 }

86 }

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

123 auto fromType = cast(from.getType());

124 auto toType = cast(to.getType());

125 (void)fromType;

126 (void)toType;

127 assert(fromType.getShape() == toType.getShape());

128 assert(fromType.getRank() != 0);

129 assert(llvm::hasSingleElement(region) &&

130 "unstructured control flow not supported");

131

134 b.creategpu::BarrierOp();

135

136 b.setInsertionPoint(&region.front().back());

137 b.creategpu::BarrierOp();

139 }

140

141

142

144 Value value = op.getArgument(arg);

145 auto type = dyn_cast(value.getType());

146 assert(type && type.hasStaticShape() && "can only promote memrefs");

147

148

150 op->getContext(), gpu::AddressSpace::Workgroup);

151 auto bufferType = MemRefType::get(type.getShape(), type.getElementType(),

152 MemRefLayoutAttrInterface{},

153 Attribute(workgroupMemoryAddressSpace));

154 Value attribution = op.addWorkgroupAttribution(bufferType, value.getLoc());

155

156

157

159 insertCopies(op.getBody(), op.getLoc(), value, attribution);

160 }

Attributes are known-constant values of operations.

ImplicitLocOpBuilder maintains a 'current location', allowing use of the create<> method without specifying the location.

Location getLoc() const

Accessors for the implied location.

static ImplicitLocOpBuilder atBlockBegin(Location loc, Block *block, Listener *listener=nullptr)

Create a builder and set the insertion point to before the first operation in the block but still inside the block.

OpTy create(Args &&...args)

Create an operation of specific op type at the current insertion point and location.

void createOrFold(llvm::SmallVectorImpl< Value > &results, Args &&...args)

Create an operation of specific op type at the current insertion point, and immediately try to fold it.

This class defines the main interface for locations in MLIR and acts as a non-nullable wrapper around a LocationAttr.

This class helps build Operations.

This class contains a list of basic blocks and a link to the parent operation it is attached to.

Operation * getParentOp()

Return the parent operation this region is attached to.

This class provides an abstraction over the different types of ranges over Values.

This class represents an instance of an SSA value in the MLIR system, representing a computable value that has a type and a set of users.

Type getType() const

Return the type of this value.

void replaceAllUsesWith(Value newValue)

Replace all uses of 'this' value with the new value, updating anything in the IR that uses 'this' to use the other value instead.

Location getLoc() const

Return the location of this value.

Region * getParentRegion()

Return the Region in which this Value is defined.

void mapLoopToProcessorIds(scf::ForOp forOp, ArrayRef< Value > processorId, ArrayRef< Value > numProcessors)

Maps forOp for execution on a parallel grid of virtual processorIds of size given by numProcessors.

constexpr void enumerate(std::tuple< Tys... > &tuple, CallbackT &&callback)

LoopNest buildLoopNest(OpBuilder &builder, Location loc, ValueRange lbs, ValueRange ubs, ValueRange steps, ValueRange iterArgs, function_ref< ValueVector(OpBuilder &, Location, ValueRange, ValueRange)> bodyBuilder=nullptr)

Creates a perfect nest of "for" loops, i.e. all loops but the innermost contain only another loop and a terminator.

Include the generated interface declarations.

void promoteToWorkgroupMemory(gpu::GPUFuncOp op, unsigned arg)

Promotes a function argument to workgroup memory in the given function.

auto get(MLIRContext *context, Ts &&...params)

Helper method that injects context only if needed; this helps unify some of the attribute construction methods.