LLVM: lib/Target/ARM/MVELaneInterleavingPass.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

67#include <cassert>

68

69using namespace llvm;

70

71#define DEBUG_TYPE "mve-laneinterleave"

72

75 cl::desc("Enable interleave MVE vector operation lowering"));

76

77namespace {

78

79class MVELaneInterleaving : public FunctionPass {

80public:

81 static char ID;

82

85 }

86

88

90

95 }

96};

97

98}

99

100char MVELaneInterleaving::ID = 0;

101

103 false)

104

106 return new MVELaneInterleaving();

107}

108

111

112

113

114

115

116

117

118

119

120

121

122

123 for (auto *E : Exts) {

124 if (isa<FPExtInst>(E) || !isa<LoadInst>(E->getOperand(0))) {

125 LLVM_DEBUG(dbgs() << "Beneficial due to " << *E << "\n");

126 return true;

127 }

128 }

129 for (auto *T : Truncs) {

130 if (T->hasOneUse() && !isa<StoreInst>(*T->user_begin())) {

131 LLVM_DEBUG(dbgs() << "Beneficial due to " << *T << "\n");

132 return true;

133 }

134 }

135

136

137

138 for (auto *E : Exts) {

139 if (!E->hasOneUse() ||

140 cast<Instruction>(*E->user_begin())->getOpcode() != Instruction::Mul) {

141 LLVM_DEBUG(dbgs() << "Not beneficial due to " << *E << "\n");

142 return false;

143 }

144 }

145 return true;

146}

147

150 LLVM_DEBUG(dbgs() << "tryInterleave from " << *Start << "\n");

151

152 if (!isa<Instruction>(Start->getOperand(0)))

153 return false;

154

155

156 std::vector<Instruction *> Worklist;

157 Worklist.push_back(Start);

158 Worklist.push_back(cast<Instruction>(Start->getOperand(0)));

159

165

166 while (!Worklist.empty()) {

168 Worklist.pop_back();

169

170 switch (I->getOpcode()) {

171

172 case Instruction::Trunc:

173 case Instruction::FPTrunc:

175 continue;

177 break;

178

179

180 case Instruction::SExt:

181 case Instruction::ZExt:

182 case Instruction::FPExt:

184 continue;

185 for (auto *Use : I->users())

186 Worklist.push_back(cast<Instruction>(Use));

188 break;

189

190 case Instruction::Call: {

192 if (!II)

193 return false;

194

195 if (II->getIntrinsicID() == Intrinsic::vector_reduce_add) {

197 continue;

199 break;

200 }

201

202 switch (II->getIntrinsicID()) {

203 case Intrinsic::abs:

204 case Intrinsic::smin:

205 case Intrinsic::smax:

206 case Intrinsic::umin:

207 case Intrinsic::umax:

208 case Intrinsic::sadd_sat:

209 case Intrinsic::ssub_sat:

210 case Intrinsic::uadd_sat:

211 case Intrinsic::usub_sat:

212 case Intrinsic::minnum:

213 case Intrinsic::maxnum:

214 case Intrinsic::fabs:

215 case Intrinsic::fma:

216 case Intrinsic::ceil:

217 case Intrinsic::floor:

218 case Intrinsic::rint:

219 case Intrinsic::round:

220 case Intrinsic::trunc:

221 break;

222 default:

223 return false;

224 }

225 [[fallthrough]];

226 }

227

228 case Instruction::Add:

229 case Instruction::Sub:

230 case Instruction::Mul:

231 case Instruction::AShr:

232 case Instruction::LShr:

233 case Instruction::Shl:

234 case Instruction::ICmp:

235 case Instruction::FCmp:

236 case Instruction::FAdd:

237 case Instruction::FMul:

238 case Instruction::Select:

240 continue;

241

242 for (Use &Op : I->operands()) {

243 if (!isa<FixedVectorType>(Op->getType()))

244 continue;

245 if (isa<Instruction>(Op))

246 Worklist.push_back(cast<Instruction>(&Op));

247 else

249 }

250

251 for (auto *Use : I->users())

252 Worklist.push_back(cast<Instruction>(Use));

253 break;

254

255 case Instruction::ShuffleVector:

256

257 if (cast<ShuffleVectorInst>(I)->isZeroEltSplat())

258 continue;

259 [[fallthrough]];

260

261 default:

262 LLVM_DEBUG(dbgs() << " Unhandled instruction: " << *I << "\n");

263 return false;

264 }

265 }

266

267 if (Exts.empty() && OtherLeafs.empty())

268 return false;

269

271 dbgs() << "Found group:\n Exts:\n";

272 for (auto *I : Exts)

273 dbgs() << " " << *I << "\n";

274 dbgs() << " Ops:\n";

275 for (auto *I : Ops)

276 dbgs() << " " << *I << "\n";

277 dbgs() << " OtherLeafs:\n";

278 for (auto *I : OtherLeafs)

279 dbgs() << " " << *I->get() << " of " << *I->getUser() << "\n";

280 dbgs() << " Truncs:\n";

281 for (auto *I : Truncs)

282 dbgs() << " " << *I << "\n";

283 dbgs() << " Reducts:\n";

284 for (auto *I : Reducts)

285 dbgs() << " " << *I << "\n";

286 });

287

289 "Expected some truncs or reductions");

291 return false;

292

293 auto *VT = !Truncs.empty()

294 ? cast<FixedVectorType>(Truncs[0]->getType())

295 : cast<FixedVectorType>(Exts[0]->getOperand(0)->getType());

297

298

299 unsigned NumElts = VT->getNumElements();

300 unsigned BaseElts = VT->getScalarSizeInBits() == 16

301 ? 8

302 : (VT->getScalarSizeInBits() == 8 ? 16 : 0);

303 if (BaseElts == 0 || NumElts % BaseElts != 0) {

305 return false;

306 }

307 if (Start->getOperand(0)->getType()->getScalarSizeInBits() !=

308 VT->getScalarSizeInBits() * 2) {

310 return false;

311 }

313 if (I->getOperand(0)->getType() != VT) {

315 return false;

316 }

318 if (I->getType() != VT) {

320 return false;

321 }

322

323

325 return false;

327 return I->getOpcode() == Instruction::Mul ||

328 I->getOpcode() == Instruction::Select ||

329 I->getOpcode() == Instruction::ICmp;

330 }))) {

331 LLVM_DEBUG(dbgs() << "Reduction does not look profitable\n");

332 return false;

333 }

334

335

337

340

341

342 for (unsigned Base = 0; Base < NumElts; Base += BaseElts) {

343 for (unsigned i = 0; i < BaseElts / 2; i++)

345 for (unsigned i = 0; i < BaseElts / 2; i++)

347 }

348 for (unsigned Base = 0; Base < NumElts; Base += BaseElts) {

349 for (unsigned i = 0; i < BaseElts / 2; i++) {

352 }

353 }

354

359 bool FPext = isa<FPExtInst>(I);

360 bool Sext = isa<SExtInst>(I);

362 : Sext ? Builder.CreateSExt(Shuffle, I->getType())

363 : Builder.CreateZExt(Shuffle, I->getType());

364 I->replaceAllUsesWith(Ext);

366 }

367

368 for (Use *I : OtherLeafs) {

372 I->getUser()->setOperand(I->getOperandNo(), Shuffle);

374 }

375

378

381 I->replaceAllUsesWith(Shuf);

382 cast<Instruction>(Shuf)->setOperand(0, I);

383

385 }

386

387 return true;

388}

389

390

391

393 if (auto *II = dyn_cast<IntrinsicInst>(&I))

394 return II->getIntrinsicID() == Intrinsic::vector_reduce_add;

395 return false;

396}

397

398bool MVELaneInterleaving::runOnFunction(Function &F) {

400 return false;

401 auto &TPC = getAnalysis<TargetPassConfig>();

404 if (!ST->hasMVEIntegerOps())

405 return false;

406

407 bool Changed = false;

408

411 if (((I.getType()->isVectorTy() &&

412 (isa<TruncInst>(I) || isa<FPTruncInst>(I))) ||

416 }

417

418 return Changed;

419}

Expand Atomic instructions

static bool isProfitableToInterleave(SmallSetVector< Instruction *, 4 > &Exts, SmallSetVector< Instruction *, 4 > &Truncs)

static bool tryInterleave(Instruction *Start, SmallPtrSetImpl< Instruction * > &Visited)

cl::opt< bool > EnableInterleave("enable-mve-interleave", cl::Hidden, cl::init(true), cl::desc("Enable interleave MVE vector operation lowering"))

static bool isAddReduction(Instruction &I)

uint64_t IntrinsicInst * II

#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)

assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

This file implements a set that has insertion order iteration characteristics.

static SymbolRef::Type getType(const Symbol *Sym)

This file describes how to lower LLVM code to machine code.

Target-Independent Code Generator Pass Configuration Options pass.

This pass exposes codegen information to IR-level passes.

Represent the analysis usage information of a pass.

AnalysisUsage & addRequired()

void setPreservesCFG()

This function should be called by the pass, iff they do not:

This class represents an Operation in the Expression.

FunctionPass class - This class is used to implement most global optimizations.

virtual bool runOnFunction(Function &F)=0

runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.

Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")

Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)

Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")

void SetInsertPoint(BasicBlock *TheBB)

This specifies that created instructions should be appended to the end of the specified block.

Value * CreateFPExt(Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr)

This provides a uniform API for creating instructions and inserting them into a basic block: either a...

A wrapper class for inspecting calls to intrinsic functions.

static PassRegistry * getPassRegistry()

getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...

Pass interface - Implemented by all 'passes'.

virtual void getAnalysisUsage(AnalysisUsage &) const

getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...

virtual StringRef getPassName() const

getPassName - Return a nice clean name for a pass.

size_type count(const key_type &key) const

Count the number of elements of a given key in the SetVector.

bool empty() const

Determine if the SetVector is empty or not.

bool insert(const value_type &X)

Insert a new element into the SetVector.

A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...

size_type count(ConstPtrType Ptr) const

count - Return 1 if the specified pointer is in the set, 0 otherwise.

std::pair< iterator, bool > insert(PtrType Ptr)

Inserts Ptr if and only if there is no element in the container equal to Ptr.

SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.

A SetVector that performs no allocations if smaller than a certain size.

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

StringRef - Represent a constant reference to a string, i.e.

Primary interface to the complete machine description for the target machine.

Target-Independent Code Generator Pass Configuration Options.

A Use represents the edge between a Value definition and its users.

LLVM Value Representation.

unsigned ID

LLVM IR allows to use arbitrary numbers as calling convention identifiers.

initializer< Ty > init(const Ty &Val)

This is an optimization pass for GlobalISel generic memory operations.

bool all_of(R &&range, UnaryPredicate P)

Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.

Pass * createMVELaneInterleavingPass()

void initializeMVELaneInterleavingPass(PassRegistry &)

auto reverse(ContainerTy &&C)

raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.