LLVM: lib/Target/AArch64/SVEIntrinsicOpts.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

32#include "llvm/IR/IntrinsicsAArch64.h"

37#include

38

39using namespace llvm;

41

42#define DEBUG_TYPE "aarch64-sve-intrinsic-opts"

43

44namespace {

45struct SVEIntrinsicOpts : public ModulePass {

46 static char ID;

48

49 bool runOnModule(Module &M) override;

50 void getAnalysisUsage(AnalysisUsage &AU) const override;

51

52private:

53 bool coalescePTrueIntrinsicCalls(BasicBlock &BB,

58

60

61

62

64};

65}

66

67void SVEIntrinsicOpts::getAnalysisUsage(AnalysisUsage &AU) const {

68 AU.addRequired();

70}

71

72char SVEIntrinsicOpts::ID = 0;

73static const char *name = "SVE intrinsics optimizations";

77

79 return new SVEIntrinsicOpts();

80}

81

82

83

84

85

86

87

88

89

90

91

92

93

95

96

101 }

102 }

103

104

105 if (ConvertToUses.empty())

106 return false;

107

108

109

110

112 for (IntrinsicInst *ConvertToUse : ConvertToUses) {

115 if (IntrUser && IntrUser->getIntrinsicID() ==

116 Intrinsic::aarch64_sve_convert_from_svbool) {

118

119

120 if (IntrUserVTy->getElementCount().getKnownMinValue() >

121 PTrueVTy->getElementCount().getKnownMinValue())

122

123 return true;

124 }

125 }

126 }

127

128

129 return false;

130}

131

132

133bool SVEIntrinsicOpts::coalescePTrueIntrinsicCalls(

134 BasicBlock &BB, SmallSetVector<IntrinsicInst *, 4> &PTrues) {

135 if (PTrues.size() <= 1)

136 return false;

137

138

139 auto *MostEncompassingPTrue =

143 return PTrue1VTy->getElementCount().getKnownMinValue() <

144 PTrue2VTy->getElementCount().getKnownMinValue();

145 });

146

147

148

149 PTrues.remove(MostEncompassingPTrue);

151

152

153

154

156

159 Builder.SetInsertPoint(&BB, ++MostEncompassingPTrue->getIterator());

160

161 auto *MostEncompassingPTrueVTy =

163 auto *ConvertToSVBool = Builder.CreateIntrinsic(

164 Intrinsic::aarch64_sve_convert_to_svbool, {MostEncompassingPTrueVTy},

165 {MostEncompassingPTrue});

166

167 bool ConvertFromCreated = false;

168 for (auto *PTrue : PTrues) {

170

171

172

173 if (MostEncompassingPTrueVTy != PTrueVTy) {

174 ConvertFromCreated = true;

175

176 Builder.SetInsertPoint(&BB, ++ConvertToSVBool->getIterator());

177 auto *ConvertFromSVBool =

178 Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool,

179 {PTrueVTy}, {ConvertToSVBool});

180 PTrue->replaceAllUsesWith(ConvertFromSVBool);

181 } else

182 PTrue->replaceAllUsesWith(MostEncompassingPTrue);

183

184 PTrue->eraseFromParent();

185 }

186

187

188 if (!ConvertFromCreated)

189 ConvertToSVBool->eraseFromParent();

190

191 return true;

192}

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242bool SVEIntrinsicOpts::optimizePTrueIntrinsicCalls(

243 SmallSetVector<Function *, 4> &Functions) {

245

246 for (auto *F : Functions) {

247 for (auto &BB : *F) {

248 SmallSetVector<IntrinsicInst *, 4> SVAllPTrues;

249 SmallSetVector<IntrinsicInst *, 4> SVPow2PTrues;

250

251

252 for (Instruction &I : BB) {

253 if (I.use_empty())

254 continue;

255

257 if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)

258 continue;

259

260 const auto PTruePattern =

262

263 if (PTruePattern == AArch64SVEPredPattern::all)

264 SVAllPTrues.insert(IntrI);

265 if (PTruePattern == AArch64SVEPredPattern::pow2)

266 SVPow2PTrues.insert(IntrI);

267 }

268

269 Changed |= coalescePTrueIntrinsicCalls(BB, SVAllPTrues);

270 Changed |= coalescePTrueIntrinsicCalls(BB, SVPow2PTrues);

271 }

272 }

273

275}

276

277

278

279bool SVEIntrinsicOpts::optimizePredicateStore(Instruction *I) {

280 auto *F = I->getFunction();

281 auto Attr = F->getFnAttribute(Attribute::VScaleRange);

282 if (!Attr.isValid())

283 return false;

284

285 unsigned MinVScale = Attr.getVScaleRangeMin();

286 std::optional MaxVScale = Attr.getVScaleRangeMax();

287

288 if (!MaxVScale || MinVScale != MaxVScale)

289 return false;

290

291 auto *PredType =

293 auto *FixedPredType =

295

296

298 if (!Store || Store->isSimple())

299 return false;

300

301

302 if (Store->getOperand(0)->getType() != FixedPredType)

303 return false;

304

305

307 if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::vector_extract)

308 return false;

309

310

312 return false;

313

314

316 if (!BitCast)

317 return false;

318

319

320 if (BitCast->getOperand(0)->getType() != PredType)

321 return false;

322

324 Builder.SetInsertPoint(I);

325

326 Builder.CreateStore(BitCast->getOperand(0), Store->getPointerOperand());

327

328 Store->eraseFromParent();

329 if (IntrI->use_empty())

330 IntrI->eraseFromParent();

331 if (BitCast->use_empty())

332 BitCast->eraseFromParent();

333

334 return true;

335}

336

337

338

339bool SVEIntrinsicOpts::optimizePredicateLoad(Instruction *I) {

340 auto *F = I->getFunction();

341 auto Attr = F->getFnAttribute(Attribute::VScaleRange);

342 if (!Attr.isValid())

343 return false;

344

345 unsigned MinVScale = Attr.getVScaleRangeMin();

346 std::optional MaxVScale = Attr.getVScaleRangeMax();

347

348 if (!MaxVScale || MinVScale != MaxVScale)

349 return false;

350

351 auto *PredType =

353 auto *FixedPredType =

355

356

358 if (!BitCast || BitCast->getType() != PredType)

359 return false;

360

361

363 if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::vector_insert)

364 return false;

365

366

369 return false;

370

371

373 if (!Load || Load->isSimple())

374 return false;

375

376

377 if (Load->getType() != FixedPredType)

378 return false;

379

381 Builder.SetInsertPoint(Load);

382

383 auto *LoadPred = Builder.CreateLoad(PredType, Load->getPointerOperand());

384

385 BitCast->replaceAllUsesWith(LoadPred);

386 BitCast->eraseFromParent();

387 if (IntrI->use_empty())

388 IntrI->eraseFromParent();

389 if (Load->use_empty())

390 Load->eraseFromParent();

391

392 return true;

393}

394

395bool SVEIntrinsicOpts::optimizeInstructions(

396 SmallSetVector<Function *, 4> &Functions) {

398

399 for (auto *F : Functions) {

400 DominatorTree *DT = &getAnalysis(*F).getDomTree();

401

402

403

405 ReversePostOrderTraversal<BasicBlock *> RPOT(Root);

406 for (auto *BB : RPOT) {

408 switch (I.getOpcode()) {

409 case Instruction::Store:

410 Changed |= optimizePredicateStore(&I);

411 break;

412 case Instruction::BitCast:

413 Changed |= optimizePredicateLoad(&I);

414 break;

415 }

416 }

417 }

418 }

419

421}

422

423bool SVEIntrinsicOpts::optimizeFunctions(

424 SmallSetVector<Function *, 4> &Functions) {

426

427 Changed |= optimizePTrueIntrinsicCalls(Functions);

428 Changed |= optimizeInstructions(Functions);

429

431}

432

433bool SVEIntrinsicOpts::runOnModule(Module &M) {

435 SmallSetVector<Function *, 4> Functions;

436

437

438

439

440 for (auto &F : M.getFunctionList()) {

441 if (F.isDeclaration())

442 continue;

443

444 switch (F.getIntrinsicID()) {

445 case Intrinsic::vector_extract:

446 case Intrinsic::vector_insert:

447 case Intrinsic::aarch64_sve_ptrue:

448 for (User *U : F.users())

450 break;

451 default:

452 break;

453 }

454 }

455

456 if (!Functions.empty())

457 Changed |= optimizeFunctions(Functions);

458

460}

This file contains the declarations for the subclasses of Constant, which represent the different fla...

Module.h This file contains the declarations for the Module class.

Machine Check Debug Module

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.

static bool isPTruePromoted(IntrinsicInst *PTrue)

Checks if a ptrue intrinsic call is promoted.

Definition SVEIntrinsicOpts.cpp:94

This file implements a set that has insertion order iteration characteristics.

static Function * getFunction(FunctionType *Ty, const Twine &Name, Module *M)

Represent the analysis usage information of a pass.

AnalysisUsage & addRequired()

LLVM_ABI void setPreservesCFG()

This function should be called by the pass, iff they do not:

LLVM Basic Block Representation.

LLVM_ABI const_iterator getFirstInsertionPt() const

Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...

LLVM_ABI LLVMContext & getContext() const

Get the context in which this basic block lives.

Legacy analysis pass which computes a DominatorTree.

static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)

A wrapper class for inspecting calls to intrinsic functions.

ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...

A Module instance is used to store all the information related to an LLVM module.

static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)

bool remove(const value_type &X)

Remove an item from the set vector.

bool remove_if(UnaryPredicate P)

Remove items from the set vector based on a predicate function.

size_type size() const

Determine the number of elements in the SetVector.

bool empty() const

Determine if the SetVector is empty or not.

bool insert(const value_type &X)

Insert a new element into the SetVector.

A SetVector that performs no allocations if smaller than a certain size.

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

Type * getType() const

All values are typed, get the type of this value.

iterator_range< user_iterator > users()

unsigned ID

LLVM IR allows to use arbitrary numbers as calling convention identifiers.

@ BasicBlock

Various leaf nodes.

bool match(Val *V, const Pattern &P)

IntrinsicID_match m_Intrinsic()

Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))

This is an optimization pass for GlobalISel generic memory operations.

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)

Make a range that does early increment to allow mutation of the underlying range without disrupting i...

ModulePass * createSVEIntrinsicOptsPass()

Definition SVEIntrinsicOpts.cpp:78

bool isa(const From &Val)

isa - Return true if the parameter to the template is an instance of one of the template type argu...

IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >

auto max_element(R &&Range)

Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...

decltype(auto) cast(const From &Val)

cast - Return the argument parameter cast to the specified type.