LLVM: lib/Transforms/Utils/BypassSlowDivision.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

34#include

35#include

36

37using namespace llvm;

38

39#define DEBUG_TYPE "bypass-slow-division"

40

41namespace {

42

43 struct QuotRemPair {

46

47 QuotRemPair(Value *InQuotient, Value *InRemainder)

48 : Quotient(InQuotient), Remainder(InRemainder) {}

49 };

50

51

52

53

54 struct QuotRemWithBB {

56 Value *Quotient = nullptr;

57 Value *Remainder = nullptr;

58 };

59

63

64enum ValueRange {

65

66 VALRNG_KNOWN_SHORT,

67

68 VALRNG_UNKNOWN,

69

70

71 VALRNG_LIKELY_LONG

72};

73

74class FastDivInsertionTask {

75 bool IsValidTask = false;

79

80 bool isHashLikeValue(Value *V, VisitedSetTy &Visited);

81 ValueRange getValueRange(Value *Op, VisitedSetTy &Visited);

84 QuotRemPair createDivRemPhiNodes(QuotRemWithBB &LHS, QuotRemWithBB &RHS,

87 std::optional insertFastDivAndRem();

88

90 return SlowDivOrRem->getOpcode() == Instruction::SDiv ||

91 SlowDivOrRem->getOpcode() == Instruction::SRem;

92 }

93

94 bool isDivisionOp() {

95 return SlowDivOrRem->getOpcode() == Instruction::SDiv ||

96 SlowDivOrRem->getOpcode() == Instruction::UDiv;

97 }

98

99 Type *getSlowType() { return SlowDivOrRem->getType(); }

100

101public:

102 FastDivInsertionTask(Instruction *I, const BypassWidthsTy &BypassWidths);

103

104 Value *getReplacement(DivCacheTy &Cache);

105};

106

107}

108

109FastDivInsertionTask::FastDivInsertionTask(Instruction *I,

110 const BypassWidthsTy &BypassWidths) {

111 switch (I->getOpcode()) {

112 case Instruction::UDiv:

113 case Instruction::SDiv:

114 case Instruction::URem:

115 case Instruction::SRem:

116 SlowDivOrRem = I;

117 break;

118 default:

119

120 return;

121 }

122

123

124 IntegerType *SlowType = dyn_cast(SlowDivOrRem->getType());

125 if (!SlowType)

126 return;

127

128

129 auto BI = BypassWidths.find(SlowType->getBitWidth());

130 if (BI == BypassWidths.end())

131 return;

132

133

135 BypassType = BT;

136

137

138 MainBB = I->getParent();

139

140

141 IsValidTask = true;

142}

143

144

145

146

147

148Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) {

149

150 if (!IsValidTask)

151 return nullptr;

152

153

154 Value *Dividend = SlowDivOrRem->getOperand(0);

155 Value *Divisor = SlowDivOrRem->getOperand(1);

157 auto CacheI = Cache.find(Key);

158

159 if (CacheI == Cache.end()) {

160

161 std::optional OptResult = insertFastDivAndRem();

162

163 if (!OptResult)

164 return nullptr;

165 CacheI = Cache.insert({Key, *OptResult}).first;

166 }

167

168 QuotRemPair &Value = CacheI->second;

169 return isDivisionOp() ? Value.Quotient : Value.Remainder;

170}

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187bool FastDivInsertionTask::isHashLikeValue(Value *V, VisitedSetTy &Visited) {

189 if (I)

190 return false;

191

192 switch (I->getOpcode()) {

193 case Instruction::Xor:

194 return true;

195 case Instruction::Mul: {

196

197

198

199

200 Value *Op1 = I->getOperand(1);

202 if (C && isa(Op1))

203 C = dyn_cast(cast(Op1)->getOperand(0));

204 return C && C->getValue().getSignificantBits() > BypassType->getBitWidth();

205 }

206 case Instruction::PHI:

207

208

209 if (Visited.size() >= 16)

210 return false;

211

212

213 if (!Visited.insert(I).second)

214 return true;

215 return llvm::all_of(cast(I)->incoming_values(), [&](Value *V) {

216

217

218 return getValueRange(V, Visited) == VALRNG_LIKELY_LONG ||

219 isa(V);

220 });

221 default:

222 return false;

223 }

224}

225

226

227ValueRange FastDivInsertionTask::getValueRange(Value *V,

228 VisitedSetTy &Visited) {

229 unsigned ShortLen = BypassType->getBitWidth();

230 unsigned LongLen = V->getType()->getIntegerBitWidth();

231

232 assert(LongLen > ShortLen && "Value type must be wider than BypassType");

233 unsigned HiBits = LongLen - ShortLen;

234

235 const DataLayout &DL = SlowDivOrRem->getDataLayout();

237

239

240 if (Known.countMinLeadingZeros() >= HiBits)

241 return VALRNG_KNOWN_SHORT;

242

243 if (Known.countMaxLeadingZeros() < HiBits)

244 return VALRNG_LIKELY_LONG;

245

246

247

248

249

250 if (isHashLikeValue(V, Visited))

251 return VALRNG_LIKELY_LONG;

252

253 return VALRNG_UNKNOWN;

254}

255

256

257

258QuotRemWithBB FastDivInsertionTask::createSlowBB(BasicBlock *SuccessorBB) {

259 QuotRemWithBB DivRemPair;

260 DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",

261 MainBB->getParent(), SuccessorBB);

262 IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());

263 Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());

264

265 Value *Dividend = SlowDivOrRem->getOperand(0);

266 Value *Divisor = SlowDivOrRem->getOperand(1);

267

269 DivRemPair.Quotient = Builder.CreateSDiv(Dividend, Divisor);

270 DivRemPair.Remainder = Builder.CreateSRem(Dividend, Divisor);

271 } else {

272 DivRemPair.Quotient = Builder.CreateUDiv(Dividend, Divisor);

273 DivRemPair.Remainder = Builder.CreateURem(Dividend, Divisor);

274 }

275

276 Builder.CreateBr(SuccessorBB);

277 return DivRemPair;

278}

279

280

281

282QuotRemWithBB FastDivInsertionTask::createFastBB(BasicBlock *SuccessorBB) {

283 QuotRemWithBB DivRemPair;

284 DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",

285 MainBB->getParent(), SuccessorBB);

286 IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());

287 Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());

288

289 Value *Dividend = SlowDivOrRem->getOperand(0);

290 Value *Divisor = SlowDivOrRem->getOperand(1);

291 Value *ShortDivisorV =

292 Builder.CreateCast(Instruction::Trunc, Divisor, BypassType);

293 Value *ShortDividendV =

294 Builder.CreateCast(Instruction::Trunc, Dividend, BypassType);

295

296

297 Value *ShortQV = Builder.CreateUDiv(ShortDividendV, ShortDivisorV);

298 Value *ShortRV = Builder.CreateURem(ShortDividendV, ShortDivisorV);

299 DivRemPair.Quotient =

300 Builder.CreateCast(Instruction::ZExt, ShortQV, getSlowType());

301 DivRemPair.Remainder =

302 Builder.CreateCast(Instruction::ZExt, ShortRV, getSlowType());

303 Builder.CreateBr(SuccessorBB);

304

305 return DivRemPair;

306}

307

308

309QuotRemPair FastDivInsertionTask::createDivRemPhiNodes(QuotRemWithBB &LHS,

310 QuotRemWithBB &RHS,

313 Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());

314 PHINode *QuoPhi = Builder.CreatePHI(getSlowType(), 2);

317 PHINode *RemPhi = Builder.CreatePHI(getSlowType(), 2);

320 return QuotRemPair(QuoPhi, RemPhi);

321}

322

323

324

325

326

327Value *FastDivInsertionTask::insertOperandRuntimeCheck(Value *Op1, Value *Op2) {

328 assert((Op1 || Op2) && "Nothing to check");

329 IRBuilder<> Builder(MainBB, MainBB->end());

330 Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());

331

333 if (Op1 && Op2)

334 OrV = Builder.CreateOr(Op1, Op2);

335 else

336 OrV = Op1 ? Op1 : Op2;

337

338

339

340 uint64_t BitMask = ~BypassType->getBitMask();

341 Value *AndV = Builder.CreateAnd(OrV, BitMask);

342

343

345 return Builder.CreateICmpEQ(AndV, ZeroV);

346}

347

348

349

350std::optional FastDivInsertionTask::insertFastDivAndRem() {

351 Value *Dividend = SlowDivOrRem->getOperand(0);

352 Value *Divisor = SlowDivOrRem->getOperand(1);

353

354 VisitedSetTy SetL;

355 ValueRange DividendRange = getValueRange(Dividend, SetL);

356 if (DividendRange == VALRNG_LIKELY_LONG)

357 return std::nullopt;

358

359 VisitedSetTy SetR;

360 ValueRange DivisorRange = getValueRange(Divisor, SetR);

361 if (DivisorRange == VALRNG_LIKELY_LONG)

362 return std::nullopt;

363

364 bool DividendShort = (DividendRange == VALRNG_KNOWN_SHORT);

365 bool DivisorShort = (DivisorRange == VALRNG_KNOWN_SHORT);

366

367 if (DividendShort && DivisorShort) {

368

369

370

371

372

374 Value *TruncDividend = Builder.CreateTrunc(Dividend, BypassType);

375 Value *TruncDivisor = Builder.CreateTrunc(Divisor, BypassType);

376 Value *TruncDiv = Builder.CreateUDiv(TruncDividend, TruncDivisor);

377 Value *TruncRem = Builder.CreateURem(TruncDividend, TruncDivisor);

378 Value *ExtDiv = Builder.CreateZExt(TruncDiv, getSlowType());

379 Value *ExtRem = Builder.CreateZExt(TruncRem, getSlowType());

380 return QuotRemPair(ExtDiv, ExtRem);

381 }

382

383 if (isa(Divisor)) {

384

385

386

387 return std::nullopt;

388 }

389

390

391

392

393

394 if (auto *BCI = dyn_cast(Divisor))

395 if (BCI->getParent() == SlowDivOrRem->getParent() &&

396 isa(BCI->getOperand(0)))

397 return std::nullopt;

398

399 IRBuilder<> Builder(MainBB, MainBB->end());

400 Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());

401

402 if (DividendShort && isSignedOp()) {

403

404

405

406

407

408

409

410

411

412

413

414

416

418 QuotRemWithBB Long;

419 Long.BB = MainBB;

420 Long.Quotient = ConstantInt::get(getSlowType(), 0);

421 Long.Remainder = Dividend;

422 QuotRemWithBB Fast = createFastBB(SuccessorBB);

423 QuotRemPair Result = createDivRemPhiNodes(Fast, Long, SuccessorBB);

424 Value *CmpV = Builder.CreateICmpUGE(Dividend, Divisor);

425 Builder.CreateCondBr(CmpV, Fast.BB, SuccessorBB);

427 } else {

428

429

430

431

433

435 QuotRemWithBB Fast = createFastBB(SuccessorBB);

436 QuotRemWithBB Slow = createSlowBB(SuccessorBB);

437 QuotRemPair Result = createDivRemPhiNodes(Fast, Slow, SuccessorBB);

438 Value *CmpV = insertOperandRuntimeCheck(DividendShort ? nullptr : Dividend,

439 DivisorShort ? nullptr : Divisor);

440 Builder.CreateCondBr(CmpV, Fast.BB, Slow.BB);

442 }

443}

444

445

446

448 const BypassWidthsTy &BypassWidths) {

449 DivCacheTy PerBBDivCache;

450

451 bool MadeChange = false;

453 while (Next != nullptr) {

454

455

458

459

460 if (I->hasNUses(0))

461 continue;

462

463 FastDivInsertionTask Task(I, BypassWidths);

464 if (Value *Replacement = Task.getReplacement(PerBBDivCache)) {

465 I->replaceAllUsesWith(Replacement);

466 I->eraseFromParent();

467 MadeChange = true;

468 }

469 }

470

471

472

473

474 for (auto &KV : PerBBDivCache)

475 for (Value *V : {KV.second.Quotient, KV.second.Remainder})

477

478 return MadeChange;

479}

MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL

This file contains the declarations for the subclasses of Constant, which represent the different fla...

This file defines the DenseMap class.

assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

static int isSignedOp(ISD::CondCode Opcode)

For an integer comparison, return 1 if the comparison is a signed operation and 2 if the result is an...

This file defines the SmallPtrSet class.

LLVM Basic Block Representation.

iterator begin()

Instruction iterator methods.

static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)

Creates a new BasicBlock.

BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)

Split the basic block into two basic blocks at the specified instruction.

const Instruction & back() const

This is the shared class of boolean and integer constants.

static ConstantInt * getSigned(IntegerType *Ty, int64_t V)

Return a ConstantInt with the specified value for the specified type.

This class represents an Operation in the Expression.

A parsed version of the target data layout string in and methods for querying it.

This provides a uniform API for creating instructions and inserting them into a basic block: either a...

InstListType::iterator eraseFromParent()

This method unlinks 'this' from the containing basic block and deletes it.

unsigned getOpcode() const

Returns a member of one of the enums like Instruction::Add.

Class to represent integer types.

static IntegerType * get(LLVMContext &C, unsigned NumBits)

This static method is the primary way of constructing an IntegerType.

unsigned getBitWidth() const

Get the number of bits in this IntegerType.

void addIncoming(Value *V, BasicBlock *BB)

Add an incoming value to the end of the PHI list.

SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.

The instances of the Type class are immutable: once they are created, they are never changed.

LLVM Value Representation.

Type * getType() const

All values are typed, get the type of this value.

NodeTy * getNextNode()

Get the next node, or nullptr for the list tail.

@ Fast

Attempts to make calls as fast as possible (e.g.

@ C

The default llvm calling convention, compatible with C.

This is an optimization pass for GlobalISel generic memory operations.

bool all_of(R &&range, UnaryPredicate P)

Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.

bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())

If the specified value is a trivially dead instruction, delete it.

void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)

Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...

bool bypassSlowDivision(BasicBlock *BB, const DenseMap< unsigned int, unsigned int > &BypassWidth)

This optimization identifies DIV instructions in a BB that can be profitably bypassed and carried out...