LLVM: lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp Source File

//===-- GCNRewritePartialRegUses.cpp - Rewrite partial register uses -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Earlier passes can leave behind a virtual register that is only ever read
/// and written through subregister operands, for example a 1024-bit register
/// of which only a few 32-bit lanes are used. This pass detects such
/// registers, shifts the used subregister indexes to the rightmost (lowest)
/// possible position, and replaces the register with a new one of the
/// smallest register class that still provides all the shifted subregisters
/// at their required alignment. Live intervals are updated when available.
//
//===----------------------------------------------------------------------===//

#include "GCNRewritePartialRegUses.h"
#include "AMDGPU.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Pass.h"

using namespace llvm;

#define DEBUG_TYPE "rewrite-partial-reg-uses"

namespace {

class GCNRewritePartialRegUsesImpl {
  MachineRegisterInfo *MRI;
  const SIRegisterInfo *TRI;
  const TargetInstrInfo *TII;
  LiveIntervals *LIS;

  /// Rewrite partially used register Reg by shifting all its subregisters to
  /// the right and replacing the original register with a register of
  /// minimal size. Return true if the change has been made.
  bool rewriteReg(Register Reg) const;

  /// Map OldSubReg -> NewSubReg. A NewSubReg of NoSubRegister means the old
  /// subreg becomes the whole new register.
  using SubRegMap = SmallDenseMap<unsigned, unsigned>;

  /// Given register class RC and the set of used subregs as keys in the
  /// SubRegs map, return a new register class and fill the map values with
  /// the right-shifted subreg indexes. Return nullptr if no improvement can
  /// be achieved.
  const TargetRegisterClass *getMinSizeReg(const TargetRegisterClass *RC,
                                           SubRegMap &SubRegs) const;

  /// Given register class RC, the right shift amount RShift and the subreg
  /// covering the whole register (CoverSubregIdx, or 0 if there is none),
  /// find the smallest register class that has all the old subregs shifted
  /// right by RShift. Fill the new subreg indexes into the SubRegs values.
  /// Return nullptr on failure.
  const TargetRegisterClass *
  getRegClassWithShiftedSubregs(const TargetRegisterClass *RC, unsigned RShift,
                                unsigned CoverSubregIdx,
                                SubRegMap &SubRegs) const;

  /// Update live intervals after rewriting OldReg to NewReg with the SubRegs
  /// map describing the rewrite.
  void updateLiveIntervals(Register OldReg, Register NewReg,
                           SubRegMap &SubRegs) const;

  /// Find the right-shifted by RShift amount version of the SubReg if it
  /// exists, return 0 otherwise.
  unsigned shiftSubReg(unsigned SubReg, unsigned RShift) const;

  /// Find the subreg index with the given Offset and Size, return 0 if there
  /// is no such subregister index. The result is cached in the SubRegs data
  /// member.
  unsigned getSubReg(unsigned Offset, unsigned Size) const;

  /// Cache for the getSubReg method: {Offset, Size} -> SubReg index.
  mutable SmallDenseMap<std::pair<unsigned, unsigned>, unsigned> SubRegs;

  /// Return a bit mask that contains all register classes that are projected
  /// into RC by SubRegIdx. The result is cached in the SuperRegMasks data
  /// member.
  const uint32_t *getSuperRegClassMask(const TargetRegisterClass *RC,
                                       unsigned SubRegIdx) const;

  /// Cache for the getSuperRegClassMask method:
  /// {RC, SubRegIdx} -> class bitmask.
  mutable SmallDenseMap<std::pair<const TargetRegisterClass *, unsigned>,
                        const uint32_t *>
      SuperRegMasks;

  /// Return a bitmask containing all allocatable register classes with
  /// registers aligned at AlignNumBits. The result is cached in the
  /// AllocatableAndAlignedRegClassMasks data member.
  const BitVector &
  getAllocatableAndAlignedRegClassMask(unsigned AlignNumBits) const;

  /// Cache for the getAllocatableAndAlignedRegClassMask method:
  /// AlignNumBits -> class bitmask.
  mutable SmallDenseMap<unsigned, BitVector> AllocatableAndAlignedRegClassMasks;

public:
  GCNRewritePartialRegUsesImpl(LiveIntervals *LS) : LIS(LS) {}
  bool run(MachineFunction &MF);
};

class GCNRewritePartialRegUsesLegacy : public MachineFunctionPass {
public:
  static char ID;
  GCNRewritePartialRegUsesLegacy() : MachineFunctionPass(ID) {}

  StringRef getPassName() const override {
    return "Rewrite Partial Register Uses";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addPreserved<LiveIntervalsWrapperPass>();
    AU.addPreserved<SlotIndexesWrapperPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
};

} // end anonymous namespace

unsigned GCNRewritePartialRegUsesImpl::getSubReg(unsigned Offset,
                                                 unsigned Size) const {
  const auto [I, Inserted] = SubRegs.try_emplace({Offset, Size}, 0);
  if (Inserted) {
    for (unsigned Idx = 1, E = TRI->getNumSubRegIndices(); Idx < E; ++Idx) {
      if (TRI->getSubRegIdxOffset(Idx) == Offset &&
          TRI->getSubRegIdxSize(Idx) == Size) {
        I->second = Idx;
        break;
      }
    }
  }
  return I->second;
}
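// Illustrative sketch, not part of the original file: getSubReg pays for a
// linear scan over all subreg indexes only once per {Offset, Size} pair;
// later queries hit the SubRegs cache. The same try_emplace idiom, reduced
// to a self-contained toy over a hypothetical index table:
//
//   #include <cstdio>
//   #include <map>
//   #include <utility>
//
//   struct IdxDesc { unsigned Idx, Offset, Size; };
//   static const IdxDesc Table[] = {
//       {1, 0, 32}, {2, 32, 32}, {3, 64, 32}, {4, 0, 64}};
//
//   unsigned lookupIdx(unsigned Offset, unsigned Size) {
//     static std::map<std::pair<unsigned, unsigned>, unsigned> Cache;
//     auto [I, Inserted] = Cache.try_emplace({Offset, Size}, 0);
//     if (Inserted) // Scan only on a cache miss.
//       for (const IdxDesc &D : Table)
//         if (D.Offset == Offset && D.Size == Size) {
//           I->second = D.Idx;
//           break;
//         }
//     return I->second; // 0 means "no such index", as in getSubReg.
//   }
//
//   int main() { std::printf("%u\n", lookupIdx(32, 32)); } // prints 2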

unsigned GCNRewritePartialRegUsesImpl::shiftSubReg(unsigned SubReg,
                                                   unsigned RShift) const {
  unsigned Offset = TRI->getSubRegIdxOffset(SubReg) - RShift;
  return getSubReg(Offset, TRI->getSubRegIdxSize(SubReg));
}
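// Worked example (illustrative, using AMDGPU's bit offsets for 32-bit lanes:
// sub0 at 0, sub1 at 32, ..., sub5 at 160): shiftSubReg(sub5, 64) queries
// getSubReg(160 - 64, 32) = getSubReg(96, 32), which is sub3. A shift that
// does not land on an existing index makes getSubReg return 0, and the
// caller gives up on that candidate shift.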

const uint32_t *GCNRewritePartialRegUsesImpl::getSuperRegClassMask(
    const TargetRegisterClass *RC, unsigned SubRegIdx) const {
  const auto [I, Inserted] =
      SuperRegMasks.try_emplace({RC, SubRegIdx}, nullptr);
  if (Inserted) {
    for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI) {
      if (RCI.getSubReg() == SubRegIdx) {
        I->second = RCI.getMask();
        break;
      }
    }
  }
  return I->second;
}

const BitVector &
GCNRewritePartialRegUsesImpl::getAllocatableAndAlignedRegClassMask(
    unsigned AlignNumBits) const {
  const auto [I, Inserted] =
      AllocatableAndAlignedRegClassMasks.try_emplace(AlignNumBits);
  if (Inserted) {
    BitVector &BV = I->second;
    BV.resize(TRI->getNumRegClasses());
    for (unsigned ClassID = 0; ClassID < TRI->getNumRegClasses(); ++ClassID) {
      auto *RC = TRI->getRegClass(ClassID);
      if (RC->isAllocatable() && TRI->isRegClassAligned(RC, AlignNumBits))
        BV.set(ClassID);
    }
  }
  return I->second;
}
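// Illustrative sketch, not part of the original file: sets of register
// classes are kept as bitmasks indexed by class ID so later steps can
// intersect candidate sets cheaply. A toy version over a hypothetical class
// table, with a plain uint32_t standing in for BitVector:
//
//   #include <cstdint>
//
//   struct ClassDesc { bool Allocatable; unsigned AlignNumBits; };
//   static const ClassDesc Classes[] = {
//       {true, 32}, {false, 64}, {true, 64}, {true, 32}};
//
//   // Bit N set <=> class N is allocatable and aligned at least as strictly
//   // as AlignNumBits (mirrors getAllocatableAndAlignedRegClassMask).
//   uint32_t allocatableAndAligned(unsigned AlignNumBits) {
//     uint32_t Mask = 0;
//     for (unsigned ID = 0; ID != 4; ++ID)
//       if (Classes[ID].Allocatable && Classes[ID].AlignNumBits >= AlignNumBits)
//         Mask |= uint32_t(1) << ID;
//     return Mask; // e.g. allocatableAndAligned(64) == 0b0100
//   }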

const TargetRegisterClass *
GCNRewritePartialRegUsesImpl::getRegClassWithShiftedSubregs(
    const TargetRegisterClass *RC, unsigned RShift, unsigned CoverSubregIdx,
    SubRegMap &SubRegs) const {

  unsigned RCAlign = TRI->getRegClassAlignmentNumBits(RC);
  LLVM_DEBUG(dbgs() << "  Shift " << RShift << ", reg align " << RCAlign
                    << '\n');

  BitVector ClassMask(getAllocatableAndAlignedRegClassMask(RCAlign));
  for (auto &[OldSubReg, NewSubReg] : SubRegs) {
    LLVM_DEBUG(dbgs() << "  " << TRI->getSubRegIndexName(OldSubReg) << ':');

    auto *SubRegRC = TRI->getSubRegisterClass(RC, OldSubReg);
    if (!SubRegRC) {
      LLVM_DEBUG(dbgs() << "couldn't find target regclass\n");
      return nullptr;
    }
    LLVM_DEBUG(dbgs() << TRI->getRegClassName(SubRegRC)
                      << (SubRegRC->isAllocatable() ? "" : " not alloc")
                      << " -> ");

    if (OldSubReg == CoverSubregIdx) {
      // The subreg covers the whole register: it is rewritten as a plain use
      // of the new register.
      assert(SubRegRC->isAllocatable());
      NewSubReg = AMDGPU::NoSubRegister;
      LLVM_DEBUG(dbgs() << "whole reg");
    } else {
      NewSubReg = shiftSubReg(OldSubReg, RShift);
      if (!NewSubReg) {
        LLVM_DEBUG(dbgs() << "none\n");
        return nullptr;
      }
      LLVM_DEBUG(dbgs() << TRI->getSubRegIndexName(NewSubReg));
    }

    const uint32_t *Mask = NewSubReg ? getSuperRegClassMask(SubRegRC, NewSubReg)
                                     : SubRegRC->getSubClassMask();
    if (!Mask)
      llvm_unreachable("no register class mask?");

    ClassMask.clearBitsNotInMask(Mask);
    // Don't bother with an early exit: checking whether ClassMask still has
    // set bits isn't cheap, and the intersection succeeds in most cases.
    LLVM_DEBUG(dbgs() << ", num regclasses " << ClassMask.count() << '\n');
  }

  // ClassMask now contains all register classes that are allocatable,
  // sufficiently aligned and provide every (shifted) subreg with the required
  // subregister class. Pick the smallest of them.
  const TargetRegisterClass *MinRC = nullptr;
  unsigned MinNumBits = std::numeric_limits<unsigned>::max();
  for (unsigned ClassID : ClassMask.set_bits()) {
    auto *RC = TRI->getRegClass(ClassID);
    unsigned NumBits = TRI->getRegSizeInBits(*RC);
    if (NumBits < MinNumBits) {
      MinNumBits = NumBits;
      MinRC = RC;
    }
  }
#ifndef NDEBUG
  if (MinRC) {
    assert(MinRC->isAllocatable() && TRI->isRegClassAligned(MinRC, RCAlign));
    for (auto [OldSubReg, NewSubReg] : SubRegs)
      // Check that every register in MinRC supports the new subreg index.
      assert(MinRC == TRI->getSubClassWithSubReg(MinRC, NewSubReg));
  }
#endif
  // RShift may be zero, in which case we only succeeded if a strictly smaller
  // register class was found.
  return (MinRC != RC || RShift != 0) ? MinRC : nullptr;
}
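// Illustrative sketch, not part of the original file: the candidate search
// above is "intersect bitmasks, then take the minimum-size survivor". With
// hypothetical class sizes, the final scan looks like:
//
//   #include <cstdint>
//
//   static const unsigned SizeInBits[] = {32, 64, 96, 128};
//
//   // Return the ID of the smallest class whose bit survived all the
//   // clearBitsNotInMask intersections, or -1 if none did.
//   int pickMinClass(uint32_t ClassMask) {
//     int Best = -1;
//     for (int ID = 0; ID != 4; ++ID)
//       if ((ClassMask >> ID) & 1)
//         if (Best < 0 || SizeInBits[ID] < SizeInBits[Best])
//           Best = ID;
//     return Best;
//   }
//
// Note that the function reports success only when something improves:
// either the class shrank (MinRC != RC) or the subregs moved (RShift != 0).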

const TargetRegisterClass *
GCNRewritePartialRegUsesImpl::getMinSizeReg(const TargetRegisterClass *RC,
                                            SubRegMap &SubRegs) const {
  unsigned CoverSubreg = AMDGPU::NoSubRegister;
  unsigned Offset = std::numeric_limits<unsigned>::max();
  unsigned End = 0;
  for (auto [SubReg, SRI] : SubRegs) {
    unsigned SubRegOffset = TRI->getSubRegIdxOffset(SubReg);
    unsigned SubRegEnd = SubRegOffset + TRI->getSubRegIdxSize(SubReg);
    if (SubRegOffset < Offset) {
      Offset = SubRegOffset;
      CoverSubreg = AMDGPU::NoSubRegister;
    }
    if (SubRegEnd > End) {
      End = SubRegEnd;
      CoverSubreg = AMDGPU::NoSubRegister;
    }
    if (SubRegOffset == Offset && SubRegEnd == End)
      CoverSubreg = SubReg;
  }

  // If a covering subreg is found, shift all subregs right by Offset so that
  // the covering subreg lands at offset 0 and becomes the whole new register.
  if (CoverSubreg != AMDGPU::NoSubRegister)
    return getRegClassWithShiftedSubregs(RC, Offset, CoverSubreg, SubRegs);

  // Otherwise find the subreg with the maximum required alignment and shift
  // it, and with it all other subregs, as far right as that alignment
  // permits.
  unsigned MaxAlign = 0;
  for (auto [SubReg, SRI] : SubRegs)
    MaxAlign = std::max(MaxAlign, TRI->getSubRegAlignmentNumBits(RC, SubReg));

  unsigned FirstMaxAlignedSubRegOffset = std::numeric_limits<unsigned>::max();
  for (auto [SubReg, SRI] : SubRegs) {
    if (TRI->getSubRegAlignmentNumBits(RC, SubReg) != MaxAlign)
      continue;
    FirstMaxAlignedSubRegOffset =
        std::min(FirstMaxAlignedSubRegOffset, TRI->getSubRegIdxOffset(SubReg));
    if (FirstMaxAlignedSubRegOffset == Offset)
      break;
  }

  unsigned NewOffsetOfMaxAlignedSubReg =
      alignTo(FirstMaxAlignedSubRegOffset - Offset, MaxAlign);

  if (NewOffsetOfMaxAlignedSubReg > FirstMaxAlignedSubRegOffset)
    llvm_unreachable("misaligned subreg");

  unsigned RShift = FirstMaxAlignedSubRegOffset - NewOffsetOfMaxAlignedSubReg;
  return getRegClassWithShiftedSubregs(RC, RShift, 0, SubRegs);
}
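// Worked example (illustrative, not from the original file): suppose the
// used subregs of a 128-bit register occupy bit ranges [32,64), [64,96) and
// [32,96). The minimum offset is 32, the maximum end is 96, and the range
// [32,96) is itself a used subreg, so it is the covering subreg: everything
// is shifted right by 32 bits and the cover becomes a plain 64-bit register.
// A toy detector over (Offset, Size) pairs:
//
//   #include <algorithm>
//   #include <cstddef>
//   #include <utility>
//   #include <vector>
//
//   using Range = std::pair<unsigned, unsigned>; // {Offset, Size} in bits
//
//   // Return the index of the covering range or -1 (mirrors the first loop
//   // of getMinSizeReg).
//   int findCover(const std::vector<Range> &Used) {
//     unsigned Lo = ~0u, Hi = 0;
//     for (auto [Off, Size] : Used) {
//       Lo = std::min(Lo, Off);
//       Hi = std::max(Hi, Off + Size);
//     }
//     for (std::size_t I = 0; I != Used.size(); ++I)
//       if (Used[I].first == Lo && Used[I].first + Used[I].second == Hi)
//         return int(I);
//     return -1; // No cover: fall back to the alignment-driven shift.
//   }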

// Update the live intervals of a rewritten register so that downstream
// passes keep precise liveness for the narrower NewReg.
void GCNRewritePartialRegUsesImpl::updateLiveIntervals(
    Register OldReg, Register NewReg, SubRegMap &SubRegs) const {
  if (!LIS->hasInterval(OldReg))
    return;

  auto &OldLI = LIS->getInterval(OldReg);
  auto &NewLI = LIS->createEmptyInterval(NewReg);

  auto &Allocator = LIS->getVNInfoAllocator();
  NewLI.setWeight(OldLI.weight());

  for (auto &SR : OldLI.subranges()) {
    auto I = find_if(SubRegs, [&](auto &P) {
      return SR.LaneMask == TRI->getSubRegIndexLaneMask(P.first);
    });

    if (I == SubRegs.end()) {
      // A subrange's lane mask doesn't match any used subreg exactly. This
      // can happen when subranges were merged or split independently of the
      // subreg indexes actually used by the operands. Rather than trying to
      // map such subranges, drop both intervals and recompute NewReg's
      // interval from scratch.
      LIS->removeInterval(OldReg);
      LIS->removeInterval(NewReg);
      LIS->createAndComputeVirtRegInterval(NewReg);
      return;
    }

    if (unsigned NewSubReg = I->second)
      NewLI.createSubRangeFrom(Allocator,
                               TRI->getSubRegIndexLaneMask(NewSubReg), SR);
    else // This is the covering subreg: it becomes the main range.
      NewLI.assign(SR, Allocator);

    SubRegs.erase(I);
  }
  if (NewLI.empty())
    NewLI.assign(OldLI, Allocator);

  LIS->removeInterval(OldReg);
}

bool GCNRewritePartialRegUsesImpl::rewriteReg(Register Reg) const {
  // Collect the used subregs. Bail out if the register is used as a whole or
  // not used at all.
  SubRegMap SubRegs;
  for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
    if (MO.getSubReg() == AMDGPU::NoSubRegister)
      return false;
    SubRegs.try_emplace(MO.getSubReg());
  }

  if (SubRegs.empty())
    return false;

  auto *RC = MRI->getRegClass(Reg);
  LLVM_DEBUG(dbgs() << "Try to rewrite partial reg " << printReg(Reg, TRI)
                    << ':' << TRI->getRegClassName(RC) << '\n');

  auto *NewRC = getMinSizeReg(RC, SubRegs);
  if (!NewRC) {
    LLVM_DEBUG(dbgs() << "  No improvement achieved\n");
    return false;
  }

  Register NewReg = MRI->createVirtualRegister(NewRC);
  LLVM_DEBUG(dbgs() << "  Success " << printReg(Reg, TRI) << ':'
                    << TRI->getRegClassName(RC) << " -> "
                    << printReg(NewReg, TRI) << ':'
                    << TRI->getRegClassName(NewRC) << '\n');

  for (auto &MO : make_early_inc_range(MRI->reg_operands(Reg))) {
    MO.setReg(NewReg);
    // Debug info can refer to the whole reg, just leave it as it is for now.
    // TODO: create some DI shift expression?
    if (MO.isDebug() && MO.getSubReg() == 0)
      continue;
    unsigned NewSubReg = SubRegs[MO.getSubReg()];
    MO.setSubReg(NewSubReg);
    if (NewSubReg == AMDGPU::NoSubRegister && MO.isDef())
      MO.setIsUndef(false);
  }

  if (LIS)
    updateLiveIntervals(Reg, NewReg, SubRegs);

  return true;
}
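// Illustrative before/after (hand-written MIR, not compiler output): if
// %0:vreg_128 is only ever accessed through its two upper 32-bit lanes,
//
//   undef %0.sub2:vreg_128 = V_MOV_B32_e32 1, implicit $exec
//   %0.sub3:vreg_128 = V_MOV_B32_e32 2, implicit $exec
//   S_NOP 0, implicit %0.sub2_sub3
//
// rewriteReg switches every operand to a fresh 64-bit register with the
// subreg indexes shifted right by 64 bits:
//
//   undef %1.sub0:vreg_64 = V_MOV_B32_e32 1, implicit $exec
//   %1.sub1:vreg_64 = V_MOV_B32_e32 2, implicit $exec
//   S_NOP 0, implicit %1.sub0_sub1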

bool GCNRewritePartialRegUsesImpl::run(MachineFunction &MF) {
  MRI = &MF.getRegInfo();
  TRI = static_cast<const SIRegisterInfo *>(MRI->getTargetRegisterInfo());
  TII = MF.getSubtarget().getInstrInfo();
  bool Changed = false;
  for (size_t I = 0, E = MRI->getNumVirtRegs(); I < E; ++I) {
    Changed |= rewriteReg(Register::index2VirtReg(I));
  }
  return Changed;
}

bool GCNRewritePartialRegUsesLegacy::runOnMachineFunction(MachineFunction &MF) {
  LiveIntervalsWrapperPass *LISWrapper =
      getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
  LiveIntervals *LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
  GCNRewritePartialRegUsesImpl Impl(LIS);
  return Impl.run(MF);
}

PreservedAnalyses
GCNRewritePartialRegUsesPass::run(MachineFunction &MF,
                                  MachineFunctionAnalysisManager &MFAM) {
  LiveIntervals *LIS = MFAM.getCachedResult<LiveIntervalsAnalysis>(MF);
  if (!GCNRewritePartialRegUsesImpl(LIS).run(MF))
    return PreservedAnalyses::all();

  auto PA = getMachineFunctionPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}

char GCNRewritePartialRegUsesLegacy::ID;

char &llvm::GCNRewritePartialRegUsesID = GCNRewritePartialRegUsesLegacy::ID;

INITIALIZE_PASS_BEGIN(GCNRewritePartialRegUsesLegacy, DEBUG_TYPE,
                      "Rewrite Partial Register Uses", false, false)
INITIALIZE_PASS_END(GCNRewritePartialRegUsesLegacy, DEBUG_TYPE,
                    "Rewrite Partial Register Uses", false, false)
