LLVM: lib/Target/AMDGPU/SIMemoryLegalizer.cpp (source listing)

//===- SIMemoryLegalizer.cpp ----------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM
// Exceptions. See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Memory legalizer - implements memory model. More information can be
/// found here:
///   http://llvm.org/docs/AMDGPUUsage.html#memory-model
//
//===----------------------------------------------------------------------===//

// NOTE: the original include list was lost in extraction; the set below is
// reconstructed from the symbols this file uses and may not match upstream
// exactly.
#include "AMDGPU.h"
#include "AMDGPUMachineModuleInfo.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/TargetParser/TargetParser.h"

using namespace llvm;
using namespace llvm::AMDGPU;

#define DEBUG_TYPE "si-memory-legalizer"
#define PASS_NAME "SI Memory Legalizer"

static cl::opt<bool> AmdgcnSkipCacheInvalidations(
    "amdgcn-skip-cache-invalidations", cl::init(false), cl::Hidden,
    cl::desc("Use this to skip inserting cache invalidating instructions."));

namespace {

LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

/// Memory operation flags. Can be ORed together.
enum class SIMemOp {
  NONE = 0u,
  LOAD = 1u << 0,
  STORE = 1u << 1,
  LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ STORE)
};

/// Position to insert a new instruction relative to an existing
/// instruction.
enum class Position {
  BEFORE,
  AFTER
};

/// The atomic synchronization scopes supported by the AMDGPU target.
enum class SIAtomicScope {
  NONE,
  SINGLETHREAD,
  WAVEFRONT,
  WORKGROUP,
  CLUSTER,
  AGENT,
  SYSTEM
};

/// The distinct address spaces supported by the AMDGPU target for
/// atomic memory operation. Can be ORed together.
enum class SIAtomicAddrSpace {
  NONE = 0u,
  GLOBAL = 1u << 0,
  LDS = 1u << 1,
  SCRATCH = 1u << 2,
  GDS = 1u << 3,
  OTHER = 1u << 4,

  /// The address spaces that can be accessed by a FLAT instruction.
  FLAT = GLOBAL | LDS | SCRATCH,

  /// The address spaces that support atomic memory operations.
  ATOMIC = GLOBAL | LDS | SCRATCH | GDS,

  /// All address spaces.
  ALL = GLOBAL | LDS | SCRATCH | GDS | OTHER,

  LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ ALL)
};
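/// Summarizes the memory-model-relevant properties of a single memory
/// operation: its success/failure orderings, its synchronization scope, the
/// address spaces it orders and accesses, and per-access hints (volatile,
/// nontemporal, last-use, cooperative).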

class SIMemOpInfo final {
private:
  friend class SIMemOpAccess;

  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
  AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
  SIAtomicScope Scope = SIAtomicScope::SYSTEM;
  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
  SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsCrossAddressSpaceOrdering = false;
  bool IsVolatile = false;
  bool IsNonTemporal = false;
  bool IsLastUse = false;
  bool IsCooperative = false;

  SIMemOpInfo(
      const GCNSubtarget &ST,
      AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent,
      SIAtomicScope Scope = SIAtomicScope::SYSTEM,
      SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::ATOMIC,
      SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::ALL,
      bool IsCrossAddressSpaceOrdering = true,
      AtomicOrdering FailureOrdering = AtomicOrdering::SequentiallyConsistent,
      bool IsVolatile = false, bool IsNonTemporal = false,
      bool IsLastUse = false, bool IsCooperative = false)
      : Ordering(Ordering), FailureOrdering(FailureOrdering), Scope(Scope),
        OrderingAddrSpace(OrderingAddrSpace), InstrAddrSpace(InstrAddrSpace),
        IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
        IsVolatile(IsVolatile), IsNonTemporal(IsNonTemporal),
        IsLastUse(IsLastUse), IsCooperative(IsCooperative) {

    if (Ordering == AtomicOrdering::NotAtomic) {
      assert(!IsCooperative && "Cannot be cooperative & non-atomic!");
      assert(Scope == SIAtomicScope::NONE &&
             OrderingAddrSpace == SIAtomicAddrSpace::NONE &&
             !IsCrossAddressSpaceOrdering &&
             FailureOrdering == AtomicOrdering::NotAtomic);
      return;
    }

    assert(Scope != SIAtomicScope::NONE &&
           (OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
               SIAtomicAddrSpace::NONE &&
           (InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
               SIAtomicAddrSpace::NONE);

    // There is no cross address space ordering if the ordering address space
    // is the same as the instruction address space and only contains a single
    // address space.
    if ((OrderingAddrSpace == InstrAddrSpace) &&
        isPowerOf2_32(uint32_t(InstrAddrSpace)))
      this->IsCrossAddressSpaceOrdering = false;

    // Limit the scope to the maximum supported by the instruction's address
    // spaces.
    if ((InstrAddrSpace & ~SIAtomicAddrSpace::SCRATCH) ==
        SIAtomicAddrSpace::NONE) {
      this->Scope = std::min(Scope, SIAtomicScope::SINGLETHREAD);
    } else if ((InstrAddrSpace &
                ~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS)) ==
               SIAtomicAddrSpace::NONE) {
      this->Scope = std::min(Scope, SIAtomicScope::WORKGROUP);
    } else if ((InstrAddrSpace &
                ~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS |
                  SIAtomicAddrSpace::GDS)) == SIAtomicAddrSpace::NONE) {
      this->Scope = std::min(Scope, SIAtomicScope::AGENT);
    }

    // On targets that have no notion of a cluster, treat cluster scope as
    // agent scope.
    if (this->Scope == SIAtomicScope::CLUSTER && !ST.hasClusters())
      this->Scope = SIAtomicScope::AGENT;
  }

public:
  /// \returns Atomic synchronization scope of the machine instruction used to
  /// create this SIMemOpInfo.
  SIAtomicScope getScope() const {
    return Scope;
  }

  /// \returns Ordering constraint of the machine instruction used to
  /// create this SIMemOpInfo.
  AtomicOrdering getOrdering() const {
    return Ordering;
  }

  /// \returns Failure ordering constraint of the machine instruction used to
  /// create this SIMemOpInfo.
  AtomicOrdering getFailureOrdering() const {
    return FailureOrdering;
  }

  /// \returns The address spaces accessed by the machine instruction used to
  /// create this SIMemOpInfo.
  SIAtomicAddrSpace getInstrAddrSpace() const {
    return InstrAddrSpace;
  }

  /// \returns The address spaces that must be ordered by the machine
  /// instruction used to create this SIMemOpInfo.
  SIAtomicAddrSpace getOrderingAddrSpace() const {
    return OrderingAddrSpace;
  }

  /// \returns True iff memory ordering of operations on different address
  /// spaces is required.
  bool getIsCrossAddressSpaceOrdering() const {
    return IsCrossAddressSpaceOrdering;
  }

  /// \returns True if the memory access of the machine instruction used to
  /// create this SIMemOpInfo is volatile, false otherwise.
  bool isVolatile() const {
    return IsVolatile;
  }

  /// \returns True if the memory access of the machine instruction used to
  /// create this SIMemOpInfo is nontemporal, false otherwise.
  bool isNonTemporal() const {
    return IsNonTemporal;
  }

  /// \returns True if the memory access of the machine instruction used to
  /// create this SIMemOpInfo is a last use, false otherwise.
  bool isLastUse() const { return IsLastUse; }

  /// \returns True if this is a cooperative load or store atomic.
  bool isCooperative() const { return IsCooperative; }

  /// \returns True if the ordering constraint of the machine instruction used
  /// to create this SIMemOpInfo is unordered or higher, false otherwise.
  bool isAtomic() const {
    return Ordering != AtomicOrdering::NotAtomic;
  }
};
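/// Extracts SIMemOpInfo from machine instructions by inspecting their machine
/// memory operands and mapping LLVM IR synchronization scopes and address
/// spaces onto the SI atomic scopes and address spaces above.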

class SIMemOpAccess final {
private:
  const AMDGPUMachineModuleInfo *MMI = nullptr;
  const GCNSubtarget &ST;

  /// Reports unsupported message \p Msg for \p MI to LLVM context.
  void reportUnsupported(const MachineBasicBlock::iterator &MI,
                         const char *Msg) const;

  /// Inspects the target synchronization scope \p SSID and determines the SI
  /// atomic scope it corresponds to, the address spaces it covers, and whether
  /// the memory ordering applies between address spaces.
  std::optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
  toSIAtomicScope(SyncScope::ID SSID, SIAtomicAddrSpace InstrAddrSpace) const;

  /// \returns The bit set of address spaces accessed by address space \p AS.
  SIAtomicAddrSpace toSIAtomicAddrSpace(unsigned AS) const;

  /// \returns Info constructed from \p MI, which has at least one machine
  /// memory operand.
  std::optional<SIMemOpInfo>
  constructFromMIWithMMO(const MachineBasicBlock::iterator &MI) const;

public:
  /// Construct class to support accessing the machine memory operands
  /// of instructions in the machine function \p MF.
  SIMemOpAccess(const AMDGPUMachineModuleInfo &MMI, const GCNSubtarget &ST);

  /// \returns Load info if \p MI is a load operation, "std::nullopt"
  /// otherwise.
  std::optional<SIMemOpInfo>
  getLoadInfo(const MachineBasicBlock::iterator &MI) const;

  /// \returns Store info if \p MI is a store operation, "std::nullopt"
  /// otherwise.
  std::optional<SIMemOpInfo>
  getStoreInfo(const MachineBasicBlock::iterator &MI) const;

  /// \returns Atomic fence info if \p MI is an atomic fence operation,
  /// "std::nullopt" otherwise.
  std::optional<SIMemOpInfo>
  getAtomicFenceInfo(const MachineBasicBlock::iterator &MI) const;

  /// \returns Atomic cmpxchg/rmw info if \p MI is an atomic cmpxchg or
  /// atomic rmw operation, "std::nullopt" otherwise.
  std::optional<SIMemOpInfo>
  getAtomicCmpxchgOrRmwInfo(const MachineBasicBlock::iterator &MI) const;

  /// \returns LDS DMA info if \p MI is an LDS DMA operation, "std::nullopt"
  /// otherwise.
  std::optional<SIMemOpInfo>
  getLDSDMAInfo(const MachineBasicBlock::iterator &MI) const;
};
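/// Abstract base class for the per-generation cache-control policies.
/// Implementations decide how to bypass caches, which invalidate/writeback
/// instructions to emit, and which counters to wait on for a given scope and
/// address space.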

class SICacheControl {
protected:

  /// AMDGPU subtarget info.
  const GCNSubtarget &ST;

  /// Instruction info.
  const SIInstrInfo *TII = nullptr;

  IsaVersion IV;

  /// Whether to insert cache invalidating instructions.
  bool InsertCacheInv;

  SICacheControl(const GCNSubtarget &ST);

  /// Sets the cache policy bits \p Bits in the CPol operand of instruction
  /// \p MI, if present. \returns True iff the instruction was modified.
  /// (The helper's name was lost in extraction; "enableCPolBits" is assumed
  /// from its definition below.)
  bool enableCPolBits(const MachineBasicBlock::iterator MI,
                      unsigned Bits) const;

  /// \returns True if the operation on address spaces \p AS can affect the
  /// global address space.
  bool canAffectGlobalAddrSpace(SIAtomicAddrSpace AS) const;

public:

  /// Create a cache control for the subtarget \p ST.
  static std::unique_ptr<SICacheControl> create(const GCNSubtarget &ST);

  /// Update \p MI memory load instruction to bypass any caches up to
  /// the \p Scope memory scope for address spaces \p AddrSpace.
  /// Return true iff the instruction was modified.
  virtual bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                                     SIAtomicScope Scope,
                                     SIAtomicAddrSpace AddrSpace) const = 0;

  /// Update \p MI memory store instruction to bypass any caches up to
  /// the \p Scope memory scope for address spaces \p AddrSpace.
  /// Return true iff the instruction was modified.
  virtual bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
                                      SIAtomicScope Scope,
                                      SIAtomicAddrSpace AddrSpace) const = 0;

  /// Update \p MI memory read-modify-write instruction to bypass any caches
  /// up to the \p Scope memory scope for address spaces \p AddrSpace.
  /// Return true iff the instruction was modified.
  virtual bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
                                    SIAtomicScope Scope,
                                    SIAtomicAddrSpace AddrSpace) const = 0;

  /// Update \p MI memory instruction of kind \p Op associated with address
  /// spaces \p AddrSpace to indicate it is volatile and/or
  /// nontemporal/last-use. Return true iff the instruction was modified.
  virtual bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                              SIAtomicAddrSpace AddrSpace,
                                              SIMemOp Op, bool IsVolatile,
                                              bool IsNonTemporal,
                                              bool IsLastUse = false) const = 0;

  /// Add final touches to a store (or the store half of an RMW) \p MI after
  /// all other changes have been applied. \p Atomic is true if it is an
  /// atomic operation.
  virtual bool finalizeStore(MachineInstr &MI, bool Atomic) const {
    return false;
  };

  /// Handle a cooperative load or store atomic. (The diagnostic call's name
  /// was lost in extraction and is assumed here.)
  virtual bool handleCooperativeAtomic(MachineInstr &MI) const {
    reportFatalUsageError(
        "cooperative atomics are not available on this architecture");
  }

  /// Inserts any necessary instructions at position \p Pos relative to
  /// instruction \p MI to ensure memory instructions before \p Pos of kind
  /// \p Op associated with address spaces \p AddrSpace have completed. Used
  /// between memory instructions to enforce the order they become visible as
  /// observed by other memory instructions executing in memory scope \p Scope.
  /// \p IsCrossAddrSpaceOrdering indicates if the memory ordering is between
  /// address spaces. If \p AtomicsOnly is true, only wait on counters that
  /// can be used by atomic instructions.
  /// Returns true iff any instructions were inserted.
  virtual bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                          SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                          bool IsCrossAddrSpaceOrdering, Position Pos,
                          AtomicOrdering Order, bool AtomicsOnly) const = 0;

  /// Inserts any necessary instructions at position \p Pos relative to
  /// instruction \p MI to ensure any subsequent memory instructions of this
  /// thread with address spaces \p AddrSpace will observe the previous memory
  /// operations by any thread for memory scopes up to memory scope \p Scope.
  /// Returns true iff any instructions were inserted.
  virtual bool insertAcquire(MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace,
                             Position Pos) const = 0;

  /// Inserts any necessary instructions at position \p Pos relative to
  /// instruction \p MI to ensure previous memory instructions by this thread
  /// with address spaces \p AddrSpace have completed and can be observed by
  /// subsequent memory instructions by any thread executing in memory scope
  /// \p Scope. \p IsCrossAddrSpaceOrdering indicates if the memory ordering
  /// is between address spaces. Returns true iff any instructions were
  /// inserted.
  virtual bool insertRelease(MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace,
                             bool IsCrossAddrSpaceOrdering,
                             Position Pos) const = 0;

  /// Virtual destructor to allow derivations to be deleted.
  virtual ~SICacheControl() = default;
};
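/// Cache control for the GFX6-GFX9 generations. The GFX90A and GFX940
/// variants are handled here as well, distinguished by subtarget checks
/// (hasGFX90AInsts/hasGFX940Insts) rather than by separate subclasses.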

class SIGfx6CacheControl final : public SICacheControl {
public:

  SIGfx6CacheControl(const GCNSubtarget &ST) : SICacheControl(ST) {}

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
                              SIAtomicScope Scope,
                              SIAtomicAddrSpace AddrSpace) const override;

  bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
                            SIAtomicScope Scope,
                            SIAtomicAddrSpace AddrSpace) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile, bool IsNonTemporal,
                                      bool IsLastUse) const override;

  bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering, Position Pos,
                  AtomicOrdering Order, bool AtomicsOnly) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI,
                     SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace,
                     Position Pos) const override;

  bool insertRelease(MachineBasicBlock::iterator &MI,
                     SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace,
                     bool IsCrossAddrSpaceOrdering,
                     Position Pos) const override;
};
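/// Cache control for GFX10-GFX11. These targets have a per-CU L0 and a
/// per-WGP L1, so workgroup-scope behavior depends on whether CU mode is
/// enabled, and a separate vscnt counter tracks VMEM stores.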

class SIGfx10CacheControl final : public SICacheControl {
public:
  SIGfx10CacheControl(const GCNSubtarget &ST) : SICacheControl(ST) {}

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
                              SIAtomicScope Scope,
                              SIAtomicAddrSpace AddrSpace) const override {
    return false;
  }

  bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
                            SIAtomicScope Scope,
                            SIAtomicAddrSpace AddrSpace) const override {
    return false;
  }

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile, bool IsNonTemporal,
                                      bool IsLastUse) const override;

  bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering, Position Pos,
                  AtomicOrdering Order, bool AtomicsOnly) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, Position Pos) const override;

  bool insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, bool IsCrossAddrSpaceOrdering,
                     Position Pos) const override {
    return insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
                      IsCrossAddrSpaceOrdering, Pos, AtomicOrdering::Release,
                      false);
  }
};
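/// Cache control for GFX12+. Cache behavior is expressed through the CPol
/// operand's TH (temporal hint) and SCOPE fields, and waits use the split
/// loadcnt/storecnt/dscnt counters.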

class SIGfx12CacheControl final : public SICacheControl {
protected:

  // Sets TH policy to \p Value if a CPol operand is present in instruction
  // \p MI. \returns True if \p MI is modified, false otherwise.
  bool setTH(const MachineBasicBlock::iterator MI,
             AMDGPU::CPol::CPol Value) const;

  // Sets SCOPE policy to \p Value if a CPol operand is present in instruction
  // \p MI. \returns True if \p MI is modified, false otherwise.
  bool setScope(const MachineBasicBlock::iterator MI,
                AMDGPU::CPol::CPol Value) const;

  // Stores with system scope (SCOPE_SYS) need to wait for:
  // - loads or atomics (returning) - wait for {LOAD|SAMPLE|BVH|KM}CNT==0
  // - non-returning atomics        - wait for STORECNT==0
  // TODO: SIInsertWaitcnts will not always be able to remove the STORECNT
  // wait if it is applied to an instruction sequence that also contains
  // stores; this is a conservative solution.
  bool
  insertWaitsBeforeSystemScopeStore(const MachineBasicBlock::iterator MI) const;

  bool setAtomicScope(const MachineBasicBlock::iterator &MI,
                      SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace) const;

public:
  SIGfx12CacheControl(const GCNSubtarget &ST) : SICacheControl(ST) {
    // This cache control expects either GFX12.5, or GFX12.0 running in CU
    // mode.
    assert(ST.hasGFX1250Insts() || ST.isCuModeEnabled());
  }

  bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering, Position Pos,
                  AtomicOrdering Order, bool AtomicsOnly) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, Position Pos) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile, bool IsNonTemporal,
                                      bool IsLastUse) const override;

  bool finalizeStore(MachineInstr &MI, bool Atomic) const override;

  bool handleCooperativeAtomic(MachineInstr &MI) const override;

  bool insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, bool IsCrossAddrSpaceOrdering,
                     Position Pos) const override;

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override {
    return setAtomicScope(MI, Scope, AddrSpace);
  }

  bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
                              SIAtomicScope Scope,
                              SIAtomicAddrSpace AddrSpace) const override {
    return setAtomicScope(MI, Scope, AddrSpace);
  }

  bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
                            SIAtomicScope Scope,
                            SIAtomicAddrSpace AddrSpace) const override {
    return setAtomicScope(MI, Scope, AddrSpace);
  }
};
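/// The pass itself: walks every machine instruction, classifies it via
/// SIMemOpAccess, and expands loads, stores, fences, RMWs and LDS DMA
/// operations into the waits, invalidates and cache-policy bits required by
/// the AMDGPU memory model.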

class SIMemoryLegalizer final {
private:
  const MachineModuleInfo &MMI;
  /// Cache Control.
  std::unique_ptr<SICacheControl> CC = nullptr;

  /// List of atomic pseudo instructions.
  std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;

  /// Return true iff instruction \p MI is an atomic instruction that
  /// returns a result.
  bool isAtomicRet(const MachineInstr &MI) const {
    return SIInstrInfo::isAtomicRet(MI);
  }

  /// Removes all processed atomic pseudo instructions from the current
  /// function. Returns true if the current function is modified, false
  /// otherwise.
  bool removeAtomicPseudoMIs();

  /// Expands load operation \p MI. Returns true if instructions are
  /// added/deleted or \p MI is modified, false otherwise.
  bool expandLoad(const SIMemOpInfo &MOI,
                  MachineBasicBlock::iterator &MI);
  /// Expands store operation \p MI. Returns true if instructions are
  /// added/deleted or \p MI is modified, false otherwise.
  bool expandStore(const SIMemOpInfo &MOI,
                   MachineBasicBlock::iterator &MI);
  /// Expands atomic fence operation \p MI. Returns true if instructions are
  /// added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicFence(const SIMemOpInfo &MOI,
                         MachineBasicBlock::iterator &MI);
  /// Expands atomic cmpxchg or rmw operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
                                MachineBasicBlock::iterator &MI);
  /// Expands LDS DMA operation \p MI. Returns true if instructions are
  /// added/deleted or \p MI is modified, false otherwise.
  bool expandLDSDMA(const SIMemOpInfo &MOI, MachineBasicBlock::iterator &MI);

public:
  SIMemoryLegalizer(const MachineModuleInfo &MMI) : MMI(MMI) {}
  bool run(MachineFunction &MF);
};

class SIMemoryLegalizerLegacy final : public MachineFunctionPass {
public:
  static char ID;

  SIMemoryLegalizerLegacy() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override {
    return PASS_NAME;
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
};

623 {"global", SIAtomicAddrSpace::GLOBAL},

624 {"local", SIAtomicAddrSpace::LDS},

625}};

626

632 OS << "unknown address space '" << AS << "'; expected one of ";

634 for (const auto &[Name, Val] : ASNames)

635 OS << LS << '\'' << Name << '\'';

638}

639

640

641

642

643static std::optional

645 static constexpr StringLiteral FenceASPrefix = "amdgpu-synchronize-as";

646

648 if (!MMRA)

649 return std::nullopt;

650

651 SIAtomicAddrSpace Result = SIAtomicAddrSpace::NONE;

652 for (const auto &[Prefix, Suffix] : MMRA) {

653 if (Prefix != FenceASPrefix)

654 continue;

655

656 if (auto It = ASNames.find(Suffix); It != ASNames.end())

657 Result |= It->second;

658 else

659 diagnoseUnknownMMRAASName(MI, Suffix);

660 }

661

662 if (Result == SIAtomicAddrSpace::NONE)

663 return std::nullopt;

664

666}

667

668}

669
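// Illustrative example (not taken from this file): a fence carrying the MMRA
// below is narrowed so that only the LDS address space is ordered, letting
// the legalizer drop the global-memory waits it would otherwise emit:
//
//   fence syncscope("workgroup") release, !mmra !0
//   ...
//   !0 = !{!"amdgpu-synchronize-as", !"local"}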

void SIMemOpAccess::reportUnsupported(const MachineBasicBlock::iterator &MI,
                                      const char *Msg) const {
  const Function &Func = MI->getParent()->getParent()->getFunction();
  Func.getContext().diagnose(
      DiagnosticInfoUnsupported(Func, Msg, MI->getDebugLoc()));
}

std::optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
                               SIAtomicAddrSpace InstrAddrSpace) const {
  if (SSID == SyncScope::System)
    return std::tuple(SIAtomicScope::SYSTEM, SIAtomicAddrSpace::ATOMIC, true);
  if (SSID == MMI->getAgentSSID())
    return std::tuple(SIAtomicScope::AGENT, SIAtomicAddrSpace::ATOMIC, true);
  if (SSID == MMI->getClusterSSID())
    return std::tuple(SIAtomicScope::CLUSTER, SIAtomicAddrSpace::ATOMIC, true);
  if (SSID == MMI->getWorkgroupSSID())
    return std::tuple(SIAtomicScope::WORKGROUP, SIAtomicAddrSpace::ATOMIC,
                      true);
  if (SSID == MMI->getWavefrontSSID())
    return std::tuple(SIAtomicScope::WAVEFRONT, SIAtomicAddrSpace::ATOMIC,
                      true);
  if (SSID == SyncScope::SingleThread)
    return std::tuple(SIAtomicScope::SINGLETHREAD, SIAtomicAddrSpace::ATOMIC,
                      true);
  if (SSID == MMI->getSystemOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::SYSTEM,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  if (SSID == MMI->getAgentOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::AGENT,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  if (SSID == MMI->getClusterOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::CLUSTER,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  if (SSID == MMI->getWorkgroupOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::WORKGROUP,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  if (SSID == MMI->getWavefrontOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::WAVEFRONT,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  if (SSID == MMI->getSingleThreadOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::SINGLETHREAD,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  return std::nullopt;
}

SIAtomicAddrSpace SIMemOpAccess::toSIAtomicAddrSpace(unsigned AS) const {
  if (AS == AMDGPUAS::FLAT_ADDRESS)
    return SIAtomicAddrSpace::FLAT;
  if (AS == AMDGPUAS::GLOBAL_ADDRESS)
    return SIAtomicAddrSpace::GLOBAL;
  if (AS == AMDGPUAS::LOCAL_ADDRESS)
    return SIAtomicAddrSpace::LDS;
  if (AS == AMDGPUAS::PRIVATE_ADDRESS)
    return SIAtomicAddrSpace::SCRATCH;
  if (AS == AMDGPUAS::REGION_ADDRESS)
    return SIAtomicAddrSpace::GDS;
  if (AS == AMDGPUAS::BUFFER_FAT_POINTER || AS == AMDGPUAS::BUFFER_RESOURCE ||
      AS == AMDGPUAS::BUFFER_STRIDED_POINTER)
    return SIAtomicAddrSpace::GLOBAL;

  return SIAtomicAddrSpace::OTHER;
}

SIMemOpAccess::SIMemOpAccess(const AMDGPUMachineModuleInfo &MMI_,
                             const GCNSubtarget &ST)
    : MMI(&MMI_), ST(ST) {}

std::optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getNumMemOperands() > 0);

  SyncScope::ID SSID = SyncScope::SingleThread;
  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
  AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
  SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsNonTemporal = true;
  bool IsVolatile = false;
  bool IsLastUse = false;
  bool IsCooperative = false;

  // Validator should check whether or not MMOs cover the entire set of
  // locations accessed by the memory instruction.
  for (const auto &MMO : MI->memoperands()) {
    IsNonTemporal &= MMO->isNonTemporal();
    IsVolatile |= MMO->isVolatile();
    IsLastUse |= MMO->getFlags() & MOLastUse;
    IsCooperative |= MMO->getFlags() & MOCooperative;
    InstrAddrSpace |=
        toSIAtomicAddrSpace(MMO->getPointerInfo().getAddrSpace());
    AtomicOrdering OpOrdering = MMO->getSuccessOrdering();
    if (OpOrdering != AtomicOrdering::NotAtomic) {
      const auto &IsSyncScopeInclusion =
          MMI->isSyncScopeInclusion(SSID, MMO->getSyncScopeID());
      if (!IsSyncScopeInclusion) {
        reportUnsupported(MI,
          "Unsupported non-inclusive atomic synchronization scope");
        return std::nullopt;
      }

      SSID = *IsSyncScopeInclusion ? SSID : MMO->getSyncScopeID();
      Ordering = getMergedAtomicOrdering(Ordering, OpOrdering);
      assert(MMO->getFailureOrdering() != AtomicOrdering::Release &&
             MMO->getFailureOrdering() != AtomicOrdering::AcquireRelease);
      FailureOrdering =
          getMergedAtomicOrdering(FailureOrdering, MMO->getFailureOrdering());
    }
  }

  // LDS DMA reads from global memory and writes LDS (or vice versa), so it is
  // treated as at least monotonic and expanded by expandLDSDMA. (This guard
  // was partially lost in extraction; reconstructed.)
  if (SIInstrInfo::isLDSDMA(*MI))
    Ordering = AtomicOrdering::Monotonic;

  SIAtomicScope Scope = SIAtomicScope::NONE;
  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsCrossAddressSpaceOrdering = false;
  if (Ordering != AtomicOrdering::NotAtomic) {
    auto ScopeOrNone = toSIAtomicScope(SSID, InstrAddrSpace);
    if (!ScopeOrNone) {
      reportUnsupported(MI, "Unsupported atomic synchronization scope");
      return std::nullopt;
    }
    std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
        *ScopeOrNone;
    if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
        ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
         OrderingAddrSpace) ||
        ((InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) ==
         SIAtomicAddrSpace::NONE)) {
      reportUnsupported(MI, "Unsupported atomic address space");
      return std::nullopt;
    }
  }
  return SIMemOpInfo(ST, Ordering, Scope, OrderingAddrSpace, InstrAddrSpace,
                     IsCrossAddressSpaceOrdering, FailureOrdering, IsVolatile,
                     IsNonTemporal, IsLastUse, IsCooperative);
}

std::optional<SIMemOpInfo>
SIMemOpAccess::getLoadInfo(const MachineBasicBlock::iterator &MI) const {
  // A load reads but does not write memory.
  if (!(MI->mayLoad() && !MI->mayStore()))
    return std::nullopt;

  // Be conservative if there are no memory operands.
  if (MI->getNumMemOperands() == 0)
    return SIMemOpInfo(ST);

  return constructFromMIWithMMO(MI);
}

std::optional<SIMemOpInfo>
SIMemOpAccess::getStoreInfo(const MachineBasicBlock::iterator &MI) const {
  // A store writes but does not read memory.
  if (!(!MI->mayLoad() && MI->mayStore()))
    return std::nullopt;

  // Be conservative if there are no memory operands.
  if (MI->getNumMemOperands() == 0)
    return SIMemOpInfo(ST);

  return constructFromMIWithMMO(MI);
}

std::optional<SIMemOpInfo>
SIMemOpAccess::getAtomicFenceInfo(const MachineBasicBlock::iterator &MI) const {
  if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
    return std::nullopt;

  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI->getOperand(0).getImm());

  SyncScope::ID SSID = static_cast<SyncScope::ID>(MI->getOperand(1).getImm());
  auto ScopeOrNone = toSIAtomicScope(SSID, SIAtomicAddrSpace::ATOMIC);
  if (!ScopeOrNone) {
    reportUnsupported(MI, "Unsupported atomic synchronization scope");
    return std::nullopt;
  }

  SIAtomicScope Scope = SIAtomicScope::NONE;
  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsCrossAddressSpaceOrdering = false;
  std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
      *ScopeOrNone;

  if (OrderingAddrSpace != SIAtomicAddrSpace::ATOMIC) {
    // We currently expect the MMRA refinement below to be the only place that
    // can narrow the address spaces ordered by the fence. If that changes,
    // the semantics of this check need to be reviewed in case it must
    // preserve certain address spaces.
    reportUnsupported(MI, "Unsupported atomic address space");
    return std::nullopt;
  }

  auto SynchronizeAS = getSynchronizeAddrSpaceMD(*MI);
  if (SynchronizeAS)
    OrderingAddrSpace = *SynchronizeAS;

  return SIMemOpInfo(ST, Ordering, Scope, OrderingAddrSpace,
                     SIAtomicAddrSpace::ATOMIC, IsCrossAddressSpaceOrdering,
                     AtomicOrdering::NotAtomic);
}

std::optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
    const MachineBasicBlock::iterator &MI) const {
  // An atomic cmpxchg/rmw both reads and writes memory.
  if (!(MI->mayLoad() && MI->mayStore()))
    return std::nullopt;

  // Be conservative if there are no memory operands.
  if (MI->getNumMemOperands() == 0)
    return SIMemOpInfo(ST);

  return constructFromMIWithMMO(MI);
}

std::optional<SIMemOpInfo>
SIMemOpAccess::getLDSDMAInfo(const MachineBasicBlock::iterator &MI) const {
  if (!SIInstrInfo::isLDSDMA(*MI))
    return std::nullopt;

  return constructFromMIWithMMO(MI);
}

SICacheControl::SICacheControl(const GCNSubtarget &ST) : ST(ST) {
  TII = ST.getInstrInfo();
  IV = getIsaVersion(ST.getCPU());
  InsertCacheInv = !AmdgcnSkipCacheInvalidations;
}

bool SICacheControl::enableCPolBits(const MachineBasicBlock::iterator MI,
                                    unsigned Bits) const {
  MachineOperand *CPol = TII->getNamedOperand(*MI, AMDGPU::OpName::cpol);
  if (!CPol)
    return false;

  CPol->setImm(CPol->getImm() | Bits);
  return true;
}

bool SICacheControl::canAffectGlobalAddrSpace(SIAtomicAddrSpace AS) const {
  assert((ST.hasGloballyAddressableScratch() ||
          (AS & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ||
          (AS & SIAtomicAddrSpace::SCRATCH) == SIAtomicAddrSpace::NONE) &&
         "scratch instructions should already be replaced by flat "
         "instructions if GloballyAddressableScratch is enabled");
  return (AS & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE;
}

/* static */
std::unique_ptr<SICacheControl>
SICacheControl::create(const GCNSubtarget &ST) {
  GCNSubtarget::Generation Generation = ST.getGeneration();
  if (Generation < AMDGPUSubtarget::GFX10)
    return std::make_unique<SIGfx6CacheControl>(ST);
  if (Generation < AMDGPUSubtarget::GFX12)
    return std::make_unique<SIGfx10CacheControl>(ST);
  return std::make_unique<SIGfx12CacheControl>(ST);
}
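// Dispatch is by generation only: everything before GFX10 (including the
// GFX90A/GFX940 special cases) shares SIGfx6CacheControl, GFX10-11 use
// SIGfx10CacheControl, and GFX12+ uses SIGfx12CacheControl.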

bool SIGfx6CacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI,
    SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  assert(MI->mayLoad() && !MI->mayStore());

  if (!canAffectGlobalAddrSpace(AddrSpace)) {
    // The scratch address space does not need the global memory caches to be
    // bypassed, as all memory operations by the same thread are sequentially
    // consistent, and no other thread can access scratch memory.

    // Other address spaces do not have a cache.
    return false;
  }

  bool Changed = false;
  switch (Scope) {
  case SIAtomicScope::SYSTEM:
    if (ST.hasGFX940Insts()) {
      // Set SC bits to indicate system scope.
      Changed |= enableCPolBits(MI, AMDGPU::CPol::SC0 | AMDGPU::CPol::SC1);
      break;
    }
    [[fallthrough]];
  case SIAtomicScope::AGENT:
    if (ST.hasGFX940Insts()) {
      // Set SC bits to indicate agent scope.
      Changed |= enableCPolBits(MI, AMDGPU::CPol::SC1);
    } else {
      // Set the L1 cache policy to MISS_EVICT.
      // Note: there is no L2 cache bypass policy at the ISA level.
      Changed |= enableCPolBits(MI, AMDGPU::CPol::GLC);
    }
    break;
  case SIAtomicScope::WORKGROUP:
    if (ST.hasGFX940Insts()) {
      // In threadgroup split mode the waves of a work-group can be executing
      // on different CUs, so the per-CU L1 must be bypassed. In
      // non-threadgroup split mode all waves of a work-group are on the same
      // CU, and the L1 does not need to be bypassed. Setting the SC bits to
      // indicate work-group scope does this automatically.
      Changed |= enableCPolBits(MI, AMDGPU::CPol::SC0);
    } else if (ST.hasGFX90AInsts()) {
      // In threadgroup split mode the waves of a work-group can be executing
      // on different CUs, so the per-CU L1 must be bypassed. In
      // non-threadgroup split mode all waves of a work-group are on the same
      // CU, and the L1 does not need to be bypassed.
      if (ST.isTgSplitEnabled())
        Changed |= enableCPolBits(MI, AMDGPU::CPol::GLC);
    }
    break;
  case SIAtomicScope::WAVEFRONT:
  case SIAtomicScope::SINGLETHREAD:
    // No cache to bypass.
    break;
  default:
    llvm_unreachable("Unsupported synchronization scope");
  }

  return Changed;
}

bool SIGfx6CacheControl::enableStoreCacheBypass(
    const MachineBasicBlock::iterator &MI,
    SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  assert(!MI->mayLoad() && MI->mayStore());
  bool Changed = false;

  // On targets other than GFX940, the L1 cache is write through so does not
  // need to be bypassed, and there is no bypass control for the L2 cache at
  // the ISA level.
  if (ST.hasGFX940Insts() && canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // Set SC bits to indicate system scope.
      Changed |= enableCPolBits(MI, AMDGPU::CPol::SC0 | AMDGPU::CPol::SC1);
      break;
    case SIAtomicScope::AGENT:
      // Set SC bits to indicate agent scope.
      Changed |= enableCPolBits(MI, AMDGPU::CPol::SC1);
      break;
    case SIAtomicScope::WORKGROUP:
      // Set SC bits to indicate workgroup scope.
      Changed |= enableCPolBits(MI, AMDGPU::CPol::SC0);
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // Leave SC bits unset to indicate wavefront scope.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }

    // The scratch address space does not need the global memory caches to be
    // bypassed, as all memory operations by the same thread are sequentially
    // consistent, and no other thread can access scratch memory.

    // Other address spaces do not have a cache.
  }

  return Changed;
}

bool SIGfx6CacheControl::enableRMWCacheBypass(
    const MachineBasicBlock::iterator &MI,
    SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  assert(MI->mayLoad() && MI->mayStore());
  bool Changed = false;

  // On targets other than GFX940, atomic read-modify-write instructions
  // implicitly bypass the L1 cache, and there is no bypass control for the
  // L2 cache at the ISA level.
  if (ST.hasGFX940Insts() && canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // Set SC1 bit to indicate system scope.
      Changed |= enableCPolBits(MI, AMDGPU::CPol::SC1);
      break;
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // RMW atomic operations implicitly bypass the L1 cache and only use SC1
      // to indicate system or agent scope. The SC0 bit is used to indicate if
      // they are return or no-return. Leave SC1 bit unset to indicate agent
      // scope.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  return Changed;
}

bool SIGfx6CacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const {
  // Only handle load and store, not atomic read-modify-write instructions.
  // The latter use glc to indicate if the atomic returns a result, so glc
  // must not be used for cache control on them.
  assert(MI->mayLoad() ^ MI->mayStore());

  // Only update load and store, not LLVM IR atomic read-modify-write
  // instructions. The latter are always marked as volatile, so handling them
  // here would pessimize all atomics; they also do not support the
  // nontemporal attribute.
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  bool Changed = false;

  if (IsVolatile) {
    if (ST.hasGFX940Insts()) {
      // Set SC bits to indicate system scope.
      Changed |= enableCPolBits(MI, AMDGPU::CPol::SC0 | AMDGPU::CPol::SC1);
    } else if (Op == SIMemOp::LOAD) {
      // Set the L1 cache policy to MISS_EVICT for load instructions and
      // MISS_LRU for store instructions.
      // Note: there is no L2 cache bypass policy at the ISA level.
      Changed |= enableCPolBits(MI, AMDGPU::CPol::GLC);
    }

    // Ensure the operation has completed at system scope to cause all
    // volatile operations to be visible outside the program in a global
    // order. Do not request cross address space ordering, as only the global
    // address space can be observable outside the program, so there is no
    // need to cause a waitcnt for LDS address space operations.
    Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
                          Position::AFTER, AtomicOrdering::Unordered,
                          false);

    return Changed;
  }

  if (IsNonTemporal) {
    if (ST.hasGFX940Insts()) {
      Changed |= enableCPolBits(MI, AMDGPU::CPol::NT);
    } else {
      // Setting both GLC and SLC configures the L1 cache policy to MISS_EVICT
      // and the L2 cache policy to STREAM.
      Changed |= enableCPolBits(MI, AMDGPU::CPol::GLC | AMDGPU::CPol::SLC);
    }
    return Changed;
  }

  return Changed;
}

bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
                                    SIAtomicScope Scope,
                                    SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                    bool IsCrossAddrSpaceOrdering, Position Pos,
                                    AtomicOrdering Order,
                                    bool AtomicsOnly) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  if (ST.hasGFX90AInsts() && ST.isTgSplitEnabled()) {
    // In threadgroup split mode the waves of a work-group can be executing on
    // different CUs, so global or GDS memory operations must complete to be
    // visible to waves on the other CUs. In non-threadgroup split mode all
    // waves of a work-group are on the same CU: all waves access the same L1,
    // and GDS accesses are ordered on a CU, so no wait is needed.
    if (((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH |
                       SIAtomicAddrSpace::GDS)) != SIAtomicAddrSpace::NONE) &&
        (Scope == SIAtomicScope::WORKGROUP)) {
      // Promote to agent scope.
      Scope = SIAtomicScope::AGENT;
    }
    // In threadgroup split mode LDS cannot be allocated, so there is no need
    // to wait for LDS memory operations.
    AddrSpace &= ~SIAtomicAddrSpace::LDS;
  }

  bool VMCnt = false;
  bool LGKMCnt = false;

  if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
      SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      VMCnt |= true;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The L1 cache keeps all memory operations by the same wavefront in
      // order for the same address space.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
      // If there is no cross address space ordering, an "S_WAITCNT lgkmcnt(0)"
      // is not needed, as LDS operations for all waves are executed in a
      // total global ordering as observed by all waves. It is required if
      // also synchronizing with global/GDS memory, as LDS operations could be
      // reordered with respect to later global/GDS memory operations of the
      // same wave.
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The LDS keeps all memory operations by the same wavefront in order
      // for the same address space.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // If there is no cross address space ordering, a GDS "S_WAITCNT
      // lgkmcnt(0)" is not needed, as GDS operations for all waves are
      // executed in a total global ordering as observed by all waves. It is
      // required if also synchronizing with global/LDS memory, as GDS
      // operations could be reordered with respect to later global/LDS memory
      // operations of the same wave.
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The GDS keeps all memory operations by the same work-group in order
      // for the same address space.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (VMCnt || LGKMCnt) {
    unsigned WaitCntImmediate =
        AMDGPU::encodeWaitcnt(IV,
                              VMCnt ? 0 : getVmcntBitMask(IV),
                              getExpcntBitMask(IV),
                              LGKMCnt ? 0 : getLgkmcntBitMask(IV));
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_soft))
        .addImm(WaitCntImmediate);
    Changed = true;
  }

  // NOTE: a guarded block here was partially lost in extraction; only its
  // workgroup-scope and LDS address-space checks survive. Its purpose appears
  // to be an additional wait for LDS operations performed through VMEM (LDS
  // DMA), which the lgkmcnt wait above does not cover:
  //
  //   if (<lost condition> &&
  //       Scope == SIAtomicScope::WORKGROUP &&
  //       (AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
  //     <lost body>
  //   }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
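// Worked example (a sketch, not emitted verbatim by this file): an
// agent-scope acquire on GFX9 that orders the global and LDS address spaces
// with cross-address-space ordering sets both VMCnt and LGKMCnt above, so the
// encoded soft wait is equivalent to:
//
//   s_waitcnt vmcnt(0) lgkmcnt(0)
//
// expcnt is passed as its full bit mask, i.e. "no wait", since only the
// memory counters participate in the memory model here.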

static bool canUseBUFFER_WBINVL1_VOL(const GCNSubtarget &ST) {
  // (Guard reconstructed: BUFFER_WBINVL1_VOL is not available on the earliest
  // GFX6 targets.)
  if (ST.getGeneration() <= AMDGPUSubtarget::SOUTHERN_ISLANDS)
    return false;
  return !ST.isAmdPalOS() && !ST.isMesa3DOS();
}

bool SIGfx6CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                       SIAtomicScope Scope,
                                       SIAtomicAddrSpace AddrSpace,
                                       Position Pos) const {
  if (!InsertCacheInv)
    return false;

  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  const DebugLoc &DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  const unsigned InvalidateL1 = canUseBUFFER_WBINVL1_VOL(ST)
                                    ? AMDGPU::BUFFER_WBINVL1_VOL
                                    : AMDGPU::BUFFER_WBINVL1;

  if (canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      if (ST.hasGFX940Insts()) {
        // Ensures that following loads will not see stale remote VMEM data or
        // stale local VMEM data with MTYPE NC. Local VMEM data with MTYPE RW
        // and CC will never be stale due to the local memory probes.
        BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INV))
            // Set SC bits to indicate system scope.
            .addImm(AMDGPU::CPol::SC0 | AMDGPU::CPol::SC1);
        // Inserting a "S_WAITCNT vmcnt(0)" after is not required because the
        // hardware does not reorder memory operations by the same wave with
        // respect to a preceding "BUFFER_INV". The invalidate is guaranteed
        // to remove any cache lines of earlier writes by the same wave and
        // ensures later reads by the same wave will refetch the cache lines.
        Changed = true;
        break;
      }

      if (ST.hasGFX90AInsts()) {
        // Ensures that following loads will not see stale remote VMEM data or
        // stale local VMEM data with MTYPE NC.
        BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INVL2));
        BuildMI(MBB, MI, DL, TII->get(InvalidateL1));
        // No "S_WAITCNT vmcnt(0)" is needed after, for the same reason as for
        // "BUFFER_INV" above.
        Changed = true;
        break;
      }
      [[fallthrough]];
    case SIAtomicScope::AGENT:
      if (ST.hasGFX940Insts()) {
        // Ensures that following loads will not see stale remote data or
        // local MTYPE NC global data. Local MTYPE RW and CC memory will never
        // be stale due to the memory probes.
        BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INV))
            // Set SC bits to indicate agent scope.
            .addImm(AMDGPU::CPol::SC1);
        // No "S_WAITCNT vmcnt(0)" is needed after; see above.
      } else
        BuildMI(MBB, MI, DL, TII->get(InvalidateL1));
      Changed = true;
      break;
    case SIAtomicScope::WORKGROUP:
      if (ST.isTgSplitEnabled()) {
        if (ST.hasGFX940Insts()) {
          // In threadgroup split mode the waves of a work-group can be
          // executing on different CUs, so the per-CU L1 must be invalidated.
          // In non-threadgroup split mode all waves of a work-group are on
          // the same CU, and the L1 does not need to be invalidated.
          BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INV))
              // Set SC bits to indicate work-group scope.
              .addImm(AMDGPU::CPol::SC0);
          // No "S_WAITCNT vmcnt(0)" is needed after; see above.
          Changed = true;
        } else if (ST.hasGFX90AInsts()) {
          BuildMI(MBB, MI, DL, TII->get(InvalidateL1));
          Changed = true;
        }
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to invalidate.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  // The scratch address space does not need the global memory cache to be
  // flushed, as all memory operations by the same thread are sequentially
  // consistent, and no other thread can access scratch memory.

  // Other address spaces do not have a cache.

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}

bool SIGfx6CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
                                       SIAtomicScope Scope,
                                       SIAtomicAddrSpace AddrSpace,
                                       bool IsCrossAddrSpaceOrdering,
                                       Position Pos) const {
  bool Changed = false;

  if (ST.hasGFX90AInsts()) {
    MachineBasicBlock &MBB = *MI->getParent();
    const DebugLoc &DL = MI->getDebugLoc();

    if (Pos == Position::AFTER)
      ++MI;

    if (canAffectGlobalAddrSpace(AddrSpace)) {
      switch (Scope) {
      case SIAtomicScope::SYSTEM:
        // Inserting a "S_WAITCNT vmcnt(0)" before is not required because the
        // hardware does not reorder memory operations by the same wave with
        // respect to a following "BUFFER_WBL2". The "BUFFER_WBL2" is
        // guaranteed to initiate writeback of any dirty cache lines of
        // earlier writes by the same wave. A "S_WAITCNT vmcnt(0)" is needed
        // after to ensure the writeback has completed.
        BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2))
            // Set SC bits to indicate system scope.
            .addImm(ST.hasGFX940Insts()
                        ? AMDGPU::CPol::SC0 | AMDGPU::CPol::SC1
                        : 0);
        Changed = true;
        break;
      case SIAtomicScope::AGENT:
        if (ST.hasGFX940Insts()) {
          BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2))
              // Set SC bits to indicate agent scope.
              .addImm(AMDGPU::CPol::SC1);
          // Since AddrSpace contains SIAtomicAddrSpace::GLOBAL and Scope is
          // SIAtomicScope::AGENT, the following insertWait will generate the
          // required "S_WAITCNT vmcnt(0)".
          Changed = true;
        }
        break;
      case SIAtomicScope::WORKGROUP:
      case SIAtomicScope::WAVEFRONT:
      case SIAtomicScope::SINGLETHREAD:
        // Do not generate "BUFFER_WBL2", as all the cache lines of memory
        // operations by the same wave will have already been written back.
        break;
      default:
        llvm_unreachable("Unsupported synchronization scope");
      }
    }

    if (Pos == Position::AFTER)
      --MI;
  }

  // Ensure the S_WAITCNT needed by any "BUFFER_WBL2", as well as the
  // S_WAITCNT needed for the release itself, is generated.
  Changed |= insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
                        IsCrossAddrSpaceOrdering, Pos, AtomicOrdering::Release,
                        false);

  return Changed;
}

bool SIGfx10CacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  assert(MI->mayLoad() && !MI->mayStore());
  bool Changed = false;

  if (canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // Set the L0 and L1 cache policies to MISS_EVICT.
      // Note: there is no L2 cache coherent bypass control at the ISA level.
      Changed |= enableCPolBits(MI, AMDGPU::CPol::GLC);
      Changed |= enableCPolBits(MI, AMDGPU::CPol::DLC);
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be executing on either CU
      // of the WGP, so the per-CU L0 must be bypassed. In CU mode all waves
      // of a work-group are on the same CU, and the L0 does not need to be
      // bypassed.
      if (!ST.isCuModeEnabled())
        Changed |= enableCPolBits(MI, AMDGPU::CPol::GLC);
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to bypass.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  // The scratch address space does not need the global memory caches to be
  // bypassed, as all memory operations by the same thread are sequentially
  // consistent, and no other thread can access scratch memory.

  // Other address spaces do not have a cache.

  return Changed;
}

1495bool SIGfx10CacheControl::enableVolatileAndOrNonTemporal(

1497 bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const {

1498

1499

1500

1501

1503

1504

1505

1506

1507

1508 assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

1509

1511

1512 if (IsVolatile) {

1513

1514

1515

1516 if (Op == SIMemOp::LOAD) {

1518 }

1519

1520

1523

1524

1525

1526

1527

1528

1529 Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,

1530 Position::AFTER, AtomicOrdering::Unordered,

1531 false);

1533 }

1534

1535 if (IsNonTemporal) {

1536

1537

1538

1539

1540 if (Op == SIMemOp::STORE)

1543

1544

1547

1549 }

1550

1552}

1553

bool SIGfx10CacheControl::insertWait(MachineBasicBlock::iterator &MI,
                                     SIAtomicScope Scope,
                                     SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                     bool IsCrossAddrSpaceOrdering,
                                     Position Pos, AtomicOrdering Order,
                                     bool AtomicsOnly) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  bool VMCnt = false;
  bool VSCnt = false;
  bool LGKMCnt = false;

  if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
      SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
        VMCnt |= true;
      if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
        VSCnt |= true;
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be executing on either CU
      // of the WGP, so operations must complete to be visible to waves on the
      // other CU, as the L0 is per CU. In CU mode all waves of a work-group
      // are on the same CU, which shares the same L0.
      if (!ST.isCuModeEnabled()) {
        if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
          VMCnt |= true;
        if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
          VSCnt |= true;
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The L0 cache keeps all memory operations by the same wavefront in
      // order for the same address space.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
      // If there is no cross address space ordering, an "S_WAITCNT lgkmcnt(0)"
      // is not needed, as LDS operations for all waves are executed in a
      // total global ordering as observed by all waves. It is required if
      // also synchronizing with global/GDS memory, as LDS operations could be
      // reordered with respect to later global/GDS memory operations of the
      // same wave.
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The LDS keeps all memory operations by the same wavefront in order
      // for the same address space.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // If there is no cross address space ordering, a GDS "S_WAITCNT
      // lgkmcnt(0)" is not needed, as GDS operations for all waves are
      // executed in a total global ordering as observed by all waves. It is
      // required if also synchronizing with global/LDS memory, as GDS
      // operations could be reordered with respect to later global/LDS memory
      // operations of the same wave.
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The GDS keeps all memory operations by the same work-group in order
      // for the same address space.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (VMCnt || LGKMCnt) {
    unsigned WaitCntImmediate =
        AMDGPU::encodeWaitcnt(IV,
                              VMCnt ? 0 : getVmcntBitMask(IV),
                              getExpcntBitMask(IV),
                              LGKMCnt ? 0 : getLgkmcntBitMask(IV));
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_soft))
        .addImm(WaitCntImmediate);
    Changed = true;
  }

  // NOTE: a guarded block here was partially lost in extraction; only its
  // workgroup-scope and LDS address-space checks survive. As in the GFX6
  // version above, its purpose appears to be an additional wait for LDS
  // operations performed through VMEM (LDS DMA):
  //
  //   if (<lost condition> &&
  //       Scope == SIAtomicScope::WORKGROUP &&
  //       (AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
  //     <lost body>
  //   }

  if (VSCnt) {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_VSCNT_soft))
        .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
        .addImm(0);
    Changed = true;
  }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}

bool SIGfx10CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                        SIAtomicScope Scope,
                                        SIAtomicAddrSpace AddrSpace,
                                        Position Pos) const {
  if (!InsertCacheInv)
    return false;

  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  const DebugLoc &DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  if (canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // The order of invalidates matters here: invalidate "outer in", L1 then
      // L0, to avoid L0 pulling stale data back in from L1 before L1 is
      // invalidated.
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL1_INV));
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL0_INV));
      Changed = true;
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be executing on either CU
      // of the WGP, so the per-CU L0 must be invalidated. In CU mode all
      // waves of a work-group are on the same CU, and the L0 does not need to
      // be invalidated.
      if (!ST.isCuModeEnabled()) {
        BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL0_INV));
        Changed = true;
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to invalidate.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  // The scratch address space does not need the global memory cache to be
  // flushed, as all memory operations by the same thread are sequentially
  // consistent, and no other thread can access scratch memory.

  // Other address spaces do not have a cache.

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}

bool SIGfx12CacheControl::setTH(const MachineBasicBlock::iterator MI,
                                AMDGPU::CPol::CPol Value) const {
  MachineOperand *CPol = TII->getNamedOperand(*MI, OpName::cpol);
  if (!CPol)
    return false;

  uint64_t NewTH = Value & AMDGPU::CPol::TH;
  if ((CPol->getImm() & AMDGPU::CPol::TH) != NewTH) {
    CPol->setImm((CPol->getImm() & ~AMDGPU::CPol::TH) | NewTH);
    return true;
  }

  return false;
}

bool SIGfx12CacheControl::setScope(const MachineBasicBlock::iterator MI,
                                   AMDGPU::CPol::CPol Value) const {
  MachineOperand *CPol = TII->getNamedOperand(*MI, OpName::cpol);
  if (!CPol)
    return false;

  uint64_t NewScope = Value & AMDGPU::CPol::SCOPE;
  if ((CPol->getImm() & AMDGPU::CPol::SCOPE) != NewScope) {
    CPol->setImm((CPol->getImm() & ~AMDGPU::CPol::SCOPE) | NewScope);
    return true;
  }

  return false;
}

bool SIGfx12CacheControl::insertWaitsBeforeSystemScopeStore(
    const MachineBasicBlock::iterator MI) const {
  // TODO: implement flag for frontend to give us a hint not to insert waits.

  MachineBasicBlock &MBB = *MI->getParent();
  const DebugLoc &DL = MI->getDebugLoc();

  BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_LOADCNT_soft)).addImm(0);
  if (ST.hasImageInsts()) {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_SAMPLECNT_soft)).addImm(0);
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_BVHCNT_soft)).addImm(0);
  }
  BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_KMCNT_soft)).addImm(0);
  BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_STORECNT_soft)).addImm(0);

  return true;
}

bool SIGfx12CacheControl::insertWait(MachineBasicBlock::iterator &MI,
                                     SIAtomicScope Scope,
                                     SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                     bool IsCrossAddrSpaceOrdering,
                                     Position Pos, AtomicOrdering Order,
                                     bool AtomicsOnly) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  const DebugLoc &DL = MI->getDebugLoc();

  bool LOADCnt = false;
  bool DSCnt = false;
  bool STORECnt = false;

  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
      SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::CLUSTER:
      if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
        LOADCnt |= true;
      if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
        STORECnt |= true;
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be executing on either CU
      // of the WGP, so operations must complete to be visible to waves on the
      // other CU, as the L0 is per CU. In CU mode all waves of a work-group
      // are on the same CU, which shares the same L0.
      //
      // NOTE: part of this condition (one clause) and its accompanying
      // comment were lost in extraction.
      if (!ST.isCuModeEnabled() || ST.hasGFX1250Insts()) {
        if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
          LOADCnt |= true;
        if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
          STORECnt |= true;
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The L0 cache keeps all memory operations by the same wavefront in
      // order for the same address space.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::CLUSTER:
    case SIAtomicScope::WORKGROUP:
      // If there is no cross address space ordering, an "S_WAIT_DSCNT 0" is
      // not needed, as LDS operations for all waves are executed in a total
      // global ordering as observed by all waves. It is required if also
      // synchronizing with global/GDS memory, as LDS operations could be
      // reordered with respect to later global/GDS memory operations of the
      // same wave.
      DSCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The LDS keeps all memory operations by the same wavefront in order
      // for the same address space.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (LOADCnt) {
    // Acquire sequences only need to wait on the previous atomic operation
    // they pair with. Sample and BVH instructions cannot be used by atomics,
    // so their counters only need to be waited on for ordinary (non-atomic)
    // ordering.
    if (!AtomicsOnly && ST.hasImageInsts()) {
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_BVHCNT_soft)).addImm(0);
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_SAMPLECNT_soft)).addImm(0);
    }
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_LOADCNT_soft)).addImm(0);
    Changed = true;
  }

  if (STORECnt) {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_STORECNT_soft)).addImm(0);
    Changed = true;
  }

  if (DSCnt) {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_DSCNT_soft)).addImm(0);
    Changed = true;
  }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
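// Worked example (a sketch): a system-scope seq_cst fence on GFX12 reaches
// this function with LOADCnt, STORECnt and DSCnt all eligible, producing soft
// waits that SIInsertWaitcnts may later relax or remove:
//
//   s_wait_bvhcnt 0x0        ; only if image instructions exist and
//   s_wait_samplecnt 0x0     ; the sequence is not atomics-only
//   s_wait_loadcnt 0x0
//   s_wait_storecnt 0x0
//   s_wait_dscnt 0x0         ; only with cross-address-space LDS ordering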

bool SIGfx12CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                        SIAtomicScope Scope,
                                        SIAtomicAddrSpace AddrSpace,
                                        Position Pos) const {
  if (!InsertCacheInv)
    return false;

  MachineBasicBlock &MBB = *MI->getParent();
  const DebugLoc &DL = MI->getDebugLoc();

  // The scratch address space does not need the global memory cache to be
  // invalidated, as all memory operations by the same thread are sequentially
  // consistent, and no other thread can access scratch memory.

  // Other address spaces do not have a cache.
  if (!canAffectGlobalAddrSpace(AddrSpace))
    return false;

  AMDGPU::CPol::CPol ScopeImm = AMDGPU::CPol::SCOPE_DEV;
  switch (Scope) {
  case SIAtomicScope::SYSTEM:
    ScopeImm = AMDGPU::CPol::SCOPE_SYS;
    break;
  case SIAtomicScope::AGENT:
    ScopeImm = AMDGPU::CPol::SCOPE_DEV;
    break;
  case SIAtomicScope::CLUSTER:
    ScopeImm = AMDGPU::CPol::SCOPE_SE;
    break;
  case SIAtomicScope::WORKGROUP:
    // In WGP mode the waves of a work-group can be executing on either CU of
    // the WGP, so the per-CU L0 must be invalidated. In CU mode all waves of
    // a work-group are on the same CU, and the L0 does not need to be
    // invalidated.
    if (ST.isCuModeEnabled())
      return false;

    ScopeImm = AMDGPU::CPol::SCOPE_SE;
    break;
  case SIAtomicScope::WAVEFRONT:
  case SIAtomicScope::SINGLETHREAD:
    // No cache to invalidate.
    return false;
  default:
    llvm_unreachable("Unsupported synchronization scope");
  }

  if (Pos == Position::AFTER)
    ++MI;

  BuildMI(MBB, MI, DL, TII->get(AMDGPU::GLOBAL_INV)).addImm(ScopeImm);

  if (Pos == Position::AFTER)
    --MI;

  return true;
}

bool SIGfx12CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
                                        SIAtomicScope Scope,
                                        SIAtomicAddrSpace AddrSpace,
                                        bool IsCrossAddrSpaceOrdering,
                                        Position Pos) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  const DebugLoc &DL = MI->getDebugLoc();

  // The scratch address space does not need the global memory cache to be
  // written back, as all memory operations by the same thread are
  // sequentially consistent, and no other thread can access scratch memory.

  // Other address spaces do not have a cache.
  if (canAffectGlobalAddrSpace(AddrSpace)) {
    if (Pos == Position::AFTER)
      ++MI;

    // GLOBAL_WB is only necessary at system scope on GFX12.0, and also at
    // device scope on GFX12.5. Emitting it for lower scopes is a slow no-op,
    // so it is omitted for performance.
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::GLOBAL_WB))
          .addImm(AMDGPU::CPol::SCOPE_SYS);
      Changed = true;
      break;
    case SIAtomicScope::AGENT:
      // GFX12.5 needs to write back the device-level cache in this case.
      if (ST.hasGFX1250Insts()) {
        BuildMI(MBB, MI, DL, TII->get(AMDGPU::GLOBAL_WB))
            .addImm(AMDGPU::CPol::SCOPE_DEV);
        Changed = true;
      }
      break;
    case SIAtomicScope::CLUSTER:
    case SIAtomicScope::WORKGROUP:
      // No cache to write back at these scopes.
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to write back.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }

    if (Pos == Position::AFTER)
      --MI;
  }

  // Ensure the waits needed by any "GLOBAL_WB", as well as the waits needed
  // for the release itself, are generated.
  Changed |= insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
                        IsCrossAddrSpaceOrdering, Pos, AtomicOrdering::Release,
                        false);

  return Changed;
}

bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const {

  // Only handle load and store, not atomic read-modify-write instructions.
  assert(MI->mayLoad() ^ MI->mayStore());

  // Only update load and store, not LLVM IR atomic read-modify-write
  // instructions. The latter are always marked as volatile, so handling them
  // here would pessimize all atomics; they also do not support the
  // nontemporal attribute.
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  bool Changed = false;

  if (IsLastUse) {
    // Set last-use hint.
    Changed |= setTH(MI, AMDGPU::CPol::TH_LU);
  } else if (IsNonTemporal) {
    // Set non-temporal hint for all cache levels.
    Changed |= setTH(MI, AMDGPU::CPol::TH_NT);
  }

  if (IsVolatile) {
    Changed |= setScope(MI, AMDGPU::CPol::SCOPE_SYS);

    if (ST.requiresWaitXCntForSingleAccessInstructions() &&
        SIInstrInfo::isVMEM(*MI)) {
      MachineBasicBlock &MBB = *MI->getParent();
      BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::S_WAIT_XCNT_soft))
          .addImm(0);
      Changed = true;
    }

    // Ensure the operation has completed at system scope to cause all
    // volatile operations to be visible outside the program in a global
    // order. Do not request cross address space ordering, as only the global
    // address space can be observable outside the program, so there is no
    // need to cause a wait for LDS address space operations.
    Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
                          Position::AFTER, AtomicOrdering::Unordered,
                          false);
  }

  return Changed;
}

bool SIGfx12CacheControl::finalizeStore(MachineInstr &MI, bool Atomic) const {
  assert(MI.mayStore() && "Not a Store inst");
  const bool IsRMW = (MI.mayLoad() && MI.mayStore());
  bool Changed = false;

  if (Atomic && ST.requiresWaitXCntForSingleAccessInstructions() &&
      SIInstrInfo::isVMEM(MI)) {
    MachineBasicBlock &MBB = *MI.getParent();
    BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(AMDGPU::S_WAIT_XCNT_soft))
        .addImm(0);
    Changed = true;
  }

  // RMW instructions do not use the cpol-based handling below.
  if (IsRMW)
    return Changed;

  MachineOperand *CPol = TII->getNamedOperand(MI, OpName::cpol);
  if (!CPol)
    return Changed;
  const unsigned Scope = CPol->getImm() & AMDGPU::CPol::SCOPE;

  // GFX12.0 only: extra waits are needed before system-scope stores.
  if (ST.requiresWaitsBeforeSystemScopeStores() && !Atomic &&
      Scope == AMDGPU::CPol::SCOPE_SYS)
    Changed |= insertWaitsBeforeSystemScopeStore(MI.getIterator());

  return Changed;
}

bool SIGfx12CacheControl::handleCooperativeAtomic(MachineInstr &MI) const {
  if (ST.hasGFX1250Insts())
    return false;

  // NOTE: the body below was partially lost in extraction and is
  // reconstructed as a scope promotion: on GFX12.0, cooperative atomics must
  // not be satisfied from a cache level private to a subset of the
  // participating waves.
  MachineOperand *CPol = TII->getNamedOperand(MI, OpName::cpol);
  assert(CPol && "No CPol operand?");
  if ((CPol->getImm() & AMDGPU::CPol::SCOPE) < AMDGPU::CPol::SCOPE_SE)
    return setScope(MI, AMDGPU::CPol::SCOPE_SE);
  return false;
}

bool SIGfx12CacheControl::setAtomicScope(const MachineBasicBlock::iterator &MI,
                                         SIAtomicScope Scope,
                                         SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if (canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      Changed |= setScope(MI, AMDGPU::CPol::SCOPE_SYS);
      break;
    case SIAtomicScope::AGENT:
      Changed |= setScope(MI, AMDGPU::CPol::SCOPE_DEV);
      break;
    case SIAtomicScope::CLUSTER:
      Changed |= setScope(MI, AMDGPU::CPol::SCOPE_SE);
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode, SCOPE_SE is needed as waves can execute on different
      // CUs that access different L0s.
      if (!ST.isCuModeEnabled())
        Changed |= setScope(MI, AMDGPU::CPol::SCOPE_SE);
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to bypass.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  // The scratch address space does not need the global memory caches to be
  // bypassed, as all memory operations by the same thread are sequentially
  // consistent, and no other thread can access scratch memory.

  // Other address spaces do not have a cache.

  return Changed;
}

bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
  if (AtomicPseudoMIs.empty())
    return false;

  for (auto &MI : AtomicPseudoMIs)
    MI->eraseFromParent();

  AtomicPseudoMIs.clear();
  return true;
}

bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI,
                                   MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && !MI->mayStore());

  bool Changed = false;

  if (MOI.isAtomic()) {
    const AtomicOrdering Order = MOI.getOrdering();
    if (Order == AtomicOrdering::Monotonic ||
        Order == AtomicOrdering::Acquire ||
        Order == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->enableLoadCacheBypass(MI, MOI.getScope(),
                                           MOI.getOrderingAddrSpace());
    }

    // Handle cooperative atomics after cache bypass, as it may need to
    // override some of the bits set above.
    if (MOI.isCooperative())
      Changed |= CC->handleCooperativeAtomic(*MI);

    if (Order == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertWait(MI, MOI.getScope(), MOI.getOrderingAddrSpace(),
                                SIMemOp::LOAD | SIMemOp::STORE,
                                MOI.getIsCrossAddressSpaceOrdering(),
                                Position::BEFORE, Order, false);

    if (Order == AtomicOrdering::Acquire ||
        Order == AtomicOrdering::SequentiallyConsistent) {
      Changed |=
          CC->insertWait(MI, MOI.getScope(), MOI.getInstrAddrSpace(),
                         SIMemOp::LOAD, MOI.getIsCrossAddressSpaceOrdering(),
                         Position::AFTER, Order, true);
      Changed |= CC->insertAcquire(MI, MOI.getScope(),
                                   MOI.getOrderingAddrSpace(),
                                   Position::AFTER);
    }

    return Changed;
  }

  // Atomic instructions already bypass caches to the scope specified by the
  // SyncScope operand. Only non-atomic volatile and nontemporal/last-use
  // instructions need additional treatment.
  Changed |= CC->enableVolatileAndOrNonTemporal(
      MI, MOI.getInstrAddrSpace(), SIMemOp::LOAD, MOI.isVolatile(),
      MOI.isNonTemporal(), MOI.isLastUse());

  return Changed;
}

bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI,
                                    MachineBasicBlock::iterator &MI) {
  assert(!MI->mayLoad() && MI->mayStore());

  bool Changed = false;
  // Keep a direct reference, as the iterator can lose track of the store.
  MachineInstr &StoreMI = *MI;

  if (MOI.isAtomic()) {
    if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
        MOI.getOrdering() == AtomicOrdering::Release ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->enableStoreCacheBypass(MI, MOI.getScope(),
                                            MOI.getOrderingAddrSpace());
    }

    // Handle cooperative atomics after cache bypass, as it may need to
    // override some of the bits set above.
    if (MOI.isCooperative())
      Changed |= CC->handleCooperativeAtomic(*MI);

    if (MOI.getOrdering() == AtomicOrdering::Release ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertRelease(MI, MOI.getScope(),
                                   MOI.getOrderingAddrSpace(),
                                   MOI.getIsCrossAddressSpaceOrdering(),
                                   Position::BEFORE);

    Changed |= CC->finalizeStore(StoreMI, /*Atomic=*/true);
    return Changed;
  }

  // Atomic instructions already bypass caches to the scope specified by the
  // SyncScope operand. Only non-atomic volatile and nontemporal instructions
  // need additional treatment.
  Changed |= CC->enableVolatileAndOrNonTemporal(
      MI, MOI.getInstrAddrSpace(), SIMemOp::STORE, MOI.isVolatile(),
      MOI.isNonTemporal());

  // GFX12-specific: the scope here is the desired coherence domain in the
  // cache hierarchy (an instruction field), not the atomic scope.
  Changed |= CC->finalizeStore(StoreMI, /*Atomic=*/false);
  return Changed;
}

bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,
                                          MachineBasicBlock::iterator &MI) {
  assert(MI->getOpcode() == AMDGPU::ATOMIC_FENCE);

  AtomicPseudoMIs.push_back(MI);
  bool Changed = false;

  const SIAtomicAddrSpace OrderingAddrSpace = MOI.getOrderingAddrSpace();

  if (MOI.isAtomic()) {
    const AtomicOrdering Order = MOI.getOrdering();
    if (Order == AtomicOrdering::Acquire) {
      // Acquire fences only need to wait on the previous atomic they pair
      // with, hence AtomicsOnly is true.
      Changed |= CC->insertWait(MI, MOI.getScope(), OrderingAddrSpace,
                                SIMemOp::LOAD | SIMemOp::STORE,
                                MOI.getIsCrossAddressSpaceOrdering(),
                                Position::BEFORE, Order, true);
    }

    if (Order == AtomicOrdering::Release ||
        Order == AtomicOrdering::AcquireRelease ||
        Order == AtomicOrdering::SequentiallyConsistent)
      /// TODO: This relies on a barrier always generating a waitcnt for LDS
      /// to ensure it is not reordered with the completion of the preceding
      /// LDS operations. If the barrier had a memory ordering and memory
      /// scope, then the library would not need to generate a fence. Support
      /// for barriers could be added to this file, and SIInsertWaitcnts could
      /// then stop unconditionally adding an S_WAITCNT before an S_BARRIER.
      Changed |= CC->insertRelease(MI, MOI.getScope(), OrderingAddrSpace,
                                   MOI.getIsCrossAddressSpaceOrdering(),
                                   Position::BEFORE);

    // TODO: If both release and invalidate are happening, they could be
    // combined to use the single "BUFFER_WBINV*" instruction. This could be
    // done by reorganizing this code, or as part of optimizing the
    // SIInsertWaitcnts pass if it added support for fences.
    if (Order == AtomicOrdering::Acquire ||
        Order == AtomicOrdering::AcquireRelease ||
        Order == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertAcquire(MI, MOI.getScope(), OrderingAddrSpace,
                                   Position::BEFORE);

    return Changed;
  }

  return Changed;
}
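// Worked example (a sketch): "fence syncscope("agent") acquire" typically
// expands to a wait on the relevant counters followed by a cache invalidate
// (e.g. s_waitcnt vmcnt(0) plus buffer_wbinvl1_vol on GFX9, or a GLOBAL_INV
// on GFX12), all inserted before the ATOMIC_FENCE pseudo, which is then
// erased by removeAtomicPseudoMIs().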

bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
                                                 MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && MI->mayStore());

  bool Changed = false;
  MachineInstr &RMWMI = *MI;

  if (MOI.isAtomic()) {
    const AtomicOrdering Order = MOI.getOrdering();
    if (Order == AtomicOrdering::Monotonic ||
        Order == AtomicOrdering::Acquire || Order == AtomicOrdering::Release ||
        Order == AtomicOrdering::AcquireRelease ||
        Order == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->enableRMWCacheBypass(MI, MOI.getScope(),
                                          MOI.getInstrAddrSpace());
    }

    if (Order == AtomicOrdering::Release ||
        Order == AtomicOrdering::AcquireRelease ||
        Order == AtomicOrdering::SequentiallyConsistent ||
        MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertRelease(MI, MOI.getScope(),
                                   MOI.getOrderingAddrSpace(),
                                   MOI.getIsCrossAddressSpaceOrdering(),
                                   Position::BEFORE);

    if (Order == AtomicOrdering::Acquire ||
        Order == AtomicOrdering::AcquireRelease ||
        Order == AtomicOrdering::SequentiallyConsistent ||
        MOI.getFailureOrdering() == AtomicOrdering::Acquire ||
        MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
      Changed |=
          CC->insertWait(MI, MOI.getScope(), MOI.getInstrAddrSpace(),
                         isAtomicRet(*MI) ? SIMemOp::LOAD : SIMemOp::STORE,
                         MOI.getIsCrossAddressSpaceOrdering(), Position::AFTER,
                         Order, true);
      Changed |= CC->insertAcquire(MI, MOI.getScope(),
                                   MOI.getOrderingAddrSpace(),
                                   Position::AFTER);
    }

    Changed |= CC->finalizeStore(RMWMI, /*Atomic=*/true);
    return Changed;
  }

  return Changed;
}

bool SIMemoryLegalizer::expandLDSDMA(const SIMemOpInfo &MOI,
                                     MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && MI->mayStore());

  // From the memory model's point of view, the interesting side of an LDS
  // DMA operation is its global-memory access: a DMA into LDS reads global
  // memory (treated as a LOAD), a DMA out of LDS writes it (a STORE).
  SIMemOp OpKind =
      SIInstrInfo::mayWriteLDSThroughDMA(*MI) ? SIMemOp::LOAD : SIMemOp::STORE;

  // Handle volatile and/or nontemporal/last-use markers on the operation's
  // global address space side.
  return CC->enableVolatileAndOrNonTemporal(
      MI, MOI.getInstrAddrSpace(), OpKind, MOI.isVolatile(),
      MOI.isNonTemporal(), MOI.isLastUse());
}

bool SIMemoryLegalizerLegacy::runOnMachineFunction(MachineFunction &MF) {
  const MachineModuleInfo &MMI =
      getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
  return SIMemoryLegalizer(MMI).run(MF);
}

PreservedAnalyses
SIMemoryLegalizerPass::run(MachineFunction &MF,
                           MachineFunctionAnalysisManager &MFAM) {
  auto *MMI = MFAM.getResult<ModuleAnalysisManagerMachineFunctionProxy>(MF)
                  .getCachedResult<MachineModuleAnalysis>(
                      *MF.getFunction().getParent());
  assert(MMI && "MachineModuleAnalysis must be available");
  if (!SIMemoryLegalizer(MMI->getMMI()).run(MF))
    return PreservedAnalyses::all();

  auto PA = getMachineFunctionPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}

bool SIMemoryLegalizer::run(MachineFunction &MF) {
  bool Changed = false;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  SIMemOpAccess MOA(MMI.getObjFileInfo<AMDGPUMachineModuleInfo>(), ST);
  CC = SICacheControl::create(ST);

  for (auto &MBB : MF) {
    for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {

      // Unbundle instructions after the post-RA scheduler.
      if (MI->isBundle() && MI->mayLoadOrStore()) {
        MachineBasicBlock::instr_iterator II(MI->getIterator());
        for (MachineBasicBlock::instr_iterator I = ++II, E = MBB.instr_end();
             I != E && I->isBundledWithPred(); ++I) {
          I->unbundleFromPred();
          for (MachineOperand &MO : I->operands())
            if (MO.isReg())
              MO.setIsInternalRead(false);
        }

        MI->eraseFromParent();
        MI = II->getIterator();
      }

      if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic))
        continue;

      if (const auto &MOI = MOA.getLoadInfo(MI)) {
        Changed |= expandLoad(*MOI, MI);
      } else if (const auto &MOI = MOA.getStoreInfo(MI)) {
        Changed |= expandStore(*MOI, MI);
      } else if (const auto &MOI = MOA.getLDSDMAInfo(MI)) {
        Changed |= expandLDSDMA(*MOI, MI);
      } else if (const auto &MOI = MOA.getAtomicFenceInfo(MI)) {
        Changed |= expandAtomicFence(*MOI, MI);
      } else if (const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(MI)) {
        Changed |= expandAtomicCmpxchgOrRmw(*MOI, MI);
      }
    }
  }

  Changed |= removeAtomicPseudoMIs();
  return Changed;
}

INITIALIZE_PASS(SIMemoryLegalizerLegacy, DEBUG_TYPE, PASS_NAME, false, false)

char SIMemoryLegalizerLegacy::ID = 0;
char &llvm::SIMemoryLegalizerID = SIMemoryLegalizerLegacy::ID;

FunctionPass *llvm::createSIMemoryLegalizerPass() {
  return new SIMemoryLegalizerLegacy();
}
