//===- SIMemoryLegalizer.cpp ----------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Memory legalizer - implements memory model. More information can be
/// found here:
///   http://llvm.org/docs/AMDGPUUsage.html#memory-model
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUMachineModuleInfo.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/TargetParser/TargetParser.h"

using namespace llvm;
using namespace llvm::AMDGPU;

#define DEBUG_TYPE "si-memory-legalizer"
#define PASS_NAME "SI Memory Legalizer"
static cl::opt<bool> AmdgcnSkipCacheInvalidations(
    "amdgcn-skip-cache-invalidations", cl::init(false), cl::Hidden,
    cl::desc("Use this to skip inserting cache invalidating instructions."));
namespace {

LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
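
/// Memory operation flags. Can be ORed together.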
enum class SIMemOp {
  NONE = 0u,
  LOAD = 1u << 0,
  STORE = 1u << 1,
  LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ STORE)
};
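
/// Position to insert a new instruction relative to an existing instruction.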
enum class Position {
  BEFORE,
  AFTER
};
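
/// The atomic synchronization scopes supported by the AMDGPU target.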
enum class SIAtomicScope {
  NONE,
  SINGLETHREAD,
  WAVEFRONT,
  WORKGROUP,
  CLUSTER,
  AGENT,
  SYSTEM
};
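
/// The distinct address spaces supported by the AMDGPU target for atomic
/// memory operation. Can be ORed together.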
enum class SIAtomicAddrSpace {
  NONE = 0u,
  GLOBAL = 1u << 0,
  LDS = 1u << 1,
  SCRATCH = 1u << 2,
  GDS = 1u << 3,
  OTHER = 1u << 4,

  /// The address spaces that can be accessed by a FLAT instruction.
  FLAT = GLOBAL | LDS | SCRATCH,

  /// The address spaces that support atomic memory operations.
  ATOMIC = GLOBAL | LDS | SCRATCH | GDS,

  /// All address spaces.
  ALL = GLOBAL | LDS | SCRATCH | GDS | OTHER,

  LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ ALL)
};
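
/// Summarizes the memory ordering, synchronization scope, and address-space
/// properties of a memory operation.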
class SIMemOpInfo final {
private:
  friend class SIMemOpAccess;

  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
  AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
  SIAtomicScope Scope = SIAtomicScope::SYSTEM;
  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
  SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsCrossAddressSpaceOrdering = false;
  bool IsVolatile = false;
  bool IsNonTemporal = false;
  bool IsLastUse = false;
  bool IsCooperative = false;

  SIMemOpInfo(
      const GCNSubtarget &ST,
      AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent,
      SIAtomicScope Scope = SIAtomicScope::SYSTEM,
      SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::ATOMIC,
      SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::ALL,
      bool IsCrossAddressSpaceOrdering = true,
      AtomicOrdering FailureOrdering = AtomicOrdering::SequentiallyConsistent,
      bool IsVolatile = false, bool IsNonTemporal = false,
      bool IsLastUse = false, bool IsCooperative = false)
      : Ordering(Ordering), FailureOrdering(FailureOrdering), Scope(Scope),
        OrderingAddrSpace(OrderingAddrSpace), InstrAddrSpace(InstrAddrSpace),
        IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
        IsVolatile(IsVolatile), IsNonTemporal(IsNonTemporal),
        IsLastUse(IsLastUse), IsCooperative(IsCooperative) {

    if (Ordering == AtomicOrdering::NotAtomic) {
      assert(!IsCooperative && "Cannot be cooperative & non-atomic!");
      assert(Scope == SIAtomicScope::NONE &&
             OrderingAddrSpace == SIAtomicAddrSpace::NONE &&
             !IsCrossAddressSpaceOrdering &&
             FailureOrdering == AtomicOrdering::NotAtomic);
      return;
    }

    assert(Scope != SIAtomicScope::NONE &&
           (OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
               SIAtomicAddrSpace::NONE &&
           (InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
               SIAtomicAddrSpace::NONE);

    // There is no cross address space ordering if the ordering address space
    // is the same as the instruction address space and only contains a single
    // address space.
    if ((OrderingAddrSpace == InstrAddrSpace) &&
        isPowerOf2_32(uint32_t(InstrAddrSpace)))
      this->IsCrossAddressSpaceOrdering = false;

    // Limit the scope to the maximum supported by the instruction's address
    // spaces.
    if ((InstrAddrSpace & ~SIAtomicAddrSpace::SCRATCH) ==
        SIAtomicAddrSpace::NONE) {
      this->Scope = std::min(Scope, SIAtomicScope::SINGLETHREAD);
    } else if ((InstrAddrSpace &
                ~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS)) ==
               SIAtomicAddrSpace::NONE) {
      this->Scope = std::min(Scope, SIAtomicScope::WORKGROUP);
    } else if ((InstrAddrSpace &
                ~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS |
                  SIAtomicAddrSpace::GDS)) == SIAtomicAddrSpace::NONE) {
      this->Scope = std::min(Scope, SIAtomicScope::AGENT);
    }

    // On targets without cluster support, CLUSTER scope is equivalent to
    // AGENT scope.
    if (this->Scope == SIAtomicScope::CLUSTER && !ST.hasClusters())
      this->Scope = SIAtomicScope::AGENT;
  }

public:
  /// \returns Synchronization scope of the machine instruction used to
  /// create this SIMemOpInfo.
  SIAtomicScope getScope() const {
    return Scope;
  }

  /// \returns Ordering constraint of the machine instruction used to
  /// create this SIMemOpInfo.
  AtomicOrdering getOrdering() const {
    return Ordering;
  }

  /// \returns Failure ordering constraint of the machine instruction used to
  /// create this SIMemOpInfo.
  AtomicOrdering getFailureOrdering() const {
    return FailureOrdering;
  }

  /// \returns The address spaces accessed by the machine instruction used to
  /// create this SIMemOpInfo.
  SIAtomicAddrSpace getInstrAddrSpace() const {
    return InstrAddrSpace;
  }

  /// \returns The address spaces that must be ordered by the machine
  /// instruction used to create this SIMemOpInfo.
  SIAtomicAddrSpace getOrderingAddrSpace() const {
    return OrderingAddrSpace;
  }

  /// \returns True iff memory ordering of operations on different address
  /// spaces is required.
  bool getIsCrossAddressSpaceOrdering() const {
    return IsCrossAddressSpaceOrdering;
  }

  /// \returns True if the memory access is volatile, false otherwise.
  bool isVolatile() const {
    return IsVolatile;
  }

  /// \returns True if the memory access is nontemporal, false otherwise.
  bool isNonTemporal() const {
    return IsNonTemporal;
  }

  /// \returns True if the memory access is a last use, false otherwise.
  bool isLastUse() const { return IsLastUse; }

  /// \returns True if this is a cooperative atomic.
  bool isCooperative() const { return IsCooperative; }

  /// \returns True if ordering constraint of the machine instruction used to
  /// create this SIMemOpInfo is unordered or higher, false otherwise.
  bool isAtomic() const {
    return Ordering != AtomicOrdering::NotAtomic;
  }

};
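
/// Derives SIMemOpInfo for machine instructions from their machine memory
/// operands and synchronization scopes.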

class SIMemOpAccess final {
private:
  const AMDGPUMachineModuleInfo *MMI = nullptr;
  const GCNSubtarget &ST;

  /// Reports unsupported message \p Msg for \p MI to LLVM context.
  void reportUnsupported(const MachineBasicBlock::iterator &MI,
                         const char *Msg) const;

  /// Inspects the target synchronization scope \p SSID and determines the SI
  /// atomic scope it corresponds to, the address spaces it covers, and
  /// whether the memory ordering applies between address spaces.
  std::optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
  toSIAtomicScope(SyncScope::ID SSID, SIAtomicAddrSpace InstrAddrSpace) const;

  /// \return Return a bit set of the address spaces accessed by \p AS.
  SIAtomicAddrSpace toSIAtomicAddrSpace(unsigned AS) const;

  /// \returns Info constructed from \p MI, which has at least one machine
  /// memory operand.
  std::optional<SIMemOpInfo>
  constructFromMIWithMMO(const MachineBasicBlock::iterator &MI) const;

public:
  /// Construct class to support accessing the machine memory operands of
  /// instructions in the machine function \p MF.
  SIMemOpAccess(const AMDGPUMachineModuleInfo &MMI, const GCNSubtarget &ST);

  /// \returns Load info if \p MI is a load operation, "std::nullopt"
  /// otherwise.
  std::optional<SIMemOpInfo>
  getLoadInfo(const MachineBasicBlock::iterator &MI) const;

  /// \returns Store info if \p MI is a store operation, "std::nullopt"
  /// otherwise.
  std::optional<SIMemOpInfo>
  getStoreInfo(const MachineBasicBlock::iterator &MI) const;

  /// \returns Atomic fence info if \p MI is an atomic fence operation,
  /// "std::nullopt" otherwise.
  std::optional<SIMemOpInfo>
  getAtomicFenceInfo(const MachineBasicBlock::iterator &MI) const;

  /// \returns Atomic cmpxchg/rmw info if \p MI is an atomic cmpxchg or atomic
  /// rmw operation, "std::nullopt" otherwise.
  std::optional<SIMemOpInfo>
  getAtomicCmpxchgOrRmwInfo(const MachineBasicBlock::iterator &MI) const;

  /// \returns LDS DMA info if \p MI is an LDS DMA operation, "std::nullopt"
  /// otherwise.
  std::optional<SIMemOpInfo>
  getLDSDMAInfo(const MachineBasicBlock::iterator &MI) const;
};
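
/// Virtual interface for the per-generation cache-control and wait-count
/// sequences required by the memory model.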

class SICacheControl {
protected:

  /// AMDGPU subtarget info.
  const GCNSubtarget &ST;

  /// Instruction info.
  const SIInstrInfo *TII = nullptr;

  IsaVersion IV;

  /// Whether to insert cache invalidating instructions.
  bool InsertCacheInv;

  SICacheControl(const GCNSubtarget &ST);

  /// Sets the given CPol \p Bits on instruction \p MI if it has a CPol
  /// operand. \returns True if \p MI is modified, false otherwise.
  bool enableCPolBits(const MachineBasicBlock::iterator MI,
                      unsigned Bits) const;

  /// \returns True if the memory operation may affect the global address
  /// space.
  bool canAffectGlobalAddrSpace(SIAtomicAddrSpace AS) const;

public:

  /// Create a cache control for the subtarget \p ST.
  static std::unique_ptr<SICacheControl> create(const GCNSubtarget &ST);

  /// Update \p MI memory load instruction to bypass any caches up to
  /// the \p Scope memory scope for address spaces \p AddrSpace.
  /// \returns True iff the instruction was modified.
  virtual bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                                     SIAtomicScope Scope,
                                     SIAtomicAddrSpace AddrSpace) const = 0;

  /// Update \p MI memory store instruction to bypass any caches up to
  /// the \p Scope memory scope for address spaces \p AddrSpace.
  /// \returns True iff the instruction was modified.
  virtual bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
                                      SIAtomicScope Scope,
                                      SIAtomicAddrSpace AddrSpace) const = 0;

  /// Update \p MI memory read-modify-write instruction to bypass any caches
  /// up to the \p Scope memory scope for address spaces \p AddrSpace.
  /// \returns True iff the instruction was modified.
  virtual bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
                                    SIAtomicScope Scope,
                                    SIAtomicAddrSpace AddrSpace) const = 0;

  /// Update \p MI memory instruction of kind \p Op associated with address
  /// spaces \p AddrSpace to indicate it is volatile and/or
  /// nontemporal/last-use. \returns True iff the instruction was modified.
  virtual bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                              SIAtomicAddrSpace AddrSpace,
                                              SIMemOp Op, bool IsVolatile,
                                              bool IsNonTemporal,
                                              bool IsLastUse = false) const = 0;

  /// Add final touches to a store (or RMW) \p MI, where \p Atomic is true if
  /// it is an atomic operation. \returns True iff the instruction was
  /// modified.
  virtual bool finalizeStore(MachineInstr &MI, bool Atomic) const {
    return false;
  };

  /// Handle a cooperative load or store atomic \p MI.
  virtual bool handleCooperativeAtomic(MachineInstr &MI) const {
    llvm_unreachable(
        "cooperative atomics are not available on this architecture");
  }

  /// Inserts any necessary instructions at position \p Pos relative to
  /// instruction \p MI to ensure memory instructions before \p Pos of kind
  /// \p Op associated with address spaces \p AddrSpace have completed. Used
  /// between memory instructions to enforce the order they become visible as
  /// observed by other memory instructions executing in memory scope
  /// \p Scope. \p IsCrossAddrSpaceOrdering indicates if the memory ordering
  /// is between address spaces. If \p AtomicsOnly is true, only wait for
  /// counters used by atomic instructions.
  /// \returns True iff any instructions were inserted.
  virtual bool insertWait(MachineBasicBlock::iterator &MI,
                          SIAtomicScope Scope,
                          SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                          bool IsCrossAddrSpaceOrdering, Position Pos,
                          AtomicOrdering Order, bool AtomicsOnly) const = 0;

  /// Inserts any necessary instructions at position \p Pos relative to
  /// instruction \p MI to ensure any subsequent memory instructions of this
  /// thread with address spaces \p AddrSpace will observe the previous memory
  /// operations by any thread for memory scopes up to memory scope \p Scope.
  /// \returns True iff any instructions were inserted.
  virtual bool insertAcquire(MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace,
                             Position Pos) const = 0;

  /// Inserts any necessary instructions at position \p Pos relative to
  /// instruction \p MI to ensure previous memory instructions by this thread
  /// with address spaces \p AddrSpace have completed and can be observed by
  /// subsequent memory instructions by any thread executing in memory scope
  /// \p Scope. \p IsCrossAddrSpaceOrdering indicates if the memory ordering
  /// is between address spaces.
  /// \returns True iff any instructions were inserted.
  virtual bool insertRelease(MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace,
                             bool IsCrossAddrSpaceOrdering,
                             Position Pos) const = 0;

  /// Virtual destructor to allow derivations to be deleted.
  virtual ~SICacheControl() = default;
};
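
/// Cache control for pre-GFX10 targets, including the GFX90A and GFX940
/// variants.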

class SIGfx6CacheControl final : public SICacheControl {
public:

  SIGfx6CacheControl(const GCNSubtarget &ST) : SICacheControl(ST) {}

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
                              SIAtomicScope Scope,
                              SIAtomicAddrSpace AddrSpace) const override;

  bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
                            SIAtomicScope Scope,
                            SIAtomicAddrSpace AddrSpace) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile, bool IsNonTemporal,
                                      bool IsLastUse) const override;

  bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering, Position Pos,
                  AtomicOrdering Order, bool AtomicsOnly) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI,
                     SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace,
                     Position Pos) const override;

  bool insertRelease(MachineBasicBlock::iterator &MI,
                     SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace,
                     bool IsCrossAddrSpaceOrdering,
                     Position Pos) const override;
};
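
/// Cache control for GFX10 and GFX11 targets.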

class SIGfx10CacheControl final : public SICacheControl {
public:
  SIGfx10CacheControl(const GCNSubtarget &ST) : SICacheControl(ST) {}

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
                              SIAtomicScope Scope,
                              SIAtomicAddrSpace AddrSpace) const override {
    return false;
  }

  bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
                            SIAtomicScope Scope,
                            SIAtomicAddrSpace AddrSpace) const override {
    return false;
  }

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile, bool IsNonTemporal,
                                      bool IsLastUse) const override;

  bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering, Position Pos,
                  AtomicOrdering Order, bool AtomicsOnly) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, Position Pos) const override;

  bool insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, bool IsCrossAddrSpaceOrdering,
                     Position Pos) const override {
    return insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
                      IsCrossAddrSpaceOrdering, Pos, AtomicOrdering::Release,
                      false);
  }
};
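
/// Cache control for GFX12 targets (GFX12.0 and GFX12.5).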
class SIGfx12CacheControl final : public SICacheControl {
protected:
  // Sets TH policy to \p Value if CPol operand is present in instruction \p
  // MI. \returns True if \p MI is modified, false otherwise.
  bool setTH(const MachineBasicBlock::iterator MI,
             AMDGPU::CPol::CPol Value) const;

  // Sets Scope policy to \p Value if CPol operand is present in instruction
  // \p MI. \returns True if \p MI is modified, false otherwise.
  bool setScope(const MachineBasicBlock::iterator MI,
                AMDGPU::CPol::CPol Value) const;

  // Stores with system scope (SCOPE_SYS) need to wait for:
  // - loads or atomics (returning) - wait for {LOAD|SAMPLE|BVH|KM}CNT==0
  // - non-returning atomics        - wait for STORECNT==0
  // There is no need to wait if memory is cached (mtype != UC).
  bool
  insertWaitsBeforeSystemScopeStore(const MachineBasicBlock::iterator MI) const;

  bool setAtomicScope(const MachineBasicBlock::iterator &MI,
                      SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace) const;

public:
  SIGfx12CacheControl(const GCNSubtarget &ST) : SICacheControl(ST) {
    // The GFX12.0 and GFX12.5 memory models greatly overlap, and in some
    // cases the behavior is the same if we assume GFX12.0 in CU mode.
    assert(!ST.hasGFX1250Insts() || ST.isCuModeEnabled());
  }

  bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering, Position Pos,
                  AtomicOrdering Order, bool AtomicsOnly) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, Position Pos) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile, bool IsNonTemporal,
                                      bool IsLastUse) const override;

  bool finalizeStore(MachineInstr &MI, bool Atomic) const override;

  bool handleCooperativeAtomic(MachineInstr &MI) const override;

  bool insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, bool IsCrossAddrSpaceOrdering,
                     Position Pos) const override;

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override {
    return setAtomicScope(MI, Scope, AddrSpace);
  }

  bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
                              SIAtomicScope Scope,
                              SIAtomicAddrSpace AddrSpace) const override {
    return setAtomicScope(MI, Scope, AddrSpace);
  }

  bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
                            SIAtomicScope Scope,
                            SIAtomicAddrSpace AddrSpace) const override {
    return setAtomicScope(MI, Scope, AddrSpace);
  }
};
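
/// The memory legalizer itself: walks every memory operation of a machine
/// function and inserts the waits, invalidates, and writebacks the memory
/// model requires.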

class SIMemoryLegalizer final {
private:
  const MachineModuleInfo &MMI;

  /// Cache Control.
  std::unique_ptr<SICacheControl> CC = nullptr;

  /// List of atomic pseudo instructions.
  std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;

  /// Return true iff instruction \p MI is an atomic instruction that returns
  /// a result.
  bool isAtomicRet(const MachineInstr &MI) const {
    return SIInstrInfo::isAtomicRet(MI);
  }

  /// Removes all processed atomic pseudo instructions from the current
  /// function. Returns true if current function is modified, false otherwise.
  bool removeAtomicPseudoMIs();

  /// Expands load operation \p MI. Returns true if instructions are
  /// added/deleted or \p MI is modified, false otherwise.
  bool expandLoad(const SIMemOpInfo &MOI,
                  MachineBasicBlock::iterator &MI);

  /// Expands store operation \p MI. Returns true if instructions are
  /// added/deleted or \p MI is modified, false otherwise.
  bool expandStore(const SIMemOpInfo &MOI,
                   MachineBasicBlock::iterator &MI);

  /// Expands atomic fence operation \p MI. Returns true if instructions are
  /// added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicFence(const SIMemOpInfo &MOI,
                         MachineBasicBlock::iterator &MI);

  /// Expands atomic cmpxchg or rmw operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
                                MachineBasicBlock::iterator &MI);

  /// Expands LDS DMA operation \p MI. Returns true if instructions are
  /// added/deleted or \p MI is modified, false otherwise.
  bool expandLDSDMA(const SIMemOpInfo &MOI, MachineBasicBlock::iterator &MI);

public:
  SIMemoryLegalizer(const MachineModuleInfo &MMI) : MMI(MMI) {};
  bool run(MachineFunction &MF);
};

class SIMemoryLegalizerLegacy final : public MachineFunctionPass {
public:
  static char ID;

  SIMemoryLegalizerLegacy() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override {
    return PASS_NAME;
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
};
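
/// Address-space names accepted in "amdgpu-synchronize-as" MMRAs.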

static const StringMap<SIAtomicAddrSpace> ASNames = {{
    {"global", SIAtomicAddrSpace::GLOBAL},
    {"local", SIAtomicAddrSpace::LDS},
}};

void diagnoseUnknownMMRAASName(const MachineInstr &MI, StringRef AS) {
  const MachineFunction *MF = MI.getMF();
  const Function &Fn = MF->getFunction();
  SmallString<128> Str;
  raw_svector_ostream OS(Str);
  OS << "unknown address space '" << AS << "'; expected one of ";
  ListSeparator LS;
  for (const auto &[Name, Val] : ASNames)
    OS << LS << '\'' << Name << '\'';
  Fn.getContext().diagnose(
      DiagnosticInfoUnsupported(Fn, Str.str(), MI.getDebugLoc(), DS_Warning));
}
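
/// Reads \p MI's MMRAs to parse the "amdgpu-synchronize-as" MMRA.
/// If this tag isn't present, or if it has no meaningful values, returns
/// std::nullopt.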
static std::optional<SIAtomicAddrSpace>
getSynchronizeAddrSpaceMD(const MachineInstr &MI) {
  static constexpr StringLiteral FenceASPrefix = "amdgpu-synchronize-as";

  auto MMRA = MMRAMetadata(MI.getMMRAMetadata());
  if (!MMRA)
    return std::nullopt;

  SIAtomicAddrSpace Result = SIAtomicAddrSpace::NONE;
  for (const auto &[Prefix, Suffix] : MMRA) {
    if (Prefix != FenceASPrefix)
      continue;

    if (auto It = ASNames.find(Suffix); It != ASNames.end())
      Result |= It->second;
    else
      diagnoseUnknownMMRAASName(MI, Suffix);
  }

  if (Result == SIAtomicAddrSpace::NONE)
    return std::nullopt;

  return Result;
}

} // end anonymous namespace

void SIMemOpAccess::reportUnsupported(const MachineBasicBlock::iterator &MI,
                                      const char *Msg) const {
  const Function &Func = MI->getParent()->getParent()->getFunction();
  Func.getContext().diagnose(
      DiagnosticInfoUnsupported(Func, Msg, MI->getDebugLoc()));
}

std::optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
                               SIAtomicAddrSpace InstrAddrSpace) const {
  if (SSID == SyncScope::System)
    return std::tuple(SIAtomicScope::SYSTEM, SIAtomicAddrSpace::ATOMIC, true);
  if (SSID == MMI->getAgentSSID())
    return std::tuple(SIAtomicScope::AGENT, SIAtomicAddrSpace::ATOMIC, true);
  if (SSID == MMI->getClusterSSID())
    return std::tuple(SIAtomicScope::CLUSTER, SIAtomicAddrSpace::ATOMIC, true);
  if (SSID == MMI->getWorkgroupSSID())
    return std::tuple(SIAtomicScope::WORKGROUP, SIAtomicAddrSpace::ATOMIC,
                      true);
  if (SSID == MMI->getWavefrontSSID())
    return std::tuple(SIAtomicScope::WAVEFRONT, SIAtomicAddrSpace::ATOMIC,
                      true);
  if (SSID == SyncScope::SingleThread)
    return std::tuple(SIAtomicScope::SINGLETHREAD, SIAtomicAddrSpace::ATOMIC,
                      true);
  if (SSID == MMI->getSystemOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::SYSTEM,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  if (SSID == MMI->getAgentOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::AGENT,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  if (SSID == MMI->getClusterOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::CLUSTER,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  if (SSID == MMI->getWorkgroupOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::WORKGROUP,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  if (SSID == MMI->getWavefrontOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::WAVEFRONT,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  if (SSID == MMI->getSingleThreadOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::SINGLETHREAD,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  return std::nullopt;
}

SIAtomicAddrSpace SIMemOpAccess::toSIAtomicAddrSpace(unsigned AS) const {
  if (AS == AMDGPUAS::FLAT_ADDRESS)
    return SIAtomicAddrSpace::FLAT;
  if (AS == AMDGPUAS::GLOBAL_ADDRESS)
    return SIAtomicAddrSpace::GLOBAL;
  if (AS == AMDGPUAS::LOCAL_ADDRESS)
    return SIAtomicAddrSpace::LDS;
  if (AS == AMDGPUAS::PRIVATE_ADDRESS)
    return SIAtomicAddrSpace::SCRATCH;
  if (AS == AMDGPUAS::REGION_ADDRESS)
    return SIAtomicAddrSpace::GDS;
  if (AS == AMDGPUAS::BUFFER_FAT_POINTER || AS == AMDGPUAS::BUFFER_RESOURCE ||
      AS == AMDGPUAS::BUFFER_STRIDED_POINTER)
    return SIAtomicAddrSpace::GLOBAL;

  return SIAtomicAddrSpace::OTHER;
}

SIMemOpAccess::SIMemOpAccess(const AMDGPUMachineModuleInfo &MMI_,
                             const GCNSubtarget &ST)
    : MMI(&MMI_), ST(ST) {}

std::optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getNumMemOperands() > 0);

  SyncScope::ID SSID = SyncScope::SingleThread;
  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
  AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
  SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsNonTemporal = true;
  bool IsVolatile = false;
  bool IsLastUse = false;
  bool IsCooperative = false;

  // Validator should check whether or not MMOs cover the entire set of
  // locations accessed by the memory instruction.
  for (const auto &MMO : MI->memoperands()) {
    IsNonTemporal &= MMO->isNonTemporal();
    IsVolatile |= MMO->isVolatile();
    IsLastUse |= MMO->getFlags() & MOLastUse;
    IsCooperative |= MMO->getFlags() & MOCooperative;
    InstrAddrSpace |=
        toSIAtomicAddrSpace(MMO->getPointerInfo().getAddrSpace());
    AtomicOrdering OpOrdering = MMO->getSuccessOrdering();
    if (OpOrdering != AtomicOrdering::NotAtomic) {
      const auto &IsSyncScopeInclusion =
          MMI->isSyncScopeInclusion(SSID, MMO->getSyncScopeID());
      if (!IsSyncScopeInclusion) {
        reportUnsupported(MI,
          "Unsupported non-inclusive atomic synchronization scope");
        return std::nullopt;
      }

      SSID = *IsSyncScopeInclusion ? SSID : MMO->getSyncScopeID();
      Ordering = getMergedAtomicOrdering(Ordering, OpOrdering);
      assert(MMO->getFailureOrdering() != AtomicOrdering::Release &&
             MMO->getFailureOrdering() != AtomicOrdering::AcquireRelease);
      FailureOrdering =
          getMergedAtomicOrdering(FailureOrdering, MMO->getFailureOrdering());
    }
  }

  // LDS DMA accesses global memory as well as LDS; treat an otherwise
  // non-atomic LDS DMA operation as monotonic so it is properly ordered with
  // respect to other memory operations.
  if (SIInstrInfo::isLDSDMA(*MI) && Ordering == AtomicOrdering::NotAtomic)
    Ordering = AtomicOrdering::Monotonic;

  SIAtomicScope Scope = SIAtomicScope::NONE;
  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsCrossAddressSpaceOrdering = false;
  if (Ordering != AtomicOrdering::NotAtomic) {
    auto ScopeOrNone = toSIAtomicScope(SSID, InstrAddrSpace);
    if (!ScopeOrNone) {
      reportUnsupported(MI, "Unsupported atomic synchronization scope");
      return std::nullopt;
    }
    std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
        *ScopeOrNone;
    if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
        ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace) ||
        ((InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) == SIAtomicAddrSpace::NONE)) {
      reportUnsupported(MI, "Unsupported atomic address space");
      return std::nullopt;
    }
  }
  return SIMemOpInfo(ST, Ordering, Scope, OrderingAddrSpace, InstrAddrSpace,
                     IsCrossAddressSpaceOrdering, FailureOrdering, IsVolatile,
                     IsNonTemporal, IsLastUse, IsCooperative);
}

std::optional<SIMemOpInfo>
SIMemOpAccess::getLoadInfo(const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(MI->mayLoad() && !MI->mayStore()))
    return std::nullopt;

  // Be conservative if there are no memory operands.
  if (MI->getNumMemOperands() == 0)
    return SIMemOpInfo(ST);

  return constructFromMIWithMMO(MI);
}

std::optional<SIMemOpInfo>
SIMemOpAccess::getStoreInfo(const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(!MI->mayLoad() && MI->mayStore()))
    return std::nullopt;

  // Be conservative if there are no memory operands.
  if (MI->getNumMemOperands() == 0)
    return SIMemOpInfo(ST);

  return constructFromMIWithMMO(MI);
}

std::optional<SIMemOpInfo>
SIMemOpAccess::getAtomicFenceInfo(const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
    return std::nullopt;

  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI->getOperand(0).getImm());

  SyncScope::ID SSID = static_cast<SyncScope::ID>(MI->getOperand(1).getImm());
  auto ScopeOrNone = toSIAtomicScope(SSID, SIAtomicAddrSpace::ATOMIC);
  if (!ScopeOrNone) {
    reportUnsupported(MI, "Unsupported atomic synchronization scope");
    return std::nullopt;
  }

  SIAtomicScope Scope = SIAtomicScope::NONE;
  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsCrossAddressSpaceOrdering = false;
  std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
      *ScopeOrNone;

  if (OrderingAddrSpace != SIAtomicAddrSpace::ATOMIC) {
    // We currently expect fences to order, at most, the set of address spaces
    // that can be ordered by atomic operations. If that changes, the handling
    // of the synchronized address spaces below needs to be revisited.
    reportUnsupported(MI, "Unsupported atomic address space");
    return std::nullopt;
  }

  auto SynchronizeAS = getSynchronizeAddrSpaceMD(*MI);
  if (SynchronizeAS)
    OrderingAddrSpace = *SynchronizeAS;

  return SIMemOpInfo(ST, Ordering, Scope, OrderingAddrSpace,
                     SIAtomicAddrSpace::ATOMIC, IsCrossAddressSpaceOrdering,
                     AtomicOrdering::NotAtomic);
}

std::optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!(MI->mayLoad() && MI->mayStore()))
    return std::nullopt;

  // Be conservative if there are no memory operands.
  if (MI->getNumMemOperands() == 0)
    return SIMemOpInfo(ST);

  return constructFromMIWithMMO(MI);
}

std::optional<SIMemOpInfo>
SIMemOpAccess::getLDSDMAInfo(const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (!SIInstrInfo::isLDSDMA(*MI))
    return std::nullopt;

  return constructFromMIWithMMO(MI);
}

SICacheControl::SICacheControl(const GCNSubtarget &ST) : ST(ST) {
  TII = ST.getInstrInfo();
  IV = getIsaVersion(ST.getCPU());
  InsertCacheInv = !AmdgcnSkipCacheInvalidations;
}

bool SICacheControl::enableCPolBits(const MachineBasicBlock::iterator MI,
                                    unsigned Bits) const {
  MachineOperand *CPol = TII->getNamedOperand(*MI, AMDGPU::OpName::cpol);
  if (!CPol)
    return false;

  CPol->setImm(CPol->getImm() | Bits);
  return true;
}

bool SICacheControl::canAffectGlobalAddrSpace(SIAtomicAddrSpace AS) const {
  assert((!ST.hasGloballyAddressableScratch() ||
          (AS & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ||
          (AS & SIAtomicAddrSpace::SCRATCH) == SIAtomicAddrSpace::NONE) &&
         "scratch instructions should already be replaced by flat "
         "instructions if GloballyAddressableScratch is enabled");
  return (AS & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE;
}

/* static */
std::unique_ptr<SICacheControl> SICacheControl::create(const GCNSubtarget &ST) {
  GCNSubtarget::Generation Generation = ST.getGeneration();
  if (Generation < AMDGPUSubtarget::GFX10)
    return std::make_unique<SIGfx6CacheControl>(ST);
  if (Generation < AMDGPUSubtarget::GFX12)
    return std::make_unique<SIGfx10CacheControl>(ST);
  return std::make_unique<SIGfx12CacheControl>(ST);
}

bool SIGfx6CacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI,
    SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  assert(MI->mayLoad() && !MI->mayStore());

  if (!canAffectGlobalAddrSpace(AddrSpace)) {
    // The scratch address space does not need the global memory caches
    // to be bypassed as all memory operations by the same thread are
    // sequentially consistent, and no other thread can access scratch
    // memory.

    // Other address spaces do not have a cache.
    return false;
  }

  bool Changed = false;
  switch (Scope) {
  case SIAtomicScope::SYSTEM:
    if (ST.hasGFX940Insts()) {
      // Set SC bits to indicate system scope.
      Changed |= enableCPolBits(MI, AMDGPU::CPol::SC0 | AMDGPU::CPol::SC1);
      break;
    }
    [[fallthrough]];
  case SIAtomicScope::AGENT:
    if (ST.hasGFX940Insts()) {
      // Set SC bits to indicate agent scope.
      Changed |= enableCPolBits(MI, AMDGPU::CPol::SC1);
    } else {
      // Set L1 cache policy to MISS_EVICT.
      // Note: there is no L2 cache bypass policy at the ISA level.
      Changed |= enableCPolBits(MI, AMDGPU::CPol::GLC);
    }
    break;
  case SIAtomicScope::WORKGROUP:
    if (ST.hasGFX940Insts()) {
      // In threadgroup split mode the waves of a work-group can be executing
      // on different CUs. Therefore need to bypass the L1 which is per CU.
      // Otherwise in non-threadgroup split mode all waves of a work-group are
      // on the same CU, and so the L1 does not need to be bypassed. Setting
      // the SC bits to indicate work-group scope does this automatically.
      Changed |= enableCPolBits(MI, AMDGPU::CPol::SC0);
    } else if (ST.hasGFX90AInsts()) {
      // In threadgroup split mode the waves of a work-group can be executing
      // on different CUs. Therefore need to bypass the L1 which is per CU.
      // Otherwise in non-threadgroup split mode all waves of a work-group are
      // on the same CU, and so the L1 does not need to be bypassed.
      if (ST.isTgSplitEnabled())
        Changed |= enableCPolBits(MI, AMDGPU::CPol::GLC);
    }
    break;
  case SIAtomicScope::WAVEFRONT:
  case SIAtomicScope::SINGLETHREAD:
    // No cache to bypass.
    break;
  default:
    llvm_unreachable("Unsupported synchronization scope");
  }

  return Changed;
}

bool SIGfx6CacheControl::enableStoreCacheBypass(
    const MachineBasicBlock::iterator &MI,
    SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  assert(!MI->mayLoad() && MI->mayStore());
  bool Changed = false;

  // On targets other than GFX940 the stores always go through the L2 cache,
  // so there is no need to bypass it.
  if (ST.hasGFX940Insts() && canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // Set SC bits to indicate system scope.
      Changed |= enableCPolBits(MI, AMDGPU::CPol::SC0 | AMDGPU::CPol::SC1);
      break;
    case SIAtomicScope::AGENT:
      // Set SC bits to indicate agent scope.
      Changed |= enableCPolBits(MI, AMDGPU::CPol::SC1);
      break;
    case SIAtomicScope::WORKGROUP:
      // Set SC bits to indicate workgroup scope.
      Changed |= enableCPolBits(MI, AMDGPU::CPol::SC0);
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // Leave SC bits unset to indicate wavefront scope.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }

    // The scratch address space does not need the global memory caches
    // to be bypassed as all memory operations by the same thread are
    // sequentially consistent, and no other thread can access scratch
    // memory.

    // Other address spaces do not have a cache.
  }

  return Changed;
}

bool SIGfx6CacheControl::enableRMWCacheBypass(
    const MachineBasicBlock::iterator &MI,
    SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  assert(MI->mayLoad() && MI->mayStore());
  bool Changed = false;

  // On targets other than GFX940, RMW operations implicitly bypass the L1
  // cache, so there is nothing to do.
  if (ST.hasGFX940Insts() && canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // Set SC1 bit to indicate system scope.
      Changed |= enableCPolBits(MI, AMDGPU::CPol::SC1);
      break;
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // RMW atomic operations implicitly bypass the L1 cache and only use
      // SC1 to indicate system scope, so there is nothing to do for agent
      // scope and below.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  return Changed;
}

bool SIGfx6CacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const {
  // Only handle load and store, not atomic read-modify-write instructions.
  // The latter use glc to indicate if the atomic returns a result, so glc
  // must not be used for cache control.
  assert(MI->mayLoad() ^ MI->mayStore());

  // Only update load and store, not LLVM IR atomic read-modify-write
  // instructions. The latter are always marked as volatile so cannot
  // sensibly be handled without pessimizing all atomics, and they do not
  // support the nontemporal attribute.
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  bool Changed = false;

  if (IsVolatile) {
    if (ST.hasGFX940Insts()) {
      // Set SC bits to indicate system scope.
      Changed |= enableCPolBits(MI, AMDGPU::CPol::SC0 | AMDGPU::CPol::SC1);
    } else if (Op == SIMemOp::LOAD) {
      // Set L1 cache policy to be MISS_EVICT for load instructions
      // and MISS_LRU for store instructions.
      // Note: there is no L2 cache bypass policy at the ISA level.
      Changed |= enableCPolBits(MI, AMDGPU::CPol::GLC);
    }

    // Ensure operation has completed at system scope to cause all volatile
    // operations to be visible outside the program in a global order. Do not
    // request cross address space ordering as only the global address space
    // can be observable outside the program, so no need to cause a waitcnt
    // for LDS address space operations.
    Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
                          Position::AFTER, AtomicOrdering::Unordered,
                          false);

    return Changed;
  }

  if (IsNonTemporal) {
    if (ST.hasGFX940Insts()) {
      Changed |= enableCPolBits(MI, AMDGPU::CPol::NT);
    } else {
      // Setting both GLC and SLC configures the L1 cache policy to MISS_EVICT
      // and the L2 cache policy to STREAM.
      Changed |= enableCPolBits(MI, AMDGPU::CPol::GLC | AMDGPU::CPol::SLC);
    }
    return Changed;
  }

  return Changed;
}

bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
                                    SIAtomicScope Scope,
                                    SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                    bool IsCrossAddrSpaceOrdering, Position Pos,
                                    AtomicOrdering Order,
                                    bool AtomicsOnly) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  if (ST.hasGFX90AInsts() && ST.isTgSplitEnabled()) {
    // In threadgroup split mode the waves of a work-group can be executing on
    // different CUs. Therefore need to wait for global or GDS memory
    // operations to complete to ensure they are visible to waves in the other
    // CUs. Otherwise in non-threadgroup split mode all waves of a work-group
    // are on the same CU, so no need to wait for global memory as all waves
    // in the work-group access the same L1, nor wait for GDS as accesses are
    // ordered on a CU.
    if (((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH |
                       SIAtomicAddrSpace::GDS)) != SIAtomicAddrSpace::NONE) &&
        (Scope == SIAtomicScope::WORKGROUP)) {
      // Same as GFX7 using agent scope.
      Scope = SIAtomicScope::AGENT;
    }
    // In threadgroup split mode LDS cannot be allocated, so no need to wait
    // for LDS memory operations.
    AddrSpace &= ~SIAtomicAddrSpace::LDS;
  }

  bool VMCnt = false;
  bool LGKMCnt = false;

  if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
      SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      VMCnt |= true;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The L1 cache keeps all memory operations by the same wavefront in
      // order for the same address space.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
      // If no cross address space ordering then an "S_WAITCNT lgkmcnt(0)" is
      // not needed as LDS operations for all waves are executed in a total
      // global ordering as observed by all waves. Required if also
      // synchronizing with global/GDS memory as LDS operations could be
      // reordered with respect to later global/GDS memory operations of the
      // same wave.
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The LDS keeps all memory operations by the same wavefront in order
      // for the same address space.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // If no cross address space ordering then a GDS "S_WAITCNT lgkmcnt(0)"
      // is not needed as GDS operations for all waves are executed in a total
      // global ordering as observed by all waves. Required if also
      // synchronizing with global/LDS memory as GDS operations could be
      // reordered with respect to later global/LDS memory operations of the
      // same wave.
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The GDS keeps all memory operations by the same wave in order for the
      // same address space.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }
  if (VMCnt || LGKMCnt) {
    unsigned WaitCntImmediate =
        AMDGPU::encodeWaitcnt(IV,
                              VMCnt ? 0 : getVmcntBitMask(IV),
                              getExpcntBitMask(IV),
                              LGKMCnt ? 0 : getLgkmcntBitMask(IV));
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_soft))
        .addImm(WaitCntImmediate);
    Changed = true;
  }

  // LDS DMA operations are tracked with vmcnt rather than lgkmcnt, so a
  // workgroup-scope ordering that includes the LDS address space must also
  // wait for outstanding vmem accesses.
  if (!AtomicsOnly && Scope == SIAtomicScope::WORKGROUP &&
      (AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_soft))
        .addImm(AMDGPU::encodeWaitcnt(IV, 0, getExpcntBitMask(IV),
                                      getLgkmcntBitMask(IV)));
    Changed = true;
  }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}

static bool canUseBUFFER_WBINVL1_VOL(const GCNSubtarget &ST) {
  if (ST.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS)
    return false;
  return !ST.isAmdPalOS() && !ST.isMesa3DOS();
}

bool SIGfx6CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                       SIAtomicScope Scope,
                                       SIAtomicAddrSpace AddrSpace,
                                       Position Pos) const {
  if (!InsertCacheInv)
    return false;

  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  const DebugLoc &DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  const unsigned InvalidateL1 = canUseBUFFER_WBINVL1_VOL(ST)
                                    ? AMDGPU::BUFFER_WBINVL1_VOL
                                    : AMDGPU::BUFFER_WBINVL1;

  if (canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      if (ST.hasGFX940Insts()) {
        // Ensures that following loads will not see stale remote VMEM data or
        // stale local VMEM data with MTYPE NC. Local VMEM data with MTYPE RW
        // and CC will never be stale due to the local memory probes.
        BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INV))
            .addImm(AMDGPU::CPol::SC0 | AMDGPU::CPol::SC1);
        // Inserting a "S_WAITCNT vmcnt(0)" after is not required because the
        // hardware does not reorder memory operations by the same wave with
        // respect to a preceding "BUFFER_INV".
        Changed = true;
        break;
      }

      if (ST.hasGFX90AInsts()) {
        // Ensures that following loads will not see stale remote VMEM data or
        // stale local VMEM data with MTYPE NC.
        BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INVL2));
        BuildMI(MBB, MI, DL, TII->get(InvalidateL1));
        Changed = true;
        break;
      }
      [[fallthrough]];
    case SIAtomicScope::AGENT:
      if (ST.hasGFX940Insts()) {
        // Ensures that following loads will not see stale remote data or
        // local data with MTYPE NC.
        BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INV))
            .addImm(AMDGPU::CPol::SC1);
      } else
        BuildMI(MBB, MI, DL, TII->get(InvalidateL1));
      Changed = true;
      break;
    case SIAtomicScope::WORKGROUP:
      if (ST.isTgSplitEnabled()) {
        // In threadgroup split mode the waves of a work-group can be
        // executing on different CUs. Therefore need to invalidate the L1
        // which is per CU. Otherwise in non-threadgroup split mode all waves
        // of a work-group are on the same CU, and so the L1 does not need to
        // be invalidated.
        if (ST.hasGFX940Insts()) {
          BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INV))
              .addImm(AMDGPU::CPol::SC0);
          Changed = true;
        } else if (ST.hasGFX90AInsts()) {
          BuildMI(MBB, MI, DL, TII->get(InvalidateL1));
          Changed = true;
        }
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to invalidate.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  // The scratch address space does not need the global memory cache to be
  // flushed as all memory operations by the same thread are sequentially
  // consistent, and no other thread can access scratch memory.

  // Other address spaces do not have a cache.

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}

bool SIGfx6CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
                                       SIAtomicScope Scope,
                                       SIAtomicAddrSpace AddrSpace,
                                       bool IsCrossAddrSpaceOrdering,
                                       Position Pos) const {
  bool Changed = false;

  if (ST.hasGFX90AInsts()) {
    MachineBasicBlock &MBB = *MI->getParent();
    const DebugLoc &DL = MI->getDebugLoc();

    if (Pos == Position::AFTER)
      ++MI;

    if (canAffectGlobalAddrSpace(AddrSpace)) {
      switch (Scope) {
      case SIAtomicScope::SYSTEM:
        // Inserting a "S_WAITCNT vmcnt(0)" before is not required because the
        // hardware does not reorder memory operations by the same wave with
        // respect to a following "BUFFER_WBL2". The "BUFFER_WBL2" is
        // guaranteed to initiate writeback of any dirty cache lines of
        // earlier writes by the same wave. A "S_WAITCNT vmcnt(0)" is needed
        // after to ensure the writeback has completed.
        BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2))
            .addImm(ST.hasGFX940Insts()
                        ? AMDGPU::CPol::SC0 | AMDGPU::CPol::SC1
                        : 0);
        Changed = true;
        break;
      case SIAtomicScope::AGENT:
        if (ST.hasGFX940Insts()) {
          BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2))
              .addImm(AMDGPU::CPol::SC1);
          // The waitcnt inserted by the following insertWait ensures the
          // writeback has completed.
          Changed = true;
        }
        break;
      case SIAtomicScope::WORKGROUP:
      case SIAtomicScope::WAVEFRONT:
      case SIAtomicScope::SINGLETHREAD:
        // Do not generate "BUFFER_WBL2" as all memory operations by the same
        // wave are sequentially consistent.
        break;
      default:
        llvm_unreachable("Unsupported synchronization scope");
      }
    }

    if (Pos == Position::AFTER)
      --MI;
  }

  // Ensure the necessary S_WAITCNT needed by any "BUFFER_WBL2" as well as the
  // S_WAITCNT needed by the release.
  Changed |= insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
                        IsCrossAddrSpaceOrdering, Pos, AtomicOrdering::Release,
                        false);

  return Changed;
}

bool SIGfx10CacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  assert(MI->mayLoad() && !MI->mayStore());
  bool Changed = false;

  if (canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // Set the L0 and L1 cache policies to MISS_EVICT.
      // Note: there is no L2 cache coherent bypass control at the ISA level.
      Changed |= enableCPolBits(MI, AMDGPU::CPol::GLC | AMDGPU::CPol::DLC);
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be executing on either CU
      // of the WGP. Therefore need to bypass the L0 which is per CU.
      // Otherwise in CU mode all waves of a work-group are on the same CU,
      // and so the L0 does not need to be bypassed.
      if (!ST.isCuModeEnabled())
        Changed |= enableCPolBits(MI, AMDGPU::CPol::GLC);
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to bypass.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  // The scratch address space does not need the global memory caches
  // to be bypassed as all memory operations by the same thread are
  // sequentially consistent, and no other thread can access scratch
  // memory.

  // Other address spaces do not have a cache.

  return Changed;
}

bool SIGfx10CacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const {

  // Only handle load and store, not atomic read-modify-write instructions.
  // The latter use glc to indicate if the atomic returns a result, so glc
  // must not be used for cache control.
  assert(MI->mayLoad() ^ MI->mayStore());

  // Only update load and store, not LLVM IR atomic read-modify-write
  // instructions. The latter are always marked as volatile so cannot
  // sensibly be handled without pessimizing all atomics, and they do not
  // support the nontemporal attribute.
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  bool Changed = false;

  if (IsVolatile) {
    // Set L0 and L1 cache policy to be MISS_EVICT for load instructions
    // and MISS_LRU for store instructions.
    // Note: there is no L2 cache coherent bypass control at the ISA level.
    if (Op == SIMemOp::LOAD) {
      Changed |= enableCPolBits(MI, AMDGPU::CPol::GLC | AMDGPU::CPol::DLC);
    }

    // Ensure operation has completed at system scope to cause all volatile
    // operations to be visible outside the program in a global order. Do not
    // request cross address space ordering as only the global address space
    // can be observable outside the program, so no need to cause a waitcnt
    // for LDS address space operations.
    Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
                          Position::AFTER, AtomicOrdering::Unordered,
                          false);
    return Changed;
  }

  if (IsNonTemporal) {
    // For loads setting SLC configures L0 and L1 cache policy to HIT_EVICT
    // and L2 cache policy to STREAM.
    // For stores setting both GLC and SLC configures L0 and L1 cache policy
    // to MISS_EVICT and the L2 cache policy to STREAM.
    if (Op == SIMemOp::STORE)
      Changed |= enableCPolBits(MI, AMDGPU::CPol::GLC);
    Changed |= enableCPolBits(MI, AMDGPU::CPol::SLC);

    return Changed;
  }

  return Changed;
}

bool SIGfx10CacheControl::insertWait(MachineBasicBlock::iterator &MI,
                                     SIAtomicScope Scope,
                                     SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                     bool IsCrossAddrSpaceOrdering,
                                     Position Pos, AtomicOrdering Order,
                                     bool AtomicsOnly) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  bool VMCnt = false;
  bool VSCnt = false;
  bool LGKMCnt = false;

  if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
      SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
        VMCnt |= true;
      if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
        VSCnt |= true;
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be executing on either CU
      // of the WGP. Therefore need to wait for operations to complete to
      // ensure they are visible to waves in the other CU as the L0 is per CU.
      // Otherwise in CU mode all waves of a work-group are on the same CU
      // which shares the same L0.
      if (!ST.isCuModeEnabled()) {
        if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
          VMCnt |= true;
        if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
          VSCnt |= true;
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The L0 cache keeps all memory operations by the same wavefront in
      // order for the same address space.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
      // If no cross address space ordering then an "S_WAITCNT lgkmcnt(0)" is
      // not needed as LDS operations for all waves are executed in a total
      // global ordering as observed by all waves. Required if also
      // synchronizing with global/GDS memory as LDS operations could be
      // reordered with respect to later global/GDS memory operations of the
      // same wave.
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The LDS keeps all memory operations by the same wavefront in order
      // for the same address space.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // If no cross address space ordering then a GDS "S_WAITCNT lgkmcnt(0)"
      // is not needed as GDS operations for all waves are executed in a total
      // global ordering as observed by all waves. Required if also
      // synchronizing with global/LDS memory as GDS operations could be
      // reordered with respect to later global/LDS memory operations of the
      // same wave.
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The GDS keeps all memory operations by the same wave in order for the
      // same address space.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (VMCnt || LGKMCnt) {
    unsigned WaitCntImmediate =
        AMDGPU::encodeWaitcnt(IV,
                              VMCnt ? 0 : getVmcntBitMask(IV),
                              getExpcntBitMask(IV),
                              LGKMCnt ? 0 : getLgkmcntBitMask(IV));
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_soft))
        .addImm(WaitCntImmediate);
    Changed = true;
  }

  // LDS DMA operations are tracked with vmcnt rather than lgkmcnt, so a
  // workgroup-scope ordering that includes the LDS address space must also
  // wait for outstanding vmem accesses.
  if (!AtomicsOnly && Scope == SIAtomicScope::WORKGROUP &&
      (AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_soft))
        .addImm(AMDGPU::encodeWaitcnt(IV, 0, getExpcntBitMask(IV),
                                      getLgkmcntBitMask(IV)));
    Changed = true;
  }

  if (VSCnt) {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_VSCNT_soft))
        .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
        .addImm(0);
    Changed = true;
  }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}

bool SIGfx10CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                        SIAtomicScope Scope,
                                        SIAtomicAddrSpace AddrSpace,
                                        Position Pos) const {
  if (!InsertCacheInv)
    return false;

  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  if (Pos == Position::AFTER)
    ++MI;

  if (canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // The order of invalidates matters here. We must invalidate "outer in"
      // so L1 -> L0 to avoid L0 pulling in stale data from L1 when it is
      // invalidated.
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL1_INV));
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL0_INV));
      Changed = true;
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be executing on either CU
      // of the WGP. Therefore need to invalidate the L0 which is per CU.
      // Otherwise in CU mode all waves of a work-group are on the same CU,
      // and so the L0 does not need to be invalidated.
      if (!ST.isCuModeEnabled()) {
        BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL0_INV));
        Changed = true;
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to invalidate.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  // The scratch address space does not need the global memory cache to be
  // flushed as all memory operations by the same thread are sequentially
  // consistent, and no other thread can access scratch memory.

  // Other address spaces do not have a cache.

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}

bool SIGfx12CacheControl::setTH(const MachineBasicBlock::iterator MI,
                                AMDGPU::CPol::CPol Value) const {
  MachineOperand *CPol = TII->getNamedOperand(*MI, OpName::cpol);
  if (!CPol)
    return false;

  uint64_t NewTH = Value & AMDGPU::CPol::TH;
  if ((CPol->getImm() & AMDGPU::CPol::TH) != NewTH) {
    CPol->setImm((CPol->getImm() & ~AMDGPU::CPol::TH) | NewTH);
    return true;
  }

  return false;
}

bool SIGfx12CacheControl::setScope(const MachineBasicBlock::iterator MI,
                                   AMDGPU::CPol::CPol Value) const {
  MachineOperand *CPol = TII->getNamedOperand(*MI, OpName::cpol);
  if (!CPol)
    return false;

  uint64_t NewScope = Value & AMDGPU::CPol::SCOPE;
  if ((CPol->getImm() & AMDGPU::CPol::SCOPE) != NewScope) {
    CPol->setImm((CPol->getImm() & ~AMDGPU::CPol::SCOPE) | NewScope);
    return true;
  }

  return false;
}

bool SIGfx12CacheControl::insertWaitsBeforeSystemScopeStore(
    const MachineBasicBlock::iterator MI) const {
  // TODO: implement flag for frontend to give us a hint not to insert waits.
  MachineBasicBlock &MBB = *MI->getParent();
  const DebugLoc &DL = MI->getDebugLoc();

  BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_LOADCNT_soft)).addImm(0);
  if (ST.hasImageInsts()) {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_SAMPLECNT_soft)).addImm(0);
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_BVHCNT_soft)).addImm(0);
  }
  BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_KMCNT_soft)).addImm(0);
  BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_STORECNT_soft)).addImm(0);

  return true;
}

bool SIGfx12CacheControl::insertWait(MachineBasicBlock::iterator &MI,
                                     SIAtomicScope Scope,
                                     SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                     bool IsCrossAddrSpaceOrdering,
                                     Position Pos, AtomicOrdering Order,
                                     bool AtomicsOnly) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  bool LOADCnt = false;
  bool DSCnt = false;
  bool STORECnt = false;

  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
      SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::CLUSTER:
      if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
        LOADCnt |= true;
      if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
        STORECnt |= true;
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be executing on either CU
      // of the WGP. Therefore need to wait for operations to complete to
      // ensure they are visible to waves in the other CU as the L0 is per CU.
      // Otherwise in CU mode all waves of a work-group are on the same CU
      // which shares the same L0. GFX12.5 must always wait.
      if (!ST.isCuModeEnabled() || ST.hasGFX1250Insts() ||
          ST.hasGloballyAddressableScratch()) {
        if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
          LOADCnt |= true;
        if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
          STORECnt |= true;
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The L0 cache keeps all memory operations by the same wavefront in
      // order for the same address space.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::CLUSTER:
    case SIAtomicScope::WORKGROUP:
      // If no cross address space ordering then an "S_WAIT_DSCNT 0x0" is not
      // needed as LDS operations for all waves are executed in a total global
      // ordering as observed by all waves. Required if also synchronizing
      // with global/GDS memory as LDS operations could be reordered with
      // respect to later global/GDS memory operations of the same wave.
      DSCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The LDS keeps all memory operations by the same wavefront in order
      // for the same address space.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (LOADCnt) {
    // Acquire sequences only need to wait on the previous atomic operation,
    // e.g. a typical sequence is: flat_atomic, s_wait_loadcnt 0x0,
    // global_inv. The sample and BVH counters are only needed when ordering
    // all prior memory operations.
    if (!AtomicsOnly && ST.hasImageInsts()) {
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_BVHCNT_soft)).addImm(0);
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_SAMPLECNT_soft)).addImm(0);
    }
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_LOADCNT_soft)).addImm(0);
    Changed = true;
  }

  if (STORECnt) {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_STORECNT_soft)).addImm(0);
    Changed = true;
  }

  if (DSCnt) {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_DSCNT_soft)).addImm(0);
    Changed = true;
  }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}

bool SIGfx12CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                        SIAtomicScope Scope,
                                        SIAtomicAddrSpace AddrSpace,
                                        Position Pos) const {
  if (!InsertCacheInv)
    return false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  // The scratch address space does not need the global memory cache to be
  // invalidated as all memory operations by the same thread are sequentially
  // consistent, and no other thread can access scratch memory.

  // Other address spaces do not have a cache.
  if (!canAffectGlobalAddrSpace(AddrSpace))
    return false;

  AMDGPU::CPol::CPol ScopeImm = AMDGPU::CPol::SCOPE_DEV;
  switch (Scope) {
  case SIAtomicScope::SYSTEM:
    ScopeImm = AMDGPU::CPol::SCOPE_SYS;
    break;
  case SIAtomicScope::AGENT:
    ScopeImm = AMDGPU::CPol::SCOPE_DEV;
    break;
  case SIAtomicScope::CLUSTER:
    ScopeImm = AMDGPU::CPol::SCOPE_SE;
    break;
  case SIAtomicScope::WORKGROUP:
    // In WGP mode the waves of a work-group can be executing on either CU of
    // the WGP. Therefore we need to invalidate the L0 which is per CU.
    // Otherwise in CU mode all waves of a work-group are on the same CU, and
    // so the L0 does not need to be invalidated.
    if (ST.isCuModeEnabled())
      return false;

    ScopeImm = AMDGPU::CPol::SCOPE_SE;
    break;
  case SIAtomicScope::WAVEFRONT:
  case SIAtomicScope::SINGLETHREAD:
    // No cache to invalidate.
    return false;
  default:
    llvm_unreachable("Unsupported synchronization scope");
  }

  if (Pos == Position::AFTER)
    ++MI;

  BuildMI(MBB, MI, DL, TII->get(AMDGPU::GLOBAL_INV)).addImm(ScopeImm);

  if (Pos == Position::AFTER)
    --MI;

  return true;
}

bool SIGfx12CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
                                        SIAtomicScope Scope,
                                        SIAtomicAddrSpace AddrSpace,
                                        bool IsCrossAddrSpaceOrdering,
                                        Position Pos) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  // The scratch address space does not need a writeback as all memory
  // operations by the same thread are sequentially consistent, and no other
  // thread can access scratch memory.
  if (canAffectGlobalAddrSpace(AddrSpace)) {
    if (Pos == Position::AFTER)
      ++MI;

    // GLOBAL_WB is needed even for write-through caches, as it additionally
    // ensures all operations have reached the desired cache level.
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::GLOBAL_WB))
          .addImm(AMDGPU::CPol::SCOPE_SYS);
      Changed = true;
      break;
    case SIAtomicScope::AGENT:
      // GFX12.0 does not need a writeback for agent scope.
      if (ST.hasGFX1250Insts()) {
        BuildMI(MBB, MI, DL, TII->get(AMDGPU::GLOBAL_WB))
            .addImm(AMDGPU::CPol::SCOPE_DEV);
        Changed = true;
      }
      break;
    case SIAtomicScope::CLUSTER:
    case SIAtomicScope::WORKGROUP:
      // No writeback necessary, but we still have to wait below.
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No writeback necessary.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }

    if (Pos == Position::AFTER)
      --MI;
  }

  // Ensure the necessary waits needed by any "GLOBAL_WB" as well as the
  // waits needed by the release.
  Changed |= insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
                        IsCrossAddrSpaceOrdering, Pos, AtomicOrdering::Release,
                        false);

  return Changed;
}

bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const {

  // Only handle load and store, not atomic read-modify-write instructions.
  assert(MI->mayLoad() ^ MI->mayStore());

  // Only update load and store, not LLVM IR atomic read-modify-write
  // instructions. The latter are always marked as volatile so cannot
  // sensibly be handled without pessimizing all atomics.
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  bool Changed = false;

  if (IsLastUse) {
    // Set last-use hint.
    Changed |= setTH(MI, AMDGPU::CPol::TH_LU);
  } else if (IsNonTemporal) {
    // Set non-temporal hint for all cache levels.
    Changed |= setTH(MI, AMDGPU::CPol::TH_NT);
  }

  if (IsVolatile) {
    Changed |= setScope(MI, AMDGPU::CPol::SCOPE_SYS);

    if (ST.requiresWaitXCntForSingleAccessInstructions() &&
        SIInstrInfo::isVMEM(*MI)) {
      MachineBasicBlock &MBB = *MI->getParent();
      BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::S_WAIT_XCNT_soft))
          .addImm(0);
      Changed = true;
    }

    // Ensure operation has completed at system scope to cause all volatile
    // operations to be visible outside the program in a global order. Do not
    // request cross address space ordering as only the global address space
    // can be observable outside the program, so no need to cause a waitcnt
    // for LDS address space operations.
    Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
                          Position::AFTER, AtomicOrdering::Unordered,
                          false);
  }

  return Changed;
}

bool SIGfx12CacheControl::finalizeStore(MachineInstr &MI, bool Atomic) const {
  assert(MI.mayStore() && "Not a Store inst");
  const bool IsRMW = (MI.mayLoad() && MI.mayStore());
  bool Changed = false;

  if (Atomic && ST.requiresWaitXCntForSingleAccessInstructions() &&
      SIInstrInfo::isVMEM(MI)) {
    MachineBasicBlock &MBB = *MI.getParent();
    BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(AMDGPU::S_WAIT_XCNT_soft))
        .addImm(0);
    Changed = true;
  }

  // RMW instructions do not use the cache-policy fixups below.
  if (IsRMW)
    return Changed;

  MachineOperand *CPol = TII->getNamedOperand(MI, OpName::cpol);
  if (!CPol)
    return Changed;
  const unsigned Scope = CPol->getImm() & AMDGPU::CPol::SCOPE;

  // GFX12.0 only: non-atomic SCOPE_SYS stores need preceding waits.
  if (ST.requiresWaitsBeforeSystemScopeStores() && !Atomic &&
      Scope == AMDGPU::CPol::SCOPE_SYS)
    Changed |= insertWaitsBeforeSystemScopeStore(MI.getIterator());

  return Changed;
}

bool SIGfx12CacheControl::handleCooperativeAtomic(MachineInstr &MI) const {
  if (!ST.hasGFX1250Insts())
    return false;

  // Cooperative atomics must be visible beyond a single CU: widen a
  // CU-scoped cache policy to SE scope.
  MachineOperand *CPol = TII->getNamedOperand(MI, OpName::cpol);
  assert(CPol && "No CPol operand?");
  if ((CPol->getImm() & AMDGPU::CPol::SCOPE) == AMDGPU::CPol::SCOPE_CU)
    return setScope(MI.getIterator(), AMDGPU::CPol::SCOPE_SE);
  return false;
}

bool SIGfx12CacheControl::setAtomicScope(const MachineBasicBlock::iterator &MI,
                                         SIAtomicScope Scope,
                                         SIAtomicAddrSpace AddrSpace) const {
  bool Changed = false;

  if (canAffectGlobalAddrSpace(AddrSpace)) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      Changed |= setScope(MI, AMDGPU::CPol::SCOPE_SYS);
      break;
    case SIAtomicScope::AGENT:
      Changed |= setScope(MI, AMDGPU::CPol::SCOPE_DEV);
      break;
    case SIAtomicScope::CLUSTER:
      Changed |= setScope(MI, AMDGPU::CPol::SCOPE_SE);
      break;
    case SIAtomicScope::WORKGROUP:
      // In workgroup mode, SCOPE_SE is needed as waves can execute on
      // different CUs that access different L0s.
      if (!ST.isCuModeEnabled())
        Changed |= setScope(MI, AMDGPU::CPol::SCOPE_SE);
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to bypass.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  // The scratch address space does not need the global memory caches
  // to be bypassed as all memory operations by the same thread are
  // sequentially consistent, and no other thread can access scratch
  // memory.

  // Other address spaces do not have a cache.

  return Changed;
}

bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
  if (AtomicPseudoMIs.empty())
    return false;

  for (auto &MI : AtomicPseudoMIs)
    MI->eraseFromParent();

  AtomicPseudoMIs.clear();
  return true;
}

bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI,
                                   MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && !MI->mayStore());

  bool Changed = false;

  if (MOI.isAtomic()) {
    const AtomicOrdering Order = MOI.getOrdering();
    if (Order == AtomicOrdering::Monotonic ||
        Order == AtomicOrdering::Acquire ||
        Order == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->enableLoadCacheBypass(MI, MOI.getScope(),
                                           MOI.getOrderingAddrSpace());
    }

    // Handle cooperative atomics after cache bypass, as they may need to
    // override some of the cache policies.
    if (MOI.isCooperative())
      Changed |= CC->handleCooperativeAtomic(*MI);

    if (Order == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertWait(MI, MOI.getScope(), MOI.getOrderingAddrSpace(),
                                SIMemOp::LOAD | SIMemOp::STORE,
                                MOI.getIsCrossAddressSpaceOrdering(),
                                Position::BEFORE, Order, false);

    if (Order == AtomicOrdering::Acquire ||
        Order == AtomicOrdering::SequentiallyConsistent) {
      Changed |=
          CC->insertWait(MI, MOI.getScope(), MOI.getInstrAddrSpace(),
                         SIMemOp::LOAD, MOI.getIsCrossAddressSpaceOrdering(),
                         Position::AFTER, Order, true);
      Changed |= CC->insertAcquire(MI, MOI.getScope(),
                                   MOI.getOrderingAddrSpace(),
                                   Position::AFTER);
    }

    return Changed;
  }

  // Atomic instructions already bypass caches to the scope specified by the
  // SyncScope operand. Only non-atomic volatile and nontemporal/last-use
  // instructions need additional treatment.
  Changed |= CC->enableVolatileAndOrNonTemporal(
      MI, MOI.getInstrAddrSpace(), SIMemOp::LOAD, MOI.isVolatile(),
      MOI.isNonTemporal(), MOI.isLastUse());

  return Changed;
}

bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI,
                                    MachineBasicBlock::iterator &MI) {
  assert(!MI->mayLoad() && MI->mayStore());

  bool Changed = false;

  MachineInstr &StoreMI = *MI;

  if (MOI.isAtomic()) {
    if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
        MOI.getOrdering() == AtomicOrdering::Release ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->enableStoreCacheBypass(MI, MOI.getScope(),
                                            MOI.getOrderingAddrSpace());
    }

    // Handle cooperative atomics after cache bypass, as they may need to
    // override some of the cache policies.
    if (MOI.isCooperative())
      Changed |= CC->handleCooperativeAtomic(*MI);

    if (MOI.getOrdering() == AtomicOrdering::Release ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertRelease(MI, MOI.getScope(),
                                   MOI.getOrderingAddrSpace(),
                                   MOI.getIsCrossAddressSpaceOrdering(),
                                   Position::BEFORE);

    Changed |= CC->finalizeStore(StoreMI, /*Atomic=*/true);
    return Changed;
  }

  // Atomic instructions already bypass caches to the scope specified by the
  // SyncScope operand. Only non-atomic volatile and nontemporal instructions
  // need additional treatment.
  Changed |= CC->enableVolatileAndOrNonTemporal(
      MI, MOI.getInstrAddrSpace(), SIMemOp::STORE, MOI.isVolatile(),
      MOI.isNonTemporal());

  // GFX12 specific, scope(desired coherence domain in cache hierarchy) is
  // instruction field, do not confuse it with atomic scope.
  Changed |= CC->finalizeStore(StoreMI, /*Atomic=*/false);
  return Changed;
}

bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,
                                          MachineBasicBlock::iterator &MI) {
  assert(MI->getOpcode() == AMDGPU::ATOMIC_FENCE);

  AtomicPseudoMIs.push_back(MI);
  bool Changed = false;

  const SIAtomicAddrSpace OrderingAddrSpace = MOI.getOrderingAddrSpace();

  if (MOI.isAtomic()) {
    const AtomicOrdering Order = MOI.getOrdering();
    if (Order == AtomicOrdering::Acquire) {
      Changed |= CC->insertWait(MI, MOI.getScope(), OrderingAddrSpace,
                                SIMemOp::LOAD | SIMemOp::STORE,
                                MOI.getIsCrossAddressSpaceOrdering(),
                                Position::BEFORE, Order, true);
    }

    if (Order == AtomicOrdering::Release ||
        Order == AtomicOrdering::AcquireRelease ||
        Order == AtomicOrdering::SequentiallyConsistent)
      // TODO: This relies on a barrier always generating a waitcnt for LDS to
      // ensure it is not reordered with the completion of the preceding LDS
      // operations. If a barrier had a memory ordering and memory scope, then
      // the library would not need to generate a fence. Could add support in
      // this file for barrier; SIInsertWaitcnts.cpp could then stop
      // unconditionally adding an S_WAITCNT before an S_BARRIER.
      Changed |= CC->insertRelease(MI, MOI.getScope(), OrderingAddrSpace,
                                   MOI.getIsCrossAddressSpaceOrdering(),
                                   Position::BEFORE);

    // TODO: If both release and invalidate are happening they could be
    // combined to use the single "BUFFER_WBINV*" instruction. This could be
    // done by reorganizing this code or as part of optimizing the
    // SIInsertWaitcnts pass.
    if (Order == AtomicOrdering::Acquire ||
        Order == AtomicOrdering::AcquireRelease ||
        Order == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertAcquire(MI, MOI.getScope(), OrderingAddrSpace,
                                   Position::BEFORE);

    return Changed;
  }

  return Changed;
}

bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
                                                 MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && MI->mayStore());

  bool Changed = false;
  MachineInstr &RMWMI = *MI;

  if (MOI.isAtomic()) {
    const AtomicOrdering Order = MOI.getOrdering();
    if (Order == AtomicOrdering::Monotonic ||
        Order == AtomicOrdering::Acquire || Order == AtomicOrdering::Release ||
        Order == AtomicOrdering::AcquireRelease ||
        Order == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->enableRMWCacheBypass(MI, MOI.getScope(),
                                          MOI.getInstrAddrSpace());
    }

    if (Order == AtomicOrdering::Release ||
        Order == AtomicOrdering::AcquireRelease ||
        Order == AtomicOrdering::SequentiallyConsistent ||
        MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertRelease(MI, MOI.getScope(),
                                   MOI.getOrderingAddrSpace(),
                                   MOI.getIsCrossAddressSpaceOrdering(),
                                   Position::BEFORE);

    if (Order == AtomicOrdering::Acquire ||
        Order == AtomicOrdering::AcquireRelease ||
        Order == AtomicOrdering::SequentiallyConsistent ||
        MOI.getFailureOrdering() == AtomicOrdering::Acquire ||
        MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
      Changed |=
          CC->insertWait(MI, MOI.getScope(), MOI.getInstrAddrSpace(),
                         isAtomicRet(*MI) ? SIMemOp::LOAD : SIMemOp::STORE,
                         MOI.getIsCrossAddressSpaceOrdering(), Position::AFTER,
                         Order, true);
      Changed |= CC->insertAcquire(MI, MOI.getScope(),
                                   MOI.getOrderingAddrSpace(),
                                   Position::AFTER);
    }

    Changed |= CC->finalizeStore(RMWMI, /*Atomic=*/true);
    return Changed;
  }

  return Changed;
}

bool SIMemoryLegalizer::expandLDSDMA(const SIMemOpInfo &MOI,
                                     MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && MI->mayStore());

  // An LDS DMA that writes LDS reads from the global address space, and one
  // that reads LDS writes to the global address space.
  SIMemOp OpKind =
      SIInstrInfo::mayWriteLDSThroughDMA(*MI) ? SIMemOp::LOAD : SIMemOp::STORE;

  // Handle volatile and nontemporal/last-use hints against the global memory
  // side of the transfer.
  return CC->enableVolatileAndOrNonTemporal(
      MI, MOI.getInstrAddrSpace(), OpKind, MOI.isVolatile(),
      MOI.isNonTemporal(), MOI.isLastUse());
}

bool SIMemoryLegalizerLegacy::runOnMachineFunction(MachineFunction &MF) {
  const MachineModuleInfo &MMI =
      getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
  return SIMemoryLegalizer(MMI).run(MF);
}

PreservedAnalyses
SIMemoryLegalizerPass::run(MachineFunction &MF,
                           MachineFunctionAnalysisManager &MFAM) {
  auto *MMI = MFAM.getResult<ModuleAnalysisManagerMachineFunctionProxy>(MF)
                  .getCachedResult<MachineModuleAnalysis>(
                      *MF.getFunction().getParent());
  assert(MMI && "MachineModuleAnalysis must be available");
  if (!SIMemoryLegalizer(MMI->getMMI()).run(MF))
    return PreservedAnalyses::all();
  return getMachineFunctionPassPreservedAnalyses().preserveSet<CFGAnalyses>();
}

bool SIMemoryLegalizer::run(MachineFunction &MF) {
  bool Changed = false;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  SIMemOpAccess MOA(MMI.getObjFileInfo<AMDGPUMachineModuleInfo>(), ST);
  CC = SICacheControl::create(ST);

  for (auto &MBB : MF) {
    for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {

      // Unbundle instructions after the post-RA scheduler.
      if (MI->isBundle() && MI->mayLoadOrStore()) {
        MachineBasicBlock::instr_iterator II(MI->getIterator());
        for (MachineBasicBlock::instr_iterator I = ++II, E = MBB.instr_end();
             I != E && I->isBundledWithPred(); ++I) {
          I->unbundleFromPred();
          for (MachineOperand &MO : I->operands())
            if (MO.isReg())
              MO.setIsInternalRead(false);
        }

        MI->eraseFromParent();
        MI = II->getIterator();
      }

      if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic))
        continue;

      if (const auto &MOI = MOA.getLoadInfo(MI)) {
        Changed |= expandLoad(*MOI, MI);
      } else if (const auto &MOI = MOA.getStoreInfo(MI)) {
        Changed |= expandStore(*MOI, MI);
      } else if (const auto &MOI = MOA.getLDSDMAInfo(MI)) {
        Changed |= expandLDSDMA(*MOI, MI);
      } else if (const auto &MOI = MOA.getAtomicFenceInfo(MI)) {
        Changed |= expandAtomicFence(*MOI, MI);
      } else if (const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(MI)) {
        Changed |= expandAtomicCmpxchgOrRmw(*MOI, MI);
      }
    }
  }

  Changed |= removeAtomicPseudoMIs();
  return Changed;
}

INITIALIZE_PASS(SIMemoryLegalizerLegacy, DEBUG_TYPE, PASS_NAME, false, false)

char SIMemoryLegalizerLegacy::ID = 0;
char &llvm::SIMemoryLegalizerID = SIMemoryLegalizerLegacy::ID;

FunctionPass *llvm::createSIMemoryLegalizerPass() {
  return new SIMemoryLegalizerLegacy();
}