LLVM: lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp Source File (original) (raw)
30 return Ty.isPointer() && Ty.getSizeInBits() == Width;
31}
34 std::initializer_list DstOpMappingList,
35 std::initializer_list SrcOpMappingList,
41 std::initializer_list OpList,
48 switch (UniID) {
49 case S1:
59 case P0:
61 case P1:
63 case P3:
65 case P4:
67 case P5:
69 case P8:
82 return MRI.getType(Reg).getSizeInBits() == 32;
84 return MRI.getType(Reg).getSizeInBits() == 64;
86 return MRI.getType(Reg).getSizeInBits() == 96;
88 return MRI.getType(Reg).getSizeInBits() == 128;
90 return MRI.getType(Reg).getSizeInBits() == 256;
92 return MRI.getType(Reg).getSizeInBits() == 512;
130 return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isUniform(Reg);
132 return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isUniform(Reg);
134 return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isUniform(Reg);
175 case _:
176 return true;
177 default:
179 }
180}
185
188 if (MI.getOperand(i).isReg())
189 return false;
190 continue;
191 }
192
193
194 if (.getOperand(i).isReg())
195 return false;
196
199 return false;
200 }
201
202
205
206 return true;
207}
212 : FastTypes(FastTypes) {}
216 return S16;
218 return S32;
220 return S64;
229 return _;
230}
235 return B32;
238 return B64;
240 return B96;
243 return _;
244}
250
251
252
253
255 Register Reg = MI.getOperand(0).getReg();
256 int Slot;
258 Slot = getFastPredicateSlot(LLTToBId(MRI.getType(Reg)));
259 else
260 Slot = getFastPredicateSlot(LLTToId(MRI.getType(Reg)));
261
262 if (Slot != -1)
263 return MUI.isUniform(Reg) ? &Uni[Slot] : &Div[Slot];
264 }
265
266
268 if (Rule.Predicate.match(MI, MUI, MRI))
269 return &Rule.OperandMapping;
270 }
271
272 return nullptr;
273}
276 Rules.push_back(Rule);
277}
281 int Slot = getFastPredicateSlot(Ty);
282 assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
283 Div[Slot] = RuleApplyIDs;
284}
288 int Slot = getFastPredicateSlot(Ty);
289 assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
290 Uni[Slot] = RuleApplyIDs;
291}
344RegBankLegalizeRules::addRulesForGOpcs(std::initializer_list OpcList,
350RegBankLegalizeRules::addRulesForIOpcs(std::initializer_list OpcList,
357 unsigned Opc = MI.getOpcode();
358 if (Opc == AMDGPU::G_INTRINSIC || Opc == AMDGPU::G_INTRINSIC_CONVERGENT ||
359 Opc == AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS ||
360 Opc == AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS) {
362 auto IRAIt = IRulesAlias.find(IntrID);
363 if (IRAIt == IRulesAlias.end())
364 return nullptr;
365 return &IRules.at(IRAIt->second);
366 }
367
368 auto GRAIt = GRulesAlias.find(Opc);
369 if (GRAIt == GRulesAlias.end())
370 return nullptr;
371 return &GRules.at(GRAIt->second);
372}
// Predicate: a boolean expression over an instruction, stored as a flat
// sequence of elements (`Expression`) and evaluated by jumping between
// elements with relative offsets. Predicates can be combined with `&&`, `||`
// and negated; each combinator returns a new Predicate built from a rewritten
// element sequence.
// NOTE(review): several lines of this class (the `Pred` member of Elt, the
// `Expression` member, the constructor / operator() / operator! headers and
// the local vector declarations in operator&& / operator||) are not present in
// this listing; comments below describe only what the visible lines do.
375class Predicate {
376private:
// One step of the expression. The visible uses below show that each element
// also carries a callable `Pred` that is invoked with the instruction.
377 struct Elt {
378
379
380
381
382
383
384
385
386
387
388
// When set, the value returned by Pred is inverted before the jump is chosen.
390 bool Neg;
// Relative offset added to the current index when the (possibly inverted)
// result is true.
391 unsigned TJumpOffset;
// Relative offset added to the current index when the (possibly inverted)
// result is false.
392 unsigned FJumpOffset;
393 };
394
396
398
399public:
// Appends a single non-negated element whose true and false offsets are both 1.
// Assuming Expression is empty beforehand (TODO confirm), evaluation of the
// resulting expression stops right after this one element.
401 Expression.push_back({Pred, false, 1, 1});
402 };
403
// Evaluation: start at element 0 and keep jumping until the index equals
// Expression.size(). The do/while evaluates element 0 unconditionally, so the
// expression must contain at least one element. The value returned is the
// (possibly inverted) result of the last element that was evaluated.
405 unsigned Idx = 0;
406 unsigned ResultIdx = Expression.size();
407 bool Result;
408 do {
409 Result = Expression[Idx].Pred(MI);
// Apply the per-element negation before selecting the jump.
410 Result = Expression[Idx].Neg ? !Result : Result;
411 if (Result) {
412 Idx += Expression[Idx].TJumpOffset;
413 } else {
414 Idx += Expression[Idx].FJumpOffset;
415 }
416 } while ((Idx != ResultIdx));
417
418 return Result;
419 };
420
// Negation: every element keeps its Pred, gets its Neg flag flipped, and has
// its true/false jump offsets swapped. The rebuilt sequence is returned as a
// new Predicate.
423 for (const Elt &ExprElt : Expression) {
424 NegExpression.push_back({ExprElt.Pred, !ExprElt.Neg, ExprElt.FJumpOffset,
425 ExprElt.TJumpOffset});
426 }
427 return Predicate(std::move(NegExpression));
428 };
429
// Logical AND. Returns a new Predicate made of this expression's elements
// followed by RHS's elements.
// NOTE(review): AndExpression is declared on a line not shown here; the loop
// treats its first Expression.size() elements as the left-hand side, so it
// presumably starts as a copy of Expression - confirm.
430 Predicate operator&&(const Predicate &RHS) const {
432
433 unsigned RHSSize = RHS.Expression.size();
434 unsigned ResultIdx = Expression.size();
435 for (unsigned i = 0; i < ResultIdx; ++i) {
436
// A false-jump that used to land exactly at the end of the left-hand side is
// lengthened by the size of RHS, so a false left-hand result skips all of the
// appended RHS elements. True-jumps are left alone, so those that landed at the
// old end now land on the first RHS element.
437 if (i + AndExpression[i].FJumpOffset == ResultIdx)
438 AndExpression[i].FJumpOffset += RHSSize;
439 }
440
// RHS elements are appended unchanged; their offsets are relative, so they
// need no adjustment.
441 AndExpression.append(RHS.Expression);
442
443 return Predicate(std::move(AndExpression));
444 }
445
// Logical OR. Mirror image of operator&&: returns a new Predicate made of this
// expression's elements followed by RHS's elements.
// NOTE(review): OrExpression is declared on a line not shown here; presumably
// it starts as a copy of Expression - confirm.
446 Predicate operator||(const Predicate &RHS) const {
448
449 unsigned RHSSize = RHS.Expression.size();
450 unsigned ResultIdx = Expression.size();
451 for (unsigned i = 0; i < ResultIdx; ++i) {
452
// A true-jump that used to land exactly at the end of the left-hand side is
// lengthened by the size of RHS, so a true left-hand result skips all of the
// appended RHS elements. False-jumps that landed at the old end now land on
// the first RHS element.
453 if (i + OrExpression[i].TJumpOffset == ResultIdx)
454 OrExpression[i].TJumpOffset += RHSSize;
455 }
456
// RHS elements are appended unchanged; their offsets are relative.
457 OrExpression.append(RHS.Expression);
458
459 return Predicate(std::move(OrExpression));
460 }
461};
466 : ST(&_ST), MRI(&_MRI) {
467
468 addRulesForGOpcs({G_ADD, G_SUB}, Standard)
477
478 addRulesForGOpcs({G_UADDO, G_USUBO}, Standard)
481
482 addRulesForGOpcs({G_UADDE, G_USUBE}, Standard)
485
487
488 addRulesForGOpcs({G_XOR, G_OR, G_AND}, StandardB)
497
498 addRulesForGOpcs({G_SHL}, Standard)
507
508 addRulesForGOpcs({G_LSHR}, Standard)
517
518 addRulesForGOpcs({G_ASHR}, Standard)
527
528 addRulesForGOpcs({G_FSHR}, Standard)
531
533
534 addRulesForGOpcs({G_UBFX, G_SBFX}, Standard)
539
540 addRulesForGOpcs({G_SMIN, G_SMAX}, Standard)
547
548 addRulesForGOpcs({G_UMIN, G_UMAX}, Standard)
555
556
557
559 addRulesForGOpcs({G_CONSTANT})
561 addRulesForGOpcs({G_FREEZE}).Any({{DivS1}, {{Vcc}, {Vcc}}});
562
563 addRulesForGOpcs({G_ICMP})
567
568 addRulesForGOpcs({G_FCMP})
571
572 addRulesForGOpcs({G_BRCOND})
575
576 addRulesForGOpcs({G_BR}).Any({{_}, {{}, {None}}});
577
578 addRulesForGOpcs({G_SELECT}, StandardB)
585
586 addRulesForGOpcs({G_ANYEXT})
597
598
599
600 addRulesForGOpcs({G_TRUNC})
610
614
615 addRulesForGOpcs({G_ZEXT})
624
629
630 addRulesForGOpcs({G_SEXT})
639
644
645 addRulesForGOpcs({G_SEXT_INREG})
650
651 addRulesForGOpcs({G_ASSERT_ZEXT, G_ASSERT_SEXT}, Standard)
656
657 bool hasSMRDx3 = ST->hasScalarDwordx3Loads();
658 bool hasSMRDSmall = ST->hasScalarSubwordLoads();
659 bool usesTrue16 = ST->useRealTrue16Insts();
660
// Predicates over an instruction's memory operand, used to build `isUL` below.
// Every lambda whose body is visible dereferences MI.memoperands_begin()
// without checking that the instruction has a memory operand, so these must
// only be evaluated on instructions that carry at least one.

// True when the first memory operand's alignment is at least 16.
661 Predicate isAlign16([](const MachineInstr &MI) -> bool {
662 return (*MI.memoperands_begin())->getAlign() >= Align(16);
663 });
664
// True when the first memory operand's alignment is at least 4.
665 Predicate isAlign4([](const MachineInstr &MI) -> bool {
666 return (*MI.memoperands_begin())->getAlign() >= Align(4);
667 });
668
// True when the first memory operand reports isAtomic().
669 Predicate isAtomicMMO([](const MachineInstr &MI) -> bool {
670 return (*MI.memoperands_begin())->isAtomic();
671 });
672
// NOTE(review): the body of this lambda is not present in this listing.
673 Predicate isUniMMO([](const MachineInstr &MI) -> bool {
675 });
676
// NOTE(review): the body of this lambda is not present in this listing.
677 Predicate isConst([](const MachineInstr &MI) -> bool {
678
683 });
684
// True when the first memory operand reports isVolatile().
685 Predicate isVolatileMMO([](const MachineInstr &MI) -> bool {
686 return (*MI.memoperands_begin())->isVolatile();
687 });
688
// True when the first memory operand reports isInvariant().
689 Predicate isInvMMO([](const MachineInstr &MI) -> bool {
690 return (*MI.memoperands_begin())->isInvariant();
691 });
692
// True when the MONoClobber bit is set in the first memory operand's flags.
693 Predicate isNoClobberMMO([](const MachineInstr &MI) -> bool {
694 return (*MI.memoperands_begin())->getFlags() & MONoClobber;
695 });
696
// NOTE(review): the body of this lambda is not present in this listing.
697 Predicate isNaturalAligned([](const MachineInstr &MI) -> bool {
700 });
701
// True when MemSize is 16 or 8. The lines that compute MemSize are not present
// in this listing.
702 Predicate is8Or16BitMMO([](const MachineInstr &MI) -> bool {
705 return MemSize == 16 || MemSize == 8;
706 });
707
// NOTE(review): the body of this lambda is not present in this listing.
708 Predicate is32BitMMO([](const MachineInstr &MI) -> bool {
711 });
712
// Composite predicate: not atomic, AND isUniMMO, AND (isConst OR not volatile),
// AND (isConst OR invariant OR no-clobber). It is used in the G_LOAD rules
// further down to choose between the Sgpr and UniInVgpr mappings for uniform
// loads. NOTE(review): "UL" presumably stands for "uniform load" - confirm.
713 auto isUL = !isAtomicMMO && isUniMMO && (isConst || !isVolatileMMO) &&
714 (isConst || isInvMMO || isNoClobberMMO);
715
716
717
718 addRulesForGOpcs({G_LOAD})
719
721 .Any({{DivB32, P0}, {{VgprB32}, {VgprP0}}})
725
726
727
729 .Any({{DivB32, P1}, {{VgprB32}, {VgprP1}}})
735
736
737 .Any({{{UniS16, P1}, isNaturalAligned && isUL}, {{Sgpr32Trunc}, {SgprP1}}}, usesTrue16 && hasSMRDSmall)
739 .Any({{{UniB32, P1}, isNaturalAligned && isUL}, {{SgprB32}, {SgprP1}}}, hasSMRDSmall)
740
741 .Any({{{UniB32, P1}, is8Or16BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP1}, WidenMMOToS32}}, !hasSMRDSmall)
750
751
752
753
754
755
756
757 .Any({{{UniS16, P1}, !isNaturalAligned || !isUL}, {{UniInVgprS16}, {SgprP1}}}, usesTrue16 && hasSMRDSmall)
759 .Any({{{UniB32, P1}, !isNaturalAligned || !isUL}, {{UniInVgprB32}, {SgprP1}}}, hasSMRDSmall)
760 .Any({{{UniB32, P1}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP1}}}, !hasSMRDSmall)
766
767
769 .Any({{DivB32, P3}, {{VgprB32}, {VgprP3}}})
773
779
780
781
783 .Any({{DivB32, P4}, {{VgprB32}, {VgprP4}}})
789
790
791 .Any({{{UniS16, P4}, isNaturalAligned && isUL}, {{Sgpr32Trunc}, {SgprP4}}}, usesTrue16 && hasSMRDSmall)
793 .Any({{{UniB32, P4}, isNaturalAligned && isUL}, {{SgprB32}, {SgprP4}}}, hasSMRDSmall)
794 .Any({{{UniB32, P4}, is8Or16BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP4}, WidenMMOToS32}}, !hasSMRDSmall)
803
804
805 .Any({{{UniS16, P4}, !isNaturalAligned || !isUL}, {{UniInVgprS16}, {SgprP4}}}, usesTrue16 && hasSMRDSmall)
807 .Any({{{UniB32, P4}, !isNaturalAligned || !isUL}, {{UniInVgprB32}, {SgprP4}}}, hasSMRDSmall)
808 .Any({{{UniB32, P4}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP4}}}, !hasSMRDSmall)
814
815
817 .Any({{DivB32, P5}, {{VgprB32}, {VgprP5}}})
821
823
824
825 addRulesForGOpcs({G_ZEXTLOAD, G_SEXTLOAD})
827
833
836
842
844
845 addRulesForGOpcs({G_STORE})
846
848 .Any({{B32, P0}, {{}, {VgprB32, VgprP0}}})
852
853
854
855
856
858 .Any({{B32, DivP1}, {{}, {VgprB32, VgprP1}}})
862
863
864
865
867 .Any({{B32, UniP1}, {{}, {VgprB32, SgprP1}}})
871
872
878
879
880 addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD, G_AMDGPU_BUFFER_LOAD_FORMAT,
881 G_AMDGPU_TBUFFER_LOAD_FORMAT},
891
892 addRulesForGOpcs({G_AMDGPU_BUFFER_STORE})
894
895 addRulesForGOpcs({G_PTR_ADD})
900
901 addRulesForGOpcs({G_INTTOPTR})
908
909 addRulesForGOpcs({G_PTRTOINT})
916
918
919 addRulesForGOpcs({G_FENCE}).Any({{{}}, {{}, {}}});
920
921 addRulesForGOpcs({G_READSTEADYCOUNTER, G_READCYCLECOUNTER}, Standard)
923
924 addRulesForGOpcs({G_BLOCK_ADDR}).Any({{UniP0}, {{SgprP0}, {}}});
925
926 addRulesForGOpcs({G_GLOBAL_VALUE})
932
933 addRulesForGOpcs({G_AMDGPU_WAVE_ADDRESS}).Any({{UniP5}, {{SgprP5}, {}}});
934
935 bool hasSALUFloat = ST->hasSALUFloatInsts();
936
937 addRulesForGOpcs({G_FADD, G_FMUL, G_STRICT_FADD, G_STRICT_FMUL}, Standard)
948 hasSALUFloat)
950
951
952
953
954
955 addRulesForGOpcs({G_FNEG, G_FABS}, Standard)
969
970 addRulesForGOpcs({G_FPTOUI})
973
974 addRulesForGOpcs({G_UITOFP})
978
979 addRulesForGOpcs({G_IS_FPCLASS})
986
988
990
991
992 addRulesForIOpcs({amdgcn_end_cf}).Any({{_, S32}, {{}, {None, Sgpr32}}});
993
994 addRulesForIOpcs({amdgcn_if_break}, Standard)
996
997 addRulesForIOpcs({amdgcn_mbcnt_lo, amdgcn_mbcnt_hi}, Standard)
999
1000 addRulesForIOpcs({amdgcn_readfirstlane})
1002
1003
1005
1006}