LLVM: lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
29#include "llvm/IR/IntrinsicsAMDGPU.h"
32
33#ifdef EXPENSIVE_CHECKS
36#endif
37
38#define DEBUG_TYPE "amdgpu-isel"
39
40using namespace llvm;
41
42
43
44
45
46namespace {
49}
50
51
53 In = stripBitcast(In);
54
57 if (!Idx->isOne())
58 return false;
59 Out = In.getOperand(0);
60 return true;
61 }
62 }
63
65 return false;
66
67 SDValue Srl = In.getOperand(0);
70 if (ShiftAmt->getZExtValue() == 16) {
71 Out = stripBitcast(Srl.getOperand(0));
72 return true;
73 }
74 }
75 }
76
77 return false;
78}
79
84 return Lo;
85 }
86
89
90 if (Lo->isDivergent()) {
92 SL, Lo.getValueType()),
93 0);
98
100 Src.getValueType(), Ops),
101 0);
102 } else {
103
104
105
107 CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, Src.getValueType(), Lo),
108 0);
109 }
110
111 return NewSrc;
112}
113
114
115
118 SDValue Idx = In.getOperand(1);
119 if (isNullConstant(Idx) && In.getValueSizeInBits() <= 32)
120 return In.getOperand(0);
121 }
122
124 SDValue Src = In.getOperand(0);
125 if (Src.getValueType().getSizeInBits() == 32)
126 return stripBitcast(Src);
127 }
128
129 return In;
130}
131
132}
133
135 "AMDGPU DAG->DAG Pattern Instruction Selection", false,
136 false)
140#ifdef EXPENSIVE_CHECKS
143#endif
145 "AMDGPU DAG->DAG Pattern Instruction Selection", false,
146 false)
147
148
149
154
158
165
166bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const {
167
168 switch (Opc) {
177 case ISD::FABS:
178
179
180 case ISD::FSQRT:
181 case ISD::FSIN:
182 case ISD::FCOS:
183 case ISD::FPOWI:
184 case ISD::FPOW:
185 case ISD::FLOG:
186 case ISD::FLOG2:
187 case ISD::FLOG10:
188 case ISD::FEXP:
189 case ISD::FEXP2:
190 case ISD::FCEIL:
191 case ISD::FTRUNC:
192 case ISD::FRINT:
193 case ISD::FNEARBYINT:
194 case ISD::FROUNDEVEN:
195 case ISD::FROUND:
196 case ISD::FFLOOR:
197 case ISD::FMINNUM:
198 case ISD::FMAXNUM:
199 case ISD::FLDEXP:
200 case AMDGPUISD::FRACT:
201 case AMDGPUISD::CLAMP:
202 case AMDGPUISD::COS_HW:
203 case AMDGPUISD::SIN_HW:
204 case AMDGPUISD::FMIN3:
205 case AMDGPUISD::FMAX3:
206 case AMDGPUISD::FMED3:
207 case AMDGPUISD::FMAD_FTZ:
208 case AMDGPUISD::RCP:
209 case AMDGPUISD::RSQ:
210 case AMDGPUISD::RCP_IFLAG:
211
214
215
216
220 case AMDGPUISD::DIV_FIXUP:
222 default:
223
224
225 return false;
226 }
227}
228
230#ifdef EXPENSIVE_CHECKS
234 assert(L->isLCSSAForm(DT));
235 }
236#endif
238}
239
243#ifdef EXPENSIVE_CHECKS
246#endif
248}
249
251 assert(Subtarget->d16PreservesUnusedBits());
252 MVT VT = N->getValueType(0).getSimpleVT();
253 if (VT != MVT::v2i16 && VT != MVT::v2f16)
254 return false;
255
258
260
261
262
263
264
265
266
269
273 };
274
275 unsigned LoadOp = AMDGPUISD::LOAD_D16_HI;
278 AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8;
279 } else {
281 }
282
284 CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdHi), VTList,
287
288 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadHi);
290 return true;
291 }
292
293
294
295
297 if (LdLo && Lo.hasOneUse()) {
300 return false;
301
303 unsigned LoadOp = AMDGPUISD::LOAD_D16_LO;
306 AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8;
307 } else {
309 }
310
311 TiedIn = CurDAG->getNode(ISD::BITCAST, SDLoc(N), VT, TiedIn);
312
315 };
316
318 CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdLo), VTList,
321
322 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadLo);
324 return true;
325 }
326
327 return false;
328}
329
331 if (!Subtarget->d16PreservesUnusedBits())
332 return;
333
335
336 bool MadeChange = false;
337 while (Position != CurDAG->allnodes_begin()) {
339 if (N->use_empty())
340 continue;
341
342 switch (N->getOpcode()) {
344
346 break;
347 default:
348 break;
349 }
350 }
351
352 if (MadeChange) {
353 CurDAG->RemoveDeadNodes();
356 }
357}
358
359bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
360 if (N->isUndef())
361 return true;
362
365 return TII->isInlineConstant(C->getAPIntValue());
366
368 return TII->isInlineConstant(C->getValueAPF());
369
370 return false;
371}
372
373
374
375
376
378 unsigned OpNo) const {
379 if (->isMachineOpcode()) {
382 if (Reg.isVirtual()) {
384 return MRI.getRegClass(Reg);
385 }
386
387 const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
388 return TRI->getPhysRegBaseClass(Reg);
389 }
390
391 return nullptr;
392 }
393
394 switch (N->getMachineOpcode()) {
395 default: {
396 const SIInstrInfo *TII = Subtarget->getInstrInfo();
397 const MCInstrDesc &Desc = TII->get(N->getMachineOpcode());
398 unsigned OpIdx = Desc.getNumDefs() + OpNo;
399 if (OpIdx >= Desc.getNumOperands())
400 return nullptr;
401
402 int16_t RegClass = TII->getOpRegClassID(Desc.operands()[OpIdx]);
403 if (RegClass == -1)
404 return nullptr;
405
406 return Subtarget->getRegisterInfo()->getRegClass(RegClass);
407 }
408 case AMDGPU::REG_SEQUENCE: {
409 unsigned RCID = N->getConstantOperandVal(0);
410 const TargetRegisterClass *SuperRC =
411 Subtarget->getRegisterInfo()->getRegClass(RCID);
412
413 SDValue SubRegOp = N->getOperand(OpNo + 1);
414 unsigned SubRegIdx = SubRegOp->getAsZExtVal();
415 return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
416 SubRegIdx);
417 }
418 }
419}
420
424 Ops.push_back(NewChain);
425 for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
426 Ops.push_back(N->getOperand(i));
427
428 Ops.push_back(Glue);
429 return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
430}
431
433 const SITargetLowering& Lowering =
435
436 assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");
437
439 return glueCopyToOp(N, M0, M0.getValue(1));
440}
441
442SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
445 if (Subtarget->ldsRequiresM0Init())
446 return glueCopyToM0(
447 N, CurDAG->getSignedTargetConstant(-1, SDLoc(N), MVT::i32));
449 MachineFunction &MF = CurDAG->getMachineFunction();
450 unsigned Value = MF.getInfo()->getGDSSize();
451 return
452 glueCopyToM0(N, CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i32));
453 }
454 return N;
455}
456
458 EVT VT) const {
459 SDNode *Lo = CurDAG->getMachineNode(
460 AMDGPU::S_MOV_B32, DL, MVT::i32,
461 CurDAG->getTargetConstant(Lo_32(Imm), DL, MVT::i32));
462 SDNode *Hi = CurDAG->getMachineNode(
463 AMDGPU::S_MOV_B32, DL, MVT::i32,
464 CurDAG->getTargetConstant(Hi_32(Imm), DL, MVT::i32));
466 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
467 SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
468 SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};
469
470 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
471}
472
473SDNode *AMDGPUDAGToDAGISel::packConstantV2I16(const SDNode *N,
475
476
478 uint32_t LHSVal, RHSVal;
481 SDLoc SL(N);
482 uint32_t K = (LHSVal & 0xffff) | (RHSVal << 16);
484 isVGPRImm(N) ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32, SL,
486 }
487
488 return nullptr;
489}
490
492 EVT VT = N->getValueType(0);
496 SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
497
498 if (NumVectorElts == 1) {
499 CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
500 RegClass);
501 return;
502 }
503
504 bool IsGCN = CurDAG->getSubtarget().getTargetTriple().isAMDGCN();
505 if (IsGCN && Subtarget->has64BitLiterals() && VT.getSizeInBits() == 64 &&
508 bool AllConst = true;
510 for (unsigned I = 0; I < NumVectorElts; ++I) {
512 if (Op.isUndef()) {
513 AllConst = false;
514 break;
515 }
518 Val = CF->getValueAPF().bitcastToAPInt().getZExtValue();
519 } else
521 C |= Val << (EltSize * I);
522 }
523 if (AllConst) {
526 CurDAG->getMachineNode(AMDGPU::S_MOV_B64_IMM_PSEUDO, DL, VT, CV);
527 CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, VT, SDValue(Copy, 0),
528 RegClass);
529 return;
530 }
531 }
532
533 assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
534 "supported yet");
535
536
537
539
540 RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
541 bool IsRegSeq = true;
542 unsigned NOps = N->getNumOperands();
543 for (unsigned i = 0; i < NOps; i++) {
544
546 IsRegSeq = false;
547 break;
548 }
551 RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
552 RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
553 }
554 if (NOps != NumVectorElts) {
555
557 MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
558 DL, EltVT);
559 for (unsigned i = NOps; i < NumVectorElts; ++i) {
562 RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
563 RegSeqArgs[1 + (2 * i) + 1] =
564 CurDAG->getTargetConstant(Sub, DL, MVT::i32);
565 }
566 }
567
568 if (!IsRegSeq)
569 SelectCode(N);
570 CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
571}
572
574 EVT VT = N->getValueType(0);
576
577
578 if (!Subtarget->hasPkMovB32() || !EltVT.bitsEq(MVT::i32) ||
580 SelectCode(N);
581 return;
582 }
583
585
590
592 Mask[0] < 4 && Mask[1] < 4);
593
594 SDValue VSrc0 = Mask[0] < 2 ? Src0 : Src1;
595 SDValue VSrc1 = Mask[1] < 2 ? Src0 : Src1;
596 unsigned Src0SubReg = Mask[0] & 1 ? AMDGPU::sub1 : AMDGPU::sub0;
597 unsigned Src1SubReg = Mask[1] & 1 ? AMDGPU::sub1 : AMDGPU::sub0;
598
599 if (Mask[0] < 0) {
600 Src0SubReg = Src1SubReg;
602 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
603 VSrc0 = SDValue(ImpDef, 0);
604 }
605
606 if (Mask[1] < 0) {
607 Src1SubReg = Src0SubReg;
609 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
610 VSrc1 = SDValue(ImpDef, 0);
611 }
612
613
614
615
616
617
618
619 if (N->isDivergent() && Src0SubReg == AMDGPU::sub1 &&
620 Src1SubReg == AMDGPU::sub0) {
621
622
623
624
625
626 unsigned Src0OpSel =
628 unsigned Src1OpSel =
630
631
632
635
636 SDValue Src0OpSelVal = CurDAG->getTargetConstant(Src0OpSel, DL, MVT::i32);
637 SDValue Src1OpSelVal = CurDAG->getTargetConstant(Src1OpSel, DL, MVT::i32);
638 SDValue ZeroMods = CurDAG->getTargetConstant(0, DL, MVT::i32);
639
640 CurDAG->SelectNodeTo(N, AMDGPU::V_PK_MOV_B32, N->getVTList(),
641 {Src0OpSelVal, VSrc0, Src1OpSelVal, VSrc1,
642 ZeroMods,
643 ZeroMods,
644 ZeroMods,
645 ZeroMods,
646 ZeroMods});
647 return;
648 }
649
651 CurDAG->getTargetExtractSubreg(Src0SubReg, DL, EltVT, VSrc0);
653 CurDAG->getTargetExtractSubreg(Src1SubReg, DL, EltVT, VSrc1);
654
656 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
657 ResultElt0, CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
658 ResultElt1, CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};
659 CurDAG->SelectNodeTo(N, TargetOpcode::REG_SEQUENCE, VT, Ops);
660}
661
663 unsigned int Opc = N->getOpcode();
664 if (N->isMachineOpcode()) {
665 N->setNodeId(-1);
666 return;
667 }
668
669
670
672 N = glueCopyToM0LDSInit(N);
673 SelectCode(N);
674 return;
675 }
676
677 switch (Opc) {
678 default:
679 break;
680
681
682
687 if (N->getValueType(0) != MVT::i64)
688 break;
689
690 SelectADD_SUB_I64(N);
691 return;
692 }
695 if (N->getValueType(0) != MVT::i32)
696 break;
697
698 SelectAddcSubb(N);
699 return;
702 SelectUADDO_USUBO(N);
703 return;
704 }
705 case AMDGPUISD::FMUL_W_CHAIN: {
706 SelectFMUL_W_CHAIN(N);
707 return;
708 }
709 case AMDGPUISD::FMA_W_CHAIN: {
710 SelectFMA_W_CHAIN(N);
711 return;
712 }
713
716 EVT VT = N->getValueType(0);
720 if (SDNode *Packed = packConstantV2I16(N, *CurDAG)) {
722 return;
723 }
724 }
725
726 break;
727 }
728
732 N->isDivergent()
733 ? TRI->getDefaultVectorSuperClassForBitWidth(NumVectorElts * 32)
735
737 return;
738 }
741 return;
743 SDValue RC, SubReg0, SubReg1;
745 if (N->getValueType(0) == MVT::i128) {
746 RC = CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32);
747 SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
748 SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
749 } else if (N->getValueType(0) == MVT::i64) {
750 RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
751 SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
752 SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
753 } else {
755 }
756 const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
757 N->getOperand(1), SubReg1 };
759 N->getValueType(0), Ops));
760 return;
761 }
762
765 if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N) ||
766 Subtarget->has64BitLiterals())
767 break;
768
771 Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
773 break;
774 } else {
776 Imm = C->getZExtValue();
778 break;
779 }
780
782 ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
783 return;
784 }
785 case AMDGPUISD::BFE_I32:
786 case AMDGPUISD::BFE_U32: {
787
788
789
790
791
792
793
794
797 break;
798
800 if (!Width)
801 break;
802
803 bool Signed = Opc == AMDGPUISD::BFE_I32;
804
807
809 WidthVal));
810 return;
811 }
812 case AMDGPUISD::DIV_SCALE: {
813 SelectDIV_SCALE(N);
814 return;
815 }
818 SelectMAD_64_32(N);
819 return;
820 }
823 return SelectMUL_LOHI(N);
828 break;
829 }
834 if (N->getValueType(0) != MVT::i32)
835 break;
836
837 SelectS_BFE(N);
838 return;
839 case ISD::BRCOND:
840 SelectBRCOND(N);
841 return;
842 case ISD::FP_EXTEND:
843 SelectFP_EXTEND(N);
844 return;
845 case AMDGPUISD::CVT_PKRTZ_F16_F32:
846 case AMDGPUISD::CVT_PKNORM_I16_F32:
847 case AMDGPUISD::CVT_PKNORM_U16_F32:
848 case AMDGPUISD::CVT_PK_U16_U32:
849 case AMDGPUISD::CVT_PK_I16_I32: {
850
851 if (N->getValueType(0) == MVT::i32) {
852 MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
853 N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT),
854 { N->getOperand(0), N->getOperand(1) });
855 SelectCode(N);
856 return;
857 }
858
859 break;
860 }
862 SelectINTRINSIC_W_CHAIN(N);
863 return;
864 }
866 SelectINTRINSIC_WO_CHAIN(N);
867 return;
868 }
870 SelectINTRINSIC_VOID(N);
871 return;
872 }
874 SelectWAVE_ADDRESS(N);
875 return;
876 }
877 case ISD::STACKRESTORE: {
878 SelectSTACKRESTORE(N);
879 return;
880 }
881 }
882
883 SelectCode(N);
884}
885
886bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
889 return Term->getMetadata("amdgpu.uniform") ||
890 Term->getMetadata("structurizecfg.uniform");
891}
892
893bool AMDGPUDAGToDAGISel::isUnneededShiftMask(const SDNode *N,
894 unsigned ShAmtBits) const {
896
897 const APInt &RHS = N->getConstantOperandAPInt(1);
898 if (RHS.countr_one() >= ShAmtBits)
899 return true;
900
902 return (LHSKnownZeros | RHS).countr_one() >= ShAmtBits;
903}
904
909
910
911
912
915 SDValue BaseLo = Lo.getOperand(0);
917
921
924
928 N1 = Lo.getOperand(1);
929 return true;
930 }
931 }
932 }
933 return false;
934}
935
936bool AMDGPUDAGToDAGISel::isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
938 if (CurDAG->isBaseWithConstantOffset(Addr)) {
941 return true;
942 }
943
946 return true;
947 }
948
949 return false;
950}
951
953 return "AMDGPU DAG->DAG Pattern Instruction Selection";
954}
955
959
963#ifdef EXPENSIVE_CHECKS
965 .getManager();
969 for (auto &L : LI.getLoopsInPreorder())
970 assert(L->isLCSSAForm(DT) && "Loop is not in LCSSA form!");
971#endif
973}
974
975
976
977
978
979bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
981 return false;
982}
983
984bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
988
992 } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
994 Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
995 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
999 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
1000 } else {
1001 Base = Addr;
1002 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1003 }
1004
1005 return true;
1006}
1007
1008SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
1010 SDNode *Mov = CurDAG->getMachineNode(
1011 AMDGPU::S_MOV_B32, DL, MVT::i32,
1012 CurDAG->getTargetConstant(Val, DL, MVT::i32));
1014}
1015
1016
1017void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
1021
1022 unsigned Opcode = N->getOpcode();
1024 bool ProduceCarry =
1027
1028 SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
1029 SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
1030
1031 SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1032 DL, MVT::i32, LHS, Sub0);
1033 SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1034 DL, MVT::i32, LHS, Sub1);
1035
1036 SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1037 DL, MVT::i32, RHS, Sub0);
1038 SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1039 DL, MVT::i32, RHS, Sub1);
1040
1041 SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
1042
1043 static const unsigned OpcMap[2][2][2] = {
1044 {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
1045 {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},
1046 {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
1047 {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};
1048
1049 unsigned Opc = OpcMap[0][N->isDivergent()][IsAdd];
1050 unsigned CarryOpc = OpcMap[1][N->isDivergent()][IsAdd];
1051
1052 SDNode *AddLo;
1053 if (!ConsumeCarry) {
1055 AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
1056 } else {
1058 AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
1059 }
1064 };
1065 SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);
1066
1067 SDValue RegSequenceArgs[] = {
1068 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
1070 Sub0,
1072 Sub1,
1073 };
1075 MVT::i64, RegSequenceArgs);
1076
1077 if (ProduceCarry) {
1078
1080 }
1081
1082
1084}
1085
1086void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
1089 SDValue CI = N->getOperand(2);
1090
1091 if (N->isDivergent()) {
1093 : AMDGPU::V_SUBB_U32_e64;
1094 CurDAG->SelectNodeTo(
1096 {LHS, RHS, CI,
1097 CurDAG->getTargetConstant(0, {}, MVT::i1) });
1098 } else {
1100 : AMDGPU::S_SUB_CO_PSEUDO;
1101 CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI});
1102 }
1103}
1104
1105void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
1106
1107
1108
1109 bool IsAdd = N->getOpcode() == ISD::UADDO;
1110 bool IsVALU = N->isDivergent();
1111
1112 for (SDNode::user_iterator UI = N->user_begin(), E = N->user_end(); UI != E;
1113 ++UI)
1114 if (UI.getUse().getResNo() == 1) {
1115 if (UI->isMachineOpcode()) {
1116 if (UI->getMachineOpcode() !=
1117 (IsAdd ? AMDGPU::S_ADD_CO_PSEUDO : AMDGPU::S_SUB_CO_PSEUDO)) {
1118 IsVALU = true;
1119 break;
1120 }
1121 } else {
1123 IsVALU = true;
1124 break;
1125 }
1126 }
1127 }
1128
1129 if (IsVALU) {
1130 unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
1131
1132 CurDAG->SelectNodeTo(
1134 {N->getOperand(0), N->getOperand(1),
1135 CurDAG->getTargetConstant(0, {}, MVT::i1) });
1136 } else {
1137 unsigned Opc = IsAdd ? AMDGPU::S_UADDO_PSEUDO : AMDGPU::S_USUBO_PSEUDO;
1138
1139 CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
1140 {N->getOperand(0), N->getOperand(1)});
1141 }
1142}
1143
1144void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
1145
1147
1148 SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
1149 SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
1150 SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
1151 Ops[8] = N->getOperand(0);
1152 Ops[9] = N->getOperand(4);
1153
1154
1155
1156 bool UseFMAC = Subtarget->hasDLInsts() &&
1160 unsigned Opcode = UseFMAC ? AMDGPU::V_FMAC_F32_e64 : AMDGPU::V_FMA_F32_e64;
1161 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), Ops);
1162}
1163
1164void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
1165
1167
1168 SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
1169 SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
1170 Ops[6] = N->getOperand(0);
1171 Ops[7] = N->getOperand(3);
1172
1173 CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
1174}
1175
1176
1177
1178void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
1179 EVT VT = N->getValueType(0);
1180
1181 assert(VT == MVT::f32 || VT == MVT::f64);
1182
1183 unsigned Opc
1184 = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64 : AMDGPU::V_DIV_SCALE_F32_e64;
1185
1186
1187
1189 SelectVOP3BMods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
1190 SelectVOP3BMods(N->getOperand(1), Ops[3], Ops[2]);
1191 SelectVOP3BMods(N->getOperand(2), Ops[5], Ops[4]);
1193}
1194
1195
1196
1197void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
1198 SDLoc SL(N);
1200 unsigned Opc;
1201 bool UseNoCarry = Subtarget->hasMadU64U32NoCarry() && ->hasAnyUseOfValue(1);
1202 if (Subtarget->hasMADIntraFwdBug())
1203 Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
1204 : AMDGPU::V_MAD_U64_U32_gfx11_e64;
1205 else if (UseNoCarry)
1206 Opc = Signed ? AMDGPU::V_MAD_NC_I64_I32_e64 : AMDGPU::V_MAD_NC_U64_U32_e64;
1207 else
1208 Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
1209
1210 SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
1211 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
1212 Clamp };
1213
1214 if (UseNoCarry) {
1215 MachineSDNode *Mad = CurDAG->getMachineNode(Opc, SL, MVT::i64, Ops);
1217 CurDAG->RemoveDeadNode(N);
1218 return;
1219 }
1220
1222}
1223
1224
1225
1226void AMDGPUDAGToDAGISel::SelectMUL_LOHI(SDNode *N) {
1227 SDLoc SL(N);
1229 SDVTList VTList;
1230 unsigned Opc;
1231 if (Subtarget->hasMadU64U32NoCarry()) {
1232 VTList = CurDAG->getVTList(MVT::i64);
1233 Opc = Signed ? AMDGPU::V_MAD_NC_I64_I32_e64 : AMDGPU::V_MAD_NC_U64_U32_e64;
1234 } else {
1235 VTList = CurDAG->getVTList(MVT::i64, MVT::i1);
1236 if (Subtarget->hasMADIntraFwdBug()) {
1237 Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
1238 : AMDGPU::V_MAD_U64_U32_gfx11_e64;
1239 } else {
1240 Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
1241 }
1242 }
1243
1245 SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
1246 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Zero, Clamp};
1247 SDNode *Mad = CurDAG->getMachineNode(Opc, SL, VTList, Ops);
1248 if ((N, 0).use_empty()) {
1249 SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32);
1250 SDNode *Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
1251 MVT::i32, SDValue(Mad, 0), Sub0);
1253 }
1254 if ((N, 1).use_empty()) {
1255 SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32);
1256 SDNode *Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
1257 MVT::i32, SDValue(Mad, 0), Sub1);
1259 }
1260 CurDAG->RemoveDeadNode(N);
1261}
1262
1263bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset) const {
1265 return false;
1266
1267 if ( || Subtarget->hasUsableDSOffset() ||
1268 Subtarget->unsafeDSOffsetFoldingEnabled())
1269 return true;
1270
1271
1272
1274}
1275
1276bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
1278 SDLoc DL(Addr);
1279 if (CurDAG->isBaseWithConstantOffset(Addr)) {
1283 if (isDSOffsetLegal(N0, C1->getSExtValue())) {
1284
1287 return true;
1288 }
1290
1292 int64_t ByteOffset = C->getSExtValue();
1293 if (isDSOffsetLegal(SDValue(), ByteOffset)) {
1295
1296
1297
1298
1301
1302 if (isDSOffsetLegal(Sub, ByteOffset)) {
1306
1307
1308 unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
1309 if (Subtarget->hasAddNoCarry()) {
1310 SubOp = AMDGPU::V_SUB_U32_e64;
1312 CurDAG->getTargetConstant(0, {}, MVT::i1));
1313 }
1314
1315 MachineSDNode *MachineSub =
1316 CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);
1317
1319 Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
1320 return true;
1321 }
1322 }
1323 }
1325
1326
1327
1328
1329
1330 SDLoc DL(Addr);
1331
1332 if (isDSOffsetLegal(SDValue(), CAddr->getZExtValue())) {
1334 MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
1335 DL, MVT::i32, Zero);
1337 Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
1338 return true;
1339 }
1340 }
1341
1342
1343 Base = Addr;
1344 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
1345 return true;
1346}
1347
1348bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0,
1349 unsigned Offset1,
1350 unsigned Size) const {
1351 if (Offset0 % Size != 0 || Offset1 % Size != 0)
1352 return false;
1354 return false;
1355
1356 if ( || Subtarget->hasUsableDSOffset() ||
1357 Subtarget->unsafeDSOffsetFoldingEnabled())
1358 return true;
1359
1360
1361
1363}
1364
1365
1371
1372
1373
1374
1375bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegal(SDValue Addr) const {
1377 return true;
1378
1379
1380
1381 if (Subtarget->hasSignedScratchOffsets())
1382 return true;
1383
1386
1387
1388
1389
1390
1391 ConstantSDNode *ImmOp = nullptr;
1394 return true;
1395 }
1396
1397 return CurDAG->SignBitIsZero(LHS);
1398}
1399
1400
1401
1402bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSV(SDValue Addr) const {
1404 return true;
1405
1406
1407
1408 if (Subtarget->hasSignedScratchOffsets())
1409 return true;
1410
1414}
1415
1416
1417
1418bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSVImm(SDValue Addr) const {
1419
1420
1422 return true;
1423
1426
1427
1428
1429
1432 (RHSImm->getSExtValue() < 0 && RHSImm->getSExtValue() > -0x40000000)))
1433 return true;
1434
1435 auto LHS = Base.getOperand(0);
1436 auto RHS = Base.getOperand(1);
1438}
1439
1440
1441bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
1443 SDValue &Offset1) const {
1444 return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 4);
1445}
1446
1447bool AMDGPUDAGToDAGISel::SelectDS128Bit8ByteAligned(SDValue Addr, SDValue &Base,
1449 SDValue &Offset1) const {
1450 return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 8);
1451}
1452
1453bool AMDGPUDAGToDAGISel::SelectDSReadWrite2(SDValue Addr, SDValue &Base,
1455 unsigned Size) const {
1456 SDLoc DL(Addr);
1457
1458 if (CurDAG->isBaseWithConstantOffset(Addr)) {
1463 unsigned OffsetValue1 = OffsetValue0 + Size;
1464
1465
1466 if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1, Size)) {
1468 Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i32);
1469 Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i32);
1470 return true;
1471 }
1473
1474 if (const ConstantSDNode *C =
1476 unsigned OffsetValue0 = C->getZExtValue();
1477 unsigned OffsetValue1 = OffsetValue0 + Size;
1478
1479 if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
1480 SDLoc DL(Addr);
1482
1483
1484
1485
1488
1489 if (isDSOffset2Legal(Sub, OffsetValue0, OffsetValue1, Size)) {
1493 unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
1494 if (Subtarget->hasAddNoCarry()) {
1495 SubOp = AMDGPU::V_SUB_U32_e64;
1497 CurDAG->getTargetConstant(0, {}, MVT::i1));
1498 }
1499
1500 MachineSDNode *MachineSub = CurDAG->getMachineNode(
1502
1504 Offset0 =
1505 CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i32);
1506 Offset1 =
1507 CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i32);
1508 return true;
1509 }
1510 }
1511 }
1513 unsigned OffsetValue0 = CAddr->getZExtValue();
1514 unsigned OffsetValue1 = OffsetValue0 + Size;
1515
1516 if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
1518 MachineSDNode *MovZero =
1519 CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32, Zero);
1521 Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i32);
1522 Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i32);
1523 return true;
1524 }
1525 }
1526
1527
1528
1529 Base = Addr;
1530 Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i32);
1531 Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i32);
1532 return true;
1533}
1534
1538 SDValue &Addr64) const {
1539
1540
1541 if (Subtarget->useFlatForGlobal())
1542 return false;
1543
1544 SDLoc DL(Addr);
1545
1546 Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
1547 Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
1548 Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
1549 SOffset = Subtarget->hasRestrictedSOffset()
1550 ? CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32)
1551 : CurDAG->getTargetConstant(0, DL, MVT::i32);
1552
1553 ConstantSDNode *C1 = nullptr;
1555 if (CurDAG->isBaseWithConstantOffset(Addr)) {
1559 else
1560 C1 = nullptr;
1561 }
1562
1564
1565
1568 Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
1569
1572
1573
1574 Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
1575 VAddr = N0;
1576 } else {
1577
1578 Ptr = N3;
1579 VAddr = N2;
1580 }
1581 } else {
1582
1583 Ptr = N2;
1584 VAddr = N3;
1585 }
1586 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1588
1589
1590 Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
1591 VAddr = N0;
1592 Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
1593 } else {
1594
1595
1596 VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
1597 Ptr = N0;
1598 }
1599
1600 if (!C1) {
1601
1602 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1603 return true;
1604 }
1605
1606 const SIInstrInfo *TII = Subtarget->getInstrInfo();
1608
1610 return true;
1611 }
1612
1613
1614 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1615 SOffset =
1617 AMDGPU::S_MOV_B32, DL, MVT::i32,
1619 0);
1620 return true;
1621}
1622
1623bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
1626 SDValue Ptr, Offen, Idxen, Addr64;
1627
1628
1629
1630 if (!Subtarget->hasAddr64())
1631 return false;
1632
1633 if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
1634 return false;
1635
1637 if (C->getSExtValue()) {
1638 SDLoc DL(Addr);
1639
1640 const SITargetLowering& Lowering =
1642
1644 return true;
1645 }
1646
1647 return false;
1648}
1649
1650std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
1652
1655 FI ? CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)) : N;
1656
1657
1658
1659
1660
1661 return std::pair(TFI, CurDAG->getTargetConstant(0, DL, MVT::i32));
1662}
1663
1664bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
1667 SDValue &ImmOffset) const {
1668
1669 SDLoc DL(Addr);
1670 MachineFunction &MF = CurDAG->getMachineFunction();
1671 const SIMachineFunctionInfo *Info = MF.getInfo();
1672
1673 Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1674
1676 int64_t Imm = CAddr->getSExtValue();
1677 const int64_t NullPtr =
1679
1680 if (Imm != NullPtr) {
1683 CurDAG->getTargetConstant(Imm & ~MaxOffset, DL, MVT::i32);
1684 MachineSDNode *MovHighBits = CurDAG->getMachineNode(
1685 AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
1686 VAddr = SDValue(MovHighBits, 0);
1687
1688 SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1689 ImmOffset = CurDAG->getTargetConstant(Imm & MaxOffset, DL, MVT::i32);
1690 return true;
1691 }
1692 }
1693
1694 if (CurDAG->isBaseWithConstantOffset(Addr)) {
1695
1696
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715 const SIInstrInfo *TII = Subtarget->getInstrInfo();
1716 if (TII->isLegalMUBUFImmOffset(C1) &&
1717 (!Subtarget->privateMemoryResourceIsRangeChecked() ||
1718 CurDAG->SignBitIsZero(N0))) {
1719 std::tie(VAddr, SOffset) = foldFrameIndex(N0);
1720 ImmOffset = CurDAG->getTargetConstant(C1, DL, MVT::i32);
1721 return true;
1722 }
1723 }
1724
1725
1726 std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
1727 ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1728 return true;
1729}
1730
1733 return false;
1735 if (.isPhysical())
1736 return false;
1737 const auto *RC = TRI.getPhysRegBaseClass(Reg);
1738 return RC && TRI.isSGPRClass(RC);
1739}
1740
1741bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
1746 const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
1747 const SIInstrInfo *TII = Subtarget->getInstrInfo();
1748 MachineFunction &MF = CurDAG->getMachineFunction();
1749 const SIMachineFunctionInfo *Info = MF.getInfo();
1750 SDLoc DL(Addr);
1751
1752
1754 SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1755 SOffset = Addr;
1756 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1757 return true;
1758 }
1759
1760 ConstantSDNode *CAddr;
1762
1764 if (!CAddr || ->isLegalMUBUFImmOffset(CAddr->getZExtValue()))
1765 return false;
1767 return false;
1768
1772
1773 SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
1774 } else {
1775 return false;
1776 }
1777
1778 SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
1779
1781 return true;
1782}
1783
1784bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
1786 ) const {
1787 SDValue Ptr, VAddr, Offen, Idxen, Addr64;
1788 const SIInstrInfo *TII = Subtarget->getInstrInfo();
1789
1790 if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
1791 return false;
1792
1796 uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
1798 SDLoc DL(Addr);
1799
1800 const SITargetLowering& Lowering =
1802
1804 return true;
1805 }
1806 return false;
1807}
1808
1809bool AMDGPUDAGToDAGISel::SelectBUFSOffset(SDValue ByteOffsetNode,
1810 SDValue &SOffset) const {
1811 if (Subtarget->hasRestrictedSOffset() && isNullConstant(ByteOffsetNode)) {
1812 SOffset = CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32);
1813 return true;
1814 }
1815
1816 SOffset = ByteOffsetNode;
1817 return true;
1818}
1819
1820
1821
1825 return MN;
1827 for (SDValue V : N->op_values())
1830 return MN;
1832}
1833
1834bool AMDGPUDAGToDAGISel::SelectFlatOffsetImpl(SDNode *N, SDValue Addr,
1836 uint64_t FlatVariant) const {
1837 int64_t OffsetVal = 0;
1838
1840
1841 bool CanHaveFlatSegmentOffsetBug =
1842 Subtarget->hasFlatSegmentOffsetBug() &&
1845
1846 if (Subtarget->hasFlatInstOffsets() && !CanHaveFlatSegmentOffsetBug) {
1848 if (isBaseWithConstantOffset64(Addr, N0, N1) &&
1850 isFlatScratchBaseLegal(Addr))) {
1852
1853
1854
1855
1856 bool IsInBounds =
1858 if (COffsetVal == 0 || FlatVariant != SIInstrFlags::FLAT || IsInBounds) {
1859 const SIInstrInfo *TII = Subtarget->getInstrInfo();
1860 if (TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) {
1861 Addr = N0;
1862 OffsetVal = COffsetVal;
1863 } else {
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1875 uint64_t RemainderOffset;
1876
1877 std::tie(OffsetVal, RemainderOffset) =
1878 TII->splitFlatOffset(COffsetVal, AS, FlatVariant);
1879
1881 getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
1882 SDValue Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
1883
1888 unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
1889 if (Subtarget->hasAddNoCarry()) {
1890 AddOp = AMDGPU::V_ADD_U32_e64;
1892 }
1893 Addr =
1894 SDValue(CurDAG->getMachineNode(AddOp, DL, MVT::i32, Opnds), 0);
1895 } else {
1896
1897
1899 CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
1901 CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
1902
1903 SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1904 DL, MVT::i32, N0, Sub0);
1905 SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1906 DL, MVT::i32, N0, Sub1);
1907
1909 getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);
1910
1911 SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i1);
1912
1913 SDNode *Add =
1914 CurDAG->getMachineNode(AMDGPU::V_ADD_CO_U32_e64, DL, VTs,
1915 {AddOffsetLo, SDValue(N0Lo, 0), Clamp});
1916
1917 SDNode *Addc = CurDAG->getMachineNode(
1918 AMDGPU::V_ADDC_U32_e64, DL, VTs,
1920
1921 SDValue RegSequenceArgs[] = {
1922 CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL,
1923 MVT::i32),
1925
1926 Addr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
1927 MVT::i64, RegSequenceArgs),
1928 0);
1929 }
1930 }
1931 }
1932 }
1933 }
1934
1935 VAddr = Addr;
1936 Offset = CurDAG->getSignedTargetConstant(OffsetVal, SDLoc(), MVT::i32);
1937 return true;
1938}
1939
1940bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N, SDValue Addr,
1944}
1945
1946bool AMDGPUDAGToDAGISel::SelectGlobalOffset(SDNode *N, SDValue Addr,
1950}
1951
1952bool AMDGPUDAGToDAGISel::SelectScratchOffset(SDNode *N, SDValue Addr,
1955 return SelectFlatOffsetImpl(N, Addr, VAddr, Offset,
1957}
1958
1959
1960
1963 if (Op.getValueType() == MVT::i32)
1964 return Op;
1965
1971
1972 SDValue ExtSrc = Op.getOperand(0);
1974}
1975
1976
1977
1978bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N, SDValue Addr,
1981 bool NeedIOffset) const {
1982 int64_t ImmOffset = 0;
1983 ScaleOffset = false;
1984
1985
1986
1987
1989 if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
1991 const SIInstrInfo *TII = Subtarget->getInstrInfo();
1992
1993 if (NeedIOffset &&
1996 Addr = LHS;
1997 ImmOffset = COffsetVal;
1998 } else if (->isDivergent()) {
1999 if (COffsetVal > 0) {
2000 SDLoc SL(N);
2001
2002
2003
2004 int64_t SplitImmOffset = 0, RemainderOffset = COffsetVal;
2005 if (NeedIOffset) {
2006 std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
2008 }
2009
2010 if (Subtarget->hasSignedGVSOffset() ? isInt<32>(RemainderOffset)
2012 SDNode *VMov = CurDAG->getMachineNode(
2013 AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
2014 CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
2015 VOffset = SDValue(VMov, 0);
2016 SAddr = LHS;
2017 Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);
2018 return true;
2019 }
2020 }
2021
2022
2023
2024
2025
2026
2027 unsigned NumLiterals =
2028 ->isInlineConstant(APInt(32, Lo_32(COffsetVal))) +
2029 ->isInlineConstant(APInt(32, Hi_32(COffsetVal)));
2030 if (Subtarget->getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
2031 return false;
2032 }
2033 }
2034
2035
2038
2039 if (->isDivergent()) {
2040
2042 ScaleOffset = SelectScaleOffset(N, RHS, Subtarget->hasSignedGVSOffset());
2044 RHS, Subtarget->hasSignedGVSOffset(), CurDAG)) {
2045 SAddr = LHS;
2046 VOffset = ExtRHS;
2047 }
2048 }
2049
2051 if (!SAddr && ->isDivergent()) {
2052
2053 ScaleOffset = SelectScaleOffset(N, LHS, Subtarget->hasSignedGVSOffset());
2055 LHS, Subtarget->hasSignedGVSOffset(), CurDAG)) {
2056 SAddr = RHS;
2057 VOffset = ExtLHS;
2058 }
2059 }
2060
2061 if (SAddr) {
2062 Offset = CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);
2063 return true;
2064 }
2065 }
2066
2067 if (Subtarget->hasScaleOffset() &&
2068 (Addr.getOpcode() == (Subtarget->hasSignedGVSOffset()
2076
2077 unsigned Size =
2078 (unsigned)cast(N)->getMemoryVT().getFixedSizeInBits() / 8;
2080 if (ScaleOffset) {
2083 Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32);
2084 return true;
2085 }
2086 }
2087
2090 return false;
2091
2092
2093
2094 SAddr = Addr;
2095 SDNode *VMov =
2096 CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, SDLoc(Addr), MVT::i32,
2097 CurDAG->getTargetConstant(0, SDLoc(), MVT::i32));
2098 VOffset = SDValue(VMov, 0);
2099 Offset = CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);
2100 return true;
2101}
2102
2103bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N, SDValue Addr,
2107 bool ScaleOffset;
2108 if (!SelectGlobalSAddr(N, Addr, SAddr, VOffset, Offset, ScaleOffset))
2109 return false;
2110
2112 SDLoc(), MVT::i32);
2113 return true;
2114}
2115
2116bool AMDGPUDAGToDAGISel::SelectGlobalSAddrCPol(SDNode *N, SDValue Addr,
2120 bool ScaleOffset;
2121 if (!SelectGlobalSAddr(N, Addr, SAddr, VOffset, Offset, ScaleOffset))
2122 return false;
2123
2124
2125 auto PassedCPol =
2126 N->getConstantOperandVal(N->getNumOperands() - 1) & ~AMDGPU::CPol::SCAL;
2128 (ScaleOffset ? AMDGPU::CPol::SCAL : 0) | PassedCPol, SDLoc(), MVT::i32);
2129 return true;
2130}
2131
2132bool AMDGPUDAGToDAGISel::SelectGlobalSAddrCPolM0(SDNode *N, SDValue Addr,
2137 bool ScaleOffset;
2138 if (!SelectGlobalSAddr(N, Addr, SAddr, VOffset, Offset, ScaleOffset))
2139 return false;
2140
2141
2142 auto PassedCPol =
2143 N->getConstantOperandVal(N->getNumOperands() - 2) & ~AMDGPU::CPol::SCAL;
2145 (ScaleOffset ? AMDGPU::CPol::SCAL : 0) | PassedCPol, SDLoc(), MVT::i32);
2146 return true;
2147}
2148
2149bool AMDGPUDAGToDAGISel::SelectGlobalSAddrGLC(SDNode *N, SDValue Addr,
2153 bool ScaleOffset;
2154 if (!SelectGlobalSAddr(N, Addr, SAddr, VOffset, Offset, ScaleOffset))
2155 return false;
2156
2158 CPol = CurDAG->getTargetConstant(CPolVal, SDLoc(), MVT::i32);
2159 return true;
2160}
2161
2162bool AMDGPUDAGToDAGISel::SelectGlobalSAddrNoIOffset(SDNode *N, SDValue Addr,
2166 bool ScaleOffset;
2168 if (!SelectGlobalSAddr(N, Addr, SAddr, VOffset, DummyOffset, ScaleOffset,
2169 false))
2170 return false;
2171
2172
2173 auto PassedCPol =
2174 N->getConstantOperandVal(N->getNumOperands() - 1) & ~AMDGPU::CPol::SCAL;
2176 (ScaleOffset ? AMDGPU::CPol::SCAL : 0) | PassedCPol, SDLoc(), MVT::i32);
2177 return true;
2178}
2179
2180bool AMDGPUDAGToDAGISel::SelectGlobalSAddrNoIOffsetM0(SDNode *N, SDValue Addr,
2184 bool ScaleOffset;
2186 if (!SelectGlobalSAddr(N, Addr, SAddr, VOffset, DummyOffset, ScaleOffset,
2187 false))
2188 return false;
2189
2190
2191 auto PassedCPol =
2194 (ScaleOffset ? AMDGPU::CPol::SCAL : 0) | PassedCPol, SDLoc(), MVT::i32);
2195 return true;
2196}
2197
2200 SAddr = CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0));
2203
2204
2207 FI->getValueType(0));
2209 MVT::i32, TFI, SAddr.getOperand(1)),
2210 0);
2211 }
2212
2213 return SAddr;
2214}
2215
2216
2217bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr,
2221 return false;
2222
2223 SDLoc DL(Addr);
2224
2225 int64_t COffsetVal = 0;
2226
2227 if (CurDAG->isBaseWithConstantOffset(Addr) && isFlatScratchBaseLegal(Addr)) {
2230 } else {
2231 SAddr = Addr;
2232 }
2233
2235
2236 const SIInstrInfo *TII = Subtarget->getInstrInfo();
2237
2240 int64_t SplitImmOffset, RemainderOffset;
2241 std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
2243
2244 COffsetVal = SplitImmOffset;
2245
2248 ? getMaterializedScalarImm32(Lo_32(RemainderOffset), DL)
2249 : CurDAG->getSignedTargetConstant(RemainderOffset, DL, MVT::i32);
2250 SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, DL, MVT::i32,
2251 SAddr, AddOffset),
2252 0);
2253 }
2254
2255 Offset = CurDAG->getSignedTargetConstant(COffsetVal, DL, MVT::i32);
2256
2257 return true;
2258}
2259
2260
2261bool AMDGPUDAGToDAGISel::checkFlatScratchSVSSwizzleBug(
2262 SDValue VAddr, SDValue SAddr, uint64_t ImmOffset) const {
2263 if (!Subtarget->hasFlatScratchSVSSwizzleBug())
2264 return false;
2265
2266
2267
2268
2269 KnownBits VKnown = CurDAG->computeKnownBits(VAddr);
2270 KnownBits SKnown =
2273 true)));
2276 return (VMax & 3) + (SMax & 3) >= 4;
2277}
2278
2279bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
2283 int64_t ImmOffset = 0;
2284
2286 SDValue OrigAddr = Addr;
2287 if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
2289 const SIInstrInfo *TII = Subtarget->getInstrInfo();
2290
2293 Addr = LHS;
2294 ImmOffset = COffsetVal;
2295 } else if (->isDivergent() && COffsetVal > 0) {
2296 SDLoc SL(N);
2297
2298
2299 int64_t SplitImmOffset, RemainderOffset;
2300 std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
2302
2304 SDNode *VMov = CurDAG->getMachineNode(
2305 AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
2306 CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
2307 VAddr = SDValue(VMov, 0);
2308 SAddr = LHS;
2309 if (!isFlatScratchBaseLegal(Addr))
2310 return false;
2311 if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
2312 return false;
2313 Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);
2314 CPol = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2315 return true;
2316 }
2317 }
2318 }
2319
2321 return false;
2322
2325
2326 if (->isDivergent() && RHS->isDivergent()) {
2327 SAddr = LHS;
2328 VAddr = RHS;
2329 } else if (->isDivergent() && LHS->isDivergent()) {
2330 SAddr = RHS;
2331 VAddr = LHS;
2332 } else {
2333 return false;
2334 }
2335
2336 if (OrigAddr != Addr) {
2337 if (!isFlatScratchBaseLegalSVImm(OrigAddr))
2338 return false;
2339 } else {
2340 if (!isFlatScratchBaseLegalSV(OrigAddr))
2341 return false;
2342 }
2343
2344 if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
2345 return false;
2347 Offset = CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);
2348
2349 bool ScaleOffset = SelectScaleOffset(N, VAddr, true );
2351 SDLoc(), MVT::i32);
2352 return true;
2353}
2354
2355
2356
2357
2358bool AMDGPUDAGToDAGISel::isSOffsetLegalWithImmOffset(SDValue *SOffset,
2359 bool Imm32Only,
2360 bool IsBuffer,
2361 int64_t ImmOffset) const {
2362 if (!IsBuffer && !Imm32Only && ImmOffset < 0 &&
2364 KnownBits SKnown = CurDAG->computeKnownBits(*SOffset);
2366 return false;
2367 }
2368
2369 return true;
2370}
2371
2372
2373
2374
2376 bool IsSigned) const {
2377 bool ScaleOffset = false;
2378 if (!Subtarget->hasScaleOffset() || )
2379 return false;
2380
2381 unsigned Size =
2382 (unsigned)cast(N)->getMemoryVT().getFixedSizeInBits() / 8;
2383
2386 Off = Ext;
2387
2390 ScaleOffset = C->getZExtValue() == Log2_32(Size);
2392 (IsSigned && Offset.getOpcode() == AMDGPUISD::MUL_I24) ||
2393 Offset.getOpcode() == AMDGPUISD::MUL_U24 ||
2394 (Offset.isMachineOpcode() &&
2395 Offset.getMachineOpcode() ==
2396 (IsSigned ? AMDGPU::S_MUL_I64_I32_PSEUDO
2397 : AMDGPU::S_MUL_U64_U32_PSEUDO))) {
2399 ScaleOffset = C->getZExtValue() == Size;
2400 }
2401
2402 if (ScaleOffset)
2404
2405 return ScaleOffset;
2406}
2407
2408
2409
2410
2411bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDNode *N, SDValue ByteOffsetNode,
2413 bool Imm32Only, bool IsBuffer,
2414 bool HasSOffset, int64_t ImmOffset,
2415 bool *ScaleOffset) const {
2417 "Cannot match both soffset and offset at the same time!");
2418
2419 if (ScaleOffset) {
2421
2422 *ScaleOffset = SelectScaleOffset(N, ByteOffsetNode, false );
2423 }
2424
2426 if () {
2427 if (!SOffset)
2428 return false;
2429
2432 *SOffset = ByteOffsetNode;
2433 return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,
2434 ImmOffset);
2435 }
2438 *SOffset = ByteOffsetNode.getOperand(0);
2439 return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,
2440 ImmOffset);
2441 }
2442 }
2443 return false;
2444 }
2445
2446 SDLoc SL(ByteOffsetNode);
2447
2448
2449
2450 int64_t ByteOffset = IsBuffer ? C->getZExtValue() : C->getSExtValue();
2452 *Subtarget, ByteOffset, IsBuffer, HasSOffset);
2453 if (EncodedOffset && Offset && !Imm32Only) {
2454 *Offset = CurDAG->getSignedTargetConstant(*EncodedOffset, SL, MVT::i32);
2455 return true;
2456 }
2457
2458
2459 if (ByteOffset < 0)
2460 return false;
2461
2463 if (EncodedOffset && Offset && Imm32Only) {
2464 *Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
2465 return true;
2466 }
2467
2469 return false;
2470
2471 if (SOffset) {
2472 SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
2474 CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0);
2475 return true;
2476 }
2477
2478 return false;
2479}
2480
2481SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
2483 return Addr;
2484
2485
2486 SDLoc SL(Addr);
2487
2488 const MachineFunction &MF = CurDAG->getMachineFunction();
2489 const SIMachineFunctionInfo *Info = MF.getInfo();
2490 unsigned AddrHiVal = Info->get32BitAddressHighBits();
2491 SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);
2492
2494 CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
2495 Addr,
2496 CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
2497 SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
2498 0),
2499 CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
2500 };
2501
2502 return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
2503 Ops), 0);
2504}
2505
2506
2507
2508
2509bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDNode *N, SDValue Addr,
2512 bool IsBuffer, bool HasSOffset,
2513 int64_t ImmOffset,
2514 bool *ScaleOffset) const {
2515 if (SOffset && Offset) {
2516 assert(!Imm32Only && !IsBuffer);
2518
2519 if (!SelectSMRDBaseOffset(N, Addr, B, nullptr, Offset, false, false, true))
2520 return false;
2521
2522 int64_t ImmOff = 0;
2524 ImmOff = C->getSExtValue();
2525
2526 return SelectSMRDBaseOffset(N, B, SBase, SOffset, nullptr, false, false,
2527 true, ImmOff, ScaleOffset);
2528 }
2529
2530
2531
2534 return false;
2535
2537
2543 }
2544 if (!N0 || !N1)
2545 return false;
2546
2547 if (SelectSMRDOffset(N, N1, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset,
2548 ImmOffset, ScaleOffset)) {
2550 return true;
2551 }
2552 if (SelectSMRDOffset(N, N0, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset,
2553 ImmOffset, ScaleOffset)) {
2555 return true;
2556 }
2557 return false;
2558}
2559
2562 bool Imm32Only, bool *ScaleOffset) const {
2563 if (SelectSMRDBaseOffset(N, Addr, SBase, SOffset, Offset, Imm32Only,
2564 false, false,
2565 0, ScaleOffset)) {
2566 SBase = Expand32BitAddress(SBase);
2567 return true;
2568 }
2569
2571 SBase = Expand32BitAddress(Addr);
2572 *Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
2573 return true;
2574 }
2575
2576 return false;
2577}
2578
2579bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
2581 return SelectSMRD( nullptr, Addr, SBase, nullptr,
2583}
2584
2585bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
2588 return SelectSMRD( nullptr, Addr, SBase, nullptr,
2589 &Offset, true);
2590}
2591
2594 bool ScaleOffset;
2595 if (!SelectSMRD(N, Addr, SBase, &SOffset, nullptr,
2596 false, &ScaleOffset))
2597 return false;
2598
2600 SDLoc(N), MVT::i32);
2601 return true;
2602}
2603
2604bool AMDGPUDAGToDAGISel::SelectSMRDSgprImm(SDNode *N, SDValue Addr,
2608 bool ScaleOffset;
2609 if (!SelectSMRD(N, Addr, SBase, &SOffset, &Offset, false, &ScaleOffset))
2610 return false;
2611
2613 SDLoc(N), MVT::i32);
2614 return true;
2615}
2616
2618 return SelectSMRDOffset( nullptr, N, nullptr, &Offset,
2619 false, true);
2620}
2621
2622bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue N,
2625 return SelectSMRDOffset( nullptr, N, nullptr, &Offset,
2626 true, true);
2627}
2628
2629bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,
2631
2632
2633 return N.getValueType() == MVT::i32 &&
2634 SelectSMRDBaseOffset( nullptr, N, SOffset,
2635 nullptr, &Offset,
2636 false, true);
2637}
2638
2639bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
2642 SDLoc DL(Index);
2643
2644 if (CurDAG->isBaseWithConstantOffset(Index)) {
2648
2649
2650
2651
2652
2657 return true;
2658 }
2659 }
2660
2662 return false;
2663
2665 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
2666 return true;
2667}
2668
2669SDNode *AMDGPUDAGToDAGISel::getBFE32(bool IsSigned, const SDLoc &DL,
2671 uint32_t Width) {
2673 unsigned Opcode = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
2676
2677 return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, Off, W);
2678 }
2679 unsigned Opcode = IsSigned ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
2680
2681
2682
2683 uint32_t PackedVal = Offset | (Width << 16);
2684 SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);
2685
2686 return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
2687}
2688
2689void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
2690
2691
2692
2693
2694 const SDValue &Shl = N->getOperand(0);
2697
2699 uint32_t BVal = B->getZExtValue();
2700 uint32_t CVal = C->getZExtValue();
2701
2702 if (0 < BVal && BVal <= CVal && CVal < 32) {
2705 32 - CVal));
2706 return;
2707 }
2708 }
2709 SelectCode(N);
2710}
2711
2712void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
2713 switch (N->getOpcode()) {
2715 if (N->getOperand(0).getOpcode() == ISD::SRL) {
2716
2717
2718 const SDValue &Srl = N->getOperand(0);
2721
2722 if (Shift && Mask) {
2724 uint32_t MaskVal = Mask->getZExtValue();
2725
2729 WidthVal));
2730 return;
2731 }
2732 }
2733 }
2734 break;
2736 if (N->getOperand(0).getOpcode() == ISD::AND) {
2737
2738
2742
2743 if (Shift && Mask) {
2745 uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;
2746
2749 ReplaceNode(N, getBFE32(false, SDLoc(N), And.getOperand(0), ShiftVal,
2750 WidthVal));
2751 return;
2752 }
2753 }
2754 } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
2755 SelectS_BFEFromShifts(N);
2756 return;
2757 }
2758 break;
2760 if (N->getOperand(0).getOpcode() == ISD::SHL) {
2761 SelectS_BFEFromShifts(N);
2762 return;
2763 }
2764 break;
2765
2767
2768 SDValue Src = N->getOperand(0);
2769 if (Src.getOpcode() != ISD::SRL)
2770 break;
2771
2773 if (!Amt)
2774 break;
2775
2776 unsigned Width = cast(N->getOperand(1))->getVT().getSizeInBits();
2777 ReplaceNode(N, getBFE32(true, SDLoc(N), Src.getOperand(0),
2779 return;
2780 }
2781 }
2782
2783 SelectCode(N);
2784}
2785
2786bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
2787 assert(N->getOpcode() == ISD::BRCOND);
2788 if (->hasOneUse())
2789 return false;
2790
2794
2796 return false;
2797
2798 MVT VT = Cond.getOperand(0).getSimpleValueType();
2799 if (VT == MVT::i32)
2800 return true;
2801
2802 if (VT == MVT::i64) {
2805 Subtarget->hasScalarCompareEq64();
2806 }
2807
2808 if ((VT == MVT::f16 || VT == MVT::f32) && Subtarget->hasSALUFloatInsts())
2809 return true;
2810
2811 return false;
2812}
2813
2816
2817
2818
2819
2820
2821
2822
2823
2824
2828
2832
2835 return Cond;
2836 }
2837 }
2839}
2840
2841void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
2843
2844 if (Cond.isUndef()) {
2845 CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
2846 N->getOperand(2), N->getOperand(0));
2847 return;
2848 }
2849
2850 const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
2851
2852 bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
2853 bool AndExec = !UseSCCBr;
2854 bool Negate = false;
2855
2857 Cond->getOperand(0)->getOpcode() == AMDGPUISD::SETCC) {
2862
2864
2865
2866
2867
2868
2869
2870
2872 bool NegatedBallot = false;
2874 Cond = BallotCond;
2875 UseSCCBr = !BallotCond->isDivergent();
2876 Negate = Negate ^ NegatedBallot;
2877 } else {
2878
2879
2880 Cond = VCMP;
2881 UseSCCBr = false;
2882 }
2883 }
2884
2885
2886
2887 AndExec = false;
2888 }
2889
2890 unsigned BrOp =
2891 UseSCCBr ? (Negate ? AMDGPU::S_CBRANCH_SCC0 : AMDGPU::S_CBRANCH_SCC1)
2892 : (Negate ? AMDGPU::S_CBRANCH_VCCZ : AMDGPU::S_CBRANCH_VCCNZ);
2893 Register CondReg = UseSCCBr ? AMDGPU::SCC : TRI->getVCC();
2894 SDLoc SL(N);
2895
2896 if (AndExec) {
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2911 CurDAG->getMachineNode(
2912 Subtarget->isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64, SL,
2913 MVT::i1,
2914 CurDAG->getRegister(Subtarget->isWave32() ? AMDGPU::EXEC_LO
2915 : AMDGPU::EXEC,
2916 MVT::i1),
2918 0);
2919 }
2920
2921 SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
2922 CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
2923 N->getOperand(2),
2925}
2926
2927void AMDGPUDAGToDAGISel::SelectFP_EXTEND(SDNode *N) {
2928 if (Subtarget->hasSALUFloatInsts() && N->getValueType(0) == MVT::f32 &&
2929 ->isDivergent()) {
2930 SDValue Src = N->getOperand(0);
2931 if (Src.getValueType() == MVT::f16) {
2933 CurDAG->SelectNodeTo(N, AMDGPU::S_CVT_HI_F32_F16, N->getVTList(),
2934 {Src});
2935 return;
2936 }
2937 }
2938 }
2939
2940 SelectCode(N);
2941}
2942
2943void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
2944
2945
2946 unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
2947 AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
2948
2949 SDValue Chain = N->getOperand(0);
2952 MachineMemOperand *MMO = M->getMemOperand();
2954
2956 if (CurDAG->isBaseWithConstantOffset(Ptr)) {
2959
2960 const APInt &OffsetVal = PtrOffset->getAsAPIntVal();
2961 if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue())) {
2962 N = glueCopyToM0(N, PtrBase);
2963 Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
2964 }
2965 }
2966
2968 N = glueCopyToM0(N, Ptr);
2969 Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2970 }
2971
2974 CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
2975 Chain,
2976 N->getOperand(N->getNumOperands() - 1)
2977 };
2978
2979 SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
2981}
2982
2983
2984
2985void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(SDNode *N, unsigned IntrID) {
2986 unsigned Opc;
2987 switch (IntrID) {
2988 case Intrinsic::amdgcn_ds_bvh_stack_rtn:
2989 case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
2990 Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
2991 break;
2992 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
2993 Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP1_RTN_B32;
2994 break;
2995 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
2996 Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP2_RTN_B64;
2997 break;
2998 }
2999 SDValue Ops[] = {N->getOperand(2), N->getOperand(3), N->getOperand(4),
3000 N->getOperand(5), N->getOperand(0)};
3001
3003 MachineMemOperand *MMO = M->getMemOperand();
3004 SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
3006}
3007
3009 switch (IntrID) {
3010 case Intrinsic::amdgcn_ds_gws_init:
3011 return AMDGPU::DS_GWS_INIT;
3012 case Intrinsic::amdgcn_ds_gws_barrier:
3013 return AMDGPU::DS_GWS_BARRIER;
3014 case Intrinsic::amdgcn_ds_gws_sema_v:
3015 return AMDGPU::DS_GWS_SEMA_V;
3016 case Intrinsic::amdgcn_ds_gws_sema_br:
3017 return AMDGPU::DS_GWS_SEMA_BR;
3018 case Intrinsic::amdgcn_ds_gws_sema_p:
3019 return AMDGPU::DS_GWS_SEMA_P;
3020 case Intrinsic::amdgcn_ds_gws_sema_release_all:
3021 return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
3022 default:
3024 }
3025}
3026
3027void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
3028 if (!Subtarget->hasGWS() ||
3029 (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
3030 !Subtarget->hasGWSSemaReleaseAll())) {
3031
3032 SelectCode(N);
3033 return;
3034 }
3035
3036
3037 const bool HasVSrc = N->getNumOperands() == 4;
3038 assert(HasVSrc || N->getNumOperands() == 3);
3039
3040 SDLoc SL(N);
3041 SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
3042 int ImmOffset = 0;
3044 MachineMemOperand *MMO = M->getMemOperand();
3045
3046
3047
3048
3049
3050
3051
3053
3054
3055
3056
3057 glueCopyToM0(N, CurDAG->getTargetConstant(0, SL, MVT::i32));
3058 ImmOffset = ConstOffset->getZExtValue();
3059 } else {
3060 if (CurDAG->isBaseWithConstantOffset(BaseOffset)) {
3062 BaseOffset = BaseOffset.getOperand(0);
3063 }
3064
3065
3066
3067
3068 SDNode *SGPROffset
3069 = CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
3070 BaseOffset);
3071
3072 SDNode *M0Base
3073 = CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
3075 CurDAG->getTargetConstant(16, SL, MVT::i32));
3076 glueCopyToM0(N, SDValue(M0Base, 0));
3077 }
3078
3080 SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
3081
3083
3084 const MCInstrDesc &InstrDesc = TII->get(Opc);
3085 int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
3086
3087 const TargetRegisterClass *DataRC = TII->getRegClass(InstrDesc, Data0Idx);
3088
3090 if (HasVSrc) {
3091 const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
3092
3094 MVT DataVT = Data.getValueType().getSimpleVT();
3095 if (TRI->isTypeLegalForClass(*DataRC, DataVT)) {
3096
3097 Ops.push_back(N->getOperand(2));
3098 } else {
3099
3100
3101 const SDValue RegSeqOps[] = {
3102 CurDAG->getTargetConstant(DataRC->getID(), SL, MVT::i32), Data,
3103 CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
3105 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL, MVT::i32),
3106 0),
3107 CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32)};
3108
3109 Ops.push_back(SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
3110 SL, MVT::v2i32, RegSeqOps),
3111 0));
3112 }
3113 }
3114
3115 Ops.push_back(OffsetField);
3116 Ops.push_back(Chain);
3117
3118 SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
3120}
3121
3122void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) {
3123 if (Subtarget->getLDSBankCount() != 16) {
3124
3125 SelectCode(N);
3126 return;
3127 }
3128
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3151 N->getOperand(5), SDValue());
3152
3153 SDVTList VTs = CurDAG->getVTList(MVT::f32, MVT::Other);
3154
3155 SDNode *InterpMov =
3156 CurDAG->getMachineNode(AMDGPU::V_INTERP_MOV_F32, DL, VTs, {
3157 CurDAG->getTargetConstant(2, DL, MVT::i32),
3158 N->getOperand(3),
3159 N->getOperand(2),
3161 });
3162
3163 SDNode *InterpP1LV =
3164 CurDAG->getMachineNode(AMDGPU::V_INTERP_P1LV_F16, DL, MVT::f32, {
3165 CurDAG->getTargetConstant(0, DL, MVT::i32),
3166 N->getOperand(1),
3167 N->getOperand(3),
3168 N->getOperand(2),
3169 CurDAG->getTargetConstant(0, DL, MVT::i32),
3170 SDValue(InterpMov, 0),
3171 N->getOperand(4),
3172 CurDAG->getTargetConstant(0, DL, MVT::i1),
3173 CurDAG->getTargetConstant(0, DL, MVT::i32),
3175 });
3176
3178}
3179
3180void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
3181 unsigned IntrID = N->getConstantOperandVal(1);
3182 switch (IntrID) {
3183 case Intrinsic::amdgcn_ds_append:
3184 case Intrinsic::amdgcn_ds_consume: {
3185 if (N->getValueType(0) != MVT::i32)
3186 break;
3187 SelectDSAppendConsume(N, IntrID);
3188 return;
3189 }
3190 case Intrinsic::amdgcn_ds_bvh_stack_rtn:
3191 case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
3192 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
3193 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
3194 SelectDSBvhStackIntrinsic(N, IntrID);
3195 return;
3196 case Intrinsic::amdgcn_init_whole_wave:
3197 CurDAG->getMachineFunction()
3198 .getInfo()
3199 ->setInitWholeWave();
3200 break;
3201 }
3202
3203 SelectCode(N);
3204}
3205
3206void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
3207 unsigned IntrID = N->getConstantOperandVal(0);
3208 unsigned Opcode = AMDGPU::INSTRUCTION_LIST_END;
3209 SDNode *ConvGlueNode = N->getGluedNode();
3210 if (ConvGlueNode) {
3211
3212 assert(ConvGlueNode->getOpcode() == ISD::CONVERGENCECTRL_GLUE);
3214 ConvGlueNode =
3215 CurDAG->getMachineNode(TargetOpcode::CONVERGENCECTRL_GLUE, {},
3216 MVT::Glue, SDValue(ConvGlueNode, 0));
3217 } else {
3218 ConvGlueNode = nullptr;
3219 }
3220 switch (IntrID) {
3221 case Intrinsic::amdgcn_wqm:
3222 Opcode = AMDGPU::WQM;
3223 break;
3224 case Intrinsic::amdgcn_softwqm:
3225 Opcode = AMDGPU::SOFT_WQM;
3226 break;
3227 case Intrinsic::amdgcn_wwm:
3228 case Intrinsic::amdgcn_strict_wwm:
3229 Opcode = AMDGPU::STRICT_WWM;
3230 break;
3231 case Intrinsic::amdgcn_strict_wqm:
3232 Opcode = AMDGPU::STRICT_WQM;
3233 break;
3234 case Intrinsic::amdgcn_interp_p1_f16:
3235 SelectInterpP1F16(N);
3236 return;
3237 case Intrinsic::amdgcn_permlane16_swap:
3238 case Intrinsic::amdgcn_permlane32_swap: {
3239 if ((IntrID == Intrinsic::amdgcn_permlane16_swap &&
3240 !Subtarget->hasPermlane16Swap()) ||
3241 (IntrID == Intrinsic::amdgcn_permlane32_swap &&
3242 !Subtarget->hasPermlane32Swap())) {
3243 SelectCode(N);
3244 return;
3245 }
3246
3247 Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap
3248 ? AMDGPU::V_PERMLANE16_SWAP_B32_e64
3249 : AMDGPU::V_PERMLANE32_SWAP_B32_e64;
3250
3252 if (ConvGlueNode)
3253 NewOps.push_back(SDValue(ConvGlueNode, 0));
3254
3255 bool FI = N->getConstantOperandVal(3);
3256 NewOps[2] = CurDAG->getTargetConstant(
3258
3259 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), NewOps);
3260 return;
3261 }
3262 default:
3263 SelectCode(N);
3264 break;
3265 }
3266
3267 if (Opcode != AMDGPU::INSTRUCTION_LIST_END) {
3268 SDValue Src = N->getOperand(1);
3269 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src});
3270 }
3271
3272 if (ConvGlueNode) {
3274 NewOps.push_back(SDValue(ConvGlueNode, 0));
3275 CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), NewOps);
3276 }
3277}
3278
3279void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
3280 unsigned IntrID = N->getConstantOperandVal(1);
3281 switch (IntrID) {
3282 case Intrinsic::amdgcn_ds_gws_init:
3283 case Intrinsic::amdgcn_ds_gws_barrier:
3284 case Intrinsic::amdgcn_ds_gws_sema_v:
3285 case Intrinsic::amdgcn_ds_gws_sema_br:
3286 case Intrinsic::amdgcn_ds_gws_sema_p:
3287 case Intrinsic::amdgcn_ds_gws_sema_release_all:
3288 SelectDS_GWS(N, IntrID);
3289 return;
3290 default:
3291 break;
3292 }
3293
3294 SelectCode(N);
3295}
3296
3297void AMDGPUDAGToDAGISel::SelectWAVE_ADDRESS(SDNode *N) {
3299 CurDAG->getTargetConstant(Subtarget->getWavefrontSizeLog2(), SDLoc(N), MVT::i32);
3300 CurDAG->SelectNodeTo(N, AMDGPU::S_LSHR_B32, N->getVTList(),
3301 {N->getOperand(0), Log2WaveSize});
3302}
3303
// Custom selection for ISD::STACKRESTORE: copy the saved value back into the
// stack pointer register. On the visible path, a divergent source is made
// uniform with V_READFIRSTLANE_B32 and then shifted left by log2(wavefront
// size) via S_LSHL_B32 before the CopyToReg.
// NOTE(review): doxygen-rendered listing — several lines are elided (the
// early-out condition feeding SelectCode, the CopyVal declaration, and the
// taken branch of the if/else); verify against upstream before editing.
3304void AMDGPUDAGToDAGISel::SelectSTACKRESTORE(SDNode *N) {
 3305 SDValue SrcVal = N->getOperand(1);
 3307 SelectCode(N);
 3308 return;
 3309 }
 3310
 3312 Register SP = TLI->getStackPointerRegisterToSaveRestore();
 3313 SDLoc SL(N);
 3314
 3317 } else {
 3318 SDValue Log2WaveSize = CurDAG->getTargetConstant(
 3319 Subtarget->getWavefrontSizeLog2(), SL, MVT::i32);
 3320
 3321 if (N->isDivergent()) {
 3322 SrcVal = SDValue(CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL,
 3323 MVT::i32, SrcVal),
 3324 0);
 3325 }
 3326
 3327 CopyVal = SDValue(CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
 3328 {SrcVal, Log2WaveSize}),
 3329 0);
 3330 }
 3331
 3332 SDValue CopyToSP = CurDAG->getCopyToReg(N->getOperand(0), SL, SP, CopyVal);
 3333 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), CopyToSP);
 3334}
3335
// Core VOP3 source-modifier matcher: peel fneg / fsub(-0, x) / (optionally)
// fabs wrappers off In, accumulating modifier bits into Mods and returning
// the stripped value in Src. When IsCanonicalizing is false, it additionally
// folds integer xor/and/or patterns with sign-mask-style constants (via the
// ReplaceSrc lambda) into neg/abs bits. Always returns true.
// NOTE(review): doxygen-rendered listing — many lines are elided (the Mods
// initialization, the |= of modifier bits, the LHS/CRHS constant lookups and
// the exact xor/and/or conditions); verify against upstream before editing.
3336bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
 3337 unsigned &Mods,
 3338 bool IsCanonicalizing,
 3339 bool AllowAbs) const {
 3341 Src = In;
 3342
 3343 if (Src.getOpcode() == ISD::FNEG) {
 3345 Src = Src.getOperand(0);
 3346 } else if (Src.getOpcode() == ISD::FSUB && IsCanonicalizing) {
 3347
 3348
 3350 if (LHS && LHS->isZero()) {
 3352 Src = Src.getOperand(1);
 3353 }
 3354 }
 3355
 3356 if (AllowAbs && Src.getOpcode() == ISD::FABS) {
 3358 Src = Src.getOperand(0);
 3359 }
 3360
 3362 return true;
 3363
 3364
 3365
 3366
 3367
 3368
 3369
 3370
 3371 if (IsCanonicalizing)
 3372 return true;
 3373
 3374
 3375
 3376
 3379
 3380
 3381
 3382
 3383
 3384
 3386 EVT VT = Src.getValueType();
 3388 (VT != MVT::i32 && VT != MVT::v2i32 && VT != MVT::i64))
 3389 return true;
 3390
 3392 if (!CRHS)
 3393 return true;
 3394
 3395 auto ReplaceSrc = [&]() -> SDValue {
 3397 return Src.getOperand(0);
 3398
 3402 Src.getValueType(), LHS, Index);
 3403 };
 3404
 3405
 3406
 3407
 3408
 3409
 3412 Src = ReplaceSrc();
 3413 } else if (Opc == ISD::AND && AllowAbs &&
 3416 Src = ReplaceSrc();
 3419 Src = ReplaceSrc();
 3420 }
 3421
 3422 return true;
 3423}
3424
3425bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
3426 SDValue &SrcMods) const {
3427 unsigned Mods;
3428 if (SelectVOP3ModsImpl(In, Src, Mods, true,
3429 true)) {
3430 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3431 return true;
3432 }
3433
3434 return false;
3435}
3436
// Variant of SelectVOP3Mods with IsCanonicalizing=false: also recognizes the
// integer sign-bit patterns handled by SelectVOP3ModsImpl's second half.
// NOTE(review): doxygen-rendered listing — line 3438 (the parameter list
// continuation) is elided; verify against upstream before editing.
3437bool AMDGPUDAGToDAGISel::SelectVOP3ModsNonCanonicalizing(
 3439 unsigned Mods;
 3440 if (SelectVOP3ModsImpl(In, Src, Mods, false,
 3441 true)) {
 3442 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
 3443 return true;
 3444 }
 3445
 3446 return false;
 3447}
3448
3449bool AMDGPUDAGToDAGISel::SelectVOP3BMods(SDValue In, SDValue &Src,
3450 SDValue &SrcMods) const {
3451 unsigned Mods;
3452 if (SelectVOP3ModsImpl(In, Src, Mods,
3453 true,
3454 false)) {
3455 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3456 return true;
3457 }
3458
3459 return false;
3460}
3461
3462bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
3463 if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
3464 return false;
3465
3466 Src = In;
3467 return true;
3468}
3469
// Shared matcher for VINTERP source modifiers: folds fneg (no abs) and, when
// OpSel is set, additionally marks the high-half selection in the modifier
// word.
// NOTE(review): doxygen-rendered listing — the parameter-list continuation
// (line 3471) and the op_sel bit update under `if (OpSel)` (line 3478) are
// elided; verify against upstream before editing.
3470bool AMDGPUDAGToDAGISel::SelectVINTERPModsImpl(SDValue In, SDValue &Src,
 3472 bool OpSel) const {
 3473 unsigned Mods;
 3474 if (SelectVOP3ModsImpl(In, Src, Mods,
 3475 true,
 3476 false)) {
 3477 if (OpSel)
 3479 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
 3480 return true;
 3481 }
 3482
 3483 return false;
 3484}
3485
3486bool AMDGPUDAGToDAGISel::SelectVINTERPMods(SDValue In, SDValue &Src,
3487 SDValue &SrcMods) const {
3488 return SelectVINTERPModsImpl(In, Src, SrcMods, false);
3489}
3490
3491bool AMDGPUDAGToDAGISel::SelectVINTERPModsHi(SDValue In, SDValue &Src,
3492 SDValue &SrcMods) const {
3493 return SelectVINTERPModsImpl(In, Src, SrcMods, true);
3494}
3495
// SelectVOP3Mods plus materialization of default-zero clamp and omod
// operands (both i1 target constants).
// NOTE(review): doxygen-rendered listing — the parameter-list continuation
// (lines 3497-3498) is elided; verify against upstream before editing.
3496bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
 3499 SDLoc DL(In);
 3500 Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
 3501 Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
 3502
 3503 return SelectVOP3Mods(In, Src, SrcMods);
 3504}
3505
// SelectVOP3BMods (no abs modifier) plus default-zero clamp and omod
// operands, mirroring SelectVOP3Mods0.
// NOTE(review): doxygen-rendered listing — the parameter-list continuation
// (lines 3507-3508) is elided; verify against upstream before editing.
3506bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(SDValue In, SDValue &Src,
 3509 SDLoc DL(In);
 3510 Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
 3511 Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
 3512
 3513 return SelectVOP3BMods(In, Src, SrcMods);
 3514}
3515
// Pass the source through unmodified and emit default-zero clamp/omod
// operands; no source modifiers are folded. Always succeeds.
// NOTE(review): doxygen-rendered listing — the parameter-list continuation
// (line 3517) is elided; verify against upstream before editing.
3516bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
 3518 Src = In;
 3519
 3520 SDLoc DL(In);
 3521 Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
 3522 Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);
 3523
 3524 return true;
 3525}
3526
// VOP3P (packed) source-modifier matcher: folds whole-vector fneg, then
// tries to recognize a two-element BUILD_VECTOR whose halves come from the
// same source (folding per-half fneg and lo/hi extraction into op_sel bits),
// a splatted/duplicated scalar (rebuilt via createVOP3PSrc32FromLo16 or a
// REG_SEQUENCE with an undef high half), a constant pair emitted as an i64
// literal, or a two-element VECTOR_SHUFFLE expressed through op_sel.
// Always returns true, falling back to the plain operand with whatever
// modifier bits were gathered.
// NOTE(review): doxygen-rendered listing — many lines are elided (Mods
// initialization, the op_sel/neg bit updates, several conditions, the
// ConstantFPSDNode checks around the i64-literal path, and the shuffle-node
// cast); verify against upstream before editing.
3527bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
 3528 SDValue &SrcMods, bool IsDOT) const {
 3530 Src = In;
 3531
 3532
 3533 if (Src.getOpcode() == ISD::FNEG) {
 3535 Src = Src.getOperand(0);
 3536 }
 3537
 3538 if (Src.getOpcode() == ISD::BUILD_VECTOR && Src.getNumOperands() == 2 &&
 3539 (!IsDOT || !Subtarget->hasDOTOpSelHazard())) {
 3540 unsigned VecMods = Mods;
 3541
 3542 SDValue Lo = stripBitcast(Src.getOperand(0));
 3543 SDValue Hi = stripBitcast(Src.getOperand(1));
 3544
 3545 if (Lo.getOpcode() == ISD::FNEG) {
 3546 Lo = stripBitcast(Lo.getOperand(0));
 3548 }
 3549
 3550 if (Hi.getOpcode() == ISD::FNEG) {
 3551 Hi = stripBitcast(Hi.getOperand(0));
 3553 }
 3554
 3557
 3560
 3561 unsigned VecSize = Src.getValueSizeInBits();
 3562 Lo = stripExtractLoElt(Lo);
 3563 Hi = stripExtractLoElt(Hi);
 3564
 3565 if (Lo.getValueSizeInBits() > VecSize) {
 3566 Lo = CurDAG->getTargetExtractSubreg(
 3567 (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
 3569 }
 3570
 3571 if (Hi.getValueSizeInBits() > VecSize) {
 3572 Hi = CurDAG->getTargetExtractSubreg(
 3573 (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
 3575 }
 3576
 3577 assert(Lo.getValueSizeInBits() <= VecSize &&
 3578 Hi.getValueSizeInBits() <= VecSize);
 3579
 3580 if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
 3581
 3582
 3583
 3584 if (VecSize == Lo.getValueSizeInBits()) {
 3585 Src = Lo;
 3586 } else if (VecSize == 32) {
 3587 Src = createVOP3PSrc32FromLo16(Lo, Src, CurDAG, Subtarget);
 3588 } else {
 3589 assert(Lo.getValueSizeInBits() == 32 && VecSize == 64);
 3590
 3591 SDLoc SL(In);
 3593 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL,
 3594 Lo.getValueType()), 0);
 3595 auto RC = Lo->isDivergent() ? AMDGPU::VReg_64RegClassID
 3596 : AMDGPU::SReg_64RegClassID;
 3598 CurDAG->getTargetConstant(RC, SL, MVT::i32),
 3599 Lo, CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
 3600 Undef, CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32) };
 3601
 3602 Src = SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SL,
 3603 Src.getValueType(), Ops), 0);
 3604 }
 3605 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
 3606 return true;
 3607 }
 3608
 3611 .bitcastToAPInt().getZExtValue();
 3613 Src = CurDAG->getTargetConstant(Lit, SDLoc(In), MVT::i64);
 3614 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
 3615 return true;
 3616 }
 3617 }
 3618
 3619 Mods = VecMods;
 3621 Src.getNumOperands() == 2) {
 3622
 3623
 3624
 3625
 3627 ArrayRef Mask = SVN->getMask();
 3628
 3629 if (Mask[0] < 2 && Mask[1] < 2) {
 3630
 3631 SDValue ShuffleSrc = SVN->getOperand(0);
 3632
 3633 if (ShuffleSrc.getOpcode() == ISD::FNEG) {
 3634 ShuffleSrc = ShuffleSrc.getOperand(0);
 3636 }
 3637
 3638 if (Mask[0] == 1)
 3640 if (Mask[1] == 1)
 3642
 3643 Src = ShuffleSrc;
 3644 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
 3645 return true;
 3646 }
 3647 }
 3648
 3649
 3651
 3652 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
 3653 return true;
 3654}
3655
3656bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, SDValue &Src,
3657 SDValue &SrcMods) const {
3658 return SelectVOP3PMods(In, Src, SrcMods, true);
3659}
3660
// Translate a constant i1 operand into a WMMA op_sel modifier word: a source
// value of 1 sets the (elided) op_sel bit in Mods. Always succeeds.
// NOTE(review): doxygen-rendered listing — the parameter-list continuation,
// the ConstantSDNode cast, the Mods initialization and the bit update under
// `if (SrcVal == 1)` are elided; verify against upstream before editing.
3661bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
 3664 assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
 3665
 3667 unsigned SrcVal = C->getZExtValue();
 3668 if (SrcVal == 1)
 3670
 3671 Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
 3672 return true;
 3673}
3674
3678 unsigned DstRegClass;
3679 EVT DstTy;
3680 switch (Elts.size()) {
3681 case 8:
3682 DstRegClass = AMDGPU::VReg_256RegClassID;
3683 DstTy = MVT::v8i32;
3684 break;
3685 case 4:
3686 DstRegClass = AMDGPU::VReg_128RegClassID;
3687 DstTy = MVT::v4i32;
3688 break;
3689 case 2:
3690 DstRegClass = AMDGPU::VReg_64RegClassID;
3691 DstTy = MVT::v2i32;
3692 break;
3693 default:
3695 }
3696
3699 for (unsigned i = 0; i < Elts.size(); ++i) {
3700 Ops.push_back(Elts[i]);
3703 }
3704 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, DstTy, Ops);
3705}
3706
3711 assert("unhandled Reg sequence size" &&
3712 (Elts.size() == 8 || Elts.size() == 16));
3713
3714
3715
3716 for (unsigned i = 0; i < Elts.size(); i += 2) {
3717 SDValue LoSrc = stripExtractLoElt(stripBitcast(Elts[i]));
3719 if (isExtractHiElt(Elts[i + 1], HiSrc) && LoSrc == HiSrc) {
3721 } else {
3724 CurDAG->getMachineNode(AMDGPU::V_PERM_B32_e64, DL, MVT::i32,
3725 {Elts[i + 1], Elts[i], PackLoLo});
3727 }
3728 }
3729
3731}
3732
3735 const SDLoc &DL, unsigned ElementSize) {
3736 if (ElementSize == 16)
3738 if (ElementSize == 32)
3741}
3742
3746 unsigned ElementSize) {
3747 if (ModOpcode == ISD::FNEG) {
3749
3751 for (auto El : Elts) {
3752 if (El.getOpcode() != ISD::FABS)
3753 break;
3754 NegAbsElts.push_back(El->getOperand(0));
3755 }
3756 if (Elts.size() != NegAbsElts.size()) {
3757
3759 } else {
3760
3763 }
3764 } else {
3765 assert(ModOpcode == ISD::FABS);
3766
3769 }
3770}
3771
3772
3773
// Walk each operand of BV; for operands that are (after stripping bitcasts)
// two-element f16 pairs, apply ModifierCheck to each stripped f16 element,
// stopping a pair's scan at the first element the callback rejects.
// NOTE(review): doxygen-rendered listing — the parameter line naming BV and
// the dyn_cast producing F16Pair are elided; verify against upstream.
3774static void
 3776 std::function<bool(SDValue)> ModifierCheck) {
 3777 for (unsigned i = 0; i < BV->getNumOperands(); ++i) {
 3778 if (auto *F16Pair =
 3780 for (unsigned i = 0; i < F16Pair->getNumOperands(); ++i) {
 3781 SDValue ElF16 = stripBitcast(F16Pair->getOperand(i));
 3782 if (!ModifierCheck(ElF16))
 3783 break;
 3784 }
 3785 }
 3786 }
 3787}
3788
// WMMA f16 matcher for the neg modifier: collects elements wrapped in FNEG
// (first per-f16-element, then per-v2f16 operand) and, when all elements
// qualify, rebuilds the source without the negations (elided paths) before
// emitting the modifier word. Always returns true.
// NOTE(review): doxygen-rendered listing — Mods initialization, element
// collection into the Elts vectors, and the rebuild calls are elided;
// verify against upstream before editing.
3789bool AMDGPUDAGToDAGISel::SelectWMMAModsF16Neg(SDValue In, SDValue &Src,
 3790 SDValue &SrcMods) const {
 3791 Src = In;
 3793
 3794
 3797
 3799 if (Element.getOpcode() != ISD::FNEG)
 3800 return false;
 3802 return true;
 3803 });
 3804
 3805
 3810 }
 3811 }
 3812
 3813
 3816 for (unsigned i = 0; i < BV->getNumOperands(); ++i) {
 3818
 3819 if (ElV2f16.getOpcode() != ISD::FNEG)
 3820 break;
 3822 }
 3823
 3824
 3829 }
 3830 }
 3831
 3832 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
 3833 return true;
 3834}
3835
// WMMA f16 matcher for a uniform neg OR abs modifier: the first element seen
// fixes ModOpcode (FNEG vs FABS); collection stops as soon as an element
// does not match. The (elided) selectWMMAModsNegAbs calls then rebuild the
// source and set the modifier bits. Always returns true.
// NOTE(review): doxygen-rendered listing — Mods initialization, the EltsF16/
// EltsV2F16 vectors and the rebuild/modifier-update calls are elided; verify
// against upstream before editing.
3836bool AMDGPUDAGToDAGISel::SelectWMMAModsF16NegAbs(SDValue In, SDValue &Src,
 3837 SDValue &SrcMods) const {
 3838 Src = In;
 3840 unsigned ModOpcode;
 3841
 3842
 3846
 3847 if (EltsF16.empty())
 3848 ModOpcode = (ElF16.getOpcode() == ISD::FNEG) ? ISD::FNEG : ISD::FABS;
 3849 if (ElF16.getOpcode() != ModOpcode)
 3850 return false;
 3852 return true;
 3853 });
 3854
 3855
 3858 16);
 3859 }
 3860
 3861
 3864
 3865 for (unsigned i = 0; i < BV->getNumOperands(); ++i) {
 3867
 3868 if (EltsV2F16.empty())
 3869 ModOpcode = (ElV2f16.getOpcode() == ISD::FNEG) ? ISD::FNEG : ISD::FABS;
 3870 if (ElV2f16->getOpcode() != ModOpcode)
 3871 break;
 3873 }
 3874
 3875
 3878 32);
 3879 }
 3880
 3881 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
 3882 return true;
 3883}
3884
// WMMA f32 analogue of SelectWMMAModsF16NegAbs: the first f32 element fixes
// whether neg or abs is being matched; collection stops at the first
// mismatch, and the (elided) rebuild applies the uniform modifier. Always
// returns true.
// NOTE(review): doxygen-rendered listing — Mods initialization, the
// BuildVectorSDNode cast, element stripping and the rebuild call are elided;
// verify against upstream before editing.
3885bool AMDGPUDAGToDAGISel::SelectWMMAModsF32NegAbs(SDValue In, SDValue &Src,
 3886 SDValue &SrcMods) const {
 3887 Src = In;
 3890
 3893
 3895 unsigned ModOpcode =
 3896 (ElF32.getOpcode() == ISD::FNEG) ? ISD::FNEG : ISD::FABS;
 3897 for (unsigned i = 0; i < BV->getNumOperands(); ++i) {
 3899 if (ElF32.getOpcode() != ModOpcode)
 3900 break;
 3902 }
 3903
 3904
 3907 32);
 3908 }
 3909
 3910 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
 3911 return true;
 3912}
3913
// Match a WMMA vector operand that is a splat of an inline-immediate
// constant. First tries a direct splat whose value is inline-legal as i32;
// then looks through a bitcast for a 32-bit splat that is itself a splat of
// a 16-bit element, checking inline-constant legality against f16/bf16/i16
// as appropriate and emitting an i16 target constant.
// NOTE(review): doxygen-rendered listing — the BuildVectorSDNode casts, the
// ConstantSDNode/ConstantFPSDNode dyn_casts, the APFloat construction and
// the final llvm_unreachable-style else are elided; verify against upstream.
3914bool AMDGPUDAGToDAGISel::SelectWMMAVISrc(SDValue In, SDValue &Src) const {
 3916 BitVector UndefElements;
 3918 if (isInlineImmediate(Splat.getNode())) {
 3920 unsigned Imm = C->getAPIntValue().getSExtValue();
 3921 Src = CurDAG->getTargetConstant(Imm, SDLoc(In), MVT::i32);
 3922 return true;
 3923 }
 3925 unsigned Imm = C->getValueAPF().bitcastToAPInt().getSExtValue();
 3926 Src = CurDAG->getTargetConstant(Imm, SDLoc(In), MVT::i32);
 3927 return true;
 3928 }
 3930 }
 3931 }
 3932
 3933
 3934 SDValue SplatSrc32 = stripBitcast(In);
 3936 if (SDValue Splat32 = SplatSrc32BV->getSplatValue()) {
 3937 SDValue SplatSrc16 = stripBitcast(Splat32);
 3939 if (SDValue Splat = SplatSrc16BV->getSplatValue()) {
 3940 const SIInstrInfo *TII = Subtarget->getInstrInfo();
 3941 std::optional RawValue;
 3943 RawValue = C->getValueAPF().bitcastToAPInt();
 3945 RawValue = C->getAPIntValue();
 3946
 3947 if (RawValue.has_value()) {
 3948 EVT VT = In.getValueType().getScalarType();
 3953 RawValue.value());
 3954 if (TII->isInlineConstant(FloatVal)) {
 3955 Src = CurDAG->getTargetConstant(RawValue.value(), SDLoc(In),
 3956 MVT::i16);
 3957 return true;
 3958 }
 3959 } else if (VT.getSimpleVT() == MVT::i16) {
 3960 if (TII->isInlineConstant(RawValue.value())) {
 3961 Src = CurDAG->getTargetConstant(RawValue.value(), SDLoc(In),
 3962 MVT::i16);
 3963 return true;
 3964 }
 3965 } else
 3967 }
 3968 }
 3969 }
 3970
 3971 return false;
 3972}
3973
// Match an SWMMAC 8-bit index operand: when In is a shift of ShiftSrc by a
// recognized constant amount (conditions elided), fold the shift into an
// index_key value and use ShiftSrc directly. Always returns true, defaulting
// to key 0 with In unchanged.
// NOTE(review): doxygen-rendered listing — the shift-opcode test and the
// shift-amount/key computation (lines 3979, 3981-3984) are elided; verify
// against upstream before editing.
3974bool AMDGPUDAGToDAGISel::SelectSWMMACIndex8(SDValue In, SDValue &Src,
 3975 SDValue &IndexKey) const {
 3976 unsigned Key = 0;
 3977 Src = In;
 3978
 3980 const llvm::SDValue &ShiftSrc = In.getOperand(0);
 3985 Src = ShiftSrc;
 3986 }
 3987 }
 3988
 3989 IndexKey = CurDAG->getTargetConstant(Key, SDLoc(In), MVT::i32);
 3990 return true;
 3991}
3992
// Match an SWMMAC 16-bit index operand: when In is a shift of ShiftSrc whose
// (elided) amount selects the high half, use key 1 and ShiftSrc directly.
// Always returns true, defaulting to key 0 with In unchanged.
// NOTE(review): doxygen-rendered listing — the shift-opcode and shift-amount
// tests (lines 3999 condition, 4000-4002) are elided; verify against
// upstream before editing.
3993bool AMDGPUDAGToDAGISel::SelectSWMMACIndex16(SDValue In, SDValue &Src,
 3994 SDValue &IndexKey) const {
 3995 unsigned Key = 0;
 3996 Src = In;
 3997
 3999 const llvm::SDValue &ShiftSrc = In.getOperand(0);
 4003 Key = 1;
 4004 Src = ShiftSrc;
 4005 }
 4006 }
 4007
 4008 IndexKey = CurDAG->getTargetConstant(Key, SDLoc(In), MVT::i32);
 4009 return true;
 4010}
4011
// Match an SWMMAC 32-bit index operand: looks through an (elided) extend or
// a bitcast of a build_vector whose second lane is zero to find an i32
// value, and when that value is an extract of vector element 1 (conditions
// elided) selects key 1 with the extract source. Always returns true,
// defaulting to key 0 with In unchanged.
// NOTE(review): doxygen-rendered listing — the extend-opcode test, the
// build_vector/extract checks and the InI32/ExtractVecEltSrc plumbing are
// partially elided; verify against upstream before editing.
4012bool AMDGPUDAGToDAGISel::SelectSWMMACIndex32(SDValue In, SDValue &Src,
 4013 SDValue &IndexKey) const {
 4014 unsigned Key = 0;
 4015 Src = In;
 4016
 4018
 4020 const SDValue &ExtendSrc = In.getOperand(0);
 4022 InI32 = ExtendSrc;
 4023 } else if (In->getOpcode() == ISD::BITCAST) {
 4024 const SDValue &CastSrc = In.getOperand(0);
 4028 if (Zero && Zero->getZExtValue() == 0)
 4030 }
 4031 }
 4032
 4038 Key = 1;
 4039 Src = ExtractVecEltSrc;
 4040 }
 4041 }
 4042
 4043 IndexKey = CurDAG->getTargetConstant(Key, SDLoc(In), MVT::i32);
 4044 return true;
 4045}
4046
4047bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
4048 SDValue &SrcMods) const {
4049 Src = In;
4050
4051 SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
4052 return true;
4053}
4054
4055bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
4056 SDValue &SrcMods) const {
4057
4058 return SelectVOP3Mods(In, Src, SrcMods);
4059}
4060
4061
4062
4063
4064
4065
4066
4068 if (Op.getValueType() != MVT::f32 || Op.getOpcode() != ISD::BITCAST)
4071
4072 IsExtractHigh = false;
4075 if (!Low16 || !Low16->isZero())
4077 Op = stripBitcast(Op.getOperand(1));
4078 if (Op.getValueType() != MVT::bf16)
4080 return Op;
4081 }
4082
4083 if (Op.getValueType() != MVT::i32)
4085
4088 if (Mask->getZExtValue() == 0xffff0000) {
4089 IsExtractHigh = true;
4090 return Op.getOperand(0);
4091 }
4092 }
4094 }
4095
4099 return Op.getOperand(0);
4100 }
4101 }
4102
4104}
4105
4106
4107
// Match a mixed-precision (f16/bf16 -> f32) mad-mix source: first gather
// outer neg/abs modifiers, then require an fp_extend (or, for bf16, a
// bf16-extend-like pattern via the elided matchBF16FPExtendLike), fold the
// inner value's own modifiers, and finally encode lo/hi half selection
// (op_sel bits, updates elided) for 16-bit sources. Returns false when no
// extend-like pattern is present.
// NOTE(review): doxygen-rendered listing — the B16 matcher call, the
// modifier-bit merges after the inner SelectVOP3ModsImpl, and the op_sel
// updates around the 16-bit/extract-high paths are elided; verify against
// upstream before editing.
4108bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
 4109 unsigned &Mods,
 4110 MVT VT) const {
 4111 Mods = 0;
 4112 SelectVOP3ModsImpl(In, Src, Mods);
 4113
 4114 bool IsExtractHigh = false;
 4115 if (Src.getOpcode() == ISD::FP_EXTEND) {
 4116 Src = Src.getOperand(0);
 4117 } else if (VT == MVT::bf16) {
 4119 if (!B16)
 4120 return false;
 4121 Src = B16;
 4122 } else
 4123 return false;
 4124
 4125 if (Src.getValueType() != VT &&
 4126 (VT != MVT::bf16 || Src.getValueType() != MVT::i32))
 4127 return false;
 4128
 4129 Src = stripBitcast(Src);
 4130
 4131
 4132
 4134 unsigned ModsTmp;
 4135 SelectVOP3ModsImpl(Src, Src, ModsTmp);
 4136
 4139
 4142 }
 4143
 4144
 4145
 4146
 4147
 4148
 4150 if (Src.getValueSizeInBits() == 16) {
 4153
 4154
 4155 return true;
 4156 }
 4157
 4159 Src.getOperand(0).getValueType() == MVT::i32) {
 4160 Src = Src.getOperand(0);
 4161 return true;
 4162 }
 4163
 4164 if (Subtarget->useRealTrue16Insts())
 4165
 4166 Src = createVOP3PSrc32FromLo16(Src, In, CurDAG, Subtarget);
 4167 } else if (IsExtractHigh)
 4169
 4170 return true;
 4171}
4172
4173bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsExt(SDValue In, SDValue &Src,
4174 SDValue &SrcMods) const {
4175 unsigned Mods = 0;
4176 if (!SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::f16))
4177 return false;
4178 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4179 return true;
4180}
4181
4182bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
4183 SDValue &SrcMods) const {
4184 unsigned Mods = 0;
4185 SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::f16);
4186 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4187 return true;
4188}
4189
4190bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixBF16ModsExt(SDValue In, SDValue &Src,
4191 SDValue &SrcMods) const {
4192 unsigned Mods = 0;
4193 if (!SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::bf16))
4194 return false;
4195 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4196 return true;
4197}
4198
4199bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixBF16Mods(SDValue In, SDValue &Src,
4200 SDValue &SrcMods) const {
4201 unsigned Mods = 0;
4202 SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::bf16);
4203 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4204 return true;
4205}
4206
4207
4208
4211 unsigned NumOpcodes = 0;
4212 uint8_t LHSBits, RHSBits;
4213
4214 auto getOperandBits = [&Src, In](SDValue Op, uint8_t &Bits) -> bool {
4215
4216
4217
4218
4219
4220
4221
4222
4223
4224 const uint8_t SrcBits[3] = { 0xf0, 0xcc, 0xaa };
4225
4227 if (C->isAllOnes()) {
4228 Bits = 0xff;
4229 return true;
4230 }
4231 if (C->isZero()) {
4232 Bits = 0;
4233 return true;
4234 }
4235 }
4236
4237 for (unsigned I = 0; I < Src.size(); ++I) {
4238
4240 Bits = SrcBits[I];
4241 return true;
4242 }
4243
4244 if (Src[I] == In) {
4245 Bits = SrcBits[I];
4247 return true;
4248 }
4249 }
4250
4251 if (Src.size() == 3) {
4252
4253
4254
4257 if (C->isAllOnes()) {
4259 for (unsigned I = 0; I < Src.size(); ++I) {
4261 Bits = ~SrcBits[I];
4262 return true;
4263 }
4264 }
4265 }
4266 }
4267 }
4268
4269 return false;
4270 }
4271
4272 Bits = SrcBits[Src.size()];
4273 Src.push_back(Op);
4274 return true;
4275 };
4276
4277 switch (In.getOpcode()) {
4283
4285 if (!getOperandBits(LHS, LHSBits) ||
4286 !getOperandBits(RHS, RHSBits)) {
4287 Src = Backup;
4288 return std::make_pair(0, 0);
4289 }
4290
4291
4293 if (Op.first) {
4294 NumOpcodes += Op.first;
4295 LHSBits = Op.second;
4296 }
4297
4299 if (Op.first) {
4300 NumOpcodes += Op.first;
4301 RHSBits = Op.second;
4302 }
4303 break;
4304 }
4305 default:
4306 return std::make_pair(0, 0);
4307 }
4308
4310 switch (In.getOpcode()) {
4312 TTbl = LHSBits & RHSBits;
4313 break;
4315 TTbl = LHSBits | RHSBits;
4316 break;
4318 TTbl = LHSBits ^ RHSBits;
4319 break;
4320 default:
4321 break;
4322 }
4323
4324 return std::make_pair(NumOpcodes + 1, TTbl);
4325}
4326
4330 uint8_t TTbl;
4331 unsigned NumOpcodes;
4332
4333 std::tie(NumOpcodes, TTbl) = BitOp3_Op(In, Src);
4334
4335
4336
4337 if (NumOpcodes < 2 || Src.empty())
4338 return false;
4339
4340
4341
4342
4343 if (NumOpcodes < 4 && !In->isDivergent())
4344 return false;
4345
4346 if (NumOpcodes == 2 && In.getValueType() == MVT::i32) {
4347
4348
4349
4351 (In.getOperand(0).getOpcode() == In.getOpcode() ||
4352 In.getOperand(1).getOpcode() == In.getOpcode()))
4353 return false;
4354
4355 if (In.getOpcode() == ISD::OR &&
4356 (In.getOperand(0).getOpcode() == ISD::AND ||
4357 In.getOperand(1).getOpcode() == ISD::AND))
4358 return false;
4359 }
4360
4361
4362
4363
4364
4365
4366 while (Src.size() < 3)
4367 Src.push_back(Src[0]);
4368
4369 Src0 = Src[0];
4370 Src1 = Src[1];
4371 Src2 = Src[2];
4372
4373 Tbl = CurDAG->getTargetConstant(TTbl, SDLoc(In), MVT::i32);
4374 return true;
4375}
4376
// Produce a value representing In placed in the high 16 bits of an i32:
// undef stays undef, integer/FP constants are bit-shifted left by 16, and an
// (elided) hi-element pattern returns its source directly.
// NOTE(review): doxygen-rendered listing — the ConstantSDNode /
// ConstantFPSDNode dyn_casts and the final extract-hi/fallback lines are
// elided; verify against upstream before editing.
4377SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const {
 4378 if (In.isUndef())
 4379 return CurDAG->getUNDEF(MVT::i32);
 4380
 4382 SDLoc SL(In);
 4383 return CurDAG->getConstant(C->getZExtValue() << 16, SL, MVT::i32);
 4384 }
 4385
 4387 SDLoc SL(In);
 4388 return CurDAG->getConstant(
 4389 C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
 4390 }
 4391
 4394 return Src;
 4395
 4397}
4398
// Decide whether an immediate should be materialized in a VGPR: scan up to
// 10 uses of N and return true only when some use's operand register class
// does not accept an SGPR (VS_32/VS_64/VS_64_Align2), taking commutable
// machine instructions into account — if the commuted operand slot would
// accept an SGPR, the use still counts as SGPR-friendly.
// NOTE(review): doxygen-rendered listing — the null-RC early-out around line
// 4415 and the findCommutedOpIndices call feeding CommuteIdx1 (lines
// 4427-4428) are elided; verify against upstream before editing.
4399bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
 4400 assert(CurDAG->getTarget().getTargetTriple().isAMDGCN());
 4401
 4402 const SIRegisterInfo *SIRI = Subtarget->getRegisterInfo();
 4403 const SIInstrInfo *SII = Subtarget->getInstrInfo();
 4404
 4405 unsigned Limit = 0;
 4406 bool AllUsesAcceptSReg = true;
 4407 for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
 4408 Limit < 10 && U != E; ++U, ++Limit) {
 4409 const TargetRegisterClass *RC =
 4410 getOperandRegClass(U->getUser(), U->getOperandNo());
 4411
 4412
 4413
 4414
 4416 return false;
 4417
 4418 if (RC != &AMDGPU::VS_32RegClass && RC != &AMDGPU::VS_64RegClass &&
 4419 RC != &AMDGPU::VS_64_Align2RegClass) {
 4420 AllUsesAcceptSReg = false;
 4421 SDNode *User = U->getUser();
 4422 if (User->isMachineOpcode()) {
 4423 unsigned Opc = User->getMachineOpcode();
 4424 const MCInstrDesc &Desc = SII->get(Opc);
 4425 if (Desc.isCommutable()) {
 4426 unsigned OpIdx = Desc.getNumDefs() + U->getOperandNo();
 4429 unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
 4430 const TargetRegisterClass *CommutedRC =
 4431 getOperandRegClass(U->getUser(), CommutedOpNo);
 4432 if (CommutedRC == &AMDGPU::VS_32RegClass ||
 4433 CommutedRC == &AMDGPU::VS_64RegClass ||
 4434 CommutedRC == &AMDGPU::VS_64_Align2RegClass)
 4435 AllUsesAcceptSReg = true;
 4436 }
 4437 }
 4438 }
 4439
 4440
 4441
 4442
 4443 if (!AllUsesAcceptSReg)
 4444 break;
 4445 }
 4446 }
 4447 return !AllUsesAcceptSReg && (Limit < 10);
 4448}
4449
// Decide whether a load may be selected as a scalar (uniform) load. The
// visible fragments check the memory operand, an alignment lower bound, a
// simple (non-volatile, non-atomic) access, and — for scalarized-global
// behavior — that no aliasing store can clobber the location.
// NOTE(review): doxygen-rendered listing — the LoadSDNode cast, the
// divergence/address-space conditions and several conjuncts of the final
// expression are elided, so the precise predicate cannot be read from this
// listing; verify against upstream before editing.
4450bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode *N) const {
 4452 const MachineMemOperand *MMO = Ld->getMemOperand();
 4453
 4454
 4455
 4456
 4457
 4458
 4459
 4461 return false;
 4462
 4464 Ld->getAlign() >=
 4466 uint64_t(4))) &&
 4470 (Subtarget->getScalarizeGlobalBehavior() &&
 4472 Ld->isSimple() &&
 4474 ->isMemOpHasNoClobberedMemOperand(N)));
 4475}
4476
4480 bool IsModified = false;
4481 do {
4482 IsModified = false;
4483
4484
4486 while (Position != CurDAG->allnodes_end()) {
4489 if (!MachineNode)
4490 continue;
4491
4493 if (ResNode != Node) {
4494 if (ResNode)
4496 IsModified = true;
4497 }
4498 }
4499 CurDAG->RemoveDeadNodes();
4500 } while (IsModified);
4501}
4502
4507
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
const TargetInstrInfo & TII
static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr, SDValue &N0, SDValue &N1)
Definition AMDGPUISelDAGToDAG.cpp:905
static MachineSDNode * buildRegSequence32(SmallVectorImpl< SDValue > &Elts, llvm::SelectionDAG *CurDAG, const SDLoc &DL)
Definition AMDGPUISelDAGToDAG.cpp:3675
static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr)
Definition AMDGPUISelDAGToDAG.cpp:2198
static SDValue matchExtFromI32orI32(SDValue Op, bool IsSigned, const SelectionDAG *DAG)
Definition AMDGPUISelDAGToDAG.cpp:1961
static MemSDNode * findMemSDNode(SDNode *N)
Definition AMDGPUISelDAGToDAG.cpp:1822
static MachineSDNode * buildRegSequence16(SmallVectorImpl< SDValue > &Elts, llvm::SelectionDAG *CurDAG, const SDLoc &DL)
Definition AMDGPUISelDAGToDAG.cpp:3707
static bool IsCopyFromSGPR(const SIRegisterInfo &TRI, SDValue Val)
Definition AMDGPUISelDAGToDAG.cpp:1731
static SDValue combineBallotPattern(SDValue VCMP, bool &Negate)
Definition AMDGPUISelDAGToDAG.cpp:2814
static SDValue matchBF16FPExtendLike(SDValue Op, bool &IsExtractHigh)
Definition AMDGPUISelDAGToDAG.cpp:4067
static void checkWMMAElementsModifiersF16(BuildVectorSDNode *BV, std::function< bool(SDValue)> ModifierCheck)
Definition AMDGPUISelDAGToDAG.cpp:3775
Defines an instruction selector for the AMDGPU target.
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
static bool isNoUnsignedWrap(MachineInstr *Addr)
static bool isExtractHiElt(MachineRegisterInfo &MRI, Register In, Register &Out)
static std::pair< unsigned, uint8_t > BitOp3_Op(Register R, SmallVectorImpl< Register > &Src, const MachineRegisterInfo &MRI)
static unsigned gwsIntrinToOpcode(unsigned IntrID)
static Register buildRegSequence(SmallVectorImpl< Register > &Elts, MachineInstr *InsertPt, MachineRegisterInfo &MRI)
static void selectWMMAModsNegAbs(unsigned ModOpcode, unsigned &Mods, SmallVectorImpl< Register > &Elts, Register &Src, MachineInstr *InsertPt, MachineRegisterInfo &MRI)
Provides AMDGPU specific target descriptions.
Base class for AMDGPU specific classes of TargetSubtarget.
The AMDGPU TargetMachine interface definition for hw codegen targets.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Register const TargetRegisterInfo * TRI
Promote Memory to Register
MachineInstr unsigned OpIdx
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Provides R600 specific target descriptions.
Interface definition for R600RegisterInfo.
const SmallVectorImpl< MachineOperand > & Cond
SI DAG Lowering interface definition.
LLVM IR instance of the generic uniformity analysis.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition AMDGPUISelDAGToDAG.cpp:240
AMDGPUDAGToDAGISelLegacy(TargetMachine &TM, CodeGenOptLevel OptLevel)
Definition AMDGPUISelDAGToDAG.cpp:4503
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
Definition AMDGPUISelDAGToDAG.cpp:229
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
Definition AMDGPUISelDAGToDAG.cpp:952
AMDGPU specific code to select AMDGPU machine instructions for SelectionDAG operations.
void SelectBuildVector(SDNode *N, unsigned RegClassID)
Definition AMDGPUISelDAGToDAG.cpp:491
void Select(SDNode *N) override
Main hook for targets to transform nodes into machine nodes.
Definition AMDGPUISelDAGToDAG.cpp:662
bool runOnMachineFunction(MachineFunction &MF) override
Definition AMDGPUISelDAGToDAG.cpp:159
void SelectVectorShuffle(SDNode *N)
Definition AMDGPUISelDAGToDAG.cpp:573
void PreprocessISelDAG() override
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
Definition AMDGPUISelDAGToDAG.cpp:330
AMDGPUDAGToDAGISel()=delete
void PostprocessISelDAG() override
PostprocessISelDAG() - This hook allows the target to hack on the graph right after selection.
Definition AMDGPUISelDAGToDAG.cpp:4477
bool matchLoadD16FromBuildVector(SDNode *N) const
Definition AMDGPUISelDAGToDAG.cpp:250
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
Definition AMDGPUISelDAGToDAG.cpp:961
AMDGPUISelDAGToDAGPass(TargetMachine &TM)
Definition AMDGPUISelDAGToDAG.cpp:956
bool useRealTrue16Insts() const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-...
static SDValue stripBitcast(SDValue Val)
static int64_t getNullPointerValue(unsigned AddrSpace)
Get the integer value of a null pointer in the given address space.
static const fltSemantics & BFloat()
static const fltSemantics & IEEEhalf()
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
bool isSignMask() const
Check if the APInt's value is returned by getSignMask.
bool isMaxSignedValue() const
Determine if this is the largest signed value.
int64_t getSExtValue() const
Get sign extended value.
unsigned countr_one() const
Count the number of trailing one bits.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI SDValue getSplatValue(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted value or a null value if this is not a splat.
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
FunctionPass class - This class is used to implement most global optimizations.
const SIInstrInfo * getInstrInfo() const override
Generation getGeneration() const
void checkSubtargetFeatures(const Function &F) const
Diagnose inconsistent subtarget features before attempting to codegen function F.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
TypeSize getValue() const
Analysis pass that exposes the LoopInfo for a function.
SmallVector< LoopT *, 4 > getLoopsInPreorder() const
Return all of the loops in the function in preorder across the loop nests, with siblings in forward p...
The legacy pass manager's analysis pass to compute loop information.
static MVT getIntegerVT(unsigned BitWidth)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
LocationSize getSize() const
Return the size in bytes of the memory reference.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
An SDNode that represents everything that will be needed to construct a MachineInstr.
This is an abstract virtual class for memory operations.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
AnalysisType & getAnalysis() const
getAnalysis() - This function is used by subclasses to get to the analysis information ...
A set of analyses that are preserved following a run of a transformation pass.
Wrapper class representing virtual and physical registers.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
bool isAnyAdd() const
Returns true if the node type is ADD or PTRADD.
static use_iterator use_end()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
static LLVM_READONLY const TargetRegisterClass * getSGPRClassForBitWidth(unsigned BitWidth)
static bool isSGPRClass(const TargetRegisterClass *RC)
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
SelectionDAGISelLegacy(char &ID, std::unique_ptr< SelectionDAGISel > S)
SelectionDAGISelPass(std::unique_ptr< SelectionDAGISel > Selector)
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
std::unique_ptr< FunctionLoweringInfo > FuncInfo
const TargetLowering * TLI
const TargetInstrInfo * TII
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
SelectionDAGISel(TargetMachine &tm, CodeGenOptLevel OL=CodeGenOptLevel::Default)
virtual bool runOnMachineFunction(MachineFunction &mf)
const TargetLowering * getTargetLowering() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
SDValue getTargetFrameIndex(int FI, EVT VT)
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
MachineFunction & getMachineFunction() const
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
ilist< SDNode >::iterator allnodes_iterator
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
static const unsigned CommuteAnyOperandIndex
Primary interface to the complete machine description for the target machine.
unsigned getID() const
Return the register class ID number.
Legacy analysis pass which computes a CycleInfo.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
std::optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST)
std::optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer, bool HasSOffset)
bool isUniformMMO(const MachineMemOperand *MMO)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
@ ADD
Simple integer binary arithmetic operators.
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
@ SIGN_EXTEND
Conversion operators.
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
@ UNDEF
UNDEF - An undefined node.
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
@ SHL
Shift and rotation operations.
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
bool isExtOpcode(unsigned Opcode)
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
@ Undef
Value of the register doesn't matter.
@ User
could "use" a pointer
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
bool isBoolSGPR(SDValue V)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
static bool getConstantValue(SDValue N, uint32_t &Out)
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
CodeGenOptLevel
Code generation optimization level.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
bool isa(const From &Val)
isa - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
FunctionAddr VTableAddr uintptr_t uintptr_t Data
FunctionPass * createAMDGPUISelDag(TargetMachine &TM, CodeGenOptLevel OptLevel)
This pass converts a legalized DAG into a AMDGPU-specific.
Definition AMDGPUISelDAGToDAG.cpp:150
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
decltype(auto) cast(const From &Val)
cast - Return the argument parameter cast to the specified type.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Implement std::hash so that hash_code can be used in STL containers.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
uint64_t getScalarSizeInBits() const
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false)
Compute knownbits resulting from addition of LHS and RHS.
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
APInt getMinValue() const
Return the minimal unsigned value possible given these KnownBits.
static unsigned getSubRegFromChannel(unsigned Channel)
bool hasNoUnsignedWrap() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.