LLVM: lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

29#include "llvm/IR/IntrinsicsAMDGPU.h"

32

33#ifdef EXPENSIVE_CHECKS

36#endif

37

38#define DEBUG_TYPE "amdgpu-isel"

39

40using namespace llvm;

41

42

43

44

45

46namespace {

49}

50

51

53 In = stripBitcast(In);

54

57 if (!Idx->isOne())

58 return false;

59 Out = In.getOperand(0);

60 return true;

61 }

62 }

63

65 return false;

66

67 SDValue Srl = In.getOperand(0);

70 if (ShiftAmt->getZExtValue() == 16) {

71 Out = stripBitcast(Srl.getOperand(0));

72 return true;

73 }

74 }

75 }

76

77 return false;

78}

79

84 return Lo;

85 }

86

89

90 if (Lo->isDivergent()) {

92 SL, Lo.getValueType()),

93 0);

98

100 Src.getValueType(), Ops),

101 0);

102 } else {

103

104

105

107 CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, Src.getValueType(), Lo),

108 0);

109 }

110

111 return NewSrc;

112}

113

114

115

118 SDValue Idx = In.getOperand(1);

119 if (isNullConstant(Idx) && In.getValueSizeInBits() <= 32)

120 return In.getOperand(0);

121 }

122

124 SDValue Src = In.getOperand(0);

125 if (Src.getValueType().getSizeInBits() == 32)

126 return stripBitcast(Src);

127 }

128

129 return In;

130}

131

132}

133

135 "AMDGPU DAG->DAG Pattern Instruction Selection", false,

136 false)

140#ifdef EXPENSIVE_CHECKS

143#endif

145 "AMDGPU DAG->DAG Pattern Instruction Selection", false,

146 false)

147

148

149

154

158

165

166bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const {

167

168 switch (Opc) {

177 case ISD::FABS:

178

179

180 case ISD::FSQRT:

181 case ISD::FSIN:

182 case ISD::FCOS:

183 case ISD::FPOWI:

184 case ISD::FPOW:

185 case ISD::FLOG:

186 case ISD::FLOG2:

187 case ISD::FLOG10:

188 case ISD::FEXP:

189 case ISD::FEXP2:

190 case ISD::FCEIL:

191 case ISD::FTRUNC:

192 case ISD::FRINT:

193 case ISD::FNEARBYINT:

194 case ISD::FROUNDEVEN:

195 case ISD::FROUND:

196 case ISD::FFLOOR:

197 case ISD::FMINNUM:

198 case ISD::FMAXNUM:

199 case ISD::FLDEXP:

200 case AMDGPUISD::FRACT:

201 case AMDGPUISD::CLAMP:

202 case AMDGPUISD::COS_HW:

203 case AMDGPUISD::SIN_HW:

204 case AMDGPUISD::FMIN3:

205 case AMDGPUISD::FMAX3:

206 case AMDGPUISD::FMED3:

207 case AMDGPUISD::FMAD_FTZ:

208 case AMDGPUISD::RCP:

209 case AMDGPUISD::RSQ:

210 case AMDGPUISD::RCP_IFLAG:

211

214

215

216

220 case AMDGPUISD::DIV_FIXUP:

222 default:

223

224

225 return false;

226 }

227}

228

230#ifdef EXPENSIVE_CHECKS

234 assert(L->isLCSSAForm(DT));

235 }

236#endif

238}

239

243#ifdef EXPENSIVE_CHECKS

246#endif

248}

249

251 assert(Subtarget->d16PreservesUnusedBits());

252 MVT VT = N->getValueType(0).getSimpleVT();

253 if (VT != MVT::v2i16 && VT != MVT::v2f16)

254 return false;

255

258

260

261

262

263

264

265

266

269

273 };

274

275 unsigned LoadOp = AMDGPUISD::LOAD_D16_HI;

278 AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8;

279 } else {

281 }

282

284 CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdHi), VTList,

287

288 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadHi);

290 return true;

291 }

292

293

294

295

297 if (LdLo && Lo.hasOneUse()) {

300 return false;

301

303 unsigned LoadOp = AMDGPUISD::LOAD_D16_LO;

306 AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8;

307 } else {

309 }

310

311 TiedIn = CurDAG->getNode(ISD::BITCAST, SDLoc(N), VT, TiedIn);

312

315 };

316

318 CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdLo), VTList,

321

322 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadLo);

324 return true;

325 }

326

327 return false;

328}

329

331 if (!Subtarget->d16PreservesUnusedBits())

332 return;

333

335

336 bool MadeChange = false;

337 while (Position != CurDAG->allnodes_begin()) {

338 SDNode *N = &*--Position;

339 if (N->use_empty())

340 continue;

341

342 switch (N->getOpcode()) {

344

346 break;

347 default:

348 break;

349 }

350 }

351

352 if (MadeChange) {

353 CurDAG->RemoveDeadNodes();

356 }

357}

358

359bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {

360 if (N->isUndef())

361 return true;

362

365 return TII->isInlineConstant(C->getAPIntValue());

366

368 return TII->isInlineConstant(C->getValueAPF());

369

370 return false;

371}

372

373

374

375

376

378 unsigned OpNo) const {

379 if (N->isMachineOpcode()) {

382 if (Reg.isVirtual()) {

384 return MRI.getRegClass(Reg);

385 }

386

387 const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();

388 return TRI->getPhysRegBaseClass(Reg);

389 }

390

391 return nullptr;

392 }

393

394 switch (N->getMachineOpcode()) {

395 default: {

396 const SIInstrInfo *TII = Subtarget->getInstrInfo();

397 const MCInstrDesc &Desc = TII->get(N->getMachineOpcode());

398 unsigned OpIdx = Desc.getNumDefs() + OpNo;

399 if (OpIdx >= Desc.getNumOperands())

400 return nullptr;

401

402 int16_t RegClass = TII->getOpRegClassID(Desc.operands()[OpIdx]);

403 if (RegClass == -1)

404 return nullptr;

405

406 return Subtarget->getRegisterInfo()->getRegClass(RegClass);

407 }

408 case AMDGPU::REG_SEQUENCE: {

409 unsigned RCID = N->getConstantOperandVal(0);

410 const TargetRegisterClass *SuperRC =

411 Subtarget->getRegisterInfo()->getRegClass(RCID);

412

413 SDValue SubRegOp = N->getOperand(OpNo + 1);

414 unsigned SubRegIdx = SubRegOp->getAsZExtVal();

415 return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,

416 SubRegIdx);

417 }

418 }

419}

420

424 Ops.push_back(NewChain);

425 for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)

426 Ops.push_back(N->getOperand(i));

427

428 Ops.push_back(Glue);

429 return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);

430}

431

433 const SITargetLowering& Lowering =

435

436 assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain");

437

439 return glueCopyToOp(N, M0, M0.getValue(1));

440}

441

442SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {

445 if (Subtarget->ldsRequiresM0Init())

446 return glueCopyToM0(

447 N, CurDAG->getSignedTargetConstant(-1, SDLoc(N), MVT::i32));

449 MachineFunction &MF = CurDAG->getMachineFunction();

450 unsigned Value = MF.getInfo()->getGDSSize();

451 return

452 glueCopyToM0(N, CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i32));

453 }

454 return N;

455}

456

458 EVT VT) const {

459 SDNode *Lo = CurDAG->getMachineNode(

460 AMDGPU::S_MOV_B32, DL, MVT::i32,

461 CurDAG->getTargetConstant(Lo_32(Imm), DL, MVT::i32));

462 SDNode *Hi = CurDAG->getMachineNode(

463 AMDGPU::S_MOV_B32, DL, MVT::i32,

464 CurDAG->getTargetConstant(Hi_32(Imm), DL, MVT::i32));

466 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),

467 SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),

468 SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};

469

470 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);

471}

472

473SDNode *AMDGPUDAGToDAGISel::packConstantV2I16(const SDNode *N,

475

476

478 uint32_t LHSVal, RHSVal;

481 SDLoc SL(N);

482 uint32_t K = (LHSVal & 0xffff) | (RHSVal << 16);

484 isVGPRImm(N) ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32, SL,

486 }

487

488 return nullptr;

489}

490

492 EVT VT = N->getValueType(0);

496 SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

497

498 if (NumVectorElts == 1) {

499 CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),

500 RegClass);

501 return;

502 }

503

504 bool IsGCN = CurDAG->getSubtarget().getTargetTriple().isAMDGCN();

505 if (IsGCN && Subtarget->has64BitLiterals() && VT.getSizeInBits() == 64 &&

508 bool AllConst = true;

510 for (unsigned I = 0; I < NumVectorElts; ++I) {

512 if (Op.isUndef()) {

513 AllConst = false;

514 break;

515 }

518 Val = CF->getValueAPF().bitcastToAPInt().getZExtValue();

519 } else

521 C |= Val << (EltSize * I);

522 }

523 if (AllConst) {

526 CurDAG->getMachineNode(AMDGPU::S_MOV_B64_IMM_PSEUDO, DL, VT, CV);

527 CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, VT, SDValue(Copy, 0),

528 RegClass);

529 return;

530 }

531 }

532

533 assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "

534 "supported yet");

535

536

537

539

540 RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

541 bool IsRegSeq = true;

542 unsigned NOps = N->getNumOperands();

543 for (unsigned i = 0; i < NOps; i++) {

544

546 IsRegSeq = false;

547 break;

548 }

551 RegSeqArgs[1 + (2 * i)] = N->getOperand(i);

552 RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);

553 }

554 if (NOps != NumVectorElts) {

555

557 MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,

558 DL, EltVT);

559 for (unsigned i = NOps; i < NumVectorElts; ++i) {

562 RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);

563 RegSeqArgs[1 + (2 * i) + 1] =

564 CurDAG->getTargetConstant(Sub, DL, MVT::i32);

565 }

566 }

567

568 if (!IsRegSeq)

569 SelectCode(N);

570 CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);

571}

572

574 EVT VT = N->getValueType(0);

576

577

578 if (!Subtarget->hasPkMovB32() || !EltVT.bitsEq(MVT::i32) ||

580 SelectCode(N);

581 return;

582 }

583

585

590

592 Mask[0] < 4 && Mask[1] < 4);

593

594 SDValue VSrc0 = Mask[0] < 2 ? Src0 : Src1;

595 SDValue VSrc1 = Mask[1] < 2 ? Src0 : Src1;

596 unsigned Src0SubReg = Mask[0] & 1 ? AMDGPU::sub1 : AMDGPU::sub0;

597 unsigned Src1SubReg = Mask[1] & 1 ? AMDGPU::sub1 : AMDGPU::sub0;

598

599 if (Mask[0] < 0) {

600 Src0SubReg = Src1SubReg;

602 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);

603 VSrc0 = SDValue(ImpDef, 0);

604 }

605

606 if (Mask[1] < 0) {

607 Src1SubReg = Src0SubReg;

609 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);

610 VSrc1 = SDValue(ImpDef, 0);

611 }

612

613

614

615

616

617

618

619 if (N->isDivergent() && Src0SubReg == AMDGPU::sub1 &&

620 Src1SubReg == AMDGPU::sub0) {

621

622

623

624

625

626 unsigned Src0OpSel =

628 unsigned Src1OpSel =

630

631

632

635

636 SDValue Src0OpSelVal = CurDAG->getTargetConstant(Src0OpSel, DL, MVT::i32);

637 SDValue Src1OpSelVal = CurDAG->getTargetConstant(Src1OpSel, DL, MVT::i32);

638 SDValue ZeroMods = CurDAG->getTargetConstant(0, DL, MVT::i32);

639

640 CurDAG->SelectNodeTo(N, AMDGPU::V_PK_MOV_B32, N->getVTList(),

641 {Src0OpSelVal, VSrc0, Src1OpSelVal, VSrc1,

642 ZeroMods,

643 ZeroMods,

644 ZeroMods,

645 ZeroMods,

646 ZeroMods});

647 return;

648 }

649

651 CurDAG->getTargetExtractSubreg(Src0SubReg, DL, EltVT, VSrc0);

653 CurDAG->getTargetExtractSubreg(Src1SubReg, DL, EltVT, VSrc1);

654

656 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),

657 ResultElt0, CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),

658 ResultElt1, CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)};

659 CurDAG->SelectNodeTo(N, TargetOpcode::REG_SEQUENCE, VT, Ops);

660}

661

663 unsigned int Opc = N->getOpcode();

664 if (N->isMachineOpcode()) {

665 N->setNodeId(-1);

666 return;

667 }

668

669

670

672 N = glueCopyToM0LDSInit(N);

673 SelectCode(N);

674 return;

675 }

676

677 switch (Opc) {

678 default:

679 break;

680

681

682

687 if (N->getValueType(0) != MVT::i64)

688 break;

689

690 SelectADD_SUB_I64(N);

691 return;

692 }

695 if (N->getValueType(0) != MVT::i32)

696 break;

697

698 SelectAddcSubb(N);

699 return;

702 SelectUADDO_USUBO(N);

703 return;

704 }

705 case AMDGPUISD::FMUL_W_CHAIN: {

706 SelectFMUL_W_CHAIN(N);

707 return;

708 }

709 case AMDGPUISD::FMA_W_CHAIN: {

710 SelectFMA_W_CHAIN(N);

711 return;

712 }

713

716 EVT VT = N->getValueType(0);

720 if (SDNode *Packed = packConstantV2I16(N, *CurDAG)) {

722 return;

723 }

724 }

725

726 break;

727 }

728

732 N->isDivergent()

733 ? TRI->getDefaultVectorSuperClassForBitWidth(NumVectorElts * 32)

735

737 return;

738 }

741 return;

743 SDValue RC, SubReg0, SubReg1;

745 if (N->getValueType(0) == MVT::i128) {

746 RC = CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32);

747 SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);

748 SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);

749 } else if (N->getValueType(0) == MVT::i64) {

750 RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);

751 SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);

752 SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

753 } else {

755 }

756 const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,

757 N->getOperand(1), SubReg1 };

759 N->getValueType(0), Ops));

760 return;

761 }

762

765 if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N) ||

766 Subtarget->has64BitLiterals())

767 break;

768

771 Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();

773 break;

774 } else {

776 Imm = C->getZExtValue();

778 break;

779 }

780

782 ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));

783 return;

784 }

785 case AMDGPUISD::BFE_I32:

786 case AMDGPUISD::BFE_U32: {

787

788

789

790

791

792

793

794

797 break;

798

800 if (!Width)

801 break;

802

803 bool Signed = Opc == AMDGPUISD::BFE_I32;

804

807

809 WidthVal));

810 return;

811 }

812 case AMDGPUISD::DIV_SCALE: {

813 SelectDIV_SCALE(N);

814 return;

815 }

818 SelectMAD_64_32(N);

819 return;

820 }

823 return SelectMUL_LOHI(N);

828 break;

829 }

834 if (N->getValueType(0) != MVT::i32)

835 break;

836

837 SelectS_BFE(N);

838 return;

839 case ISD::BRCOND:

840 SelectBRCOND(N);

841 return;

842 case ISD::FP_EXTEND:

843 SelectFP_EXTEND(N);

844 return;

845 case AMDGPUISD::CVT_PKRTZ_F16_F32:

846 case AMDGPUISD::CVT_PKNORM_I16_F32:

847 case AMDGPUISD::CVT_PKNORM_U16_F32:

848 case AMDGPUISD::CVT_PK_U16_U32:

849 case AMDGPUISD::CVT_PK_I16_I32: {

850

851 if (N->getValueType(0) == MVT::i32) {

852 MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;

853 N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT),

854 { N->getOperand(0), N->getOperand(1) });

855 SelectCode(N);

856 return;

857 }

858

859 break;

860 }

862 SelectINTRINSIC_W_CHAIN(N);

863 return;

864 }

866 SelectINTRINSIC_WO_CHAIN(N);

867 return;

868 }

870 SelectINTRINSIC_VOID(N);

871 return;

872 }

874 SelectWAVE_ADDRESS(N);

875 return;

876 }

877 case ISD::STACKRESTORE: {

878 SelectSTACKRESTORE(N);

879 return;

880 }

881 }

882

883 SelectCode(N);

884}

885

886bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {

889 return Term->getMetadata("amdgpu.uniform") ||

890 Term->getMetadata("structurizecfg.uniform");

891}

892

893bool AMDGPUDAGToDAGISel::isUnneededShiftMask(const SDNode *N,

894 unsigned ShAmtBits) const {

896

897 const APInt &RHS = N->getConstantOperandAPInt(1);

898 if (RHS.countr_one() >= ShAmtBits)

899 return true;

900

902 return (LHSKnownZeros | RHS).countr_one() >= ShAmtBits;

903}

904

909

910

911

912

915 SDValue BaseLo = Lo.getOperand(0);

917

921

924

928 N1 = Lo.getOperand(1);

929 return true;

930 }

931 }

932 }

933 return false;

934}

935

936bool AMDGPUDAGToDAGISel::isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,

938 if (CurDAG->isBaseWithConstantOffset(Addr)) {

941 return true;

942 }

943

946 return true;

947 }

948

949 return false;

950}

951

953 return "AMDGPU DAG->DAG Pattern Instruction Selection";

954}

955

959

963#ifdef EXPENSIVE_CHECKS

965 .getManager();

969 for (auto &L : LI.getLoopsInPreorder())

970 assert(L->isLCSSAForm(DT) && "Loop is not in LCSSA form!");

971#endif

973}

974

975

976

977

978

979bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,

981 return false;

982}

983

984bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,

988

992 } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&

994 Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);

995 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);

999 Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);

1000 } else {

1001 Base = Addr;

1002 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);

1003 }

1004

1005 return true;

1006}

1007

1008SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,

1009 const SDLoc &DL) const {

1010 SDNode *Mov = CurDAG->getMachineNode(

1011 AMDGPU::S_MOV_B32, DL, MVT::i32,

1012 CurDAG->getTargetConstant(Val, DL, MVT::i32));

1014}

1015

1016

1017void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {

1018 SDLoc DL(N);

1021

1022 unsigned Opcode = N->getOpcode();

1024 bool ProduceCarry =

1027

1028 SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);

1029 SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

1030

1031 SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,

1032 DL, MVT::i32, LHS, Sub0);

1033 SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,

1034 DL, MVT::i32, LHS, Sub1);

1035

1036 SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,

1037 DL, MVT::i32, RHS, Sub0);

1038 SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,

1039 DL, MVT::i32, RHS, Sub1);

1040

1041 SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

1042

1043 static const unsigned OpcMap[2][2][2] = {

1044 {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},

1045 {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},

1046 {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},

1047 {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};

1048

1049 unsigned Opc = OpcMap[0][N->isDivergent()][IsAdd];

1050 unsigned CarryOpc = OpcMap[1][N->isDivergent()][IsAdd];

1051

1052 SDNode *AddLo;

1053 if (!ConsumeCarry) {

1055 AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);

1056 } else {

1058 AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);

1059 }

1064 };

1065 SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

1066

1067 SDValue RegSequenceArgs[] = {

1068 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),

1070 Sub0,

1072 Sub1,

1073 };

1075 MVT::i64, RegSequenceArgs);

1076

1077 if (ProduceCarry) {

1078

1080 }

1081

1082

1084}

1085

1086void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {

1089 SDValue CI = N->getOperand(2);

1090

1091 if (N->isDivergent()) {

1093 : AMDGPU::V_SUBB_U32_e64;

1094 CurDAG->SelectNodeTo(

1095 N, Opc, N->getVTList(),

1096 {LHS, RHS, CI,

1097 CurDAG->getTargetConstant(0, {}, MVT::i1) });

1098 } else {

1100 : AMDGPU::S_SUB_CO_PSEUDO;

1101 CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI});

1102 }

1103}

1104

1105void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {

1106

1107

1108

1109 bool IsAdd = N->getOpcode() == ISD::UADDO;

1110 bool IsVALU = N->isDivergent();

1111

1112 for (SDNode::user_iterator UI = N->user_begin(), E = N->user_end(); UI != E;

1113 ++UI)

1114 if (UI.getUse().getResNo() == 1) {

1115 if (UI->isMachineOpcode()) {

1116 if (UI->getMachineOpcode() !=

1117 (IsAdd ? AMDGPU::S_ADD_CO_PSEUDO : AMDGPU::S_SUB_CO_PSEUDO)) {

1118 IsVALU = true;

1119 break;

1120 }

1121 } else {

1123 IsVALU = true;

1124 break;

1125 }

1126 }

1127 }

1128

1129 if (IsVALU) {

1130 unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;

1131

1132 CurDAG->SelectNodeTo(

1133 N, Opc, N->getVTList(),

1134 {N->getOperand(0), N->getOperand(1),

1135 CurDAG->getTargetConstant(0, {}, MVT::i1) });

1136 } else {

1137 unsigned Opc = IsAdd ? AMDGPU::S_UADDO_PSEUDO : AMDGPU::S_USUBO_PSEUDO;

1138

1139 CurDAG->SelectNodeTo(N, Opc, N->getVTList(),

1140 {N->getOperand(0), N->getOperand(1)});

1141 }

1142}

1143

1144void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {

1145

1147

1148 SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);

1149 SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);

1150 SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);

1151 Ops[8] = N->getOperand(0);

1152 Ops[9] = N->getOperand(4);

1153

1154

1155

1156 bool UseFMAC = Subtarget->hasDLInsts() &&

1160 unsigned Opcode = UseFMAC ? AMDGPU::V_FMAC_F32_e64 : AMDGPU::V_FMA_F32_e64;

1161 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), Ops);

1162}

1163

1164void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {

1165

1167

1168 SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);

1169 SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);

1170 Ops[6] = N->getOperand(0);

1171 Ops[7] = N->getOperand(3);

1172

1173 CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);

1174}

1175

1176

1177

1178void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {

1179 EVT VT = N->getValueType(0);

1180

1181 assert(VT == MVT::f32 || VT == MVT::f64);

1182

1183 unsigned Opc

1184 = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64 : AMDGPU::V_DIV_SCALE_F32_e64;

1185

1186

1187

1189 SelectVOP3BMods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);

1190 SelectVOP3BMods(N->getOperand(1), Ops[3], Ops[2]);

1191 SelectVOP3BMods(N->getOperand(2), Ops[5], Ops[4]);

1193}

1194

1195

1196

1197void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {

1198 SDLoc SL(N);

1200 unsigned Opc;

1201 bool UseNoCarry = Subtarget->hasMadU64U32NoCarry() && N->hasAnyUseOfValue(1);

1202 if (Subtarget->hasMADIntraFwdBug())

1203 Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64

1204 : AMDGPU::V_MAD_U64_U32_gfx11_e64;

1205 else if (UseNoCarry)

1206 Opc = Signed ? AMDGPU::V_MAD_NC_I64_I32_e64 : AMDGPU::V_MAD_NC_U64_U32_e64;

1207 else

1208 Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;

1209

1210 SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);

1211 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),

1212 Clamp };

1213

1214 if (UseNoCarry) {

1215 MachineSDNode *Mad = CurDAG->getMachineNode(Opc, SL, MVT::i64, Ops);

1217 CurDAG->RemoveDeadNode(N);

1218 return;

1219 }

1220

1222}

1223

1224

1225

1226void AMDGPUDAGToDAGISel::SelectMUL_LOHI(SDNode *N) {

1227 SDLoc SL(N);

1229 SDVTList VTList;

1230 unsigned Opc;

1231 if (Subtarget->hasMadU64U32NoCarry()) {

1232 VTList = CurDAG->getVTList(MVT::i64);

1233 Opc = Signed ? AMDGPU::V_MAD_NC_I64_I32_e64 : AMDGPU::V_MAD_NC_U64_U32_e64;

1234 } else {

1235 VTList = CurDAG->getVTList(MVT::i64, MVT::i1);

1236 if (Subtarget->hasMADIntraFwdBug()) {

1237 Opc = Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64

1238 : AMDGPU::V_MAD_U64_U32_gfx11_e64;

1239 } else {

1240 Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;

1241 }

1242 }

1243

1245 SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);

1246 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Zero, Clamp};

1247 SDNode *Mad = CurDAG->getMachineNode(Opc, SL, VTList, Ops);

1248 if (SDValue(N, 0).use_empty()) {

1249 SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32);

1250 SDNode *Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,

1251 MVT::i32, SDValue(Mad, 0), Sub0);

1253 }

1254 if (SDValue(N, 1).use_empty()) {

1255 SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32);

1256 SDNode *Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,

1257 MVT::i32, SDValue(Mad, 0), Sub1);

1259 }

1260 CurDAG->RemoveDeadNode(N);

1261}

1262

1263bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset) const {

1265 return false;

1266

1267 if (Base || Subtarget->hasUsableDSOffset() ||

1268 Subtarget->unsafeDSOffsetFoldingEnabled())

1269 return true;

1270

1271

1272

1274}

1275

1276bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,

1278 SDLoc DL(Addr);

1279 if (CurDAG->isBaseWithConstantOffset(Addr)) {

1283 if (isDSOffsetLegal(N0, C1->getSExtValue())) {

1284

1287 return true;

1288 }

1290

1292 int64_t ByteOffset = C->getSExtValue();

1293 if (isDSOffsetLegal(SDValue(), ByteOffset)) {

1295

1296

1297

1298

1301

1302 if (isDSOffsetLegal(Sub, ByteOffset)) {

1306

1307

1308 unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;

1309 if (Subtarget->hasAddNoCarry()) {

1310 SubOp = AMDGPU::V_SUB_U32_e64;

1312 CurDAG->getTargetConstant(0, {}, MVT::i1));

1313 }

1314

1315 MachineSDNode *MachineSub =

1316 CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);

1317

1319 Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);

1320 return true;

1321 }

1322 }

1323 }

1325

1326

1327

1328

1329

1330 SDLoc DL(Addr);

1331

1332 if (isDSOffsetLegal(SDValue(), CAddr->getZExtValue())) {

1334 MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,

1335 DL, MVT::i32, Zero);

1337 Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);

1338 return true;

1339 }

1340 }

1341

1342

1343 Base = Addr;

1344 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);

1345 return true;

1346}

1347

1348bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0,

1349 unsigned Offset1,

1350 unsigned Size) const {

1351 if (Offset0 % Size != 0 || Offset1 % Size != 0)

1352 return false;

1354 return false;

1355

1356 if (Base || Subtarget->hasUsableDSOffset() ||

1357 Subtarget->unsafeDSOffsetFoldingEnabled())

1358 return true;

1359

1360

1361

1363}

1364

1365

1371

1372

1373

1374

1375bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegal(SDValue Addr) const {

1377 return true;

1378

1379

1380

1381 if (Subtarget->hasSignedScratchOffsets())

1382 return true;

1383

1386

1387

1388

1389

1390

1391 ConstantSDNode *ImmOp = nullptr;

1394 return true;

1395 }

1396

1397 return CurDAG->SignBitIsZero(LHS);

1398}

1399

1400

1401

1402bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSV(SDValue Addr) const {

1404 return true;

1405

1406

1407

1408 if (Subtarget->hasSignedScratchOffsets())

1409 return true;

1410

1414}

1415

1416

1417

1418bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSVImm(SDValue Addr) const {

1419

1420

1422 return true;

1423

1426

1427

1428

1429

1432 (RHSImm->getSExtValue() < 0 && RHSImm->getSExtValue() > -0x40000000)))

1433 return true;

1434

1435 auto LHS = Base.getOperand(0);

1436 auto RHS = Base.getOperand(1);

1438}

1439

1440

1441bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,

1443 SDValue &Offset1) const {

1444 return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 4);

1445}

1446

1447bool AMDGPUDAGToDAGISel::SelectDS128Bit8ByteAligned(SDValue Addr, SDValue &Base,

1449 SDValue &Offset1) const {

1450 return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 8);

1451}

1452

1453bool AMDGPUDAGToDAGISel::SelectDSReadWrite2(SDValue Addr, SDValue &Base,

1455 unsigned Size) const {

1456 SDLoc DL(Addr);

1457

1458 if (CurDAG->isBaseWithConstantOffset(Addr)) {

1463 unsigned OffsetValue1 = OffsetValue0 + Size;

1464

1465

1466 if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1, Size)) {

1468 Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i32);

1469 Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i32);

1470 return true;

1471 }

1473

1474 if (const ConstantSDNode *C =

1476 unsigned OffsetValue0 = C->getZExtValue();

1477 unsigned OffsetValue1 = OffsetValue0 + Size;

1478

1479 if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {

1480 SDLoc DL(Addr);

1482

1483

1484

1485

1488

1489 if (isDSOffset2Legal(Sub, OffsetValue0, OffsetValue1, Size)) {

1493 unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;

1494 if (Subtarget->hasAddNoCarry()) {

1495 SubOp = AMDGPU::V_SUB_U32_e64;

1497 CurDAG->getTargetConstant(0, {}, MVT::i1));

1498 }

1499

1500 MachineSDNode *MachineSub = CurDAG->getMachineNode(

1502

1504 Offset0 =

1505 CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i32);

1506 Offset1 =

1507 CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i32);

1508 return true;

1509 }

1510 }

1511 }

1513 unsigned OffsetValue0 = CAddr->getZExtValue();

1514 unsigned OffsetValue1 = OffsetValue0 + Size;

1515

1516 if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {

1518 MachineSDNode *MovZero =

1519 CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32, Zero);

1521 Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i32);

1522 Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i32);

1523 return true;

1524 }

1525 }

1526

1527

1528

1529 Base = Addr;

1530 Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i32);

1531 Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i32);

1532 return true;

1533}

1534

1538 SDValue &Addr64) const {

1539

1540

1541 if (Subtarget->useFlatForGlobal())

1542 return false;

1543

1544 SDLoc DL(Addr);

1545

1546 Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);

1547 Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);

1548 Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);

1549 SOffset = Subtarget->hasRestrictedSOffset()

1550 ? CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32)

1551 : CurDAG->getTargetConstant(0, DL, MVT::i32);

1552

1553 ConstantSDNode *C1 = nullptr;

1555 if (CurDAG->isBaseWithConstantOffset(Addr)) {

1559 else

1560 C1 = nullptr;

1561 }

1562

1564

1565

1568 Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);

1569

1572

1573

1574 Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);

1575 VAddr = N0;

1576 } else {

1577

1578 Ptr = N3;

1579 VAddr = N2;

1580 }

1581 } else {

1582

1583 Ptr = N2;

1584 VAddr = N3;

1585 }

1586 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);

1588

1589

1590 Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);

1591 VAddr = N0;

1592 Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);

1593 } else {

1594

1595

1596 VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);

1597 Ptr = N0;

1598 }

1599

1600 if (!C1) {

1601

1602 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);

1603 return true;

1604 }

1605

1606 const SIInstrInfo *TII = Subtarget->getInstrInfo();

1608

1610 return true;

1611 }

1612

1613

1614 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);

1615 SOffset =

1617 AMDGPU::S_MOV_B32, DL, MVT::i32,

1619 0);

1620 return true;

1621}

1622

1623bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,

1626 SDValue Ptr, Offen, Idxen, Addr64;

1627

1628

1629

1630 if (!Subtarget->hasAddr64())

1631 return false;

1632

1633 if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))

1634 return false;

1635

1637 if (C->getSExtValue()) {

1638 SDLoc DL(Addr);

1639

1640 const SITargetLowering& Lowering =

1642

1644 return true;

1645 }

1646

1647 return false;

1648}

1649

1650std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {

1651 SDLoc DL(N);

1652

1655 FI ? CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)) : N;

1656

1657

1658

1659

1660

1661 return std::pair(TFI, CurDAG->getTargetConstant(0, DL, MVT::i32));

1662}

1663

1664bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,

1667 SDValue &ImmOffset) const {

1668

1669 SDLoc DL(Addr);

1670 MachineFunction &MF = CurDAG->getMachineFunction();

1671 const SIMachineFunctionInfo *Info = MF.getInfo();

1672

1673 Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

1674

1676 int64_t Imm = CAddr->getSExtValue();

1677 const int64_t NullPtr =

1679

1680 if (Imm != NullPtr) {

1683 CurDAG->getTargetConstant(Imm & ~MaxOffset, DL, MVT::i32);

1684 MachineSDNode *MovHighBits = CurDAG->getMachineNode(

1685 AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);

1686 VAddr = SDValue(MovHighBits, 0);

1687

1688 SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

1689 ImmOffset = CurDAG->getTargetConstant(Imm & MaxOffset, DL, MVT::i32);

1690 return true;

1691 }

1692 }

1693

1694 if (CurDAG->isBaseWithConstantOffset(Addr)) {

1695

1696

1699

1700

1701

1702

1703

1704

1705

1706

1707

1708

1709

1710

1711

1712

1713

1714

1715 const SIInstrInfo *TII = Subtarget->getInstrInfo();

1716 if (TII->isLegalMUBUFImmOffset(C1) &&

1717 (!Subtarget->privateMemoryResourceIsRangeChecked() ||

1718 CurDAG->SignBitIsZero(N0))) {

1719 std::tie(VAddr, SOffset) = foldFrameIndex(N0);

1720 ImmOffset = CurDAG->getTargetConstant(C1, DL, MVT::i32);

1721 return true;

1722 }

1723 }

1724

1725

1726 std::tie(VAddr, SOffset) = foldFrameIndex(Addr);

1727 ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

1728 return true;

1729}

1730

1733 return false;

1735 if (Reg.isPhysical())

1736 return false;

1737 const auto *RC = TRI.getPhysRegBaseClass(Reg);

1738 return RC && TRI.isSGPRClass(RC);

1739}

1740

1741bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,

1746 const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();

1747 const SIInstrInfo *TII = Subtarget->getInstrInfo();

1748 MachineFunction &MF = CurDAG->getMachineFunction();

1749 const SIMachineFunctionInfo *Info = MF.getInfo();

1750 SDLoc DL(Addr);

1751

1752

1754 SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

1755 SOffset = Addr;

1756 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);

1757 return true;

1758 }

1759

1760 ConstantSDNode *CAddr;

1762

1764 if (!CAddr || TII->isLegalMUBUFImmOffset(CAddr->getZExtValue()))

1765 return false;

1767 return false;

1768

1772

1773 SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

1774 } else {

1775 return false;

1776 }

1777

1778 SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

1779

1781 return true;

1782}

1783

1784bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,

1786 ) const {

1787 SDValue Ptr, VAddr, Offen, Idxen, Addr64;

1788 const SIInstrInfo *TII = Subtarget->getInstrInfo();

1789

1790 if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))

1791 return false;

1792

1796 uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |

1798 SDLoc DL(Addr);

1799

1800 const SITargetLowering& Lowering =

1802

1804 return true;

1805 }

1806 return false;

1807}

1808

1809bool AMDGPUDAGToDAGISel::SelectBUFSOffset(SDValue ByteOffsetNode,

1810 SDValue &SOffset) const {

1811 if (Subtarget->hasRestrictedSOffset() && isNullConstant(ByteOffsetNode)) {

1812 SOffset = CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32);

1813 return true;

1814 }

1815

1816 SOffset = ByteOffsetNode;

1817 return true;

1818}

1819

1820

1821

1825 return MN;

1827 for (SDValue V : N->op_values())

1830 return MN;

1832}

1833

1834bool AMDGPUDAGToDAGISel::SelectFlatOffsetImpl(SDNode *N, SDValue Addr,

1836 uint64_t FlatVariant) const {

1837 int64_t OffsetVal = 0;

1838

1840

1841 bool CanHaveFlatSegmentOffsetBug =

1842 Subtarget->hasFlatSegmentOffsetBug() &&

1845

1846 if (Subtarget->hasFlatInstOffsets() && !CanHaveFlatSegmentOffsetBug) {

1848 if (isBaseWithConstantOffset64(Addr, N0, N1) &&

1850 isFlatScratchBaseLegal(Addr))) {

1852

1853

1854

1855

1856 bool IsInBounds =

1858 if (COffsetVal == 0 || FlatVariant != SIInstrFlags::FLAT || IsInBounds) {

1859 const SIInstrInfo *TII = Subtarget->getInstrInfo();

1860 if (TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) {

1861 Addr = N0;

1862 OffsetVal = COffsetVal;

1863 } else {

1864

1865

1866

1867

1868

1869

1870

1871

1872

1873

1874 SDLoc DL(N);

1875 uint64_t RemainderOffset;

1876

1877 std::tie(OffsetVal, RemainderOffset) =

1878 TII->splitFlatOffset(COffsetVal, AS, FlatVariant);

1879

1881 getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);

1882 SDValue Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);

1883

1888 unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;

1889 if (Subtarget->hasAddNoCarry()) {

1890 AddOp = AMDGPU::V_ADD_U32_e64;

1892 }

1893 Addr =

1894 SDValue(CurDAG->getMachineNode(AddOp, DL, MVT::i32, Opnds), 0);

1895 } else {

1896

1897

1899 CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);

1901 CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

1902

1903 SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,

1904 DL, MVT::i32, N0, Sub0);

1905 SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,

1906 DL, MVT::i32, N0, Sub1);

1907

1909 getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);

1910

1911 SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i1);

1912

1913 SDNode *Add =

1914 CurDAG->getMachineNode(AMDGPU::V_ADD_CO_U32_e64, DL, VTs,

1915 {AddOffsetLo, SDValue(N0Lo, 0), Clamp});

1916

1917 SDNode *Addc = CurDAG->getMachineNode(

1918 AMDGPU::V_ADDC_U32_e64, DL, VTs,

1920

1921 SDValue RegSequenceArgs[] = {

1922 CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL,

1923 MVT::i32),

1925

1926 Addr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,

1927 MVT::i64, RegSequenceArgs),

1928 0);

1929 }

1930 }

1931 }

1932 }

1933 }

1934

1935 VAddr = Addr;

1936 Offset = CurDAG->getSignedTargetConstant(OffsetVal, SDLoc(), MVT::i32);

1937 return true;

1938}

1939

1940bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N, SDValue Addr,

1944}

1945

1946bool AMDGPUDAGToDAGISel::SelectGlobalOffset(SDNode *N, SDValue Addr,

1950}

1951

1952bool AMDGPUDAGToDAGISel::SelectScratchOffset(SDNode *N, SDValue Addr,

1955 return SelectFlatOffsetImpl(N, Addr, VAddr, Offset,

1957}

1958

1959

1960

1963 if (Op.getValueType() == MVT::i32)

1964 return Op;

1965

1971

1972 SDValue ExtSrc = Op.getOperand(0);

1974}

1975

1976

1977

1978bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N, SDValue Addr,

1981 bool NeedIOffset) const {

1982 int64_t ImmOffset = 0;

1983 ScaleOffset = false;

1984

1985

1986

1987

1989 if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {

1991 const SIInstrInfo *TII = Subtarget->getInstrInfo();

1992

1993 if (NeedIOffset &&

1996 Addr = LHS;

1997 ImmOffset = COffsetVal;

1998 } else if (LHS->isDivergent()) {

1999 if (COffsetVal > 0) {

2000 SDLoc SL(N);

2001

2002

2003

2004 int64_t SplitImmOffset = 0, RemainderOffset = COffsetVal;

2005 if (NeedIOffset) {

2006 std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(

2008 }

2009

2010 if (Subtarget->hasSignedGVSOffset() ? isInt<32>(RemainderOffset)

2012 SDNode *VMov = CurDAG->getMachineNode(

2013 AMDGPU::V_MOV_B32_e32, SL, MVT::i32,

2014 CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));

2015 VOffset = SDValue(VMov, 0);

2016 SAddr = LHS;

2017 Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);

2018 return true;

2019 }

2020 }

2021

2022

2023

2024

2025

2026

2027 unsigned NumLiterals =

2028 TII->isInlineConstant(APInt(32, Lo_32(COffsetVal))) +

2029 TII->isInlineConstant(APInt(32, Hi_32(COffsetVal)));

2030 if (Subtarget->getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)

2031 return false;

2032 }

2033 }

2034

2035

2038

2039 if (LHS->isDivergent()) {

2040

2042 ScaleOffset = SelectScaleOffset(N, RHS, Subtarget->hasSignedGVSOffset());

2044 RHS, Subtarget->hasSignedGVSOffset(), CurDAG)) {

2045 SAddr = LHS;

2046 VOffset = ExtRHS;

2047 }

2048 }

2049

2051 if (!SAddr && RHS->isDivergent()) {

2052

2053 ScaleOffset = SelectScaleOffset(N, LHS, Subtarget->hasSignedGVSOffset());

2055 LHS, Subtarget->hasSignedGVSOffset(), CurDAG)) {

2056 SAddr = RHS;

2057 VOffset = ExtLHS;

2058 }

2059 }

2060

2061 if (SAddr) {

2062 Offset = CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);

2063 return true;

2064 }

2065 }

2066

2067 if (Subtarget->hasScaleOffset() &&

2068 (Addr.getOpcode() == (Subtarget->hasSignedGVSOffset()

2076

2077 unsigned Size =

2078 (unsigned)cast(N)->getMemoryVT().getFixedSizeInBits() / 8;

2080 if (ScaleOffset) {

2083 Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32);

2084 return true;

2085 }

2086 }

2087

2090 return false;

2091

2092

2093

2094 SAddr = Addr;

2095 SDNode *VMov =

2096 CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, SDLoc(Addr), MVT::i32,

2097 CurDAG->getTargetConstant(0, SDLoc(), MVT::i32));

2098 VOffset = SDValue(VMov, 0);

2099 Offset = CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);

2100 return true;

2101}

2102

2103bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N, SDValue Addr,

2107 bool ScaleOffset;

2108 if (!SelectGlobalSAddr(N, Addr, SAddr, VOffset, Offset, ScaleOffset))

2109 return false;

2110

2112 SDLoc(), MVT::i32);

2113 return true;

2114}

2115

2116bool AMDGPUDAGToDAGISel::SelectGlobalSAddrCPol(SDNode *N, SDValue Addr,

2120 bool ScaleOffset;

2121 if (!SelectGlobalSAddr(N, Addr, SAddr, VOffset, Offset, ScaleOffset))

2122 return false;

2123

2124

2125 auto PassedCPol =

2126 N->getConstantOperandVal(N->getNumOperands() - 1) & ~AMDGPU::CPol::SCAL;

2128 (ScaleOffset ? AMDGPU::CPol::SCAL : 0) | PassedCPol, SDLoc(), MVT::i32);

2129 return true;

2130}

2131

2132bool AMDGPUDAGToDAGISel::SelectGlobalSAddrCPolM0(SDNode *N, SDValue Addr,

2137 bool ScaleOffset;

2138 if (!SelectGlobalSAddr(N, Addr, SAddr, VOffset, Offset, ScaleOffset))

2139 return false;

2140

2141

2142 auto PassedCPol =

2143 N->getConstantOperandVal(N->getNumOperands() - 2) & ~AMDGPU::CPol::SCAL;

2145 (ScaleOffset ? AMDGPU::CPol::SCAL : 0) | PassedCPol, SDLoc(), MVT::i32);

2146 return true;

2147}

2148

2149bool AMDGPUDAGToDAGISel::SelectGlobalSAddrGLC(SDNode *N, SDValue Addr,

2153 bool ScaleOffset;

2154 if (!SelectGlobalSAddr(N, Addr, SAddr, VOffset, Offset, ScaleOffset))

2155 return false;

2156

2158 CPol = CurDAG->getTargetConstant(CPolVal, SDLoc(), MVT::i32);

2159 return true;

2160}

2161

2162bool AMDGPUDAGToDAGISel::SelectGlobalSAddrNoIOffset(SDNode *N, SDValue Addr,

2166 bool ScaleOffset;

2168 if (!SelectGlobalSAddr(N, Addr, SAddr, VOffset, DummyOffset, ScaleOffset,

2169 false))

2170 return false;

2171

2172

2173 auto PassedCPol =

2174 N->getConstantOperandVal(N->getNumOperands() - 1) & ~AMDGPU::CPol::SCAL;

2176 (ScaleOffset ? AMDGPU::CPol::SCAL : 0) | PassedCPol, SDLoc(), MVT::i32);

2177 return true;

2178}

2179

2180bool AMDGPUDAGToDAGISel::SelectGlobalSAddrNoIOffsetM0(SDNode *N, SDValue Addr,

2184 bool ScaleOffset;

2186 if (!SelectGlobalSAddr(N, Addr, SAddr, VOffset, DummyOffset, ScaleOffset,

2187 false))

2188 return false;

2189

2190

2191 auto PassedCPol =

2194 (ScaleOffset ? AMDGPU::CPol::SCAL : 0) | PassedCPol, SDLoc(), MVT::i32);

2195 return true;

2196}

2197

2200 SAddr = CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0));

2203

2204

2207 FI->getValueType(0));

2209 MVT::i32, TFI, SAddr.getOperand(1)),

2210 0);

2211 }

2212

2213 return SAddr;

2214}

2215

2216

2217bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr,

2221 return false;

2222

2223 SDLoc DL(Addr);

2224

2225 int64_t COffsetVal = 0;

2226

2227 if (CurDAG->isBaseWithConstantOffset(Addr) && isFlatScratchBaseLegal(Addr)) {

2230 } else {

2231 SAddr = Addr;

2232 }

2233

2235

2236 const SIInstrInfo *TII = Subtarget->getInstrInfo();

2237

2240 int64_t SplitImmOffset, RemainderOffset;

2241 std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(

2243

2244 COffsetVal = SplitImmOffset;

2245

2248 ? getMaterializedScalarImm32(Lo_32(RemainderOffset), DL)

2249 : CurDAG->getSignedTargetConstant(RemainderOffset, DL, MVT::i32);

2250 SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, DL, MVT::i32,

2251 SAddr, AddOffset),

2252 0);

2253 }

2254

2255 Offset = CurDAG->getSignedTargetConstant(COffsetVal, DL, MVT::i32);

2256

2257 return true;

2258}

2259

2260

2261bool AMDGPUDAGToDAGISel::checkFlatScratchSVSSwizzleBug(

2262 SDValue VAddr, SDValue SAddr, uint64_t ImmOffset) const {

2263 if (!Subtarget->hasFlatScratchSVSSwizzleBug())

2264 return false;

2265

2266

2267

2268

2269 KnownBits VKnown = CurDAG->computeKnownBits(VAddr);

2270 KnownBits SKnown =

2273 true)));

2276 return (VMax & 3) + (SMax & 3) >= 4;

2277}

2278

2279bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,

2283 int64_t ImmOffset = 0;

2284

2286 SDValue OrigAddr = Addr;

2287 if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {

2289 const SIInstrInfo *TII = Subtarget->getInstrInfo();

2290

2293 Addr = LHS;

2294 ImmOffset = COffsetVal;

2295 } else if (LHS->isDivergent() && COffsetVal > 0) {

2296 SDLoc SL(N);

2297

2298

2299 int64_t SplitImmOffset, RemainderOffset;

2300 std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(

2302

2304 SDNode *VMov = CurDAG->getMachineNode(

2305 AMDGPU::V_MOV_B32_e32, SL, MVT::i32,

2306 CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));

2307 VAddr = SDValue(VMov, 0);

2308 SAddr = LHS;

2309 if (!isFlatScratchBaseLegal(Addr))

2310 return false;

2311 if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))

2312 return false;

2313 Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);

2314 CPol = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);

2315 return true;

2316 }

2317 }

2318 }

2319

2321 return false;

2322

2325

2326 if (LHS->isDivergent() && RHS->isDivergent()) {

2327 SAddr = LHS;

2328 VAddr = RHS;

2329 } else if (RHS->isDivergent() && LHS->isDivergent()) {

2330 SAddr = RHS;

2331 VAddr = LHS;

2332 } else {

2333 return false;

2334 }

2335

2336 if (OrigAddr != Addr) {

2337 if (!isFlatScratchBaseLegalSVImm(OrigAddr))

2338 return false;

2339 } else {

2340 if (!isFlatScratchBaseLegalSV(OrigAddr))

2341 return false;

2342 }

2343

2344 if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))

2345 return false;

2347 Offset = CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);

2348

2349 bool ScaleOffset = SelectScaleOffset(N, VAddr, true );

2351 SDLoc(), MVT::i32);

2352 return true;

2353}

2354

2355

2356

2357

2358bool AMDGPUDAGToDAGISel::isSOffsetLegalWithImmOffset(SDValue *SOffset,

2359 bool Imm32Only,

2360 bool IsBuffer,

2361 int64_t ImmOffset) const {

2362 if (!IsBuffer && !Imm32Only && ImmOffset < 0 &&

2364 KnownBits SKnown = CurDAG->computeKnownBits(*SOffset);

2366 return false;

2367 }

2368

2369 return true;

2370}

2371

2372

2373

2374

2376 bool IsSigned) const {

2377 bool ScaleOffset = false;

2378 if (!Subtarget->hasScaleOffset() || Offset)

2379 return false;

2380

2381 unsigned Size =

2382 (unsigned)cast(N)->getMemoryVT().getFixedSizeInBits() / 8;

2383

2386 Off = Ext;

2387

2390 ScaleOffset = C->getZExtValue() == Log2_32(Size);

2392 (IsSigned && Offset.getOpcode() == AMDGPUISD::MUL_I24) ||

2393 Offset.getOpcode() == AMDGPUISD::MUL_U24 ||

2394 (Offset.isMachineOpcode() &&

2395 Offset.getMachineOpcode() ==

2396 (IsSigned ? AMDGPU::S_MUL_I64_I32_PSEUDO

2397 : AMDGPU::S_MUL_U64_U32_PSEUDO))) {

2399 ScaleOffset = C->getZExtValue() == Size;

2400 }

2401

2402 if (ScaleOffset)

2404

2405 return ScaleOffset;

2406}

2407

2408

2409

2410

2411bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDNode *N, SDValue ByteOffsetNode,

2413 bool Imm32Only, bool IsBuffer,

2414 bool HasSOffset, int64_t ImmOffset,

2415 bool *ScaleOffset) const {

2417 "Cannot match both soffset and offset at the same time!");

2418

2419 if (ScaleOffset) {

2421

2422 *ScaleOffset = SelectScaleOffset(N, ByteOffsetNode, false );

2423 }

2424

2426 if (C) {

2427 if (!SOffset)

2428 return false;

2429

2432 *SOffset = ByteOffsetNode;

2433 return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,

2434 ImmOffset);

2435 }

2438 *SOffset = ByteOffsetNode.getOperand(0);

2439 return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,

2440 ImmOffset);

2441 }

2442 }

2443 return false;

2444 }

2445

2446 SDLoc SL(ByteOffsetNode);

2447

2448

2449

2450 int64_t ByteOffset = IsBuffer ? C->getZExtValue() : C->getSExtValue();

2452 *Subtarget, ByteOffset, IsBuffer, HasSOffset);

2453 if (EncodedOffset && Offset && !Imm32Only) {

2454 *Offset = CurDAG->getSignedTargetConstant(*EncodedOffset, SL, MVT::i32);

2455 return true;

2456 }

2457

2458

2459 if (ByteOffset < 0)

2460 return false;

2461

2463 if (EncodedOffset && Offset && Imm32Only) {

2464 *Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);

2465 return true;

2466 }

2467

2469 return false;

2470

2471 if (SOffset) {

2472 SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);

2474 CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0);

2475 return true;

2476 }

2477

2478 return false;

2479}

2480

2481SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {

2483 return Addr;

2484

2485

2486 SDLoc SL(Addr);

2487

2488 const MachineFunction &MF = CurDAG->getMachineFunction();

2489 const SIMachineFunctionInfo *Info = MF.getInfo();

2490 unsigned AddrHiVal = Info->get32BitAddressHighBits();

2491 SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);

2492

2494 CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),

2495 Addr,

2496 CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),

2497 SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),

2498 0),

2499 CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),

2500 };

2501

2502 return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,

2503 Ops), 0);

2504}

2505

2506

2507

2508

2509bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDNode *N, SDValue Addr,

2512 bool IsBuffer, bool HasSOffset,

2513 int64_t ImmOffset,

2514 bool *ScaleOffset) const {

2515 if (SOffset && Offset) {

2516 assert(!Imm32Only && !IsBuffer);

2518

2519 if (!SelectSMRDBaseOffset(N, Addr, B, nullptr, Offset, false, false, true))

2520 return false;

2521

2522 int64_t ImmOff = 0;

2524 ImmOff = C->getSExtValue();

2525

2526 return SelectSMRDBaseOffset(N, B, SBase, SOffset, nullptr, false, false,

2527 true, ImmOff, ScaleOffset);

2528 }

2529

2530

2531

2534 return false;

2535

2537

2543 }

2544 if (!N0 || !N1)

2545 return false;

2546

2547 if (SelectSMRDOffset(N, N1, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset,

2548 ImmOffset, ScaleOffset)) {

2550 return true;

2551 }

2552 if (SelectSMRDOffset(N, N0, SOffset, Offset, Imm32Only, IsBuffer, HasSOffset,

2553 ImmOffset, ScaleOffset)) {

2555 return true;

2556 }

2557 return false;

2558}

2559

2562 bool Imm32Only, bool *ScaleOffset) const {

2563 if (SelectSMRDBaseOffset(N, Addr, SBase, SOffset, Offset, Imm32Only,

2564 false, false,

2565 0, ScaleOffset)) {

2566 SBase = Expand32BitAddress(SBase);

2567 return true;

2568 }

2569

2571 SBase = Expand32BitAddress(Addr);

2572 *Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);

2573 return true;

2574 }

2575

2576 return false;

2577}

2578

2579bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,

2581 return SelectSMRD( nullptr, Addr, SBase, nullptr,

2583}

2584

2585bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,

2588 return SelectSMRD( nullptr, Addr, SBase, nullptr,

2589 &Offset, true);

2590}

2591

2594 bool ScaleOffset;

2595 if (!SelectSMRD(N, Addr, SBase, &SOffset, nullptr,

2596 false, &ScaleOffset))

2597 return false;

2598

2600 SDLoc(N), MVT::i32);

2601 return true;

2602}

2603

2604bool AMDGPUDAGToDAGISel::SelectSMRDSgprImm(SDNode *N, SDValue Addr,

2608 bool ScaleOffset;

2609 if (!SelectSMRD(N, Addr, SBase, &SOffset, &Offset, false, &ScaleOffset))

2610 return false;

2611

2613 SDLoc(N), MVT::i32);

2614 return true;

2615}

2616

2618 return SelectSMRDOffset( nullptr, N, nullptr, &Offset,

2619 false, true);

2620}

2621

2622bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue N,

2625 return SelectSMRDOffset( nullptr, N, nullptr, &Offset,

2626 true, true);

2627}

2628

2629bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgprImm(SDValue N, SDValue &SOffset,

2631

2632

2633 return N.getValueType() == MVT::i32 &&

2634 SelectSMRDBaseOffset( nullptr, N, SOffset,

2635 nullptr, &Offset,

2636 false, true);

2637}

2638

2639bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,

2642 SDLoc DL(Index);

2643

2644 if (CurDAG->isBaseWithConstantOffset(Index)) {

2648

2649

2650

2651

2652

2657 return true;

2658 }

2659 }

2660

2662 return false;

2663

2665 Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);

2666 return true;

2667}

2668

2669SDNode *AMDGPUDAGToDAGISel::getBFE32(bool IsSigned, const SDLoc &DL,

2671 uint32_t Width) {

2673 unsigned Opcode = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;

2676

2677 return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, Off, W);

2678 }

2679 unsigned Opcode = IsSigned ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

2680

2681

2682

2683 uint32_t PackedVal = Offset | (Width << 16);

2684 SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);

2685

2686 return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);

2687}

2688

2689void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {

2690

2691

2692

2693

2694 const SDValue &Shl = N->getOperand(0);

2697

2698 if (B && C) {

2699 uint32_t BVal = B->getZExtValue();

2700 uint32_t CVal = C->getZExtValue();

2701

2702 if (0 < BVal && BVal <= CVal && CVal < 32) {

2705 32 - CVal));

2706 return;

2707 }

2708 }

2709 SelectCode(N);

2710}

2711

2712void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {

2713 switch (N->getOpcode()) {

2715 if (N->getOperand(0).getOpcode() == ISD::SRL) {

2716

2717

2718 const SDValue &Srl = N->getOperand(0);

2721

2722 if (Shift && Mask) {

2724 uint32_t MaskVal = Mask->getZExtValue();

2725

2729 WidthVal));

2730 return;

2731 }

2732 }

2733 }

2734 break;

2736 if (N->getOperand(0).getOpcode() == ISD::AND) {

2737

2738

2742

2743 if (Shift && Mask) {

2745 uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

2746

2749 ReplaceNode(N, getBFE32(false, SDLoc(N), And.getOperand(0), ShiftVal,

2750 WidthVal));

2751 return;

2752 }

2753 }

2754 } else if (N->getOperand(0).getOpcode() == ISD::SHL) {

2755 SelectS_BFEFromShifts(N);

2756 return;

2757 }

2758 break;

2760 if (N->getOperand(0).getOpcode() == ISD::SHL) {

2761 SelectS_BFEFromShifts(N);

2762 return;

2763 }

2764 break;

2765

2767

2768 SDValue Src = N->getOperand(0);

2769 if (Src.getOpcode() != ISD::SRL)

2770 break;

2771

2773 if (!Amt)

2774 break;

2775

2776 unsigned Width = cast(N->getOperand(1))->getVT().getSizeInBits();

2777 ReplaceNode(N, getBFE32(true, SDLoc(N), Src.getOperand(0),

2779 return;

2780 }

2781 }

2782

2783 SelectCode(N);

2784}

2785

2786bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {

2787 assert(N->getOpcode() == ISD::BRCOND);

2788 if (N->hasOneUse())

2789 return false;

2790

2794

2796 return false;

2797

2798 MVT VT = Cond.getOperand(0).getSimpleValueType();

2799 if (VT == MVT::i32)

2800 return true;

2801

2802 if (VT == MVT::i64) {

2805 Subtarget->hasScalarCompareEq64();

2806 }

2807

2808 if ((VT == MVT::f16 || VT == MVT::f32) && Subtarget->hasSALUFloatInsts())

2809 return true;

2810

2811 return false;

2812}

2813

2816

2817

2818

2819

2820

2821

2822

2823

2824

2828

2832

2835 return Cond;

2836 }

2837 }

2839}

2840

2841void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {

2843

2844 if (Cond.isUndef()) {

2845 CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,

2846 N->getOperand(2), N->getOperand(0));

2847 return;

2848 }

2849

2850 const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();

2851

2852 bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);

2853 bool AndExec = !UseSCCBr;

2854 bool Negate = false;

2855

2857 Cond->getOperand(0)->getOpcode() == AMDGPUISD::SETCC) {

2862

2864

2865

2866

2867

2868

2869

2870

2872 bool NegatedBallot = false;

2874 Cond = BallotCond;

2875 UseSCCBr = !BallotCond->isDivergent();

2876 Negate = Negate ^ NegatedBallot;

2877 } else {

2878

2879

2880 Cond = VCMP;

2881 UseSCCBr = false;

2882 }

2883 }

2884

2885

2886

2887 AndExec = false;

2888 }

2889

2890 unsigned BrOp =

2891 UseSCCBr ? (Negate ? AMDGPU::S_CBRANCH_SCC0 : AMDGPU::S_CBRANCH_SCC1)

2892 : (Negate ? AMDGPU::S_CBRANCH_VCCZ : AMDGPU::S_CBRANCH_VCCNZ);

2893 Register CondReg = UseSCCBr ? AMDGPU::SCC : TRI->getVCC();

2894 SDLoc SL(N);

2895

2896 if (AndExec) {

2897

2898

2899

2900

2901

2902

2903

2904

2905

2906

2907

2908

2909

2911 CurDAG->getMachineNode(

2912 Subtarget->isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64, SL,

2913 MVT::i1,

2914 CurDAG->getRegister(Subtarget->isWave32() ? AMDGPU::EXEC_LO

2915 : AMDGPU::EXEC,

2916 MVT::i1),

2918 0);

2919 }

2920

2921 SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);

2922 CurDAG->SelectNodeTo(N, BrOp, MVT::Other,

2923 N->getOperand(2),

2925}

2926

2927void AMDGPUDAGToDAGISel::SelectFP_EXTEND(SDNode *N) {

2928 if (Subtarget->hasSALUFloatInsts() && N->getValueType(0) == MVT::f32 &&

2929 N->isDivergent()) {

2930 SDValue Src = N->getOperand(0);

2931 if (Src.getValueType() == MVT::f16) {

2933 CurDAG->SelectNodeTo(N, AMDGPU::S_CVT_HI_F32_F16, N->getVTList(),

2934 {Src});

2935 return;

2936 }

2937 }

2938 }

2939

2940 SelectCode(N);

2941}

2942

2943void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {

2944

2945

2946 unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?

2947 AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;

2948

2949 SDValue Chain = N->getOperand(0);

2952 MachineMemOperand *MMO = M->getMemOperand();

2954

2956 if (CurDAG->isBaseWithConstantOffset(Ptr)) {

2959

2960 const APInt &OffsetVal = PtrOffset->getAsAPIntVal();

2961 if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue())) {

2962 N = glueCopyToM0(N, PtrBase);

2963 Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);

2964 }

2965 }

2966

2968 N = glueCopyToM0(N, Ptr);

2969 Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);

2970 }

2971

2974 CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),

2975 Chain,

2976 N->getOperand(N->getNumOperands() - 1)

2977 };

2978

2979 SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);

2981}

2982

2983

2984

2985void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(SDNode *N, unsigned IntrID) {

2986 unsigned Opc;

2987 switch (IntrID) {

2988 case Intrinsic::amdgcn_ds_bvh_stack_rtn:

2989 case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:

2990 Opc = AMDGPU::DS_BVH_STACK_RTN_B32;

2991 break;

2992 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:

2993 Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP1_RTN_B32;

2994 break;

2995 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:

2996 Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP2_RTN_B64;

2997 break;

2998 }

2999 SDValue Ops[] = {N->getOperand(2), N->getOperand(3), N->getOperand(4),

3000 N->getOperand(5), N->getOperand(0)};

3001

3003 MachineMemOperand *MMO = M->getMemOperand();

3004 SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);

3006}

3007

3009 switch (IntrID) {

3010 case Intrinsic::amdgcn_ds_gws_init:

3011 return AMDGPU::DS_GWS_INIT;

3012 case Intrinsic::amdgcn_ds_gws_barrier:

3013 return AMDGPU::DS_GWS_BARRIER;

3014 case Intrinsic::amdgcn_ds_gws_sema_v:

3015 return AMDGPU::DS_GWS_SEMA_V;

3016 case Intrinsic::amdgcn_ds_gws_sema_br:

3017 return AMDGPU::DS_GWS_SEMA_BR;

3018 case Intrinsic::amdgcn_ds_gws_sema_p:

3019 return AMDGPU::DS_GWS_SEMA_P;

3020 case Intrinsic::amdgcn_ds_gws_sema_release_all:

3021 return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;

3022 default:

3024 }

3025}

3026

3027void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {

3028 if (!Subtarget->hasGWS() ||

3029 (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&

3030 !Subtarget->hasGWSSemaReleaseAll())) {

3031

3032 SelectCode(N);

3033 return;

3034 }

3035

3036

3037 const bool HasVSrc = N->getNumOperands() == 4;

3038 assert(HasVSrc || N->getNumOperands() == 3);

3039

3040 SDLoc SL(N);

3041 SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);

3042 int ImmOffset = 0;

3044 MachineMemOperand *MMO = M->getMemOperand();

3045

3046

3047

3048

3049

3050

3051

3053

3054

3055

3056

3057 glueCopyToM0(N, CurDAG->getTargetConstant(0, SL, MVT::i32));

3058 ImmOffset = ConstOffset->getZExtValue();

3059 } else {

3060 if (CurDAG->isBaseWithConstantOffset(BaseOffset)) {

3062 BaseOffset = BaseOffset.getOperand(0);

3063 }

3064

3065

3066

3067

3068 SDNode *SGPROffset

3069 = CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,

3070 BaseOffset);

3071

3072 SDNode *M0Base

3073 = CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,

3075 CurDAG->getTargetConstant(16, SL, MVT::i32));

3076 glueCopyToM0(N, SDValue(M0Base, 0));

3077 }

3078

3080 SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);

3081

3083

3084 const MCInstrDesc &InstrDesc = TII->get(Opc);

3085 int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);

3086

3087 const TargetRegisterClass *DataRC = TII->getRegClass(InstrDesc, Data0Idx);

3088

3090 if (HasVSrc) {

3091 const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();

3092

3094 MVT DataVT = Data.getValueType().getSimpleVT();

3095 if (TRI->isTypeLegalForClass(*DataRC, DataVT)) {

3096

3097 Ops.push_back(N->getOperand(2));

3098 } else {

3099

3100

3101 const SDValue RegSeqOps[] = {

3102 CurDAG->getTargetConstant(DataRC->getID(), SL, MVT::i32), Data,

3103 CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),

3105 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL, MVT::i32),

3106 0),

3107 CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32)};

3108

3109 Ops.push_back(SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,

3110 SL, MVT::v2i32, RegSeqOps),

3111 0));

3112 }

3113 }

3114

3115 Ops.push_back(OffsetField);

3116 Ops.push_back(Chain);

3117

3118 SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);

3120}

3121

3122void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) {

3123 if (Subtarget->getLDSBankCount() != 16) {

3124

3125 SelectCode(N);

3126 return;

3127 }

3128

3129 SDLoc DL(N);

3130

3131

3132

3133

3134

3135

3136

3137

3138

3139

3140

3141

3142

3143

3144

3145

3146

3147

3148

3149

3151 N->getOperand(5), SDValue());

3152

3153 SDVTList VTs = CurDAG->getVTList(MVT::f32, MVT::Other);

3154

3155 SDNode *InterpMov =

3156 CurDAG->getMachineNode(AMDGPU::V_INTERP_MOV_F32, DL, VTs, {

3157 CurDAG->getTargetConstant(2, DL, MVT::i32),

3158 N->getOperand(3),

3159 N->getOperand(2),

3161 });

3162

3163 SDNode *InterpP1LV =

3164 CurDAG->getMachineNode(AMDGPU::V_INTERP_P1LV_F16, DL, MVT::f32, {

3165 CurDAG->getTargetConstant(0, DL, MVT::i32),

3166 N->getOperand(1),

3167 N->getOperand(3),

3168 N->getOperand(2),

3169 CurDAG->getTargetConstant(0, DL, MVT::i32),

3170 SDValue(InterpMov, 0),

3171 N->getOperand(4),

3172 CurDAG->getTargetConstant(0, DL, MVT::i1),

3173 CurDAG->getTargetConstant(0, DL, MVT::i32),

3175 });

3176

3178}

3179

3180void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {

3181 unsigned IntrID = N->getConstantOperandVal(1);

3182 switch (IntrID) {

3183 case Intrinsic::amdgcn_ds_append:

3184 case Intrinsic::amdgcn_ds_consume: {

3185 if (N->getValueType(0) != MVT::i32)

3186 break;

3187 SelectDSAppendConsume(N, IntrID);

3188 return;

3189 }

3190 case Intrinsic::amdgcn_ds_bvh_stack_rtn:

3191 case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:

3192 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:

3193 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:

3194 SelectDSBvhStackIntrinsic(N, IntrID);

3195 return;

3196 case Intrinsic::amdgcn_init_whole_wave:

3197 CurDAG->getMachineFunction()

3198 .getInfo()

3199 ->setInitWholeWave();

3200 break;

3201 }

3202

3203 SelectCode(N);

3204}

3205

3206void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {

3207 unsigned IntrID = N->getConstantOperandVal(0);

3208 unsigned Opcode = AMDGPU::INSTRUCTION_LIST_END;

3209 SDNode *ConvGlueNode = N->getGluedNode();

3210 if (ConvGlueNode) {

3211

3212 assert(ConvGlueNode->getOpcode() == ISD::CONVERGENCECTRL_GLUE);

3214 ConvGlueNode =

3215 CurDAG->getMachineNode(TargetOpcode::CONVERGENCECTRL_GLUE, {},

3216 MVT::Glue, SDValue(ConvGlueNode, 0));

3217 } else {

3218 ConvGlueNode = nullptr;

3219 }

3220 switch (IntrID) {

3221 case Intrinsic::amdgcn_wqm:

3222 Opcode = AMDGPU::WQM;

3223 break;

3224 case Intrinsic::amdgcn_softwqm:

3225 Opcode = AMDGPU::SOFT_WQM;

3226 break;

3227 case Intrinsic::amdgcn_wwm:

3228 case Intrinsic::amdgcn_strict_wwm:

3229 Opcode = AMDGPU::STRICT_WWM;

3230 break;

3231 case Intrinsic::amdgcn_strict_wqm:

3232 Opcode = AMDGPU::STRICT_WQM;

3233 break;

3234 case Intrinsic::amdgcn_interp_p1_f16:

3235 SelectInterpP1F16(N);

3236 return;

3237 case Intrinsic::amdgcn_permlane16_swap:

3238 case Intrinsic::amdgcn_permlane32_swap: {

3239 if ((IntrID == Intrinsic::amdgcn_permlane16_swap &&

3240 !Subtarget->hasPermlane16Swap()) ||

3241 (IntrID == Intrinsic::amdgcn_permlane32_swap &&

3242 !Subtarget->hasPermlane32Swap())) {

3243 SelectCode(N);

3244 return;

3245 }

3246

3247 Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap

3248 ? AMDGPU::V_PERMLANE16_SWAP_B32_e64

3249 : AMDGPU::V_PERMLANE32_SWAP_B32_e64;

3250

3252 if (ConvGlueNode)

3253 NewOps.push_back(SDValue(ConvGlueNode, 0));

3254

3255 bool FI = N->getConstantOperandVal(3);

3256 NewOps[2] = CurDAG->getTargetConstant(

3258

3259 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), NewOps);

3260 return;

3261 }

3262 default:

3263 SelectCode(N);

3264 break;

3265 }

3266

3267 if (Opcode != AMDGPU::INSTRUCTION_LIST_END) {

3268 SDValue Src = N->getOperand(1);

3269 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src});

3270 }

3271

3272 if (ConvGlueNode) {

3274 NewOps.push_back(SDValue(ConvGlueNode, 0));

3275 CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), NewOps);

3276 }

3277}

3278

3279void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {

3280 unsigned IntrID = N->getConstantOperandVal(1);

3281 switch (IntrID) {

3282 case Intrinsic::amdgcn_ds_gws_init:

3283 case Intrinsic::amdgcn_ds_gws_barrier:

3284 case Intrinsic::amdgcn_ds_gws_sema_v:

3285 case Intrinsic::amdgcn_ds_gws_sema_br:

3286 case Intrinsic::amdgcn_ds_gws_sema_p:

3287 case Intrinsic::amdgcn_ds_gws_sema_release_all:

3288 SelectDS_GWS(N, IntrID);

3289 return;

3290 default:

3291 break;

3292 }

3293

3294 SelectCode(N);

3295}

3296

3297void AMDGPUDAGToDAGISel::SelectWAVE_ADDRESS(SDNode *N) {

3299 CurDAG->getTargetConstant(Subtarget->getWavefrontSizeLog2(), SDLoc(N), MVT::i32);

3300 CurDAG->SelectNodeTo(N, AMDGPU::S_LSHR_B32, N->getVTList(),

3301 {N->getOperand(0), Log2WaveSize});

3302}

3303

3304void AMDGPUDAGToDAGISel::SelectSTACKRESTORE(SDNode *N) {

3305 SDValue SrcVal = N->getOperand(1);

3307 SelectCode(N);

3308 return;

3309 }

3310

3312 Register SP = TLI->getStackPointerRegisterToSaveRestore();

3313 SDLoc SL(N);

3314

3317 } else {

3318 SDValue Log2WaveSize = CurDAG->getTargetConstant(

3319 Subtarget->getWavefrontSizeLog2(), SL, MVT::i32);

3320

3321 if (N->isDivergent()) {

3322 SrcVal = SDValue(CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL,

3323 MVT::i32, SrcVal),

3324 0);

3325 }

3326

3327 CopyVal = SDValue(CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,

3328 {SrcVal, Log2WaveSize}),

3329 0);

3330 }

3331

3332 SDValue CopyToSP = CurDAG->getCopyToReg(N->getOperand(0), SL, SP, CopyVal);

3333 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), CopyToSP);

3334}

3335

3336bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,

3337 unsigned &Mods,

3338 bool IsCanonicalizing,

3339 bool AllowAbs) const {

3341 Src = In;

3342

3343 if (Src.getOpcode() == ISD::FNEG) {

3345 Src = Src.getOperand(0);

3346 } else if (Src.getOpcode() == ISD::FSUB && IsCanonicalizing) {

3347

3348

3350 if (LHS && LHS->isZero()) {

3352 Src = Src.getOperand(1);

3353 }

3354 }

3355

3356 if (AllowAbs && Src.getOpcode() == ISD::FABS) {

3358 Src = Src.getOperand(0);

3359 }

3360

3362 return true;

3363

3364

3365

3366

3367

3368

3369

3370

3371 if (IsCanonicalizing)

3372 return true;

3373

3374

3375

3376

3379

3380

3381

3382

3383

3384

3386 EVT VT = Src.getValueType();

3388 (VT != MVT::i32 && VT != MVT::v2i32 && VT != MVT::i64))

3389 return true;

3390

3392 if (!CRHS)

3393 return true;

3394

3395 auto ReplaceSrc = [&]() -> SDValue {

3397 return Src.getOperand(0);

3398

3402 Src.getValueType(), LHS, Index);

3403 };

3404

3405

3406

3407

3408

3409

3412 Src = ReplaceSrc();

3413 } else if (Opc == ISD::AND && AllowAbs &&

3416 Src = ReplaceSrc();

3419 Src = ReplaceSrc();

3420 }

3421

3422 return true;

3423}

3424

3425bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,

3426 SDValue &SrcMods) const {

3427 unsigned Mods;

3428 if (SelectVOP3ModsImpl(In, Src, Mods, true,

3429 true)) {

3430 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);

3431 return true;

3432 }

3433

3434 return false;

3435}

3436

3437bool AMDGPUDAGToDAGISel::SelectVOP3ModsNonCanonicalizing(

3439 unsigned Mods;

3440 if (SelectVOP3ModsImpl(In, Src, Mods, false,

3441 true)) {

3442 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);

3443 return true;

3444 }

3445

3446 return false;

3447}

3448

3449bool AMDGPUDAGToDAGISel::SelectVOP3BMods(SDValue In, SDValue &Src,

3450 SDValue &SrcMods) const {

3451 unsigned Mods;

3452 if (SelectVOP3ModsImpl(In, Src, Mods,

3453 true,

3454 false)) {

3455 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);

3456 return true;

3457 }

3458

3459 return false;

3460}

3461

3462bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {

3463 if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)

3464 return false;

3465

3466 Src = In;

3467 return true;

3468}

3469

3470bool AMDGPUDAGToDAGISel::SelectVINTERPModsImpl(SDValue In, SDValue &Src,

3472 bool OpSel) const {

3473 unsigned Mods;

3474 if (SelectVOP3ModsImpl(In, Src, Mods,

3475 true,

3476 false)) {

3477 if (OpSel)

3479 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);

3480 return true;

3481 }

3482

3483 return false;

3484}

3485

3486bool AMDGPUDAGToDAGISel::SelectVINTERPMods(SDValue In, SDValue &Src,

3487 SDValue &SrcMods) const {

3488 return SelectVINTERPModsImpl(In, Src, SrcMods, false);

3489}

3490

3491bool AMDGPUDAGToDAGISel::SelectVINTERPModsHi(SDValue In, SDValue &Src,

3492 SDValue &SrcMods) const {

3493 return SelectVINTERPModsImpl(In, Src, SrcMods, true);

3494}

3495

3496bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,

3499 SDLoc DL(In);

3500 Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);

3501 Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);

3502

3503 return SelectVOP3Mods(In, Src, SrcMods);

3504}

3505

3506bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(SDValue In, SDValue &Src,

3509 SDLoc DL(In);

3510 Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);

3511 Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);

3512

3513 return SelectVOP3BMods(In, Src, SrcMods);

3514}

3515

3516bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,

3518 Src = In;

3519

3520 SDLoc DL(In);

3521 Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);

3522 Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);

3523

3524 return true;

3525}

3526

3527bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,

3528 SDValue &SrcMods, bool IsDOT) const {

3530 Src = In;

3531

3532

3533 if (Src.getOpcode() == ISD::FNEG) {

3535 Src = Src.getOperand(0);

3536 }

3537

3538 if (Src.getOpcode() == ISD::BUILD_VECTOR && Src.getNumOperands() == 2 &&

3539 (!IsDOT || !Subtarget->hasDOTOpSelHazard())) {

3540 unsigned VecMods = Mods;

3541

3542 SDValue Lo = stripBitcast(Src.getOperand(0));

3543 SDValue Hi = stripBitcast(Src.getOperand(1));

3544

3545 if (Lo.getOpcode() == ISD::FNEG) {

3546 Lo = stripBitcast(Lo.getOperand(0));

3548 }

3549

3550 if (Hi.getOpcode() == ISD::FNEG) {

3551 Hi = stripBitcast(Hi.getOperand(0));

3553 }

3554

3557

3560

3561 unsigned VecSize = Src.getValueSizeInBits();

3562 Lo = stripExtractLoElt(Lo);

3563 Hi = stripExtractLoElt(Hi);

3564

3565 if (Lo.getValueSizeInBits() > VecSize) {

3566 Lo = CurDAG->getTargetExtractSubreg(

3567 (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),

3569 }

3570

3571 if (Hi.getValueSizeInBits() > VecSize) {

3572 Hi = CurDAG->getTargetExtractSubreg(

3573 (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),

3575 }

3576

3577 assert(Lo.getValueSizeInBits() <= VecSize &&

3578 Hi.getValueSizeInBits() <= VecSize);

3579

3580 if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {

3581

3582

3583

3584 if (VecSize == Lo.getValueSizeInBits()) {

3585 Src = Lo;

3586 } else if (VecSize == 32) {

3587 Src = createVOP3PSrc32FromLo16(Lo, Src, CurDAG, Subtarget);

3588 } else {

3589 assert(Lo.getValueSizeInBits() == 32 && VecSize == 64);

3590

3591 SDLoc SL(In);

3593 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL,

3594 Lo.getValueType()), 0);

3595 auto RC = Lo->isDivergent() ? AMDGPU::VReg_64RegClassID

3596 : AMDGPU::SReg_64RegClassID;

3598 CurDAG->getTargetConstant(RC, SL, MVT::i32),

3599 Lo, CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),

3600 Undef, CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32) };

3601

3602 Src = SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SL,

3603 Src.getValueType(), Ops), 0);

3604 }

3605 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);

3606 return true;

3607 }

3608

3611 .bitcastToAPInt().getZExtValue();

3613 Src = CurDAG->getTargetConstant(Lit, SDLoc(In), MVT::i64);

3614 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);

3615 return true;

3616 }

3617 }

3618

3619 Mods = VecMods;

3621 Src.getNumOperands() == 2) {

3622

3623

3624

3625

3627 ArrayRef Mask = SVN->getMask();

3628

3629 if (Mask[0] < 2 && Mask[1] < 2) {

3630

3631 SDValue ShuffleSrc = SVN->getOperand(0);

3632

3633 if (ShuffleSrc.getOpcode() == ISD::FNEG) {

3634 ShuffleSrc = ShuffleSrc.getOperand(0);

3636 }

3637

3638 if (Mask[0] == 1)

3640 if (Mask[1] == 1)

3642

3643 Src = ShuffleSrc;

3644 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);

3645 return true;

3646 }

3647 }

3648

3649

3651

3652 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);

3653 return true;

3654}

3655

3656bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, SDValue &Src,

3657 SDValue &SrcMods) const {

3658 return SelectVOP3PMods(In, Src, SrcMods, true);

3659}

3660

3661bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,

3664 assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");

3665

3667 unsigned SrcVal = C->getZExtValue();

3668 if (SrcVal == 1)

3670

3671 Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);

3672 return true;

3673}

3674

3678 unsigned DstRegClass;

3679 EVT DstTy;

3680 switch (Elts.size()) {

3681 case 8:

3682 DstRegClass = AMDGPU::VReg_256RegClassID;

3683 DstTy = MVT::v8i32;

3684 break;

3685 case 4:

3686 DstRegClass = AMDGPU::VReg_128RegClassID;

3687 DstTy = MVT::v4i32;

3688 break;

3689 case 2:

3690 DstRegClass = AMDGPU::VReg_64RegClassID;

3691 DstTy = MVT::v2i32;

3692 break;

3693 default:

3695 }

3696

3699 for (unsigned i = 0; i < Elts.size(); ++i) {

3700 Ops.push_back(Elts[i]);

3703 }

3704 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, DstTy, Ops);

3705}

3706

3711 assert("unhandled Reg sequence size" &&

3712 (Elts.size() == 8 || Elts.size() == 16));

3713

3714

3715

3716 for (unsigned i = 0; i < Elts.size(); i += 2) {

3717 SDValue LoSrc = stripExtractLoElt(stripBitcast(Elts[i]));

3719 if (isExtractHiElt(Elts[i + 1], HiSrc) && LoSrc == HiSrc) {

3721 } else {

3724 CurDAG->getMachineNode(AMDGPU::V_PERM_B32_e64, DL, MVT::i32,

3725 {Elts[i + 1], Elts[i], PackLoLo});

3727 }

3728 }

3729

3731}

3732

3735 const SDLoc &DL, unsigned ElementSize) {

3736 if (ElementSize == 16)

3738 if (ElementSize == 32)

3741}

3742

3746 unsigned ElementSize) {

3747 if (ModOpcode == ISD::FNEG) {

3749

3751 for (auto El : Elts) {

3752 if (El.getOpcode() != ISD::FABS)

3753 break;

3754 NegAbsElts.push_back(El->getOperand(0));

3755 }

3756 if (Elts.size() != NegAbsElts.size()) {

3757

3759 } else {

3760

3763 }

3764 } else {

3765 assert(ModOpcode == ISD::FABS);

3766

3769 }

3770}

3771

3772

3773

3774static void

3776 std::function<bool(SDValue)> ModifierCheck) {

3777 for (unsigned i = 0; i < BV->getNumOperands(); ++i) {

3778 if (auto *F16Pair =

3780 for (unsigned i = 0; i < F16Pair->getNumOperands(); ++i) {

3781 SDValue ElF16 = stripBitcast(F16Pair->getOperand(i));

3782 if (!ModifierCheck(ElF16))

3783 break;

3784 }

3785 }

3786 }

3787}

3788

3789bool AMDGPUDAGToDAGISel::SelectWMMAModsF16Neg(SDValue In, SDValue &Src,

3790 SDValue &SrcMods) const {

3791 Src = In;

3793

3794

3797

3799 if (Element.getOpcode() != ISD::FNEG)

3800 return false;

3802 return true;

3803 });

3804

3805

3810 }

3811 }

3812

3813

3816 for (unsigned i = 0; i < BV->getNumOperands(); ++i) {

3818

3819 if (ElV2f16.getOpcode() != ISD::FNEG)

3820 break;

3822 }

3823

3824

3829 }

3830 }

3831

3832 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);

3833 return true;

3834}

3835

3836bool AMDGPUDAGToDAGISel::SelectWMMAModsF16NegAbs(SDValue In, SDValue &Src,

3837 SDValue &SrcMods) const {

3838 Src = In;

3840 unsigned ModOpcode;

3841

3842

3846

3847 if (EltsF16.empty())

3848 ModOpcode = (ElF16.getOpcode() == ISD::FNEG) ? ISD::FNEG : ISD::FABS;

3849 if (ElF16.getOpcode() != ModOpcode)

3850 return false;

3852 return true;

3853 });

3854

3855

3858 16);

3859 }

3860

3861

3864

3865 for (unsigned i = 0; i < BV->getNumOperands(); ++i) {

3867

3868 if (EltsV2F16.empty())

3869 ModOpcode = (ElV2f16.getOpcode() == ISD::FNEG) ? ISD::FNEG : ISD::FABS;

3870 if (ElV2f16->getOpcode() != ModOpcode)

3871 break;

3873 }

3874

3875

3878 32);

3879 }

3880

3881 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);

3882 return true;

3883}

3884

3885bool AMDGPUDAGToDAGISel::SelectWMMAModsF32NegAbs(SDValue In, SDValue &Src,

3886 SDValue &SrcMods) const {

3887 Src = In;

3890

3893

3895 unsigned ModOpcode =

3896 (ElF32.getOpcode() == ISD::FNEG) ? ISD::FNEG : ISD::FABS;

3897 for (unsigned i = 0; i < BV->getNumOperands(); ++i) {

3899 if (ElF32.getOpcode() != ModOpcode)

3900 break;

3902 }

3903

3904

3907 32);

3908 }

3909

3910 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);

3911 return true;

3912}

3913

3914bool AMDGPUDAGToDAGISel::SelectWMMAVISrc(SDValue In, SDValue &Src) const {

3916 BitVector UndefElements;

3918 if (isInlineImmediate(Splat.getNode())) {

3920 unsigned Imm = C->getAPIntValue().getSExtValue();

3921 Src = CurDAG->getTargetConstant(Imm, SDLoc(In), MVT::i32);

3922 return true;

3923 }

3925 unsigned Imm = C->getValueAPF().bitcastToAPInt().getSExtValue();

3926 Src = CurDAG->getTargetConstant(Imm, SDLoc(In), MVT::i32);

3927 return true;

3928 }

3930 }

3931 }

3932

3933

3934 SDValue SplatSrc32 = stripBitcast(In);

3936 if (SDValue Splat32 = SplatSrc32BV->getSplatValue()) {

3937 SDValue SplatSrc16 = stripBitcast(Splat32);

3939 if (SDValue Splat = SplatSrc16BV->getSplatValue()) {

3940 const SIInstrInfo *TII = Subtarget->getInstrInfo();

3941 std::optional RawValue;

3943 RawValue = C->getValueAPF().bitcastToAPInt();

3945 RawValue = C->getAPIntValue();

3946

3947 if (RawValue.has_value()) {

3948 EVT VT = In.getValueType().getScalarType();

3953 RawValue.value());

3954 if (TII->isInlineConstant(FloatVal)) {

3955 Src = CurDAG->getTargetConstant(RawValue.value(), SDLoc(In),

3956 MVT::i16);

3957 return true;

3958 }

3959 } else if (VT.getSimpleVT() == MVT::i16) {

3960 if (TII->isInlineConstant(RawValue.value())) {

3961 Src = CurDAG->getTargetConstant(RawValue.value(), SDLoc(In),

3962 MVT::i16);

3963 return true;

3964 }

3965 } else

3967 }

3968 }

3969 }

3970

3971 return false;

3972}

3973

3974bool AMDGPUDAGToDAGISel::SelectSWMMACIndex8(SDValue In, SDValue &Src,

3975 SDValue &IndexKey) const {

3976 unsigned Key = 0;

3977 Src = In;

3978

3980 const llvm::SDValue &ShiftSrc = In.getOperand(0);

3985 Src = ShiftSrc;

3986 }

3987 }

3988

3989 IndexKey = CurDAG->getTargetConstant(Key, SDLoc(In), MVT::i32);

3990 return true;

3991}

3992

3993bool AMDGPUDAGToDAGISel::SelectSWMMACIndex16(SDValue In, SDValue &Src,

3994 SDValue &IndexKey) const {

3995 unsigned Key = 0;

3996 Src = In;

3997

3999 const llvm::SDValue &ShiftSrc = In.getOperand(0);

4003 Key = 1;

4004 Src = ShiftSrc;

4005 }

4006 }

4007

4008 IndexKey = CurDAG->getTargetConstant(Key, SDLoc(In), MVT::i32);

4009 return true;

4010}

4011

4012bool AMDGPUDAGToDAGISel::SelectSWMMACIndex32(SDValue In, SDValue &Src,

4013 SDValue &IndexKey) const {

4014 unsigned Key = 0;

4015 Src = In;

4016

4018

4020 const SDValue &ExtendSrc = In.getOperand(0);

4022 InI32 = ExtendSrc;

4023 } else if (In->getOpcode() == ISD::BITCAST) {

4024 const SDValue &CastSrc = In.getOperand(0);

4028 if (Zero && Zero->getZExtValue() == 0)

4030 }

4031 }

4032

4038 Key = 1;

4039 Src = ExtractVecEltSrc;

4040 }

4041 }

4042

4043 IndexKey = CurDAG->getTargetConstant(Key, SDLoc(In), MVT::i32);

4044 return true;

4045}

4046

4047bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,

4048 SDValue &SrcMods) const {

4049 Src = In;

4050

4051 SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);

4052 return true;

4053}

4054

4055bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,

4056 SDValue &SrcMods) const {

4057

4058 return SelectVOP3Mods(In, Src, SrcMods);

4059}

4060

4061

4062

4063

4064

4065

4066

4068 if (Op.getValueType() != MVT::f32 || Op.getOpcode() != ISD::BITCAST)

4070 Op = Op.getOperand(0);

4071

4072 IsExtractHigh = false;

4075 if (!Low16 || !Low16->isZero())

4077 Op = stripBitcast(Op.getOperand(1));

4078 if (Op.getValueType() != MVT::bf16)

4080 return Op;

4081 }

4082

4083 if (Op.getValueType() != MVT::i32)

4085

4088 if (Mask->getZExtValue() == 0xffff0000) {

4089 IsExtractHigh = true;

4090 return Op.getOperand(0);

4091 }

4092 }

4094 }

4095

4099 return Op.getOperand(0);

4100 }

4101 }

4102

4104}

4105

4106

4107

4108bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,

4109 unsigned &Mods,

4110 MVT VT) const {

4111 Mods = 0;

4112 SelectVOP3ModsImpl(In, Src, Mods);

4113

4114 bool IsExtractHigh = false;

4115 if (Src.getOpcode() == ISD::FP_EXTEND) {

4116 Src = Src.getOperand(0);

4117 } else if (VT == MVT::bf16) {

4119 if (!B16)

4120 return false;

4121 Src = B16;

4122 } else

4123 return false;

4124

4125 if (Src.getValueType() != VT &&

4126 (VT != MVT::bf16 || Src.getValueType() != MVT::i32))

4127 return false;

4128

4129 Src = stripBitcast(Src);

4130

4131

4132

4134 unsigned ModsTmp;

4135 SelectVOP3ModsImpl(Src, Src, ModsTmp);

4136

4139

4142 }

4143

4144

4145

4146

4147

4148

4150 if (Src.getValueSizeInBits() == 16) {

4153

4154

4155 return true;

4156 }

4157

4159 Src.getOperand(0).getValueType() == MVT::i32) {

4160 Src = Src.getOperand(0);

4161 return true;

4162 }

4163

4164 if (Subtarget->useRealTrue16Insts())

4165

4166 Src = createVOP3PSrc32FromLo16(Src, In, CurDAG, Subtarget);

4167 } else if (IsExtractHigh)

4169

4170 return true;

4171}

4172

4173bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsExt(SDValue In, SDValue &Src,

4174 SDValue &SrcMods) const {

4175 unsigned Mods = 0;

4176 if (!SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::f16))

4177 return false;

4178 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);

4179 return true;

4180}

4181

4182bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,

4183 SDValue &SrcMods) const {

4184 unsigned Mods = 0;

4185 SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::f16);

4186 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);

4187 return true;

4188}

4189

4190bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixBF16ModsExt(SDValue In, SDValue &Src,

4191 SDValue &SrcMods) const {

4192 unsigned Mods = 0;

4193 if (!SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::bf16))

4194 return false;

4195 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);

4196 return true;

4197}

4198

4199bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixBF16Mods(SDValue In, SDValue &Src,

4200 SDValue &SrcMods) const {

4201 unsigned Mods = 0;

4202 SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::bf16);

4203 SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);

4204 return true;

4205}

4206

4207

4208

4211 unsigned NumOpcodes = 0;

4212 uint8_t LHSBits, RHSBits;

4213

4214 auto getOperandBits = [&Src, In](SDValue Op, uint8_t &Bits) -> bool {

4215

4216

4217

4218

4219

4220

4221

4222

4223

4224 const uint8_t SrcBits[3] = { 0xf0, 0xcc, 0xaa };

4225

4227 if (C->isAllOnes()) {

4228 Bits = 0xff;

4229 return true;

4230 }

4231 if (C->isZero()) {

4232 Bits = 0;

4233 return true;

4234 }

4235 }

4236

4237 for (unsigned I = 0; I < Src.size(); ++I) {

4238

4239 if (Src[I] == Op) {

4240 Bits = SrcBits[I];

4241 return true;

4242 }

4243

4244 if (Src[I] == In) {

4245 Bits = SrcBits[I];

4246 Src[I] = Op;

4247 return true;

4248 }

4249 }

4250

4251 if (Src.size() == 3) {

4252

4253

4254

4257 if (C->isAllOnes()) {

4259 for (unsigned I = 0; I < Src.size(); ++I) {

4260 if (Src[I] == LHS) {

4261 Bits = ~SrcBits[I];

4262 return true;

4263 }

4264 }

4265 }

4266 }

4267 }

4268

4269 return false;

4270 }

4271

4272 Bits = SrcBits[Src.size()];

4273 Src.push_back(Op);

4274 return true;

4275 };

4276

4277 switch (In.getOpcode()) {

4283

4285 if (!getOperandBits(LHS, LHSBits) ||

4286 !getOperandBits(RHS, RHSBits)) {

4287 Src = Backup;

4288 return std::make_pair(0, 0);

4289 }

4290

4291

4293 if (Op.first) {

4294 NumOpcodes += Op.first;

4295 LHSBits = Op.second;

4296 }

4297

4299 if (Op.first) {

4300 NumOpcodes += Op.first;

4301 RHSBits = Op.second;

4302 }

4303 break;

4304 }

4305 default:

4306 return std::make_pair(0, 0);

4307 }

4308

4310 switch (In.getOpcode()) {

4312 TTbl = LHSBits & RHSBits;

4313 break;

4315 TTbl = LHSBits | RHSBits;

4316 break;

4318 TTbl = LHSBits ^ RHSBits;

4319 break;

4320 default:

4321 break;

4322 }

4323

4324 return std::make_pair(NumOpcodes + 1, TTbl);

4325}

4326

4330 uint8_t TTbl;

4331 unsigned NumOpcodes;

4332

4333 std::tie(NumOpcodes, TTbl) = BitOp3_Op(In, Src);

4334

4335

4336

4337 if (NumOpcodes < 2 || Src.empty())

4338 return false;

4339

4340

4341

4342

4343 if (NumOpcodes < 4 && !In->isDivergent())

4344 return false;

4345

4346 if (NumOpcodes == 2 && In.getValueType() == MVT::i32) {

4347

4348

4349

4351 (In.getOperand(0).getOpcode() == In.getOpcode() ||

4352 In.getOperand(1).getOpcode() == In.getOpcode()))

4353 return false;

4354

4355 if (In.getOpcode() == ISD::OR &&

4356 (In.getOperand(0).getOpcode() == ISD::AND ||

4357 In.getOperand(1).getOpcode() == ISD::AND))

4358 return false;

4359 }

4360

4361

4362

4363

4364

4365

4366 while (Src.size() < 3)

4367 Src.push_back(Src[0]);

4368

4369 Src0 = Src[0];

4370 Src1 = Src[1];

4371 Src2 = Src[2];

4372

4373 Tbl = CurDAG->getTargetConstant(TTbl, SDLoc(In), MVT::i32);

4374 return true;

4375}

4376

4377SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const {

4378 if (In.isUndef())

4379 return CurDAG->getUNDEF(MVT::i32);

4380

4382 SDLoc SL(In);

4383 return CurDAG->getConstant(C->getZExtValue() << 16, SL, MVT::i32);

4384 }

4385

4387 SDLoc SL(In);

4388 return CurDAG->getConstant(

4389 C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);

4390 }

4391

4394 return Src;

4395

4397}

4398

4399bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {

4400 assert(CurDAG->getTarget().getTargetTriple().isAMDGCN());

4401

4402 const SIRegisterInfo *SIRI = Subtarget->getRegisterInfo();

4403 const SIInstrInfo *SII = Subtarget->getInstrInfo();

4404

4405 unsigned Limit = 0;

4406 bool AllUsesAcceptSReg = true;

4407 for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();

4408 Limit < 10 && U != E; ++U, ++Limit) {

4409 const TargetRegisterClass *RC =

4410 getOperandRegClass(U->getUser(), U->getOperandNo());

4411

4412

4413

4414

4416 return false;

4417

4418 if (RC != &AMDGPU::VS_32RegClass && RC != &AMDGPU::VS_64RegClass &&

4419 RC != &AMDGPU::VS_64_Align2RegClass) {

4420 AllUsesAcceptSReg = false;

4421 SDNode *User = U->getUser();

4422 if (User->isMachineOpcode()) {

4423 unsigned Opc = User->getMachineOpcode();

4424 const MCInstrDesc &Desc = SII->get(Opc);

4425 if (Desc.isCommutable()) {

4426 unsigned OpIdx = Desc.getNumDefs() + U->getOperandNo();

4429 unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();

4430 const TargetRegisterClass *CommutedRC =

4431 getOperandRegClass(U->getUser(), CommutedOpNo);

4432 if (CommutedRC == &AMDGPU::VS_32RegClass ||

4433 CommutedRC == &AMDGPU::VS_64RegClass ||

4434 CommutedRC == &AMDGPU::VS_64_Align2RegClass)

4435 AllUsesAcceptSReg = true;

4436 }

4437 }

4438 }

4439

4440

4441

4442

4443 if (!AllUsesAcceptSReg)

4444 break;

4445 }

4446 }

4447 return !AllUsesAcceptSReg && (Limit < 10);

4448}

4449

4450bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode *N) const {

4452 const MachineMemOperand *MMO = Ld->getMemOperand();

4453

4454

4455

4456

4457

4458

4459

4461 return false;

4462

4464 Ld->getAlign() >=

4466 uint64_t(4))) &&

4470 (Subtarget->getScalarizeGlobalBehavior() &&

4472 Ld->isSimple() &&

4474 ->isMemOpHasNoClobberedMemOperand(N)));

4475}

4476

4480 bool IsModified = false;

4481 do {

4482 IsModified = false;

4483

4484

4486 while (Position != CurDAG->allnodes_end()) {

4489 if (!MachineNode)

4490 continue;

4491

4493 if (ResNode != Node) {

4494 if (ResNode)

4496 IsModified = true;

4497 }

4498 }

4499 CurDAG->RemoveDeadNodes();

4500 } while (IsModified);

4501}

4502

4507

unsigned const MachineRegisterInfo * MRI

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

const TargetInstrInfo & TII

static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr, SDValue &N0, SDValue &N1)

Definition AMDGPUISelDAGToDAG.cpp:905

static MachineSDNode * buildRegSequence32(SmallVectorImpl< SDValue > &Elts, llvm::SelectionDAG *CurDAG, const SDLoc &DL)

Definition AMDGPUISelDAGToDAG.cpp:3675

static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr)

Definition AMDGPUISelDAGToDAG.cpp:2198

static SDValue matchExtFromI32orI32(SDValue Op, bool IsSigned, const SelectionDAG *DAG)

Definition AMDGPUISelDAGToDAG.cpp:1961

static MemSDNode * findMemSDNode(SDNode *N)

Definition AMDGPUISelDAGToDAG.cpp:1822

static MachineSDNode * buildRegSequence16(SmallVectorImpl< SDValue > &Elts, llvm::SelectionDAG *CurDAG, const SDLoc &DL)

Definition AMDGPUISelDAGToDAG.cpp:3707

static bool IsCopyFromSGPR(const SIRegisterInfo &TRI, SDValue Val)

Definition AMDGPUISelDAGToDAG.cpp:1731

static SDValue combineBallotPattern(SDValue VCMP, bool &Negate)

Definition AMDGPUISelDAGToDAG.cpp:2814

static SDValue matchBF16FPExtendLike(SDValue Op, bool &IsExtractHigh)

Definition AMDGPUISelDAGToDAG.cpp:4067

static void checkWMMAElementsModifiersF16(BuildVectorSDNode *BV, std::function< bool(SDValue)> ModifierCheck)

Definition AMDGPUISelDAGToDAG.cpp:3775

Defines an instruction selector for the AMDGPU target.

Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.

static bool isNoUnsignedWrap(MachineInstr *Addr)

static bool isExtractHiElt(MachineRegisterInfo &MRI, Register In, Register &Out)

static std::pair< unsigned, uint8_t > BitOp3_Op(Register R, SmallVectorImpl< Register > &Src, const MachineRegisterInfo &MRI)

static unsigned gwsIntrinToOpcode(unsigned IntrID)

static Register buildRegSequence(SmallVectorImpl< Register > &Elts, MachineInstr *InsertPt, MachineRegisterInfo &MRI)

static void selectWMMAModsNegAbs(unsigned ModOpcode, unsigned &Mods, SmallVectorImpl< Register > &Elts, Register &Src, MachineInstr *InsertPt, MachineRegisterInfo &MRI)

Provides AMDGPU specific target descriptions.

Base class for AMDGPU specific classes of TargetSubtarget.

The AMDGPU TargetMachine interface definition for hw codegen targets.

MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL

static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

Analysis containing CSE Info

const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]

Register const TargetRegisterInfo * TRI

Promote Memory to Register

MachineInstr unsigned OpIdx

FunctionAnalysisManager FAM

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

Provides R600 specific target descriptions.

Interface definition for R600RegisterInfo.

const SmallVectorImpl< MachineOperand > & Cond

SI DAG Lowering interface definition.

LLVM IR instance of the generic uniformity analysis.

void getAnalysisUsage(AnalysisUsage &AU) const override

getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...

Definition AMDGPUISelDAGToDAG.cpp:240

AMDGPUDAGToDAGISelLegacy(TargetMachine &TM, CodeGenOptLevel OptLevel)

Definition AMDGPUISelDAGToDAG.cpp:4503

bool runOnMachineFunction(MachineFunction &MF) override

runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...

Definition AMDGPUISelDAGToDAG.cpp:229

StringRef getPassName() const override

getPassName - Return a nice clean name for a pass.

Definition AMDGPUISelDAGToDAG.cpp:952

AMDGPU specific code to select AMDGPU machine instructions for SelectionDAG operations.

void SelectBuildVector(SDNode *N, unsigned RegClassID)

Definition AMDGPUISelDAGToDAG.cpp:491

void Select(SDNode *N) override

Main hook for targets to transform nodes into machine nodes.

Definition AMDGPUISelDAGToDAG.cpp:662

bool runOnMachineFunction(MachineFunction &MF) override

Definition AMDGPUISelDAGToDAG.cpp:159

void SelectVectorShuffle(SDNode *N)

Definition AMDGPUISelDAGToDAG.cpp:573

void PreprocessISelDAG() override

PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...

Definition AMDGPUISelDAGToDAG.cpp:330

AMDGPUDAGToDAGISel()=delete

void PostprocessISelDAG() override

PostprocessISelDAG() - This hook allows the target to hack on the graph right after selection.

Definition AMDGPUISelDAGToDAG.cpp:4477

bool matchLoadD16FromBuildVector(SDNode *N) const

Definition AMDGPUISelDAGToDAG.cpp:250

PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)

Definition AMDGPUISelDAGToDAG.cpp:961

AMDGPUISelDAGToDAGPass(TargetMachine &TM)

Definition AMDGPUISelDAGToDAG.cpp:956

bool useRealTrue16Insts() const

Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-...

static SDValue stripBitcast(SDValue Val)

static int64_t getNullPointerValue(unsigned AddrSpace)

Get the integer value of a null pointer in the given address space.

static const fltSemantics & BFloat()

static const fltSemantics & IEEEhalf()

Class for arbitrary precision integers.

uint64_t getZExtValue() const

Get zero extended value.

bool isSignMask() const

Check if the APInt's value is returned by getSignMask.

bool isMaxSignedValue() const

Determine if this is the largest signed value.

int64_t getSExtValue() const

Get sign extended value.

unsigned countr_one() const

Count the number of trailing one bits.

PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)

Get the result of an analysis pass for a given IR unit.

Represent the analysis usage information of a pass.

AnalysisUsage & addRequired()

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

LLVM Basic Block Representation.

const Instruction * getTerminator() const LLVM_READONLY

Returns the terminator instruction if the block is well formed or null if the block is not well formed.

A "pseudo-class" with methods for operating on BUILD_VECTORs.

LLVM_ABI SDValue getSplatValue(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const

Returns the demanded splatted value or a null value if this is not a splat.

uint64_t getZExtValue() const

const APInt & getAPIntValue() const

int64_t getSExtValue() const

Analysis pass which computes a DominatorTree.

Legacy analysis pass which computes a DominatorTree.

Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.

FunctionPass class - This class is used to implement most global optimizations.

const SIInstrInfo * getInstrInfo() const override

Generation getGeneration() const

void checkSubtargetFeatures(const Function &F) const

Diagnose inconsistent subtarget features before attempting to codegen function F.

This class is used to represent ISD::LOAD nodes.

const SDValue & getBasePtr() const

ISD::LoadExtType getExtensionType() const

Return whether this is a plain node, or one of the varieties of value-extending loads.

TypeSize getValue() const

Analysis pass that exposes the LoopInfo for a function.

SmallVector< LoopT *, 4 > getLoopsInPreorder() const

Return all of the loops in the function in preorder across the loop nests, with siblings in forward program order.

The legacy pass manager's analysis pass to compute loop information.

static MVT getIntegerVT(unsigned BitWidth)

MachineRegisterInfo & getRegInfo()

getRegInfo - Return information about the registers currently in use.

Function & getFunction()

Return the LLVM function that this machine code represents.

LocationSize getSize() const

Return the size in bytes of the memory reference.

MachineRegisterInfo - Keep track of information for virtual and physical registers,...

An SDNode that represents everything that will be needed to construct a MachineInstr.

This is an abstract virtual class for memory operations.

unsigned getAddressSpace() const

Return the address space for the associated pointer.

MachineMemOperand * getMemOperand() const

Return a MachineMemOperand object describing the memory reference performed by operation.

const SDValue & getChain() const

EVT getMemoryVT() const

Return the type of the in-memory value.

AnalysisType & getAnalysis() const

getAnalysis() - This function is used by subclasses to get to the analysis information that they claim to use by overriding the getAnalysisUsage function.

A set of analyses that are preserved following a run of a transformation pass.

Wrapper class representing virtual and physical registers.

Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation functions.

Represents one node in the SelectionDAG.

const APInt & getAsAPIntVal() const

Helper method returns the APInt value of a ConstantSDNode.

unsigned getOpcode() const

Return the SelectionDAG opcode value for this node.

SDNodeFlags getFlags() const

uint64_t getAsZExtVal() const

Helper method returns the zero-extended integer value of a ConstantSDNode.

unsigned getNumOperands() const

Return the number of values used by this operation.

const SDValue & getOperand(unsigned Num) const

uint64_t getConstantOperandVal(unsigned Num) const

Helper method returns the integer value of a ConstantSDNode operand.

bool isPredecessorOf(const SDNode *N) const

Return true if this node is a predecessor of N.

bool isAnyAdd() const

Returns true if the node type is ADD or PTRADD.

static use_iterator use_end()

Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.

SDNode * getNode() const

get the SDNode which holds the desired result

SDValue getValue(unsigned R) const

EVT getValueType() const

Return the ValueType of the referenced return value.

TypeSize getValueSizeInBits() const

Returns the size of the value in bits.

const SDValue & getOperand(unsigned i) const

uint64_t getConstantOperandVal(unsigned i) const

unsigned getOpcode() const

static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)

bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override

static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)

static LLVM_READONLY const TargetRegisterClass * getSGPRClassForBitWidth(unsigned BitWidth)

static bool isSGPRClass(const TargetRegisterClass *RC)

bool runOnMachineFunction(MachineFunction &MF) override

runOnMachineFunction - This method must be overloaded to perform the desired machine code transformation or analysis.

void getAnalysisUsage(AnalysisUsage &AU) const override

getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.

SelectionDAGISelLegacy(char &ID, std::unique_ptr< SelectionDAGISel > S)

SelectionDAGISelPass(std::unique_ptr< SelectionDAGISel > Selector)

PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)

std::unique_ptr< FunctionLoweringInfo > FuncInfo

const TargetLowering * TLI

const TargetInstrInfo * TII

void ReplaceUses(SDValue F, SDValue T)

ReplaceUses - replace all uses of the old node F with the use of the new node T.

void ReplaceNode(SDNode *F, SDNode *T)

Replace all uses of F with T, then remove F from the DAG.

SelectionDAGISel(TargetMachine &tm, CodeGenOptLevel OL=CodeGenOptLevel::Default)

virtual bool runOnMachineFunction(MachineFunction &mf)

const TargetLowering * getTargetLowering() const

This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...

LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)

These are used for target selectors to create a new node with specified return type(s),...

LLVM_ABI SDValue getRegister(Register Reg, EVT VT)

SDValue getTargetFrameIndex(int FI, EVT VT)

LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const

Return true if the sign bit of Op is known to be zero.

SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)

LLVM_ABI bool isBaseWithConstantOffset(SDValue Op) const

Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...

MachineFunction & getMachineFunction() const

LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const

Determine which bits of Op are known to be either zero or one and return them in Known.

ilist< SDNode >::iterator allnodes_iterator

This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

StringRef - Represent a constant reference to a string, i.e.

static const unsigned CommuteAnyOperandIndex

Primary interface to the complete machine description for the target machine.

unsigned getID() const

Return the register class ID number.

Legacy analysis pass which computes a CycleInfo.

constexpr ScalarTy getKnownMinValue() const

Returns the minimum value this quantity can represent.

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

@ CONSTANT_ADDRESS_32BIT

Address space for 32-bit constant memory.

@ REGION_ADDRESS

Address space for region memory. (GDS)

@ LOCAL_ADDRESS

Address space for local memory.

@ CONSTANT_ADDRESS

Address space for constant memory (VTX2).

@ FLAT_ADDRESS

Address space for flat memory.

@ GLOBAL_ADDRESS

Address space for global memory (RAT0, VTX0).

@ PRIVATE_ADDRESS

Address space for private memory.

constexpr char Align[]

Key for Kernel::Arg::Metadata::mAlign.

constexpr char Args[]

Key for Kernel::Metadata::mArgs.

std::optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)

bool isGFX12Plus(const MCSubtargetInfo &STI)

bool isValid32BitLiteral(uint64_t Val, bool IsFP64)

bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)

bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST)

std::optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer, bool HasSOffset)

bool isUniformMMO(const MachineMemOperand *MMO)

constexpr std::underlying_type_t< E > Mask()

Get a bitmask with 1s in all places up to the high-order bit of E's largest value.

@ C

The default llvm calling convention, compatible with C.

@ SETCC

SetCC operator - This evaluates to a true value iff the condition is true.

@ SMUL_LOHI

SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2*N], and return the full value as two results, each of type iN.

@ ADDC

Carry-setting nodes for multiple precision addition and subtraction.

@ FMAD

FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.

@ ADD

Simple integer binary arithmetic operators.

@ ANY_EXTEND

ANY_EXTEND - Used for integer types. The high bits are undefined.

@ FMA

FMA - Perform a * b + c with no intermediate rounding step.

@ INTRINSIC_VOID

OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic function with side effects that does not return a result.

@ SINT_TO_FP

[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter) to floating point.

@ FADD

Simple binary floating point operators.

@ BUILD_PAIR

BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.

@ SIGN_EXTEND

Conversion operators.

@ SCALAR_TO_VECTOR

SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the resultant vector type.

@ FCANONICALIZE

Returns platform specific canonical encoding of a floating point number.

@ UNDEF

UNDEF - An undefined node.

@ CopyFromReg

CopyFromReg - This node indicates that the input value is a virtual or physical register that is defined outside of the scope of this SelectionDAG.

@ SHL

Shift and rotation operations.

@ VECTOR_SHUFFLE

VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.

@ EXTRACT_VECTOR_ELT

EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially variable) element number IDX.

@ CopyToReg

CopyToReg - This node has three operands: a chain, a register number to set to this value,...

@ ZERO_EXTEND

ZERO_EXTEND - Used for integer types, zeroing the new bits.

@ SIGN_EXTEND_INREG

SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in a large integer register (e.g. sign extending the low 8 bits of a 32-bit register to fill the top 24 bits with the sign bit of the low byte).

@ UADDO_CARRY

Carry-using nodes for multiple precision addition and subtraction.

@ AND

Bitwise operators - logical and, logical or, logical xor.

@ INTRINSIC_WO_CHAIN

RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic function with no side effects.

@ ADDE

Carry-using nodes for multiple precision addition and subtraction.

@ FP_ROUND

X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the destination VT.

@ TRUNCATE

TRUNCATE - Completely drop the high bits.

@ INTRINSIC_W_CHAIN

RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target intrinsic function with side effects that returns a result.

@ BUILD_VECTOR

BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...

bool isExtOpcode(unsigned Opcode)

CondCode

ISD::CondCode enum - These are ordered carefully to make the bitfields below work out, when and'ing and or'ing them together.

@ Undef

Value of the register doesn't matter.

@ User

could "use" a pointer

This is an optimization pass for GlobalISel generic memory operations.

FunctionAddr VTableAddr Value

constexpr bool isInt(int64_t x)

Checks if an integer fits into the given bit width.

LLVM_ABI bool isNullConstant(SDValue V)

Returns true if V is a constant integer zero.

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

constexpr bool isMask_32(uint32_t Value)

Return true if the argument is a non-empty sequence of ones starting at the least significant bit with the remainder zero.

AnalysisManager< MachineFunction > MachineFunctionAnalysisManager

constexpr int popcount(T Value) noexcept

Count the number of set bits in a value.

unsigned Log2_32(uint32_t Value)

Return the floor log base 2 of the specified value, -1 if the value is zero.

bool isBoolSGPR(SDValue V)

constexpr bool isPowerOf2_32(uint32_t Value)

Return true if the argument is a power of two > 0.

constexpr uint32_t Hi_32(uint64_t Value)

Return the high 32 bits of a 64 bit value.

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

static bool getConstantValue(SDValue N, uint32_t &Out)

constexpr bool isUInt(uint64_t x)

Checks if an unsigned integer fits into the given bit width.

CodeGenOptLevel

Code generation optimization level.

class LLVM_GSL_OWNER SmallVector

Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector&lt;T, 0&gt;).

constexpr uint32_t Lo_32(uint64_t Value)

Return the low 32 bits of a 64 bit value.

bool isa(const From &Val)

isa - Return true if the parameter to the template is an instance of one of the template type arguments.

LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key

FunctionAddr VTableAddr uintptr_t uintptr_t Data

FunctionPass * createAMDGPUISelDag(TargetMachine &TM, CodeGenOptLevel OptLevel)

This pass converts a legalized DAG into a AMDGPU-specific.

Definition AMDGPUISelDAGToDAG.cpp:150

@ SMax

Signed integer max implemented in terms of select(cmp()).

@ And

Bitwise or logical AND of integers.

@ Sub

Subtraction of integers.

DWARFExpression::Operation Op

unsigned M0(unsigned Val)

LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)

Returns the SDNode if it is a constant splat BuildVector or constant int.

decltype(auto) cast(const From &Val)

cast - Return the argument parameter cast to the specified type.

constexpr T maskTrailingOnes(unsigned N)

Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.

Implement std::hash so that hash_code can be used in STL containers.

TypeSize getSizeInBits() const

Return the size of the specified value type in bits.

uint64_t getScalarSizeInBits() const

MVT getSimpleVT() const

Return the SimpleValueType held in the specified simple EVT.

bool bitsEq(EVT VT) const

Return true if this has the same number of bits as VT.

EVT getVectorElementType() const

Given a vector type, return the type of each element.

bool isScalarInteger() const

Return true if this is an integer, but not a vector.

unsigned getVectorNumElements() const

Given a vector type, return the number of elements it contains.

static KnownBits makeConstant(const APInt &C)

Create known bits from a known constant.

static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false)

Compute knownbits resulting from addition of LHS and RHS.

APInt getMaxValue() const

Return the maximal unsigned value possible given these KnownBits.

APInt getMinValue() const

Return the minimal unsigned value possible given these KnownBits.

static unsigned getSubRegFromChannel(unsigned Channel)

bool hasNoUnsignedWrap() const

This represents a list of ValueType's that has been intern'd by a SelectionDAG.