LLVM: lib/Target/AMDGPU/AMDGPUISelLowering.cpp Source File


#include "llvm/IR/IntrinsicsAMDGPU.h"

using namespace llvm;

#include "AMDGPUGenCallingConv.inc"

static cl::opt<bool> AMDGPUBypassSlowDiv(
    "amdgpu-bypass-slow-div",
    cl::desc("Skip 64-bit divide for dynamic 32-bit values"),
    cl::init(true));

static EVT getEquivalentMemType(LLVMContext &Ctx, EVT VT) {
  unsigned StoreSize = VT.getStoreSizeInBits();
  if (StoreSize <= 32)
    return EVT::getIntegerVT(Ctx, StoreSize);

  if (StoreSize % 32 == 0)
    return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32);

  return VT;
}
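// Explanatory note: memory types wider than 32 bits are canonicalized to
// vectors of i32 when their store size is a dword multiple, since 32 bits is
// the natural memory access width on AMDGPU. The surrounding declaration and
// return lines were reconstructed from context and are an assumption, not
// quoted upstream text.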


      if (VT == MVT::i64)
        continue;

    }
  }

  for (auto MemVT :
       {MVT::v2i8, MVT::v4i8, MVT::v2i16, MVT::v3i16, MVT::v4i16})


                      ISD::FROUNDEVEN, ISD::FTRUNC},
                     {MVT::f16, MVT::f32}, Legal);

                     {MVT::f16, MVT::f32, MVT::f64}, Expand);

      {ISD::FLOG, ISD::FLOG10, ISD::FEXP, ISD::FEXP2, ISD::FEXP10}, MVT::f32,

  setOperationAction({ISD::LRINT, ISD::LLRINT}, {MVT::f16, MVT::f32, MVT::f64},

  if (Subtarget->has16BitInsts()) {
  } else {
  }

  setOperationAction({ISD::FLOG10, ISD::FLOG, ISD::FEXP, ISD::FEXP10}, MVT::f16,

  if (Subtarget->has16BitInsts()) {
  }

                     {MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32,
                      MVT::v6f32, MVT::v7f32, MVT::v8f32, MVT::v16f32,
                      MVT::v2f64, MVT::v3f64, MVT::v4f64, MVT::v8f64,
                      MVT::v16f64},

                     {MVT::v2f16, MVT::v3f16, MVT::v4f16, MVT::v16f16},

                     {MVT::v3i32, MVT::v3f32, MVT::v4i32, MVT::v4f32,
                      MVT::v5i32, MVT::v5f32, MVT::v6i32, MVT::v6f32,
                      MVT::v7i32, MVT::v7f32, MVT::v8i32, MVT::v8f32,
                      MVT::v9i32, MVT::v9f32, MVT::v10i32, MVT::v10f32,
                      MVT::v11i32, MVT::v11f32, MVT::v12i32, MVT::v12f32},

      {MVT::v2f32, MVT::v2i32, MVT::v3f32, MVT::v3i32, MVT::v4f32,
       MVT::v4i32, MVT::v5f32, MVT::v5i32, MVT::v6f32, MVT::v6i32,
       MVT::v7f32, MVT::v7i32, MVT::v8f32, MVT::v8i32, MVT::v9f32,
       MVT::v9i32, MVT::v10i32, MVT::v10f32, MVT::v11i32, MVT::v11f32,
       MVT::v12i32, MVT::v12f32, MVT::v16i32, MVT::v32f32, MVT::v32i32,
       MVT::v2f64, MVT::v2i64, MVT::v3f64, MVT::v3i64, MVT::v4f64,
       MVT::v4i64, MVT::v8f64, MVT::v8i64, MVT::v16f64, MVT::v16i64},

  const MVT ScalarIntVTs[] = {MVT::i32, MVT::i64};
  for (MVT VT : ScalarIntVTs) {
  }

  for (auto VT : {MVT::i8, MVT::i16})

      MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32, MVT::v6i32, MVT::v7i32,
      MVT::v9i32, MVT::v10i32, MVT::v11i32, MVT::v12i32};

  for (MVT VT : VectorIntTypes) {
  }

      MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32, MVT::v6f32, MVT::v7f32,
      MVT::v9f32, MVT::v10f32, MVT::v11f32, MVT::v12f32};

  for (MVT VT : FloatVectorTypes) {
                       {ISD::FABS, ISD::FMINNUM, ISD::FMAXNUM,
                        ISD::FADD, ISD::FCEIL, ISD::FCOS,
                        ISD::FDIV, ISD::FEXP2, ISD::FEXP,
                        ISD::FEXP10, ISD::FLOG2, ISD::FREM,
                        ISD::FLOG, ISD::FLOG10, ISD::FPOW,
                        ISD::FFLOOR, ISD::FTRUNC, ISD::FMUL,
                        ISD::FMA, ISD::FRINT, ISD::FNEARBYINT,
                        ISD::FSQRT, ISD::FSIN, ISD::FSUB,
  }


}

    return true;

  const auto Flags = Op.getNode()->getFlags();
  if (Flags.hasNoSignedZeros())
    return true;

  return false;
}


  switch (Opc) {
  case ISD::FMINNUM:
  case ISD::FMAXNUM:
  case ISD::FMINNUM_IEEE:
  case ISD::FMAXNUM_IEEE:
  case ISD::FMINIMUM:
  case ISD::FMAXIMUM:
  case ISD::FMINIMUMNUM:
  case ISD::FMAXIMUMNUM:
  case ISD::FSIN:
  case ISD::FTRUNC:
  case ISD::FRINT:
  case ISD::FNEARBYINT:
  case ISD::FROUNDEVEN:
  case AMDGPUISD::RCP:
  case AMDGPUISD::RCP_LEGACY:
  case AMDGPUISD::RCP_IFLAG:
  case AMDGPUISD::SIN_HW:
  case AMDGPUISD::FMUL_LEGACY:
  case AMDGPUISD::FMIN_LEGACY:
  case AMDGPUISD::FMAX_LEGACY:
  case AMDGPUISD::FMED3:
    return true;
  case ISD::BITCAST:
  default:
    return false;
  }
}

  unsigned Opc = N->getOpcode();
  if (Opc == ISD::BITCAST) {
    SDValue BCSrc = N->getOperand(0);
    }

  }

}


  return (N->getNumOperands() > 2 && N->getOpcode() != ISD::SELECT) ||
         VT == MVT::f64;
}

  return N->getValueType(0) == MVT::f32;
}


    return false;

  switch (N->getOpcode()) {
  case ISD::INLINEASM:
  case ISD::INLINEASM_BR:
  case AMDGPUISD::DIV_SCALE:

  case ISD::BITCAST:
    return false;

    switch (N->getConstantOperandVal(0)) {
    case Intrinsic::amdgcn_interp_p1:
    case Intrinsic::amdgcn_interp_p2:
    case Intrinsic::amdgcn_interp_mov:
    case Intrinsic::amdgcn_interp_p1_f16:
    case Intrinsic::amdgcn_interp_p2_f16:
      return false;
    default:
      return true;
    }
  }
  default:
    return true;
  }
}


  unsigned NumMayIncreaseSize = 0;
  MVT VT = N->getValueType(0).getScalarType().getSimpleVT();

  for (const SDNode *U : N->users()) {
      return false;

      return false;
    }
  }

  return true;
}


  if (Size <= 32)
    return MVT::i32;
}


                                 bool ForCodeSize) const {
  return (ScalarVT == MVT::f32 || ScalarVT == MVT::f64 ||
          (ScalarVT == MVT::f16 && Subtarget->has16BitInsts()));
}

  return (ScalarVT != MVT::f32 && ScalarVT != MVT::f64);
}

    std::optional<unsigned> ByteOffset) const {
    return false;

  if (NewSize >= 32)
    return true;

  EVT OldVT = N->getValueType(0);

  if (OldSize >= 32 && NewSize < 32 && MN->getAlign() >= Align(4) &&
    return false;

  return (OldSize < 32);
}


    return false;

  if ((LScalarSize >= CastScalarSize) && (CastScalarSize < 32))
    return false;

  unsigned Fast = 0;
                                        CastTy, MMO, &Fast) &&
}


  switch (N->getOpcode()) {
    return true;
    unsigned IntrID = N->getConstantOperandVal(0);
  }
    unsigned IntrID = N->getConstantOperandVal(1);
  }
  case ISD::LOAD:
      return true;
    return false;
  case AMDGPUISD::SETCC:
    return true;
  }
  return false;
}


  switch (Op.getOpcode()) {
    break;
  }
  case AMDGPUISD::RCP: {
    EVT VT = Op.getValueType();
    if (NegSrc)
      return DAG.getNode(AMDGPUISD::RCP, SL, VT, NegSrc, Op->getFlags());
  }
  default:
    break;
  }

}


  return VT == MVT::f32 || VT == MVT::f64 ||
         (Subtarget->has16BitInsts() && (VT == MVT::f16 || VT == MVT::bf16));
}

  return VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f16 || VT == MVT::bf16;
}

                                                 unsigned NumElem,
                                                 unsigned AS) const {
  return true;
}

986

988

989

990

991

992

993

994

995

  return true;
}

  unsigned SrcSize = Source.getSizeInBits();

  return DestSize < SrcSize && DestSize % 32 == 0;
}

  unsigned SrcSize = Source->getScalarSizeInBits();

  if (DestSize == 16 && Subtarget->has16BitInsts())
    return SrcSize >= 32;

  return DestSize < SrcSize && DestSize % 32 == 0;
}

  unsigned SrcSize = Src->getScalarSizeInBits();

  if (SrcSize == 16 && Subtarget->has16BitInsts())
    return DestSize >= 32;

  return SrcSize == 32 && DestSize == 64;
}

1029

1031

1032

1033

1034

1035

  if (Src == MVT::i16)
    return Dest == MVT::i32 || Dest == MVT::i64;

  return Src == MVT::i32 && Dest == MVT::i64;
}

                                                     EVT DestVT) const {
  switch (N->getOpcode()) {
    if (Subtarget->has16BitInsts() &&
        (!DestVT.isVector() || !Subtarget->hasVOP3PInsts())) {
      if (N->isDivergent() && DestVT.isInteger() &&
        return false;
      }
    }
    return true;
  default:
    break;
  }


  return true;
}

         "Expected shift op");

  SDValue ShiftLHS = N->getOperand(0);
    return false;

    return false;

      N->getOpcode() != ISD::SHL || N->getOperand(0).getOpcode() != ISD::OR)
    return true;

  if (N->getValueType(0) == MVT::i32 && N->hasOneUse() &&
      (N->user_begin()->getOpcode() == ISD::SRA ||
       N->user_begin()->getOpcode() == ISD::SRL))
    return false;

  auto IsShiftAndLoad = [](SDValue LHS, SDValue RHS) {
    if (LHS.getOpcode() != ISD::SHL)
      return false;
    return LHS0 && LHS1 && RHSLd && LHS0->getExtensionType() == ISD::ZEXTLOAD &&
           LHS1->getAPIntValue() == LHS0->getMemoryVT().getScalarSizeInBits() &&
  };
  SDValue LHS = N->getOperand(0).getOperand(0);
  SDValue RHS = N->getOperand(0).getOperand(1);
  return !(IsShiftAndLoad(LHS, RHS) || IsShiftAndLoad(RHS, LHS));
}
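// Explanatory note: the IsShiftAndLoad check above appears to keep an
// (or (shl (zextload)), x) operand un-commuted with the outer shift, since
// that shape can already be matched as the merge of a zero-extending load
// with adjacent bytes; commuting the shift through the OR would hide the
// load from that fold.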


                                    bool IsVarArg) {
  switch (CC) {
    return CC_AMDGPU;
    return CC_AMDGPU_CS_CHAIN;
    return CC_AMDGPU_Func;
    return CC_SI_Gfx;
  default:
  }
}

                                    bool IsVarArg) {
  switch (CC) {
    return RetCC_SI_Shader;
    return RetCC_SI_Gfx;
    return RetCC_AMDGPU_Func;
  default:
  }
}

1188

1189

1190

1191

1192

1193

1194

1195

1196

1197

1198

1199

1200

1201

1202

1203

1204

1205

1206

1207

1208

1209

1210

1211

1212

1213

1214

  const unsigned ExplicitOffset = ST.getExplicitKernelArgOffset();

  uint64_t ExplicitArgOffset = 0;

  unsigned InIndex = 0;

    const bool IsByRef = Arg.hasByRefAttr();
    Type *BaseArgTy = Arg.getType();
    Type *MemArgTy = IsByRef ? Arg.getParamByRefType() : BaseArgTy;
    Align Alignment = DL.getValueOrABITypeAlignment(
        IsByRef ? Arg.getParamAlign() : std::nullopt, MemArgTy);
    MaxAlign = std::max(Alignment, MaxAlign);
    uint64_t AllocSize = DL.getTypeAllocSize(MemArgTy);

    uint64_t ArgOffset = alignTo(ExplicitArgOffset, Alignment) + ExplicitOffset;
    ExplicitArgOffset = alignTo(ExplicitArgOffset, Alignment) + AllocSize;

    ComputeValueVTs(*this, DL, BaseArgTy, ValueVTs, nullptr,
                    &Offsets, ArgOffset);

    for (unsigned Value = 0, NumValues = ValueVTs.size();

      EVT ArgVT = ValueVTs[Value];
      EVT MemVT = ArgVT;

      if (NumRegs == 1) {
        MemVT = RegisterVT;
      } else {
        MemVT = ArgVT;
      }

        MemVT = RegisterVT;
      } else if (ArgVT.isVector() &&

        MemVT = RegisterVT;
      } else {
      } else if (RegisterVT.isVector()) {
        assert(MemoryBits % NumElements == 0);

                                           MemoryBits / NumElements);
        MemVT = EVT::getVectorVT(State.getContext(), ScalarVT, NumElements);
      } else {
      }
    }

      }

      unsigned PartOffset = 0;
      for (unsigned i = 0; i != NumRegs; ++i) {
                                             BasePartOffset + PartOffset,
      }
    }
  }
}

                                                 bool isVarArg,

  return DAG.getNode(AMDGPUISD::ENDPGM, DL, MVT::Other, Chain);
}


                                    bool IsVarArg) {
}

                                    bool IsVarArg) {
}

                                                    int ClobberedFI) const {
  int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;

        if (FI->getIndex() < 0) {
          int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
          int64_t InLastByte = InFirstByte;
          InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;

          if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
              (FirstByte <= InFirstByte && InFirstByte <= LastByte))
        }
      }
    }
  }

}


  StringRef FuncName("");

    FuncName = G->getSymbol();
    FuncName = G->getGlobal()->getName();

  }

    return CLI.Chain;

}


  switch (Op.getOpcode()) {
  default:
    Op->print(errs(), &DAG);
                       "instruction is not implemented yet!");
    break;
  case ISD::FCEIL: return LowerFCEIL(Op, DAG);
  case ISD::FRINT: return LowerFRINT(Op, DAG);
  case ISD::FROUNDEVEN:
  case ISD::FLOG2:
  case ISD::FLOG:
  case ISD::FLOG10:
  case ISD::FEXP:
  case ISD::FEXP10:
  case ISD::FEXP2:
  }
  return Op;
}

  switch (N->getOpcode()) {
    return;
  case ISD::FLOG2:
      Results.push_back(Lowered);
    return;
  case ISD::FLOG:
  case ISD::FLOG10:
      Results.push_back(Lowered);
    return;
  case ISD::FEXP2:
      Results.push_back(Lowered);
    return;
  case ISD::FEXP:
  case ISD::FEXP10:
      Results.push_back(Lowered);
    return;
      Results.push_back(Lowered);
    return;
  default:
    return;
  }
}


    if (std::optional<uint32_t> Address =
      if (IsNamedBarrier) {
        unsigned BarCnt = DL.getTypeAllocSize(GV->getValueType()) / 16;
      }
    } else if (IsNamedBarrier) {
      llvm_unreachable("named barrier should have an assigned address");
    }
  }

      GV->getName() != "llvm.amdgcn.module.lds" &&
        Fn, "local memory global used by non-kernel function",

    DAG.setRoot(OutputChain);
    return DAG.getPOISON(Op.getValueType());
  }

  assert(G->getOffset() == 0 &&
         "Do not know what to do with an non-zero offset");

  }
}


  EVT VT = Op.getValueType();
  unsigned OpBitSize = Op.getOperand(0).getValueType().getSizeInBits();
  if (OpBitSize >= 32 && OpBitSize % 32 == 0) {
    unsigned NewNumElt = OpBitSize / 32;
    EVT NewEltVT = (NewNumElt == 1) ? MVT::i32
                                      MVT::i32, NewNumElt);
    for (const SDUse &U : Op->ops()) {
      SDValue NewIn = DAG.getNode(ISD::BITCAST, SL, NewEltVT, In);
      if (NewNumElt > 1)
      else
        Args.push_back(NewIn);
    }

                                   NewNumElt * Op.getNumOperands());
      return DAG.getNode(ISD::BITCAST, SL, VT, BV);
    }
  }

  for (const SDUse &U : Op->ops())

}

  unsigned Start = Op.getConstantOperandVal(1);
  EVT VT = Op.getValueType();
  EVT SrcVT = Op.getOperand(0).getValueType();

    assert(NumElt % 2 == 0 && NumSrcElt % 2 == 0 && "expect legal types");

    EVT NewVT = NumElt == 2
                    ? MVT::i32
    SDValue Tmp = DAG.getNode(ISD::BITCAST, SL, NewSrcVT, Op.getOperand(0));

    if (NumElt == 2)
      Tmp = Args[0];
    else

    return DAG.getNode(ISD::BITCAST, SL, VT, Tmp);
  }

}


  if (Val.getOpcode() == ISD::FNEG)

  return Val;
}

  if (Val.getOpcode() == ISD::FNEG)
  if (Val.getOpcode() == ISD::FABS)
  return Val;
}

  switch (CCOpcode) {
    break;
    if (LHS == True)
      return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS);
    return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS);
  }

    if (LHS == True)
      return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS);
    return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS);
  }
    if (LHS == True)
      return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS);
    return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS);
  }

    if (LHS == True)
      return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS);
    return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS);
  }
  }
}


  if ((LHS == True && RHS == False) || (LHS == False && RHS == True))

  if (LHS == NegTrue && CFalse && CRHS) {
      if (Combined)
        return DAG.getNode(ISD::FNEG, DL, VT, Combined);
    }
  }

}

std::pair<SDValue, SDValue>
  SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Op);

  return std::pair(Lo, Hi);
}


std::pair<EVT, EVT>
  EVT LoVT, HiVT;
  unsigned LoNumElts = PowerOf2Ceil((NumElts + 1) / 2);
  HiVT = NumElts - LoNumElts == 1
             ? EltVT
  return std::pair(LoVT, HiVT);
}
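// Explanatory note: the split sends PowerOf2Ceil((NumElts + 1) / 2) elements
// to the low half, so the low type is always a power-of-two vector the
// legalizer handles directly, and the remainder (possibly a single scalar
// element) forms the high type.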


std::pair<SDValue, SDValue>
                                  const EVT &LoVT, const EVT &HiVT,
  EVT VT = N.getValueType();
         "More vector elements requested than available!");

    return {Lo, Hi};
  }

                     HiNumElts);
    return {Lo, Hi};
  }

  return {Lo, Hi};
}

  EVT VT = Op.getValueType();

  }

  SDValue BasePtr = Load->getBasePtr();
  EVT MemVT = Load->getMemoryVT();

  const MachinePointerInfo &SrcValue = Load->getMemOperand()->getPointerInfo();

  EVT LoVT, HiVT;
  EVT LoMemVT, HiMemVT;

  std::tie(LoMemVT, HiMemVT) = getSplitDestVTs(MemVT, DAG);

  Align BaseAlign = Load->getAlign();

      Load->getExtensionType(), SL, LoVT, Load->getChain(), BasePtr, SrcValue,
      LoMemVT, BaseAlign, Load->getMemOperand()->getFlags(), Load->getAAInfo());
      Load->getExtensionType(), SL, HiVT, Load->getChain(), HiPtr,
      Load->getMemOperand()->getFlags(), Load->getAAInfo());

  if (LoVT == HiVT) {
  } else {
        VT, Join, HiLoad,
  }

}
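// Explanatory note: a wide vector load is legalized here as two loads, a low
// part and a high part at a pointer offset, with the memory VTs produced by
// getSplitDestVTs above; the two results are then rejoined (concat or
// insert-subvector, depending on whether the halves match) into the original
// vector type.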

  EVT VT = Op.getValueType();
  SDValue BasePtr = Load->getBasePtr();
  EVT MemVT = Load->getMemoryVT();
  const MachinePointerInfo &SrcValue = Load->getMemOperand()->getPointerInfo();
  Align BaseAlign = Load->getAlign();

  if (NumElements != 3 ||
      (BaseAlign < Align(8) &&

  assert(NumElements == 3);

  EVT WideVT =
  EVT WideMemVT =
      Load->getExtensionType(), SL, WideVT, Load->getChain(), BasePtr, SrcValue,
      WideMemVT, BaseAlign, Load->getMemOperand()->getFlags());
                 SL);
}

  SDValue Val = Store->getValue();

  EVT MemVT = Store->getMemoryVT();
  SDValue Chain = Store->getChain();
  SDValue BasePtr = Store->getBasePtr();

  EVT LoVT, HiVT;
  EVT LoMemVT, HiMemVT;

  std::tie(LoMemVT, HiMemVT) = getSplitDestVTs(MemVT, DAG);
  std::tie(Lo, Hi) = splitVector(Val, SL, LoVT, HiVT, DAG);

  const MachinePointerInfo &SrcValue = Store->getMemOperand()->getPointerInfo();
  Align BaseAlign = Store->getAlign();

      DAG.getTruncStore(Chain, SL, Lo, BasePtr, SrcValue, LoMemVT, BaseAlign,
                        Store->getMemOperand()->getFlags(), Store->getAAInfo());
                        Store->getMemOperand()->getFlags(), Store->getAAInfo());

}


                                               bool Sign) const {
  EVT VT = Op.getValueType();
  MVT IntVT = MVT::i32;
  MVT FltVT = MVT::f32;

  if (LHSSignBits < 9)

  if (RHSSignBits < 9)

  unsigned SignBits = std::min(LHSSignBits, RHSSignBits);
  unsigned DivBits = BitSize - SignBits;
  if (Sign)
    ++DivBits;

  if (Sign) {
  }

                           fa, DAG.getNode(AMDGPUISD::RCP, DL, FltVT, fb));

  fq = DAG.getNode(ISD::FTRUNC, DL, FltVT, fq);

  bool UseFmadFtz = false;
  if (Subtarget->isGCN()) {
    UseFmadFtz =
  }

  unsigned OpCode = !Subtarget->hasMadMacF32Insts() ? (unsigned)ISD::FMA
                    : UseFmadFtz ? (unsigned)AMDGPUISD::FMAD_FTZ

  fr = DAG.getNode(ISD::FABS, DL, FltVT, fr);

  fb = DAG.getNode(ISD::FABS, DL, FltVT, fb);

  if (Sign) {
  } else {
  }

}
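// Explanatory note: this path emulates narrow integer division in f32. It is
// sound because an f32 carries 24 mantissa bits, so once both operands fit
// inside the computed DivBits budget, fq = fa * rcp(fb) truncated toward zero
// plus a single mad/correction step recovers the exact integer quotient.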

  EVT VT = Op.getValueType();

  assert(VT == MVT::i64 && "LowerUDIVREM64 expects an i64");

  std::tie(LHS_Lo, LHS_Hi) = DAG.SplitScalar(LHS, DL, HalfVT, HalfVT);

  std::tie(RHS_Lo, RHS_Hi) = DAG.SplitScalar(RHS, DL, HalfVT, HalfVT);

                              LHS_Lo, RHS_Lo);

    Results.push_back(DAG.getNode(ISD::BITCAST, DL, MVT::i64, DIV));
    Results.push_back(DAG.getNode(ISD::BITCAST, DL, MVT::i64, REM));
    return;
  }

    unsigned FMAD =
        : (unsigned)AMDGPUISD::FMAD_FTZ;

                               Cvt_Lo);
    SDValue Rcp = DAG.getNode(AMDGPUISD::RCP, DL, MVT::f32, Mad1);
    SDValue Trunc = DAG.getNode(ISD::FTRUNC, DL, MVT::f32, Mul2);
                               Mul1);

    SDValue Mulhi1_Lo, Mulhi1_Hi;
    std::tie(Mulhi1_Lo, Mulhi1_Hi) =
                                  Mulhi1_Lo, Zero1);
                                  Mulhi1_Hi, Add1_Lo.getValue(1));

    SDValue Mulhi2_Lo, Mulhi2_Hi;
    std::tie(Mulhi2_Lo, Mulhi2_Hi) =
                                  Mulhi2_Lo, Zero1);
                                  Mulhi2_Hi, Add2_Lo.getValue(1));

    SDValue Mul3_Lo, Mul3_Hi;
    std::tie(Mul3_Lo, Mul3_Hi) = DAG.SplitScalar(Mul3, DL, HalfVT, HalfVT);
                                  Mul3_Lo, Zero1);
                                  Mul3_Hi, Sub1_Lo.getValue(1));

                                  RHS_Lo, Zero1);
                                  RHS_Hi, Sub1_Lo.getValue(1));

                                  RHS_Lo, Zero1);
                                  RHS_Hi, Sub2_Lo.getValue(1));

    return;
  }

  REM = DAG.getNode(ISD::BITCAST, DL, MVT::i64, REM);

  const unsigned halfBitWidth = HalfVT.getSizeInBits();

  for (unsigned i = 0; i < halfBitWidth; ++i) {
    const unsigned bitPos = halfBitWidth - i - 1;
  }

  DIV = DAG.getNode(ISD::BITCAST, DL, MVT::i64, DIV);
}
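// Explanatory note: the fast 64-bit path above builds a reciprocal estimate
// with RCP in f32, refines it through the mulhi/add-with-carry steps, forms a
// quotient guess, and then conditionally subtracts the divisor (the
// RHS_Lo/RHS_Hi subtract pairs) to correct quotient and remainder; the
// trailing bitPos loop is a bit-at-a-time restoring-division fallback.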

  EVT VT = Op.getValueType();

  if (VT == MVT::i64) {
  }

  if (VT == MVT::i32) {
    return Res;
  }

}

  EVT VT = Op.getValueType();

  if (VT == MVT::i32) {
    return Res;
  }

  if (VT == MVT::i64 &&

                              LHS_Lo, RHS_Lo);
    };
  }

  SDValue RSign = LHSign;

    Div,
    Rem
  };
}


  SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);

  EVT SetCCVT =

}

  const unsigned FractBits = 52;
  const unsigned ExpBits = 11;

  SDValue ExpPart = DAG.getNode(AMDGPUISD::BFE_U32, SL, MVT::i32,
                                DAG.getConstant(FractBits - 32, SL, MVT::i32),
                                DAG.getConstant(ExpBits, SL, MVT::i32));

  return Exp;
}


  assert(Op.getValueType() == MVT::f64);

  const unsigned FractBits = 52;

  const SDValue SignBitMask = DAG.getConstant(UINT32_C(1) << 31, SL, MVT::i32);

  SignBit64 = DAG.getNode(ISD::BITCAST, SL, MVT::i64, SignBit64);

  SDValue BcInt = DAG.getNode(ISD::BITCAST, SL, MVT::i64, Src);
      = DAG.getConstant((UINT64_C(1) << FractBits) - 1, SL, MVT::i64);

  EVT SetCCVT =

  const SDValue FiftyOne = DAG.getConstant(FractBits - 1, SL, MVT::i32);

  return DAG.getNode(ISD::BITCAST, SL, MVT::f64, Tmp2);
}


  assert(Op.getValueType() == MVT::f64);

  SDValue Fabs = DAG.getNode(ISD::FABS, SL, MVT::f64, Src);

  EVT SetCCVT =

  return DAG.getSelect(SL, MVT::f64, Cond, Src, Tmp2);
}


  return DAG.getNode(ISD::FROUNDEVEN, SDLoc(Op), Op.getValueType(),
                     Op.getOperand(0));
}

  auto VT = Op.getValueType();
  auto Arg = Op.getOperand(0u);
  return DAG.getNode(ISD::FROUNDEVEN, SDLoc(Op), VT, Arg);
}

  EVT VT = Op.getValueType();

  SDValue AbsDiff = DAG.getNode(ISD::FABS, SL, VT, Diff);

  EVT SetCCVT =

}

  SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);

  EVT SetCCVT =

}

  switch (Src.getOpcode()) {
  case ISD::FP_EXTEND:
    return Src.getOperand(0).getValueType() == MVT::f16;
  case ISD::FP16_TO_FP:
  case ISD::FFREXP:
    return true;
    unsigned IntrinsicID = Src.getConstantOperandVal(0);
    switch (IntrinsicID) {
    case Intrinsic::amdgcn_frexp_mant:
      return true;
    default:
      return false;
    }
  }
  default:
    return false;
  }
}

  return Flags.hasApproximateFuncs();
}

  EVT VT = Src.getValueType();
  SDValue SmallestNormal =

  return IsLtSmallestNormal;
}

  EVT VT = Src.getValueType();

  SDValue Fabs = DAG.getNode(ISD::FABS, SL, VT, Src, Flags);
  return IsFinite;
}

std::pair<SDValue, SDValue>
    return {};

  MVT VT = MVT::f32;
  SDValue SmallestNormal =

      DAG.getNode(ISD::SELECT, SL, VT, IsLtSmallestNormal, Scale32, One, Flags);

  return {ScaledInput, IsLtSmallestNormal};
}

  EVT VT = Op.getValueType();

  if (VT == MVT::f16) {
    assert(!Subtarget->has16BitInsts());
    SDValue Ext = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src, Flags);
    SDValue Log = DAG.getNode(AMDGPUISD::LOG, SL, MVT::f32, Ext, Flags);
  }

  auto [ScaledInput, IsLtSmallestNormal] =
  if (!ScaledInput)
    return DAG.getNode(AMDGPUISD::LOG, SL, VT, Src, Flags);

  SDValue Log2 = DAG.getNode(AMDGPUISD::LOG, SL, VT, ScaledInput, Flags);

      DAG.getNode(ISD::SELECT, SL, VT, IsLtSmallestNormal, ThirtyTwo, Zero);
}

  EVT VT = Op.getValueType();
  const bool IsLog10 = Op.getOpcode() == ISD::FLOG10;
  assert(IsLog10 || Op.getOpcode() == ISD::FLOG);

  if (VT == MVT::f16 || Flags.hasApproximateFuncs()) {
    if (VT == MVT::f16 && !Subtarget->has16BitInsts()) {
      X = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, X, Flags);
    }

    if (VT == MVT::f16 && !Subtarget->has16BitInsts()) {
    }

    return Lowered;
  }

  if (ScaledInput)
    X = ScaledInput;

  if (Subtarget->hasFastFMAF32()) {
    const float c_log10 = 0x1.344134p-2f;
    const float cc_log10 = 0x1.09f79ep-26f;

    const float c_log = 0x1.62e42ep-1f;
    const float cc_log = 0x1.efa39ep-25f;

    Flags.setAllowContract(false);
  } else {
    const float ch_log10 = 0x1.344000p-2f;
    const float ct_log10 = 0x1.3509f6p-18f;

    const float ch_log = 0x1.62e000p-1f;
    const float ct_log = 0x1.0bfbe8p-15f;

    SDValue YH = DAG.getNode(ISD::BITCAST, DL, MVT::f32, YHInt);

    Flags.setAllowContract(false);
    SDValue Mad0 = getMad(DAG, DL, VT, YH, CT, YTCT, Flags);
    R = getMad(DAG, DL, VT, YH, CH, Mad1);
  }

  const bool IsFiniteOnly =
      (Flags.hasNoNaNs() || Options.NoNaNsFPMath) && Flags.hasNoInfs();

  if (!IsFiniteOnly) {
  }

  if (IsScaled) {
        DAG.getConstantFP(IsLog10 ? 0x1.344136p+3f : 0x1.62e430p+4f, DL, VT);
  }

  return R;
}
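// Explanatory note: log/log10 are formed as log2(x) * C with the constant
// split into head and tail parts (c + cc with fast FMA, ch + ct otherwise) so
// the product keeps more than single precision; inputs scaled up by 2^32 to
// escape the denormal range are compensated by subtracting the matching
// log(2^32) constant afterwards.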

  EVT VT = Src.getValueType();
  unsigned LogOp =
      VT == MVT::f32 ? (unsigned)AMDGPUISD::LOG : (unsigned)ISD::FLOG2;

  double Log2BaseInverted =

  if (VT == MVT::f32) {
    auto [ScaledInput, IsScaled] = getScaledLogInput(DAG, SL, Src, Flags);
    if (ScaledInput) {
      SDValue LogSrc = DAG.getNode(AMDGPUISD::LOG, SL, VT, ScaledInput, Flags);
      SDValue ScaledResultOffset =
          DAG.getConstantFP(-32.0 * Log2BaseInverted, SL, VT);

                                   ScaledResultOffset, Zero, Flags);

      if (Subtarget->hasFastFMAF32())
        return DAG.getNode(ISD::FMA, SL, VT, LogSrc, Log2Inv, ResultOffset,
                           Flags);
    }
  }

  SDValue Log2Operand = DAG.getNode(LogOp, SL, VT, Src, Flags);
  SDValue Log2BaseInvertedOperand = DAG.getConstantFP(Log2BaseInverted, SL, VT);

  return DAG.getNode(ISD::FMUL, SL, VT, Log2Operand, Log2BaseInvertedOperand,
                     Flags);
}

  EVT VT = Op.getValueType();

  if (VT == MVT::f16) {
    assert(!Subtarget->has16BitInsts());
    SDValue Ext = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src, Flags);
    SDValue Log = DAG.getNode(AMDGPUISD::EXP, SL, MVT::f32, Ext, Flags);
  }

  assert(VT == MVT::f32);

    return DAG.getNode(AMDGPUISD::EXP, SL, MVT::f32, Src, Flags);

  SDValue Exp2 = DAG.getNode(AMDGPUISD::EXP, SL, VT, AddInput, Flags);

  return DAG.getNode(ISD::FMUL, SL, VT, Exp2, ResultScale, Flags);
}

                                          bool IsExp10) const {
  EVT VT = X.getValueType();

  return DAG.getNode(VT == MVT::f32 ? (unsigned)AMDGPUISD::EXP
                                    : (unsigned)ISD::FEXP2,
                     SL, VT, Mul, Flags);
}

  EVT VT = X.getValueType();

  SDValue Exp2 = DAG.getNode(AMDGPUISD::EXP, SL, VT, ExpInput, Flags);

  SDValue AdjustedResult =
      DAG.getNode(ISD::FMUL, SL, VT, Exp2, ResultScaleFactor, Flags);

  return DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, AdjustedResult, Exp2,
                     Flags);
}

  const EVT VT = X.getValueType();

  const unsigned Exp2Op = VT == MVT::f32 ? static_cast<unsigned>(AMDGPUISD::EXP)
                                         : static_cast<unsigned>(ISD::FEXP2);

    SDValue Exp2_0 = DAG.getNode(Exp2Op, SL, VT, Mul0, Flags);
    SDValue Exp2_1 = DAG.getNode(Exp2Op, SL, VT, Mul1, Flags);
  }

  SDValue Exp2_0 = DAG.getNode(Exp2Op, SL, VT, Mul0, Flags);
  SDValue Exp2_1 = DAG.getNode(Exp2Op, SL, VT, Mul1, Flags);

  SDValue AdjustedResult =
      DAG.getNode(ISD::FMUL, SL, VT, MulExps, ResultScaleFactor, Flags);

  return DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, AdjustedResult, MulExps,
                     Flags);
}

  EVT VT = Op.getValueType();
  const bool IsExp10 = Op.getOpcode() == ISD::FEXP10;

  if (allowApproxFunc(DAG, Flags)) {
  }

    SDValue Ext = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, X, Flags);
  }

  assert(VT == MVT::f32);

  if (Subtarget->hasFastFMAF32()) {
    const float cc_exp = 0x1.4ae0bep-26f;
    const float c_exp10 = 0x1.a934f0p+1f;
    const float cc_exp10 = 0x1.2f346ep-24f;

    SDValue NegPH = DAG.getNode(ISD::FNEG, SL, VT, PH, Flags);
  } else {
    const float ch_exp = 0x1.714000p+0f;
    const float cl_exp = 0x1.47652ap-12f;

    const float ch_exp10 = 0x1.a92000p+1f;
    const float cl_exp10 = 0x1.4f0978p-11f;

    SDValue XAsInt = DAG.getNode(ISD::BITCAST, SL, MVT::i32, X);
    SDValue XH = DAG.getNode(ISD::BITCAST, SL, VT, XHAsInt);

    SDValue Mad0 = getMad(DAG, SL, VT, XL, CH, XLCL, Flags);
    PL = getMad(DAG, SL, VT, XH, CL, Mad0, Flags);
  }

  SDValue E = DAG.getNode(ISD::FROUNDEVEN, SL, VT, PH, Flags);

  SDValue Exp2 = DAG.getNode(AMDGPUISD::EXP, SL, VT, A, Flags);

  SDValue R = DAG.getNode(ISD::FLDEXP, SL, VT, Exp2, IntE, Flags);

  SDValue UnderflowCheckConst =
      DAG.getConstantFP(IsExp10 ? -0x1.66d3e8p+5f : -0x1.9d1da0p+6f, SL, VT);

  if (!Flags.hasNoInfs()) {
    SDValue OverflowCheckConst =
        DAG.getConstantFP(IsExp10 ? 0x1.344136p+5f : 0x1.62e430p+6f, SL, VT);
  }

  return R;
}
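// Explanatory note: exp/exp10 are computed as exp2(x * log2(e)) (respectively
// log2(10)), with the multiply split into high and low parts PH/PL for extra
// precision; the rounded integer part E is applied through ldexp, and the
// final underflow/overflow selects clamp the result to 0 or infinity outside
// the representable input range.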

  auto Opc = Op.getOpcode();
  auto Arg = Op.getOperand(0u);
  auto ResultVT = Op.getValueType();

  if (ResultVT != MVT::i8 && ResultVT != MVT::i16)
    return {};

  assert(ResultVT == Arg.getValueType());

  const uint64_t NumBits = ResultVT.getFixedSizeInBits();

    NewOp = DAG.getNode(ISD::SHL, SL, MVT::i32, NewOp, NumExtBits);
    NewOp = DAG.getNode(Opc, SL, MVT::i32, NewOp);
  } else {
    NewOp = DAG.getNode(Opc, SL, MVT::i32, NewOp);
    NewOp = DAG.getNode(ISD::SUB, SL, MVT::i32, NewOp, NumExtBits);
  }

}

  unsigned NewOpc = Ctlz ? AMDGPUISD::FFBH_U32 : AMDGPUISD::FFBL_B32;

  bool Is64BitScalar = !Src->isDivergent() && Src.getValueType() == MVT::i64;

  if (Src.getValueType() == MVT::i32 || Is64BitScalar) {
    SDValue NewOpr = DAG.getNode(NewOpc, SL, MVT::i32, Src);
    if (!ZeroUndef) {
          Op.getValueType().getScalarSizeInBits(), SL, MVT::i32);
      NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, NewOpr, ConstVal);
    }
  }

  if (Ctlz)
    OprLo = DAG.getNode(AddOpc, SL, MVT::i32, OprLo, Const32);
  else
    OprHi = DAG.getNode(AddOpc, SL, MVT::i32, OprHi, Const32);

  NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, OprLo, OprHi);
  if (!ZeroUndef) {
    NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, NewOpr, Const64);
  }

}
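// Explanatory note: the 64-bit count is assembled from two 32-bit FFBH/FFBL
// results: the half searched second has 32 added to its count, and umin picks
// the winning half (these find-first ops return all-ones for a zero input, so
// the min falls through naturally); a final umin clamps the result to 64 when
// zero inputs must produce a defined value.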

                                               bool Signed) const {

  if (Signed && Subtarget->isGCN()) {

                                     OppositeSign);

    ShAmt = DAG.getNode(AMDGPUISD::FFBH_I32, SL, MVT::i32, Hi);

    ShAmt = DAG.getNode(ISD::UMIN, SL, MVT::i32, ShAmt, MaxShAmt);
  } else {
  }

  }

  unsigned Opc =

                            ShAmt);

  if (Subtarget->isGCN())
    return DAG.getNode(ISD::FLDEXP, SL, MVT::f32, FVal, ShAmt);

                     DAG.getNode(ISD::BITCAST, SL, MVT::i32, FVal), Exp);

    IVal = DAG.getNode(ISD::OR, SL, MVT::i32, IVal, Sign);
  }
  return DAG.getNode(ISD::BITCAST, SL, MVT::f32, IVal);
}

                                               bool Signed) const {

                              SL, MVT::f64, Hi);

  SDValue LdExp = DAG.getNode(ISD::FLDEXP, SL, MVT::f64, CvtHi,

}

  EVT DestVT = Op.getValueType();
  EVT SrcVT = Src.getValueType();

  if (SrcVT == MVT::i16) {
    if (DestVT == MVT::f16)
      return Op;

  }

  if (DestVT == MVT::bf16) {
  }

  if (SrcVT != MVT::i64)
    return Op;

  if (Subtarget->has16BitInsts() && DestVT == MVT::f16) {

    return FPRound;
  }

  if (DestVT == MVT::f32)

  assert(DestVT == MVT::f64);
}

  EVT DestVT = Op.getValueType();

  EVT SrcVT = Src.getValueType();

  if (SrcVT == MVT::i16) {
    if (DestVT == MVT::f16)
      return Op;
  }

  if (DestVT == MVT::bf16) {
  }

  if (SrcVT != MVT::i64)
    return Op;

  if (Subtarget->has16BitInsts() && DestVT == MVT::f16) {

    return FPRound;
  }

  if (DestVT == MVT::f32)

  assert(DestVT == MVT::f64);
}

                                               bool Signed) const {

  EVT SrcVT = Src.getValueType();

  assert(SrcVT == MVT::f32 || SrcVT == MVT::f64);

  SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, SrcVT, Src);
  if (Signed && SrcVT == MVT::f32) {
                           DAG.getNode(ISD::BITCAST, SL, MVT::i32, Trunc),
    Trunc = DAG.getNode(ISD::FABS, SL, SrcVT, Trunc);
  }

  if (SrcVT == MVT::f64) {
                                    SrcVT);
                                    SrcVT);
  } else {
  }

                                 SL, MVT::i32, FloorMul);

  SDValue Result = DAG.getNode(ISD::BITCAST, SL, MVT::i64,

  if (Signed && SrcVT == MVT::f32) {
    Sign = DAG.getNode(ISD::BITCAST, SL, MVT::i64,
    Result =
        DAG.getNode(ISD::XOR, SL, MVT::i64, Result, Sign), Sign);
  }

  return Result;
}

    return DAG.getNode(AMDGPUISD::FP_TO_FP16, DL, Op.getValueType(), N0);

  if (Op->getFlags().hasApproximateFuncs()) {
  }

}

  assert(Src.getSimpleValueType() == MVT::f64);

  const unsigned ExpMask = 0x7ff;
  const unsigned ExpBiasf64 = 1023;
  const unsigned ExpBiasf16 = 15;

                          DAG.getConstant(-ExpBiasf64 + ExpBiasf16, DL, MVT::i32));

  MaskedSig = DAG.getNode(ISD::OR, DL, MVT::i32, MaskedSig, U);

                          One, E);

}

  unsigned OpOpcode = Op.getOpcode();
  EVT SrcVT = Src.getValueType();
  EVT DestVT = Op.getValueType();

  if (SrcVT == MVT::f16 && DestVT == MVT::i16)
    return Op;

  if (SrcVT == MVT::bf16) {
    SDValue PromotedSrc = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
    return DAG.getNode(Op.getOpcode(), DL, DestVT, PromotedSrc);
  }

  if (DestVT == MVT::i16 && (SrcVT == MVT::f32 || SrcVT == MVT::f64)) {
    SDValue FpToInt32 = DAG.getNode(OpOpcode, DL, MVT::i32, Src);
  }

  if (DestVT != MVT::i64)
    return Op;

  if (SrcVT == MVT::f16 ||
      (SrcVT == MVT::f32 && Src.getOpcode() == ISD::FP16_TO_FP)) {
    SDValue FpToInt32 = DAG.getNode(OpOpcode, DL, MVT::i32, Src);
    unsigned Ext =
    return DAG.getNode(Ext, DL, MVT::i64, FpToInt32);
  }

  if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
}

  MVT VT = Op.getSimpleValueType();

  for (unsigned I = 0; I < NElts; ++I)

}

  unsigned NewOpcode = Node24->getOpcode();
  if (IsIntrin) {
    switch (IID) {
    case Intrinsic::amdgcn_mul_i24:
      NewOpcode = AMDGPUISD::MUL_I24;
      break;
    case Intrinsic::amdgcn_mul_u24:
      NewOpcode = AMDGPUISD::MUL_U24;
      break;
    case Intrinsic::amdgcn_mulhi_i24:
      NewOpcode = AMDGPUISD::MULHI_I24;
      break;
    case Intrinsic::amdgcn_mulhi_u24:
      NewOpcode = AMDGPUISD::MULHI_U24;
      break;
    default:
    }
  }

  if (DemandedLHS || DemandedRHS)
                       DemandedLHS ? DemandedLHS : LHS,
                       DemandedRHS ? DemandedRHS : RHS);

    return SDValue(Node24, 0);
    return SDValue(Node24, 0);

}

3848

3849template

3852 if (Width + Offset < 32) {

3854 IntTy Result = static_cast<IntTy>(Shl) >> (32 - Width);

3855 if constexpr (std::is_signed_v) {

3857 } else {

3859 }

3860 }

3861

3863}

3864

3868 if (M->isVolatile())

3869 return true;

3870 }

3871 }

3872

3873 return false;

3874}

3875

3877

3879 return false;

3880

3882 return false;

3883

3885

3887 return false;

3888

3889 if (Size == 3 || (Size > 4 && (Size % 4 != 0)))

3890 return false;

3891

3892 return true;

3893}

3894

3895

3896

3901

3905

3909

3913 unsigned IsFast;

3915

3916

3917

3918

3923

3926

3928 }

3929

3930 if (!IsFast)

3932 }

3933

3936

3938

3942

3943 SDValue BC = DAG.getNode(ISD::BITCAST, SL, VT, NewLoad);

3946}

3947

3948

3949

3954

3958

3961

3966 unsigned IsFast;

3968

3969

3970

3971

3972

3977

3979 }

3980

3981 if (!IsFast)

3983 }

3984

3987

3990

3991

3992

3993 bool OtherUses = !Val.hasOneUse();

3994 SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, NewVT, Val);

3995 if (OtherUses) {

3996 SDValue CastBack = DAG.getNode(ISD::BITCAST, SL, VT, CastVal);

3998 }

3999

4002}

4003

4004

4005

4006

4010 SDValue N0 = N->getOperand(0);

4011

4012

4013

4015 SDValue N1 = N->getOperand(1);

4018

4020 EVT SrcVT = Src.getValueType();

4021 if (SrcVT.bitsGE(ExtVT)) {

4022 SDValue NewInReg = DAG.getNode(N->getOpcode(), SL, SrcVT, Src, N1);

4024 }

4025 }

4026

4028}

4029

4032 unsigned IID = N->getConstantOperandVal(0);

4033 switch (IID) {

4034 case Intrinsic::amdgcn_mul_i24:

4035 case Intrinsic::amdgcn_mul_u24:

4036 case Intrinsic::amdgcn_mulhi_i24:

4037 case Intrinsic::amdgcn_mulhi_u24:

4039 case Intrinsic::amdgcn_fract:

4040 case Intrinsic::amdgcn_rsq:

4041 case Intrinsic::amdgcn_rcp_legacy:

4042 case Intrinsic::amdgcn_rsq_legacy:

4043 case Intrinsic::amdgcn_rsq_clamp:

4044 case Intrinsic::amdgcn_tanh:

4045 case Intrinsic::amdgcn_prng_b32: {

4046

4047 SDValue Src = N->getOperand(1);

4048 return Src.isUndef() ? Src : SDValue();

4049 }

4050 case Intrinsic::amdgcn_frexp_exp: {

4051

4052

4053

4054 SDValue Src = N->getOperand(1);

4056 if (PeekSign == Src)

4059 0);

4060 }

4061 default:

4063 }

4064}

4065

4066

4067

4075

4078

4081

4082

4083

4086

4088 return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec);

4089}

4090

4093 EVT VT = N->getValueType(0);

4094 SDValue LHS = N->getOperand(0);

4095 SDValue RHS = N->getOperand(1);

4099

4100 unsigned RHSVal;

4101 if (CRHS) {

4103 if (!RHSVal)

4104 return LHS;

4105

4106 switch (LHS->getOpcode()) {

4107 default:

4108 break;

4112 SDValue X = LHS->getOperand(0);

4113

4114 if (VT == MVT::i32 && RHSVal == 16 && X.getValueType() == MVT::i16 &&

4116

4117

4119 MVT::v2i16, SL,

4121 return DAG.getNode(ISD::BITCAST, SL, MVT::i32, Vec);

4122 }

4123

4124

4125 if (VT != MVT::i64)

4126 break;

4129 if (LZ < RHSVal)

4130 break;

4131 EVT XVT = X.getValueType();

4134 }

4135 }

4136 }

4137

4140

4141

4142

4143

4145

4149 : TargetScalarType;

4150

4154

4155 if (CRHS) {

4157 TargetType);

4158 } else {

4160 const SDValue ShiftMask =

4162

4163

4164 ShiftAmt = DAG.getNode(ISD::AND, SL, TargetType, TruncShiftAmt, ShiftMask);

4165 }

4166

4170

4173

4179

4181 for (unsigned I = 0; I != NElts; ++I)

4182 HiAndLoOps[2 * I + 1] = HiOps[I];

4184 } else {

4186 Vec = DAG.getBuildVector(ConcatType, SL, {Zero, NewShift});

4187 }

4188 return DAG.getNode(ISD::BITCAST, SL, VT, Vec);

4189}

4190

  SDValue RHS = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDValue LHS = N->getOperand(0);

      : TargetScalarType;

  if (CRHS) {
                           TargetType);
        (ElementType.getSizeInBits() - 1)) {
      ShiftAmt = ShiftFullAmt;
    } else {
      const SDValue ShiftMask =

      ShiftAmt = DAG.getNode(ISD::AND, SL, TargetType, TruncShiftAmt, ShiftMask);
  }

  EVT ConcatType;
    SDLoc LHSSL(LHS);

    SDValue SplitLHS = DAG.getNode(ISD::BITCAST, LHSSL, ConcatType, LHS);

    for (unsigned I = 0; I != NElts; ++I) {
      HiOps[I] = HiAndLoOps[2 * I + 1];
    }
  } else {
    SDValue SplitLHS = DAG.getNode(ISD::BITCAST, LHSSL, ConcatType, LHS);
  }

  } else {
    HiShift = DAG.getNode(ISD::SRA, SL, TargetType, Hi, ShiftFullAmt);
  }

    for (unsigned I = 0; I != NElts; ++I) {
      HiAndLoOps[2 * I + 1] = HiOps[I];
      HiAndLoOps[2 * I] = LoOps[I];
    }
  } else {
    Vec = DAG.getBuildVector(ConcatType, SL, {NewShift, HiShift});
  }
  return DAG.getNode(ISD::BITCAST, SL, VT, Vec);
}

  SDValue RHS = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDValue LHS = N->getOperand(0);
  unsigned RHSVal;

  if (CRHS) {
    if (LHS.getOpcode() == ISD::AND) {
        unsigned MaskIdx, MaskLen;
        if (Mask->getAPIntValue().isShiftedMask(MaskIdx, MaskLen) &&
            MaskIdx == RHSVal) {
                                       N->getOperand(1)),
                                       N->getOperand(1)));
        }
      }
    }
  }

      : TargetScalarType;

  if (CRHS) {
                           TargetType);
  } else {
    const SDValue ShiftMask =

    ShiftAmt = DAG.getNode(ISD::AND, SL, TargetType, TruncShiftAmt, ShiftMask);
  }

  EVT ConcatType;
    SDLoc LHSSL(LHS);

    SDValue SplitLHS = DAG.getNode(ISD::BITCAST, LHSSL, ConcatType, LHS);

    for (unsigned I = 0; I != NElts; ++I)
      HiOps[I] = HiAndLoOps[2 * I + 1];
  } else {
    SDValue SplitLHS = DAG.getNode(ISD::BITCAST, LHSSL, ConcatType, LHS);
  }

    for (unsigned I = 0; I != NElts; ++I)
      HiAndLoOps[2 * I] = LoOps[I];
  } else {
    Vec = DAG.getBuildVector(ConcatType, SL, {NewShift, Zero});
  }
  return DAG.getNode(ISD::BITCAST, SL, VT, Vec);
}

  EVT VT = N->getValueType(0);
  SDValue Src = N->getOperand(0);

  if (Src.getOpcode() == ISD::BITCAST && !VT.isVector()) {
        Elt0 = DAG.getNode(ISD::BITCAST, SL,
      }

    }
  }

        unsigned BitIndex = K->getZExtValue();
        unsigned PartIndex = BitIndex / SrcEltSize;

        if (PartIndex * SrcEltSize == BitIndex &&
        }
      }
    }
  }

    EVT SrcVT = Src.getValueType();
        (Src.getOpcode() == ISD::SRL ||
         Src.getOpcode() == ISD::SRA ||
         Src.getOpcode() == ISD::SHL)) {

      const unsigned MaxCstSize =

                                  Src.getOperand(0));
        }

        SDValue ShrunkShift = DAG.getNode(Src.getOpcode(), SL, MidVT,
                                          Trunc, Amt);
      }
    }
  }

}

  if (Size <= 32) {
    unsigned MulOpc = Signed ? AMDGPUISD::MUL_I24 : AMDGPUISD::MUL_U24;
    return DAG.getNode(MulOpc, SL, MVT::i32, N0, N1);
  }

  unsigned MulLoOpc = Signed ? AMDGPUISD::MUL_I24 : AMDGPUISD::MUL_U24;
  unsigned MulHiOpc = Signed ? AMDGPUISD::MULHI_I24 : AMDGPUISD::MULHI_U24;

  SDValue MulLo = DAG.getNode(MulLoOpc, SL, MVT::i32, N0, N1);
  SDValue MulHi = DAG.getNode(MulHiOpc, SL, MVT::i32, N0, N1);

}
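// Explanatory note: for results up to 32 bits a single MUL_I24/MUL_U24 node
// suffices; for wider results the low and high 32-bit halves come from the
// mul24 and mulhi24 node pair above and are recombined into the full product.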

  if (V->getOpcode() != ISD::ADD)

}

  EVT VT = N->getValueType(0);

  if (N->isDivergent())

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

    if (!AddOp)

    if (V.hasOneUse() || all_of(V->users(), [](const SDNode *U) -> bool {
          return U->getOpcode() == ISD::MUL;
        }))
      return AddOp;

  };

  if (SDValue MulOper = IsFoldableAdd(N0)) {
    SDValue MulVal = DAG.getNode(N->getOpcode(), DL, VT, N1, MulOper);
  }

  if (SDValue MulOper = IsFoldableAdd(N1)) {
    SDValue MulVal = DAG.getNode(N->getOpcode(), DL, VT, N0, MulOper);
  }

  if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) {
  } else if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) {
  } else {
  }

}

  if (N->getValueType(0) != MVT::i32)

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  unsigned LoOpcode = 0;
  unsigned HiOpcode = 0;
    if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) {
      LoOpcode = AMDGPUISD::MUL_I24;
      HiOpcode = AMDGPUISD::MULHI_I24;
    }
  } else {
    if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) {
      LoOpcode = AMDGPUISD::MUL_U24;
      HiOpcode = AMDGPUISD::MULHI_U24;
    }
  }
  if (!LoOpcode)

}

  EVT VT = N->getValueType(0);

  if (!Subtarget->hasMulI24() || VT.isVector())

  if (Subtarget->hasSMulHi() && N->isDivergent())

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  if (isI24(N0, DAG) || isI24(N1, DAG))

  SDValue Mulhi = DAG.getNode(AMDGPUISD::MULHI_I24, DL, MVT::i32, N0, N1);
}

  EVT VT = N->getValueType(0);

  if (Subtarget->hasSMulHi() && N->isDivergent())

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  if (isU24(N0, DAG) || isU24(N1, DAG))

  SDValue Mulhi = DAG.getNode(AMDGPUISD::MULHI_U24, DL, MVT::i32, N0, N1);
}

                                          unsigned Opc) const {
  EVT VT = Op.getValueType();
  if (LegalVT != MVT::i32 && (Subtarget->has16BitInsts() &&
                              LegalVT != MVT::i16))

  if (VT != MVT::i32)

  if (VT != MVT::i32)

  return FFBX;
}

    unsigned Opc =
        isCttzOpc(RHS.getOpcode()) ? AMDGPUISD::FFBL_B32 : AMDGPUISD::FFBH_U32;
    return getFFBX_U32(DAG, CmpLHS, SL, Opc);
  }

    unsigned Opc =
        isCttzOpc(LHS.getOpcode()) ? AMDGPUISD::FFBL_B32 : AMDGPUISD::FFBH_U32;
    return getFFBX_U32(DAG, CmpLHS, SL, Opc);
  }

}

                                         unsigned Op,
                                         const SDLoc &SL,
  return DAG.getNode(Op, SL, VT, NewSelect);
}

  SDValue LHS = N.getOperand(1);
  SDValue RHS = N.getOperand(2);

  EVT VT = N.getValueType();
  if ((LHS.getOpcode() == ISD::FABS && RHS.getOpcode() == ISD::FABS) ||
      (LHS.getOpcode() == ISD::FNEG && RHS.getOpcode() == ISD::FNEG)) {
  }

  bool Inv = false;
  if (RHS.getOpcode() == ISD::FABS || RHS.getOpcode() == ISD::FNEG) {
    Inv = true;
  }

  if ((LHS.getOpcode() == ISD::FNEG || LHS.getOpcode() == ISD::FABS) && CRHS &&

    bool ShouldFoldNeg = true;

      ShouldFoldNeg = false;
    if (LHS.getOpcode() == ISD::FABS && Opc == ISD::FMUL)
      ShouldFoldNeg = false;
    }

    if (ShouldFoldNeg) {
      if (LHS.getOpcode() == ISD::FABS && CRHS->isNegative())

      if (NewLHS.getOpcode() == ISD::FABS &&

      if (LHS.getOpcode() == ISD::FNEG)
        NewRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);

      if (Inv)

                                      Cond, NewLHS, NewRHS);
      return DAG.getNode(LHS.getOpcode(), SL, VT, NewSelect);
    }
  }

}

    return Folded;

  EVT VT = N->getValueType(0);

  SDValue True = N->getOperand(1);
  SDValue False = N->getOperand(2);

  if (Cond.hasOneUse()) {

      SDValue NewCond = DAG.getSetCC(SL, Cond.getValueType(), LHS, RHS, NewCC);
    }

    if (VT == MVT::f32 && Subtarget->hasFminFmaxLegacy()) {
    }
  }

}

    if (C->isZero())

    if (Subtarget->hasInv2PiInlineImm() && isInv2Pi(C->getValueAPF()))

}

  switch (Opc) {
  case ISD::FMAXNUM:
    return ISD::FMINNUM;
  case ISD::FMINNUM:
    return ISD::FMAXNUM;
  case ISD::FMAXNUM_IEEE:
    return ISD::FMINNUM_IEEE;
  case ISD::FMINNUM_IEEE:
    return ISD::FMAXNUM_IEEE;
  case ISD::FMAXIMUM:
    return ISD::FMINIMUM;
  case ISD::FMINIMUM:
    return ISD::FMAXIMUM;
  case ISD::FMAXIMUMNUM:
    return ISD::FMINIMUMNUM;
  case ISD::FMINIMUMNUM:
    return ISD::FMAXIMUMNUM;
  case AMDGPUISD::FMAX_LEGACY:
    return AMDGPUISD::FMIN_LEGACY;
  case AMDGPUISD::FMIN_LEGACY:
    return AMDGPUISD::FMAX_LEGACY;
  default:
  }
}
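// Explanatory note: this opcode mapping supports the fneg pushdown combine,
// using the identity -max(a, b) = min(-a, -b) (and its dual) across every
// min/max flavor, including the IEEE and AMDGPU legacy variants.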

      return false;
  } else {
      return false;
  }

  return true;
}

  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  switch (Opc) {

    if (LHS.getOpcode() != ISD::FNEG)
      LHS = DAG.getNode(ISD::FNEG, SL, VT, LHS);
    else

    if (RHS.getOpcode() != ISD::FNEG)
      RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
    else

      return SDValue();
    return Res;
  }
  case AMDGPUISD::FMUL_LEGACY: {

    if (LHS.getOpcode() == ISD::FNEG)
    else if (RHS.getOpcode() == ISD::FNEG)
    else
      RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);

      return SDValue();
    return Res;
  }

    if (LHS.getOpcode() == ISD::FNEG)
    else if (MHS.getOpcode() == ISD::FNEG)
    else
      MHS = DAG.getNode(ISD::FNEG, SL, VT, MHS);

    if (RHS.getOpcode() != ISD::FNEG)
      RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
    else

      return SDValue();
    return Res;
  }
  case ISD::FMAXNUM:
  case ISD::FMINNUM:
  case ISD::FMAXNUM_IEEE:
  case ISD::FMINNUM_IEEE:
  case ISD::FMINIMUM:
  case ISD::FMAXIMUM:
  case ISD::FMINIMUMNUM:
  case ISD::FMAXIMUMNUM:
  case AMDGPUISD::FMAX_LEGACY:
  case AMDGPUISD::FMIN_LEGACY: {

    SDValue NegLHS = DAG.getNode(ISD::FNEG, SL, VT, LHS);
    SDValue NegRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);

    if (Res.getOpcode() != Opposite)
      return SDValue();
    return Res;
  }
  case AMDGPUISD::FMED3: {
    for (unsigned I = 0; I < 3; ++I)

    if (Res.getOpcode() != AMDGPUISD::FMED3)
      return SDValue();

    }

    return Res;
  }
  case ISD::FP_EXTEND:
  case ISD::FTRUNC:
  case ISD::FRINT:
  case ISD::FNEARBYINT:
  case ISD::FROUNDEVEN:
  case ISD::FSIN:
  case AMDGPUISD::RCP:
  case AMDGPUISD::RCP_LEGACY:
  case AMDGPUISD::RCP_IFLAG:
  case AMDGPUISD::SIN_HW: {
    if (CvtSrc.getOpcode() == ISD::FNEG) {
    }

  }

    if (CvtSrc.getOpcode() == ISD::FNEG) {
    }

  }
  case ISD::FP16_TO_FP: {

    EVT SrcVT = Src.getValueType();

    return DAG.getNode(ISD::FP16_TO_FP, SL, N->getValueType(0), IntFNeg);
  }

  }
  case ISD::BITCAST: {

    SDValue CastHi = DAG.getNode(ISD::BITCAST, SL, MVT::f32, HighBits);
    SDValue NegHi = DAG.getNode(ISD::FNEG, SL, MVT::f32, CastHi);

    Ops.back() = CastBack;
    SDValue Result = DAG.getNode(ISD::BITCAST, SL, VT, Build);

    return Result;
  }

    SDValue NegLHS = DAG.getNode(ISD::FNEG, SL, MVT::f32, LHS);
    SDValue NegRHS = DAG.getNode(ISD::FNEG, SL, MVT::f32, RHS);

                       NegRHS);
    }

  }
  default:
  }
}

  SDValue N0 = N->getOperand(0);

  case ISD::FP16_TO_FP: {
    assert(!Subtarget->has16BitInsts() && "should only see if f16 is illegal");
    EVT SrcVT = Src.getValueType();

    return DAG.getNode(ISD::FP16_TO_FP, SL, N->getValueType(0), IntFAbs);
  }
  default:
  }
}

5300 if (!CFP)

5302

5303

5304 const APFloat &Val = CFP->getValueAPF();

5307}

5308

5313

5314 switch(N->getOpcode()) {

5315 default:

5316 break;

5317 case ISD::BITCAST: {

5318 EVT DestVT = N->getValueType(0);

5319

5320

5321

5322

5323

5324

5326 SDValue Src = N->getOperand(0);

5330 EVT SrcVT = Src.getValueType();

5332

5335

5340 CastedElts.push_back(DAG.getNode(ISD::BITCAST, DL, DestEltVT, Elt));

5341 }

5342

5344 }

5345 }

5346 }

5347

5349 break;

5350

5351

5352

5353

5354

5355 SDValue Src = N->getOperand(0);

5358 uint64_t CVal = C->getZExtValue();

5362 return DAG.getNode(ISD::BITCAST, SL, DestVT, BV);

5363 }

5364

5366 const APInt &Val = C->getValueAPF().bitcastToAPInt();

5372

5373 return DAG.getNode(ISD::BITCAST, SL, DestVT, Vec);

5374 }

5375

5376 break;

5377 }

5381

5382

5383

5384

5385 if (!(N->getValueType(0).isVector() &&

5388 break;

5394 }

5399 case AMDGPUISD::MUL_U24:

5400 case AMDGPUISD::MUL_I24: {

5402 return Simplified;

5403 break;

5404 }

5405 case AMDGPUISD::MULHI_I24:

5406 case AMDGPUISD::MULHI_U24:

5417 case ISD::FNEG:

5419 case ISD::FABS:

5421 case AMDGPUISD::BFE_I32:

5422 case AMDGPUISD::BFE_U32: {

5423 assert(!N->getValueType(0).isVector() &&

5424 "Vector handling of BFE not implemented");

5426 if (!Width)

5427 break;

5428

5430 if (WidthVal == 0)

5432

5435 break;

5436

5437 SDValue BitsFrom = N->getOperand(0);

5439

5440 bool Signed = N->getOpcode() == AMDGPUISD::BFE_I32;

5441

5442 if (OffsetVal == 0) {

5443

5444 unsigned SignBits = Signed ? (32 - WidthVal + 1) : (32 - WidthVal);

5445

5447 if (OpSignBits >= SignBits)

5448 return BitsFrom;

5449

5452

5453

5454

5455

5456

5457

5460 }

5461

5463 }

5464

5468 CVal->getSExtValue(),

5469 OffsetVal,

5470 WidthVal,

5471 DL);

5472 }

5473

5475 CVal->getZExtValue(),

5476 OffsetVal,

5477 WidthVal,

5478 DL);

5479 }

5480
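// Once the field reaches bit 31 there is nothing left to mask off, so the
// extract is just a shift: arithmetic for BFE_I32, logical for BFE_U32
// (kept as BFE only when SDWA can handle the 16/16 case directly).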

5481 if ((OffsetVal + WidthVal) >= 32 &&

5482 !(Subtarget->hasSDWA() && OffsetVal == 16 && WidthVal == 16)) {

5485 BitsFrom, ShiftVal);

5486 }

5487

5490 OffsetVal,

5491 OffsetVal + WidthVal);

5492

5500 }

5501 }

5502

5503 break;

5504 }

5505 case ISD::LOAD:

5507 case ISD::STORE:

5509 case AMDGPUISD::RCP:

5510 case AMDGPUISD::RCP_IFLAG:

5517 case AMDGPUISD::FMAD_FTZ: {

5518 SDValue N0 = N->getOperand(0);

5519 SDValue N1 = N->getOperand(1);

5520 SDValue N2 = N->getOperand(2);

5521 EVT VT = N->getValueType(0);

5522

5523

5524

5528 if (N0CFP && N1CFP && N2CFP) {

5529 const auto FTZ = [](const APFloat &V) {

5530 if (V.isDenormal()) {

5531 APFloat Zero(V.getSemantics(), 0);

5532 return V.isNegative() ? -Zero : Zero;

5533 }

5534 return V;

5535 };

5536

5541 V0 = FTZ(V0);

5544 }

5545 break;

5546 }

5547 }

5549}

5550

5551

5552

5553

5554

5558 const SDLoc &SL,

5559 bool RawReg) const {

5563

5564 if (!MRI.isLiveIn(Reg)) {

5565 VReg = MRI.createVirtualRegister(RC);

5566 MRI.addLiveIn(Reg, VReg);

5567 } else {

5568 VReg = MRI.getLiveInVirtReg(Reg);

5569 }

5570

5571 if (RawReg)

5573

5575}

5576

5577

5578

5584 return I;

5585 }

5586 }

5587

5589}

5590

5593 const SDLoc &SL,

5594 int64_t Offset) const {

5598

5601

5605}

5606

5608 const SDLoc &SL,

5611 int64_t Offset) const {

5615

5617

5619 DAG.getCopyFromReg(Chain, SL, Info->getStackPtrOffsetReg(), MVT::i32);

5623 return Store;

5624}

5625

5630 assert(Arg && "Attempting to load missing argument");

5631

5635

5637 return V;

5638

5639 unsigned Mask = Arg.getMask();

5644 DAG.getConstant(Mask >> Shift, SL, VT));

5645}

5646

5649 unsigned ExplicitArgOffset = Subtarget->getExplicitKernelArgOffset();

5650 const Align Alignment = Subtarget->getAlignmentForImplicitArgPtr();

5652 alignTo(ExplicitKernArgSize, Alignment) + ExplicitArgOffset;

5653 switch (Param) {

5655 return ArgOffset;

5662 }

5664}

5665

5671

5674 int &RefinementSteps,

5675 bool &UseOneConstNR,

5676 bool Reciprocal) const {

5678

5679 if (VT == MVT::f32) {

5680 RefinementSteps = 0;

5681 return DAG.getNode(AMDGPUISD::RSQ, SDLoc(Operand), VT, Operand);

5682 }

5683

5684

5685

5686

5688}

5689

5692 int &RefinementSteps) const {

5694

5695 if (VT == MVT::f32) {

5696 // Reciprocal, < 1 ulp error.

5697 //

5698 // This reciprocal approximation converges to < 0.5 ulp error with one

5699 // Newton-Raphson iteration performed with two fused multiply-adds (FMAs).

5700

5701 RefinementSteps = 0;

5702 return DAG.getNode(AMDGPUISD::RCP, SDLoc(Operand), VT, Operand);

5703 }

5704

5705

5706

5707

5709}

5710

5712 switch (ID) {

5713 case Intrinsic::amdgcn_workitem_id_x:

5714 return 0;

5715 case Intrinsic::amdgcn_workitem_id_y:

5716 return 1;

5717 case Intrinsic::amdgcn_workitem_id_z:

5718 return 2;

5719 default:

5721 }

5722}

5723

5727

5728 Known.resetAll();

5729

5730 unsigned Opc = Op.getOpcode();

5731

5732 switch (Opc) {

5733 default:

5734 break;

5735 case AMDGPUISD::CARRY:

5736 case AMDGPUISD::BORROW: {

5738 break;

5739 }

5740

5741 case AMDGPUISD::BFE_I32:

5742 case AMDGPUISD::BFE_U32: {

5744 if (!CWidth)

5745 return;

5746

5748

5749 if (Opc == AMDGPUISD::BFE_U32)

5751

5752 break;

5753 }

5754 case AMDGPUISD::FP_TO_FP16: {
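// FP_TO_FP16 returns the half-precision result in the low 16 bits of an
// i32, so the upper bits are always known zero.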

5756

5757

5759 break;

5760 }

5761 case AMDGPUISD::MUL_U24:

5762 case AMDGPUISD::MUL_I24: {
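// The 24-bit multiplies read only the low 24 bits of each operand, so known
// bits are computed on the truncated inputs; trailing zeros of the product
// always carry over, and sign/zero information transfers only when the
// operands' significant bits provably fit in a 32-bit result.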

5768

5769 if (TrailZ >= 32)

5770 break;

5771

5772

5773 LHSKnown = LHSKnown.trunc(24);

5774 RHSKnown = RHSKnown.trunc(24);

5775

5776 if (Opc == AMDGPUISD::MUL_I24) {

5779 unsigned MaxValBits = LHSValBits + RHSValBits;

5780 if (MaxValBits > 32)

5781 break;

5782 unsigned SignBits = 32 - MaxValBits + 1;

5783 bool LHSNegative = LHSKnown.isNegative();

5786 bool RHSNegative = RHSKnown.isNegative();

5789

5790 if ((LHSNonNegative && RHSNonNegative) || (LHSNegative && RHSNegative))

5792 else if ((LHSNegative && RHSPositive) || (LHSPositive && RHSNegative))

5794 } else {

5797 unsigned MaxValBits = LHSValBits + RHSValBits;

5798 if (MaxValBits >= 32)

5799 break;

5801 }

5802 break;

5803 }

5804 case AMDGPUISD::PERM: {
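// Each selector byte of the PERM mask produces one result byte: values 0-3
// and 4-6 copy a byte from one of the two sources, 0x0c forces 0x00, values
// above 0x0c force 0xff, and anything else is conservatively left unknown.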

5806 if (!CMask)

5807 return;

5808

5812

5813 for (unsigned I = 0; I < 32; I += 8) {

5814 unsigned SelBits = Sel & 0xff;

5815 if (SelBits < 4) {

5816 SelBits *= 8;

5819 } else if (SelBits < 7) {

5820 SelBits = (SelBits & 3) * 8;

5823 } else if (SelBits == 0x0c) {

5824 Known.Zero |= 0xFFull << I;

5825 } else if (SelBits > 0x0c) {

5826 Known.One |= 0xFFull << I;

5827 }

5828 Sel >>= 8;

5829 }

5830 break;

5831 }

5832 case AMDGPUISD::BUFFER_LOAD_UBYTE: {
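// Unsigned byte/short buffer loads zero-extend, so every bit above bit 8
// (resp. bit 16 for USHORT below) is known zero.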

5834 break;

5835 }

5836 case AMDGPUISD::BUFFER_LOAD_USHORT: {

5838 break;

5839 }

5840 case AMDGPUISD::LDS: {

5842 Align Alignment = GA->getGlobal()->getPointerAlignment(DAG.getDataLayout());
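// An LDS address inherits the global's alignment (low bits known zero) and
// fits in 16 bits, so the high half is known zero as well.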

5843

5846 break;

5847 }

5848 case AMDGPUISD::SMIN3:

5849 case AMDGPUISD::SMAX3:

5850 case AMDGPUISD::SMED3:

5851 case AMDGPUISD::UMIN3:

5852 case AMDGPUISD::UMAX3:

5853 case AMDGPUISD::UMED3: {

5856 break;

5857

5860 break;

5861

5864 break;

5865

5866

5868 Known.One = Known0.One & Known1.One & Known2.One;

5869 break;

5870 }

5872 unsigned IID = Op.getConstantOperandVal(0);

5873 switch (IID) {

5874 case Intrinsic::amdgcn_workitem_id_x:

5875 case Intrinsic::amdgcn_workitem_id_y:

5876 case Intrinsic::amdgcn_workitem_id_z: {

5877 unsigned MaxValue = Subtarget->getMaxWorkitemID(

5880 break;

5881 }

5882 default:

5883 break;

5884 }

5885 }

5886 }

5887}

5888

5891 unsigned Depth) const {

5892 switch (Op.getOpcode()) {

5893 case AMDGPUISD::BFE_I32: {
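// BFE_I32 sign-extends a Width-bit field, giving at least 32 - Width + 1
// copies of the sign bit; with a zero offset the source's own sign-bit
// count may be even better.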

5895 if (!Width)

5896 return 1;

5897

5898 unsigned SignBits = 32 - Width->getZExtValue() + 1;

5900 return SignBits;

5901

5902

5904 return std::max(SignBits, Op0SignBits);

5905 }

5906

5907 case AMDGPUISD::BFE_U32: {

5909 return Width ? 32 - (Width->getZExtValue() & 0x1f) : 1;

5910 }

5911

5912 case AMDGPUISD::CARRY:

5913 case AMDGPUISD::BORROW:

5914 return 31;
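// A sign-extending N-bit load has 32 - N + 1 known sign bits (25 for bytes,
// 17 for shorts); a zero-extending one has 32 - N zero top bits, hence 24
// and 16.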

5915 case AMDGPUISD::BUFFER_LOAD_BYTE:

5916 return 25;

5917 case AMDGPUISD::BUFFER_LOAD_SHORT:

5918 return 17;

5919 case AMDGPUISD::BUFFER_LOAD_UBYTE:

5920 return 24;

5921 case AMDGPUISD::BUFFER_LOAD_USHORT:

5922 return 16;

5923 case AMDGPUISD::FP_TO_FP16:

5924 return 16;

5925 case AMDGPUISD::SMIN3:

5926 case AMDGPUISD::SMAX3:

5927 case AMDGPUISD::SMED3:

5928 case AMDGPUISD::UMIN3:

5929 case AMDGPUISD::UMAX3:

5930 case AMDGPUISD::UMED3: {

5932 if (Tmp2 == 1)

5933 return 1;

5934

5936 if (Tmp1 == 1)

5937 return 1;

5938

5940 if (Tmp0 == 1)

5941 return 1;

5942

5943 return std::min({Tmp0, Tmp1, Tmp2});

5944 }

5945 default:

5946 return 1;

5947 }

5948}

5949

5954 if (!MI)

5955 return 1;

5956

5957

5958 switch (MI->getOpcode()) {

5959 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE:

5960 return 25;

5961 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT:

5962 return 17;

5963 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:

5964 return 24;

5965 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:

5966 return 16;

5967 case AMDGPU::G_AMDGPU_SMED3:

5968 case AMDGPU::G_AMDGPU_UMED3: {

5969 auto [Dst, Src0, Src1, Src2] = MI->getFirst4Regs();

5970 unsigned Tmp2 = Analysis.computeNumSignBits(Src2, DemandedElts, Depth + 1);

5971 if (Tmp2 == 1)

5972 return 1;

5973 unsigned Tmp1 = Analysis.computeNumSignBits(Src1, DemandedElts, Depth + 1);

5974 if (Tmp1 == 1)

5975 return 1;

5976 unsigned Tmp0 = Analysis.computeNumSignBits(Src0, DemandedElts, Depth + 1);

5977 if (Tmp0 == 1)

5978 return 1;

5979 return std::min({Tmp0, Tmp1, Tmp2});

5980 }

5981 default:

5982 return 1;

5983 }

5984}

5985

5988 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {

5989 unsigned Opcode = Op.getOpcode();

5990 switch (Opcode) {

5991 case AMDGPUISD::BFE_I32:

5992 case AMDGPUISD::BFE_U32:

5993 return false;

5994 }

5997}

5998

6001 unsigned Depth) const {

6002 unsigned Opcode = Op.getOpcode();

6003 switch (Opcode) {

6004 case AMDGPUISD::FMIN_LEGACY:

6005 case AMDGPUISD::FMAX_LEGACY: {

6006 if (SNaN)

6007 return true;

6008

6009

6010

6011 return false;

6012 }

6013 case AMDGPUISD::FMUL_LEGACY:

6014 case AMDGPUISD::CVT_PKRTZ_F16_F32: {

6015 if (SNaN)

6016 return true;

6019 }

6020 case AMDGPUISD::FMED3:

6021 case AMDGPUISD::FMIN3:

6022 case AMDGPUISD::FMAX3:

6023 case AMDGPUISD::FMINIMUM3:

6024 case AMDGPUISD::FMAXIMUM3:

6025 case AMDGPUISD::FMAD_FTZ: {

6026 if (SNaN)

6027 return true;

6031 }

6032 case AMDGPUISD::CVT_F32_UBYTE0:

6033 case AMDGPUISD::CVT_F32_UBYTE1:

6034 case AMDGPUISD::CVT_F32_UBYTE2:

6035 case AMDGPUISD::CVT_F32_UBYTE3:

6036 return true;

6037

6038 case AMDGPUISD::RCP:

6039 case AMDGPUISD::RSQ:

6040 case AMDGPUISD::RCP_LEGACY:

6041 case AMDGPUISD::RSQ_CLAMP: {

6042 if (SNaN)

6043 return true;

6044

6045

6046 return false;

6047 }

6048 case ISD::FLDEXP:

6049 case AMDGPUISD::FRACT: {

6050 if (SNaN)

6051 return true;

6053 }

6054 case AMDGPUISD::DIV_SCALE:

6055 case AMDGPUISD::DIV_FMAS:

6056 case AMDGPUISD::DIV_FIXUP:

6057

6058 return SNaN;

6059 case AMDGPUISD::SIN_HW:

6060 case AMDGPUISD::COS_HW: {

6061

6062 return SNaN;

6063 }

6065 unsigned IntrinsicID = Op.getConstantOperandVal(0);

6066

6067 switch (IntrinsicID) {

6068 case Intrinsic::amdgcn_cubeid:

6069 case Intrinsic::amdgcn_cvt_off_f32_i4:

6070 return true;

6071

6072 case Intrinsic::amdgcn_frexp_mant: {

6073 if (SNaN)

6074 return true;

6076 }

6077 case Intrinsic::amdgcn_cvt_pkrtz: {

6078 if (SNaN)

6079 return true;

6082 }

6083 case Intrinsic::amdgcn_rcp:

6084 case Intrinsic::amdgcn_rsq:

6085 case Intrinsic::amdgcn_rcp_legacy:

6086 case Intrinsic::amdgcn_rsq_legacy:

6087 case Intrinsic::amdgcn_rsq_clamp:

6088 case Intrinsic::amdgcn_tanh: {

6089 if (SNaN)

6090 return true;

6091

6092

6093 return false;

6094 }

6095 case Intrinsic::amdgcn_trig_preop:

6096 case Intrinsic::amdgcn_fdot2:

6097

6098 return SNaN;

6099 case Intrinsic::amdgcn_fma_legacy:

6100 if (SNaN)

6101 return true;

6105 default:

6106 return false;

6107 }

6108 }

6109 default:

6110 return false;

6111 }

6112}

6113

6116 return MRI.hasOneNonDBGUse(N0);

6117}


static LLVM_READONLY bool hasSourceMods(const MachineInstr &MI)

static bool isInv2Pi(const APFloat &APF)

static LLVM_READONLY bool opMustUseVOP3Encoding(const MachineInstr &MI, const MachineRegisterInfo &MRI)

returns true if the operation will definitely need to use a 64-bit encoding, and thus will use a VOP3...

static unsigned inverseMinMax(unsigned Opc)

static SDValue extractF64Exponent(SDValue Hi, const SDLoc &SL, SelectionDAG &DAG)

Definition AMDGPUISelLowering.cpp:2470

static unsigned workitemIntrinsicDim(unsigned ID)

Definition AMDGPUISelLowering.cpp:5711

static int getOrCreateFixedStackObject(MachineFrameInfo &MFI, unsigned Size, int64_t Offset)

Definition AMDGPUISelLowering.cpp:5579

static SDValue constantFoldBFE(SelectionDAG &DAG, IntTy Src0, uint32_t Offset, uint32_t Width, const SDLoc &DL)

Definition AMDGPUISelLowering.cpp:3850
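
For illustration, a minimal sketch of the signed form of this fold (a hypothetical standalone helper on plain integers, not the DAG routine; assumes Offset + Width <= 32):

#include <cstdint>

int32_t bfeI32Reference(int32_t Src, uint32_t Offset, uint32_t Width) {
  if (Width == 0)
    return 0;
  // Shift the field to the top, then arithmetic-shift back down so the
  // field's top bit becomes the sign bit of the result.
  uint32_t Shifted = static_cast<uint32_t>(Src) << (32 - Offset - Width);
  return static_cast<int32_t>(Shifted) >> (32 - Width);
}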

static SDValue getMad(SelectionDAG &DAG, const SDLoc &SL, EVT VT, SDValue X, SDValue Y, SDValue C, SDNodeFlags Flags=SDNodeFlags())

Definition AMDGPUISelLowering.cpp:2762

static SDValue getAddOneOp(const SDNode *V)

If V is an add of a constant 1, returns the other operand.

Definition AMDGPUISelLowering.cpp:4511

static LLVM_READONLY bool selectSupportsSourceMods(const SDNode *N)

Return true if v_cndmask_b32 will support fabs/fneg source modifiers for the type for ISD::SELECT.

Definition AMDGPUISelLowering.cpp:730

static cl::opt< bool > AMDGPUBypassSlowDiv("amdgpu-bypass-slow-div", cl::desc("Skip 64-bit divide for dynamic 32-bit values"), cl::init(true))

static SDValue getMul24(SelectionDAG &DAG, const SDLoc &SL, SDValue N0, SDValue N1, unsigned Size, bool Signed)

Definition AMDGPUISelLowering.cpp:4493

static bool fnegFoldsIntoOp(const SDNode *N)

Definition AMDGPUISelLowering.cpp:701

static bool isI24(SDValue Op, SelectionDAG &DAG)

Definition AMDGPUISelLowering.cpp:3791

static bool isCttzOpc(unsigned Opc)

Definition AMDGPUISelLowering.cpp:3202

static bool isU24(SDValue Op, SelectionDAG &DAG)

Definition AMDGPUISelLowering.cpp:3787

static SDValue peekFPSignOps(SDValue Val)

Definition AMDGPUISelLowering.cpp:1661

static bool valueIsKnownNeverF32Denorm(SDValue Src)

Return true if it's known that Src can never be an f32 denormal value.

Definition AMDGPUISelLowering.cpp:2632

static SDValue distributeOpThroughSelect(TargetLowering::DAGCombinerInfo &DCI, unsigned Op, const SDLoc &SL, SDValue Cond, SDValue N1, SDValue N2)

Definition AMDGPUISelLowering.cpp:4779

static SDValue peekFNeg(SDValue Val)

Definition AMDGPUISelLowering.cpp:1654

static SDValue simplifyMul24(SDNode *Node24, TargetLowering::DAGCombinerInfo &DCI)

Definition AMDGPUISelLowering.cpp:3798

static bool isCtlzOpc(unsigned Opc)

Definition AMDGPUISelLowering.cpp:3198

static LLVM_READNONE bool fnegFoldsIntoOpcode(unsigned Opc)

Definition AMDGPUISelLowering.cpp:662

static bool hasVolatileUser(SDNode *Val)

Definition AMDGPUISelLowering.cpp:3865

Interface definition of the TargetLowering class that is common to all AMD GPUs.

Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.


Provides analysis for querying information about KnownBits during GISel passes.


static CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg)

Definition AMDGPUISelLowering.cpp:1133

static CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg)

Definition AMDGPUISelLowering.cpp:1161

uint64_t getExplicitKernArgSize() const

static std::optional< uint32_t > getLDSAbsoluteAddress(const GlobalValue &GV)

void recordNumNamedBarriers(uint32_t GVAddr, unsigned BarCnt)

unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV)

bool isModuleEntryFunction() const

bool has16BitInsts() const

static const AMDGPUSubtarget & get(const MachineFunction &MF)

static unsigned numBitsSigned(SDValue Op, SelectionDAG &DAG)

Definition AMDGPUISelLowering.cpp:56

SDValue combineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, SDValue CC, DAGCombinerInfo &DCI) const

Generate Min/Max node.

Definition AMDGPUISelLowering.cpp:1740

unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override

This method can be implemented by targets that want to expose additional information about sign bits ...

Definition AMDGPUISelLowering.cpp:5889

SDValue performMulhuCombine(SDNode *N, DAGCombinerInfo &DCI) const

Definition AMDGPUISelLowering.cpp:4685

EVT getTypeForExtReturn(LLVMContext &Context, EVT VT, ISD::NodeType ExtendKind) const override

Return the type that should be used to zero or sign extend a zeroext/signext integer return value.

Definition AMDGPUISelLowering.cpp:801

SDValue SplitVectorLoad(SDValue Op, SelectionDAG &DAG) const

Split a vector load into 2 loads of half the vector.

Definition AMDGPUISelLowering.cpp:1863

SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:1583

SDValue performLoadCombine(SDNode *N, DAGCombinerInfo &DCI) const

Definition AMDGPUISelLowering.cpp:3897

void analyzeFormalArgumentsCompute(CCState &State, const SmallVectorImpl< ISD::InputArg > &Ins) const

The SelectionDAGBuilder will automatically promote function arguments with illegal types.

Definition AMDGPUISelLowering.cpp:1215

SDValue LowerF64ToF16Safe(SDValue Src, const SDLoc &DL, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:3628

SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:2579

SDValue storeStackInputValue(SelectionDAG &DAG, const SDLoc &SL, SDValue Chain, SDValue ArgVal, int64_t Offset) const

Definition AMDGPUISelLowering.cpp:5607

bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem, unsigned AS) const override

Return true if it is expected to be cheaper to do a store of vector constant with the given size and ...

Definition AMDGPUISelLowering.cpp:981

SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:1618

void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override

Determine which of the bits specified in Mask are known to be either zero or one and return them in t...

Definition AMDGPUISelLowering.cpp:5724

bool shouldCombineMemoryType(EVT VT) const

Definition AMDGPUISelLowering.cpp:3876

SDValue splitBinaryBitConstantOpImpl(DAGCombinerInfo &DCI, const SDLoc &SL, unsigned Opc, SDValue LHS, uint32_t ValLo, uint32_t ValHi) const

Split the 64-bit value LHS into two 32-bit components, and perform the binary operation Opc to it wit...

Definition AMDGPUISelLowering.cpp:4068
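
A minimal sketch of the split on plain integers (hypothetical helper; the real routine builds the corresponding 32-bit DAG nodes and recombines them with a BUILD_PAIR):

#include <cstdint>

uint64_t splitAnd64(uint64_t LHS, uint32_t ValLo, uint32_t ValHi) {
  uint32_t Lo = static_cast<uint32_t>(LHS) & ValLo;        // low-half op
  uint32_t Hi = static_cast<uint32_t>(LHS >> 32) & ValHi;  // high-half op
  return (static_cast<uint64_t>(Hi) << 32) | Lo;           // recombine
}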

SDValue lowerUnhandledCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals, StringRef Reason) const

Definition AMDGPUISelLowering.cpp:1391

SDValue performAssertSZExtCombine(SDNode *N, DAGCombinerInfo &DCI) const

Definition AMDGPUISelLowering.cpp:4007

bool isTruncateFree(EVT Src, EVT Dest) const override

Definition AMDGPUISelLowering.cpp:999

bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override

Definition AMDGPUISelLowering.cpp:987

SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:2445

TargetLowering::NegatibleCost getConstantNegateCost(const ConstantFPSDNode *C) const

Definition AMDGPUISelLowering.cpp:4938

SDValue LowerFLOGUnsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG, bool IsLog10, SDNodeFlags Flags) const

Definition AMDGPUISelLowering.cpp:2873

bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override

Return true if Op can create undef or poison from non-undef & non-poison operands.

Definition AMDGPUISelLowering.cpp:5986

SDValue performMulhsCombine(SDNode *N, DAGCombinerInfo &DCI) const

Definition AMDGPUISelLowering.cpp:4652

SDValue lowerFEXPUnsafeImpl(SDValue Op, const SDLoc &SL, SelectionDAG &DAG, SDNodeFlags Flags, bool IsExp10) const

Definition AMDGPUISelLowering.cpp:2963

bool isSDNodeAlwaysUniform(const SDNode *N) const override

Definition AMDGPUISelLowering.cpp:907

bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override

Return true if it is profitable to move this shift by a constant amount through its operand,...

Definition AMDGPUISelLowering.cpp:1087

SDValue performShlCombine(SDNode *N, DAGCombinerInfo &DCI) const

Definition AMDGPUISelLowering.cpp:4091

bool isCheapToSpeculateCtlz(Type *Ty) const override

Return true if it is cheap to speculate a call to intrinsic ctlz.

Definition AMDGPUISelLowering.cpp:903

SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:2385

bool isFNegFree(EVT VT) const override

Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...

Definition AMDGPUISelLowering.cpp:974

SDValue LowerFLOG10(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:2867

SDValue LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG, bool Signed) const

Definition AMDGPUISelLowering.cpp:3422

unsigned computeNumSignBitsForTargetInstr(GISelValueTracking &Analysis, Register R, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const override

This method can be implemented by targets that want to expose additional information about sign bits ...

Definition AMDGPUISelLowering.cpp:5950

SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override

This callback is invoked for operations that are unsupported by the target, which are registered to u...

Definition AMDGPUISelLowering.cpp:1437

SDValue LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:3611

SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG, MachineFrameInfo &MFI, int ClobberedFI) const

Definition AMDGPUISelLowering.cpp:1357

bool isConstantCheaperToNegate(SDValue N) const

Definition AMDGPUISelLowering.cpp:4954

bool isReassocProfitable(MachineRegisterInfo &MRI, Register N0, Register N1) const override

Definition AMDGPUISelLowering.cpp:6114

bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool SNaN=false, unsigned Depth=0) const override

If SNaN is false, returns true if Op is known to never be any NaN; if SNaN is true, returns true if Op is known to never be a signaling NaN.

Definition AMDGPUISelLowering.cpp:5999

static bool needsDenormHandlingF32(const SelectionDAG &DAG, SDValue Src, SDNodeFlags Flags)

Definition AMDGPUISelLowering.cpp:2660

uint32_t getImplicitParameterOffset(const MachineFunction &MF, const ImplicitParameter Param) const

Helper function that returns the byte offset of the given type of implicit parameter.

Definition AMDGPUISelLowering.cpp:5666

SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:2606

SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const

Definition AMDGPUISelLowering.cpp:4879

SDValue performFNegCombine(SDNode *N, DAGCombinerInfo &DCI) const

Definition AMDGPUISelLowering.cpp:5008

SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:3716

virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:1525

bool isConstantCostlierToNegate(SDValue N) const

Definition AMDGPUISelLowering.cpp:4948

SDValue loadInputValue(SelectionDAG &DAG, const TargetRegisterClass *RC, EVT VT, const SDLoc &SL, const ArgDescriptor &Arg) const

Definition AMDGPUISelLowering.cpp:5626

SDValue LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool sign) const

Definition AMDGPUISelLowering.cpp:2001

SDValue lowerFEXP10Unsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG, SDNodeFlags Flags) const

Emit approx-funcs appropriate lowering for exp10.

Definition AMDGPUISelLowering.cpp:3013

bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtType, EVT ExtVT, std::optional< unsigned > ByteOffset) const override

Return true if it is profitable to reduce a load to a smaller type.

Definition AMDGPUISelLowering.cpp:835

SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:3441

bool isCheapToSpeculateCttz(Type *Ty) const override

Return true if it is cheap to speculate a call to intrinsic cttz.

Definition AMDGPUISelLowering.cpp:899

SDValue performCtlz_CttzCombine(const SDLoc &SL, SDValue Cond, SDValue LHS, SDValue RHS, DAGCombinerInfo &DCI) const

Definition AMDGPUISelLowering.cpp:4745

SDValue performSraCombine(SDNode *N, DAGCombinerInfo &DCI) const

Definition AMDGPUISelLowering.cpp:4191

bool isSelectSupported(SelectSupportKind) const override

Definition AMDGPUISelLowering.cpp:816

bool isZExtFree(Type *Src, Type *Dest) const override

Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...

Definition AMDGPUISelLowering.cpp:1020

SDValue lowerFEXP2(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:2912

SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override

This hook must be implemented to lower calls into the specified DAG.

Definition AMDGPUISelLowering.cpp:1422

SDValue performSrlCombine(SDNode *N, DAGCombinerInfo &DCI) const

Definition AMDGPUISelLowering.cpp:4292

SDValue lowerFEXP(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:3067

SDValue getIsLtSmallestNormal(SelectionDAG &DAG, SDValue Op, SDNodeFlags Flags) const

Definition AMDGPUISelLowering.cpp:2669

bool mayIgnoreSignedZero(SDValue Op) const

Definition AMDGPUISelLowering.cpp:646

SDValue getIsFinite(SelectionDAG &DAG, SDValue Op, SDNodeFlags Flags) const

Definition AMDGPUISelLowering.cpp:2687

bool isLoadBitCastBeneficial(EVT, EVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const final

Return true if the following transform is beneficial: fold (conv (load x)) -> (load (conv*)x) On arch...

Definition AMDGPUISelLowering.cpp:875

std::pair< SDValue, SDValue > splitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HighVT, SelectionDAG &DAG) const

Split a vector value into two parts of types LoVT and HiVT.

Definition AMDGPUISelLowering.cpp:1828

AMDGPUTargetLowering(const TargetMachine &TM, const TargetSubtargetInfo &STI, const AMDGPUSubtarget &AMDGPUSTI)

Definition AMDGPUISelLowering.cpp:62

SDValue LowerFLOGCommon(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:2768

SDValue foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI, SDValue N) const

Definition AMDGPUISelLowering.cpp:4802

SDValue LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG, bool Signed) const

Definition AMDGPUISelLowering.cpp:3296

bool isFAbsFree(EVT VT) const override

Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...

Definition AMDGPUISelLowering.cpp:966

SDValue loadStackInputValue(SelectionDAG &DAG, EVT VT, const SDLoc &SL, int64_t Offset) const

Similar to CreateLiveInRegister, except the value may be loaded from a stack slot rather than passed in a ...

Definition AMDGPUISelLowering.cpp:5591

SDValue LowerFLOG2(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:2727

static EVT getEquivalentMemType(LLVMContext &Context, EVT VT)

Definition AMDGPUISelLowering.cpp:41

SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const override

Hooks for building estimates in place of slower divisions and square roots.

Definition AMDGPUISelLowering.cpp:5672

SDValue performTruncateCombine(SDNode *N, DAGCombinerInfo &DCI) const

Definition AMDGPUISelLowering.cpp:4396

SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:3487

static SDValue stripBitcast(SDValue Val)

SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC, Register Reg, EVT VT, const SDLoc &SL, bool RawReg=false) const

Helper function that adds Reg to the LiveIn list of the DAG's MachineFunction.

Definition AMDGPUISelLowering.cpp:5555

SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const

Split a vector store into 2 stores of half the vector.

Definition AMDGPUISelLowering.cpp:1957

SDValue LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:3236

SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOperations, bool ForCodeSize, NegatibleCost &Cost, unsigned Depth) const override

Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...

Definition AMDGPUISelLowering.cpp:931

std::pair< SDValue, SDValue > split64BitValue(SDValue Op, SelectionDAG &DAG) const

Return 64-bit value Op as two 32-bit integers.

Definition AMDGPUISelLowering.cpp:1779

SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const

Definition AMDGPUISelLowering.cpp:4518

SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override

Return a reciprocal estimate value for the input operand.

Definition AMDGPUISelLowering.cpp:5690

SDValue LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:2559

SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:3760

static CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg)

Definition AMDGPUISelLowering.cpp:1352

std::pair< SDValue, SDValue > getScaledLogInput(SelectionDAG &DAG, const SDLoc SL, SDValue Op, SDNodeFlags Flags) const

If denormal handling is required return the scaled input to FLOG2, and the check for denormal range.

Definition AMDGPUISelLowering.cpp:2704

static CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg)

Selects the correct CCAssignFn for a given CallingConvention value.

Definition AMDGPUISelLowering.cpp:1347

static bool allUsesHaveSourceMods(const SDNode *N, unsigned CostThreshold=4)

Definition AMDGPUISelLowering.cpp:775

SDValue LowerFROUNDEVEN(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:2531

bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override

Returns true if the target can instruction select the specified FP immediate natively.

Definition AMDGPUISelLowering.cpp:822

static unsigned numBitsUnsigned(SDValue Op, SelectionDAG &DAG)

Definition AMDGPUISelLowering.cpp:52

SDValue lowerFEXPUnsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG, SDNodeFlags Flags) const

Definition AMDGPUISelLowering.cpp:2979

SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:2485

SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:1427

static bool allowApproxFunc(const SelectionDAG &DAG, SDNodeFlags Flags)

Definition AMDGPUISelLowering.cpp:2655

bool ShouldShrinkFPConstant(EVT VT) const override

If true, then instruction selection should seek to shrink the FP constant of the specified type to a ...

Definition AMDGPUISelLowering.cpp:830

SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override

This hook must be implemented to lower outgoing return values, described by the Outs array,...

Definition AMDGPUISelLowering.cpp:1330

SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const

Definition AMDGPUISelLowering.cpp:3950

void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override

This callback is invoked when a node result type is illegal for the target, and the operation was reg...

Definition AMDGPUISelLowering.cpp:1485

SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const

Definition AMDGPUISelLowering.cpp:5297

SDValue getLoHalf64(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:1793

SDValue lowerCTLZResults(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:3206

SDValue performFAbsCombine(SDNode *N, DAGCombinerInfo &DCI) const

Definition AMDGPUISelLowering.cpp:5272

SDValue LowerFP_TO_INT64(SDValue Op, SelectionDAG &DAG, bool Signed) const

Definition AMDGPUISelLowering.cpp:3536

static bool shouldFoldFNegIntoSrc(SDNode *FNeg, SDValue FNegSrc)

Definition AMDGPUISelLowering.cpp:4989

bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const override

Return true if it's profitable to narrow operations of type SrcVT to DestVT.

Definition AMDGPUISelLowering.cpp:1042

SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:2568

SDValue performIntrinsicWOChainCombine(SDNode *N, DAGCombinerInfo &DCI) const

Definition AMDGPUISelLowering.cpp:4030

SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:2330

SDValue performMulLoHiCombine(SDNode *N, DAGCombinerInfo &DCI) const

Definition AMDGPUISelLowering.cpp:4603

SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override

This method will be invoked for all target nodes and for any target-independent nodes that the target...

Definition AMDGPUISelLowering.cpp:5309

void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG, SmallVectorImpl< SDValue > &Results) const

Definition AMDGPUISelLowering.cpp:2116

SDValue WidenOrSplitVectorLoad(SDValue Op, SelectionDAG &DAG) const

Widen a suitably aligned v3 load.

Definition AMDGPUISelLowering.cpp:1923

std::pair< EVT, EVT > getSplitDestVTs(const EVT &VT, SelectionDAG &DAG) const

Split a vector type into two parts.

Definition AMDGPUISelLowering.cpp:1813

SDValue getHiHalf64(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:1801

SDValue combineFMinMaxLegacyImpl(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, SDValue CC, DAGCombinerInfo &DCI) const

Definition AMDGPUISelLowering.cpp:1671

unsigned getVectorIdxWidth(const DataLayout &) const override

Returns the type to be used for the index operand vector operations.

Definition AMDGPUISelLowering.cpp:812

static const fltSemantics & IEEEsingle()

static const fltSemantics & IEEEdouble()

static constexpr roundingMode rmNearestTiesToEven

static const fltSemantics & IEEEhalf()

bool bitwiseIsEqual(const APFloat &RHS) const

opStatus add(const APFloat &RHS, roundingMode RM)

const fltSemantics & getSemantics() const

opStatus multiply(const APFloat &RHS, roundingMode RM)

static APFloat getSmallestNormalized(const fltSemantics &Sem, bool Negative=false)

Returns the smallest (by magnitude) normalized finite number in the given semantics.

static APFloat getInf(const fltSemantics &Sem, bool Negative=false)

Factory for Positive and Negative Infinity.

Class for arbitrary precision integers.

uint64_t getZExtValue() const

Get zero extended value.

void setHighBits(unsigned hiBits)

Set the top hiBits bits.

void setBitsFrom(unsigned loBit)

Set the top bits starting from loBit.

static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)

Get a value with a block of bits set.

bool ule(const APInt &RHS) const

Unsigned less or equal comparison.

static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)

Constructs an APInt value that has the bottom loBitsSet bits set.

static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)

Constructs an APInt value that has the top hiBitsSet bits set.
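
For concreteness, a small sketch of the masks these helpers build (illustrative 8-bit width and values only):

#include "llvm/ADT/APInt.h"

void apintMaskExamples() {
  llvm::APInt Lo  = llvm::APInt::getLowBitsSet(8, 3);   // 0b00000111
  llvm::APInt Hi  = llvm::APInt::getHighBitsSet(8, 2);  // 0b11000000
  llvm::APInt Mid = llvm::APInt::getBitsSet(8, 2, 5);   // bits [2,5): 0b00011100
}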

void setLowBits(unsigned loBits)

Set the bottom loBits bits.

This class represents an incoming formal argument to a Function.

CCState - This class holds information needed while lowering arguments and return values.

static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)

const APFloat & getValueAPF() const

bool isNegative() const

Return true if the value is negative.

uint64_t getZExtValue() const

A parsed version of the target data layout string in and methods for querying it.

Diagnostic information for unsupported feature in backend.

const DataLayout & getDataLayout() const

Get the data layout of the module this function belongs to.

iterator_range< arg_iterator > args()

CallingConv::ID getCallingConv() const

getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...

LLVMContext & getContext() const

getContext - Return a reference to the LLVMContext associated with this function.

Type * getValueType() const

This is an important class for using LLVM in a threaded context.

LLVM_ABI void diagnose(const DiagnosticInfo &DI)

Report a message to the currently installed diagnostic handler.

This class is used to represent ISD::LOAD nodes.

const SDValue & getBasePtr() const

static auto integer_fixedlen_vector_valuetypes()

unsigned getVectorNumElements() const

bool isVector() const

Return true if this is a vector value type.

bool isInteger() const

Return true if this is an integer or a vector integer type.

static auto integer_valuetypes()

bool isFloatingPoint() const

Return true if this is a FP or a vector FP type.

MVT getScalarType() const

If this is a vector, return the element type, otherwise return this.

The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.

LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)

Create a new object at a fixed location on the stack.

int64_t getObjectSize(int ObjectIdx) const

Return the size of the specified object.

int64_t getObjectOffset(int ObjectIdx) const

Return the assigned stack offset of the specified object from the incoming stack pointer.

int getObjectIndexBegin() const

Return the minimum frame object index.

MachineFrameInfo & getFrameInfo()

getFrameInfo - Return the frame info object for the current function.

DenormalMode getDenormalMode(const fltSemantics &FPType) const

Returns the denormal handling type for the default rounding mode of the function.

MachineRegisterInfo & getRegInfo()

getRegInfo - Return information about the registers currently in use.

Function & getFunction()

Return the LLVM function that this machine code represents.

Ty * getInfo()

getInfo - Keep track of various per-function pieces of information for backends that would like to do...

Representation of each machine instruction.

A description of a memory reference used in the backend.

@ MODereferenceable

The memory access is dereferenceable (i.e., doesn't trap).

@ MOInvariant

The memory access always returns the same value (or traps).

Flags getFlags() const

Return the raw flags of the source value,.

MachineRegisterInfo - Keep track of information for virtual and physical registers,...

This is an abstract virtual class for memory operations.

unsigned getAddressSpace() const

Return the address space for the associated pointer.

bool isSimple() const

Returns true if the memory operation is neither atomic nor volatile.

MachineMemOperand * getMemOperand() const

Return a MachineMemOperand object describing the memory reference performed by operation.

const SDValue & getChain() const

EVT getMemoryVT() const

Return the type of the in-memory value.

Wrapper class representing virtual and physical registers.

Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...

const DebugLoc & getDebugLoc() const

Represents one node in the SelectionDAG.

ArrayRef< SDUse > ops() const

unsigned getOpcode() const

Return the SelectionDAG opcode value for this node.

bool hasOneUse() const

Return true if there is exactly one use of this node.

SDNodeFlags getFlags() const

SDVTList getVTList() const

const SDValue & getOperand(unsigned Num) const

uint64_t getConstantOperandVal(unsigned Num) const

Helper method returns the integer value of a ConstantSDNode operand.

iterator_range< user_iterator > users()

Represents a use of a SDNode.

Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.

SDNode * getNode() const

get the SDNode which holds the desired result

bool hasOneUse() const

Return true if there is exactly one node using value ResNo of Node.

SDValue getValue(unsigned R) const

EVT getValueType() const

Return the ValueType of the referenced return value.

TypeSize getValueSizeInBits() const

Returns the size of the value in bits.

const SDValue & getOperand(unsigned i) const

unsigned getOpcode() const

unsigned getNumOperands() const

This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...

SIModeRegisterDefaults getMode() const

This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...

LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())

LLVM_ABI unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const

Get the upper bound on bit size for this Value Op as a signed integer.

const SDValue & getRoot() const

Return the root tag of the SelectionDAG.

LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)

Create a MERGE_VALUES node from the given operands.

LLVM_ABI SDVTList getVTList(EVT VT)

Return an SDVTList that represents the list of values specified.

LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)

LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)

LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())

Append the extracted elements from Start to Count out of the vector Op in Args.

LLVM_ABI SDValue getFreeze(SDValue V)

Return a freeze using the SDLoc of the value operand.

SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)

Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...

LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)

Create a ConstantFPSDNode wrapping a constant value.

LLVM_ABI SDValue getRegister(Register Reg, EVT VT)

LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)

Loads are not normal binary operators: their result type is not determined by their operands,...

LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)

Create a bitwise NOT operation as (XOR Val, -1).

const TargetLowering & getTargetLoweringInfo() const

SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)

Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).

SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)

Return an ISD::BUILD_VECTOR node.

LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)

Return a bitcast using the SDLoc of the value operand, and casting to the provided type.

SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)

SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())

Helper function to make it easier to build Select's if you just have operands and don't want to check...

LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)

Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.

const DataLayout & getDataLayout() const

LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)

Create a ConstantSDNode wrapping a constant value.

LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())

LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)

Modify anything using 'From' to use 'To' instead.

LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())

Helper function to build ISD::STORE nodes.

LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)

SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)

Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...

bool isConstantValueOfAnyType(SDValue N) const

SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())

Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...

LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)

Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...

LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)

LLVM_ABI SDValue getValueType(EVT)

LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)

Gets or creates the specified node.

LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const

Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...

SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)

LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const

Return the number of times the sign bit of the register is replicated into the other bits.

LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)

LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)

Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.

MachineFunction & getMachineFunction() const

SDValue getPOISON(EVT VT)

Return a POISON node. POISON does not have a useful SDLoc.

LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)

LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const

Determine which bits of Op are known to be either zero or one and return them in Known.

LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)

Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...

LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const

Return true if 'Op & Mask' is known to be zero.

SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)

Create an add instruction with appropriate flags when used for addressing some offset of an object.

LLVMContext * getContext() const

const SDValue & setRoot(SDValue N)

Set the current root tag of the SelectionDAG.

LLVM_ABI SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)

Mutate the specified node in-place to have the specified operands.

SDValue getEntryNode() const

Return the token chain corresponding to the entry of the function.

LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)

Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

This class is used to represent ISD::STORE nodes.

const SDValue & getBasePtr() const

const SDValue & getValue() const

StringRef - Represent a constant reference to a string, i.e.

void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)

Indicate that the specified operation does not work with the specified type and indicate what to do a...

void setMaxDivRemBitWidthSupported(unsigned SizeInBits)

Set the size in bits of the maximum div/rem the backend supports.

bool PredictableSelectIsExpensive

Tells the code generator that select is more expensive than a branch if the branch is usually predict...

virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT, std::optional< unsigned > ByteOffset=std::nullopt) const

Return true if it is profitable to reduce a load to a smaller type.

unsigned MaxStoresPerMemcpyOptSize

Likewise for functions with the OptSize attribute.

const TargetMachine & getTargetMachine() const

virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const

Certain targets require unusual breakdowns of certain types.

unsigned MaxGluedStoresPerMemcpy

Specify max number of store instructions to glue in inlined memcpy.

virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const

Certain combinations of ABIs, Targets and features require that types are legal for some operations a...

void addBypassSlowDiv(unsigned int SlowBitWidth, unsigned int FastBitWidth)

Tells the code generator which bitwidths to bypass.

void setMaxLargeFPConvertBitWidthSupported(unsigned SizeInBits)

Set the size in bits of the maximum fp to/from int conversion the backend supports.

void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)

Set the maximum atomic operation size supported by the backend.

SelectSupportKind

Enum that describes what type of support for selects the target has.

virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const

Determine if the target supports unaligned memory accesses.

unsigned MaxStoresPerMemsetOptSize

Likewise for functions with the OptSize attribute.

EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const

Returns the type for the shift amount of a shift opcode.

unsigned MaxStoresPerMemmove

Specify maximum number of store instructions per memmove call.

virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const

Return the ValueType of the result of SETCC operations.

virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const

For types supported by the target, this is an identity function.

unsigned MaxStoresPerMemmoveOptSize

Likewise for functions with the OptSize attribute.

bool isTypeLegal(EVT VT) const

Return true if the target has native support for the specified value type.

void setSupportsUnalignedAtomics(bool UnalignedSupported)

Sets whether unaligned atomic operations are supported.

bool isOperationLegal(unsigned Op, EVT VT) const

Return true if the specified operation is legal on this target.

unsigned MaxStoresPerMemset

Specify maximum number of store instructions per memset call.

void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)

Indicate that the specified truncating store does not work with the specified type and indicate what ...

void setMinCmpXchgSizeInBits(unsigned SizeInBits)

Sets the minimum cmpxchg or ll/sc size supported by the backend.

void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)

If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...

void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)

Targets should invoke this method for each target independent node that they want to provide a custom...

void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)

Indicate that the specified load with extension does not work with the specified type and indicate wh...

unsigned GatherAllAliasesMaxDepth

Depth that GatherAllAliases should continue looking for chain dependencies when trying to find a more...

NegatibleCost

Enum that specifies when a float negation is beneficial.

bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const

This function returns true if the memory access is aligned or if the target allows this specific unal...

unsigned MaxStoresPerMemcpy

Specify maximum number of store instructions per memcpy call.

void setSchedulingPreference(Sched::Preference Pref)

Specify the target scheduling preference.

void setJumpIsExpensive(bool isExpensive=true)

Tells the code generator not to expand logic operations on comparison predicates into separate sequen...

This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...

SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const

SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const

More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...

SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const

Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.

bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const

Check to see if the specified operand of the specified instruction is a constant integer.

std::pair< SDValue, SDValue > expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const

Expands an unaligned load to 2 half-size loads for an integer, and possibly more for vectors.

virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const

Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...

std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const

Turn load of vector type into a load of the individual elements.

bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const

Look at Op.

TargetLowering(const TargetLowering &)=delete

virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const

Return true if Op can create undef or poison from non-undef & non-poison operands.

Primary interface to the complete machine description for the target machine.

TargetSubtargetInfo - Generic base class for all target subtargets.

static constexpr TypeSize getFixed(ScalarTy ExactSize)

The instances of the Type class are immutable: once they are created, they are never changed.

LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY

If this is a vector type, return the getPrimitiveSizeInBits value for the element type.

LLVM Value Representation.

LLVM_ABI StringRef getName() const

Return a constant reference to the value's name.

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

@ CONSTANT_ADDRESS_32BIT

Address space for 32-bit constant memory.

@ REGION_ADDRESS

Address space for region memory. (GDS)

@ LOCAL_ADDRESS

Address space for local memory.

@ CONSTANT_ADDRESS

Address space for constant memory (VTX2).

@ GLOBAL_ADDRESS

Address space for global memory (RAT0, VTX0).

bool isIntrinsicAlwaysUniform(unsigned IntrID)

TargetExtType * isNamedBarrier(const GlobalVariable &GV)

bool isUniformMMO(const MachineMemOperand *MMO)

unsigned ID

LLVM IR allows arbitrary numbers to be used as calling convention identifiers.

@ AMDGPU_CS

Used for Mesa/AMDPAL compute shaders.

@ AMDGPU_VS

Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tessellation and geometry are not in use, or otherwise copy shader if one is needed).

@ AMDGPU_KERNEL

Used for AMDGPU code object kernels.

@ AMDGPU_Gfx

Used for AMD graphics targets.

@ AMDGPU_CS_ChainPreserve

Used on AMDGPUs to give the middle-end more control over argument placement.

@ AMDGPU_HS

Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).

@ AMDGPU_GS

Used for Mesa/AMDPAL geometry shaders.

@ AMDGPU_CS_Chain

Used on AMDGPUs to give the middle-end more control over argument placement.

@ AMDGPU_PS

Used for Mesa/AMDPAL pixel shaders.

@ Cold

Attempts to make code in the caller as efficient as possible under the assumption that the call is not commonly executed.

@ SPIR_KERNEL

Used for SPIR kernel functions.

@ Fast

Attempts to make calls as fast as possible (e.g. by passing things in registers).

@ AMDGPU_ES

Used for AMDPAL shader stage before geometry shader if geometry is in use.

@ AMDGPU_LS

Used for AMDPAL vertex shader if tessellation is in use.

@ C

The default llvm calling convention, compatible with C.

NodeType

ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.

@ SETCC

SetCC operator - This evaluates to a true value iff the condition is true.

@ SMUL_LOHI

SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2*N], and return the full value as two results, each of type iN.

@ INSERT_SUBVECTOR

INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.

@ BSWAP

Byte Swap and Counting operators.

@ ADDC

Carry-setting nodes for multiple precision addition and subtraction.

@ FMAD

FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.

@ ADD

Simple integer binary arithmetic operators.

@ ANY_EXTEND

ANY_EXTEND - Used for integer types. The high bits are undefined.

@ FMA

FMA - Perform a * b + c with no intermediate rounding step.

@ SINT_TO_FP

[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter) to floating point.

@ CONCAT_VECTORS

CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length and element type, this produces a concatenated vector result value, with length equal to the sum of the lengths of the input vectors.

@ FADD

Simple binary floating point operators.

@ SDIVREM

SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.

@ BUILD_PAIR

BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.

@ SIGN_EXTEND

Conversion operators.

@ CTTZ_ZERO_UNDEF

Bit counting operators with an undefined result for zero inputs.

@ FCANONICALIZE

Returns the platform-specific canonical encoding of a floating-point number.

@ IS_FPCLASS

Performs a check of a floating-point class property, as defined by IEEE-754.

@ SELECT

Select(COND, TRUEVAL, FALSEVAL).

@ EXTRACT_ELEMENT

EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant, which is required to be operand #1) half of the integer or float value specified as operand #0.

@ MULHU

MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of type iN containing the high bits of the result.
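
For example, an i8 MULHU of 200 and 200 yields 0x9C: the full 16-bit product 40000 is 0x9C40, and only the high byte is returned.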

@ SHL

Shift and rotation operations.

@ VECTOR_SHUFFLE

VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.

@ EXTRACT_SUBVECTOR

EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.

@ EntryToken

EntryToken - This is the marker used to indicate the start of a region.

@ EXTRACT_VECTOR_ELT

EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially variable) element number IDX.

@ CopyToReg

CopyToReg - This node has three operands: a chain, a register number to set to this value, and the value to set it to.

@ ZERO_EXTEND

ZERO_EXTEND - Used for integer types, zeroing the new bits.

@ SELECT_CC

Select with condition operator - This selects between a true value and a false value (ops #2 and #3) based on the boolean result of comparing the lhs and rhs (ops #0 and #1), evaluated under the condition code in op #4 (a CondCodeSDNode).

@ SIGN_EXTEND_INREG

SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in a large integer register (e.g. sign extending the low 8 bits of a 32-bit register).
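
The SHL/SRA pair it models is easy to write out; a sketch for sign extending the low 8 bits of an i32 value X (DAG and DL assumed in scope):

// x = (x << 24) >>s 24, performed as one atomic node.
SDValue Amt = DAG.getShiftAmountConstant(24, MVT::i32, DL);
SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i32, X, Amt);
SDValue Ext = DAG.getNode(ISD::SRA, DL, MVT::i32, Shl, Amt);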

@ SMIN

[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.

@ VSELECT

Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector result.

@ UADDO_CARRY

Carry-using nodes for multiple precision addition and subtraction.

@ FP_TO_SINT

FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.

@ AND

Bitwise operators - logical and, logical or, logical xor.

@ INTRINSIC_WO_CHAIN

RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic function with no side effects.

@ ADDE

Carry-using nodes for multiple precision addition and subtraction.

@ INSERT_VECTOR_ELT

INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.

@ TokenFactor

TokenFactor - This node takes multiple tokens as input and produces a single token result.

@ FP_ROUND

X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the destination VT.

@ TRUNCATE

TRUNCATE - Completely drop the high bits.

@ AssertSext

AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero or sign extended from a narrower type.

@ FCOPYSIGN

FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.

@ INTRINSIC_W_CHAIN

RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target intrinsic function with side effects that returns a result.

@ BUILD_VECTOR

BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified, possibly variable elements.

bool isNormalStore(const SDNode *N)

Returns true if the specified node is a non-truncating and unindexed store.

CondCode

ISD::CondCode enum - These are ordered carefully to make the bitfields below work out.

LoadExtType

LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).

bool isNormalLoad(const SDNode *N)

Returns true if the specified node is a non-extending and unindexed load.
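
These predicates commonly gate DAG combines; a minimal sketch over an SDNode *N:

// Only touch plain loads: unindexed, non-extending, non-volatile, non-atomic.
if (ISD::isNormalLoad(N) && cast<LoadSDNode>(N)->isSimple()) {
  // ... safe to fold or move the load here ...
}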

initializer< Ty > init(const Ty &Val)

bool all_of(R &&range, UnaryPredicate P)

Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.

MaybeAlign getAlign(const CallInst &I, unsigned Index)

LLVM_ABI bool isNullConstant(SDValue V)

Returns true if V is a constant integer zero.

void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< EVT > *MemVTs=nullptr, SmallVectorImpl< TypeSize > *Offsets=nullptr, TypeSize StartingOffset=TypeSize::getZero())

ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual underlying non-aggregate types that comprise it.
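
A sketch of typical use when lowering an aggregate (TLI, DL, and Ty assumed in scope):

SmallVector<EVT, 4> ValueVTs;
SmallVector<TypeSize, 4> Offsets;
ComputeValueVTs(TLI, DL, Ty, ValueVTs, /*MemVTs=*/nullptr, &Offsets);
for (auto [VT, Off] : zip(ValueVTs, Offsets)) {
  // One EVT per non-aggregate leaf of Ty, with its byte offset.
}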

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)

CCAssignFn - This function assigns a location for Val, updating State to reflect the change.

LLVM_ABI ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)

Returns the SDNode if it is a constant splat BuildVector or constant float.

uint64_t PowerOf2Ceil(uint64_t A)

Returns the power of two which is greater than or equal to the given value.

int countr_zero(T Val)

Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.

int countl_zero(T Val)

Count the number of 0's from the most significant bit to the least significant, stopping at the first 1.
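
Small worked values for these bit utilities:

uint64_t P = PowerOf2Ceil(17);       // 32: round up to a power of two
int TZ = countr_zero(40u);           // 3, since 40 == 0b101000
int LZ = countl_zero(uint32_t(1));   // 31: a 32-bit 1 has 31 leading zeros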

decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)

constexpr uint32_t Hi_32(uint64_t Value)

Return the high 32 bits of a 64 bit value.

constexpr uint32_t Lo_32(uint64_t Value)

Return the low 32 bits of a 64 bit value.
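
Splitting a 64-bit immediate into 32-bit halves, as done when a 64-bit operation is decomposed into 32-bit pieces:

uint64_t Imm = 0x123456789ABCDEF0ULL;
uint32_t Hi = Hi_32(Imm); // 0x12345678
uint32_t Lo = Lo_32(Imm); // 0x9ABCDEF0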

bool isa(const From &Val)

isa - Return true if the parameter to the template is an instance of one of the template type arguments.

LLVM_ABI raw_fd_ostream & errs()

This returns a reference to a raw_ostream for standard error.

To bit_cast(const From &from) noexcept

@ Mul

Product of integers.

uint64_t alignTo(uint64_t Size, Align A)

Returns a multiple of A needed to store Size bytes.

DWARFExpression::Operation Op

LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)

Returns the SDNode if it is a constant splat BuildVector or constant int.

constexpr unsigned BitWidth

decltype(auto) cast(const From &Val)

cast - Return the argument parameter cast to the specified type.

LLVM_ABI bool isOneConstant(SDValue V)

Returns true if V is a constant integer one.

Align commonAlignment(Align A, uint64_t Offset)

Returns the alignment that satisfies both alignments.

APFloat neg(APFloat X)

Returns the negated value of the argument.

unsigned Log2(Align A)

Returns the log2 of the alignment.
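
Worked values for the alignment helpers above:

uint64_t Sz = alignTo(10, Align(16));               // 16: next multiple of 16
unsigned Sh = Log2(Align(16));                      // 4
Align A = commonAlignment(Align(16), /*Offset=*/8); // Align(8) holds at +8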

static cl::opt< unsigned > CostThreshold("dfa-cost-threshold", cl::desc("Maximum cost accepted for the transformation"), cl::Hidden, cl::init(50))

LLVM_ABI bool isAllOnesConstant(SDValue V)

Returns true if V is an integer constant with all bits set.
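
A sketch of how these constant predicates gate a combine over a binary node Op (the simplifications shown are illustrative):

SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
if (isNullConstant(RHS))
  return LHS; // e.g. or x, 0 --> x
if (isAllOnesConstant(RHS))
  return RHS; // e.g. or x, -1 --> -1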

LLVM_ABI void reportFatalUsageError(Error Err)

Report a fatal error that does not indicate a bug in LLVM.

void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)

Implement std::swap in terms of BitVector swap.

This struct is a compact representation of a valid (non-zero power of two) alignment.

MCRegister getRegister() const

unsigned getStackOffset() const

DenormalModeKind Input

Denormal treatment kind for floating point instruction inputs in the default floating-point environment.

@ PreserveSign

The sign of a flushed-to-zero number is preserved in the sign of 0.

static constexpr DenormalMode getPreserveSign()

TypeSize getStoreSize() const

Return the number of bytes overwritten by a store of the specified value type.

EVT getPow2VectorType(LLVMContext &Context) const

Widens the length of the given vector EVT up to the nearest power of 2 and returns that type.

bool isSimple() const

Test if the given EVT is simple (as opposed to being extended).

static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)

Returns the EVT that represents a vector NumElements in length, where each element is of type VT.

EVT changeTypeToInteger() const

Return the type converted to an equivalently sized integer or vector with integer element type.

bool isFloatingPoint() const

Return true if this is a FP or a vector FP type.

EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const

TypeSize getSizeInBits() const

Return the size of the specified value type in bits.

bool isByteSized() const

Return true if the bit size is a multiple of 8.

uint64_t getScalarSizeInBits() const

EVT getHalfSizedIntegerVT(LLVMContext &Context) const

Finds the smallest simple value type that is greater than or equal to half the width of this EVT.

bool isPow2VectorType() const

Returns true if the given vector type has a power-of-2 number of elements.

TypeSize getStoreSizeInBits() const

Return the number of bits overwritten by a store of the specified value type.

MVT getSimpleVT() const

Return the SimpleValueType held in the specified simple EVT.

static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)

Returns the EVT that represents an integer with the given number of bits.

uint64_t getFixedSizeInBits() const

Return the size of the specified fixed width value type in bits.

EVT getRoundIntegerType(LLVMContext &Context) const

Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight), and returns the integer EVT with that number of bits.
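
A sketch of the EVT helpers above, assuming an LLVMContext &Ctx is available:

EVT I24 = EVT::getIntegerVT(Ctx, 24);        // extended (non-simple) type
EVT R = I24.getRoundIntegerType(Ctx);        // MVT::i32: next power of two
EVT V3 = EVT::getVectorVT(Ctx, MVT::i32, 3); // 3 x i32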

bool isVector() const

Return true if this is a vector value type.

EVT getScalarType() const

If this is a vector type, return the element type, otherwise return this.

bool bitsGE(EVT VT) const

Return true if this has no less bits than VT.

EVT getVectorElementType() const

Given a vector type, return the type of each element.

bool isExtended() const

Test if the given EVT is extended (as opposed to being simple).

EVT changeVectorElementType(EVT EltVT) const

Return a VT for a vector type whose attributes match ourselves with the exception of the element type, which is chosen by the caller.

LLVM_ABI const fltSemantics & getFltSemantics() const

Returns an APFloat semantics tag appropriate for the value type.

unsigned getVectorNumElements() const

Given a vector type, return the number of elements it contains.

bool bitsLE(EVT VT) const

Return true if this has no more bits than VT.

bool isInteger() const

Return true if this is an integer or a vector integer type.

InputArg - This struct carries flags and type information about a single incoming (formal) argument or incoming (from the perspective of the caller) return value virtual register.

MVT VT

Legalized type of this argument part.

bool isNonNegative() const

Returns true if this value is known to be non-negative.

unsigned countMinTrailingZeros() const

Returns the minimum number of trailing zero bits.

bool isUnknown() const

Returns true if we don't know any bits.

KnownBits trunc(unsigned BitWidth) const

Return known bits for a truncation of the value we're tracking.

unsigned getBitWidth() const

Get the bit width of this value.

void resetAll()

Resets the known state of all bits.

unsigned countMaxActiveBits() const

Returns the maximum number of bits needed to represent all possible unsigned values with these known bits.

unsigned countMinLeadingZeros() const

Returns the minimum number of leading zero bits.

APInt getMaxValue() const

Return the maximal unsigned value possible given these KnownBits.

APInt getMinValue() const

Return the minimal unsigned value possible given these KnownBits.

bool isStrictlyPositive() const

Returns true if this value is known to be positive.

bool isNegative() const

Returns true if this value is known to be negative.

unsigned countMaxSignificantBits() const

Returns the maximum number of bits needed to represent all possible signed values with these known bits.
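
A small KnownBits example tying these queries together:

KnownBits Known(8);    // 8-bit value, nothing known yet
Known.Zero = 0xF0;     // high nibble known to be 0
Known.One = 0x01;      // lowest bit known to be 1
unsigned LZ = Known.countMinLeadingZeros(); // 4
unsigned AB = Known.countMaxActiveBits();   // 4
APInt Min = Known.getMinValue();            // 1
APInt Max = Known.getMaxValue();            // 15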

This class contains a discriminated union of information about pointers in memory operands, relating them back to the original IR.

LLVM_ABI bool isDereferenceable(unsigned Size, LLVMContext &C, const DataLayout &DL) const

Return true if memory region [V, V+Offset+Size) is known to be dereferenceable.

static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)

Stack pointer relative access.

MachinePointerInfo getWithOffset(int64_t O) const

These are IR-level optimization flags that may be propagated to SDNodes.

void setAllowContract(bool b)

This represents a list of ValueType's that has been intern'd by a SelectionDAG.

DenormalMode FP32Denormals

If this is set, neither input nor output denormals are flushed for most f32 instructions.

This structure contains all information that is necessary for lowering calls.

SmallVector< ISD::InputArg, 32 > Ins

bool isBeforeLegalizeOps() const

CombineLevel getDAGCombineLevel()

LLVM_ABI void AddToWorklist(SDNode *N)

bool isCalledByLegalizer() const

bool isBeforeLegalize() const

LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)

LLVM_ABI void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)

A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetLowering to its clients that want to combine.