AMDGPUISelLowering.cpp Source File (original) (raw)

26#include "llvm/IR/IntrinsicsAMDGPU.h"

31using namespace llvm;

33#include "AMDGPUGenCallingConv.inc"

36 "amdgpu-bypass-slow-div",

37 cl::desc("Skip 64-bit divide for dynamic 32-bit values"),

43 if (StoreSize <= 32)

46 if (StoreSize % 32 == 0)

49 return VT;

50}

100

103

106

109

112

115

118

121

124

127

130

133

136

139

142

145

148

151

154

157

158

161

164

167

170

173

176

179

182

183

184

188

190 if (VT == MVT::i64)

191 continue;

192

198 }

199 }

200

202 for (auto MemVT :

203 {MVT::v2i8, MVT::v4i8, MVT::v2i16, MVT::v3i16, MVT::v4i16})

206

221

228

241

244

247

250

253

256

259

262

265

268

271

274

277

280

283

286

289

292

295

298

301

304

307

310

313

316

319

322

327

332

347

351

355

357

365

371

375

379

383

387

395

398

400

401

402

404

405

406

408 ISD::FROUNDEVEN, ISD::FTRUNC},

409 {MVT::f16, MVT::f32}, Legal);

411

415 {MVT::f16, MVT::f32, MVT::f64}, Expand);

416

418 {ISD::FLOG, ISD::FLOG10, ISD::FEXP, ISD::FEXP2, ISD::FEXP10}, MVT::f32,

420

422

424

425 setOperationAction({ISD::LRINT, ISD::LLRINT}, {MVT::f16, MVT::f32, MVT::f64},

427

429

430 if (Subtarget->has16BitInsts()) {

433 } else {

436 }

437

438 setOperationAction({ISD::FLOG10, ISD::FLOG, ISD::FEXP, ISD::FEXP10}, MVT::f16,

440

442 if (Subtarget->has16BitInsts()) {

444 }

445

446

447

448

450 {MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32,

451 MVT::v6f32, MVT::v7f32, MVT::v8f32, MVT::v16f32,

452 MVT::v2f64, MVT::v3f64, MVT::v4f64, MVT::v8f64,

453 MVT::v16f64},

455

458 {MVT::v2f16, MVT::v3f16, MVT::v4f16, MVT::v16f16},

460

461

463

465 {MVT::v3i32, MVT::v3f32, MVT::v4i32, MVT::v4f32,

466 MVT::v5i32, MVT::v5f32, MVT::v6i32, MVT::v6f32,

467 MVT::v7i32, MVT::v7f32, MVT::v8i32, MVT::v8f32,

468 MVT::v9i32, MVT::v9f32, MVT::v10i32, MVT::v10f32,

469 MVT::v11i32, MVT::v11f32, MVT::v12i32, MVT::v12f32},

471

474 {MVT::v2f32, MVT::v2i32, MVT::v3f32, MVT::v3i32, MVT::v4f32,

475 MVT::v4i32, MVT::v5f32, MVT::v5i32, MVT::v6f32, MVT::v6i32,

476 MVT::v7f32, MVT::v7i32, MVT::v8f32, MVT::v8i32, MVT::v9f32,

477 MVT::v9i32, MVT::v10i32, MVT::v10f32, MVT::v11i32, MVT::v11f32,

478 MVT::v12i32, MVT::v12f32, MVT::v16i32, MVT::v32f32, MVT::v32i32,

479 MVT::v2f64, MVT::v2i64, MVT::v3f64, MVT::v3i64, MVT::v4f64,

480 MVT::v4i64, MVT::v8f64, MVT::v8i64, MVT::v16f64, MVT::v16i64},

482

485

486 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };

487 for (MVT VT : ScalarIntVTs) {

488

491

492

494

495

497

499

500

502 }

503

504

506

508

510

516

519

523

524 for (auto VT : {MVT::i8, MVT::i16})

526

528 MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32, MVT::v6i32, MVT::v7i32,

529 MVT::v9i32, MVT::v10i32, MVT::v11i32, MVT::v12i32};

530

531 for (MVT VT : VectorIntTypes) {

532

546 }

547

549 MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32, MVT::v6f32, MVT::v7f32,

550 MVT::v9f32, MVT::v10f32, MVT::v11f32, MVT::v12f32};

551

552 for (MVT VT : FloatVectorTypes) {

554 {ISD::FABS, ISD::FMINNUM, ISD::FMAXNUM,

555 ISD::FADD, ISD::FCEIL, ISD::FCOS,

556 ISD::FDIV, ISD::FEXP2, ISD::FEXP,

557 ISD::FEXP10, ISD::FLOG2, ISD::FREM,

558 ISD::FLOG, ISD::FLOG10, ISD::FPOW,

559 ISD::FFLOOR, ISD::FTRUNC, ISD::FMUL,

560 ISD::FMA, ISD::FRINT, ISD::FNEARBYINT,

561 ISD::FSQRT, ISD::FSIN, ISD::FSUB,

566 }

567

568

569

570

573

576

579

582

585

588

591

594

597

600

603

606

608

609

610

611

612

613

614

615

616

618

619

620

624

625

628

639

643}

644

647 return true;

648

649 const auto Flags = Op.getNode()->getFlags();

650 if (Flags.hasNoSignedZeros())

651 return true;

652

653 return false;

654}

655

656

657

658

659

662 switch (Opc) {

668 case ISD::FMINNUM:

669 case ISD::FMAXNUM:

670 case ISD::FMINNUM_IEEE:

671 case ISD::FMAXNUM_IEEE:

672 case ISD::FMINIMUM:

673 case ISD::FMAXIMUM:

674 case ISD::FMINIMUMNUM:

675 case ISD::FMAXIMUMNUM:

677 case ISD::FSIN:

678 case ISD::FTRUNC:

679 case ISD::FRINT:

680 case ISD::FNEARBYINT:

681 case ISD::FROUNDEVEN:

683 case AMDGPUISD::RCP:

684 case AMDGPUISD::RCP_LEGACY:

685 case AMDGPUISD::RCP_IFLAG:

686 case AMDGPUISD::SIN_HW:

687 case AMDGPUISD::FMUL_LEGACY:

688 case AMDGPUISD::FMIN_LEGACY:

689 case AMDGPUISD::FMAX_LEGACY:

690 case AMDGPUISD::FMED3:

691

692 return true;

693 case ISD::BITCAST:

695 default:

696 return false;

697 }

698}

699

701 unsigned Opc = N->getOpcode();

702 if (Opc == ISD::BITCAST) {

703

704

705 SDValue BCSrc = N->getOperand(0);

709 }

710

712 }

713

715}

716

717

718

719

722 return (N->getNumOperands() > 2 && N->getOpcode() != ISD::SELECT) ||

723 VT == MVT::f64;

724}

725

726

727

730

731 return N->getValueType(0) == MVT::f32;

732}

733

734

735

739 return false;

740

741 switch (N->getOpcode()) {

745 case ISD::INLINEASM:

746 case ISD::INLINEASM_BR:

747 case AMDGPUISD::DIV_SCALE:

749

750

751

752

753 case ISD::BITCAST:

754 return false;

756 switch (N->getConstantOperandVal(0)) {

757 case Intrinsic::amdgcn_interp_p1:

758 case Intrinsic::amdgcn_interp_p2:

759 case Intrinsic::amdgcn_interp_mov:

760 case Intrinsic::amdgcn_interp_p1_f16:

761 case Intrinsic::amdgcn_interp_p2_f16:

762 return false;

763 default:

764 return true;

765 }

766 }

769 default:

770 return true;

771 }

772}

773

776

777

778

779

780

781 unsigned NumMayIncreaseSize = 0;

782 MVT VT = N->getValueType(0).getScalarType().getSimpleVT();

783

785

786

787 for (const SDNode *U : N->users()) {

789 return false;

790

793 return false;

794 }

795 }

796

797 return true;

798}

799

803

804

806 if (Size <= 32)

807 return MVT::i32;

809}

810

814

818

819

820

822 bool ForCodeSize) const {

824 return (ScalarVT == MVT::f32 || ScalarVT == MVT::f64 ||

825 (ScalarVT == MVT::f16 && Subtarget->has16BitInsts()));

826}

827

828

831 return (ScalarVT != MVT::f32 && ScalarVT != MVT::f64);

832}

833

836 std::optional ByteOffset) const {

837

839 return false;

840

842

843

844

845 if (NewSize >= 32)

846 return true;

847

848 EVT OldVT = N->getValueType(0);

850

853

854

855

856 if (OldSize >= 32 && NewSize < 32 && MN->getAlign() >= Align(4) &&

862 return false;

863

864

865

866

867

868

869

870

871 return (OldSize < 32);

872}

873

877

879

881 return false;

882

885

886 if ((LScalarSize >= CastScalarSize) && (CastScalarSize < 32))

887 return false;

888

889 unsigned Fast = 0;

891 CastTy, MMO, &Fast) &&

893}

894

895

896

897

901

905

907 switch (N->getOpcode()) {

910 return true;

912 unsigned IntrID = N->getConstantOperandVal(0);

914 }

916 unsigned IntrID = N->getConstantOperandVal(1);

918 }

919 case ISD::LOAD:

922 return true;

923 return false;

924 case AMDGPUISD::SETCC:

925 return true;

926 }

927 return false;

928}

929

933

934 switch (Op.getOpcode()) {

937

940 break;

941 }

942 case AMDGPUISD::RCP: {

944 EVT VT = Op.getValueType();

946

949 if (NegSrc)

950 return DAG.getNode(AMDGPUISD::RCP, SL, VT, NegSrc, Op->getFlags());

952 }

953 default:

954 break;

955 }

956

959}

960

961

962

963

964

967

968

969 return VT == MVT::f32 || VT == MVT::f64 ||

970 (Subtarget->has16BitInsts() && (VT == MVT::f16 || VT == MVT::bf16));

971}

972

975

977 return VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f16 || VT == MVT::bf16;

978}

979

981 unsigned NumElem,

982 unsigned AS) const {

983 return true;

984}

985

987

988

989

990

991

992

993

994

995 return true;

996}

997

999

1000

1001 unsigned SrcSize = Source.getSizeInBits();

1003

1004 return DestSize < SrcSize && DestSize % 32 == 0 ;

1005}

1006

1008

1009

1010 unsigned SrcSize = Source->getScalarSizeInBits();

1012

1013 if (DestSize== 16 && Subtarget->has16BitInsts())

1014 return SrcSize >= 32;

1015

1016 return DestSize < SrcSize && DestSize % 32 == 0;

1017}

1018

1020 unsigned SrcSize = Src->getScalarSizeInBits();

1022

1023 if (SrcSize == 16 && Subtarget->has16BitInsts())

1024 return DestSize >= 32;

1025

1026 return SrcSize == 32 && DestSize == 64;

1027}

1028

1030

1031

1032

1033

1034

1035 if (Src == MVT::i16)

1036 return Dest == MVT::i32 ||Dest == MVT::i64 ;

1037

1038 return Src == MVT::i32 && Dest == MVT::i64;

1039}

1040

1042 EVT DestVT) const {

1043 switch (N->getOpcode()) {

1059 if (Subtarget->has16BitInsts() &&

1060 (!DestVT.isVector() || !Subtarget->hasVOP3PInsts())) {

1061

1062 if (->isDivergent() && DestVT.isInteger() &&

1066 return false;

1067 }

1068 }

1069 return true;

1070 default:

1071 break;

1072 }

1073

1074

1075

1076

1077

1078

1079

1082

1083 return true;

1084}

1085

1090 "Expected shift op");

1091

1092 SDValue ShiftLHS = N->getOperand(0);

1094 return false;

1095

1098 return false;

1099

1100

1101

1103 N->getOpcode() != ISD::SHL || N->getOperand(0).getOpcode() != ISD::OR)

1104 return true;

1105

1106

1107 if (N->getValueType(0) == MVT::i32 && N->hasOneUse() &&

1108 (N->user_begin()->getOpcode() == ISD::SRA ||

1109 N->user_begin()->getOpcode() == ISD::SRL))

1110 return false;

1111

1112

1113 auto IsShiftAndLoad = [](SDValue LHS, SDValue RHS) {

1114 if (LHS.getOpcode() != ISD::SHL)

1115 return false;

1119 return LHS0 && LHS1 && RHSLd && LHS0->getExtensionType() == ISD::ZEXTLOAD &&

1120 LHS1->getAPIntValue() == LHS0->getMemoryVT().getScalarSizeInBits() &&

1122 };

1123 SDValue LHS = N->getOperand(0).getOperand(0);

1124 SDValue RHS = N->getOperand(0).getOperand(1);

1125 return !(IsShiftAndLoad(LHS, RHS) || IsShiftAndLoad(RHS, LHS));

1126}

1127

1128

1129

1130

1131

1133 bool IsVarArg) {

1134 switch (CC) {

1142 return CC_AMDGPU;

1145 return CC_AMDGPU_CS_CHAIN;

1149 return CC_AMDGPU_Func;

1152 return CC_SI_Gfx;

1155 default:

1157 }

1158}

1159

1161 bool IsVarArg) {

1162 switch (CC) {

1175 return RetCC_SI_Shader;

1178 return RetCC_SI_Gfx;

1182 return RetCC_AMDGPU_Func;

1183 default:

1185 }

1186}

1187

1188

1189

1190

1191

1192

1193

1194

1195

1196

1197

1198

1199

1200

1201

1202

1203

1204

1205

1206

1207

1208

1209

1210

1211

1212

1213

1221 const unsigned ExplicitOffset = ST.getExplicitKernelArgOffset();

1223

1225 uint64_t ExplicitArgOffset = 0;

1227

1228 unsigned InIndex = 0;

1229

1231 const bool IsByRef = Arg.hasByRefAttr();

1232 Type *BaseArgTy = Arg.getType();

1233 Type *MemArgTy = IsByRef ? Arg.getParamByRefType() : BaseArgTy;

1234 Align Alignment = DL.getValueOrABITypeAlignment(

1235 IsByRef ? Arg.getParamAlign() : std::nullopt, MemArgTy);

1236 MaxAlign = std::max(Alignment, MaxAlign);

1237 uint64_t AllocSize = DL.getTypeAllocSize(MemArgTy);

1238

1239 uint64_t ArgOffset = alignTo(ExplicitArgOffset, Alignment) + ExplicitOffset;

1240 ExplicitArgOffset = alignTo(ExplicitArgOffset, Alignment) + AllocSize;

1241

1242

1243

1244

1245

1246

1247

1248

1251 ComputeValueVTs(*this, DL, BaseArgTy, ValueVTs, nullptr,

1252 &Offsets, ArgOffset);

1253

1254 for (unsigned Value = 0, NumValues = ValueVTs.size();

1257

1258 EVT ArgVT = ValueVTs[Value];

1259 EVT MemVT = ArgVT;

1262

1263 if (NumRegs == 1) {

1264

1266

1267

1268 MemVT = RegisterVT;

1269 } else {

1270 MemVT = ArgVT;

1271 }

1275

1276

1277

1278 MemVT = RegisterVT;

1279 } else if (ArgVT.isVector() &&

1281

1282

1285

1286 MemVT = RegisterVT;

1287 } else {

1292 } else if (RegisterVT.isVector()) {

1295 assert(MemoryBits % NumElements == 0);

1296

1297

1299 MemoryBits / NumElements);

1300 MemVT = EVT::getVectorVT(State.getContext(), ScalarVT, NumElements);

1301 } else {

1303 }

1304 }

1305

1306

1309

1310

1315 }

1316

1317 unsigned PartOffset = 0;

1318 for (unsigned i = 0; i != NumRegs; ++i) {

1320 BasePartOffset + PartOffset,

1324 }

1325 }

1326 }

1327}

1328

1331 bool isVarArg,

1335

1336

1337

1338 return DAG.getNode(AMDGPUISD::ENDPGM, DL, MVT::Other, Chain);

1339}

1340

1341

1342

1343

1344

1345

1347 bool IsVarArg) {

1349}

1350

1352 bool IsVarArg) {

1354}

1355

1359 int ClobberedFI) const {

1362 int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;

1363

1364

1365

1366

1368

1369

1373 if (FI->getIndex() < 0) {

1374 int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());

1375 int64_t InLastByte = InFirstByte;

1376 InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;

1377

1378 if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||

1379 (FirstByte <= InFirstByte && InFirstByte <= LastByte))

1381 }

1382 }

1383 }

1384 }

1385

1386

1388}

1389

1395

1397

1398 StringRef FuncName("");

1399

1401 FuncName = G->getSymbol();

1403 FuncName = G->getGlobal()->getName();

1404

1407

1411 }

1412

1413

1415 return CLI.Chain;

1416

1419}

1420

1425

1435

1438 switch (Op.getOpcode()) {

1439 default:

1440 Op->print(errs(), &DAG);

1442 "instruction is not implemented yet!");

1443 break;

1450 case ISD::FCEIL: return LowerFCEIL(Op, DAG);

1452 case ISD::FRINT: return LowerFRINT(Op, DAG);

1454 case ISD::FROUNDEVEN:

1458 case ISD::FLOG2:

1460 case ISD::FLOG:

1461 case ISD::FLOG10:

1463 case ISD::FEXP:

1464 case ISD::FEXP10:

1466 case ISD::FEXP2:

1480 }

1481 return Op;

1482}

1483

1487 switch (N->getOpcode()) {

1489

1490

1491

1492

1493

1494

1495 return;

1496 case ISD::FLOG2:

1498 Results.push_back(Lowered);

1499 return;

1500 case ISD::FLOG:

1501 case ISD::FLOG10:

1503 Results.push_back(Lowered);

1504 return;

1505 case ISD::FEXP2:

1507 Results.push_back(Lowered);

1508 return;

1509 case ISD::FEXP:

1510 case ISD::FEXP10:

1512 Results.push_back(Lowered);

1513 return;

1517 Results.push_back(Lowered);

1518 return;

1519 default:

1520 return;

1521 }

1522}

1523

1527

1531

1534 if (std::optional<uint32_t> Address =

1536 if (IsNamedBarrier) {

1537 unsigned BarCnt = DL.getTypeAllocSize(GV->getValueType()) / 16;

1539 }

1541 } else if (IsNamedBarrier) {

1542 llvm_unreachable("named barrier should have an assigned address");

1543 }

1544 }

1545

1549 GV->getName() != "llvm.amdgcn.module.lds" &&

1554 Fn, "local memory global used by non-kernel function",

1556

1557

1558

1559

1560

1561

1565 DAG.setRoot(OutputChain);

1566 return DAG.getPOISON(Op.getValueType());

1567 }

1568

1569

1570 assert(G->getOffset() == 0 &&

1571 "Do not know what to do with an non-zero offset");

1572

1573

1574

1575

1578 }

1580}

1581

1586

1587 EVT VT = Op.getValueType();

1589 unsigned OpBitSize = Op.getOperand(0).getValueType().getSizeInBits();

1590 if (OpBitSize >= 32 && OpBitSize % 32 == 0) {

1591 unsigned NewNumElt = OpBitSize / 32;

1592 EVT NewEltVT = (NewNumElt == 1) ? MVT::i32

1594 MVT::i32, NewNumElt);

1595 for (const SDUse &U : Op->ops()) {

1597 SDValue NewIn = DAG.getNode(ISD::BITCAST, SL, NewEltVT, In);

1598 if (NewNumElt > 1)

1600 else

1601 Args.push_back(NewIn);

1602 }

1603

1605 NewNumElt * Op.getNumOperands());

1607 return DAG.getNode(ISD::BITCAST, SL, VT, BV);

1608 }

1609 }

1610

1611 for (const SDUse &U : Op->ops())

1613

1615}

1616

1621 unsigned Start = Op.getConstantOperandVal(1);

1622 EVT VT = Op.getValueType();

1623 EVT SrcVT = Op.getOperand(0).getValueType();

1624

1628 assert(NumElt % 2 == 0 && NumSrcElt % 2 == 0 && "expect legal types");

1629

1630

1632 EVT NewVT = NumElt == 2

1633 ? MVT::i32

1635 SDValue Tmp = DAG.getNode(ISD::BITCAST, SL, NewSrcVT, Op.getOperand(0));

1636

1638 if (NumElt == 2)

1639 Tmp = Args[0];

1640 else

1642

1643 return DAG.getNode(ISD::BITCAST, SL, VT, Tmp);

1644 }

1645

1648

1650}

1651

1652

1654 if (Val.getOpcode() == ISD::FNEG)

1656

1657 return Val;

1658}

1659

1661 if (Val.getOpcode() == ISD::FNEG)

1663 if (Val.getOpcode() == ISD::FABS)

1667 return Val;

1668}

1669

1675 switch (CCOpcode) {

1688 break;

1691 if (LHS == True)

1692 return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS);

1693 return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS);

1694 }

1699

1700

1701

1702

1706

1707

1708

1709

1710 if (LHS == True)

1711 return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS);

1712 return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS);

1713 }

1716 if (LHS == True)

1717 return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS);

1718 return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS);

1719 }

1727

1728 if (LHS == True)

1729 return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS);

1730 return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS);

1731 }

1734 }

1736}

1737

1738

1744 if ((LHS == True && RHS == False) || (LHS == False && RHS == True))

1746

1748

1749

1750

1751

1755

1756

1757

1758

1759

1760

1761

1762

1763 if (LHS == NegTrue && CFalse && CRHS) {

1768 if (Combined)

1769 return DAG.getNode(ISD::FNEG, DL, VT, Combined);

1771 }

1772 }

1773

1775}

1776

1777std::pair<SDValue, SDValue>

1780

1781 SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Op);

1782

1785

1788

1789 return std::pair(Lo, Hi);

1790}

1791

1799

1807

1808

1809

1810

1811std::pair<EVT, EVT>

1813 EVT LoVT, HiVT;

1816 unsigned LoNumElts = PowerOf2Ceil((NumElts + 1) / 2);

1818 HiVT = NumElts - LoNumElts == 1

1819 ? EltVT

1821 return std::pair(LoVT, HiVT);

1822}

1823

1824

1825

1826std::pair<SDValue, SDValue>

1828 const EVT &LoVT, const EVT &HiVT,

1830 EVT VT = N.getValueType();

1834 "More vector elements requested than available!");

1837

1839

1843

1844

1847 return {Lo, Hi};

1848 }

1849

1852 HiNumElts);

1854 return {Lo, Hi};

1855 }

1856

1859 return {Lo, Hi};

1860}

1861

1865 EVT VT = Op.getValueType();

1867

1868

1869

1870

1875 }

1876

1877 SDValue BasePtr = Load->getBasePtr();

1878 EVT MemVT = Load->getMemoryVT();

1879

1880 const MachinePointerInfo &SrcValue = Load->getMemOperand()->getPointerInfo();

1881

1882 EVT LoVT, HiVT;

1883 EVT LoMemVT, HiMemVT;

1885

1887 std::tie(LoMemVT, HiMemVT) = getSplitDestVTs(MemVT, DAG);

1889

1891 Align BaseAlign = Load->getAlign();

1893

1895 Load->getExtensionType(), SL, LoVT, Load->getChain(), BasePtr, SrcValue,

1896 LoMemVT, BaseAlign, Load->getMemOperand()->getFlags(), Load->getAAInfo());

1899 Load->getExtensionType(), SL, HiVT, Load->getChain(), HiPtr,

1901 Load->getMemOperand()->getFlags(), Load->getAAInfo());

1902

1904 if (LoVT == HiVT) {

1905

1907 } else {

1912 VT, Join, HiLoad,

1914 }

1915

1918

1920}

1921

1925 EVT VT = Op.getValueType();

1926 SDValue BasePtr = Load->getBasePtr();

1927 EVT MemVT = Load->getMemoryVT();

1929 const MachinePointerInfo &SrcValue = Load->getMemOperand()->getPointerInfo();

1930 Align BaseAlign = Load->getAlign();

1932

1933

1934

1935 if (NumElements != 3 ||

1936 (BaseAlign < Align(8) &&

1939

1940 assert(NumElements == 3);

1941

1942 EVT WideVT =

1944 EVT WideMemVT =

1947 Load->getExtensionType(), SL, WideVT, Load->getChain(), BasePtr, SrcValue,

1948 WideMemVT, BaseAlign, Load->getMemOperand()->getFlags());

1953 SL);

1954}

1955

1959 SDValue Val = Store->getValue();

1961

1962

1963

1966

1967 EVT MemVT = Store->getMemoryVT();

1968 SDValue Chain = Store->getChain();

1969 SDValue BasePtr = Store->getBasePtr();

1971

1972 EVT LoVT, HiVT;

1973 EVT LoMemVT, HiMemVT;

1975

1977 std::tie(LoMemVT, HiMemVT) = getSplitDestVTs(MemVT, DAG);

1978 std::tie(Lo, Hi) = splitVector(Val, SL, LoVT, HiVT, DAG);

1979

1981

1982 const MachinePointerInfo &SrcValue = Store->getMemOperand()->getPointerInfo();

1983 Align BaseAlign = Store->getAlign();

1986

1988 DAG.getTruncStore(Chain, SL, Lo, BasePtr, SrcValue, LoMemVT, BaseAlign,

1989 Store->getMemOperand()->getFlags(), Store->getAAInfo());

1992 Store->getMemOperand()->getFlags(), Store->getAAInfo());

1993

1995}

1996

1997

1998

1999

2001 bool Sign) const {

2003 EVT VT = Op.getValueType();

2006 MVT IntVT = MVT::i32;

2007 MVT FltVT = MVT::f32;

2008

2010 if (LHSSignBits < 9)

2012

2014 if (RHSSignBits < 9)

2016

2018 unsigned SignBits = std::min(LHSSignBits, RHSSignBits);

2019 unsigned DivBits = BitSize - SignBits;

2020 if (Sign)

2021 ++DivBits;

2022

2025

2027

2028 if (Sign) {

2029

2031

2032

2035

2036

2038 }

2039

2040

2042

2043

2045

2046

2048

2049

2051

2053 fa, DAG.getNode(AMDGPUISD::RCP, DL, FltVT, fb));

2054

2055

2056 fq = DAG.getNode(ISD::FTRUNC, DL, FltVT, fq);

2057

2058

2060

2062

2063 bool UseFmadFtz = false;

2064 if (Subtarget->isGCN()) {

2066 UseFmadFtz =

2068 }

2069

2070

2071 unsigned OpCode = !Subtarget->hasMadMacF32Insts() ? (unsigned)ISD::FMA

2072 : UseFmadFtz ? (unsigned)AMDGPUISD::FMAD_FTZ

2075

2076

2078

2079

2080 fr = DAG.getNode(ISD::FABS, DL, FltVT, fr);

2081

2082

2083 fb = DAG.getNode(ISD::FABS, DL, FltVT, fb);

2084

2086

2087

2089

2090

2092

2093

2095

2096

2099

2100

2101 if (Sign) {

2106 } else {

2110 }

2111

2113}

2114

2119 EVT VT = Op.getValueType();

2120

2121 assert(VT == MVT::i64 && "LowerUDIVREM64 expects an i64");

2122

2124

2127

2128

2131 std::tie(LHS_Lo, LHS_Hi) = DAG.SplitScalar(LHS, DL, HalfVT, HalfVT);

2132

2135 std::tie(RHS_Lo, RHS_Hi) = DAG.SplitScalar(RHS, DL, HalfVT, HalfVT);

2136

2139

2141 LHS_Lo, RHS_Lo);

2142

2145

2146 Results.push_back(DAG.getNode(ISD::BITCAST, DL, MVT::i64, DIV));

2147 Results.push_back(DAG.getNode(ISD::BITCAST, DL, MVT::i64, REM));

2148 return;

2149 }

2150

2152

2153

2154

2157

2158

2159 unsigned FMAD =

2163 : (unsigned)AMDGPUISD::FMAD_FTZ;

2164

2169 Cvt_Lo);

2170 SDValue Rcp = DAG.getNode(AMDGPUISD::RCP, DL, MVT::f32, Mad1);

2175 SDValue Trunc = DAG.getNode(ISD::FTRUNC, DL, MVT::f32, Mul2);

2178 Mul1);

2183

2188

2189

2193 SDValue Mulhi1_Lo, Mulhi1_Hi;

2194 std::tie(Mulhi1_Lo, Mulhi1_Hi) =

2197 Mulhi1_Lo, Zero1);

2199 Mulhi1_Hi, Add1_Lo.getValue(1));

2202

2203

2206 SDValue Mulhi2_Lo, Mulhi2_Hi;

2207 std::tie(Mulhi2_Lo, Mulhi2_Hi) =

2210 Mulhi2_Lo, Zero1);

2212 Mulhi2_Hi, Add2_Lo.getValue(1));

2215

2217

2219

2220 SDValue Mul3_Lo, Mul3_Hi;

2221 std::tie(Mul3_Lo, Mul3_Hi) = DAG.SplitScalar(Mul3, DL, HalfVT, HalfVT);

2223 Mul3_Lo, Zero1);

2225 Mul3_Hi, Sub1_Lo.getValue(1));

2229

2236

2237

2238

2239

2240

2241

2243 RHS_Lo, Zero1);

2245 RHS_Hi, Sub1_Lo.getValue(1));

2250

2252

2258

2259

2261

2263 RHS_Lo, Zero1);

2265 RHS_Hi, Sub2_Lo.getValue(1));

2270

2271

2272

2273

2276

2279

2282

2283 return;

2284 }

2285

2286

2287

2290

2293 REM = DAG.getNode(ISD::BITCAST, DL, MVT::i64, REM);

2294

2297

2298 const unsigned halfBitWidth = HalfVT.getSizeInBits();

2299

2300 for (unsigned i = 0; i < halfBitWidth; ++i) {

2301 const unsigned bitPos = halfBitWidth - i - 1;

2303

2307

2308

2310

2312

2315

2317

2318

2321 }

2322

2324 DIV = DAG.getNode(ISD::BITCAST, DL, MVT::i64, DIV);

2327}

2328

2332 EVT VT = Op.getValueType();

2333

2334 if (VT == MVT::i64) {

2338 }

2339

2340 if (VT == MVT::i32) {

2342 return Res;

2343 }

2344

2347

2348

2349

2350

2351

2353

2354

2359

2360

2364

2365

2373

2374

2380

2382}

2383

2387 EVT VT = Op.getValueType();

2388

2391

2394

2395 if (VT == MVT::i32) {

2397 return Res;

2398 }

2399

2400 if (VT == MVT::i64 &&

2404

2405

2409 LHS_Lo, RHS_Lo);

2413 };

2415 }

2416

2420 SDValue RSign = LHSign;

2421

2424

2427

2430

2433

2436

2438 Div,

2439 Rem

2440 };

2442}

2443

2447

2448

2449

2450

2451

2452 SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);

2453

2456

2457 EVT SetCCVT =

2459

2463

2465

2467}

2468

2471 const unsigned FractBits = 52;

2472 const unsigned ExpBits = 11;

2473

2474 SDValue ExpPart = DAG.getNode(AMDGPUISD::BFE_U32, SL, MVT::i32,

2476 DAG.getConstant(FractBits - 32, SL, MVT::i32),

2477 DAG.getConstant(ExpBits, SL, MVT::i32));

2480

2481 return Exp;

2482}

2483

2487

2488 assert(Op.getValueType() == MVT::f64);

2489

2491

2492

2493

2495

2497

2498 const unsigned FractBits = 52;

2499

2500

2501 const SDValue SignBitMask = DAG.getConstant(UINT32_C(1) << 31, SL, MVT::i32);

2503

2504

2506 SignBit64 = DAG.getNode(ISD::BITCAST, SL, MVT::i64, SignBit64);

2507

2508 SDValue BcInt = DAG.getNode(ISD::BITCAST, SL, MVT::i64, Src);

2510 = DAG.getConstant((UINT64_C(1) << FractBits) - 1, SL, MVT::i64);

2511

2515

2516 EVT SetCCVT =

2518

2519 const SDValue FiftyOne = DAG.getConstant(FractBits - 1, SL, MVT::i32);

2520

2523

2526

2527 return DAG.getNode(ISD::BITCAST, SL, MVT::f64, Tmp2);

2528}

2529

2534

2535 assert(Op.getValueType() == MVT::f64);

2536

2540

2541

2542

2545

2546 SDValue Fabs = DAG.getNode(ISD::FABS, SL, MVT::f64, Src);

2547

2550

2551 EVT SetCCVT =

2554

2555 return DAG.getSelect(SL, MVT::f64, Cond, Src, Tmp2);

2556}

2557

2560

2561

2562

2563 return DAG.getNode(ISD::FROUNDEVEN, SDLoc(Op), Op.getValueType(),

2564 Op.getOperand(0));

2565}

2566

2568 auto VT = Op.getValueType();

2569 auto Arg = Op.getOperand(0u);

2570 return DAG.getNode(ISD::FROUNDEVEN, SDLoc(Op), VT, Arg);

2571}

2572

2573

2574

2575

2576

2577

2581 EVT VT = Op.getValueType();

2582

2584

2585

2586

2588

2589 SDValue AbsDiff = DAG.getNode(ISD::FABS, SL, VT, Diff);

2590

2593

2594 EVT SetCCVT =

2596

2600

2603}

2604

2608

2609

2610

2611

2612

2613 SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);

2614

2617

2618 EVT SetCCVT =

2620

2624

2626

2628}

2629

2630

2632 switch (Src.getOpcode()) {

2633 case ISD::FP_EXTEND:

2634 return Src.getOperand(0).getValueType() == MVT::f16;

2635 case ISD::FP16_TO_FP:

2636 case ISD::FFREXP:

2637 return true;

2639 unsigned IntrinsicID = Src.getConstantOperandVal(0);

2640 switch (IntrinsicID) {

2641 case Intrinsic::amdgcn_frexp_mant:

2642 return true;

2643 default:

2644 return false;

2645 }

2646 }

2647 default:

2648 return false;

2649 }

2650

2652}

2653

2656 return Flags.hasApproximateFuncs();

2657}

2658

2667

2672 EVT VT = Src.getValueType();

2674 SDValue SmallestNormal =

2676

2677

2678

2682

2683 return IsLtSmallestNormal;

2684}

2685

2689 EVT VT = Src.getValueType();

2692

2693 SDValue Fabs = DAG.getNode(ISD::FABS, SL, VT, Src, Flags);

2697 return IsFinite;

2698}

2699

2700

2701

2702std::pair<SDValue, SDValue>

2706 return {};

2707

2708 MVT VT = MVT::f32;

2710 SDValue SmallestNormal =

2712

2716

2720 DAG.getNode(ISD::SELECT, SL, VT, IsLtSmallestNormal, Scale32, One, Flags);

2721

2723 return {ScaledInput, IsLtSmallestNormal};

2724}

2725

2727

2728

2729

2730

2731

2732

2734 EVT VT = Op.getValueType();

2737

2738 if (VT == MVT::f16) {

2739

2740 assert(!Subtarget->has16BitInsts());

2741 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src, Flags);

2742 SDValue Log = DAG.getNode(AMDGPUISD::LOG, SL, MVT::f32, Ext, Flags);

2745 }

2746

2747 auto [ScaledInput, IsLtSmallestNormal] =

2749 if (!ScaledInput)

2750 return DAG.getNode(AMDGPUISD::LOG, SL, VT, Src, Flags);

2751

2752 SDValue Log2 = DAG.getNode(AMDGPUISD::LOG, SL, VT, ScaledInput, Flags);

2753

2757 DAG.getNode(ISD::SELECT, SL, VT, IsLtSmallestNormal, ThirtyTwo, Zero);

2759}

2760

2766

2770 EVT VT = Op.getValueType();

2773 const bool IsLog10 = Op.getOpcode() == ISD::FLOG10;

2774 assert(IsLog10 || Op.getOpcode() == ISD::FLOG);

2775

2777 if (VT == MVT::f16 || Flags.hasApproximateFuncs()) {

2778

2779 if (VT == MVT::f16 && !Subtarget->has16BitInsts()) {

2780

2781 X = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, X, Flags);

2782 }

2783

2785 if (VT == MVT::f16 && !Subtarget->has16BitInsts()) {

2788 }

2789

2790 return Lowered;

2791 }

2792

2794 if (ScaledInput)

2795 X = ScaledInput;

2796

2798

2800 if (Subtarget->hasFastFMAF32()) {

2801

2802 const float c_log10 = 0x1.344134p-2f;

2803 const float cc_log10 = 0x1.09f79ep-26f;

2804

2805

2806 const float c_log = 0x1.62e42ep-1f;

2807 const float cc_log = 0x1.efa39ep-25f;

2808

2811

2812

2813 Flags.setAllowContract(false);

2819 } else {

2820

2821 const float ch_log10 = 0x1.344000p-2f;

2822 const float ct_log10 = 0x1.3509f6p-18f;

2823

2824

2825 const float ch_log = 0x1.62e000p-1f;

2826 const float ct_log = 0x1.0bfbe8p-15f;

2827

2830

2834 SDValue YH = DAG.getNode(ISD::BITCAST, DL, MVT::f32, YHInt);

2836

2837

2838 Flags.setAllowContract(false);

2840 SDValue Mad0 = getMad(DAG, DL, VT, YH, CT, YTCT, Flags);

2842 R = getMad(DAG, DL, VT, YH, CH, Mad1);

2843 }

2844

2845 const bool IsFiniteOnly =

2846 (Flags.hasNoNaNs() || Options.NoNaNsFPMath) && Flags.hasNoInfs();

2847

2848

2849 if (!IsFiniteOnly) {

2852 }

2853

2854 if (IsScaled) {

2857 DAG.getConstantFP(IsLog10 ? 0x1.344136p+3f : 0x1.62e430p+4f, DL, VT);

2861 }

2862

2863 return R;

2864}

2865

2869

2870

2871

2875 EVT VT = Src.getValueType();

2876 unsigned LogOp =

2877 VT == MVT::f32 ? (unsigned)AMDGPUISD::LOG : (unsigned)ISD::FLOG2;

2878

2879 double Log2BaseInverted =

2881

2882 if (VT == MVT::f32) {

2883 auto [ScaledInput, IsScaled] = getScaledLogInput(DAG, SL, Src, Flags);

2884 if (ScaledInput) {

2885 SDValue LogSrc = DAG.getNode(AMDGPUISD::LOG, SL, VT, ScaledInput, Flags);

2886 SDValue ScaledResultOffset =

2887 DAG.getConstantFP(-32.0 * Log2BaseInverted, SL, VT);

2888

2890

2892 ScaledResultOffset, Zero, Flags);

2893

2895

2896 if (Subtarget->hasFastFMAF32())

2897 return DAG.getNode(ISD::FMA, SL, VT, LogSrc, Log2Inv, ResultOffset,

2898 Flags);

2901 }

2902 }

2903

2904 SDValue Log2Operand = DAG.getNode(LogOp, SL, VT, Src, Flags);

2905 SDValue Log2BaseInvertedOperand = DAG.getConstantFP(Log2BaseInverted, SL, VT);

2906

2907 return DAG.getNode(ISD::FMUL, SL, VT, Log2Operand, Log2BaseInvertedOperand,

2908 Flags);

2909}

2910

2912

2913

2914

2916 EVT VT = Op.getValueType();

2919

2920 if (VT == MVT::f16) {

2921

2922 assert(!Subtarget->has16BitInsts());

2923 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src, Flags);

2924 SDValue Log = DAG.getNode(AMDGPUISD::EXP, SL, MVT::f32, Ext, Flags);

2927 }

2928

2929 assert(VT == MVT::f32);

2930

2932 return DAG.getNode(AMDGPUISD::EXP, SL, MVT::f32, Src, Flags);

2933

2934

2935

2936

2937

2939

2941

2944

2947

2950

2952 SDValue Exp2 = DAG.getNode(AMDGPUISD::EXP, SL, VT, AddInput, Flags);

2953

2958

2959 return DAG.getNode(ISD::FMUL, SL, VT, Exp2, ResultScale, Flags);

2960}

2961

2965 bool IsExp10) const {

2966

2967

2968 EVT VT = X.getValueType();

2971

2973 return DAG.getNode(VT == MVT::f32 ? (unsigned)AMDGPUISD::EXP

2974 : (unsigned)ISD::FEXP2,

2975 SL, VT, Mul, Flags);

2976}

2977

2981 EVT VT = X.getValueType();

2984

2986

2989

2991

2993

2996

2999

3000 SDValue Exp2 = DAG.getNode(AMDGPUISD::EXP, SL, VT, ExpInput, Flags);

3001

3003 SDValue AdjustedResult =

3004 DAG.getNode(ISD::FMUL, SL, VT, Exp2, ResultScaleFactor, Flags);

3005

3006 return DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, AdjustedResult, Exp2,

3007 Flags);

3008}

3009

3010

3011

3015 const EVT VT = X.getValueType();

3016

3017 const unsigned Exp2Op = VT == MVT::f32 ? static_cast<unsigned>(AMDGPUISD::EXP)

3018 : static_cast<unsigned>(ISD::FEXP2);

3019

3021

3024

3026 SDValue Exp2_0 = DAG.getNode(Exp2Op, SL, VT, Mul0, Flags);

3028 SDValue Exp2_1 = DAG.getNode(Exp2Op, SL, VT, Mul1, Flags);

3030 }

3031

3032

3033

3034

3035

3036

3037

3039

3042

3047

3050

3052 SDValue Exp2_0 = DAG.getNode(Exp2Op, SL, VT, Mul0, Flags);

3054 SDValue Exp2_1 = DAG.getNode(Exp2Op, SL, VT, Mul1, Flags);

3055

3057

3059 SDValue AdjustedResult =

3060 DAG.getNode(ISD::FMUL, SL, VT, MulExps, ResultScaleFactor, Flags);

3061

3062 return DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, AdjustedResult, MulExps,

3063 Flags);

3064}

3065

3067 EVT VT = Op.getValueType();

3071 const bool IsExp10 = Op.getOpcode() == ISD::FEXP10;

3072

3073

3074

3075 if (allowApproxFunc(DAG, Flags)) {

3078 }

3079

3083

3084

3085

3086

3087

3088

3089

3090

3091 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, X, Flags);

3095 }

3096

3097 assert(VT == MVT::f32);

3098

3099

3100

3101

3102

3103

3104

3105

3106

3107

3108

3109

3110

3111

3112

3113

3114

3115

3116

3117

3118

3119

3120

3121

3122

3125

3127 if (Subtarget->hasFastFMAF32()) {

3129 const float cc_exp = 0x1.4ae0bep-26f;

3130 const float c_exp10 = 0x1.a934f0p+1f;

3131 const float cc_exp10 = 0x1.2f346ep-24f;

3132

3135

3137 SDValue NegPH = DAG.getNode(ISD::FNEG, SL, VT, PH, Flags);

3140 } else {

3141 const float ch_exp = 0x1.714000p+0f;

3142 const float cl_exp = 0x1.47652ap-12f;

3143

3144 const float ch_exp10 = 0x1.a92000p+1f;

3145 const float cl_exp10 = 0x1.4f0978p-11f;

3146

3149

3150 SDValue XAsInt = DAG.getNode(ISD::BITCAST, SL, MVT::i32, X);

3153 SDValue XH = DAG.getNode(ISD::BITCAST, SL, VT, XHAsInt);

3155

3157

3159 SDValue Mad0 = getMad(DAG, SL, VT, XL, CH, XLCL, Flags);

3160 PL = getMad(DAG, SL, VT, XH, CL, Mad0, Flags);

3161 }

3162

3163 SDValue E = DAG.getNode(ISD::FROUNDEVEN, SL, VT, PH, Flags);

3164

3165

3167

3170 SDValue Exp2 = DAG.getNode(AMDGPUISD::EXP, SL, VT, A, Flags);

3171

3172 SDValue R = DAG.getNode(ISD::FLDEXP, SL, VT, Exp2, IntE, Flags);

3173

3174 SDValue UnderflowCheckConst =

3175 DAG.getConstantFP(IsExp10 ? -0x1.66d3e8p+5f : -0x1.9d1da0p+6f, SL, VT);

3176

3181

3183

3184 if (!Flags.hasNoInfs()) {

3185 SDValue OverflowCheckConst =

3186 DAG.getConstantFP(IsExp10 ? 0x1.344136p+5f : 0x1.62e430p+6f, SL, VT);

3192 }

3193

3194 return R;

3195}

3196

3200

3204

3208 auto Opc = Op.getOpcode();

3209 auto Arg = Op.getOperand(0u);

3210 auto ResultVT = Op.getValueType();

3211

3212 if (ResultVT != MVT::i8 && ResultVT != MVT::i16)

3213 return {};

3214

3216 assert(ResultVT == Arg.getValueType());

3217

3218 const uint64_t NumBits = ResultVT.getFixedSizeInBits();

3221

3224 NewOp = DAG.getNode(ISD::SHL, SL, MVT::i32, NewOp, NumExtBits);

3225 NewOp = DAG.getNode(Opc, SL, MVT::i32, NewOp);

3226 } else {

3228 NewOp = DAG.getNode(Opc, SL, MVT::i32, NewOp);

3229 NewOp = DAG.getNode(ISD::SUB, SL, MVT::i32, NewOp, NumExtBits);

3230 }

3231

3233}

3234

3238

3241 unsigned NewOpc = Ctlz ? AMDGPUISD::FFBH_U32 : AMDGPUISD::FFBL_B32;

3242

3245 bool Is64BitScalar = !Src->isDivergent() && Src.getValueType() == MVT::i64;

3246

3247 if (Src.getValueType() == MVT::i32 || Is64BitScalar) {

3248

3249

3250

3251

3252

3253

3254

3255

3256

3257

3258 SDValue NewOpr = DAG.getNode(NewOpc, SL, MVT::i32, Src);

3259 if (!ZeroUndef) {

3261 Op.getValueType().getScalarSizeInBits(), SL, MVT::i32);

3262 NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, NewOpr, ConstVal);

3263 }

3265 }

3266

3269

3272

3273

3274

3275

3276

3277

3280 if (Ctlz)

3281 OprLo = DAG.getNode(AddOpc, SL, MVT::i32, OprLo, Const32);

3282 else

3283 OprHi = DAG.getNode(AddOpc, SL, MVT::i32, OprHi, Const32);

3284

3286 NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, OprLo, OprHi);

3287 if (!ZeroUndef) {

3289 NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, NewOpr, Const64);

3290 }

3291

3293}

3294

3296 bool Signed) const {

3297

3298

3299

3300

3301

3302

3303

3304

3305

3306

3307

3308

3309

3310

3311

3312

3313

3314

3315

3316

3317

3318

3319

3320

3321

3322

3325

3330 if (Signed && Subtarget->isGCN()) {

3331

3332

3333

3334

3335

3336

3337

3338

3339

3340

3341

3342

3343

3344

3345

3346

3347

3348

3349

3350

3351

3357 OppositeSign);

3358

3359 ShAmt = DAG.getNode(AMDGPUISD::FFBH_I32, SL, MVT::i32, Hi);

3360

3361

3364 ShAmt = DAG.getNode(ISD::UMIN, SL, MVT::i32, ShAmt, MaxShAmt);

3365 } else {

3367

3368

3375 }

3376

3378

3379 }

3380

3382

3384

3385

3388

3390

3391 unsigned Opc =

3394

3395

3396

3398 ShAmt);

3399

3400 if (Subtarget->isGCN())

3401 return DAG.getNode(ISD::FLDEXP, SL, MVT::f32, FVal, ShAmt);

3402

3403

3404

3405

3410 DAG.getNode(ISD::BITCAST, SL, MVT::i32, FVal), Exp);

3412

3416 IVal = DAG.getNode(ISD::OR, SL, MVT::i32, IVal, Sign);

3417 }

3418 return DAG.getNode(ISD::BITCAST, SL, MVT::f32, IVal);

3419}

3420

3422 bool Signed) const {

3425

3428

3430 SL, MVT::f64, Hi);

3431

3433

3434 SDValue LdExp = DAG.getNode(ISD::FLDEXP, SL, MVT::f64, CvtHi,

3436

3438}

3439

3442

3443 EVT DestVT = Op.getValueType();

3445 EVT SrcVT = Src.getValueType();

3446

3447 if (SrcVT == MVT::i16) {

3448 if (DestVT == MVT::f16)

3449 return Op;

3451

3452

3455 }

3456

3457 if (DestVT == MVT::bf16) {

3462 }

3463

3464 if (SrcVT != MVT::i64)

3465 return Op;

3466

3467 if (Subtarget->has16BitInsts() && DestVT == MVT::f16) {

3469

3475

3476 return FPRound;

3477 }

3478

3479 if (DestVT == MVT::f32)

3481

3482 assert(DestVT == MVT::f64);

3484}

3485

3488 EVT DestVT = Op.getValueType();

3489

3491 EVT SrcVT = Src.getValueType();

3492

3493 if (SrcVT == MVT::i16) {

3494 if (DestVT == MVT::f16)

3495 return Op;

3496

3498

3501 }

3502

3503 if (DestVT == MVT::bf16) {

3508 }

3509

3510 if (SrcVT != MVT::i64)

3511 return Op;

3512

3513

3514

3515 if (Subtarget->has16BitInsts() && DestVT == MVT::f16) {

3518

3524

3525 return FPRound;

3526 }

3527

3528 if (DestVT == MVT::f32)

3530

3531 assert(DestVT == MVT::f64);

3533}

3534

3536 bool Signed) const {

3538

3540 EVT SrcVT = Src.getValueType();

3541

3542 assert(SrcVT == MVT::f32 || SrcVT == MVT::f64);

3543

3544

3545

3546

3547

3548

3549

3550

3551

3552

3553 SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, SrcVT, Src);

3555 if (Signed && SrcVT == MVT::f32) {

3556

3557

3558

3559

3560

3562 DAG.getNode(ISD::BITCAST, SL, MVT::i32, Trunc),

3564 Trunc = DAG.getNode(ISD::FABS, SL, SrcVT, Trunc);

3565 }

3566

3568 if (SrcVT == MVT::f64) {

3571 SrcVT);

3574 SrcVT);

3575 } else {

3580 }

3581

3583

3585

3587

3590 SL, MVT::i32, FloorMul);

3592

3593 SDValue Result = DAG.getNode(ISD::BITCAST, SL, MVT::i64,

3595

3596 if (Signed && SrcVT == MVT::f32) {

3598

3599 Sign = DAG.getNode(ISD::BITCAST, SL, MVT::i64,

3601

3602 Result =

3604 DAG.getNode(ISD::XOR, SL, MVT::i64, Result, Sign), Sign);

3605 }

3606

3607 return Result;

3608}

3609

3613

3614

3616 return DAG.getNode(AMDGPUISD::FP_TO_FP16, DL, Op.getValueType(), N0);

3617

3618 if (Op->getFlags().hasApproximateFuncs()) {

3619

3621 }

3622

3624}

3625

3626

3629 assert(Src.getSimpleValueType() == MVT::f64);

3630

3631

3632

3633 const unsigned ExpMask = 0x7ff;

3634 const unsigned ExpBiasf64 = 1023;

3635 const unsigned ExpBiasf16 = 15;

3647

3648

3650 DAG.getConstant(-ExpBiasf64 + ExpBiasf16, DL, MVT::i32));

3651

3656

3659 MaskedSig = DAG.getNode(ISD::OR, DL, MVT::i32, MaskedSig, U);

3660

3663

3664

3668

3669

3673

3674

3676 One, E);

3680

3683

3688

3700

3705

3706

3711

3713}

3714

3718 unsigned OpOpcode = Op.getOpcode();

3719 EVT SrcVT = Src.getValueType();

3720 EVT DestVT = Op.getValueType();

3721

3722

3723 if (SrcVT == MVT::f16 && DestVT == MVT::i16)

3724 return Op;

3725

3726 if (SrcVT == MVT::bf16) {

3728 SDValue PromotedSrc = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);

3729 return DAG.getNode(Op.getOpcode(), DL, DestVT, PromotedSrc);

3730 }

3731

3732

3733 if (DestVT == MVT::i16 && (SrcVT == MVT::f32 || SrcVT == MVT::f64)) {

3735

3736 SDValue FpToInt32 = DAG.getNode(OpOpcode, DL, MVT::i32, Src);

3738 }

3739

3740 if (DestVT != MVT::i64)

3741 return Op;

3742

3743 if (SrcVT == MVT::f16 ||

3744 (SrcVT == MVT::f32 && Src.getOpcode() == ISD::FP16_TO_FP)) {

3746

3747 SDValue FpToInt32 = DAG.getNode(OpOpcode, DL, MVT::i32, Src);

3748 unsigned Ext =

3750 return DAG.getNode(Ext, DL, MVT::i64, FpToInt32);

3751 }

3752

3753 if (SrcVT == MVT::f32 || SrcVT == MVT::f64)

3755

3757}

3758

3762 MVT VT = Op.getSimpleValueType();

3764

3766

3769

3770

3774

3776 for (unsigned I = 0; I < NElts; ++I)

3778

3780}

3781

3782

3783

3784

3785

3789

3796

3802

3805 unsigned NewOpcode = Node24->getOpcode();

3806 if (IsIntrin) {

3808 switch (IID) {

3809 case Intrinsic::amdgcn_mul_i24:

3810 NewOpcode = AMDGPUISD::MUL_I24;

3811 break;

3812 case Intrinsic::amdgcn_mul_u24:

3813 NewOpcode = AMDGPUISD::MUL_U24;

3814 break;

3815 case Intrinsic::amdgcn_mulhi_i24:

3816 NewOpcode = AMDGPUISD::MULHI_I24;

3817 break;

3818 case Intrinsic::amdgcn_mulhi_u24:

3819 NewOpcode = AMDGPUISD::MULHI_U24;

3820 break;

3821 default:

3823 }

3824 }

3825

3827

3828

3829

3830

3833 if (DemandedLHS || DemandedRHS)

3835 DemandedLHS ? DemandedLHS : LHS,

3836 DemandedRHS ? DemandedRHS : RHS);

3837

3838

3839

3841 return SDValue(Node24, 0);

3843 return SDValue(Node24, 0);

3844

3846}

3847

3848template

3851 if (Width + Offset < 32) {

3853 IntTy Result = static_cast<IntTy>(Shl) >> (32 - Width);

3854 if constexpr (std::is_signed_v) {

3856 } else {

3858 }

3859 }

3860

3862}

3863

3867 if (M->isVolatile())

3868 return true;

3869 }

3870 }

3871

3872 return false;

3873}

3874

3876

3878 return false;

3879

3881 return false;

3882

3884

3886 return false;

3887

3888 if (Size == 3 || (Size > 4 && (Size % 4 != 0)))

3889 return false;

3890

3891 return true;

3892}

3893

3894

3895

3900

3904

3908

3912 unsigned IsFast;

3914

3915

3916

3917

3922

3925

3927 }

3928

3929 if (!IsFast)

3931 }

3932

3935

3937

3941

3942 SDValue BC = DAG.getNode(ISD::BITCAST, SL, VT, NewLoad);

3945}

3946

3947

3948

3953

3957

3960

3965 unsigned IsFast;

3967

3968

3969

3970

3971

3976

3978 }

3979

3980 if (!IsFast)

3982 }

3983

3986

3989

3990

3991

3992 bool OtherUses = !Val.hasOneUse();

3993 SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, NewVT, Val);

3994 if (OtherUses) {

3995 SDValue CastBack = DAG.getNode(ISD::BITCAST, SL, VT, CastVal);

3997 }

3998

4001}

4002

4003

4004

4005

4009 SDValue N0 = N->getOperand(0);

4010

4011

4012

4014 SDValue N1 = N->getOperand(1);

4017

4019 EVT SrcVT = Src.getValueType();

4020 if (SrcVT.bitsGE(ExtVT)) {

4021 SDValue NewInReg = DAG.getNode(N->getOpcode(), SL, SrcVT, Src, N1);

4023 }

4024 }

4025

4027}

4028

4031 unsigned IID = N->getConstantOperandVal(0);

4032 switch (IID) {

4033 case Intrinsic::amdgcn_mul_i24:

4034 case Intrinsic::amdgcn_mul_u24:

4035 case Intrinsic::amdgcn_mulhi_i24:

4036 case Intrinsic::amdgcn_mulhi_u24:

4038 case Intrinsic::amdgcn_fract:

4039 case Intrinsic::amdgcn_rsq:

4040 case Intrinsic::amdgcn_rcp_legacy:

4041 case Intrinsic::amdgcn_rsq_legacy:

4042 case Intrinsic::amdgcn_rsq_clamp:

4043 case Intrinsic::amdgcn_tanh:

4044 case Intrinsic::amdgcn_prng_b32: {

4045

4046 SDValue Src = N->getOperand(1);

4047 return Src.isUndef() ? Src : SDValue();

4048 }

4049 case Intrinsic::amdgcn_frexp_exp: {

4050

4051

4052

4053 SDValue Src = N->getOperand(1);

4055 if (PeekSign == Src)

4058 0);

4059 }

4060 default:

4062 }

4063}

4064

4065

4066

4074

4077

4080

4081

4082

4085

4087 return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec);

4088}

4089

4092 EVT VT = N->getValueType(0);

4093 SDValue LHS = N->getOperand(0);

4094 SDValue RHS = N->getOperand(1);

4098

4099 unsigned RHSVal;

4100 if (CRHS) {

4102 if (!RHSVal)

4103 return LHS;

4104

4105 switch (LHS->getOpcode()) {

4106 default:

4107 break;

4111 SDValue X = LHS->getOperand(0);

4112

4113 if (VT == MVT::i32 && RHSVal == 16 && X.getValueType() == MVT::i16 &&

4115

4116

4118 MVT::v2i16, SL,

4120 return DAG.getNode(ISD::BITCAST, SL, MVT::i32, Vec);

4121 }

4122

4123

4124 if (VT != MVT::i64)

4125 break;

4128 if (LZ < RHSVal)

4129 break;

4130 EVT XVT = X.getValueType();

4133 }

4134 }

4135 }

4136

4139

4140

4141

4142

4144

4148 : TargetScalarType;

4149

4153

4154 if (CRHS) {

4156 TargetType);

4157 } else {

4159 const SDValue ShiftMask =

4161

4162

4163 ShiftAmt = DAG.getNode(ISD::AND, SL, TargetType, TruncShiftAmt, ShiftMask);

4164 }

4165

4169

4172

4178

4180 for (unsigned I = 0; I != NElts; ++I)

4181 HiAndLoOps[2 * I + 1] = HiOps[I];

4183 } else {

4185 Vec = DAG.getBuildVector(ConcatType, SL, {Zero, NewShift});

4186 }

4187 return DAG.getNode(ISD::BITCAST, SL, VT, Vec);

4188}

4189

4192 SDValue RHS = N->getOperand(1);

4194 EVT VT = N->getValueType(0);

4195 SDValue LHS = N->getOperand(0);

4198

4201

4202

4203

4204

4205

4206

4207

4209

4213 : TargetScalarType;

4214

4217

4221 if (CRHS) {

4224 TargetType);

4226 (ElementType.getSizeInBits() - 1)) {

4227 ShiftAmt = ShiftFullAmt;

4228 } else {

4230 const SDValue ShiftMask =

4232

4233

4234 ShiftAmt = DAG.getNode(ISD::AND, SL, TargetType, TruncShiftAmt, ShiftMask);

4235 }

4236

4237 EVT ConcatType;

4239 SDLoc LHSSL(LHS);

4240

4244 SDValue SplitLHS = DAG.getNode(ISD::BITCAST, LHSSL, ConcatType, LHS);

4247

4249 for (unsigned I = 0; I != NElts; ++I) {

4250 HiOps[I] = HiAndLoOps[2 * I + 1];

4251 }

4253 } else {

4256 SDValue SplitLHS = DAG.getNode(ISD::BITCAST, LHSSL, ConcatType, LHS);

4258 }

4259

4264 } else {

4266 HiShift = DAG.getNode(ISD::SRA, SL, TargetType, Hi, ShiftFullAmt);

4267 }

4270

4277

4280 for (unsigned I = 0; I != NElts; ++I) {

4281 HiAndLoOps[2 * I + 1] = HiOps[I];

4282 HiAndLoOps[2 * I] = LoOps[I];

4283 }

4285 } else {

4286 Vec = DAG.getBuildVector(ConcatType, SL, {NewShift, HiShift});

4287 }

4288 return DAG.getNode(ISD::BITCAST, SL, VT, Vec);

4289}

4290

4293 SDValue RHS = N->getOperand(1);

4295 EVT VT = N->getValueType(0);

4296 SDValue LHS = N->getOperand(0);

4299 unsigned RHSVal;

4300

4301 if (CRHS) {

4303

4304

4305

4306 if (LHS.getOpcode() == ISD::AND) {

4308 unsigned MaskIdx, MaskLen;

4309 if (Mask->getAPIntValue().isShiftedMask(MaskIdx, MaskLen) &&

4310 MaskIdx == RHSVal) {

4313 N->getOperand(1)),

4315 N->getOperand(1)));

4316 }

4317 }

4318 }

4319 }

4320

4323

4324

4325

4326

4327

4328

4329

4331

4335 : TargetScalarType;

4336

4339

4341 if (CRHS) {

4343 TargetType);

4344 } else {

4346 const SDValue ShiftMask =

4348

4349

4350 ShiftAmt = DAG.getNode(ISD::AND, SL, TargetType, TruncShiftAmt, ShiftMask);

4351 }

4352

4354 EVT ConcatType;

4356 SDLoc LHSSL(LHS);

4357

4361 SDValue SplitLHS = DAG.getNode(ISD::BITCAST, LHSSL, ConcatType, LHS);

4364

4366 for (unsigned I = 0; I != NElts; ++I)

4367 HiOps[I] = HiAndLoOps[2 * I + 1];

4369 } else {

4372 SDValue SplitLHS = DAG.getNode(ISD::BITCAST, LHSSL, ConcatType, LHS);

4374 }

4375

4378

4384

4386 for (unsigned I = 0; I != NElts; ++I)

4387 HiAndLoOps[2 * I] = LoOps[I];

4389 } else {

4390 Vec = DAG.getBuildVector(ConcatType, SL, {NewShift, Zero});

4391 }

4392 return DAG.getNode(ISD::BITCAST, SL, VT, Vec);

4393}

4394

4399 EVT VT = N->getValueType(0);

4400 SDValue Src = N->getOperand(0);

4401

4402

4403 if (Src.getOpcode() == ISD::BITCAST && !VT.isVector()) {

4410 Elt0 = DAG.getNode(ISD::BITCAST, SL,

4412 }

4413

4415 }

4416 }

4417 }

4418

4419

4420

4421

4428 unsigned BitIndex = K->getZExtValue();

4429 unsigned PartIndex = BitIndex / SrcEltSize;

4430

4431 if (PartIndex * SrcEltSize == BitIndex &&

4438 }

4439 }

4440 }

4441 }

4442 }

4443

4444

4445

4446

4447

4449 EVT SrcVT = Src.getValueType();

4451 (Src.getOpcode() == ISD::SRL ||

4452 Src.getOpcode() == ISD::SRA ||

4453 Src.getOpcode() == ISD::SHL)) {

4456

4457

4458

4459

4460

4461 const unsigned MaxCstSize =

4467

4470 Src.getOperand(0));

4472

4476 }

4477

4478 SDValue ShrunkShift = DAG.getNode(Src.getOpcode(), SL, MidVT,

4479 Trunc, Amt);

4481 }

4482 }

4483 }

4484

4486}

4487

4488

4489

4490

4491

4494 if (Size <= 32) {

4495 unsigned MulOpc = Signed ? AMDGPUISD::MUL_I24 : AMDGPUISD::MUL_U24;

4496 return DAG.getNode(MulOpc, SL, MVT::i32, N0, N1);

4497 }

4498

4499 unsigned MulLoOpc = Signed ? AMDGPUISD::MUL_I24 : AMDGPUISD::MUL_U24;

4500 unsigned MulHiOpc = Signed ? AMDGPUISD::MULHI_I24 : AMDGPUISD::MULHI_U24;

4501

4502 SDValue MulLo = DAG.getNode(MulLoOpc, SL, MVT::i32, N0, N1);

4503 SDValue MulHi = DAG.getNode(MulHiOpc, SL, MVT::i32, N0, N1);

4504

4506}

4507

4508

4509

4511 if (V->getOpcode() != ISD::ADD)

4513

4515}

4516

4520 EVT VT = N->getValueType(0);

4521

4522

4523

4524

4525

4526 if (->isDivergent())

4528

4532

4535

4536 SDValue N0 = N->getOperand(0);

4537 SDValue N1 = N->getOperand(1);

4538

4539

4540

4541

4542

4545 if (!AddOp)

4547

4548 if (V.hasOneUse() || all_of(V->users(), [](const SDNode *U) -> bool {

4549 return U->getOpcode() == ISD::MUL;

4550 }))

4551 return AddOp;

4552

4554 };

4555

4556

4557

4558 if (SDValue MulOper = IsFoldableAdd(N0)) {

4559 SDValue MulVal = DAG.getNode(N->getOpcode(), DL, VT, N1, MulOper);

4561 }

4562

4563 if (SDValue MulOper = IsFoldableAdd(N1)) {

4564 SDValue MulVal = DAG.getNode(N->getOpcode(), DL, VT, N0, MulOper);

4566 }

4567

4568

4571

4572

4573

4574

4575

4578

4581

4583

4584 if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) {

4588 } else if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) {

4592 } else {

4594 }

4595

4596

4597

4599}

4600

4604 if (N->getValueType(0) != MVT::i32)

4606

4609

4611 SDValue N0 = N->getOperand(0);

4612 SDValue N1 = N->getOperand(1);

4613

4614

4615

4616

4617

4622

4623

4624

4625 unsigned LoOpcode = 0;

4626 unsigned HiOpcode = 0;

4628 if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) {

4631 LoOpcode = AMDGPUISD::MUL_I24;

4632 HiOpcode = AMDGPUISD::MULHI_I24;

4633 }

4634 } else {

4635 if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) {

4638 LoOpcode = AMDGPUISD::MUL_U24;

4639 HiOpcode = AMDGPUISD::MULHI_U24;

4640 }

4641 }

4642 if (!LoOpcode)

4644

4649}

4650

4653 EVT VT = N->getValueType(0);

4654

4655 if (!Subtarget->hasMulI24() || VT.isVector())

4657

4658

4659

4660

4661

4662

4663

4664 if (Subtarget->hasSMulHi() && ->isDivergent())

4666

4669

4670 SDValue N0 = N->getOperand(0);

4671 SDValue N1 = N->getOperand(1);

4672

4673 if ( isI24 (N0, DAG) || isI24 (N1, DAG))

4675

4678

4679 SDValue Mulhi = DAG.getNode(AMDGPUISD::MULHI_I24, DL, MVT::i32, N0, N1);

4682}

4683

4686 EVT VT = N->getValueType(0);

4687

4690

4691

4692

4693

4694

4695

4696

4697 if (Subtarget->hasSMulHi() && ->isDivergent())

4699

4702

4703 SDValue N0 = N->getOperand(0);

4704 SDValue N1 = N->getOperand(1);

4705

4706 if ( isU24 (N0, DAG) || isU24 (N1, DAG))

4708

4711

4712 SDValue Mulhi = DAG.getNode(AMDGPUISD::MULHI_U24, DL, MVT::i32, N0, N1);

4715}

4716

4720 unsigned Opc) const {

4721 EVT VT = Op.getValueType();

4723 if (LegalVT != MVT::i32 && (Subtarget->has16BitInsts() &&

4724 LegalVT != MVT::i16))

4726

4727 if (VT != MVT::i32)

4729

4731 if (VT != MVT::i32)

4733

4734 return FFBX;

4735}

4736

4737

4738

4739

4740

4741

4742

4743

4749

4753

4754

4755

4759 unsigned Opc =

4760 isCttzOpc(RHS.getOpcode()) ? AMDGPUISD::FFBL_B32 : AMDGPUISD::FFBH_U32;

4761 return getFFBX_U32(DAG, CmpLHS, SL, Opc);

4762 }

4763

4764

4765

4769 unsigned Opc =

4770 isCttzOpc(LHS.getOpcode()) ? AMDGPUISD::FFBL_B32 : AMDGPUISD::FFBH_U32;

4771

4772 return getFFBX_U32(DAG, CmpLHS, SL, Opc);

4773 }

4774

4776}

4777

4779 unsigned Op,

4780 const SDLoc &SL,

4786

4790 return DAG.getNode(Op, SL, VT, NewSelect);

4791}

4792

4793

4794

4795

4796

4797

4798

4799

4805 SDValue LHS = N.getOperand(1);

4806 SDValue RHS = N.getOperand(2);

4807

4808 EVT VT = N.getValueType();

4809 if ((LHS.getOpcode() == ISD::FABS && RHS.getOpcode() == ISD::FABS) ||

4810 (LHS.getOpcode() == ISD::FNEG && RHS.getOpcode() == ISD::FNEG)) {

4813

4816 }

4817

4818 bool Inv = false;

4819 if (RHS.getOpcode() == ISD::FABS || RHS.getOpcode() == ISD::FNEG) {

4821 Inv = true;

4822 }

4823

4824

4826 if ((LHS.getOpcode() == ISD::FNEG || LHS.getOpcode() == ISD::FABS) && CRHS &&

4829

4830

4833

4834

4835 bool ShouldFoldNeg = true;

4836

4840 ShouldFoldNeg = false;

4841 if (LHS.getOpcode() == ISD::FABS && Opc == ISD::FMUL)

4842 ShouldFoldNeg = false;

4843 }

4844

4845 if (ShouldFoldNeg) {

4846 if (LHS.getOpcode() == ISD::FABS && CRHS->isNegative())

4848

4849

4850

4851

4852

4853

4854

4855 if (NewLHS.getOpcode() == ISD::FABS &&

4858

4861

4862 if (LHS.getOpcode() == ISD::FNEG)

4863 NewRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);

4864

4865 if (Inv)

4867

4869 Cond, NewLHS, NewRHS);

4871 return DAG.getNode(LHS.getOpcode(), SL, VT, NewSelect);

4872 }

4873 }

4874

4876}

4877

4881 return Folded;

4882

4886

4887 EVT VT = N->getValueType(0);

4891

4892 SDValue True = N->getOperand(1);

4893 SDValue False = N->getOperand(2);

4894

4895 if (Cond.hasOneUse()) {

4899

4900

4901

4902

4906

4907 SDValue NewCond = DAG.getSetCC(SL, Cond.getValueType(), LHS, RHS, NewCC);

4909 }

4910

4911 if (VT == MVT::f32 && Subtarget->hasFminFmaxLegacy()) {

4914

4915

4917 }

4918 }

4919

4920

4922}

4923

4933

4934

4935

4938 if (C->isZero())

4940

4941 if (Subtarget->hasInv2PiInlineImm() && isInv2Pi(C->getValueAPF()))

4943

4945}

4946

4952

4958

4960 switch (Opc) {

4961 case ISD::FMAXNUM:

4962 return ISD::FMINNUM;

4963 case ISD::FMINNUM:

4964 return ISD::FMAXNUM;

4965 case ISD::FMAXNUM_IEEE:

4966 return ISD::FMINNUM_IEEE;

4967 case ISD::FMINNUM_IEEE:

4968 return ISD::FMAXNUM_IEEE;

4969 case ISD::FMAXIMUM:

4970 return ISD::FMINIMUM;

4971 case ISD::FMINIMUM:

4972 return ISD::FMAXIMUM;

4973 case ISD::FMAXIMUMNUM:

4974 return ISD::FMINIMUMNUM;

4975 case ISD::FMINIMUMNUM:

4976 return ISD::FMAXIMUMNUM;

4977 case AMDGPUISD::FMAX_LEGACY:

4978 return AMDGPUISD::FMIN_LEGACY;

4979 case AMDGPUISD::FMIN_LEGACY:

4980 return AMDGPUISD::FMAX_LEGACY;

4981 default:

4983 }

4984}

4985

4986

4987

4989

4990

4991

4992

4994

4995

4997 return false;

4998 } else {

5001 return false;

5002 }

5003

5004 return true;

5005}

5006

5010 SDValue N0 = N->getOperand(0);

5011 EVT VT = N->getValueType(0);

5012

5014

5017

5019 switch (Opc) {

5023

5024

5027

5028 if (LHS.getOpcode() != ISD::FNEG)

5029 LHS = DAG.getNode(ISD::FNEG, SL, VT, LHS);

5030 else

5032

5033 if (RHS.getOpcode() != ISD::FNEG)

5034 RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);

5035 else

5037

5040 return SDValue();

5043 return Res;

5044 }

5046 case AMDGPUISD::FMUL_LEGACY: {

5047

5048

5051

5052 if (LHS.getOpcode() == ISD::FNEG)

5054 else if (RHS.getOpcode() == ISD::FNEG)

5056 else

5057 RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);

5058

5061 return SDValue();

5064 return Res;

5065 }

5068

5071

5072

5076

5077 if (LHS.getOpcode() == ISD::FNEG)

5079 else if (MHS.getOpcode() == ISD::FNEG)

5081 else

5082 MHS = DAG.getNode(ISD::FNEG, SL, VT, MHS);

5083

5084 if (RHS.getOpcode() != ISD::FNEG)

5085 RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);

5086 else

5088

5091 return SDValue();

5094 return Res;

5095 }

5096 case ISD::FMAXNUM:

5097 case ISD::FMINNUM:

5098 case ISD::FMAXNUM_IEEE:

5099 case ISD::FMINNUM_IEEE:

5100 case ISD::FMINIMUM:

5101 case ISD::FMAXIMUM:

5102 case ISD::FMINIMUMNUM:

5103 case ISD::FMAXIMUMNUM:

5104 case AMDGPUISD::FMAX_LEGACY:

5105 case AMDGPUISD::FMIN_LEGACY: {

5106

5107

5108

5109

5110

5113

5114

5115

5118

5119 SDValue NegLHS = DAG.getNode(ISD::FNEG, SL, VT, LHS);

5120 SDValue NegRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);

5122

5124 if (Res.getOpcode() != Opposite)

5125 return SDValue();

5128 return Res;

5129 }

5130 case AMDGPUISD::FMED3: {

5132 for (unsigned I = 0; I < 3; ++I)

5134

5136 if (Res.getOpcode() != AMDGPUISD::FMED3)

5137 return SDValue();

5138

5142

5145 }

5146

5147 return Res;

5148 }

5149 case ISD::FP_EXTEND:

5150 case ISD::FTRUNC:

5151 case ISD::FRINT:

5152 case ISD::FNEARBYINT:

5153 case ISD::FROUNDEVEN:

5154 case ISD::FSIN:

5156 case AMDGPUISD::RCP:

5157 case AMDGPUISD::RCP_LEGACY:

5158 case AMDGPUISD::RCP_IFLAG:

5159 case AMDGPUISD::SIN_HW: {

5161 if (CvtSrc.getOpcode() == ISD::FNEG) {

5162

5163

5165 }

5166

5169

5170

5171

5174 }

5177

5178 if (CvtSrc.getOpcode() == ISD::FNEG) {

5179

5182 }

5183

5186

5187

5190 }

5191 case ISD::FP16_TO_FP: {

5192

5193

5194

5196

5198 EVT SrcVT = Src.getValueType();

5199

5200

5203 return DAG.getNode(ISD::FP16_TO_FP, SL, N->getValueType(0), IntFNeg);

5204 }

5206

5207

5209 }

5210 case ISD::BITCAST: {

5218

5219

5220

5221

5222

5223

5224

5225

5226

5227

5228 SDValue CastHi = DAG.getNode(ISD::BITCAST, SL, MVT::f32, HighBits);

5229 SDValue NegHi = DAG.getNode(ISD::FNEG, SL, MVT::f32, CastHi);

5232

5234 Ops.back() = CastBack;

5238 SDValue Result = DAG.getNode(ISD::BITCAST, SL, VT, Build);

5239

5242 return Result;

5243 }

5244

5247

5248

5249

5250

5251

5256

5257 SDValue NegLHS = DAG.getNode(ISD::FNEG, SL, MVT::f32, LHS);

5258 SDValue NegRHS = DAG.getNode(ISD::FNEG, SL, MVT::f32, RHS);

5259

5261 NegRHS);

5262 }

5263

5265 }

5266 default:

5268 }

5269}

5270

5274 SDValue N0 = N->getOperand(0);

5275

5278

5280 case ISD::FP16_TO_FP: {

5281 assert(!Subtarget->has16BitInsts() && "should only see if f16 is illegal");

5284 EVT SrcVT = Src.getValueType();

5285

5286

5289 return DAG.getNode(ISD::FP16_TO_FP, SL, N->getValueType(0), IntFAbs);

5290 }

5291 default:

5293 }

5294}

5295

5299 if (!CFP)

5301

5302

5303 const APFloat &Val = CFP->getValueAPF();

5306}

5307

5312

5313 switch(N->getOpcode()) {

5314 default:

5315 break;

5316 case ISD::BITCAST: {

5317 EVT DestVT = N->getValueType(0);

5318

5319

5320

5321

5322

5323

5325 SDValue Src = N->getOperand(0);

5329 EVT SrcVT = Src.getValueType();

5331

5334

5339 CastedElts.push_back(DAG.getNode(ISD::BITCAST, DL, DestEltVT, Elt));

5340 }

5341

5343 }

5344 }

5345 }

5346

5348 break;

5349

5350

5351

5352

5353

5354 SDValue Src = N->getOperand(0);

5357 uint64_t CVal = C->getZExtValue();

5361 return DAG.getNode(ISD::BITCAST, SL, DestVT, BV);

5362 }

5363

5365 const APInt &Val = C->getValueAPF().bitcastToAPInt();

5371

5372 return DAG.getNode(ISD::BITCAST, SL, DestVT, Vec);

5373 }

5374

5375 break;

5376 }

5380

5381

5382

5383

5384 if (!(N->getValueType(0).isVector() &&

5387 break;

5393 }

5398 case AMDGPUISD::MUL_U24:

5399 case AMDGPUISD::MUL_I24: {

5401 return Simplified;

5402 break;

5403 }

5404 case AMDGPUISD::MULHI_I24:

5405 case AMDGPUISD::MULHI_U24:

5416 case ISD::FNEG:

5418 case ISD::FABS:

5420 case AMDGPUISD::BFE_I32:

5421 case AMDGPUISD::BFE_U32: {

5422 assert(->getValueType(0).isVector() &&

5423 "Vector handling of BFE not implemented");

5425 if (!Width)

5426 break;

5427

5429 if (WidthVal == 0)

5431

5434 break;

5435

5436 SDValue BitsFrom = N->getOperand(0);

5438

5439 bool Signed = N->getOpcode() == AMDGPUISD::BFE_I32;

5440

5441 if (OffsetVal == 0) {

5442

5443 unsigned SignBits = Signed ? (32 - WidthVal + 1) : (32 - WidthVal);

5444

5446 if (OpSignBits >= SignBits)

5447 return BitsFrom;

5448

5451

5452

5453

5454

5455

5456

5459 }

5460

5462 }

5463

5467 CVal->getSExtValue(),

5468 OffsetVal,

5469 WidthVal,

5470 DL);

5471 }

5472

5474 CVal->getZExtValue(),

5475 OffsetVal,

5476 WidthVal,

5477 DL);

5478 }

5479

5480 if ((OffsetVal + WidthVal) >= 32 &&

5481 !(Subtarget->hasSDWA() && OffsetVal == 16 && WidthVal == 16)) {

5484 BitsFrom, ShiftVal);

5485 }

5486

5489 OffsetVal,

5490 OffsetVal + WidthVal);

5491

5499 }

5500 }

5501

5502 break;

5503 }

5504 case ISD::LOAD:

5506 case ISD::STORE:

5508 case AMDGPUISD::RCP:

5509 case AMDGPUISD::RCP_IFLAG:

5516 case AMDGPUISD::FMAD_FTZ: {

5517 SDValue N0 = N->getOperand(0);

5518 SDValue N1 = N->getOperand(1);

5519 SDValue N2 = N->getOperand(2);

5520 EVT VT = N->getValueType(0);

5521

5522

5523

5527 if (N0CFP && N1CFP && N2CFP) {

5528 const auto FTZ = [](const APFloat &V) {

5529 if (V.isDenormal()) {

5530 APFloat Zero(V.getSemantics(), 0);

5531 return V.isNegative() ? -Zero : Zero;

5532 }

5533 return V;

5534 };

5535

5540 V0 = FTZ(V0);

5543 }

5544 break;

5545 }

5546 }

5548}

5549

5550

5551

5552

5553

5557 const SDLoc &SL,

5558 bool RawReg) const {

5562

5563 if ( MRI .isLiveIn(Reg)) {

5564 VReg = MRI.createVirtualRegister(RC);

5565 MRI.addLiveIn(Reg, VReg);

5566 } else {

5567 VReg = MRI.getLiveInVirtReg(Reg);

5568 }

5569

5570 if (RawReg)

5572

5574}

5575

5576

5577

5583 return I;

5584 }

5585 }

5586

5588}

5589

5592 const SDLoc &SL,

5593 int64_t Offset) const {

5597

5600

5604}

5605

5607 const SDLoc &SL,

5610 int64_t Offset) const {

5614

5616

5618 DAG.getCopyFromReg(Chain, SL, Info->getStackPtrOffsetReg(), MVT::i32);

5622 return Store;

5623}

5624

5629 assert(Arg && "Attempting to load missing argument");

5630

5634

5636 return V;

5637

5638 unsigned Mask = Arg.getMask();

5643 DAG.getConstant(Mask >> Shift, SL, VT));

5644}

5645

5648 unsigned ExplicitArgOffset = Subtarget->getExplicitKernelArgOffset();

5649 const Align Alignment = Subtarget->getAlignmentForImplicitArgPtr();

5651 alignTo(ExplicitKernArgSize, Alignment) + ExplicitArgOffset;

5652 switch (Param) {

5654 return ArgOffset;

5661 }

5663}

5664

5670

5673 int &RefinementSteps,

5674 bool &UseOneConstNR,

5675 bool Reciprocal) const {

5677

5678 if (VT == MVT::f32) {

5679 RefinementSteps = 0;

5680 return DAG.getNode(AMDGPUISD::RSQ, SDLoc(Operand), VT, Operand);

5681 }

5682

5683

5684

5685

5687}

5688

5691 int &RefinementSteps) const {

5693

5694 if (VT == MVT::f32) {

5695

5696

5697

5698

5699

5700 RefinementSteps = 0;

5701 return DAG.getNode(AMDGPUISD::RCP, SDLoc(Operand), VT, Operand);

5702 }

5703

5704

5705

5706

5708}

5709

5711 switch (ID) {

5712 case Intrinsic::amdgcn_workitem_id_x:

5713 return 0;

5714 case Intrinsic::amdgcn_workitem_id_y:

5715 return 1;

5716 case Intrinsic::amdgcn_workitem_id_z:

5717 return 2;

5718 default:

5720 }

5721}

5722

5726

5727 Known.resetAll();

5728

5729 unsigned Opc = Op.getOpcode();

5730

5731 switch (Opc) {

5732 default:

5733 break;

5734 case AMDGPUISD::CARRY:

5735 case AMDGPUISD::BORROW: {

5737 break;

5738 }

5739

5740 case AMDGPUISD::BFE_I32:

5741 case AMDGPUISD::BFE_U32: {

5743 if (!CWidth)

5744 return;

5745

5747

5748 if (Opc == AMDGPUISD::BFE_U32)

5750

5751 break;

5752 }

5753 case AMDGPUISD::FP_TO_FP16: {

5755

5756

5758 break;

5759 }

5760 case AMDGPUISD::MUL_U24:

5761 case AMDGPUISD::MUL_I24: {

5767

5768 if (TrailZ >= 32)

5769 break;

5770

5771

5772 LHSKnown = LHSKnown.trunc(24);

5773 RHSKnown = RHSKnown.trunc(24);

5774

5775 if (Opc == AMDGPUISD::MUL_I24) {

5778 unsigned MaxValBits = LHSValBits + RHSValBits;

5779 if (MaxValBits > 32)

5780 break;

5781 unsigned SignBits = 32 - MaxValBits + 1;

5782 bool LHSNegative = LHSKnown.isNegative();

5785 bool RHSNegative = RHSKnown.isNegative();

5788

5789 if ((LHSNonNegative && RHSNonNegative) || (LHSNegative && RHSNegative))

5791 else if ((LHSNegative && RHSPositive) || (LHSPositive && RHSNegative))

5793 } else {

5796 unsigned MaxValBits = LHSValBits + RHSValBits;

5797 if (MaxValBits >= 32)

5798 break;

5800 }

5801 break;

5802 }

5803 case AMDGPUISD::PERM: {

5805 if (!CMask)

5806 return;

5807

5811

5812 for (unsigned I = 0; I < 32; I += 8) {

5813 unsigned SelBits = Sel & 0xff;

5814 if (SelBits < 4) {

5815 SelBits *= 8;

5818 } else if (SelBits < 7) {

5819 SelBits = (SelBits & 3) * 8;

5822 } else if (SelBits == 0x0c) {

5823 Known.Zero |= 0xFFull << I;

5824 } else if (SelBits > 0x0c) {

5825 Known.One |= 0xFFull << I;

5826 }

5827 Sel >>= 8;

5828 }

5829 break;

5830 }

5831 case AMDGPUISD::BUFFER_LOAD_UBYTE: {

5833 break;

5834 }

5835 case AMDGPUISD::BUFFER_LOAD_USHORT: {

5837 break;

5838 }

5839 case AMDGPUISD::LDS: {

5841 Align Alignment = GA->getGlobal()->getPointerAlignment(DAG.getDataLayout());

5842

5845 break;

5846 }

5847 case AMDGPUISD::SMIN3:

5848 case AMDGPUISD::SMAX3:

5849 case AMDGPUISD::SMED3:

5850 case AMDGPUISD::UMIN3:

5851 case AMDGPUISD::UMAX3:

5852 case AMDGPUISD::UMED3: {

5855 break;

5856

5859 break;

5860

5863 break;

5864

5865

5867 Known.One = Known0.One & Known1.One & Known2.One;

5868 break;

5869 }

5871 unsigned IID = Op.getConstantOperandVal(0);

5872 switch (IID) {

5873 case Intrinsic::amdgcn_workitem_id_x:

5874 case Intrinsic::amdgcn_workitem_id_y:

5875 case Intrinsic::amdgcn_workitem_id_z: {

5876 unsigned MaxValue = Subtarget->getMaxWorkitemID(

5879 break;

5880 }

5881 default:

5882 break;

5883 }

5884 }

5885 }

5886}

5887

5890 unsigned Depth) const {

5891 switch (Op.getOpcode()) {

5892 case AMDGPUISD::BFE_I32: {

5894 if (!Width)

5895 return 1;

5896

5897 unsigned SignBits = 32 - Width->getZExtValue() + 1;

5899 return SignBits;

5900

5901

5903 return std::max(SignBits, Op0SignBits);

5904 }

5905

5906 case AMDGPUISD::BFE_U32: {

5908 return Width ? 32 - (Width->getZExtValue() & 0x1f) : 1;

5909 }

5910

5911 case AMDGPUISD::CARRY:

5912 case AMDGPUISD::BORROW:

5913 return 31;

5914 case AMDGPUISD::BUFFER_LOAD_BYTE:

5915 return 25;

5916 case AMDGPUISD::BUFFER_LOAD_SHORT:

5917 return 17;

5918 case AMDGPUISD::BUFFER_LOAD_UBYTE:

5919 return 24;

5920 case AMDGPUISD::BUFFER_LOAD_USHORT:

5921 return 16;

5922 case AMDGPUISD::FP_TO_FP16:

5923 return 16;

5924 case AMDGPUISD::SMIN3:

5925 case AMDGPUISD::SMAX3:

5926 case AMDGPUISD::SMED3:

5927 case AMDGPUISD::UMIN3:

5928 case AMDGPUISD::UMAX3:

5929 case AMDGPUISD::UMED3: {

5931 if (Tmp2 == 1)

5932 return 1;

5933

5935 if (Tmp1 == 1)

5936 return 1;

5937

5939 if (Tmp0 == 1)

5940 return 1;

5941

5942 return std::min({Tmp0, Tmp1, Tmp2});

5943 }

5944 default:

5945 return 1;

5946 }

5947}

5948

5953 if ()

5954 return 1;

5955

5956

5957 switch (MI->getOpcode()) {

5958 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE:

5959 return 25;

5960 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT:

5961 return 17;

5962 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:

5963 return 24;

5964 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:

5965 return 16;

5966 case AMDGPU::G_AMDGPU_SMED3:

5967 case AMDGPU::G_AMDGPU_UMED3: {

5968 auto [Dst, Src0, Src1, Src2] = MI->getFirst4Regs();

5969 unsigned Tmp2 = Analysis.computeNumSignBits(Src2, DemandedElts, Depth + 1);

5970 if (Tmp2 == 1)

5971 return 1;

5972 unsigned Tmp1 = Analysis.computeNumSignBits(Src1, DemandedElts, Depth + 1);

5973 if (Tmp1 == 1)

5974 return 1;

5975 unsigned Tmp0 = Analysis.computeNumSignBits(Src0, DemandedElts, Depth + 1);

5976 if (Tmp0 == 1)

5977 return 1;

5978 return std::min({Tmp0, Tmp1, Tmp2});

5979 }

5980 default:

5981 return 1;

5982 }

5983}

5984

5987 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {

5988 unsigned Opcode = Op.getOpcode();

5989 switch (Opcode) {

5990 case AMDGPUISD::BFE_I32:

5991 case AMDGPUISD::BFE_U32:

5992 return false;

5993 }

5996}

5997

6000 unsigned Depth) const {

6001 unsigned Opcode = Op.getOpcode();

6002 switch (Opcode) {

6003 case AMDGPUISD::FMIN_LEGACY:

6004 case AMDGPUISD::FMAX_LEGACY: {

6005 if (SNaN)

6006 return true;

6007

6008

6009

6010 return false;

6011 }

6012 case AMDGPUISD::FMUL_LEGACY:

6013 case AMDGPUISD::CVT_PKRTZ_F16_F32: {

6014 if (SNaN)

6015 return true;

6018 }

6019 case AMDGPUISD::FMED3:

6020 case AMDGPUISD::FMIN3:

6021 case AMDGPUISD::FMAX3:

6022 case AMDGPUISD::FMINIMUM3:

6023 case AMDGPUISD::FMAXIMUM3:

6024 case AMDGPUISD::FMAD_FTZ: {

6025 if (SNaN)

6026 return true;

6030 }

6031 case AMDGPUISD::CVT_F32_UBYTE0:

6032 case AMDGPUISD::CVT_F32_UBYTE1:

6033 case AMDGPUISD::CVT_F32_UBYTE2:

6034 case AMDGPUISD::CVT_F32_UBYTE3:

6035 return true;

6036

6037 case AMDGPUISD::RCP:

6038 case AMDGPUISD::RSQ:

6039 case AMDGPUISD::RCP_LEGACY:

6040 case AMDGPUISD::RSQ_CLAMP: {

6041 if (SNaN)

6042 return true;

6043

6044

6045 return false;

6046 }

6047 case ISD::FLDEXP:

6048 case AMDGPUISD::FRACT: {

6049 if (SNaN)

6050 return true;

6052 }

6053 case AMDGPUISD::DIV_SCALE:

6054 case AMDGPUISD::DIV_FMAS:

6055 case AMDGPUISD::DIV_FIXUP:

6056

6057 return SNaN;

6058 case AMDGPUISD::SIN_HW:

6059 case AMDGPUISD::COS_HW: {

6060

6061 return SNaN;

6062 }

6064 unsigned IntrinsicID = Op.getConstantOperandVal(0);

6065

6066 switch (IntrinsicID) {

6067 case Intrinsic::amdgcn_cubeid:

6068 case Intrinsic::amdgcn_cvt_off_f32_i4:

6069 return true;

6070

6071 case Intrinsic::amdgcn_frexp_mant: {

6072 if (SNaN)

6073 return true;

6075 }

6076 case Intrinsic::amdgcn_cvt_pkrtz: {

6077 if (SNaN)

6078 return true;

6081 }

6082 case Intrinsic::amdgcn_rcp:

6083 case Intrinsic::amdgcn_rsq:

6084 case Intrinsic::amdgcn_rcp_legacy:

6085 case Intrinsic::amdgcn_rsq_legacy:

6086 case Intrinsic::amdgcn_rsq_clamp:

6087 case Intrinsic::amdgcn_tanh: {

6088 if (SNaN)

6089 return true;

6090

6091

6092 return false;

6093 }

6094 case Intrinsic::amdgcn_trig_preop:

6095 case Intrinsic::amdgcn_fdot2:

6096

6097 return SNaN;

6098 case Intrinsic::amdgcn_fma_legacy:

6099 if (SNaN)

6100 return true;

6104 default:

6105 return false;

6106 }

6107 }

6108 default:

6109 return false;

6110 }

6111}

6112

6115 return MRI.hasOneNonDBGUse(N0);

6116}

unsigned const MachineRegisterInfo * MRI

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

static LLVM_READONLY bool hasSourceMods(const MachineInstr &MI)

static bool isInv2Pi(const APFloat &APF)

static LLVM_READONLY bool opMustUseVOP3Encoding(const MachineInstr &MI, const MachineRegisterInfo &MRI)

Returns true if the operation will definitely need to use a 64-bit encoding, and thus will use a VOP3 encoding regardless of the source modifiers.

static unsigned inverseMinMax(unsigned Opc)

static SDValue extractF64Exponent(SDValue Hi, const SDLoc &SL, SelectionDAG &DAG)

Definition AMDGPUISelLowering.cpp:2469

static unsigned workitemIntrinsicDim(unsigned ID)

Definition AMDGPUISelLowering.cpp:5710

static int getOrCreateFixedStackObject(MachineFrameInfo &MFI, unsigned Size, int64_t Offset)

Definition AMDGPUISelLowering.cpp:5578

static SDValue constantFoldBFE(SelectionDAG &DAG, IntTy Src0, uint32_t Offset, uint32_t Width, const SDLoc &DL)

Definition AMDGPUISelLowering.cpp:3849

static SDValue getMad(SelectionDAG &DAG, const SDLoc &SL, EVT VT, SDValue X, SDValue Y, SDValue C, SDNodeFlags Flags=SDNodeFlags())

Definition AMDGPUISelLowering.cpp:2761

static SDValue getAddOneOp(const SDNode *V)

If V is an add of a constant 1, returns the other operand.

Definition AMDGPUISelLowering.cpp:4510

static LLVM_READONLY bool selectSupportsSourceMods(const SDNode *N)

Return true if v_cndmask_b32 will support fabs/fneg source modifiers for the type for ISD::SELECT.

Definition AMDGPUISelLowering.cpp:729

static cl::opt< bool > AMDGPUBypassSlowDiv("amdgpu-bypass-slow-div", cl::desc("Skip 64-bit divide for dynamic 32-bit values"), cl::init(true))

static SDValue getMul24(SelectionDAG &DAG, const SDLoc &SL, SDValue N0, SDValue N1, unsigned Size, bool Signed)

Definition AMDGPUISelLowering.cpp:4492

static bool fnegFoldsIntoOp(const SDNode *N)

Definition AMDGPUISelLowering.cpp:700

static bool isI24(SDValue Op, SelectionDAG &DAG)

Definition AMDGPUISelLowering.cpp:3790

static bool isCttzOpc(unsigned Opc)

Definition AMDGPUISelLowering.cpp:3201

static bool isU24(SDValue Op, SelectionDAG &DAG)

Definition AMDGPUISelLowering.cpp:3786

static SDValue peekFPSignOps(SDValue Val)

Definition AMDGPUISelLowering.cpp:1660

static bool valueIsKnownNeverF32Denorm(SDValue Src)

Return true if it's known that Src can never be an f32 denormal value.

Definition AMDGPUISelLowering.cpp:2631

static SDValue distributeOpThroughSelect(TargetLowering::DAGCombinerInfo &DCI, unsigned Op, const SDLoc &SL, SDValue Cond, SDValue N1, SDValue N2)

Definition AMDGPUISelLowering.cpp:4778

static SDValue peekFNeg(SDValue Val)

Definition AMDGPUISelLowering.cpp:1653

static SDValue simplifyMul24(SDNode *Node24, TargetLowering::DAGCombinerInfo &DCI)

Definition AMDGPUISelLowering.cpp:3797

static bool isCtlzOpc(unsigned Opc)

Definition AMDGPUISelLowering.cpp:3197

static LLVM_READNONE bool fnegFoldsIntoOpcode(unsigned Opc)

Definition AMDGPUISelLowering.cpp:661

static bool hasVolatileUser(SDNode *Val)

Definition AMDGPUISelLowering.cpp:3864

Interface definition of the TargetLowering class that is common to all AMD GPUs.

Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.

MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL

Function Alias Analysis Results

block Block Frequency Analysis

static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

Provides analysis for querying information about KnownBits during GISel passes.

const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]

static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)

Return the first DebugLoc that has line number information, given a range of instructions.

const SmallVectorImpl< MachineOperand > & Cond

static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")

static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")

static CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg)

Definition AMDGPUISelLowering.cpp:1132

static CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg)

Definition AMDGPUISelLowering.cpp:1160

uint64_t getExplicitKernArgSize() const

static std::optional< uint32_t > getLDSAbsoluteAddress(const GlobalValue &GV)

void recordNumNamedBarriers(uint32_t GVAddr, unsigned BarCnt)

unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV)

bool isModuleEntryFunction() const

bool has16BitInsts() const

static const AMDGPUSubtarget & get(const MachineFunction &MF)

static unsigned numBitsSigned(SDValue Op, SelectionDAG &DAG)

Definition AMDGPUISelLowering.cpp:56

SDValue combineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, SDValue CC, DAGCombinerInfo &DCI) const

Generate Min/Max node.

Definition AMDGPUISelLowering.cpp:1739

unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override

This method can be implemented by targets that want to expose additional information about sign bits to the DAG Combiner.

Definition AMDGPUISelLowering.cpp:5888

SDValue performMulhuCombine(SDNode *N, DAGCombinerInfo &DCI) const

Definition AMDGPUISelLowering.cpp:4684

EVT getTypeForExtReturn(LLVMContext &Context, EVT VT, ISD::NodeType ExtendKind) const override

Return the type that should be used to zero or sign extend a zeroext/signext integer return value.

Definition AMDGPUISelLowering.cpp:800

SDValue SplitVectorLoad(SDValue Op, SelectionDAG &DAG) const

Split a vector load into 2 loads of half the vector.

Definition AMDGPUISelLowering.cpp:1862

SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:1582

SDValue performLoadCombine(SDNode *N, DAGCombinerInfo &DCI) const

Definition AMDGPUISelLowering.cpp:3896

void analyzeFormalArgumentsCompute(CCState &State, const SmallVectorImpl< ISD::InputArg > &Ins) const

The SelectionDAGBuilder will automatically promote function arguments with illegal types.

Definition AMDGPUISelLowering.cpp:1214

SDValue LowerF64ToF16Safe(SDValue Src, const SDLoc &DL, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:3627

SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:2578

SDValue storeStackInputValue(SelectionDAG &DAG, const SDLoc &SL, SDValue Chain, SDValue ArgVal, int64_t Offset) const

Definition AMDGPUISelLowering.cpp:5606

bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem, unsigned AS) const override

Return true if it is expected to be cheaper to do a store of vector constant with the given size and ...

Definition AMDGPUISelLowering.cpp:980

SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:1617

void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override

Determine which of the bits specified in Mask are known to be either zero or one and return them in the KnownZero/KnownOne bitsets.

Definition AMDGPUISelLowering.cpp:5723

bool shouldCombineMemoryType(EVT VT) const

Definition AMDGPUISelLowering.cpp:3875

SDValue splitBinaryBitConstantOpImpl(DAGCombinerInfo &DCI, const SDLoc &SL, unsigned Opc, SDValue LHS, uint32_t ValLo, uint32_t ValHi) const

Split the 64-bit value LHS into two 32-bit components, and perform the binary operation Opc to it wit...

Definition AMDGPUISelLowering.cpp:4067

SDValue lowerUnhandledCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals, StringRef Reason) const

Definition AMDGPUISelLowering.cpp:1390

SDValue performAssertSZExtCombine(SDNode *N, DAGCombinerInfo &DCI) const

Definition AMDGPUISelLowering.cpp:4006

bool isTruncateFree(EVT Src, EVT Dest) const override

Definition AMDGPUISelLowering.cpp:998

bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override

Definition AMDGPUISelLowering.cpp:986

SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:2444

TargetLowering::NegatibleCost getConstantNegateCost(const ConstantFPSDNode *C) const

Definition AMDGPUISelLowering.cpp:4937

SDValue LowerFLOGUnsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG, bool IsLog10, SDNodeFlags Flags) const

Definition AMDGPUISelLowering.cpp:2872

bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override

Return true if Op can create undef or poison from non-undef & non-poison operands.

Definition AMDGPUISelLowering.cpp:5985

SDValue performMulhsCombine(SDNode *N, DAGCombinerInfo &DCI) const

Definition AMDGPUISelLowering.cpp:4651

SDValue lowerFEXPUnsafeImpl(SDValue Op, const SDLoc &SL, SelectionDAG &DAG, SDNodeFlags Flags, bool IsExp10) const

Definition AMDGPUISelLowering.cpp:2962

bool isSDNodeAlwaysUniform(const SDNode *N) const override

Definition AMDGPUISelLowering.cpp:906

bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override

Return true if it is profitable to move this shift by a constant amount through its operand,...

Definition AMDGPUISelLowering.cpp:1086

SDValue performShlCombine(SDNode *N, DAGCombinerInfo &DCI) const

Definition AMDGPUISelLowering.cpp:4090

bool isCheapToSpeculateCtlz(Type *Ty) const override

Return true if it is cheap to speculate a call to intrinsic ctlz.

Definition AMDGPUISelLowering.cpp:902

SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:2384

bool isFNegFree(EVT VT) const override

Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...

Definition AMDGPUISelLowering.cpp:973

SDValue LowerFLOG10(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:2866

SDValue LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG, bool Signed) const

Definition AMDGPUISelLowering.cpp:3421

unsigned computeNumSignBitsForTargetInstr(GISelValueTracking &Analysis, Register R, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const override

This method can be implemented by targets that want to expose additional information about sign bits to GlobalISel combiners.

Definition AMDGPUISelLowering.cpp:5949

SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override

This callback is invoked for operations that are unsupported by the target, which are registered to u...

Definition AMDGPUISelLowering.cpp:1436

SDValue LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:3610

SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG, MachineFrameInfo &MFI, int ClobberedFI) const

Definition AMDGPUISelLowering.cpp:1356

bool isConstantCheaperToNegate(SDValue N) const

Definition AMDGPUISelLowering.cpp:4953

bool isReassocProfitable(MachineRegisterInfo &MRI, Register N0, Register N1) const override

Definition AMDGPUISelLowering.cpp:6113

bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool SNaN=false, unsigned Depth=0) const override

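If SNaN is false, returns true if Op is known to never be any NaN; if SNaN is true, returns true if Op is known to never be a signaling NaN.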

Definition AMDGPUISelLowering.cpp:5998

static bool needsDenormHandlingF32(const SelectionDAG &DAG, SDValue Src, SDNodeFlags Flags)

Definition AMDGPUISelLowering.cpp:2659

uint32_t getImplicitParameterOffset(const MachineFunction &MF, const ImplicitParameter Param) const

Helper function that returns the byte offset of the given type of implicit parameter.

Definition AMDGPUISelLowering.cpp:5665

SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:2605

SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const

Definition AMDGPUISelLowering.cpp:4878

SDValue performFNegCombine(SDNode *N, DAGCombinerInfo &DCI) const

Definition AMDGPUISelLowering.cpp:5007

SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:3715

virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:1524

bool isConstantCostlierToNegate(SDValue N) const

Definition AMDGPUISelLowering.cpp:4947

SDValue loadInputValue(SelectionDAG &DAG, const TargetRegisterClass *RC, EVT VT, const SDLoc &SL, const ArgDescriptor &Arg) const

Definition AMDGPUISelLowering.cpp:5625

SDValue LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool sign) const

Definition AMDGPUISelLowering.cpp:2000

SDValue lowerFEXP10Unsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG, SDNodeFlags Flags) const

Emit approx-funcs appropriate lowering for exp10.

Definition AMDGPUISelLowering.cpp:3012

bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtType, EVT ExtVT, std::optional< unsigned > ByteOffset) const override

Return true if it is profitable to reduce a load to a smaller type.

Definition AMDGPUISelLowering.cpp:834

SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:3440

bool isCheapToSpeculateCttz(Type *Ty) const override

Return true if it is cheap to speculate a call to intrinsic cttz.

Definition AMDGPUISelLowering.cpp:898

SDValue performCtlz_CttzCombine(const SDLoc &SL, SDValue Cond, SDValue LHS, SDValue RHS, DAGCombinerInfo &DCI) const

Definition AMDGPUISelLowering.cpp:4744

SDValue performSraCombine(SDNode *N, DAGCombinerInfo &DCI) const

Definition AMDGPUISelLowering.cpp:4190

bool isSelectSupported(SelectSupportKind) const override

Definition AMDGPUISelLowering.cpp:815

bool isZExtFree(Type *Src, Type *Dest) const override

Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...

Definition AMDGPUISelLowering.cpp:1019

SDValue lowerFEXP2(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:2911

SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override

This hook must be implemented to lower calls into the specified DAG.

Definition AMDGPUISelLowering.cpp:1421

SDValue performSrlCombine(SDNode *N, DAGCombinerInfo &DCI) const

Definition AMDGPUISelLowering.cpp:4291

SDValue lowerFEXP(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:3066

SDValue getIsLtSmallestNormal(SelectionDAG &DAG, SDValue Op, SDNodeFlags Flags) const

Definition AMDGPUISelLowering.cpp:2668

bool mayIgnoreSignedZero(SDValue Op) const

Definition AMDGPUISelLowering.cpp:645

SDValue getIsFinite(SelectionDAG &DAG, SDValue Op, SDNodeFlags Flags) const

Definition AMDGPUISelLowering.cpp:2686

bool isLoadBitCastBeneficial(EVT, EVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const final

Return true if the following transform is beneficial: fold (conv (load x)) -> (load (conv*)x) On arch...

Definition AMDGPUISelLowering.cpp:874

std::pair< SDValue, SDValue > splitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HighVT, SelectionDAG &DAG) const

Split a vector value into two parts of types LoVT and HiVT.

Definition AMDGPUISelLowering.cpp:1827

AMDGPUTargetLowering(const TargetMachine &TM, const TargetSubtargetInfo &STI, const AMDGPUSubtarget &AMDGPUSTI)

Definition AMDGPUISelLowering.cpp:62

SDValue LowerFLOGCommon(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:2767

SDValue foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI, SDValue N) const

Definition AMDGPUISelLowering.cpp:4801

SDValue LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG, bool Signed) const

Definition AMDGPUISelLowering.cpp:3295

bool isFAbsFree(EVT VT) const override

Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...

Definition AMDGPUISelLowering.cpp:965

SDValue loadStackInputValue(SelectionDAG &DAG, EVT VT, const SDLoc &SL, int64_t Offset) const

Similar to CreateLiveInRegister, except the value may be loaded from a stack slot rather than passed in a register.

Definition AMDGPUISelLowering.cpp:5590

SDValue LowerFLOG2(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:2726

static EVT getEquivalentMemType(LLVMContext &Context, EVT VT)

Definition AMDGPUISelLowering.cpp:41

SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const override

Hooks for building estimates in place of slower divisions and square roots.

Definition AMDGPUISelLowering.cpp:5671

SDValue performTruncateCombine(SDNode *N, DAGCombinerInfo &DCI) const

Definition AMDGPUISelLowering.cpp:4395

SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:3486

static SDValue stripBitcast(SDValue Val)

SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC, Register Reg, EVT VT, const SDLoc &SL, bool RawReg=false) const

Helper function that adds Reg to the LiveIn list of the DAG's MachineFunction.

Definition AMDGPUISelLowering.cpp:5554

SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const

Split a vector store into 2 stores of half the vector.

Definition AMDGPUISelLowering.cpp:1956

SDValue LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:3235

SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOperations, bool ForCodeSize, NegatibleCost &Cost, unsigned Depth) const override

Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...

Definition AMDGPUISelLowering.cpp:930

std::pair< SDValue, SDValue > split64BitValue(SDValue Op, SelectionDAG &DAG) const

Return 64-bit value Op as two 32-bit integers.

Definition AMDGPUISelLowering.cpp:1778

SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const

Definition AMDGPUISelLowering.cpp:4517

SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override

Return a reciprocal estimate value for the input operand.

Definition AMDGPUISelLowering.cpp:5689

SDValue LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:2558

SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:3759

static CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg)

Definition AMDGPUISelLowering.cpp:1351

std::pair< SDValue, SDValue > getScaledLogInput(SelectionDAG &DAG, const SDLoc SL, SDValue Op, SDNodeFlags Flags) const

If denormal handling is required return the scaled input to FLOG2, and the check for denormal range.

Definition AMDGPUISelLowering.cpp:2703

static CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg)

Selects the correct CCAssignFn for a given CallingConvention value.

Definition AMDGPUISelLowering.cpp:1346

static bool allUsesHaveSourceMods(const SDNode *N, unsigned CostThreshold=4)

Definition AMDGPUISelLowering.cpp:774

SDValue LowerFROUNDEVEN(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:2530

bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override

Returns true if the target can instruction select the specified FP immediate natively.

Definition AMDGPUISelLowering.cpp:821

static unsigned numBitsUnsigned(SDValue Op, SelectionDAG &DAG)

Definition AMDGPUISelLowering.cpp:52

SDValue lowerFEXPUnsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG, SDNodeFlags Flags) const

Definition AMDGPUISelLowering.cpp:2978

SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:2484

SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:1426

static bool allowApproxFunc(const SelectionDAG &DAG, SDNodeFlags Flags)

Definition AMDGPUISelLowering.cpp:2654

bool ShouldShrinkFPConstant(EVT VT) const override

If true, then instruction selection should seek to shrink the FP constant of the specified type to a ...

Definition AMDGPUISelLowering.cpp:829

SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override

This hook must be implemented to lower outgoing return values, described by the Outs array,...

Definition AMDGPUISelLowering.cpp:1329

SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const

Definition AMDGPUISelLowering.cpp:3949

void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override

This callback is invoked when a node result type is illegal for the target, and the operation was reg...

Definition AMDGPUISelLowering.cpp:1484

SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const

Definition AMDGPUISelLowering.cpp:5296

SDValue getLoHalf64(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:1792

SDValue lowerCTLZResults(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:3205

SDValue performFAbsCombine(SDNode *N, DAGCombinerInfo &DCI) const

Definition AMDGPUISelLowering.cpp:5271

SDValue LowerFP_TO_INT64(SDValue Op, SelectionDAG &DAG, bool Signed) const

Definition AMDGPUISelLowering.cpp:3535

static bool shouldFoldFNegIntoSrc(SDNode *FNeg, SDValue FNegSrc)

Definition AMDGPUISelLowering.cpp:4988

bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const override

Return true if it's profitable to narrow operations of type SrcVT to DestVT.

Definition AMDGPUISelLowering.cpp:1041

SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:2567

SDValue performIntrinsicWOChainCombine(SDNode *N, DAGCombinerInfo &DCI) const

Definition AMDGPUISelLowering.cpp:4029

SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:2329

SDValue performMulLoHiCombine(SDNode *N, DAGCombinerInfo &DCI) const

Definition AMDGPUISelLowering.cpp:4602

SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override

This method will be invoked for all target nodes and for any target-independent nodes that the target...

Definition AMDGPUISelLowering.cpp:5308

void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG, SmallVectorImpl< SDValue > &Results) const

Definition AMDGPUISelLowering.cpp:2115

SDValue WidenOrSplitVectorLoad(SDValue Op, SelectionDAG &DAG) const

Widen a suitably aligned v3 load.

Definition AMDGPUISelLowering.cpp:1922

std::pair< EVT, EVT > getSplitDestVTs(const EVT &VT, SelectionDAG &DAG) const

Split a vector type into two parts.

Definition AMDGPUISelLowering.cpp:1812

SDValue getHiHalf64(SDValue Op, SelectionDAG &DAG) const

Definition AMDGPUISelLowering.cpp:1800

SDValue combineFMinMaxLegacyImpl(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, SDValue CC, DAGCombinerInfo &DCI) const

Definition AMDGPUISelLowering.cpp:1670

unsigned getVectorIdxWidth(const DataLayout &) const override

Returns the type to be used for the index operand vector operations.

Definition AMDGPUISelLowering.cpp:811

static const fltSemantics & IEEEsingle()

static const fltSemantics & IEEEdouble()

static constexpr roundingMode rmNearestTiesToEven

static const fltSemantics & IEEEhalf()

bool bitwiseIsEqual(const APFloat &RHS) const

opStatus add(const APFloat &RHS, roundingMode RM)

const fltSemantics & getSemantics() const

opStatus multiply(const APFloat &RHS, roundingMode RM)

static APFloat getSmallestNormalized(const fltSemantics &Sem, bool Negative=false)

Returns the smallest (by magnitude) normalized finite number in the given semantics.

static APFloat getInf(const fltSemantics &Sem, bool Negative=false)

Factory for Positive and Negative Infinity.

Class for arbitrary precision integers.

uint64_t getZExtValue() const

Get zero extended value.

void setHighBits(unsigned hiBits)

Set the top hiBits bits.

void setBitsFrom(unsigned loBit)

Set the top bits starting from loBit.

static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)

Get a value with a block of bits set.

bool ule(const APInt &RHS) const

Unsigned less or equal comparison.

static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)

Constructs an APInt value that has the bottom loBitsSet bits set.

static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)

Constructs an APInt value that has the top hiBitsSet bits set.

void setLowBits(unsigned loBits)

Set the bottom loBits bits.

This class represents an incoming formal argument to a Function.

CCState - This class holds information needed while lowering arguments and return values.

static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)

const APFloat & getValueAPF() const

bool isNegative() const

Return true if the value is negative.

uint64_t getZExtValue() const

A parsed version of the target data layout string, and methods for querying it.

Diagnostic information for unsupported feature in backend.

const DataLayout & getDataLayout() const

Get the data layout of the module this function belongs to.

iterator_range< arg_iterator > args()

CallingConv::ID getCallingConv() const

getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this function.

LLVMContext & getContext() const

getContext - Return a reference to the LLVMContext associated with this function.

Type * getValueType() const

This is an important class for using LLVM in a threaded context.

LLVM_ABI void diagnose(const DiagnosticInfo &DI)

Report a message to the currently installed diagnostic handler.

This class is used to represent ISD::LOAD nodes.

const SDValue & getBasePtr() const

static auto integer_fixedlen_vector_valuetypes()

unsigned getVectorNumElements() const

bool isVector() const

Return true if this is a vector value type.

bool isInteger() const

Return true if this is an integer or a vector integer type.

static auto integer_valuetypes()

bool isFloatingPoint() const

Return true if this is a FP or a vector FP type.

MVT getScalarType() const

If this is a vector, return the element type, otherwise return this.

The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.

LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)

Create a new object at a fixed location on the stack.

int64_t getObjectSize(int ObjectIdx) const

Return the size of the specified object.

int64_t getObjectOffset(int ObjectIdx) const

Return the assigned stack offset of the specified object from the incoming stack pointer.

int getObjectIndexBegin() const

Return the minimum frame object index.

MachineFrameInfo & getFrameInfo()

getFrameInfo - Return the frame info object for the current function.

DenormalMode getDenormalMode(const fltSemantics &FPType) const

Returns the denormal handling type for the default rounding mode of the function.

MachineRegisterInfo & getRegInfo()

getRegInfo - Return information about the registers currently in use.

Function & getFunction()

Return the LLVM function that this machine code represents.

Ty * getInfo()

getInfo - Keep track of various per-function pieces of information for backends that would like to do...

Representation of each machine instruction.

A description of a memory reference used in the backend.

@ MODereferenceable

The memory access is dereferenceable (i.e., doesn't trap).

@ MOInvariant

The memory access always returns the same value (or traps).

Flags getFlags() const

Return the raw flags of the source value.

MachineRegisterInfo - Keep track of information for virtual and physical registers,...

This is an abstract virtual class for memory operations.

unsigned getAddressSpace() const

Return the address space for the associated pointer.

bool isSimple() const

Returns true if the memory operation is neither atomic nor volatile.

MachineMemOperand * getMemOperand() const

Return a MachineMemOperand object describing the memory reference performed by operation.

const SDValue & getChain() const

EVT getMemoryVT() const

Return the type of the in-memory value.

Wrapper class representing virtual and physical registers.

Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...

const DebugLoc & getDebugLoc() const

Represents one node in the SelectionDAG.

ArrayRef< SDUse > ops() const

unsigned getOpcode() const

Return the SelectionDAG opcode value for this node.

bool hasOneUse() const

Return true if there is exactly one use of this node.

SDNodeFlags getFlags() const

SDVTList getVTList() const

const SDValue & getOperand(unsigned Num) const

uint64_t getConstantOperandVal(unsigned Num) const

Helper method returns the integer value of a ConstantSDNode operand.

iterator_range< user_iterator > users()

Represents a use of a SDNode.

Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.

SDNode * getNode() const

get the SDNode which holds the desired result

bool hasOneUse() const

Return true if there is exactly one node using value ResNo of Node.

SDValue getValue(unsigned R) const

EVT getValueType() const

Return the ValueType of the referenced return value.

TypeSize getValueSizeInBits() const

Returns the size of the value in bits.

const SDValue & getOperand(unsigned i) const

unsigned getOpcode() const

unsigned getNumOperands() const

This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...

SIModeRegisterDefaults getMode() const

This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...

LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())

LLVM_ABI unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const

Get the upper bound on bit size for this Value Op as a signed integer.

const SDValue & getRoot() const

Return the root tag of the SelectionDAG.

LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)

Create a MERGE_VALUES node from the given operands.

LLVM_ABI SDVTList getVTList(EVT VT)

Return an SDVTList that represents the list of values specified.

LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)

LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)

LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())

Append the extracted elements from Start to Count out of the vector Op in Args.

LLVM_ABI SDValue getFreeze(SDValue V)

Return a freeze using the SDLoc of the value operand.

SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)

Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...

LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)

Create a ConstantFPSDNode wrapping a constant value.

LLVM_ABI SDValue getRegister(Register Reg, EVT VT)

LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)

Loads are not normal binary operators: their result type is not determined by their operands,...

LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)

Create a bitwise NOT operation as (XOR Val, -1).

const TargetLowering & getTargetLoweringInfo() const

SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)

Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).

SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)

Return an ISD::BUILD_VECTOR node.

LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)

Return a bitcast using the SDLoc of the value operand, and casting to the provided type.

SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)

SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())

Helper function to make it easier to build Select's if you just have operands and don't want to check...

LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)

Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.

const DataLayout & getDataLayout() const

LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)

Create a ConstantSDNode wrapping a constant value.

LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())

LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)

Modify anything using 'From' to use 'To' instead.

LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())

Helper function to build ISD::STORE nodes.

LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)

SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)

Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...

bool isConstantValueOfAnyType(SDValue N) const

SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())

Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...

LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)

Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...

LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)

LLVM_ABI SDValue getValueType(EVT)

LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)

Gets or creates the specified node.

LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const

Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...

SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)

LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const

Return the number of times the sign bit of the register is replicated into the other bits.

LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)

LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)

Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.

MachineFunction & getMachineFunction() const

SDValue getPOISON(EVT VT)

Return a POISON node. POISON does not have a useful SDLoc.

LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)

LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const

Determine which bits of Op are known to be either zero or one and return them in Known.

LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)

Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...

LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const

Return true if 'Op & Mask' is known to be zero.

SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)

Create an add instruction with appropriate flags when used for addressing some offset of an object.

LLVMContext * getContext() const

const SDValue & setRoot(SDValue N)

Set the current root tag of the SelectionDAG.

LLVM_ABI SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)

Mutate the specified node in-place to have the specified operands.

SDValue getEntryNode() const

Return the token chain corresponding to the entry of the function.

LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)

Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

This class is used to represent ISD::STORE nodes.

const SDValue & getBasePtr() const

const SDValue & getValue() const

StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.

void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)

Indicate that the specified operation does not work with the specified type and indicate what to do a...

void setMaxDivRemBitWidthSupported(unsigned SizeInBits)

Set the size in bits of the maximum div/rem the backend supports.

bool PredictableSelectIsExpensive

Tells the code generator that select is more expensive than a branch if the branch is usually predict...

virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT, std::optional< unsigned > ByteOffset=std::nullopt) const

Return true if it is profitable to reduce a load to a smaller type.

unsigned MaxStoresPerMemcpyOptSize

Likewise for functions with the OptSize attribute.

const TargetMachine & getTargetMachine() const

virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const

Certain targets require unusual breakdowns of certain types.

unsigned MaxGluedStoresPerMemcpy

Specify max number of store instructions to glue in inlined memcpy.

virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const

Certain combinations of ABIs, Targets and features require that types are legal for some operations a...

void addBypassSlowDiv(unsigned int SlowBitWidth, unsigned int FastBitWidth)

Tells the code generator which bitwidths to bypass.

void setMaxLargeFPConvertBitWidthSupported(unsigned SizeInBits)

Set the size in bits of the maximum fp to/from int conversion the backend supports.

void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)

Set the maximum atomic operation size supported by the backend.

SelectSupportKind

Enum that describes what type of support for selects the target has.

virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const

Determine if the target supports unaligned memory accesses.

unsigned MaxStoresPerMemsetOptSize

Likewise for functions with the OptSize attribute.

EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const

Returns the type for the shift amount of a shift opcode.

unsigned MaxStoresPerMemmove

Specify maximum number of store instructions per memmove call.

virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const

Return the ValueType of the result of SETCC operations.

virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const

For types supported by the target, this is an identity function.

unsigned MaxStoresPerMemmoveOptSize

Likewise for functions with the OptSize attribute.

bool isTypeLegal(EVT VT) const

Return true if the target has native support for the specified value type.

void setSupportsUnalignedAtomics(bool UnalignedSupported)

Sets whether unaligned atomic operations are supported.

bool isOperationLegal(unsigned Op, EVT VT) const

Return true if the specified operation is legal on this target.

unsigned MaxStoresPerMemset

Specify maximum number of store instructions per memset call.

void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)

Indicate that the specified truncating store does not work with the specified type and indicate what ...

void setMinCmpXchgSizeInBits(unsigned SizeInBits)

Sets the minimum cmpxchg or ll/sc size supported by the backend.

void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)

If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...

void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)

Targets should invoke this method for each target independent node that they want to provide a custom...

void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)

Indicate that the specified load with extension does not work with the specified type and indicate wh...

unsigned GatherAllAliasesMaxDepth

Depth that GatherAllAliases should continue looking for chain dependencies when trying to find a more...

NegatibleCost

Enum that specifies when a float negation is beneficial.

bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const

This function returns true if the memory access is aligned or if the target allows this specific unal...

unsigned MaxStoresPerMemcpy

Specify maximum number of store instructions per memcpy call.

void setSchedulingPreference(Sched::Preference Pref)

Specify the target scheduling preference.

void setJumpIsExpensive(bool isExpensive=true)

Tells the code generator not to expand logic operations on comparison predicates into separate sequen...

This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...

SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const

SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const

More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...

SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const

Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.

bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const

Check to see if the specified operand of the specified instruction is a constant integer.

std::pair< SDValue, SDValue > expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const

Expands an unaligned load to 2 half-size loads for an integer, and possibly more for vectors.

virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const

Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...

std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const

Turn load of vector type into a load of the individual elements.

bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const

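Look at Op. At this point, we know that only the DemandedBits bits of the result of Op are ever used downstream.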

TargetLowering(const TargetLowering &)=delete

virtual bool canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const

Return true if Op can create undef or poison from non-undef & non-poison operands.

Primary interface to the complete machine description for the target machine.

TargetSubtargetInfo - Generic base class for all target subtargets.

static constexpr TypeSize getFixed(ScalarTy ExactSize)

The instances of the Type class are immutable: once they are created, they are never changed.

LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY

If this is a vector type, return the getPrimitiveSizeInBits value for the element type.

LLVM Value Representation.

LLVM_ABI StringRef getName() const

Return a constant reference to the value's name.

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

@ CONSTANT_ADDRESS_32BIT

Address space for 32-bit constant memory.

@ REGION_ADDRESS

Address space for region memory. (GDS)

@ LOCAL_ADDRESS

Address space for local memory.

@ CONSTANT_ADDRESS

Address space for constant memory (VTX2).

@ GLOBAL_ADDRESS

Address space for global memory (RAT0, VTX0).

bool isIntrinsicAlwaysUniform(unsigned IntrID)

TargetExtType * isNamedBarrier(const GlobalVariable &GV)

bool isUniformMMO(const MachineMemOperand *MMO)

unsigned ID

LLVM IR allows arbitrary numbers to be used as calling convention identifiers.

@ AMDGPU_CS

Used for Mesa/AMDPAL compute shaders.

@ AMDGPU_VS

Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...

@ AMDGPU_KERNEL

Used for AMDGPU code object kernels.

@ AMDGPU_Gfx

Used for AMD graphics targets.

@ AMDGPU_CS_ChainPreserve

Used on AMDGPUs to give the middle-end more control over argument placement.

@ AMDGPU_HS

Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).

@ AMDGPU_GS

Used for Mesa/AMDPAL geometry shaders.

@ AMDGPU_CS_Chain

Used on AMDGPUs to give the middle-end more control over argument placement.

@ AMDGPU_PS

Used for Mesa/AMDPAL pixel shaders.

@ Cold

Attempts to make code in the caller as efficient as possible under the assumption that the call is no...

@ SPIR_KERNEL

Used for SPIR kernel functions.

@ Fast

Attempts to make calls as fast as possible (e.g. by passing things in registers).

@ AMDGPU_ES

Used for AMDPAL shader stage before geometry shader if geometry is in use.

@ AMDGPU_LS

Used for AMDPAL vertex shader if tessellation is in use.

@ C

The default llvm calling convention, compatible with C.

NodeType

ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.

@ SETCC

SetCC operator - This evaluates to a true value iff the condition is true.

@ SMUL_LOHI

SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...

@ INSERT_SUBVECTOR

INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.

@ BSWAP

Byte Swap and Counting operators.

@ ADDC

Carry-setting nodes for multiple precision addition and subtraction.

@ FMAD

FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.

@ ADD

Simple integer binary arithmetic operators.

@ ANY_EXTEND

ANY_EXTEND - Used for integer types. The high bits are undefined.

@ FMA

FMA - Perform a * b + c with no intermediate rounding step.

@ SINT_TO_FP

[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...

@ CONCAT_VECTORS

CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...

@ FADD

Simple binary floating point operators.

@ SDIVREM

SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.

@ BUILD_PAIR

BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.

@ SIGN_EXTEND

Conversion operators.

@ CTTZ_ZERO_UNDEF

Bit counting operators with an undefined result for zero inputs.

@ FCANONICALIZE

Returns platform specific canonical encoding of a floating point number.

@ IS_FPCLASS

Performs a check of floating point class property, defined by IEEE-754.

@ SELECT

Select(COND, TRUEVAL, FALSEVAL).

@ EXTRACT_ELEMENT

EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...

@ MULHU

MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...

@ SHL

Shift and rotation operations.

@ VECTOR_SHUFFLE

VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.

@ EXTRACT_SUBVECTOR

EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.

@ EntryToken

EntryToken - This is the marker used to indicate the start of a region.

@ EXTRACT_VECTOR_ELT

EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...

@ CopyToReg

CopyToReg - This node has three operands: a chain, a register number to set to this value,...

@ ZERO_EXTEND

ZERO_EXTEND - Used for integer types, zeroing the new bits.

@ SELECT_CC

Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...

@ SIGN_EXTEND_INREG

SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...

@ SMIN

[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.

@ VSELECT

Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...

@ UADDO_CARRY

Carry-using nodes for multiple precision addition and subtraction.

@ FP_TO_SINT

FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.

@ AND

Bitwise operators - logical and, logical or, logical xor.

@ INTRINSIC_WO_CHAIN

RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...

@ ADDE

Carry-using nodes for multiple precision addition and subtraction.

@ INSERT_VECTOR_ELT

INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.

@ TokenFactor

TokenFactor - This node takes multiple tokens as input and produces a single token result.

@ FP_ROUND

X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...

@ TRUNCATE

TRUNCATE - Completely drop the high bits.

@ AssertSext

AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...

@ FCOPYSIGN

FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.

@ INTRINSIC_W_CHAIN

RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...

@ BUILD_VECTOR

BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...

bool isNormalStore(const SDNode *N)

Returns true if the specified node is a non-truncating and unindexed store.

CondCode

ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...

LoadExtType

LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).

bool isNormalLoad(const SDNode *N)

Returns true if the specified node is a non-extending and unindexed load.

initializer< Ty > init(const Ty &Val)

This is an optimization pass for GlobalISel generic memory operations.

bool all_of(R &&range, UnaryPredicate P)

Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.

MaybeAlign getAlign(const CallInst &I, unsigned Index)

LLVM_ABI bool isNullConstant(SDValue V)

Returns true if V is a constant integer zero.

void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< EVT > *MemVTs=nullptr, SmallVectorImpl< TypeSize > *Offsets=nullptr, TypeSize StartingOffset=TypeSize::getZero())

ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)

CCAssignFn - This function assigns a location for Val, updating State to reflect the change.

LLVM_ABI ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)

Returns the SDNode if it is a constant splat BuildVector or constant float.

uint64_t PowerOf2Ceil(uint64_t A)

Returns the power of two which is greater than or equal to the given value.

int countr_zero(T Val)

Count number of 0's from the least significant bit to the most stopping at the first 1.

int countl_zero(T Val)

Count number of 0's from the most significant bit to the least stopping at the first 1.

decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)

constexpr uint32_t Hi_32(uint64_t Value)

Return the high 32 bits of a 64 bit value.

constexpr uint32_t Lo_32(uint64_t Value)

Return the low 32 bits of a 64 bit value.

bool isa(const From &Val)

isa - Return true if the parameter to the template is an instance of one of the template type argu...

LLVM_ABI raw_fd_ostream & errs()

This returns a reference to a raw_ostream for standard error.

To bit_cast(const From &from) noexcept

@ Mul

Product of integers.

uint64_t alignTo(uint64_t Size, Align A)

Returns a multiple of A needed to store Size bytes.

DWARFExpression::Operation Op

LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)

Returns the SDNode if it is a constant splat BuildVector or constant int.

constexpr unsigned BitWidth

decltype(auto) cast(const From &Val)

cast - Return the argument parameter cast to the specified type.

LLVM_ABI bool isOneConstant(SDValue V)

Returns true if V is a constant integer one.

Align commonAlignment(Align A, uint64_t Offset)

Returns the alignment that satisfies both alignments.

APFloat neg(APFloat X)

Returns the negated value of the argument.

unsigned Log2(Align A)

Returns the log2 of the alignment.

static cl::opt< unsigned > CostThreshold("dfa-cost-threshold", cl::desc("Maximum cost accepted for the transformation"), cl::Hidden, cl::init(50))

LLVM_ABI bool isAllOnesConstant(SDValue V)

Returns true if V is an integer constant with all bits set.

LLVM_ABI void reportFatalUsageError(Error Err)

Report a fatal error that does not indicate a bug in LLVM.

void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)

Implement std::swap in terms of BitVector swap.

This struct is a compact representation of a valid (non-zero power of two) alignment.

MCRegister getRegister() const

unsigned getStackOffset() const

DenormalModeKind Input

Denormal treatment kind for floating point instruction inputs in the default floating-point environme...

@ PreserveSign

The sign of a flushed-to-zero number is preserved in the sign of 0.

static constexpr DenormalMode getPreserveSign()

TypeSize getStoreSize() const

Return the number of bytes overwritten by a store of the specified value type.

EVT getPow2VectorType(LLVMContext &Context) const

Widens the length of the given vector EVT up to the nearest power of 2 and returns that type.

bool isSimple() const

Test if the given EVT is simple (as opposed to being extended).

static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)

Returns the EVT that represents a vector NumElements in length, where each element is of type VT.

EVT changeTypeToInteger() const

Return the type converted to an equivalently sized integer or vector with integer element type.

bool isFloatingPoint() const

Return true if this is a FP or a vector FP type.

EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const

TypeSize getSizeInBits() const

Return the size of the specified value type in bits.

bool isByteSized() const

Return true if the bit size is a multiple of 8.

uint64_t getScalarSizeInBits() const

EVT getHalfSizedIntegerVT(LLVMContext &Context) const

Finds the smallest simple value type that is greater than or equal to half the width of this EVT.

bool isPow2VectorType() const

Returns true if the given vector type has a power-of-2 number of elements.

TypeSize getStoreSizeInBits() const

Return the number of bits overwritten by a store of the specified value type.

MVT getSimpleVT() const

Return the SimpleValueType held in the specified simple EVT.

static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)

Returns the EVT that represents an integer with the given number of bits.

uint64_t getFixedSizeInBits() const

Return the size of the specified fixed width value type in bits.

EVT getRoundIntegerType(LLVMContext &Context) const

Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...

bool isVector() const

Return true if this is a vector value type.

EVT getScalarType() const

If this is a vector type, return the element type, otherwise return this.

bool bitsGE(EVT VT) const

Return true if this has no less bits than VT.

EVT getVectorElementType() const

Given a vector type, return the type of each element.

bool isExtended() const

Test if the given EVT is extended (as opposed to being simple).

EVT changeVectorElementType(EVT EltVT) const

Return a VT for a vector type whose attributes match ourselves with the exception of the element type...

LLVM_ABI const fltSemantics & getFltSemantics() const

Returns an APFloat semantics tag appropriate for the value type.

unsigned getVectorNumElements() const

Given a vector type, return the number of elements it contains.

bool bitsLE(EVT VT) const

Return true if this has no more bits than VT.

bool isInteger() const

Return true if this is an integer or a vector integer type.

InputArg - This struct carries flags and type information about a single incoming (formal) argument o...

MVT VT

Legalized type of this argument part.

bool isNonNegative() const

Returns true if this value is known to be non-negative.

unsigned countMinTrailingZeros() const

Returns the minimum number of trailing zero bits.

bool isUnknown() const

Returns true if we don't know any bits.

KnownBits trunc(unsigned BitWidth) const

Return known bits for a truncation of the value we're tracking.

unsigned getBitWidth() const

Get the bit width of this value.

void resetAll()

Resets the known state of all bits.

unsigned countMaxActiveBits() const

Returns the maximum number of bits needed to represent all possible unsigned values with these known ...

unsigned countMinLeadingZeros() const

Returns the minimum number of leading zero bits.

APInt getMaxValue() const

Return the maximal unsigned value possible given these KnownBits.

APInt getMinValue() const

Return the minimal unsigned value possible given these KnownBits.

bool isStrictlyPositive() const

Returns true if this value is known to be positive.

bool isNegative() const

Returns true if this value is known to be negative.

unsigned countMaxSignificantBits() const

Returns the maximum number of bits needed to represent all possible signed values with these known bi...

This class contains a discriminated union of information about pointers in memory operands,...

LLVM_ABI bool isDereferenceable(unsigned Size, LLVMContext &C, const DataLayout &DL) const

Return true if memory region [V, V+Offset+Size) is known to be dereferenceable.

static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)

Stack pointer relative access.

MachinePointerInfo getWithOffset(int64_t O) const

These are IR-level optimization flags that may be propagated to SDNodes.

void setAllowContract(bool b)

This represents a list of ValueType's that has been intern'd by a SelectionDAG.

DenormalMode FP32Denormals

If this is set, neither input nor output denormals are flushed for most f32 instructions.

This structure contains all information that is necessary for lowering calls.

SmallVector< ISD::InputArg, 32 > Ins

bool isBeforeLegalizeOps() const

CombineLevel getDAGCombineLevel()

LLVM_ABI void AddToWorklist(SDNode *N)

bool isCalledByLegalizer() const

bool isBeforeLegalize() const

LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)

LLVM_ABI void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)

A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...