LLVM: lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
22#include "llvm/IR/IntrinsicsAMDGPU.h"
24#include
25
26using namespace llvm;
28
29#define DEBUG_TYPE "AMDGPUtti"
30
31namespace {
32
33struct AMDGPUImageDMaskIntrinsic {
34 unsigned Intr;
35};
36
37#define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
38#include "AMDGPUGenSearchableTables.inc"
39
40}
41
42
43
44
45
49
53 return maxnum(Src1, Src2);
54
58 return maxnum(Src0, Src2);
59
60 return maxnum(Src0, Src1);
61}
62
63
64
65
66
68 Type *VTy = V.getType();
70
71 return false;
72 }
73 if (IsFloat) {
75
76
77 APFloat FloatValue(ConstFloat->getValueAPF());
78 bool LosesInfo = true;
80 &LosesInfo);
81 return !LosesInfo;
82 }
83 } else {
85
86
87 APInt IntValue(ConstInt->getValue());
89 }
90 }
91
95 if (IsExt) {
98 return true;
99 }
100
101 return false;
102}
103
104
106 Type *VTy = V.getType();
110 return Builder.CreateIntCast(&V, Type::getInt16Ty(V.getContext()), false);
112 return Builder.CreateFPCast(&V, Type::getHalfTy(V.getContext()));
113
115}
116
117
118
119
124 Func) {
127 return std::nullopt;
128
130
131
132 Func(Args, ArgTys);
133
139
140
143
144 bool RemoveOldIntr = &OldIntr != &InstToReplace;
145
147 if (RemoveOldIntr)
149
150 return RetValue;
151}
152
153static std::optional<Instruction *>
157
158 if (const auto *LZMappingInfo =
160 if (auto *ConstantLod =
162 if (ConstantLod->isZero() || ConstantLod->isNegative()) {
165 ImageDimIntr->Dim);
167 II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
168 Args.erase(Args.begin() + ImageDimIntr->LodIndex);
169 });
170 }
171 }
172 }
173
174
175 if (const auto *MIPMappingInfo =
177 if (auto *ConstantMip =
179 if (ConstantMip->isZero()) {
182 ImageDimIntr->Dim);
184 II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
185 Args.erase(Args.begin() + ImageDimIntr->MipIndex);
186 });
187 }
188 }
189 }
190
191
192 if (const auto *BiasMappingInfo =
194 if (auto *ConstantBias =
196 if (ConstantBias->isZero()) {
199 ImageDimIntr->Dim);
201 II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
202 Args.erase(Args.begin() + ImageDimIntr->BiasIndex);
203 ArgTys.erase(ArgTys.begin() + ImageDimIntr->BiasTyArg);
204 });
205 }
206 }
207 }
208
209
210 if (const auto *OffsetMappingInfo =
212 if (auto *ConstantOffset =
214 if (ConstantOffset->isZero()) {
217 OffsetMappingInfo->NoOffset, ImageDimIntr->Dim);
219 II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
220 Args.erase(Args.begin() + ImageDimIntr->OffsetIndex);
221 });
222 }
223 }
224 }
225
226
227 if (ST->hasD16Images()) {
228
231
232 if (BaseOpcode->HasD16) {
233
234
235
236
237 if (II.hasOneUse()) {
239
240 if (User->getOpcode() == Instruction::FPTrunc &&
242
244 [&](auto &Args, auto &ArgTys) {
245
246
247 ArgTys[0] = User->getType();
248 });
249 }
250 }
251
252
253
255 ExtractTruncPairs;
256 bool AllHalfExtracts = true;
257
258 for (User *U : II.users()) {
260 if (!Ext || !Ext->hasOneUse()) {
261 AllHalfExtracts = false;
262 break;
263 }
264
266 if (!Tr || !Tr->getType()->isHalfTy()) {
267 AllHalfExtracts = false;
268 break;
269 }
270
272 }
273
274 if (!ExtractTruncPairs.empty() && AllHalfExtracts) {
276 Type *HalfVecTy =
278
279
280
283 SigTys[0] = HalfVecTy;
284
288
289 II.mutateType(HalfVecTy);
290 II.setCalledFunction(HalfDecl);
291
293 for (auto &[Ext, Tr] : ExtractTruncPairs) {
294 Value *Idx = Ext->getIndexOperand();
295
296 Builder.SetInsertPoint(Tr);
297
298 Value *HalfExtract = Builder.CreateExtractElement(&II, Idx);
300
301 Tr->replaceAllUsesWith(HalfExtract);
302 }
303
304 for (auto &[Ext, Tr] : ExtractTruncPairs) {
307 }
308
309 return &II;
310 }
311 }
312 }
313
314
315 if (!ST->hasA16() && !ST->hasG16())
316 return std::nullopt;
317
318
319
320 bool HasSampler =
322 bool FloatCoord = false;
323
324 bool OnlyDerivatives = false;
325
326 for (unsigned OperandIndex = ImageDimIntr->GradientStart;
327 OperandIndex < ImageDimIntr->VAddrEnd; OperandIndex++) {
328 Value *Coord = II.getOperand(OperandIndex);
329
331 if (OperandIndex < ImageDimIntr->CoordStart ||
333 return std::nullopt;
334 }
335
336 OnlyDerivatives = true;
337 break;
338 }
339
343 }
344
345 if (!OnlyDerivatives && !ST->hasA16())
346 OnlyDerivatives = true;
347
348
349 if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
352 "Only image instructions with a sampler can have a bias");
354 OnlyDerivatives = true;
355 }
356
357 if (OnlyDerivatives && (!ST->hasG16() || ImageDimIntr->GradientStart ==
359 return std::nullopt;
360
363
365 II, II, II.getIntrinsicID(), IC, [&](auto &Args, auto &ArgTys) {
366 ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
367 if (!OnlyDerivatives) {
368 ArgTys[ImageDimIntr->CoordTyArg] = CoordType;
369
370
371 if (ImageDimIntr->NumBiasArgs != 0)
372 ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext());
373 }
374
375 unsigned EndIndex =
378 OperandIndex < EndIndex; OperandIndex++) {
379 Args[OperandIndex] =
380 convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder);
381 }
382
383
384 if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
385 Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
386 Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder);
387 }
388 });
389}
390
394
395
396
397
398
401
402 return true;
403 }
404
407
408 return true;
409 }
410 return false;
411}
412
413
415 Value *Src = nullptr;
418 if (Src->getType()->isHalfTy())
419 return Src;
421 bool LosesInfo;
424 if (!LosesInfo)
426 }
427 return nullptr;
428}
429
430
431
435 unsigned VWidth = VTy->getNumElements();
437
438 for (int i = VWidth - 1; i > 0; --i) {
440 if (!Elt)
441 break;
442
445 break;
446 } else {
447 break;
448 }
449
451 }
452
453 return DemandedElts;
454}
455
456
457
460 unsigned VWidth = VTy->getNumElements();
463
466 SVI->getShuffleMask(ShuffleMask);
467
468 for (int I = VWidth - 1; I > 0; --I) {
469 if (ShuffleMask.empty()) {
471 if (!Elt || (Elt != FirstComponent && (Elt)))
472 break;
473 } else {
474
475
476 if (ShuffleMask[I] != ShuffleMask[0] && ShuffleMask[I] != PoisonMaskElem)
477 break;
478 }
480 }
481
482 return DemandedElts;
483}
484
487 APInt DemandedElts,
488 int DMaskIdx = -1,
489 bool IsLoad = true);
490
491
497
498
500 Value *V = U.get();
502 return true;
507 return false;
508
509
511 }
512 return false;
513}
514
515
516
517
520 unsigned LaneArgIdx) const {
521 unsigned MaskBits = ST->getWavefrontSizeLog2();
523
526 return true;
527
529 return false;
530
531
532
533
534
535 Value *LaneArg = II.getArgOperand(LaneArgIdx);
537 ConstantInt::get(LaneArg->getType(), Known.getConstant() & DemandedMask);
538 if (MaskedConst != LaneArg) {
539 II.getOperandUse(LaneArgIdx).set(MaskedConst);
540 return true;
541 }
542
543 return false;
544}
545
550
551 CallInst *NewCall = B.CreateCall(&NewCallee, Ops, OpBundles);
553 return NewCall;
554}
555
559 const auto IID = II.getIntrinsicID();
560 assert(IID == Intrinsic::amdgcn_readlane ||
561 IID == Intrinsic::amdgcn_readfirstlane ||
562 IID == Intrinsic::amdgcn_permlane64);
563
565
566
567
568
570 return nullptr;
571
572 const bool IsReadLane = (IID == Intrinsic::amdgcn_readlane);
573
574
575
576 Value *LaneID = nullptr;
577 if (IsReadLane) {
578 LaneID = II.getOperand(1);
579
580
581
582
585 return nullptr;
586 }
587 }
588
589
590
591
592 const auto DoIt = [&](unsigned OpIdx,
595 if (IsReadLane)
596 Ops.push_back(LaneID);
597
598
600
601
604 return &NewOp;
605 };
606
607
608 if (IID == Intrinsic::amdgcn_permlane64 && (OpInst))
609 return nullptr;
610
612 return DoIt(0, II.getCalledFunction());
613
616 Type *SrcTy = Src->getType();
618 return nullptr;
619
622 return DoIt(0, Remangled);
623 }
624
625
627
628
630 return DoIt(1, II.getCalledFunction());
632 return DoIt(0, II.getCalledFunction());
633 }
634
635 return nullptr;
636}
637
638std::optional<Instruction *>
641 switch (IID) {
642 case Intrinsic::amdgcn_rcp: {
643 Value *Src = II.getArgOperand(0);
646
647
650 auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
652 }
653
654 if (II.isStrictFP())
655 break;
656
658 const APFloat &ArgVal = C->getValueAPF();
661
662
663
664
665
667 }
668
671 break;
673 if (!SrcCI)
674 break;
675
676 auto IID = SrcCI->getIntrinsicID();
677
678
679
680
681 if (IID == Intrinsic::amdgcn_sqrt || IID == Intrinsic::sqrt) {
684 if (!InnerFMF.allowContract() || !SrcCI->hasOneUse())
685 break;
686
688 break;
689
691 SrcCI->getModule(), Intrinsic::amdgcn_rsq, {SrcCI->getType()});
692
693 InnerFMF |= FMF;
694 II.setFastMathFlags(InnerFMF);
695
696 II.setCalledFunction(NewDecl);
698 }
699
700 break;
701 }
702 case Intrinsic::amdgcn_sqrt:
703 case Intrinsic::amdgcn_rsq:
704 case Intrinsic::amdgcn_tanh: {
705 Value *Src = II.getArgOperand(0);
708
709
712 auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
714 }
715
716
717 if (IID == Intrinsic::amdgcn_sqrt && Src->getType()->isHalfTy()) {
719 II.getModule(), Intrinsic::sqrt, {II.getType()});
720 II.setCalledFunction(NewDecl);
721 return &II;
722 }
723
724 break;
725 }
726 case Intrinsic::amdgcn_log:
727 case Intrinsic::amdgcn_exp2: {
728 const bool IsLog = IID == Intrinsic::amdgcn_log;
729 const bool IsExp = IID == Intrinsic::amdgcn_exp2;
730 Value *Src = II.getArgOperand(0);
732
735
738
740 if (C->isInfinity()) {
741
742
743 if (->isNegative())
745
746
747 if (IsExp && C->isNegative())
749 }
750
751 if (II.isStrictFP())
752 break;
753
754 if (C->isNaN()) {
755 Constant *Quieted = ConstantFP::get(Ty, C->getValue().makeQuiet());
757 }
758
759
760 if (C->isZero() || (C->getValue().isDenormal() && Ty->isFloatTy())) {
762 : ConstantFP::get(Ty, 1.0);
764 }
765
766 if (IsLog && C->isNegative())
768
769
770 }
771
772 break;
773 }
774 case Intrinsic::amdgcn_frexp_mant:
775 case Intrinsic::amdgcn_frexp_exp: {
776 Value *Src = II.getArgOperand(0);
778 int Exp;
781
782 if (IID == Intrinsic::amdgcn_frexp_mant) {
784 II, ConstantFP::get(II.getContext(), Significand));
785 }
786
787
789 Exp = 0;
790
792 }
793
796
799 }
800
801 break;
802 }
803 case Intrinsic::amdgcn_class: {
804 Value *Src0 = II.getArgOperand(0);
805 Value *Src1 = II.getArgOperand(1);
807 if (CMask) {
809 II.getModule(), Intrinsic::is_fpclass, Src0->getType()));
810
811
812 II.setArgOperand(1, ConstantInt::get(Src1->getType(),
814 return &II;
815 }
816
817
820
821
824
825
830 }
831 break;
832 }
833 case Intrinsic::amdgcn_cvt_pkrtz: {
834 auto foldFPTruncToF16RTZ = [](Value *Arg) -> Value * {
836
841
844 bool LosesInfo;
847 return ConstantFP::get(HalfTy, Val);
848 }
849
850 Value *Src = nullptr;
852 if (Src->getType()->isHalfTy())
853 return Src;
854 }
855
856 return nullptr;
857 };
858
859 if (Value *Src0 = foldFPTruncToF16RTZ(II.getArgOperand(0))) {
860 if (Value *Src1 = foldFPTruncToF16RTZ(II.getArgOperand(1))) {
865 }
866 }
867
868 break;
869 }
870 case Intrinsic::amdgcn_cvt_pknorm_i16:
871 case Intrinsic::amdgcn_cvt_pknorm_u16:
872 case Intrinsic::amdgcn_cvt_pk_i16:
873 case Intrinsic::amdgcn_cvt_pk_u16: {
874 Value *Src0 = II.getArgOperand(0);
875 Value *Src1 = II.getArgOperand(1);
876
877
880
883 }
884
885 break;
886 }
887 case Intrinsic::amdgcn_cvt_off_f32_i4: {
888 Value* Arg = II.getArgOperand(0);
890
893
896
898 if (!CArg)
899 break;
900
901
902 constexpr size_t ResValsSize = 16;
903 static constexpr float ResVals[ResValsSize] = {
904 0.0, 0.0625, 0.125, 0.1875, 0.25, 0.3125, 0.375, 0.4375,
905 -0.5, -0.4375, -0.375, -0.3125, -0.25, -0.1875, -0.125, -0.0625};
907 ConstantFP::get(Ty, ResVals[CArg->getZExtValue() & (ResValsSize - 1)]);
909 }
910 case Intrinsic::amdgcn_ubfe:
911 case Intrinsic::amdgcn_sbfe: {
912
913 Value *Src = II.getArgOperand(0);
916 }
917
918 unsigned Width;
920 unsigned IntSize = Ty->getIntegerBitWidth();
921
923 if (CWidth) {
925 if ((Width & (IntSize - 1)) == 0) {
927 }
928
929
930 if (Width >= IntSize) {
932 II, 2, ConstantInt::get(CWidth->getType(), Width & (IntSize - 1)));
933 }
934 }
935
938 if (COffset) {
940 if (Offset >= IntSize) {
942 II, 1,
943 ConstantInt::get(COffset->getType(), Offset & (IntSize - 1)));
944 }
945 }
946
947 bool Signed = IID == Intrinsic::amdgcn_sbfe;
948
949 if (!CWidth || !COffset)
950 break;
951
952
953
954
956
957
958
959 if (Offset + Width < IntSize) {
963 RightShift->takeName(&II);
965 }
966
969
970 RightShift->takeName(&II);
972 }
973 case Intrinsic::amdgcn_exp:
974 case Intrinsic::amdgcn_exp_row:
975 case Intrinsic::amdgcn_exp_compr: {
978 if (EnBits == 0xf)
979 break;
980
981 bool IsCompr = IID == Intrinsic::amdgcn_exp_compr;
983 for (int I = 0; I < (IsCompr ? 2 : 4); ++I) {
984 if ((!IsCompr && (EnBits & (1 << I)) == 0) ||
985 (IsCompr && ((EnBits & (0x3 << (2 * I))) == 0))) {
986 Value *Src = II.getArgOperand(I + 2);
990 }
991 }
992 }
993
995 return &II;
996 }
997
998 break;
999 }
1000 case Intrinsic::amdgcn_fmed3: {
1001 Value *Src0 = II.getArgOperand(0);
1002 Value *Src1 = II.getArgOperand(1);
1003 Value *Src2 = II.getArgOperand(2);
1004
1005 for (Value *Src : {Src0, Src1, Src2}) {
1008 }
1009
1010 if (II.isStrictFP())
1011 break;
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046 Value *V = nullptr;
1047 const APFloat *ConstSrc0 = nullptr;
1048 const APFloat *ConstSrc1 = nullptr;
1049 const APFloat *ConstSrc2 = nullptr;
1050
1054 const bool IsPosInfinity = ConstSrc0 && ConstSrc0->isPosInfinity();
1057
1058 if (ConstSrc0 && ConstSrc0->isNaN() && ConstSrc0->isSignaling())
1060
1063 break;
1067 break;
1069 break;
1070 }
1074 const bool IsPosInfinity = ConstSrc1 && ConstSrc1->isPosInfinity();
1077
1078 if (ConstSrc1 && ConstSrc1->isNaN() && ConstSrc1->isSignaling())
1080
1083 break;
1087 break;
1089 break;
1090 }
1096 if (ConstSrc2 && ConstSrc2->isNaN() && ConstSrc2->isSignaling()) {
1097 auto *Quieted = ConstantFP::get(II.getType(), ConstSrc2->makeQuiet());
1099 }
1100
1104 break;
1109 break;
1111 break;
1112 }
1113 }
1114
1115 if (V) {
1117 CI->copyFastMathFlags(&II);
1118 CI->takeName(&II);
1119 }
1121 }
1122
1123 bool Swap = false;
1124
1125
1126
1129 Swap = true;
1130 }
1131
1134 Swap = true;
1135 }
1136
1139 Swap = true;
1140 }
1141
1142 if (Swap) {
1143 II.setArgOperand(0, Src0);
1144 II.setArgOperand(1, Src1);
1145 II.setArgOperand(2, Src2);
1146 return &II;
1147 }
1148
1153 C2->getValueAPF());
1155 ConstantFP::get(II.getType(), Result));
1156 }
1157 }
1158 }
1159
1160 if (!ST->hasMed3_16())
1161 break;
1162
1163
1164
1169 IID, {X->getType()}, {X, Y, Z}, &II, II.getName());
1170 return new FPExtInst(NewCall, II.getType());
1171 }
1172 }
1173 }
1174
1175 break;
1176 }
1177 case Intrinsic::amdgcn_icmp:
1178 case Intrinsic::amdgcn_fcmp: {
1180
1182 bool IsInteger = IID == Intrinsic::amdgcn_icmp;
1187 break;
1188
1189 Value *Src0 = II.getArgOperand(0);
1190 Value *Src1 = II.getArgOperand(1);
1191
1199 }
1200
1201
1202
1203
1204
1205
1210 II.getType(), Args);
1211 NewCall->addFnAttr(Attribute::Convergent);
1214 }
1215
1216
1219 II.setArgOperand(0, Src1);
1220 II.setArgOperand(1, Src0);
1221 II.setArgOperand(
1222 2, ConstantInt::get(CC->getType(), static_cast<int>(SwapPred)));
1223 return &II;
1224 }
1225
1227 break;
1228
1229
1230
1231
1232
1233
1244 return &II;
1245 }
1246
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1267
1269 ? Intrinsic::amdgcn_fcmp
1270 : Intrinsic::amdgcn_icmp;
1271
1274
1275 unsigned Width = CmpType->getBitWidth();
1276 unsigned NewWidth = Width;
1277
1278
1279 if (Width == 1)
1280 break;
1281
1282 if (Width <= 16)
1283 NewWidth = 16;
1284 else if (Width <= 32)
1285 NewWidth = 32;
1286 else if (Width <= 64)
1287 NewWidth = 64;
1288 else
1289 break;
1290
1291 if (Width != NewWidth) {
1296 } else {
1299 }
1300 }
1301 } else if (!Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isHalfTy())
1302 break;
1303
1304 Value *Args[] = {SrcLHS, SrcRHS,
1305 ConstantInt::get(CC->getType(), SrcPred)};
1307 NewIID, {II.getType(), SrcLHS->getType()}, Args);
1310 }
1311
1312 break;
1313 }
1314 case Intrinsic::amdgcn_mbcnt_hi: {
1315
1316 if (ST->isWave32())
1318 break;
1319 }
1320 case Intrinsic::amdgcn_ballot: {
1321 Value *Arg = II.getArgOperand(0);
1324
1326 if (Src->isZero()) {
1327
1329 }
1330 }
1331 if (ST->isWave32() && II.getType()->getIntegerBitWidth() == 64) {
1332
1333
1334
1335
1338 {IC.Builder.getInt32Ty()},
1339 {II.getArgOperand(0)}),
1340 II.getType());
1343 }
1344 break;
1345 }
1346 case Intrinsic::amdgcn_wavefrontsize: {
1347 if (ST->isWaveSizeKnown())
1349 II, ConstantInt::get(II.getType(), ST->getWavefrontSize()));
1350 break;
1351 }
1352 case Intrinsic::amdgcn_wqm_vote: {
1353
1355 break;
1356
1358 }
1359 case Intrinsic::amdgcn_kill: {
1361 if ( ||
->getZExtValue())
1362 break;
1363
1364
1366 }
1367 case Intrinsic::amdgcn_update_dpp: {
1368 Value *Old = II.getArgOperand(0);
1369
1373 if (BC->isZeroValue() || RM->getZExtValue() != 0xF ||
1375 break;
1376
1377
1379 }
1380 case Intrinsic::amdgcn_permlane16:
1381 case Intrinsic::amdgcn_permlane16_var:
1382 case Intrinsic::amdgcn_permlanex16:
1383 case Intrinsic::amdgcn_permlanex16_var: {
1384
1385 Value *VDstIn = II.getArgOperand(0);
1387 break;
1388
1389
1390 unsigned int FiIdx = (IID == Intrinsic::amdgcn_permlane16 ||
1391 IID == Intrinsic::amdgcn_permlanex16)
1392 ? 4
1393 : 3;
1394
1395
1396
1397
1398 unsigned int BcIdx = FiIdx + 1;
1399
1403 break;
1404
1406 }
1407 case Intrinsic::amdgcn_permlane64:
1408 case Intrinsic::amdgcn_readfirstlane:
1409 case Intrinsic::amdgcn_readlane:
1410 case Intrinsic::amdgcn_ds_bpermute: {
1411
1412 unsigned SrcIdx = IID == Intrinsic::amdgcn_ds_bpermute ? 1 : 0;
1413 const Use &Src = II.getArgOperandUse(SrcIdx);
1416
1417 if (IID == Intrinsic::amdgcn_readlane &&
1419 return &II;
1420
1421
1422
1423
1424 if (IID == Intrinsic::amdgcn_ds_bpermute) {
1425 const Use &Lane = II.getArgOperandUse(0);
1429 II.getModule(), Intrinsic::amdgcn_readlane, II.getType());
1430 II.setCalledFunction(NewDecl);
1431 II.setOperand(0, Src);
1432 II.setOperand(1, NewLane);
1433 return &II;
1434 }
1435 }
1436
1437 if (IID != Intrinsic::amdgcn_ds_bpermute) {
1439 return Res;
1440 }
1441
1442 return std::nullopt;
1443 }
1444 case Intrinsic::amdgcn_writelane: {
1445
1447 return &II;
1448 return std::nullopt;
1449 }
1450 case Intrinsic::amdgcn_trig_preop: {
1451
1452
1453 if (.getType()->isDoubleTy())
1454 break;
1455
1456 Value *Src = II.getArgOperand(0);
1457 Value *Segment = II.getArgOperand(1);
1460
1462 auto *QNaN = ConstantFP::get(
1465 }
1466
1468 if (!Csrc)
1469 break;
1470
1471 if (II.isStrictFP())
1472 break;
1473
1475 if (Fsrc.isNaN()) {
1476 auto *Quieted = ConstantFP::get(II.getType(), Fsrc.makeQuiet());
1478 }
1479
1481 if (!Cseg)
1482 break;
1483
1486 unsigned Shift = SegmentVal * 53;
1489
1490
1491 static const uint32_t TwoByPi[] = {
1492 0xa2f9836e, 0x4e441529, 0xfc2757d1, 0xf534ddc0, 0xdb629599, 0x3c439041,
1493 0xfe5163ab, 0xdebbc561, 0xb7246e3a, 0x424dd2e0, 0x06492eea, 0x09d1921c,
1494 0xfe1deb1c, 0xb129a73e, 0xe88235f5, 0x2ebb4484, 0xe99c7026, 0xb45f7e41,
1495 0x3991d639, 0x835339f4, 0x9c845f8b, 0xbdf9283b, 0x1ff897ff, 0xde05980f,
1496 0xef2f118b, 0x5a0a6d1f, 0x6d367ecf, 0x27cb09b7, 0x4f463f66, 0x9e5fea2d,
1497 0x7527bac7, 0xebe5f17b, 0x3d0739f7, 0x8a5292ea, 0x6bfb5fb1, 0x1f8d5d08,
1498 0x56033046};
1499
1500
1501 unsigned Idx = Shift >> 5;
1502 if (Idx + 2 >= std::size(TwoByPi)) {
1505 }
1506
1507 unsigned BShift = Shift & 0x1f;
1510 if (BShift)
1511 Thi = (Thi << BShift) | (Tlo >> (64 - BShift));
1512 Thi = Thi >> 11;
1514
1515 int Scale = -53 - Shift;
1517 Scale += 128;
1518
1521 }
1522 case Intrinsic::amdgcn_fmul_legacy: {
1523 Value *Op0 = II.getArgOperand(0);
1524 Value *Op1 = II.getArgOperand(1);
1525
1526 for (Value *Src : {Op0, Op1}) {
1529 }
1530
1531
1532
1533
1537
1538
1539
1544 }
1545 break;
1546 }
1547 case Intrinsic::amdgcn_fma_legacy: {
1548 Value *Op0 = II.getArgOperand(0);
1549 Value *Op1 = II.getArgOperand(1);
1550 Value *Op2 = II.getArgOperand(2);
1551
1552 for (Value *Src : {Op0, Op1, Op2}) {
1555 }
1556
1557
1558
1559
1562
1563
1568 }
1569
1570
1571
1574 II.getModule(), Intrinsic::fma, II.getType()));
1575 return &II;
1576 }
1577 break;
1578 }
1579 case Intrinsic::amdgcn_is_shared:
1580 case Intrinsic::amdgcn_is_private: {
1581 Value *Src = II.getArgOperand(0);
1586
1589 break;
1590 }
1591 case Intrinsic::amdgcn_make_buffer_rsrc: {
1592 Value *Src = II.getArgOperand(0);
1595 return std::nullopt;
1596 }
1597 case Intrinsic::amdgcn_raw_buffer_store_format:
1598 case Intrinsic::amdgcn_struct_buffer_store_format:
1599 case Intrinsic::amdgcn_raw_tbuffer_store:
1600 case Intrinsic::amdgcn_struct_tbuffer_store:
1601 case Intrinsic::amdgcn_image_store_1d:
1602 case Intrinsic::amdgcn_image_store_1darray:
1603 case Intrinsic::amdgcn_image_store_2d:
1604 case Intrinsic::amdgcn_image_store_2darray:
1605 case Intrinsic::amdgcn_image_store_2darraymsaa:
1606 case Intrinsic::amdgcn_image_store_2dmsaa:
1607 case Intrinsic::amdgcn_image_store_3d:
1608 case Intrinsic::amdgcn_image_store_cube:
1609 case Intrinsic::amdgcn_image_store_mip_1d:
1610 case Intrinsic::amdgcn_image_store_mip_1darray:
1611 case Intrinsic::amdgcn_image_store_mip_2d:
1612 case Intrinsic::amdgcn_image_store_mip_2darray:
1613 case Intrinsic::amdgcn_image_store_mip_3d:
1614 case Intrinsic::amdgcn_image_store_mip_cube: {
1616 break;
1617
1618 APInt DemandedElts;
1619 if (ST->hasDefaultComponentBroadcast())
1621 else if (ST->hasDefaultComponentZero())
1623 else
1624 break;
1625
1626 int DMaskIdx = getAMDGPUImageDMaskIntrinsic(II.getIntrinsicID()) ? 1 : -1;
1628 false)) {
1630 }
1631
1632 break;
1633 }
1634 case Intrinsic::amdgcn_prng_b32: {
1635 auto *Src = II.getArgOperand(0);
1638 }
1639 return std::nullopt;
1640 }
1641 case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
1642 case Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
1643 Value *Src0 = II.getArgOperand(0);
1644 Value *Src1 = II.getArgOperand(1);
1649
1650 auto getFormatNumRegs = [](unsigned FormatVal) {
1651 switch (FormatVal) {
1654 return 6u;
1656 return 4u;
1659 return 8u;
1660 default:
1662 }
1663 };
1664
1665 bool MadeChange = false;
1666 unsigned Src0NumElts = getFormatNumRegs(CBSZ);
1667 unsigned Src1NumElts = getFormatNumRegs(BLGP);
1668
1669
1670
1671 if (Src0Ty->getNumElements() > Src0NumElts) {
1675 MadeChange = true;
1676 }
1677
1678 if (Src1Ty->getNumElements() > Src1NumElts) {
1682 MadeChange = true;
1683 }
1684
1685 if (!MadeChange)
1686 return std::nullopt;
1687
1689 Args[0] = Src0;
1690 Args[1] = Src1;
1691
1696 }
1697 case Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4:
1698 case Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
1699 case Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4: {
1700 Value *Src0 = II.getArgOperand(1);
1701 Value *Src1 = II.getArgOperand(3);
1706
1707 bool MadeChange = false;
1710
1711
1712
1713 if (Src0Ty->getNumElements() > Src0NumElts) {
1717 MadeChange = true;
1718 }
1719
1720 if (Src1Ty->getNumElements() > Src1NumElts) {
1724 MadeChange = true;
1725 }
1726
1727 if (!MadeChange)
1728 return std::nullopt;
1729
1731 Args[1] = Src0;
1732 Args[3] = Src1;
1733
1735 IID, {II.getArgOperand(5)->getType(), Src0->getType(), Src1->getType()},
1736 Args, &II);
1739 }
1740 }
1744 }
1745 return std::nullopt;
1746}
1747
1748
1749
1750
1751
1752
1753
1754
1757 APInt DemandedElts,
1758 int DMaskIdx, bool IsLoad) {
1759
1761 : II.getOperand(0)->getType());
1762 unsigned VWidth = IIVTy->getNumElements();
1763 if (VWidth == 1)
1764 return nullptr;
1765 Type *EltTy = IIVTy->getElementType();
1766
1769
1770
1772
1773 if (DMaskIdx < 0) {
1774
1775
1776 const unsigned ActiveBits = DemandedElts.getActiveBits();
1777 const unsigned UnusedComponentsAtFront = DemandedElts.countr_zero();
1778
1779
1780
1781
1782 DemandedElts = (1 << ActiveBits) - 1;
1783
1784 if (UnusedComponentsAtFront > 0) {
1785 static const unsigned InvalidOffsetIdx = 0xf;
1786
1787 unsigned OffsetIdx;
1788 switch (II.getIntrinsicID()) {
1789 case Intrinsic::amdgcn_raw_buffer_load:
1790 case Intrinsic::amdgcn_raw_ptr_buffer_load:
1791 OffsetIdx = 1;
1792 break;
1793 case Intrinsic::amdgcn_s_buffer_load:
1794
1795
1796
1797 if (ActiveBits == 4 && UnusedComponentsAtFront == 1)
1798 OffsetIdx = InvalidOffsetIdx;
1799 else
1800 OffsetIdx = 1;
1801 break;
1802 case Intrinsic::amdgcn_struct_buffer_load:
1803 case Intrinsic::amdgcn_struct_ptr_buffer_load:
1804 OffsetIdx = 2;
1805 break;
1806 default:
1807
1808 OffsetIdx = InvalidOffsetIdx;
1809 break;
1810 }
1811
1812 if (OffsetIdx != InvalidOffsetIdx) {
1813
1814 DemandedElts &= ~((1 << UnusedComponentsAtFront) - 1);
1815 auto *Offset = Args[OffsetIdx];
1816 unsigned SingleComponentSizeInBits =
1818 unsigned OffsetAdd =
1819 UnusedComponentsAtFront * SingleComponentSizeInBits / 8;
1820 auto *OffsetAddVal = ConstantInt::get(Offset->getType(), OffsetAdd);
1822 }
1823 }
1824 } else {
1825
1826
1828 unsigned DMaskVal = DMask->getZExtValue() & 0xf;
1829
1830
1831 if (DMaskVal == 0)
1832 return nullptr;
1833
1834
1835 DemandedElts &= (1 << llvm::popcount(DMaskVal)) - 1;
1836
1837 unsigned NewDMaskVal = 0;
1838 unsigned OrigLdStIdx = 0;
1839 for (unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
1840 const unsigned Bit = 1 << SrcIdx;
1841 if (!!(DMaskVal & Bit)) {
1842 if (!!DemandedElts[OrigLdStIdx])
1843 NewDMaskVal |= Bit;
1844 OrigLdStIdx++;
1845 }
1846 }
1847
1848 if (DMaskVal != NewDMaskVal)
1849 Args[DMaskIdx] = ConstantInt::get(DMask->getType(), NewDMaskVal);
1850 }
1851
1852 unsigned NewNumElts = DemandedElts.popcount();
1853 if (!NewNumElts)
1855
1856 if (NewNumElts >= VWidth && DemandedElts.isMask()) {
1857 if (DMaskIdx >= 0)
1858 II.setArgOperand(DMaskIdx, Args[DMaskIdx]);
1859 return nullptr;
1860 }
1861
1862
1863
1866 return nullptr;
1867
1868 Type *NewTy =
1870 OverloadTys[0] = NewTy;
1871
1872 if (!IsLoad) {
1874 for (unsigned OrigStoreIdx = 0; OrigStoreIdx < VWidth; ++OrigStoreIdx)
1875 if (DemandedElts[OrigStoreIdx])
1876 EltMask.push_back(OrigStoreIdx);
1877
1878 if (NewNumElts == 1)
1880 else
1882 }
1883
1888
1889 if (IsLoad) {
1890 if (NewNumElts == 1) {
1893 }
1894
1896 unsigned NewLoadIdx = 0;
1897 for (unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
1898 if (!!DemandedElts[OrigLoadIdx])
1899 EltMask.push_back(NewLoadIdx++);
1900 else
1902 }
1903
1905
1906 return Shuffle;
1907 }
1908
1909 return NewCall;
1910}
1911
1914 APInt &UndefElts) const {
1916 if (!VT)
1917 return nullptr;
1918
1919 const unsigned FirstElt = DemandedElts.countr_zero();
1920 const unsigned LastElt = DemandedElts.getActiveBits() - 1;
1921 const unsigned MaskLen = LastElt - FirstElt + 1;
1922
1923 unsigned OldNumElts = VT->getNumElements();
1924 if (MaskLen == OldNumElts && MaskLen != 1)
1925 return nullptr;
1926
1927 Type *EltTy = VT->getElementType();
1929
1930
1931
1933 return nullptr;
1934
1935 Value *Src = II.getArgOperand(0);
1936
1937
1938
1940 II.getOperandBundlesAsDefs(OpBundles);
1941
1945
1946 if (MaskLen == 1) {
1948
1949
1951
1953 NewCall, FirstElt);
1954 }
1955
1957 for (unsigned I = 0; I != MaskLen; ++I) {
1958 if (DemandedElts[FirstElt + I])
1959 ExtractMask[I] = FirstElt + I;
1960 }
1961
1963
1964
1966
1968 for (unsigned I = 0; I != MaskLen; ++I) {
1969 if (DemandedElts[FirstElt + I])
1970 InsertMask[FirstElt + I] = I;
1971 }
1972
1973
1974
1976}
1977
1980 APInt &UndefElts2, APInt &UndefElts3,
1982 SimplifyAndSetOp) const {
1983 switch (II.getIntrinsicID()) {
1984 case Intrinsic::amdgcn_readfirstlane:
1985 SimplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
1987 case Intrinsic::amdgcn_raw_buffer_load:
1988 case Intrinsic::amdgcn_raw_ptr_buffer_load:
1989 case Intrinsic::amdgcn_raw_buffer_load_format:
1990 case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
1991 case Intrinsic::amdgcn_raw_tbuffer_load:
1992 case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
1993 case Intrinsic::amdgcn_s_buffer_load:
1994 case Intrinsic::amdgcn_struct_buffer_load:
1995 case Intrinsic::amdgcn_struct_ptr_buffer_load:
1996 case Intrinsic::amdgcn_struct_buffer_load_format:
1997 case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
1998 case Intrinsic::amdgcn_struct_tbuffer_load:
1999 case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
2001 default: {
2002 if (getAMDGPUImageDMaskIntrinsic(II.getIntrinsicID())) {
2004 }
2005 break;
2006 }
2007 }
2008 return std::nullopt;
2009}
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool canContractSqrtToRsq(const FPMathOperator *SqrtOp)
Return true if it's legal to contract llvm.amdgcn.rcp(llvm.sqrt)
Definition AMDGPUInstCombineIntrinsic.cpp:492
static bool isTriviallyUniform(const Use &U)
Return true if we can easily prove that use U is uniform.
Definition AMDGPUInstCombineIntrinsic.cpp:499
static CallInst * rewriteCall(IRBuilderBase &B, CallInst &Old, Function &NewCallee, ArrayRef< Value * > Ops)
Definition AMDGPUInstCombineIntrinsic.cpp:546
static Value * convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder)
Definition AMDGPUInstCombineIntrinsic.cpp:105
static APInt trimTrailingZerosInVector(InstCombiner &IC, Value *UseV, Instruction *I)
Definition AMDGPUInstCombineIntrinsic.cpp:432
static APInt defaultComponentBroadcast(Value *V)
Definition AMDGPUInstCombineIntrinsic.cpp:458
static std::optional< Instruction * > modifyIntrinsicCall(IntrinsicInst &OldIntr, Instruction &InstToReplace, unsigned NewIntr, InstCombiner &IC, std::function< void(SmallVectorImpl< Value * > &, SmallVectorImpl< Type * > &)> Func)
Applies Func(OldIntr.Args, OldIntr.ArgTys), creates an intrinsic call with the modified arguments (based on OldIntr), and replaces InstToReplace with the newly created intrinsic call.
Definition AMDGPUInstCombineIntrinsic.cpp:120
static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1, const APFloat &Src2)
Definition AMDGPUInstCombineIntrinsic.cpp:46
static Value * simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, int DMaskIdx=-1, bool IsLoad=true)
Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
Definition AMDGPUInstCombineIntrinsic.cpp:1755
static std::optional< Instruction * > simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST, const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr, IntrinsicInst &II, InstCombiner &IC)
Definition AMDGPUInstCombineIntrinsic.cpp:154
static bool canSafelyConvertTo16Bit(Value &V, bool IsFloat)
Definition AMDGPUInstCombineIntrinsic.cpp:67
static Value * matchFPExtFromF16(Value *Arg)
Match an fpext from half to float, or a constant we can convert.
Definition AMDGPUInstCombineIntrinsic.cpp:414
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
This file provides a TargetTransformInfoImplBase conforming object specific to the AMDGPU target machine.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Utilities for dealing with flags related to floating point properties and mode controls.
AMD GCN specific subclass of TargetSubtarget.
This file provides the interface for the instcombine pass implementation.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
cmpResult
IEEE-754R 5.11: Floating Point Comparison Relations.
static constexpr roundingMode rmTowardZero
static constexpr roundingMode rmNearestTiesToEven
static const fltSemantics & IEEEhalf()
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
opStatus divide(const APFloat &RHS, roundingMode RM)
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
bool isPosInfinity() const
const fltSemantics & getSemantics() const
APFloat makeQuiet() const
Assuming this is an IEEE-754 NaN value, quiet its signaling bit.
APInt bitcastToAPInt() const
bool isNegInfinity() const
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
cmpResult compare(const APFloat &RHS) const
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
void clearBit(unsigned BitPosition)
Set a given bit to 0.
uint64_t getZExtValue() const
Get zero extended value.
unsigned popcount() const
Count the number of bits set.
unsigned getActiveBits() const
Compute the number of active bits in the value.
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
unsigned countr_zero() const
Count the number of trailing zero bits.
bool isMask(unsigned numBits) const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
bool isTypeLegal(Type *Ty) const override
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
bool isFPPredicate() const
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
ConstantFP - Floating Point Values [float, double].
const APFloat & getValueAPF() const
static LLVM_ABI Constant * getInfinity(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getZero(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getNaN(Type *Ty, bool Negative=false, uint64_t Payload=0)
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
const APInt & getValue() const
Return the constant as an APInt value reference.
This is an important base class in LLVM.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This class represents an extension of floating point types.
Utility class for floating point operations which can have information about relaxed accuracy require...
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
bool hasApproxFunc() const
Test if this operation allows approximations of math library functions or intrinsics.
LLVM_ABI float getFPAccuracy() const
Get the maximum error permitted by this operation in ULPs.
Convenience struct for specifying and reasoning about fast-math flags.
bool allowContract() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
bool simplifyDemandedLaneMaskArg(InstCombiner &IC, IntrinsicInst &II, unsigned LaneAgIdx) const
Simplify a lane index operand (e.g.
Definition AMDGPUInstCombineIntrinsic.cpp:518
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
Definition AMDGPUInstCombineIntrinsic.cpp:639
Instruction * hoistLaneIntrinsicThroughOperand(InstCombiner &IC, IntrinsicInst &II) const
Definition AMDGPUInstCombineIntrinsic.cpp:557
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override
Definition AMDGPUInstCombineIntrinsic.cpp:1978
KnownIEEEMode fpenvIEEEMode(const Instruction &I) const
Return KnownIEEEMode::On if we know if the use context can assume "amdgpu-ieee"="true" and KnownIEEEM...
Value * simplifyAMDGCNLaneIntrinsicDemanded(InstCombiner &IC, IntrinsicInst &II, const APInt &DemandedElts, APInt &UndefElts) const
Definition AMDGPUInstCombineIntrinsic.cpp:1912
bool canSimplifyLegacyMulToMul(const Instruction &I, const Value *Op0, const Value *Op1, InstCombiner &IC) const
Definition AMDGPUInstCombineIntrinsic.cpp:391
Common base class shared among various IRBuilders.
CallInst * CreateExtractVector(Type *DstType, Value *SrcVec, Value *Idx, const Twine &Name="")
Create a call to the vector.extract intrinsic.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
BasicBlock * GetInsertBlock() const
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateMaxNum(Value *LHS, Value *RHS, FMFSource FMFSource={}, const Twine &Name="")
Create call to the maxnum intrinsic.
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateMaximumNum(Value *LHS, Value *RHS, const Twine &Name="")
Create call to the maximum intrinsic.
Value * CreateMinNum(Value *LHS, Value *RHS, FMFSource FMFSource={}, const Twine &Name="")
Create call to the minnum intrinsic.
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateFAddFMF(Value *L, Value *R, FMFSource FMFSource, const Twine &Name="", MDNode *FPMD=nullptr)
Value * CreateMinimumNum(Value *LHS, Value *RHS, const Twine &Name="")
Create call to the minimumnum intrinsic.
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateFMulFMF(Value *L, Value *R, FMFSource FMFSource, const Twine &Name="", MDNode *FPMD=nullptr)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
The core instruction combiner logic.
const DataLayout & getDataLayout() const
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
IRBuilder< TargetFolder, IRBuilderCallbackInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
DominatorTree & getDominatorTree() const
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
virtual bool SimplifyDemandedBits(Instruction *I, unsigned OpNo, const APInt &DemandedMask, KnownBits &Known, const SimplifyQuery &Q, unsigned Depth=0)=0
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
const SimplifyQuery & getSimplifyQuery() const
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void copyFastMathFlags(FastMathFlags FMF)
Convenience function for transferring all fast-math flag values to this instruction,...
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
A wrapper class for inspecting calls to intrinsic functions.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
A Module instance is used to store all the information related to an LLVM module.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
bool isVoidTy() const
Return true if this is 'void'.
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI bool hasOneUser() const
Return true if there is exactly one user of this value.
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_READONLY const MIMGOffsetMappingInfo * getMIMGOffsetMappingInfo(unsigned Offset)
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
const ImageDimIntrinsicInfo * getImageDimIntrinsicByBaseOpcode(unsigned BaseOpcode, unsigned Dim)
LLVM_READONLY const MIMGMIPMappingInfo * getMIMGMIPMappingInfo(unsigned MIP)
bool isArgPassedInSGPR(const Argument *A)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY const MIMGBiasMappingInfo * getMIMGBiasMappingInfo(unsigned Bias)
LLVM_READONLY const MIMGLZMappingInfo * getMIMGLZMappingInfo(unsigned L)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const ImageDimIntrinsicInfo * getImageDimIntrinsicInfo(unsigned Intr)
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI bool getIntrinsicSignature(Intrinsic::ID, FunctionType *FT, SmallVectorImpl< Type * > &ArgTys)
Gets the type arguments of an intrinsic call by matching type constraints specified by the ....
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
bool match(Val *V, const Pattern &P)
cstfp_pred_ty< is_any_zero_fp > m_AnyZeroFP()
Match a floating-point negative zero or positive zero.
ap_match< APFloat > m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
class_match< ConstantFP > m_ConstantFP()
Match an arbitrary ConstantFP and ignore it.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
cstfp_pred_ty< is_finitenonzero > m_FiniteNonZero()
Match a finite non-zero FP constant.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 maxNum semantics.
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Returns: X * 2^Exp for integral exponents.
bool isa(const From &Val)
isa - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
LLVM_ABI Value * findScalarElement(Value *V, unsigned EltNo)
Given a vector and an element number, see if the scalar value is already around as a register,...
@ NearestTiesToEven
roundTiesToEven.
LLVM_ABI bool isKnownNeverInfOrNaN(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point value can never contain a NaN or infinity.
decltype(auto) cast(const From &Val)
cast - Return the argument parameter cast to the specified type.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
constexpr uint64_t Make_64(uint32_t High, uint32_t Low)
Make a 64-bit integer from a high / low pair of 32-bit integers.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
bool isConstant() const
Returns true if we know the value of all bits.
const APInt & getConstant() const
Returns the value when all bits have a known value.
SimplifyQuery getWithInstruction(const Instruction *I) const
LLVM_ABI bool isUndefValue(Value *V) const
If CanUseUndef is true, returns whether V is undef.