LLVM: lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
22#include "llvm/IR/IntrinsicsAMDGPU.h"
24#include
25
26using namespace llvm;
28
29#define DEBUG_TYPE "AMDGPUtti"
30
31namespace {
32
33struct AMDGPUImageDMaskIntrinsic {
34 unsigned Intr;
35};
36
37#define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
38#include "AMDGPUGenSearchableTables.inc"
39
40}
41
42
43
44
45
49
53 return maxnum(Src1, Src2);
54
58 return maxnum(Src0, Src2);
59
60 return maxnum(Src0, Src1);
61}
62
63
64
65
66
68 Type *VTy = V.getType();
70
71 return false;
72 }
73 if (IsFloat) {
75
76
77 APFloat FloatValue(ConstFloat->getValueAPF());
78 bool LosesInfo = true;
80 &LosesInfo);
81 return !LosesInfo;
82 }
83 } else {
85
86
87 APInt IntValue(ConstInt->getValue());
89 }
90 }
91
95 if (IsExt) {
98 return true;
99 }
100
101 return false;
102}
103
104
106 Type *VTy = V.getType();
110 return Builder.CreateIntCast(&V, Type::getInt16Ty(V.getContext()), false);
112 return Builder.CreateFPCast(&V, Type::getHalfTy(V.getContext()));
113
115}
116
117
118
119
124 Func) {
127 return std::nullopt;
128
130
131
132 Func(Args, ArgTys);
133
139
140
143
144 bool RemoveOldIntr = &OldIntr != &InstToReplace;
145
147 if (RemoveOldIntr)
149
150 return RetValue;
151}
152
153static std::optional<Instruction *>
157
158 if (const auto *LZMappingInfo =
160 if (auto *ConstantLod =
162 if (ConstantLod->isZero() || ConstantLod->isNegative()) {
165 ImageDimIntr->Dim);
167 II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
168 Args.erase(Args.begin() + ImageDimIntr->LodIndex);
169 });
170 }
171 }
172 }
173
174
175 if (const auto *MIPMappingInfo =
177 if (auto *ConstantMip =
179 if (ConstantMip->isZero()) {
182 ImageDimIntr->Dim);
184 II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
185 Args.erase(Args.begin() + ImageDimIntr->MipIndex);
186 });
187 }
188 }
189 }
190
191
192 if (const auto *BiasMappingInfo =
194 if (auto *ConstantBias =
196 if (ConstantBias->isZero()) {
199 ImageDimIntr->Dim);
201 II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
202 Args.erase(Args.begin() + ImageDimIntr->BiasIndex);
203 ArgTys.erase(ArgTys.begin() + ImageDimIntr->BiasTyArg);
204 });
205 }
206 }
207 }
208
209
210 if (const auto *OffsetMappingInfo =
212 if (auto *ConstantOffset =
214 if (ConstantOffset->isZero()) {
217 OffsetMappingInfo->NoOffset, ImageDimIntr->Dim);
219 II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
220 Args.erase(Args.begin() + ImageDimIntr->OffsetIndex);
221 });
222 }
223 }
224 }
225
226
227 if (ST->hasD16Images()) {
228
231
232 if (BaseOpcode->HasD16) {
233
234
235
236
237 if (II.hasOneUse()) {
239
240 if (User->getOpcode() == Instruction::FPTrunc &&
242
244 [&](auto &Args, auto &ArgTys) {
245
246
247 ArgTys[0] = User->getType();
248 });
249 }
250 }
251
252
253
255 ExtractTruncPairs;
256 bool AllHalfExtracts = true;
257
258 for (User *U : II.users()) {
260 if (!Ext || !Ext->hasOneUse()) {
261 AllHalfExtracts = false;
262 break;
263 }
264
266 if (!Tr || !Tr->getType()->isHalfTy()) {
267 AllHalfExtracts = false;
268 break;
269 }
270
272 }
273
274 if (!ExtractTruncPairs.empty() && AllHalfExtracts) {
276 Type *HalfVecTy =
278
279
280
283 SigTys[0] = HalfVecTy;
284
288
289 II.mutateType(HalfVecTy);
290 II.setCalledFunction(HalfDecl);
291
293 for (auto &[Ext, Tr] : ExtractTruncPairs) {
294 Value *Idx = Ext->getIndexOperand();
295
296 Builder.SetInsertPoint(Tr);
297
298 Value *HalfExtract = Builder.CreateExtractElement(&II, Idx);
300
301 Tr->replaceAllUsesWith(HalfExtract);
302 }
303
304 for (auto &[Ext, Tr] : ExtractTruncPairs) {
307 }
308
309 return &II;
310 }
311 }
312 }
313
314
315 if (!ST->hasA16() && !ST->hasG16())
316 return std::nullopt;
317
318
319
320 bool HasSampler =
322 bool FloatCoord = false;
323
324 bool OnlyDerivatives = false;
325
326 for (unsigned OperandIndex = ImageDimIntr->GradientStart;
327 OperandIndex < ImageDimIntr->VAddrEnd; OperandIndex++) {
328 Value *Coord = II.getOperand(OperandIndex);
329
331 if (OperandIndex < ImageDimIntr->CoordStart ||
333 return std::nullopt;
334 }
335
336 OnlyDerivatives = true;
337 break;
338 }
339
343 }
344
345 if (!OnlyDerivatives && !ST->hasA16())
346 OnlyDerivatives = true;
347
348
349 if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
352 "Only image instructions with a sampler can have a bias");
354 OnlyDerivatives = true;
355 }
356
357 if (OnlyDerivatives && (!ST->hasG16() || ImageDimIntr->GradientStart ==
359 return std::nullopt;
360
363
365 II, II, II.getIntrinsicID(), IC, [&](auto &Args, auto &ArgTys) {
366 ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
367 if (!OnlyDerivatives) {
368 ArgTys[ImageDimIntr->CoordTyArg] = CoordType;
369
370
371 if (ImageDimIntr->NumBiasArgs != 0)
372 ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext());
373 }
374
375 unsigned EndIndex =
378 OperandIndex < EndIndex; OperandIndex++) {
379 Args[OperandIndex] =
380 convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder);
381 }
382
383
384 if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
385 Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
386 Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder);
387 }
388 });
389}
390
394
395
396
397
398
401
402 return true;
403 }
404
407
408 return true;
409 }
410 return false;
411}
412
413
415 Value *Src = nullptr;
418 if (Src->getType()->isHalfTy())
419 return Src;
421 bool LosesInfo;
424 if (!LosesInfo)
426 }
427 return nullptr;
428}
429
430
431
435 unsigned VWidth = VTy->getNumElements();
437
438 for (int i = VWidth - 1; i > 0; --i) {
440 if (!Elt)
441 break;
442
445 break;
446 } else {
447 break;
448 }
449
451 }
452
453 return DemandedElts;
454}
455
456
457
460 unsigned VWidth = VTy->getNumElements();
463
466 SVI->getShuffleMask(ShuffleMask);
467
468 for (int I = VWidth - 1; I > 0; --I) {
469 if (ShuffleMask.empty()) {
471 if (!Elt || (Elt != FirstComponent && (Elt)))
472 break;
473 } else {
474
475
476 if (ShuffleMask[I] != ShuffleMask[0] && ShuffleMask[I] != PoisonMaskElem)
477 break;
478 }
480 }
481
482 return DemandedElts;
483}
484
487 APInt DemandedElts,
488 int DMaskIdx = -1,
489 bool IsLoad = true);
490
491
497
498
500 Value *V = U.get();
502 return true;
507 return false;
508
509
511 }
512 return false;
513}
514
515
516
517
520 unsigned LaneArgIdx) const {
521 unsigned MaskBits = ST->getWavefrontSizeLog2();
523
526 return true;
527
529 return false;
530
531
532
533
534
535 Value *LaneArg = II.getArgOperand(LaneArgIdx);
537 ConstantInt::get(LaneArg->getType(), Known.getConstant() & DemandedMask);
538 if (MaskedConst != LaneArg) {
539 II.getOperandUse(LaneArgIdx).set(MaskedConst);
540 return true;
541 }
542
543 return false;
544}
545
550
551 CallInst *NewCall = B.CreateCall(&NewCallee, Ops, OpBundles);
553 return NewCall;
554}
555
559 const auto IID = II.getIntrinsicID();
560 assert(IID == Intrinsic::amdgcn_readlane ||
561 IID == Intrinsic::amdgcn_readfirstlane ||
562 IID == Intrinsic::amdgcn_permlane64);
563
565
566
567
568
570 return nullptr;
571
572 const bool IsReadLane = (IID == Intrinsic::amdgcn_readlane);
573
574
575
576 Value *LaneID = nullptr;
577 if (IsReadLane) {
578 LaneID = II.getOperand(1);
579
580
581
582
585 return nullptr;
586 }
587 }
588
589
590
591
592 const auto DoIt = [&](unsigned OpIdx,
595 if (IsReadLane)
596 Ops.push_back(LaneID);
597
598
600
601
604 return &NewOp;
605 };
606
607
608 if (IID == Intrinsic::amdgcn_permlane64 && (OpInst))
609 return nullptr;
610
612 return DoIt(0, II.getCalledFunction());
613
616 Type *SrcTy = Src->getType();
618 return nullptr;
619
622 return DoIt(0, Remangled);
623 }
624
625
627
628
630 return DoIt(1, II.getCalledFunction());
632 return DoIt(0, II.getCalledFunction());
633 }
634
635 return nullptr;
636}
637
638std::optional<Instruction *>
641 switch (IID) {
642 case Intrinsic::amdgcn_rcp: {
643 Value *Src = II.getArgOperand(0);
646
647
650 auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
652 }
653
654 if (II.isStrictFP())
655 break;
656
658 const APFloat &ArgVal = C->getValueAPF();
661
662
663
664
665
667 }
668
671 break;
673 if (!SrcCI)
674 break;
675
676 auto IID = SrcCI->getIntrinsicID();
677
678
679
680
681 if (IID == Intrinsic::amdgcn_sqrt || IID == Intrinsic::sqrt) {
684 if (!InnerFMF.allowContract() || !SrcCI->hasOneUse())
685 break;
686
688 break;
689
691 SrcCI->getModule(), Intrinsic::amdgcn_rsq, {SrcCI->getType()});
692
693 InnerFMF |= FMF;
694 II.setFastMathFlags(InnerFMF);
695
696 II.setCalledFunction(NewDecl);
698 }
699
700 break;
701 }
702 case Intrinsic::amdgcn_sqrt:
703 case Intrinsic::amdgcn_rsq:
704 case Intrinsic::amdgcn_tanh: {
705 Value *Src = II.getArgOperand(0);
708
709
712 auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
714 }
715
716
717 if (IID == Intrinsic::amdgcn_sqrt && Src->getType()->isHalfTy()) {
719 II.getModule(), Intrinsic::sqrt, {II.getType()});
720 II.setCalledFunction(NewDecl);
721 return &II;
722 }
723
724 break;
725 }
726 case Intrinsic::amdgcn_log:
727 case Intrinsic::amdgcn_exp2: {
728 const bool IsLog = IID == Intrinsic::amdgcn_log;
729 const bool IsExp = IID == Intrinsic::amdgcn_exp2;
730 Value *Src = II.getArgOperand(0);
732
735
738
740 if (C->isInfinity()) {
741
742
743 if (->isNegative())
745
746
747 if (IsExp && C->isNegative())
749 }
750
751 if (II.isStrictFP())
752 break;
753
754 if (C->isNaN()) {
755 Constant *Quieted = ConstantFP::get(Ty, C->getValue().makeQuiet());
757 }
758
759
760 if (C->isZero() || (C->getValue().isDenormal() && Ty->isFloatTy())) {
762 : ConstantFP::get(Ty, 1.0);
764 }
765
766 if (IsLog && C->isNegative())
768
769
770 }
771
772 break;
773 }
774 case Intrinsic::amdgcn_frexp_mant:
775 case Intrinsic::amdgcn_frexp_exp: {
776 Value *Src = II.getArgOperand(0);
778 int Exp;
781
782 if (IID == Intrinsic::amdgcn_frexp_mant) {
784 II, ConstantFP::get(II.getContext(), Significand));
785 }
786
787
789 Exp = 0;
790
793 }
794
797
800 }
801
802 break;
803 }
804 case Intrinsic::amdgcn_class: {
805 Value *Src0 = II.getArgOperand(0);
806 Value *Src1 = II.getArgOperand(1);
808 if (CMask) {
810 II.getModule(), Intrinsic::is_fpclass, Src0->getType()));
811
812
813 II.setArgOperand(1, ConstantInt::get(Src1->getType(),
815 return &II;
816 }
817
818
821
822
825
826
831 }
832 break;
833 }
834 case Intrinsic::amdgcn_cvt_pkrtz: {
835 auto foldFPTruncToF16RTZ = [](Value *Arg) -> Value * {
837
842
845 bool LosesInfo;
848 return ConstantFP::get(HalfTy, Val);
849 }
850
851 Value *Src = nullptr;
853 if (Src->getType()->isHalfTy())
854 return Src;
855 }
856
857 return nullptr;
858 };
859
860 if (Value *Src0 = foldFPTruncToF16RTZ(II.getArgOperand(0))) {
861 if (Value *Src1 = foldFPTruncToF16RTZ(II.getArgOperand(1))) {
866 }
867 }
868
869 break;
870 }
871 case Intrinsic::amdgcn_cvt_pknorm_i16:
872 case Intrinsic::amdgcn_cvt_pknorm_u16:
873 case Intrinsic::amdgcn_cvt_pk_i16:
874 case Intrinsic::amdgcn_cvt_pk_u16: {
875 Value *Src0 = II.getArgOperand(0);
876 Value *Src1 = II.getArgOperand(1);
877
878
881
884 }
885
886 break;
887 }
888 case Intrinsic::amdgcn_cvt_off_f32_i4: {
889 Value* Arg = II.getArgOperand(0);
891
894
897
899 if (!CArg)
900 break;
901
902
903 constexpr size_t ResValsSize = 16;
904 static constexpr float ResVals[ResValsSize] = {
905 0.0, 0.0625, 0.125, 0.1875, 0.25, 0.3125, 0.375, 0.4375,
906 -0.5, -0.4375, -0.375, -0.3125, -0.25, -0.1875, -0.125, -0.0625};
908 ConstantFP::get(Ty, ResVals[CArg->getZExtValue() & (ResValsSize - 1)]);
910 }
911 case Intrinsic::amdgcn_ubfe:
912 case Intrinsic::amdgcn_sbfe: {
913
914 Value *Src = II.getArgOperand(0);
917 }
918
919 unsigned Width;
921 unsigned IntSize = Ty->getIntegerBitWidth();
922
924 if (CWidth) {
926 if ((Width & (IntSize - 1)) == 0) {
928 }
929
930
931 if (Width >= IntSize) {
933 II, 2, ConstantInt::get(CWidth->getType(), Width & (IntSize - 1)));
934 }
935 }
936
939 if (COffset) {
941 if (Offset >= IntSize) {
943 II, 1,
944 ConstantInt::get(COffset->getType(), Offset & (IntSize - 1)));
945 }
946 }
947
948 bool Signed = IID == Intrinsic::amdgcn_sbfe;
949
950 if (!CWidth || !COffset)
951 break;
952
953
954
955
957
958
959
960 if (Offset + Width < IntSize) {
964 RightShift->takeName(&II);
966 }
967
970
971 RightShift->takeName(&II);
973 }
974 case Intrinsic::amdgcn_exp:
975 case Intrinsic::amdgcn_exp_row:
976 case Intrinsic::amdgcn_exp_compr: {
979 if (EnBits == 0xf)
980 break;
981
982 bool IsCompr = IID == Intrinsic::amdgcn_exp_compr;
984 for (int I = 0; I < (IsCompr ? 2 : 4); ++I) {
985 if ((!IsCompr && (EnBits & (1 << I)) == 0) ||
986 (IsCompr && ((EnBits & (0x3 << (2 * I))) == 0))) {
987 Value *Src = II.getArgOperand(I + 2);
991 }
992 }
993 }
994
996 return &II;
997 }
998
999 break;
1000 }
1001 case Intrinsic::amdgcn_fmed3: {
1002 Value *Src0 = II.getArgOperand(0);
1003 Value *Src1 = II.getArgOperand(1);
1004 Value *Src2 = II.getArgOperand(2);
1005
1006 for (Value *Src : {Src0, Src1, Src2}) {
1009 }
1010
1011 if (II.isStrictFP())
1012 break;
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047 Value *V = nullptr;
1048 const APFloat *ConstSrc0 = nullptr;
1049 const APFloat *ConstSrc1 = nullptr;
1050 const APFloat *ConstSrc2 = nullptr;
1051
1055 const bool IsPosInfinity = ConstSrc0 && ConstSrc0->isPosInfinity();
1058
1059 if (ConstSrc0 && ConstSrc0->isNaN() && ConstSrc0->isSignaling())
1061
1064 break;
1068 break;
1070 break;
1071 }
1075 const bool IsPosInfinity = ConstSrc1 && ConstSrc1->isPosInfinity();
1078
1079 if (ConstSrc1 && ConstSrc1->isNaN() && ConstSrc1->isSignaling())
1081
1084 break;
1088 break;
1090 break;
1091 }
1097 if (ConstSrc2 && ConstSrc2->isNaN() && ConstSrc2->isSignaling()) {
1098 auto *Quieted = ConstantFP::get(II.getType(), ConstSrc2->makeQuiet());
1100 }
1101
1105 break;
1110 break;
1112 break;
1113 }
1114 }
1115
1116 if (V) {
1118 CI->copyFastMathFlags(&II);
1119 CI->takeName(&II);
1120 }
1122 }
1123
1124 bool Swap = false;
1125
1126
1127
1130 Swap = true;
1131 }
1132
1135 Swap = true;
1136 }
1137
1140 Swap = true;
1141 }
1142
1143 if (Swap) {
1144 II.setArgOperand(0, Src0);
1145 II.setArgOperand(1, Src1);
1146 II.setArgOperand(2, Src2);
1147 return &II;
1148 }
1149
1154 C2->getValueAPF());
1156 ConstantFP::get(II.getType(), Result));
1157 }
1158 }
1159 }
1160
1161 if (!ST->hasMed3_16())
1162 break;
1163
1164
1165
1170 IID, {X->getType()}, {X, Y, Z}, &II, II.getName());
1171 return new FPExtInst(NewCall, II.getType());
1172 }
1173 }
1174 }
1175
1176 break;
1177 }
1178 case Intrinsic::amdgcn_icmp:
1179 case Intrinsic::amdgcn_fcmp: {
1181
1183 bool IsInteger = IID == Intrinsic::amdgcn_icmp;
1188 break;
1189
1190 Value *Src0 = II.getArgOperand(0);
1191 Value *Src1 = II.getArgOperand(1);
1192
1200 }
1201
1202
1203
1204
1205
1206
1211 II.getType(), Args);
1212 NewCall->addFnAttr(Attribute::Convergent);
1215 }
1216
1217
1220 II.setArgOperand(0, Src1);
1221 II.setArgOperand(1, Src0);
1222 II.setArgOperand(
1223 2, ConstantInt::get(CC->getType(), static_cast<int>(SwapPred)));
1224 return &II;
1225 }
1226
1228 break;
1229
1230
1231
1232
1233
1234
1245 return &II;
1246 }
1247
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1268
1270 ? Intrinsic::amdgcn_fcmp
1271 : Intrinsic::amdgcn_icmp;
1272
1275
1276 unsigned Width = CmpType->getBitWidth();
1277 unsigned NewWidth = Width;
1278
1279
1280 if (Width == 1)
1281 break;
1282
1283 if (Width <= 16)
1284 NewWidth = 16;
1285 else if (Width <= 32)
1286 NewWidth = 32;
1287 else if (Width <= 64)
1288 NewWidth = 64;
1289 else
1290 break;
1291
1292 if (Width != NewWidth) {
1297 } else {
1300 }
1301 }
1302 } else if (!Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isHalfTy())
1303 break;
1304
1305 Value *Args[] = {SrcLHS, SrcRHS,
1306 ConstantInt::get(CC->getType(), SrcPred)};
1308 NewIID, {II.getType(), SrcLHS->getType()}, Args);
1311 }
1312
1313 break;
1314 }
1315 case Intrinsic::amdgcn_mbcnt_hi: {
1316
1317 if (ST->isWave32())
1319 break;
1320 }
1321 case Intrinsic::amdgcn_ballot: {
1322 Value *Arg = II.getArgOperand(0);
1325
1327 if (Src->isZero()) {
1328
1330 }
1331 }
1332 if (ST->isWave32() && II.getType()->getIntegerBitWidth() == 64) {
1333
1334
1335
1336
1339 {IC.Builder.getInt32Ty()},
1340 {II.getArgOperand(0)}),
1341 II.getType());
1344 }
1345 break;
1346 }
1347 case Intrinsic::amdgcn_wavefrontsize: {
1348 if (ST->isWaveSizeKnown())
1350 II, ConstantInt::get(II.getType(), ST->getWavefrontSize()));
1351 break;
1352 }
1353 case Intrinsic::amdgcn_wqm_vote: {
1354
1356 break;
1357
1359 }
1360 case Intrinsic::amdgcn_kill: {
1362 if ( ||
->getZExtValue())
1363 break;
1364
1365
1367 }
1368 case Intrinsic::amdgcn_update_dpp: {
1369 Value *Old = II.getArgOperand(0);
1370
1374 if (BC->isZeroValue() || RM->getZExtValue() != 0xF ||
1376 break;
1377
1378
1380 }
1381 case Intrinsic::amdgcn_permlane16:
1382 case Intrinsic::amdgcn_permlane16_var:
1383 case Intrinsic::amdgcn_permlanex16:
1384 case Intrinsic::amdgcn_permlanex16_var: {
1385
1386 Value *VDstIn = II.getArgOperand(0);
1388 break;
1389
1390
1391 unsigned int FiIdx = (IID == Intrinsic::amdgcn_permlane16 ||
1392 IID == Intrinsic::amdgcn_permlanex16)
1393 ? 4
1394 : 3;
1395
1396
1397
1398
1399 unsigned int BcIdx = FiIdx + 1;
1400
1404 break;
1405
1407 }
1408 case Intrinsic::amdgcn_permlane64:
1409 case Intrinsic::amdgcn_readfirstlane:
1410 case Intrinsic::amdgcn_readlane:
1411 case Intrinsic::amdgcn_ds_bpermute: {
1412
1413 unsigned SrcIdx = IID == Intrinsic::amdgcn_ds_bpermute ? 1 : 0;
1414 const Use &Src = II.getArgOperandUse(SrcIdx);
1417
1418 if (IID == Intrinsic::amdgcn_readlane &&
1420 return &II;
1421
1422
1423
1424
1425 if (IID == Intrinsic::amdgcn_ds_bpermute) {
1426 const Use &Lane = II.getArgOperandUse(0);
1430 II.getModule(), Intrinsic::amdgcn_readlane, II.getType());
1431 II.setCalledFunction(NewDecl);
1432 II.setOperand(0, Src);
1433 II.setOperand(1, NewLane);
1434 return &II;
1435 }
1436 }
1437
1438 if (IID != Intrinsic::amdgcn_ds_bpermute) {
1440 return Res;
1441 }
1442
1443 return std::nullopt;
1444 }
1445 case Intrinsic::amdgcn_writelane: {
1446
1448 return &II;
1449 return std::nullopt;
1450 }
1451 case Intrinsic::amdgcn_trig_preop: {
1452
1453
1454 if (.getType()->isDoubleTy())
1455 break;
1456
1457 Value *Src = II.getArgOperand(0);
1458 Value *Segment = II.getArgOperand(1);
1461
1463 auto *QNaN = ConstantFP::get(
1466 }
1467
1469 if (!Csrc)
1470 break;
1471
1472 if (II.isStrictFP())
1473 break;
1474
1476 if (Fsrc.isNaN()) {
1477 auto *Quieted = ConstantFP::get(II.getType(), Fsrc.makeQuiet());
1479 }
1480
1482 if (!Cseg)
1483 break;
1484
1487 unsigned Shift = SegmentVal * 53;
1490
1491
1492 static const uint32_t TwoByPi[] = {
1493 0xa2f9836e, 0x4e441529, 0xfc2757d1, 0xf534ddc0, 0xdb629599, 0x3c439041,
1494 0xfe5163ab, 0xdebbc561, 0xb7246e3a, 0x424dd2e0, 0x06492eea, 0x09d1921c,
1495 0xfe1deb1c, 0xb129a73e, 0xe88235f5, 0x2ebb4484, 0xe99c7026, 0xb45f7e41,
1496 0x3991d639, 0x835339f4, 0x9c845f8b, 0xbdf9283b, 0x1ff897ff, 0xde05980f,
1497 0xef2f118b, 0x5a0a6d1f, 0x6d367ecf, 0x27cb09b7, 0x4f463f66, 0x9e5fea2d,
1498 0x7527bac7, 0xebe5f17b, 0x3d0739f7, 0x8a5292ea, 0x6bfb5fb1, 0x1f8d5d08,
1499 0x56033046};
1500
1501
1502 unsigned Idx = Shift >> 5;
1503 if (Idx + 2 >= std::size(TwoByPi)) {
1506 }
1507
1508 unsigned BShift = Shift & 0x1f;
1511 if (BShift)
1512 Thi = (Thi << BShift) | (Tlo >> (64 - BShift));
1513 Thi = Thi >> 11;
1515
1516 int Scale = -53 - Shift;
1518 Scale += 128;
1519
1522 }
1523 case Intrinsic::amdgcn_fmul_legacy: {
1524 Value *Op0 = II.getArgOperand(0);
1525 Value *Op1 = II.getArgOperand(1);
1526
1527 for (Value *Src : {Op0, Op1}) {
1530 }
1531
1532
1533
1534
1538
1539
1540
1545 }
1546 break;
1547 }
1548 case Intrinsic::amdgcn_fma_legacy: {
1549 Value *Op0 = II.getArgOperand(0);
1550 Value *Op1 = II.getArgOperand(1);
1551 Value *Op2 = II.getArgOperand(2);
1552
1553 for (Value *Src : {Op0, Op1, Op2}) {
1556 }
1557
1558
1559
1560
1563
1564
1569 }
1570
1571
1572
1575 II.getModule(), Intrinsic::fma, II.getType()));
1576 return &II;
1577 }
1578 break;
1579 }
1580 case Intrinsic::amdgcn_is_shared:
1581 case Intrinsic::amdgcn_is_private: {
1582 Value *Src = II.getArgOperand(0);
1587
1590 break;
1591 }
1592 case Intrinsic::amdgcn_make_buffer_rsrc: {
1593 Value *Src = II.getArgOperand(0);
1596 return std::nullopt;
1597 }
1598 case Intrinsic::amdgcn_raw_buffer_store_format:
1599 case Intrinsic::amdgcn_struct_buffer_store_format:
1600 case Intrinsic::amdgcn_raw_tbuffer_store:
1601 case Intrinsic::amdgcn_struct_tbuffer_store:
1602 case Intrinsic::amdgcn_image_store_1d:
1603 case Intrinsic::amdgcn_image_store_1darray:
1604 case Intrinsic::amdgcn_image_store_2d:
1605 case Intrinsic::amdgcn_image_store_2darray:
1606 case Intrinsic::amdgcn_image_store_2darraymsaa:
1607 case Intrinsic::amdgcn_image_store_2dmsaa:
1608 case Intrinsic::amdgcn_image_store_3d:
1609 case Intrinsic::amdgcn_image_store_cube:
1610 case Intrinsic::amdgcn_image_store_mip_1d:
1611 case Intrinsic::amdgcn_image_store_mip_1darray:
1612 case Intrinsic::amdgcn_image_store_mip_2d:
1613 case Intrinsic::amdgcn_image_store_mip_2darray:
1614 case Intrinsic::amdgcn_image_store_mip_3d:
1615 case Intrinsic::amdgcn_image_store_mip_cube: {
1617 break;
1618
1619 APInt DemandedElts;
1620 if (ST->hasDefaultComponentBroadcast())
1622 else if (ST->hasDefaultComponentZero())
1624 else
1625 break;
1626
1627 int DMaskIdx = getAMDGPUImageDMaskIntrinsic(II.getIntrinsicID()) ? 1 : -1;
1629 false)) {
1631 }
1632
1633 break;
1634 }
1635 case Intrinsic::amdgcn_prng_b32: {
1636 auto *Src = II.getArgOperand(0);
1639 }
1640 return std::nullopt;
1641 }
1642 case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
1643 case Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
1644 Value *Src0 = II.getArgOperand(0);
1645 Value *Src1 = II.getArgOperand(1);
1650
1651 auto getFormatNumRegs = [](unsigned FormatVal) {
1652 switch (FormatVal) {
1655 return 6u;
1657 return 4u;
1660 return 8u;
1661 default:
1663 }
1664 };
1665
1666 bool MadeChange = false;
1667 unsigned Src0NumElts = getFormatNumRegs(CBSZ);
1668 unsigned Src1NumElts = getFormatNumRegs(BLGP);
1669
1670
1671
1672 if (Src0Ty->getNumElements() > Src0NumElts) {
1676 MadeChange = true;
1677 }
1678
1679 if (Src1Ty->getNumElements() > Src1NumElts) {
1683 MadeChange = true;
1684 }
1685
1686 if (!MadeChange)
1687 return std::nullopt;
1688
1690 Args[0] = Src0;
1691 Args[1] = Src1;
1692
1697 }
1698 case Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4:
1699 case Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
1700 case Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4: {
1701 Value *Src0 = II.getArgOperand(1);
1702 Value *Src1 = II.getArgOperand(3);
1707
1708 bool MadeChange = false;
1711
1712
1713
1714 if (Src0Ty->getNumElements() > Src0NumElts) {
1718 MadeChange = true;
1719 }
1720
1721 if (Src1Ty->getNumElements() > Src1NumElts) {
1725 MadeChange = true;
1726 }
1727
1728 if (!MadeChange)
1729 return std::nullopt;
1730
1732 Args[1] = Src0;
1733 Args[3] = Src1;
1734
1736 IID, {II.getArgOperand(5)->getType(), Src0->getType(), Src1->getType()},
1737 Args, &II);
1740 }
1741 }
1745 }
1746 return std::nullopt;
1747}
1748
1749
1750
1751
1752
1753
1754
1755
1758 APInt DemandedElts,
1759 int DMaskIdx, bool IsLoad) {
1760
1762 : II.getOperand(0)->getType());
1763 unsigned VWidth = IIVTy->getNumElements();
1764 if (VWidth == 1)
1765 return nullptr;
1766 Type *EltTy = IIVTy->getElementType();
1767
1770
1771
1773
1774 if (DMaskIdx < 0) {
1775
1776
1777 const unsigned ActiveBits = DemandedElts.getActiveBits();
1778 const unsigned UnusedComponentsAtFront = DemandedElts.countr_zero();
1779
1780
1781
1782
1783 DemandedElts = (1 << ActiveBits) - 1;
1784
1785 if (UnusedComponentsAtFront > 0) {
1786 static const unsigned InvalidOffsetIdx = 0xf;
1787
1788 unsigned OffsetIdx;
1789 switch (II.getIntrinsicID()) {
1790 case Intrinsic::amdgcn_raw_buffer_load:
1791 case Intrinsic::amdgcn_raw_ptr_buffer_load:
1792 OffsetIdx = 1;
1793 break;
1794 case Intrinsic::amdgcn_s_buffer_load:
1795
1796
1797
1798 if (ActiveBits == 4 && UnusedComponentsAtFront == 1)
1799 OffsetIdx = InvalidOffsetIdx;
1800 else
1801 OffsetIdx = 1;
1802 break;
1803 case Intrinsic::amdgcn_struct_buffer_load:
1804 case Intrinsic::amdgcn_struct_ptr_buffer_load:
1805 OffsetIdx = 2;
1806 break;
1807 default:
1808
1809 OffsetIdx = InvalidOffsetIdx;
1810 break;
1811 }
1812
1813 if (OffsetIdx != InvalidOffsetIdx) {
1814
1815 DemandedElts &= ~((1 << UnusedComponentsAtFront) - 1);
1816 auto *Offset = Args[OffsetIdx];
1817 unsigned SingleComponentSizeInBits =
1819 unsigned OffsetAdd =
1820 UnusedComponentsAtFront * SingleComponentSizeInBits / 8;
1821 auto *OffsetAddVal = ConstantInt::get(Offset->getType(), OffsetAdd);
1823 }
1824 }
1825 } else {
1826
1827
1829 unsigned DMaskVal = DMask->getZExtValue() & 0xf;
1830
1831
1832 if (DMaskVal == 0)
1833 return nullptr;
1834
1835
1836 DemandedElts &= (1 << llvm::popcount(DMaskVal)) - 1;
1837
1838 unsigned NewDMaskVal = 0;
1839 unsigned OrigLdStIdx = 0;
1840 for (unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
1841 const unsigned Bit = 1 << SrcIdx;
1842 if (!!(DMaskVal & Bit)) {
1843 if (!!DemandedElts[OrigLdStIdx])
1844 NewDMaskVal |= Bit;
1845 OrigLdStIdx++;
1846 }
1847 }
1848
1849 if (DMaskVal != NewDMaskVal)
1850 Args[DMaskIdx] = ConstantInt::get(DMask->getType(), NewDMaskVal);
1851 }
1852
1853 unsigned NewNumElts = DemandedElts.popcount();
1854 if (!NewNumElts)
1856
1857 if (NewNumElts >= VWidth && DemandedElts.isMask()) {
1858 if (DMaskIdx >= 0)
1859 II.setArgOperand(DMaskIdx, Args[DMaskIdx]);
1860 return nullptr;
1861 }
1862
1863
1864
1867 return nullptr;
1868
1869 Type *NewTy =
1871 OverloadTys[0] = NewTy;
1872
1873 if (!IsLoad) {
1875 for (unsigned OrigStoreIdx = 0; OrigStoreIdx < VWidth; ++OrigStoreIdx)
1876 if (DemandedElts[OrigStoreIdx])
1877 EltMask.push_back(OrigStoreIdx);
1878
1879 if (NewNumElts == 1)
1881 else
1883 }
1884
1889
1890 if (IsLoad) {
1891 if (NewNumElts == 1) {
1894 }
1895
1897 unsigned NewLoadIdx = 0;
1898 for (unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
1899 if (!!DemandedElts[OrigLoadIdx])
1900 EltMask.push_back(NewLoadIdx++);
1901 else
1903 }
1904
1906
1907 return Shuffle;
1908 }
1909
1910 return NewCall;
1911}
1912
1915 APInt &UndefElts) const {
1917 if (!VT)
1918 return nullptr;
1919
1920 const unsigned FirstElt = DemandedElts.countr_zero();
1921 const unsigned LastElt = DemandedElts.getActiveBits() - 1;
1922 const unsigned MaskLen = LastElt - FirstElt + 1;
1923
1924 unsigned OldNumElts = VT->getNumElements();
1925 if (MaskLen == OldNumElts && MaskLen != 1)
1926 return nullptr;
1927
1928 Type *EltTy = VT->getElementType();
1930
1931
1932
1934 return nullptr;
1935
1936 Value *Src = II.getArgOperand(0);
1937
1938
1939
1941 II.getOperandBundlesAsDefs(OpBundles);
1942
1946
1947 if (MaskLen == 1) {
1949
1950
1952
1954 NewCall, FirstElt);
1955 }
1956
1958 for (unsigned I = 0; I != MaskLen; ++I) {
1959 if (DemandedElts[FirstElt + I])
1960 ExtractMask[I] = FirstElt + I;
1961 }
1962
1964
1965
1967
1969 for (unsigned I = 0; I != MaskLen; ++I) {
1970 if (DemandedElts[FirstElt + I])
1971 InsertMask[FirstElt + I] = I;
1972 }
1973
1974
1975
1977}
1978
1981 APInt &UndefElts2, APInt &UndefElts3,
1983 SimplifyAndSetOp) const {
1984 switch (II.getIntrinsicID()) {
1985 case Intrinsic::amdgcn_readfirstlane:
1986 SimplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
1988 case Intrinsic::amdgcn_raw_buffer_load:
1989 case Intrinsic::amdgcn_raw_ptr_buffer_load:
1990 case Intrinsic::amdgcn_raw_buffer_load_format:
1991 case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
1992 case Intrinsic::amdgcn_raw_tbuffer_load:
1993 case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
1994 case Intrinsic::amdgcn_s_buffer_load:
1995 case Intrinsic::amdgcn_struct_buffer_load:
1996 case Intrinsic::amdgcn_struct_ptr_buffer_load:
1997 case Intrinsic::amdgcn_struct_buffer_load_format:
1998 case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
1999 case Intrinsic::amdgcn_struct_tbuffer_load:
2000 case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
2002 default: {
2003 if (getAMDGPUImageDMaskIntrinsic(II.getIntrinsicID())) {
2005 }
2006 break;
2007 }
2008 }
2009 return std::nullopt;
2010}
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool canContractSqrtToRsq(const FPMathOperator *SqrtOp)
Return true if it's legal to contract llvm.amdgcn.rcp(llvm.sqrt)
Definition AMDGPUInstCombineIntrinsic.cpp:492
static bool isTriviallyUniform(const Use &U)
Return true if we can easily prove that use U is uniform.
Definition AMDGPUInstCombineIntrinsic.cpp:499
static CallInst * rewriteCall(IRBuilderBase &B, CallInst &Old, Function &NewCallee, ArrayRef< Value * > Ops)
Definition AMDGPUInstCombineIntrinsic.cpp:546
static Value * convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder)
Definition AMDGPUInstCombineIntrinsic.cpp:105
static APInt trimTrailingZerosInVector(InstCombiner &IC, Value *UseV, Instruction *I)
Definition AMDGPUInstCombineIntrinsic.cpp:432
static APInt defaultComponentBroadcast(Value *V)
Definition AMDGPUInstCombineIntrinsic.cpp:458
static std::optional< Instruction * > modifyIntrinsicCall(IntrinsicInst &OldIntr, Instruction &InstToReplace, unsigned NewIntr, InstCombiner &IC, std::function< void(SmallVectorImpl< Value * > &, SmallVectorImpl< Type * > &)> Func)
Applies Func(OldIntr.Args, OldIntr.ArgTys), creates intrinsic call with modified arguments (based on ...
Definition AMDGPUInstCombineIntrinsic.cpp:120
static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1, const APFloat &Src2)
Definition AMDGPUInstCombineIntrinsic.cpp:46
static Value * simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, int DMaskIdx=-1, bool IsLoad=true)
Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
Definition AMDGPUInstCombineIntrinsic.cpp:1756
static std::optional< Instruction * > simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST, const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr, IntrinsicInst &II, InstCombiner &IC)
Definition AMDGPUInstCombineIntrinsic.cpp:154
static bool canSafelyConvertTo16Bit(Value &V, bool IsFloat)
Definition AMDGPUInstCombineIntrinsic.cpp:67
static Value * matchFPExtFromF16(Value *Arg)
Match an fpext from half to float, or a constant we can convert.
Definition AMDGPUInstCombineIntrinsic.cpp:414
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
This file a TargetTransformInfoImplBase conforming object specific to the AMDGPU target machine.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Utilities for dealing with flags related to floating point properties and mode controls.
AMD GCN specific subclass of TargetSubtarget.
This file provides the interface for the instcombine pass implementation.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
cmpResult
IEEE-754R 5.11: Floating Point Comparison Relations.
static constexpr roundingMode rmTowardZero
static constexpr roundingMode rmNearestTiesToEven
static const fltSemantics & IEEEhalf()
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
opStatus divide(const APFloat &RHS, roundingMode RM)
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
bool isPosInfinity() const
const fltSemantics & getSemantics() const
APFloat makeQuiet() const
Assuming this is an IEEE-754 NaN value, quiet its signaling bit.
APInt bitcastToAPInt() const
bool isNegInfinity() const
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
cmpResult compare(const APFloat &RHS) const
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
void clearBit(unsigned BitPosition)
Set a given bit to 0.
uint64_t getZExtValue() const
Get zero extended value.
unsigned popcount() const
Count the number of bits set.
unsigned getActiveBits() const
Compute the number of active bits in the value.
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
unsigned countr_zero() const
Count the number of trailing zero bits.
bool isMask(unsigned numBits) const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
bool isTypeLegal(Type *Ty) const override
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
bool isFPPredicate() const
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
ConstantFP - Floating Point Values [float, double].
const APFloat & getValueAPF() const
static LLVM_ABI Constant * getInfinity(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getZero(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getNaN(Type *Ty, bool Negative=false, uint64_t Payload=0)
This is the shared class of boolean and integer constants.
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
const APInt & getValue() const
Return the constant as an APInt value reference.
This is an important base class in LLVM.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This class represents an extension of floating point types.
Utility class for floating point operations which can have information about relaxed accuracy require...
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
bool hasApproxFunc() const
Test if this operation allows approximations of math library functions or intrinsics.
LLVM_ABI float getFPAccuracy() const
Get the maximum error permitted by this operation in ULPs.
Convenience struct for specifying and reasoning about fast-math flags.
bool allowContract() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
bool simplifyDemandedLaneMaskArg(InstCombiner &IC, IntrinsicInst &II, unsigned LaneAgIdx) const
Simplify a lane index operand (e.g.
Definition AMDGPUInstCombineIntrinsic.cpp:518
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
Definition AMDGPUInstCombineIntrinsic.cpp:639
Instruction * hoistLaneIntrinsicThroughOperand(InstCombiner &IC, IntrinsicInst &II) const
Definition AMDGPUInstCombineIntrinsic.cpp:557
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override
Definition AMDGPUInstCombineIntrinsic.cpp:1979
KnownIEEEMode fpenvIEEEMode(const Instruction &I) const
Return KnownIEEEMode::On if we know if the use context can assume "amdgpu-ieee"="true" and KnownIEEEM...
Value * simplifyAMDGCNLaneIntrinsicDemanded(InstCombiner &IC, IntrinsicInst &II, const APInt &DemandedElts, APInt &UndefElts) const
Definition AMDGPUInstCombineIntrinsic.cpp:1913
bool canSimplifyLegacyMulToMul(const Instruction &I, const Value *Op0, const Value *Op1, InstCombiner &IC) const
Definition AMDGPUInstCombineIntrinsic.cpp:391
Common base class shared among various IRBuilders.
CallInst * CreateExtractVector(Type *DstType, Value *SrcVec, Value *Idx, const Twine &Name="")
Create a call to the vector.extract intrinsic.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
BasicBlock * GetInsertBlock() const
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateMaxNum(Value *LHS, Value *RHS, FMFSource FMFSource={}, const Twine &Name="")
Create call to the maxnum intrinsic.
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateMaximumNum(Value *LHS, Value *RHS, const Twine &Name="")
Create call to the maximum intrinsic.
Value * CreateMinNum(Value *LHS, Value *RHS, FMFSource FMFSource={}, const Twine &Name="")
Create call to the minnum intrinsic.
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateFAddFMF(Value *L, Value *R, FMFSource FMFSource, const Twine &Name="", MDNode *FPMD=nullptr)
Value * CreateMinimumNum(Value *LHS, Value *RHS, const Twine &Name="")
Create call to the minimumnum intrinsic.
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateFMulFMF(Value *L, Value *R, FMFSource FMFSource, const Twine &Name="", MDNode *FPMD=nullptr)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
The core instruction combiner logic.
const DataLayout & getDataLayout() const
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
IRBuilder< TargetFolder, IRBuilderCallbackInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
DominatorTree & getDominatorTree() const
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
virtual bool SimplifyDemandedBits(Instruction *I, unsigned OpNo, const APInt &DemandedMask, KnownBits &Known, const SimplifyQuery &Q, unsigned Depth=0)=0
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
const SimplifyQuery & getSimplifyQuery() const
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void copyFastMathFlags(FastMathFlags FMF)
Convenience function for transferring all fast-math flag values to this instruction,...
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
A wrapper class for inspecting calls to intrinsic functions.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
A Module instance is used to store all the information related to an LLVM module.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
bool isVoidTy() const
Return true if this is 'void'.
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI bool hasOneUser() const
Return true if there is exactly one user of this value.
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_READONLY const MIMGOffsetMappingInfo * getMIMGOffsetMappingInfo(unsigned Offset)
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
const ImageDimIntrinsicInfo * getImageDimIntrinsicByBaseOpcode(unsigned BaseOpcode, unsigned Dim)
LLVM_READONLY const MIMGMIPMappingInfo * getMIMGMIPMappingInfo(unsigned MIP)
bool isArgPassedInSGPR(const Argument *A)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY const MIMGBiasMappingInfo * getMIMGBiasMappingInfo(unsigned Bias)
LLVM_READONLY const MIMGLZMappingInfo * getMIMGLZMappingInfo(unsigned L)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const ImageDimIntrinsicInfo * getImageDimIntrinsicInfo(unsigned Intr)
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI bool getIntrinsicSignature(Intrinsic::ID, FunctionType *FT, SmallVectorImpl< Type * > &ArgTys)
Gets the type arguments of an intrinsic call by matching type constraints specified by the ....
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
bool match(Val *V, const Pattern &P)
cstfp_pred_ty< is_any_zero_fp > m_AnyZeroFP()
Match a floating-point negative zero or positive zero.
ap_match< APFloat > m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
class_match< ConstantFP > m_ConstantFP()
Match an arbitrary ConstantFP and ignore it.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
cstfp_pred_ty< is_finitenonzero > m_FiniteNonZero()
Match a finite non-zero FP constant.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 maxNum semantics.
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Returns: X * 2^Exp for integral exponents.
bool isa(const From &Val)
isa - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
LLVM_ABI Value * findScalarElement(Value *V, unsigned EltNo)
Given a vector and an element number, see if the scalar value is already around as a register,...
@ NearestTiesToEven
roundTiesToEven.
LLVM_ABI bool isKnownNeverInfOrNaN(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point value can never contain a NaN or infinity.
decltype(auto) cast(const From &Val)
cast - Return the argument parameter cast to the specified type.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
constexpr uint64_t Make_64(uint32_t High, uint32_t Low)
Make a 64-bit integer from a high / low pair of 32-bit integers.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
bool isConstant() const
Returns true if we know the value of all bits.
const APInt & getConstant() const
Returns the value when all bits have a known value.
SimplifyQuery getWithInstruction(const Instruction *I) const
LLVM_ABI bool isUndefValue(Value *V) const
If CanUseUndef is true, returns whether V is undef.