LLVM: lib/Target/RISCV/RISCVTargetTransformInfo.cpp Source File
19#include
20#include
21using namespace llvm;
23
24#define DEBUG_TYPE "riscvtti"
25
27 "riscv-v-register-bit-width-lmul",
29 "The LMUL to use for getRegisterBitWidth queries. Affects LMUL used "
30 "by autovectorized code. Fractional LMULs are not supported."),
32
34 "riscv-v-slp-max-vf",
36 "Overrides result used for getMaximumVF query which is used "
37 "exclusively by SLP vectorizer."),
39
43
46 size_t NumInstr = OpCodes.size();
48 return NumInstr;
51 return LMULCost * NumInstr;
53 for (auto Op : OpCodes) {
54 switch (Op) {
55 case RISCV::VRGATHER_VI:
57 break;
58 case RISCV::VRGATHER_VV:
60 break;
61 case RISCV::VSLIDEUP_VI:
62 case RISCV::VSLIDEDOWN_VI:
64 break;
65 case RISCV::VSLIDEUP_VX:
66 case RISCV::VSLIDEDOWN_VX:
68 break;
69 case RISCV::VREDMAX_VS:
70 case RISCV::VREDMIN_VS:
71 case RISCV::VREDMAXU_VS:
72 case RISCV::VREDMINU_VS:
73 case RISCV::VREDSUM_VS:
74 case RISCV::VREDAND_VS:
75 case RISCV::VREDOR_VS:
76 case RISCV::VREDXOR_VS:
77 case RISCV::VFREDMAX_VS:
78 case RISCV::VFREDMIN_VS:
79 case RISCV::VFREDUSUM_VS: {
84 break;
85 }
86 case RISCV::VFREDOSUM_VS: {
91 break;
92 }
93 case RISCV::VMV_X_S:
94 case RISCV::VMV_S_X:
95 case RISCV::VFMV_F_S:
96 case RISCV::VFMV_S_F:
97 case RISCV::VMOR_MM:
98 case RISCV::VMXOR_MM:
99 case RISCV::VMAND_MM:
100 case RISCV::VMANDN_MM:
101 case RISCV::VMNAND_MM:
102 case RISCV::VCPOP_M:
103 case RISCV::VFIRST_M:
105 break;
106 default:
107 Cost += LMULCost;
108 }
109 }
111}
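// Illustrative standalone sketch (not part of the LLVM source): a simplified
// model of the cost accounting in getRISCVInstructionCost above. For a
// code-size query it simply counts instructions; otherwise every opcode that
// has no cheaper special case is charged roughly one LMUL's worth of work.
// The names below (ModelCostKind, modelInstructionListCost) are hypothetical.
#include <cstddef>
#include <vector>

enum class ModelCostKind { CodeSize, RecipThroughput };

static double modelInstructionListCost(const std::vector<int> &OpCodes,
                                       double LMULCost, ModelCostKind Kind) {
  if (Kind == ModelCostKind::CodeSize)
    return static_cast<double>(OpCodes.size()); // one unit per instruction
  double Cost = 0.0;
  for (std::size_t I = 0; I != OpCodes.size(); ++I)
    Cost += LMULCost; // default case; gathers, slides and reductions differ
  return Cost;
}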
112
117 bool FreeZeroes) {
119 "getIntImmCost can only estimate cost of materialising integers");
120
121
122 if (Imm == 0)
124
125
127 false, FreeZeroes);
128}
129
133}
134
135
136
137
139 uint64_t Mask = Imm.getZExtValue();
140 auto *BO = dyn_cast<BinaryOperator>(Inst->getOperand(0));
141 if (!BO || !BO->hasOneUse())
142 return false;
143
144 if (BO->getOpcode() != Instruction::Shl)
145 return false;
146
147 if (!isa<ConstantInt>(BO->getOperand(1)))
148 return false;
149
150 unsigned ShAmt = cast<ConstantInt>(BO->getOperand(1))->getZExtValue();
151
152
155 if (ShAmt == Trailing)
156 return true;
157 }
158
159 return false;
160}
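// Illustrative standalone sketch (not part of the LLVM source): a simplified
// model of the shift-pair check above. The idea is that (and (shl x, c2), c1)
// can be lowered as a pair of shifts, avoiding materialising c1, when c1 is a
// single contiguous run of ones whose trailing-zero count equals the shift
// amount. The exact condition in the real code may differ; this is an
// assumption based on the visible fragment.
#include <bit>
#include <cstdint>

static bool modelCanUseShiftPair(uint64_t Mask, unsigned ShAmt) {
  if (Mask == 0)
    return false;
  unsigned Trailing = static_cast<unsigned>(std::countr_zero(Mask));
  uint64_t Run = Mask >> Trailing;
  bool IsShiftedMask = (Run & (Run + 1)) == 0; // ones form one contiguous block
  return IsShiftedMask && ShAmt == Trailing;
}
// e.g. modelCanUseShiftPair(0x0000'00ff'0000'0000, 32) == true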
161
167 "getIntImmCost can only estimate cost of materialising integers");
168
169
170 if (Imm == 0)
172
173
174
175 bool Takes12BitImm = false;
176 unsigned ImmArgIdx = ~0U;
177
178 switch (Opcode) {
179 case Instruction::GetElementPtr:
180
181
182
184 case Instruction::Store: {
185
186
187
188
189 if (Idx == 1 || !Inst)
191 true);
192
193 StoreInst *ST = cast<StoreInst>(Inst);
194 if (!getTLI()->allowsMemoryAccessForAlignment(
196 ST->getPointerAddressSpace(), ST->getAlign()))
198
200 true);
201 }
202 case Instruction::Load:
203
205 case Instruction::And:
206
207 if (Imm == UINT64_C(0xffff) && ST->hasStdExtZbb())
209
210 if (Imm == UINT64_C(0xffffffff) && ST->hasStdExtZba())
212
213 if (ST->hasStdExtZbs() && (~Imm).isPowerOf2())
215 if (Inst && Idx == 1 && Imm.getBitWidth() <= ST->getXLen() &&
218 Takes12BitImm = true;
219 break;
220 case Instruction::Add:
221 Takes12BitImm = true;
222 break;
223 case Instruction::Or:
224 case Instruction::Xor:
225
226 if (ST->hasStdExtZbs() && Imm.isPowerOf2())
228 Takes12BitImm = true;
229 break;
230 case Instruction::Mul:
231
232 if (Imm.isPowerOf2() || Imm.isNegatedPowerOf2())
234
235 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2())
237
238 Takes12BitImm = true;
239 break;
240 case Instruction::Sub:
241 case Instruction::Shl:
242 case Instruction::LShr:
243 case Instruction::AShr:
244 Takes12BitImm = true;
245 ImmArgIdx = 1;
246 break;
247 default:
248 break;
249 }
250
251 if (Takes12BitImm) {
252
254
255 if (Imm.getSignificantBits() <= 64 &&
258 }
259 }
260
261
263 }
264
265
267}
268
273
275}
276
279}
280
284 return ST->hasStdExtZbb() || (ST->hasVendorXCVbitmanip() && !ST->is64Bit())
287}
288
290
291
292
293 switch (II->getIntrinsicID()) {
294 default:
295 return false;
296
297 case Intrinsic::vector_reduce_mul:
298 case Intrinsic::vector_reduce_fmul:
299 return true;
300 }
301}
302
307}
308
315}
316
319 unsigned LMUL =
321 switch (K) {
332 : 0);
333 }
334
336}
337
340
341
342
344 0, CostKind);
345}
346
348 unsigned Size = Mask.size();
350 return false;
351 for (unsigned I = 0; I != Size; ++I) {
352 if (static_cast<unsigned>(Mask[I]) == I)
353 continue;
354 if (Mask[I] != 0)
355 return false;
357 return false;
358 for (unsigned J = I + 1; J != Size; ++J)
359
360 if (static_cast<unsigned>(Mask[J]) != J % I)
361 return false;
362 SubVectorSize = I;
363 return true;
364 }
365
366 return false;
367}
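// Illustrative standalone sketch (not part of the LLVM source): a simplified,
// self-contained model of the repeated-concat check above. A shuffle mask such
// as {0,1,0,1,0,1,0,1} repeats its leading two-element sub-vector, so the
// result can be built with log2(8/2) doubling sub-vector inserts.
#include <vector>

static bool modelIsRepeatedConcatMask(const std::vector<int> &Mask,
                                      int &SubVectorSize) {
  const unsigned Size = static_cast<unsigned>(Mask.size());
  for (unsigned I = 1; I < Size; ++I) {
    if (Size % I != 0)
      continue;
    bool Repeats = true;
    for (unsigned J = 0; J != Size && Repeats; ++J)
      Repeats = static_cast<unsigned>(Mask[J]) == J % I;
    if (Repeats) {
      SubVectorSize = static_cast<int>(I);
      return true;
    }
  }
  return false; // e.g. an identity mask is not a repeated concatenation
}
// Usage: int Sub; modelIsRepeatedConcatMask({0,1,0,1,0,1,0,1}, Sub) returns
// true with Sub == 2.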
368
376 return cast<VectorType>(EVT(IndexVT).getTypeForEVT(C));
377}
378
386
388
389
390
391
392 if (isa<FixedVectorType>(Tp)) {
393 switch (Kind) {
394 default:
395 break;
397 if (Mask.size() >= 2 && LT.second.isFixedLengthVector()) {
398 MVT EltTp = LT.second.getVectorElementType();
399
400
401
403
404
405
406
407
409 return 2 * LT.first * TLI->getLMULCost(LT.second);
410
411 if (Mask[0] == 0 || Mask[0] == 1) {
412 auto DeinterleaveMask = createStrideMask(Mask[0], 2, Mask.size());
413
414
415 if (equal(DeinterleaveMask, Mask))
416 return LT.first * getRISCVInstructionCost(RISCV::VNSRL_WI,
418 }
419 }
420 int SubVectorSize;
421 if (LT.second.getScalarSizeInBits() != 1 &&
424 unsigned NumSlides = Log2_32(Mask.size() / SubVectorSize);
425
426 for (unsigned I = 0; I != NumSlides; ++I) {
427 unsigned InsertIndex = SubVectorSize * (1 << I);
432 std::pair<InstructionCost, MVT> DestLT =
434
435
436
439 CostKind, InsertIndex, SubTp);
440 }
442 }
443 }
444
445
446 if (LT.second.isFixedLengthVector() && LT.first == 1 &&
447 (LT.second.getScalarSizeInBits() != 8 ||
448 LT.second.getVectorNumElements() <= 256)) {
451 return IndexCost +
452 getRISCVInstructionCost(RISCV::VRGATHER_VV, LT.second, CostKind);
453 }
454 [[fallthrough]];
455 }
458
459
460
461 if (LT.second.isFixedLengthVector() && LT.first == 1 &&
462 (LT.second.getScalarSizeInBits() != 8 ||
463 LT.second.getVectorNumElements() <= 256)) {
470 return 2 * IndexCost +
471 getRISCVInstructionCost({RISCV::VRGATHER_VV, RISCV::VRGATHER_VV},
473 MaskCost;
474 }
475 [[fallthrough]];
476 }
478
479
480
481 if (!Mask.empty() && LT.first.isValid() && LT.first != 1 &&
482 LT.second.isFixedLengthVector() &&
483 LT.second.getVectorElementType().getSizeInBits() ==
485 LT.second.getVectorNumElements() <
486 cast<FixedVectorType>(Tp)->getNumElements() &&
488 cast<FixedVectorType>(Tp)->getNumElements()) ==
489 static_cast<unsigned>(*LT.first.getValue())) {
490 unsigned NumRegs = *LT.first.getValue();
491 unsigned VF = cast<FixedVectorType>(Tp)->getNumElements();
494
496 for (unsigned I = 0, NumSrcRegs = divideCeil(Mask.size(), SubVF);
498 bool IsSingleVector = true;
501 Mask.slice(I * SubVF,
502 I == NumSrcRegs - 1 ? Mask.size() % SubVF : SubVF),
503 SubMask.begin(), [&](int I) -> int {
504 if (I == PoisonMaskElem)
505 return PoisonMaskElem;
506 bool SingleSubVector = I / VF == 0;
507 IsSingleVector &= SingleSubVector;
508 return (SingleSubVector ? 0 : 1) * SubVF + (I % VF) % SubVF;
509 });
512 static_cast<unsigned>(P.value()) == P.index();
513 }))
514 continue;
517 SubVecTy, SubMask, CostKind, 0, nullptr);
518 }
520 }
521 break;
522 }
523 }
524 };
525
526
527 switch (Kind) {
528 default:
529
530
531
532 break;
534
535 if (Index == 0)
537
538
539
540
541
542
543
545 SubLT.second.isValid() && SubLT.second.isFixedLengthVector()) {
548 if (MinVLen == MaxVLen &&
549 SubLT.second.getScalarSizeInBits() * Index % MinVLen == 0 &&
550 SubLT.second.getSizeInBits() <= MinVLen)
552 }
553
554
555
556
557 return LT.first *
558 getRISCVInstructionCost(RISCV::VSLIDEDOWN_VI, LT.second, CostKind);
560
561
562
563 return LT.first *
564 getRISCVInstructionCost(RISCV::VSLIDEUP_VI, LT.second, CostKind);
566
567
568
569
570
571
572
573
574
575 return LT.first *
576 (1 + getRISCVInstructionCost({RISCV::VMV_S_X, RISCV::VMERGE_VVM},
578 }
581 Instruction::InsertElement);
582 if (LT.second.getScalarSizeInBits() == 1) {
583 if (HasScalar) {
584
585
586
587
588
589 return LT.first *
590 (1 + getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
592 }
593
594
595
596
597
598
599
600
601
602 return LT.first *
603 (1 + getRISCVInstructionCost({RISCV::VMV_V_I, RISCV::VMERGE_VIM,
604 RISCV::VMV_X_S, RISCV::VMV_V_X,
605 RISCV::VMSNE_VI},
607 }
608
609 if (HasScalar) {
610
611
612 return LT.first *
613 getRISCVInstructionCost(RISCV::VMV_V_X, LT.second, CostKind);
614 }
615
616
617
618 return LT.first *
619 getRISCVInstructionCost(RISCV::VRGATHER_VI, LT.second, CostKind);
620 }
622
623
624
625 unsigned Opcodes[2] = {RISCV::VSLIDEDOWN_VX, RISCV::VSLIDEUP_VX};
626 if (Index >= 0 && Index < 32)
627 Opcodes[0] = RISCV::VSLIDEDOWN_VI;
628 else if (Index < 0 && Index > -32)
629 Opcodes[1] = RISCV::VSLIDEUP_VI;
630 return LT.first * getRISCVInstructionCost(Opcodes, LT.second, CostKind);
631 }
633
634
635
636
637
638
639
640
641
642
643
644
645
646
648 if (LT.second.isFixedLengthVector())
649
650 LenCost = isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1;
651 unsigned Opcodes[] = {RISCV::VID_V, RISCV::VRSUB_VX, RISCV::VRGATHER_VV};
652 if (LT.second.isFixedLengthVector() &&
653 isInt<5>(LT.second.getVectorNumElements() - 1))
654 Opcodes[1] = RISCV::VRSUB_VI;
656 getRISCVInstructionCost(Opcodes, LT.second, CostKind);
657
659 return LT.first * (LenCost + GatherCost + ExtendCost);
660 }
661 }
663}
664
669}
670
672 VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract,
674 if (isa<ScalableVectorType>(Ty))
676
677
678
679
680
682 Ty, DemandedElts, Insert, Extract, CostKind);
684 if (Insert && !Extract && LT.first.isValid() && LT.second.isVector()) {
687
688
693 }
694
695 assert(LT.second.isFixedLengthVector());
699 cast<FixedVectorType>(Ty)->getNumElements() *
700 getRISCVInstructionCost(RISCV::VSLIDE1DOWN_VX, LT.second, CostKind);
701 if (BV < Cost)
703 }
704 }
706}
707
716
718}
719
723 bool UseMaskForCond, bool UseMaskForGaps) {
724
725
726
727
728 if (!UseMaskForCond && !UseMaskForGaps &&
729 Factor <= TLI->getMaxSupportedInterleaveFactor()) {
730 auto *VTy = cast<VectorType>(VecTy);
732
733 if (LT.second.isVector()) {
734 auto *SubVecTy =
736 VTy->getElementCount().divideCoefficientBy(Factor));
737 if (VTy->getElementCount().isKnownMultipleOf(Factor) &&
740
741
742
748 return LT.first * Cost;
749 }
750
751
752
754 getMemoryOpCost(Opcode, VTy->getElementType(), Alignment, 0,
755 CostKind, {TTI::OK_AnyValue, TTI::OP_None});
756 unsigned NumLoads = getEstimatedVLFor(VTy);
757 return NumLoads * MemOpCost;
758 }
759 }
760 }
761
762
763
764 if (isa<ScalableVectorType>(VecTy))
766
767 auto *FVTy = cast<FixedVectorType>(VecTy);
770 unsigned VF = FVTy->getNumElements() / Factor;
771
772
773
774
775
776
777 if (Opcode == Instruction::Load) {
779 for (unsigned Index : Indices) {
786 Cost += ShuffleCost;
787 }
789 }
790
791
792
793
794
795
796
797
798
799
800
801 if (Factor != 2)
804 UseMaskForCond, UseMaskForGaps);
805
806 assert(Opcode == Instruction::Store && "Opcode must be a store");
807
808
813 return MemCost + ShuffleCost;
814}
815
817 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
822
823 if ((Opcode == Instruction::Load &&
825 (Opcode == Instruction::Store &&
829
830
831
832
833 auto &VTy = *cast<VectorType>(DataTy);
836 {TTI::OK_AnyValue, TTI::OP_None}, I);
837 unsigned NumLoads = getEstimatedVLFor(&VTy);
838 return NumLoads * MemOpCost;
839}
840
842 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
844 if (((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
846 (Opcode != Instruction::Load && Opcode != Instruction::Store))
849
852
853
854
855
856 auto &VTy = *cast<VectorType>(DataTy);
859 {TTI::OK_AnyValue, TTI::OP_None}, I);
860 unsigned NumLoads = getEstimatedVLFor(&VTy);
861 return NumLoads * MemOpCost;
862}
863
866
867
868
871 for (auto *Ty : Tys) {
872 if (!Ty->isVectorTy())
873 continue;
877 }
879}
880
881
882
883
884
886 {Intrinsic::floor, MVT::f32, 9},
887 {Intrinsic::floor, MVT::f64, 9},
888 {Intrinsic::ceil, MVT::f32, 9},
889 {Intrinsic::ceil, MVT::f64, 9},
890 {Intrinsic::trunc, MVT::f32, 7},
891 {Intrinsic::trunc, MVT::f64, 7},
892 {Intrinsic::round, MVT::f32, 9},
893 {Intrinsic::round, MVT::f64, 9},
894 {Intrinsic::roundeven, MVT::f32, 9},
895 {Intrinsic::roundeven, MVT::f64, 9},
896 {Intrinsic::rint, MVT::f32, 7},
897 {Intrinsic::rint, MVT::f64, 7},
898 {Intrinsic::lrint, MVT::i32, 1},
899 {Intrinsic::lrint, MVT::i64, 1},
900 {Intrinsic::llrint, MVT::i64, 1},
901 {Intrinsic::nearbyint, MVT::f32, 9},
902 {Intrinsic::nearbyint, MVT::f64, 9},
903 {Intrinsic::bswap, MVT::i16, 3},
904 {Intrinsic::bswap, MVT::i32, 12},
905 {Intrinsic::bswap, MVT::i64, 31},
906 {Intrinsic::vp_bswap, MVT::i16, 3},
907 {Intrinsic::vp_bswap, MVT::i32, 12},
908 {Intrinsic::vp_bswap, MVT::i64, 31},
909 {Intrinsic::vp_fshl, MVT::i8, 7},
910 {Intrinsic::vp_fshl, MVT::i16, 7},
911 {Intrinsic::vp_fshl, MVT::i32, 7},
912 {Intrinsic::vp_fshl, MVT::i64, 7},
913 {Intrinsic::vp_fshr, MVT::i8, 7},
914 {Intrinsic::vp_fshr, MVT::i16, 7},
915 {Intrinsic::vp_fshr, MVT::i32, 7},
916 {Intrinsic::vp_fshr, MVT::i64, 7},
917 {Intrinsic::bitreverse, MVT::i8, 17},
918 {Intrinsic::bitreverse, MVT::i16, 24},
919 {Intrinsic::bitreverse, MVT::i32, 33},
920 {Intrinsic::bitreverse, MVT::i64, 52},
921 {Intrinsic::vp_bitreverse, MVT::i8, 17},
922 {Intrinsic::vp_bitreverse, MVT::i16, 24},
923 {Intrinsic::vp_bitreverse, MVT::i32, 33},
924 {Intrinsic::vp_bitreverse, MVT::i64, 52},
925 {Intrinsic::ctpop, MVT::i8, 12},
926 {Intrinsic::ctpop, MVT::i16, 19},
927 {Intrinsic::ctpop, MVT::i32, 20},
928 {Intrinsic::ctpop, MVT::i64, 21},
929 {Intrinsic::ctlz, MVT::i8, 19},
930 {Intrinsic::ctlz, MVT::i16, 28},
931 {Intrinsic::ctlz, MVT::i32, 31},
932 {Intrinsic::ctlz, MVT::i64, 35},
933 {Intrinsic::cttz, MVT::i8, 16},
934 {Intrinsic::cttz, MVT::i16, 23},
935 {Intrinsic::cttz, MVT::i32, 24},
936 {Intrinsic::cttz, MVT::i64, 25},
937 {Intrinsic::vp_ctpop, MVT::i8, 12},
938 {Intrinsic::vp_ctpop, MVT::i16, 19},
939 {Intrinsic::vp_ctpop, MVT::i32, 20},
940 {Intrinsic::vp_ctpop, MVT::i64, 21},
941 {Intrinsic::vp_ctlz, MVT::i8, 19},
942 {Intrinsic::vp_ctlz, MVT::i16, 28},
943 {Intrinsic::vp_ctlz, MVT::i32, 31},
944 {Intrinsic::vp_ctlz, MVT::i64, 35},
945 {Intrinsic::vp_cttz, MVT::i8, 16},
946 {Intrinsic::vp_cttz, MVT::i16, 23},
947 {Intrinsic::vp_cttz, MVT::i32, 24},
948 {Intrinsic::vp_cttz, MVT::i64, 25},
949};
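// Illustrative standalone sketch (not part of the LLVM source): the table
// above is consulted with a linear lookup keyed on (intrinsic ID, scalar
// element type); when an entry is found, its cost is scaled by the number of
// parts the legalised type splits into (LT.first in the code further below).
// The struct and function here are hypothetical stand-ins, not LLVM's
// CostTblEntry/CostTableLookup.
#include <optional>
#include <vector>

struct ModelCostEntry {
  int IntrinsicID;
  int ElementType;
  unsigned Cost;
};

static std::optional<unsigned>
modelCostTableLookup(const std::vector<ModelCostEntry> &Table, int ID,
                     int EltTy) {
  for (const ModelCostEntry &E : Table)
    if (E.IntrinsicID == ID && E.ElementType == EltTy)
      return E.Cost;
  return std::nullopt; // no entry: fall back to the generic cost model
}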
950
952 switch (ID) {
953#define HELPER_MAP_VPID_TO_VPSD(VPID, VPSD) \
954 case Intrinsic::VPID: \
955 return ISD::VPSD;
956#include "llvm/IR/VPIntrinsics.def"
957#undef HELPER_MAP_VPID_TO_VPSD
958 }
960}
961
966 switch (ICA.getID()) {
967 case Intrinsic::lrint:
968 case Intrinsic::llrint:
969
970 if (auto *VecTy = dyn_cast<VectorType>(ICA.getArgTypes()[0]);
971 VecTy && VecTy->getElementType()->is16bitFPTy())
973 [[fallthrough]];
974 case Intrinsic::ceil:
975 case Intrinsic::floor:
976 case Intrinsic::trunc:
977 case Intrinsic::rint:
978 case Intrinsic::round:
979 case Intrinsic::roundeven: {
980
983 return LT.first * 8;
984 break;
985 }
986 case Intrinsic::umin:
987 case Intrinsic::umax:
988 case Intrinsic::smin:
989 case Intrinsic::smax: {
991 if (LT.second.isScalarInteger() && ST->hasStdExtZbb())
992 return LT.first;
993
995 unsigned Op;
996 switch (ICA.getID()) {
997 case Intrinsic::umin:
998 Op = RISCV::VMINU_VV;
999 break;
1000 case Intrinsic::umax:
1001 Op = RISCV::VMAXU_VV;
1002 break;
1003 case Intrinsic::smin:
1004 Op = RISCV::VMIN_VV;
1005 break;
1006 case Intrinsic::smax:
1007 Op = RISCV::VMAX_VV;
1008 break;
1009 }
1010 return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
1011 }
1012 break;
1013 }
1014 case Intrinsic::sadd_sat:
1015 case Intrinsic::ssub_sat:
1016 case Intrinsic::uadd_sat:
1017 case Intrinsic::usub_sat: {
1020 unsigned Op;
1021 switch (ICA.getID()) {
1022 case Intrinsic::sadd_sat:
1023 Op = RISCV::VSADD_VV;
1024 break;
1025 case Intrinsic::ssub_sat:
1026 Op = RISCV::VSSUBU_VV;
1027 break;
1028 case Intrinsic::uadd_sat:
1029 Op = RISCV::VSADDU_VV;
1030 break;
1031 case Intrinsic::usub_sat:
1032 Op = RISCV::VSSUBU_VV;
1033 break;
1034 }
1035 return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
1036 }
1037 break;
1038 }
1039 case Intrinsic::fabs: {
1042
1043
1044
1045
1046
1047 if (LT.second.getVectorElementType() == MVT::bf16 ||
1048 (LT.second.getVectorElementType() == MVT::f16 &&
1050 return LT.first * getRISCVInstructionCost(RISCV::VAND_VX, LT.second,
1052 2;
1053 else
1054 return LT.first *
1055 getRISCVInstructionCost(RISCV::VFSGNJX_VV, LT.second, CostKind);
1056 }
1057 break;
1058 }
1059 case Intrinsic::sqrt: {
1064 MVT ConvType = LT.second;
1065 MVT FsqrtType = LT.second;
1066
1067
1068 if (LT.second.getVectorElementType() == MVT::bf16) {
1069 if (LT.second == MVT::nxv32bf16) {
1070 ConvOp = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFWCVTBF16_F_F_V,
1071 RISCV::VFNCVTBF16_F_F_W, RISCV::VFNCVTBF16_F_F_W};
1072 FsqrtOp = {RISCV::VFSQRT_V, RISCV::VFSQRT_V};
1073 ConvType = MVT::nxv16f16;
1074 FsqrtType = MVT::nxv16f32;
1075 } else {
1076 ConvOp = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFNCVTBF16_F_F_W};
1077 FsqrtOp = {RISCV::VFSQRT_V};
1079 }
1080 } else if (LT.second.getVectorElementType() == MVT::f16 &&
1082 if (LT.second == MVT::nxv32f16) {
1083 ConvOp = {RISCV::VFWCVT_F_F_V, RISCV::VFWCVT_F_F_V,
1084 RISCV::VFNCVT_F_F_W, RISCV::VFNCVT_F_F_W};
1085 FsqrtOp = {RISCV::VFSQRT_V, RISCV::VFSQRT_V};
1086 ConvType = MVT::nxv16f16;
1087 FsqrtType = MVT::nxv16f32;
1088 } else {
1089 ConvOp = {RISCV::VFWCVT_F_F_V, RISCV::VFNCVT_F_F_W};
1090 FsqrtOp = {RISCV::VFSQRT_V};
1092 }
1093 } else {
1094 FsqrtOp = {RISCV::VFSQRT_V};
1095 }
1096
1097 return LT.first * (getRISCVInstructionCost(FsqrtOp, FsqrtType, CostKind) +
1098 getRISCVInstructionCost(ConvOp, ConvType, CostKind));
1099 }
1100 break;
1101 }
1102 case Intrinsic::cttz:
1103 case Intrinsic::ctlz:
1104 case Intrinsic::ctpop: {
1106 if (ST->hasVInstructions() && ST->hasStdExtZvbb() && LT.second.isVector()) {
1107 unsigned Op;
1108 switch (ICA.getID()) {
1109 case Intrinsic::cttz:
1110 Op = RISCV::VCTZ_V;
1111 break;
1112 case Intrinsic::ctlz:
1113 Op = RISCV::VCLZ_V;
1114 break;
1115 case Intrinsic::ctpop:
1116 Op = RISCV::VCPOP_V;
1117 break;
1118 }
1119 return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
1120 }
1121 break;
1122 }
1123 case Intrinsic::abs: {
1126
1127
1128 return LT.first *
1129 getRISCVInstructionCost({RISCV::VRSUB_VI, RISCV::VMAX_VV},
1131 }
1132 break;
1133 }
1134 case Intrinsic::get_active_lane_mask: {
1137 ICA.getArgTypes()[0], cast<VectorType>(RetTy)->getElementCount());
1139
1140
1141
1142
1143 return LT.first *
1144 getRISCVInstructionCost({RISCV::VSADDU_VX, RISCV::VMSLTU_VX},
1146 }
1147 break;
1148 }
1149
1150 case Intrinsic::stepvector: {
1152
1153
1155 return getRISCVInstructionCost(RISCV::VID_V, LT.second, CostKind) +
1156 (LT.first - 1) *
1157 getRISCVInstructionCost(RISCV::VADD_VX, LT.second, CostKind);
1158 return 1 + (LT.first - 1);
1159 }
1160 case Intrinsic::experimental_cttz_elts: {
1164 break;
1167
1168
1169
1172 cast<ConstantInt>(ICA.getArgs()[1])->isZero())
1177
1178 return Cost;
1179 }
1180 case Intrinsic::vp_rint: {
1181
1182 unsigned Cost = 5;
1185 return Cost * LT.first;
1186 break;
1187 }
1188 case Intrinsic::vp_nearbyint: {
1189
1190 unsigned Cost = 7;
1193 return Cost * LT.first;
1194 break;
1195 }
1196 case Intrinsic::vp_ceil:
1197 case Intrinsic::vp_floor:
1198 case Intrinsic::vp_round:
1199 case Intrinsic::vp_roundeven:
1200 case Intrinsic::vp_roundtozero: {
1201
1202
1203 unsigned Cost = 7;
1207 return Cost * LT.first;
1208 break;
1209 }
1210 case Intrinsic::vp_fneg: {
1211 std::optional FOp =
1213 assert(FOp.has_value());
1215 break;
1216 }
1217 case Intrinsic::vp_select: {
1220 assert(FOp.has_value());
1223 }
1224 case Intrinsic::vp_merge:
1228 case Intrinsic::experimental_vp_splat: {
1230
1231 if (!ST->hasVInstructions() || LT.second.getScalarType() == MVT::i1)
1233 return LT.first * getRISCVInstructionCost(LT.second.isFloatingPoint()
1234 ? RISCV::VFMV_V_F
1235 : RISCV::VMV_V_X,
1237 }
1238 }
1239
1242 LT.second.isVector()) {
1243 MVT EltTy = LT.second.getVectorElementType();
1245 ICA.getID(), EltTy))
1246 return LT.first * Entry->Cost;
1247 }
1248 }
1249
1251}
1252
1258 bool IsVectorType = isa<VectorType>(Dst) && isa<VectorType>(Src);
1259 if (!IsVectorType)
1261
1262
1263
1264
1266 Dst->getScalarSizeInBits() > ST->getELen())
1268
1270 assert(ISD && "Invalid opcode");
1273
1274
1275
1276
1277
1278
1279 switch (ISD) {
1280 default:
1281 break;
1284 if (Src->getScalarSizeInBits() == 1) {
1285
1286
1287
1288
1289 return getRISCVInstructionCost(RISCV::VMV_V_I, DstLT.second, CostKind) +
1290 DstLT.first * getRISCVInstructionCost(RISCV::VMERGE_VIM,
1292 DstLT.first - 1;
1293 }
1294 break;
1296 if (Dst->getScalarSizeInBits() == 1) {
1297
1298
1299
1300
1301
1302 return SrcLT.first *
1303 getRISCVInstructionCost({RISCV::VAND_VI, RISCV::VMSNE_VI},
1305 SrcLT.first - 1;
1306 }
1307 break;
1308 };
1309
1310
1311
1312
1313
1315 if (!SrcLT.second.isVector() || !DstLT.second.isVector() ||
1317 SrcLT.second.getSizeInBits()) ||
1319 DstLT.second.getSizeInBits()))
1321
1322
1323 assert((SrcLT.first == 1) && (DstLT.first == 1) && "Illegal type");
1324
1325 int PowDiff = (int)Log2_32(DstLT.second.getScalarSizeInBits()) -
1326 (int)Log2_32(SrcLT.second.getScalarSizeInBits());
1327 switch (ISD) {
1330 if ((PowDiff < 1) || (PowDiff > 3))
1332 unsigned SExtOp[] = {RISCV::VSEXT_VF2, RISCV::VSEXT_VF4, RISCV::VSEXT_VF8};
1333 unsigned ZExtOp[] = {RISCV::VZEXT_VF2, RISCV::VZEXT_VF4, RISCV::VZEXT_VF8};
1334 unsigned Op =
1335 (ISD == ISD::SIGN_EXTEND) ? SExtOp[PowDiff - 1] : ZExtOp[PowDiff - 1];
1336 return getRISCVInstructionCost(Op, DstLT.second, CostKind);
1337 }
1341
1342 unsigned SrcEltSize = SrcLT.second.getScalarSizeInBits();
1343 unsigned DstEltSize = DstLT.second.getScalarSizeInBits();
1344
1347 : RISCV::VFNCVT_F_F_W;
1349 for (; SrcEltSize != DstEltSize;) {
1354 DstEltSize =
1355 (DstEltSize > SrcEltSize) ? DstEltSize >> 1 : DstEltSize << 1;
1356 Cost += getRISCVInstructionCost(Op, DstMVT, CostKind);
1357 }
1358 return Cost;
1359 }
1363 unsigned FCVT = IsSigned ? RISCV::VFCVT_RTZ_X_F_V : RISCV::VFCVT_RTZ_XU_F_V;
1364 unsigned FWCVT =
1365 IsSigned ? RISCV::VFWCVT_RTZ_X_F_V : RISCV::VFWCVT_RTZ_XU_F_V;
1366 unsigned FNCVT =
1367 IsSigned ? RISCV::VFNCVT_RTZ_X_F_W : RISCV::VFNCVT_RTZ_XU_F_W;
1368 unsigned SrcEltSize = Src->getScalarSizeInBits();
1369 unsigned DstEltSize = Dst->getScalarSizeInBits();
1371 if ((SrcEltSize == 16) &&
1373
1374
1377 cast<VectorType>(Dst)->getElementCount());
1378 std::pair<InstructionCost, MVT> VecF32LT =
1381 VecF32LT.first * getRISCVInstructionCost(RISCV::VFWCVT_F_F_V,
1384 return Cost;
1385 }
1386 if (DstEltSize == SrcEltSize)
1387 Cost += getRISCVInstructionCost(FCVT, DstLT.second, CostKind);
1388 else if (DstEltSize > SrcEltSize)
1389 Cost += getRISCVInstructionCost(FWCVT, DstLT.second, CostKind);
1390 else {
1391
1392
1395 Cost += getRISCVInstructionCost(FNCVT, VecVT, CostKind);
1396 if ((SrcEltSize / 2) > DstEltSize) {
1400 }
1401 }
1402 return Cost;
1403 }
1407 unsigned FCVT = IsSigned ? RISCV::VFCVT_F_X_V : RISCV::VFCVT_F_XU_V;
1408 unsigned FWCVT = IsSigned ? RISCV::VFWCVT_F_X_V : RISCV::VFWCVT_F_XU_V;
1409 unsigned FNCVT = IsSigned ? RISCV::VFNCVT_F_X_W : RISCV::VFNCVT_F_XU_W;
1410 unsigned SrcEltSize = Src->getScalarSizeInBits();
1411 unsigned DstEltSize = Dst->getScalarSizeInBits();
1412
1414 if ((DstEltSize == 16) &&
1416
1417
1420 cast<VectorType>(Dst)->getElementCount());
1421 std::pair<InstructionCost, MVT> VecF32LT =
1424 Cost += VecF32LT.first * getRISCVInstructionCost(RISCV::VFNCVT_F_F_W,
1426 return Cost;
1427 }
1428
1429 if (DstEltSize == SrcEltSize)
1430 Cost += getRISCVInstructionCost(FCVT, DstLT.second, CostKind);
1431 else if (DstEltSize > SrcEltSize) {
1432 if ((DstEltSize / 2) > SrcEltSize) {
1435 cast<VectorType>(Dst)->getElementCount());
1436 unsigned Op = IsSigned ? Instruction::SExt : Instruction::ZExt;
1438 }
1439 Cost += getRISCVInstructionCost(FWCVT, DstLT.second, CostKind);
1440 } else
1441 Cost += getRISCVInstructionCost(FNCVT, DstLT.second, CostKind);
1442 return Cost;
1443 }
1444 }
1446}
1447
1448unsigned RISCVTTIImpl::getEstimatedVLFor(VectorType *Ty) {
1449 if (isa<ScalableVectorType>(Ty)) {
1454 }
1455 return cast<FixedVectorType>(Ty)->getNumElements();
1456}
1457
1464
1465
1468
1471
1472
1473
1474 if (IID == Intrinsic::umax || IID == Intrinsic::smin)
1476 else
1478 }
1479
1480 if (IID == Intrinsic::maximum || IID == Intrinsic::minimum) {
1483 switch (IID) {
1484 case Intrinsic::maximum:
1486 Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
1487 } else {
1488 Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMAX_VS,
1489 RISCV::VFMV_F_S};
1490
1491
1492
1496 ExtraCost = 1 +
1500 }
1501 break;
1502
1503 case Intrinsic::minimum:
1505 Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
1506 } else {
1507 Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMIN_VS,
1508 RISCV::VFMV_F_S};
1509
1510
1511
1515 ExtraCost = 1 +
1519 }
1520 break;
1521 }
1522 return ExtraCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
1523 }
1524
1525
1526 unsigned SplitOp;
1528 switch (IID) {
1529 default:
1531 case Intrinsic::smax:
1532 SplitOp = RISCV::VMAX_VV;
1533 Opcodes = {RISCV::VREDMAX_VS, RISCV::VMV_X_S};
1534 break;
1535 case Intrinsic::smin:
1536 SplitOp = RISCV::VMIN_VV;
1537 Opcodes = {RISCV::VREDMIN_VS, RISCV::VMV_X_S};
1538 break;
1539 case Intrinsic::umax:
1540 SplitOp = RISCV::VMAXU_VV;
1541 Opcodes = {RISCV::VREDMAXU_VS, RISCV::VMV_X_S};
1542 break;
1543 case Intrinsic::umin:
1544 SplitOp = RISCV::VMINU_VV;
1545 Opcodes = {RISCV::VREDMINU_VS, RISCV::VMV_X_S};
1546 break;
1547 case Intrinsic::maxnum:
1548 SplitOp = RISCV::VFMAX_VV;
1549 Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
1550 break;
1551 case Intrinsic::minnum:
1552 SplitOp = RISCV::VFMIN_VV;
1553 Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
1554 break;
1555 }
1556
1558 (LT.first > 1) ? (LT.first - 1) *
1559 getRISCVInstructionCost(SplitOp, LT.second, CostKind)
1560 : 0;
1561 return SplitCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
1562}
1563
1566 std::optional FMF,
1570
1571
1574
1576 assert(ISD && "Invalid opcode");
1577
1581
1585
1586
1587
1588 if (LT.second == MVT::v1i1)
1589 return getRISCVInstructionCost(RISCV::VFIRST_M, LT.second, CostKind) +
1592
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607 return ((LT.first > 2) ? (LT.first - 2) : 0) *
1608 getRISCVInstructionCost(RISCV::VMAND_MM, LT.second, CostKind) +
1609 getRISCVInstructionCost(RISCV::VMNAND_MM, LT.second, CostKind) +
1610 getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) +
1614
1615
1616
1617
1618
1619 return (LT.first - 1) *
1620 getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second, CostKind) +
1621 getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) + 1;
1622 } else {
1624
1625
1626
1627
1628
1629 return (LT.first - 1) *
1630 getRISCVInstructionCost(RISCV::VMOR_MM, LT.second, CostKind) +
1631 getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) +
1634 }
1635 }
1636
1637
1638
1639
1640 unsigned SplitOp;
1642 switch (ISD) {
1644 SplitOp = RISCV::VADD_VV;
1645 Opcodes = {RISCV::VMV_S_X, RISCV::VREDSUM_VS, RISCV::VMV_X_S};
1646 break;
1648 SplitOp = RISCV::VOR_VV;
1649 Opcodes = {RISCV::VREDOR_VS, RISCV::VMV_X_S};
1650 break;
1652 SplitOp = RISCV::VXOR_VV;
1653 Opcodes = {RISCV::VMV_S_X, RISCV::VREDXOR_VS, RISCV::VMV_X_S};
1654 break;
1656 SplitOp = RISCV::VAND_VV;
1657 Opcodes = {RISCV::VREDAND_VS, RISCV::VMV_X_S};
1658 break;
1660
1661 if ((LT.second.getVectorElementType() == MVT::f16 &&
1663 LT.second.getVectorElementType() == MVT::bf16)
1666 Opcodes.push_back(RISCV::VFMV_S_F);
1667 for (unsigned i = 0; i < LT.first.getValue(); i++)
1668 Opcodes.push_back(RISCV::VFREDOSUM_VS);
1669 Opcodes.push_back(RISCV::VFMV_F_S);
1670 return getRISCVInstructionCost(Opcodes, LT.second, CostKind);
1671 }
1672 SplitOp = RISCV::VFADD_VV;
1673 Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDUSUM_VS, RISCV::VFMV_F_S};
1674 break;
1675 }
1676
1678 (LT.first > 1) ? (LT.first - 1) *
1679 getRISCVInstructionCost(SplitOp, LT.second, CostKind)
1680 : 0;
1681 return SplitCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
1682}
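// Illustrative standalone sketch (not part of the LLVM source): the reduction
// cost formula used above. A type that legalises to LTFirst vector registers
// first pays LTFirst - 1 element-wise "split" operations (e.g. vadd.vv) to
// combine the parts, then one reduction sequence (e.g. vmv.s.x + vredsum.vs +
// vmv.x.s) on a single register's worth of elements.
static double modelSplitReductionCost(unsigned LTFirst, double SplitOpCost,
                                      double ReductionSeqCost) {
  double SplitCost = LTFirst > 1 ? (LTFirst - 1) * SplitOpCost : 0.0;
  return SplitCost + ReductionSeqCost;
}
// e.g. a reduction over a value needing 4 registers: 3 split ops plus the
// final reduction sequence.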
1683
1685 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy,
1690
1691
1695
1696 if (Opcode != Instruction::Add && Opcode != Instruction::FAdd)
1699
1701
1702 if (IsUnsigned && Opcode == Instruction::Add &&
1703 LT.second.isFixedLengthVector() && LT.second.getScalarType() == MVT::i1) {
1704
1705
1706 return LT.first *
1707 getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind);
1708 }
1709
1710 if (ResTy->getScalarSizeInBits() != 2 * LT.second.getScalarSizeInBits())
1713
1714 return (LT.first - 1) +
1716}
1717
1722 if (!isa<VectorType>(Ty))
1723
1724
1725
1726 return 0;
1727
1729
1730
1731
1732 return 1;
1733
1734 return getConstantPoolLoadCost(Ty, CostKind);
1735}
1736
1737
1745
1746 if (VT == MVT::Other)
1749
1751 if (Opcode == Instruction::Store && OpInfo.isConstant())
1753
1755
1759 return Cost;
1760
1761
1762
1763
1764
1766 if (Src->isVectorTy() && LT.second.isVector() &&
1768 LT.second.getSizeInBits()))
1769 return Cost;
1770
1773 }();
1774
1775
1776
1777
1779 BaseCost *= TLI->getLMULCost(LT.second);
1780 return Cost + BaseCost;
1781
1782}
1783
1790 Op1Info, Op2Info, I);
1791
1794 Op1Info, Op2Info, I);
1795
1796
1799 Op1Info, Op2Info, I);
1800
1801 auto GetConstantMatCost =
1803 if (OpInfo.isUniform())
1804
1805
1806 return 0;
1807
1808 return getConstantPoolLoadCost(ValTy, CostKind);
1809 };
1810
1813 ConstantMatCost += GetConstantMatCost(Op1Info);
1815 ConstantMatCost += GetConstantMatCost(Op2Info);
1816
1818 if (Opcode == Instruction::Select && ValTy->isVectorTy()) {
1821
1822
1823
1824 return ConstantMatCost +
1825 LT.first *
1826 getRISCVInstructionCost(
1827 {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},
1829 }
1830
1831 return ConstantMatCost +
1832 LT.first * getRISCVInstructionCost(RISCV::VMERGE_VVM, LT.second,
1834 }
1835
1837
1838
1839
1840
1841
1842 MVT InterimVT = LT.second.changeVectorElementType(MVT::i8);
1843 return ConstantMatCost +
1844 LT.first *
1845 getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
1847 LT.first * getRISCVInstructionCost(
1848 {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},
1850 }
1851
1852
1853
1854
1855 return ConstantMatCost +
1856 LT.first * getRISCVInstructionCost(
1857 {RISCV::VMV_V_X, RISCV::VMSNE_VI, RISCV::VMERGE_VVM},
1859 }
1860
1861 if ((Opcode == Instruction::ICmp) && ValTy->isVectorTy() &&
1863
1864
1865 return ConstantMatCost + LT.first * getRISCVInstructionCost(RISCV::VMSLT_VV,
1866 LT.second,
1868 }
1869
1870 if ((Opcode == Instruction::FCmp) && ValTy->isVectorTy() &&
1872
1873
1875 return ConstantMatCost +
1876 getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second, CostKind);
1877
1878
1879
1880
1881
1886 Op1Info, Op2Info, I);
1887
1888
1889
1890 switch (VecPred) {
1895 return ConstantMatCost +
1896 LT.first * getRISCVInstructionCost(
1897 {RISCV::VMFLT_VV, RISCV::VMFLT_VV, RISCV::VMOR_MM},
1899
1904 return ConstantMatCost +
1905 LT.first *
1906 getRISCVInstructionCost({RISCV::VMFLT_VV, RISCV::VMNAND_MM},
1908
1915 return ConstantMatCost +
1916 LT.first *
1917 getRISCVInstructionCost(RISCV::VMFLT_VV, LT.second, CostKind);
1918 default:
1919 break;
1920 }
1921 }
1922
1923
1924
1925
1926
1929 if (all_of(I->users(), [&](const User *U) {
1930 return match(U, m_Select(m_Specific(I), m_Value(), m_Value())) &&
1931 U->getType()->isIntegerTy() &&
1932 !isa(U->getOperand(1)) &&
1933 !isa(U->getOperand(2));
1934 }))
1935 return 0;
1936 }
1937
1938
1939
1941 Op1Info, Op2Info, I);
1942}
1943
1948 return Opcode == Instruction::PHI ? 0 : 1;
1949
1950 return 0;
1951}
1952
1955 unsigned Index, Value *Op0,
1958
1959 if (Opcode != Instruction::ExtractElement &&
1960 Opcode != Instruction::InsertElement)
1962
1963
1965
1966
1967 if (!LT.second.isVector()) {
1968 auto *FixedVecTy = cast<FixedVectorType>(Val);
1969
1970 if (Index != -1U)
1971 return 0;
1972
1973
1974
1975
1976 Type *ElemTy = FixedVecTy->getElementType();
1977 auto NumElems = FixedVecTy->getNumElements();
1983 return Opcode == Instruction::ExtractElement
1984 ? StoreCost * NumElems + LoadCost
1985 : (StoreCost + LoadCost) * NumElems + StoreCost;
1986 }
1987
1988
1989 if (LT.second.isScalableVector() && !LT.first.isValid())
1990 return LT.first;
1991
1992
1996 cast<VectorType>(Val)->getElementCount());
1997 if (Opcode == Instruction::ExtractElement) {
2003 return ExtendCost + ExtractCost;
2004 }
2013 return ExtendCost + InsertCost + TruncCost;
2014 }
2015
2016
2017
2018
2019 unsigned BaseCost = 1;
2020
2021 unsigned SlideCost = Opcode == Instruction::InsertElement ? 2 : 1;
2022
2023 if (Index != -1U) {
2024
2025
2026 if (LT.second.isFixedLengthVector()) {
2027 unsigned Width = LT.second.getVectorNumElements();
2028 Index = Index % Width;
2029 }
2030
2031
2032
2034 unsigned EltSize = LT.second.getScalarSizeInBits();
2035 unsigned M1Max = *VLEN / EltSize;
2036 Index = Index % M1Max;
2037 }
2038
2039
2040 if (Index == 0)
2041 SlideCost = 0;
2042 else if (Opcode == Instruction::InsertElement)
2043 SlideCost = 1;
2044 }
2045
2046
2047
2048
2049 if (LT.first > 1 &&
2050 ((Index == -1U) || (Index >= LT.second.getVectorMinNumElements() &&
2051 LT.second.isScalableVector()))) {
2055
2057
2058
2059 if (Opcode == Instruction::ExtractElement)
2061 getMemoryOpCost(Instruction::Load, ScalarType, SclAlign, 0,
2063 IdxCost;
2064
2065
2066
2069 getMemoryOpCost(Instruction::Store, ScalarType, SclAlign, 0,
2071 IdxCost;
2072 }
2073
2074
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095 BaseCost = Opcode == Instruction::InsertElement ? 3 : 4;
2096 }
2097 return BaseCost + SlideCost;
2098}
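// Illustrative standalone sketch (not part of the LLVM source): a simplified
// model of the slide-cost component of getVectorInstrCost above. Lane 0 needs
// no slide; a known non-zero lane needs a single slide for an insert, while an
// unknown lane conservatively pays for both a slide up and a slide down
// (insert) or one slidedown (extract).
static unsigned modelSlideCost(bool IsInsert, bool IndexKnown, unsigned Index) {
  unsigned SlideCost = IsInsert ? 2 : 1; // unknown-index default
  if (IndexKnown) {
    if (Index == 0)
      SlideCost = 0;
    else if (IsInsert)
      SlideCost = 1;
  }
  return SlideCost;
}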
2099
2104
2105
2108 Args, CxtI);
2109
2112 Args, CxtI);
2113
2114
2117 Args, CxtI);
2118
2119
2121
2122
2123 if (!LT.second.isVector())
2125 Args, CxtI);
2126
2127
2128
2131 if ((LT.second.getVectorElementType() == MVT::f16 ||
2132 LT.second.getVectorElementType() == MVT::bf16) &&
2138
2139 CastCost += LT.first * Args.size() *
2142
2143 CastCost +=
2144 LT.first * getCastInstrCost(Instruction::FPTrunc, LegalTy, PromotedTy,
2146
2147 LT.second = PromotedVT;
2148 }
2149
2150 auto getConstantMatCost =
2152 if (OpInfo.isUniform() && canSplatOperand(Opcode, Operand))
2153
2154
2155
2156
2157
2158 return 0;
2159
2160 return getConstantPoolLoadCost(Ty, CostKind);
2161 };
2162
2163
2166 ConstantMatCost += getConstantMatCost(0, Op1Info);
2168 ConstantMatCost += getConstantMatCost(1, Op2Info);
2169
2170 unsigned Op;
2171 switch (ISDOpcode) {
2174 Op = RISCV::VADD_VV;
2175 break;
2179 Op = RISCV::VSLL_VV;
2180 break;
2185 break;
2189 Op = RISCV::VMUL_VV;
2190 break;
2193 Op = RISCV::VDIV_VV;
2194 break;
2197 Op = RISCV::VREM_VV;
2198 break;
2201 Op = RISCV::VFADD_VV;
2202 break;
2204 Op = RISCV::VFMUL_VV;
2205 break;
2207 Op = RISCV::VFDIV_VV;
2208 break;
2210 Op = RISCV::VFSGNJN_VV;
2211 break;
2212 default:
2213
2214
2215 return CastCost + ConstantMatCost +
2217 Args, CxtI);
2218 }
2219
2221
2222
2223
2226 return CastCost + ConstantMatCost + LT.first * InstrCost;
2227}
2228
2229
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245 for (auto [I, V] : enumerate(Ptrs)) {
2246 const auto *GEP = dyn_cast<GetElementPtrInst>(V);
2247 if (!GEP)
2248 continue;
2249 if (Info.isSameBase() && V != Base) {
2250 if (GEP->hasAllConstantIndices())
2251 continue;
2252
2253
2254
2255
2257 if (Info.isUnitStride() &&
2259 nullptr,
2260 Stride * I,
2261 true,
2262 0,
2263 GEP->getType()->getPointerAddressSpace()))
2264 continue;
2266 {TTI::OK_AnyValue, TTI::OP_None},
2267 {TTI::OK_AnyValue, TTI::OP_None}, {});
2268 } else {
2271 Indices, AccessTy, CostKind);
2272 }
2273 }
2274 return Cost;
2275}
2276
2280
2281
2282
2283
2284 if (ST->enableDefaultUnroll())
2286
2287
2288
2290
2291
2294 if (L->getHeader()->getParent()->hasOptSize())
2295 return;
2296
2298 L->getExitingBlocks(ExitingBlocks);
2300 << "Blocks: " << L->getNumBlocks() << "\n"
2301 << "Exit blocks: " << ExitingBlocks.size() << "\n");
2302
2303
2304
2305 if (ExitingBlocks.size() > 2)
2306 return;
2307
2308
2309
2310 if (L->getNumBlocks() > 4)
2311 return;
2312
2313
2315 return;
2316
2317
2318
2320 for (auto *BB : L->getBlocks()) {
2321 for (auto &I : *BB) {
2322
2323
2324 if (I.getType()->isVectorTy())
2325 return;
2326
2330 continue;
2331 }
2332 return;
2333 }
2334
2338 }
2339 }
2340
2342
2347
2348
2349
2350 if (Cost < 12)
2351 UP.Force = true;
2352}
2353
2357}
2358
2361
2362 Type *EltTy = cast<VectorType>(Ty)->getElementType();
2366 cast<VectorType>(Ty));
2367
2371
2374 }
2375
2377}
2378
2380 if (SLPMaxVF.getNumOccurrences())
2382
2383
2384
2385
2386
2387
2390
2391
2392 return std::max(1U, RegWidth.getFixedValue() / ElemWidth);
2393}
2394
2398 if (ST->hasVendorXCVmem() && !ST->is64Bit())
2400
2402}
2403
2406
2407
2408
2417}
2418
2420 auto *VTy = dyn_cast<VectorType>(DataTy);
2421 if (!VTy || VTy->isScalableTy())
2422 return false;
2423
2425 return false;
2426
2427
2428
2429 if (VTy->getElementType()->isIntegerTy(8))
2430 if (VTy->getElementCount().getFixedValue() > 256)
2431 return VTy->getPrimitiveSizeInBits() / ST->getRealMinVLen() <
2433 return true;
2434}
2435
2437 auto *VTy = dyn_cast<VectorType>(DataTy);
2438 if (!VTy || VTy->isScalableTy())
2439 return false;
2440
2442 return false;
2443 return true;
2444}
2445
2446
2447
2448
2449
2450
2452 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
2453 bool Considerable = false;
2454 AllowPromotionWithoutCommonHeader = false;
2455 if (!isa<SExtInst>(&I))
2456 return false;
2457 Type *ConsideredSExtType =
2459 if (I.getType() != ConsideredSExtType)
2460 return false;
2461
2462
2463 for (const User *U : I.users()) {
2464 if (const GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(U)) {
2465 Considerable = true;
2466
2467
2468
2469 if (GEPInst->getNumOperands() > 2) {
2470 AllowPromotionWithoutCommonHeader = true;
2471 break;
2472 }
2473 }
2474 }
2475 return Considerable;
2476}
2477
2479 switch (Opcode) {
2480 case Instruction::Add:
2481 case Instruction::Sub:
2482 case Instruction::Mul:
2483 case Instruction::And:
2484 case Instruction::Or:
2485 case Instruction::Xor:
2486 case Instruction::FAdd:
2487 case Instruction::FSub:
2488 case Instruction::FMul:
2489 case Instruction::FDiv:
2490 case Instruction::ICmp:
2491 case Instruction::FCmp:
2492 return true;
2493 case Instruction::Shl:
2494 case Instruction::LShr:
2495 case Instruction::AShr:
2496 case Instruction::UDiv:
2497 case Instruction::SDiv:
2498 case Instruction::URem:
2499 case Instruction::SRem:
2500 case Instruction::Select:
2501 return Operand == 1;
2502 default:
2503 return false;
2504 }
2505}
2506
2509 return false;
2510
2512 return true;
2513
2514 auto *II = dyn_cast<IntrinsicInst>(I);
2515 if (!II)
2516 return false;
2517
2518 switch (II->getIntrinsicID()) {
2519 case Intrinsic::fma:
2520 case Intrinsic::vp_fma:
2521 case Intrinsic::fmuladd:
2522 case Intrinsic::vp_fmuladd:
2523 return Operand == 0 || Operand == 1;
2524 case Intrinsic::vp_shl:
2525 case Intrinsic::vp_lshr:
2526 case Intrinsic::vp_ashr:
2527 case Intrinsic::vp_udiv:
2528 case Intrinsic::vp_sdiv:
2529 case Intrinsic::vp_urem:
2530 case Intrinsic::vp_srem:
2531 case Intrinsic::ssub_sat:
2532 case Intrinsic::vp_ssub_sat:
2533 case Intrinsic::usub_sat:
2534 case Intrinsic::vp_usub_sat:
2535 case Intrinsic::vp_select:
2536 return Operand == 1;
2537
2538 case Intrinsic::vp_add:
2539 case Intrinsic::vp_mul:
2540 case Intrinsic::vp_and:
2541 case Intrinsic::vp_or:
2542 case Intrinsic::vp_xor:
2543 case Intrinsic::vp_fadd:
2544 case Intrinsic::vp_fmul:
2545 case Intrinsic::vp_icmp:
2546 case Intrinsic::vp_fcmp:
2547 case Intrinsic::smin:
2548 case Intrinsic::vp_smin:
2549 case Intrinsic::umin:
2550 case Intrinsic::vp_umin:
2551 case Intrinsic::smax:
2552 case Intrinsic::vp_smax:
2553 case Intrinsic::umax:
2554 case Intrinsic::vp_umax:
2555 case Intrinsic::sadd_sat:
2556 case Intrinsic::vp_sadd_sat:
2557 case Intrinsic::uadd_sat:
2558 case Intrinsic::vp_uadd_sat:
2559
2560 case Intrinsic::vp_sub:
2561 case Intrinsic::vp_fsub:
2562 case Intrinsic::vp_fdiv:
2563 return Operand == 0 || Operand == 1;
2564 default:
2565 return false;
2566 }
2567}
2568
2569
2570
2571
2575
2577 return false;
2578
2579
2580
2581
2582
2583
2584 if (!ST->sinkSplatOperands())
2585 return false;
2586
2587 for (auto OpIdx : enumerate(I->operands())) {
2589 continue;
2590
2591 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
2592
2593 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
2594 continue;
2595
2596
2599 continue;
2600
2601
2602 if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))
2603 continue;
2604
2605
2606
2607 for (Use &U : Op->uses()) {
2610 return false;
2611 }
2612
2615 }
2616 return true;
2617}
2618
2622
2623
2624 if (!ST->enableUnalignedScalarMem())
2626
2627 if (!ST->hasStdExtZbb() && !ST->hasStdExtZbkb() && !IsZeroCmp)
2629
2630 Options.AllowOverlappingLoads = true;
2634 Options.LoadSizes = {8, 4, 2, 1};
2635 Options.AllowedTailExpansions = {3, 5, 6};
2636 } else {
2637 Options.LoadSizes = {4, 2, 1};
2638 Options.AllowedTailExpansions = {3};
2639 }
2641}
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Cost tables and simple lookup functions.
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static cl::opt< int > InstrCost("inline-instr-cost", cl::Hidden, cl::init(5), cl::desc("Cost of a single instruction when inlining"))
mir Rename Register Operands
static const Function * getCalledFunction(const Value *V)
uint64_t IntrinsicInst * II
static bool isRepeatedConcatMask(ArrayRef< int > Mask, int &SubVectorSize)
static unsigned isM1OrSmaller(MVT VT)
static cl::opt< unsigned > SLPMaxVF("riscv-v-slp-max-vf", cl::desc("Overrides result used for getMaximumVF query which is used " "exclusively by SLP vectorizer."), cl::Hidden)
static cl::opt< unsigned > RVVRegisterWidthLMUL("riscv-v-register-bit-width-lmul", cl::desc("The LMUL to use for getRegisterBitWidth queries. Affects LMUL used " "by autovectorized code. Fractional LMULs are not supported."), cl::init(2), cl::Hidden)
static InstructionCost getIntImmCostImpl(const DataLayout &DL, const RISCVSubtarget *ST, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, bool FreeZeroes)
static VectorType * getVRGatherIndexType(MVT DataVT, const RISCVSubtarget &ST, LLVMContext &C)
static const CostTblEntry VectorIntrinsicCostTable[]
static bool canUseShiftPair(Instruction *Inst, const APInt &Imm)
static unsigned getISDForVPIntrinsicID(Intrinsic::ID ID)
This file defines a TargetTransformInfo::Concept conforming object specific to the RISC-V target mach...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static Type * getValueType(Value *V)
Returns the type of the given value/instruction V.
This file describes how to lower LLVM code to machine code.
This pass exposes codegen information to IR-level passes.
Class for arbitrary precision integers.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on arguments.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
std::optional< unsigned > getVScaleForTuning() const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I)
std::optional< unsigned > getMaxVScale() const
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *Ty, int &Index, VectorType *&SubTy) const
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
unsigned getRegUsageForType(Type *Ty)
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
Try to calculate op costs for min/max reduction operations.
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind)
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
bool isLegalAddImmediate(int64_t imm)
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, ArrayRef< Value * > VL={})
Estimate the overhead of scalarizing an instruction.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0)
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
@ ICMP_SLT
signed less than
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
bool isFPPredicate() const
bool isIntPredicate() const
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeStoreSizeInBits(Type *Ty) const
Returns the maximum number of bits that may be overwritten by storing the specified type; always a mu...
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Convenience struct for specifying and reasoning about fast-math flags.
Class to represent fixed width SIMD vectors.
static FixedVectorType * getDoubleElementsVectorType(FixedVectorType *VTy)
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
static InstructionCost getInvalid(CostType Val=0)
bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
Represents a single loop in the control flow graph.
static MVT getFloatingPointVT(unsigned BitWidth)
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
unsigned getMaxLMULForFixedLengthVectors() const
bool hasVInstructionsF64() const
unsigned getRealMinVLen() const
bool useRVVForFixedLengthVectors() const
bool hasConditionalMoveFusion() const
bool hasVInstructionsF16() const
bool hasVInstructions() const
std::optional< unsigned > getRealVLen() const
bool hasOptimizedSegmentLoadStore(unsigned NF) const
unsigned getRealMaxVLen() const
bool hasVInstructionsF32() const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I)
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2)
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const
bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment)
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
TTI::AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
bool isLegalMaskedLoadStore(Type *DataType, Align Alignment)
bool canSplatOperand(Instruction *I, int Operand) const
Return true if the (vector) instruction I will be lowered to an instruction with a scalar splat opera...
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr)
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader)
See if I should be considered for address type promotion.
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I)
std::optional< unsigned > getVScaleForTuning() const
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, FastMathFlags FMF, TTI::TargetCostKind CostKind)
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
std::optional< unsigned > getMaxVScale() const
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, ArrayRef< Value * > VL={})
InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind)
TargetTransformInfo::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
bool shouldExpandReduction(const IntrinsicInst *II) const
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys)
InstructionCost getStoreImmCost(Type *VecTy, TTI::OperandValueInfo OpInfo, TTI::TargetCostKind CostKind)
Return the cost of materializing an immediate for a value operand of a store instruction.
bool isLegalMaskedCompressStore(Type *DataTy, Align Alignment)
bool isLegalStridedLoadStore(Type *DataType, Align Alignment)
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
unsigned getRegUsageForType(Type *Ty)
bool isLegalMaskedGather(Type *DataType, Align Alignment)
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpdInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
bool isLegalMaskedScatter(Type *DataType, Align Alignment)
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
bool hasActiveVectorLength(unsigned Opcode, Type *DataType, Align Alignment) const
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
bool shouldExpandCttzElements(EVT VT) const override
Return true if the @llvm.experimental.cttz.elts intrinsic should be expanded using generic code in SelectionDAGBuilder.
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
MVT getContainerForFixedLengthVector(MVT VT) const
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
static RISCVII::VLMUL getLMUL(MVT VT)
bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace, const DataLayout &) const
Returns whether or not generating an interleaved load/store intrinsic for this type will be legal.
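The RISC-V cost hooks listed above are normally reached through the generic TargetTransformInfo wrapper rather than called directly. A minimal sketch of such a query, assuming the public TTI wrapper signatures shown above; the helper name and the 2x budget are invented for illustration and are not part of this file:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

// Hypothetical helper: compare a plain vector load against a masked load of
// the same type under the reciprocal-throughput cost model.
static bool maskedLoadIsCheapEnough(const TargetTransformInfo &TTI, Type *VecTy,
                                    Align Alignment, unsigned AddrSpace) {
  const auto Kind = TargetTransformInfo::TCK_RecipThroughput;
  InstructionCost Plain =
      TTI.getMemoryOpCost(Instruction::Load, VecTy, Alignment, AddrSpace, Kind);
  InstructionCost Masked = TTI.getMaskedMemoryOpCost(
      Instruction::Load, VecTy, Alignment, AddrSpace, Kind);
  return Masked.isValid() && Masked <= Plain * 2; // arbitrary 2x budget
}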
The main scalar evolution driver.
static bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
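A brief usage sketch of SmallVector/SmallVectorImpl as referenced above; the element type and inline capacity are arbitrary examples:

#include "llvm/ADT/SmallVector.h"

using namespace llvm;

// Collect shuffle mask indices; small masks stay in inline storage.
static void buildIota(SmallVectorImpl<int> &Mask, unsigned NumElts) {
  Mask.clear();
  for (unsigned I = 0; I != NumElts; ++I)
    Mask.push_back(static_cast<int>(I));
}

// Callers typically pass a SmallVector with an inline capacity guess:
//   SmallVector<int, 16> Mask;
//   buildIota(Mask, 8);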
An instruction for storing to memory.
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned getMaxExpandSizeMemcmp(bool OptSize) const
Get maximum # of load operations permitted for memcmp.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger size, needs to be expanded to some other code sequence, or the target has a custom expander for it.
MVT getTypeToPromoteTo(unsigned Op, MVT VT) const
If the action for this operation is to promote, this method returns the ValueType to promote to.
const DataLayout & getDataLayout() const
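The TargetLoweringBase queries above are usually combined into an ISD-level legality check before a type-based cost is trusted. A hedged sketch under those signatures; the helper name is invented for illustration:

#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Type.h"

using namespace llvm;

// Hypothetical helper: translate an IR opcode and type to an ISD node and
// ask whether the target lowers that operation with custom code.
static bool isCustomLowered(const TargetLoweringBase &TLI,
                            const DataLayout &DL, unsigned Opcode, Type *Ty) {
  int ISD = TLI.InstructionOpcodeToISD(Opcode);
  EVT VT = TLI.getValueType(DL, Ty, /*AllowUnknown=*/true);
  if (ISD == 0 || !VT.isSimple())
    return false;
  return TLI.isOperationCustom(ISD, VT);
}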
TTI::AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const
bool isLoweredToCall(const Function *F) const
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind)
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCK_Latency
The latency of instruction.
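The cost kinds above select which axis a query optimizes for. A small sketch of choosing one; the optimize-for-size condition is an illustrative convention, not something this file prescribes:

#include "llvm/Analysis/TargetTransformInfo.h"

using namespace llvm;

// Pick a cost kind: code size when optimizing for size, reciprocal
// throughput otherwise (a common convention in the vectorizers).
static TargetTransformInfo::TargetCostKind pickCostKind(bool OptForSize) {
  return OptForSize ? TargetTransformInfo::TCK_CodeSize
                    : TargetTransformInfo::TCK_RecipThroughput;
}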
static bool requiresOrderedReduction(std::optional< FastMathFlags > FMF)
A helper function to determine the type of reduction algorithm used for a given Opcode and set of FastMathFlags FMF.
PopcntSupportKind
Flags indicating the kind of support for population count.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
ShuffleKind
The various kinds of shuffle patterns for vector queries.
@ SK_InsertSubvector
InsertSubvector. Index indicates start offset.
@ SK_Select
Selects elements from the corresponding lane of either source operand.
@ SK_PermuteSingleSrc
Shuffle elements of single source vector with any shuffle mask.
@ SK_Transpose
Transpose two vectors.
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
@ SK_Broadcast
Broadcast element 0 to all other elements.
@ SK_PermuteTwoSrc
Merge elements from two source vectors into one with any shuffle mask.
@ SK_Reverse
Reverse the order of the vector.
@ SK_ExtractSubvector
ExtractSubvector. Index indicates start offset.
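The shuffle kinds above correspond to recognizable mask shapes. A sketch that builds two of them by hand; the vector length is an arbitrary example:

#include "llvm/ADT/SmallVector.h"

using namespace llvm;

// For a 4-element vector:
//   SK_Broadcast mask: {0, 0, 0, 0}  (element 0 copied to every lane)
//   SK_Reverse   mask: {3, 2, 1, 0}  (lanes in reverse order)
static void buildExampleMasks(SmallVectorImpl<int> &Broadcast,
                              SmallVectorImpl<int> &Reverse, unsigned NumElts) {
  Broadcast.assign(NumElts, 0);
  Reverse.clear();
  for (unsigned I = 0; I != NumElts; ++I)
    Reverse.push_back(static_cast<int>(NumElts - 1 - I));
}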
CastContextHint
Represents a hint about the context in which a cast is used.
@ None
The cast is not used with a load/store of any kind.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
static IntegerType * getInt1Ty(LLVMContext &C)
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old number of lanes.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
static IntegerType * getInt64Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
static std::optional< unsigned > getFunctionalOpcodeForVP(Intrinsic::ID ID)
LLVM Value Representation.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector.
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Type * getElementType() const
constexpr ScalarTy getFixedValue() const
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
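The Type, VectorType, ElementCount, and TypeSize helpers above compose as follows. A minimal sketch; the element width and counts are arbitrary examples:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/TypeSize.h"

using namespace llvm;

// Build <4 x i32> and <vscale x 4 x i32> and compare their bit sizes.
static bool fixedFitsInScalableMin(LLVMContext &Ctx) {
  Type *I32 = Type::getIntNTy(Ctx, 32);
  auto *FixedVT = VectorType::get(I32, ElementCount::getFixed(4));
  auto *ScalableVT = VectorType::get(I32, ElementCount::getScalable(4));
  TypeSize FixedSize = FixedVT->getPrimitiveSizeInBits();       // 128 bits
  TypeSize ScalableSize = ScalableVT->getPrimitiveSizeInBits(); // vscale x 128
  // Compare the fixed size against the scalable type's known minimum size.
  return FixedSize.getFixedValue() <= ScalableSize.getKnownMinValue();
}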
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter) to floating point.
@ FADD
Simple binary floating point operators.
@ SIGN_EXTEND
Conversion operators.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of type iN, then return the top part.
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the destination VT.
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
bool match(Val *V, const Pattern &P)
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
auto m_Undef()
Match an arbitrary undef constant.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
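The PatternMatch helpers above are the usual way to recognize the canonical insertelement-plus-shufflevector splat idiom. A hedged sketch; the helper name is invented, and m_ZeroMask is an additional matcher from PatternMatch.h that is not listed above:

#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"

using namespace llvm;
using namespace llvm::PatternMatch;

// Returns the scalar being splatted if V is an insertelement into undef at
// index 0 followed by a zero-mask shuffle, otherwise nullptr.
static Value *matchSplatScalar(Value *V) {
  Value *Scalar = nullptr;
  if (match(V, m_Shuffle(m_InsertElt(m_Undef(), m_Value(Scalar), m_ZeroInt()),
                         m_Undef(), m_ZeroMask())))
    return Scalar;
  return nullptr;
}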
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
static constexpr unsigned RVVBitsPerBlock
initializer< Ty > init(const Ty &Val)
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
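CostTableLookup is typically paired with a static table keyed by ISD opcode and MVT. A hedged sketch; the table contents and fallback value are invented for illustration:

#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/ISDOpcodes.h"

using namespace llvm;

// Invented example table: per-type costs for an ISD::ADD-style query.
static const CostTblEntry ExampleCostTbl[] = {
    {ISD::ADD, MVT::v4i32, 1},
    {ISD::ADD, MVT::v8i32, 2},
};

static unsigned lookupExampleCost(int ISDOpc, MVT VT) {
  if (const auto *Entry = CostTableLookup(ExampleCostTbl, ISDOpc, VT))
    return Entry->Cost;
  return 4; // fallback guess when the table has no entry for this type
}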
bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B, C, ...), such that A is the 0-based index of the item in the sequence, and B, C, ... are the values from the original input ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit version).
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
constexpr int PoisonMaskElem
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
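The mask helpers above build the shuffle masks used when costing strided and interleaved accesses. A small usage sketch; the factor and VF are arbitrary, and llvm::equal (listed below) is used only to spell out the expected results:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/VectorUtils.h"

using namespace llvm;

// Returns true: for VF = 4 and two input vectors,
//   createInterleaveMask(4, 2) -> {0, 4, 1, 5, 2, 6, 3, 7}
//   createStrideMask(1, 2, 4)  -> {1, 3, 5, 7}  (every 2nd lane, from lane 1)
static bool checkExampleMasks() {
  SmallVector<int, 16> Interleave =
      createInterleaveMask(/*VF=*/4, /*NumVecs=*/2);
  SmallVector<int, 16> Stride =
      createStrideMask(/*Start=*/1, /*Stride=*/2, /*VF=*/4);
  return equal(Interleave, ArrayRef<int>{0, 4, 1, 5, 2, 6, 3, 7}) &&
         equal(Stride, ArrayRef<int>{1, 3, 5, 7});
}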
DWARFExpression::Operation Op
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
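A few worked calls for the bit-manipulation helpers above, plus a sketch of the common "round up, then take the log2" computation; the input values are arbitrary examples:

#include "llvm/Support/MathExtras.h"
#include <cstdint>

using namespace llvm;

// Examples of the helpers referenced above:
//   isPowerOf2_32(16)        -> true
//   Log2_32(16)              -> 4
//   Log2_32_Ceil(17)         -> 5
//   PowerOf2Ceil(17)         -> 32
//   countr_zero(0x40u)       -> 6
//   isShiftedMask_64(0x0ff0) -> true
//   divideCeil(10, 4)        -> 3
//   bit_floor(10u)           -> 8
static unsigned roundedLog2(uint32_t N) {
  // Round N up to a power of two, then take the exact log2 of the result.
  uint64_t Rounded = PowerOf2Ceil(N);
  return Log2_32(static_cast<uint32_t>(Rounded));
}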
This struct is a compact representation of a valid (non-zero power of two) alignment.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
unsigned Insns
TODO: Some of these could be merged.
Returns options for expansion of memcmp. IsZeroCmp is true if this is the expansion of memcmp(p1, p2, s) == 0.
Describe known properties for a set of pointers.
Parameters that control the generic loop unrolling transformation.
bool UpperBound
Allow using trip count upper bound to unroll loops.
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold, but used for partial/runtime unrolling (set to UINT_MAX to disable).
bool UnrollAndJam
Allow unroll and jam. Used to enable unroll and jam for the target.
bool UnrollRemainder
Allow unrolling of all the iterations of the runtime loop remainder.
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number of loop iterations is not known at compile time).
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminate compares and branches).
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).
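getUnrollingPreferences fills in the UnrollingPreferences fields listed above. A hedged sketch of the general shape of such a hook; the specific settings are illustrative and are not the values this file chooses:

#include "llvm/Analysis/TargetTransformInfo.h"

using namespace llvm;

// Illustrative only: enable runtime/partial unrolling and disable unrolling
// when optimizing for size.
static void tunePreferences(TargetTransformInfo::UnrollingPreferences &UP) {
  UP.Runtime = true;
  UP.Partial = true;
  UP.UpperBound = true;
  UP.UnrollRemainder = true;
  UP.OptSizeThreshold = 0;        // do not unroll under -Os
  UP.PartialOptSizeThreshold = 0; // nor partially unroll under -Os
}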