LLVM: lib/Target/AArch64/AArch64TargetTransformInfo.cpp Source File


#include "llvm/IR/IntrinsicsAArch64.h"
#include
#include

using namespace llvm;

#define DEBUG_TYPE "aarch64tti"

    "sve-prefer-fixed-over-scalable-if-equal", cl::Hidden);

        "Penalty of calling a function that requires a change to PSTATE.SM"));

    cl::desc("Penalty of inlining a call that requires a change to PSTATE.SM"));

    cl::desc("The cost of a histcnt instruction"));

    cl::desc("The number of instructions to search for a redundant dmb"));

namespace {
class TailFoldingOption {

  bool NeedsDefault = true;

  void setInitialBits(TailFoldingOpts Bits) { InitialBits = Bits; }

  void setNeedsDefault(bool V) { NeedsDefault = V; }

    EnableBits |= Bit;
    DisableBits &= ~Bit;
  }

    EnableBits &= ~Bit;
    DisableBits |= Bit;
  }

    assert((InitialBits == TailFoldingOpts::Disabled || !NeedsDefault) &&
           "Initial bits should only include one of "
           "(disabled|all|simple|default)");
    Bits = NeedsDefault ? DefaultBits : InitialBits;
    Bits |= EnableBits;
    Bits &= ~DisableBits;

  }

    errs() << "invalid argument '" << Opt
           << "' to -sve-tail-folding=; the option should be of the form\n"
              "  (disabled|all|default|simple)[+(reductions|recurrences"
              "|reverse|noreductions|norecurrences|noreverse)]\n";
  }

public:

  void operator=(const std::string &Val) {
    if (Val.empty()) {
      return;
    }

    setNeedsDefault(false);

    unsigned StartIdx = 1;
    if (TailFoldTypes[0] == "disabled")
      setInitialBits(TailFoldingOpts::Disabled);
    else if (TailFoldTypes[0] == "all")
      setInitialBits(TailFoldingOpts::All);
    else if (TailFoldTypes[0] == "default")
      setNeedsDefault(true);
    else if (TailFoldTypes[0] == "simple")
      setInitialBits(TailFoldingOpts::Simple);
    else {
      StartIdx = 0;
      setInitialBits(TailFoldingOpts::Disabled);
    }

    for (unsigned I = StartIdx; I < TailFoldTypes.size(); I++) {
      if (TailFoldTypes[I] == "reductions")
        setEnableBit(TailFoldingOpts::Reductions);
      else if (TailFoldTypes[I] == "recurrences")
        setEnableBit(TailFoldingOpts::Recurrences);
      else if (TailFoldTypes[I] == "reverse")
        setEnableBit(TailFoldingOpts::Reverse);
      else if (TailFoldTypes[I] == "noreductions")
        setDisableBit(TailFoldingOpts::Reductions);
      else if (TailFoldTypes[I] == "norecurrences")
        setDisableBit(TailFoldingOpts::Recurrences);
      else if (TailFoldTypes[I] == "noreverse")
        setDisableBit(TailFoldingOpts::Reverse);
      else
    }
  }

  }
};
} // namespace

    "sve-tail-folding",
    "Control the use of vectorisation using tail-folding for SVE where the"
    " option is specified in the form (Initial)[+(Flag1|Flag2|...)]:"
    "\ndisabled (Initial) No loop types will vectorize using "
    "tail-folding"
    "\ndefault (Initial) Uses the default tail-folding settings for "
    "the target CPU"
    "\nall (Initial) All legal loop types will vectorize using "
    "tail-folding"
    "\nsimple (Initial) Use tail-folding for simple loops (not "
    "reductions or recurrences)"
    "\nreductions Use tail-folding for loops containing reductions"
    "\nnoreductions Inverse of above"
    "\nrecurrences Use tail-folding for loops containing fixed order "
    "recurrences"
    "\nnorecurrences Inverse of above"
    "\nreverse Use tail-folding for loops requiring reversed "
    "predicates"
    "\nnoreverse Inverse of above"),

    "enable-fixedwidth-autovec-in-streaming-mode", cl::init(false), cl::Hidden);

    "enable-scalable-autovec-in-streaming-mode", cl::init(false), cl::Hidden);

      .Case("__arm_sme_state", true)
      .Case("__arm_tpidr2_save", true)
      .Case("__arm_tpidr2_restore", true)
      .Case("__arm_za_disable", true)
}

    if (isa(I) && I.isDebugOrPseudoInst() &&
        (cast(I).isInlineAsm() || isa(I) ||
      return true;
    }
  }
  return false;
}

  StringRef FeatureStr = F.getFnAttribute(AttributeStr).getValueAsString();
  FeatureStr.split(Features, ",");
}

  return F.hasFnAttribute("fmv-features");
}

                                           const Function *Callee) const {
  SMEAttrs CallerAttrs(*Caller), CalleeAttrs(*Callee);

  }

    return false;

  if (CallerAttrs.requiresLazySave(CalleeAttrs) ||
      CallerAttrs.requiresSMChange(CalleeAttrs) ||
      CallerAttrs.requiresPreservingZT0(CalleeAttrs) ||
      CallerAttrs.requiresPreservingAllZAState(CalleeAttrs)) {
    return false;
  }

}

    return false;

        auto FVTy = dyn_cast<FixedVectorType>(Ty);
        return FVTy &&
               FVTy->getScalarSizeInBits() * FVTy->getNumElements() > 128;
      }))
    return false;

  return true;
}

unsigned
                                              unsigned DefaultCallPenalty) const {

  if (F == Call.getCaller())
  }

  return DefaultCallPenalty;
}

}

    return 0;

  if (Val < 0)
    Val = ~Val;

  return Insn.size();
}

  if (BitSize == 0)
    return ~0U;

  APInt ImmVal = Imm;
  if (BitSize & 0x3f)
    ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);

  for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
  }

  return std::max(1, Cost);
}

  if (BitSize == 0)

  unsigned ImmIdx = ~0U;
  switch (Opcode) {
  default:
  case Instruction::GetElementPtr:
    if (Idx == 0)
  case Instruction::Store:
    ImmIdx = 0;
    break;
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:
    ImmIdx = 1;
    break;

  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    if (Idx == 1)
    break;
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::IntToPtr:
  case Instruction::PtrToInt:
  case Instruction::BitCast:
  case Instruction::PHI:
  case Instruction::Call:
  case Instruction::Select:
  case Instruction::Ret:
  case Instruction::Load:
    break;
  }

  if (Idx == ImmIdx) {
    int NumConstants = (BitSize + 63) / 64;
  }
}

  if (BitSize == 0)

  if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)

  switch (IID) {
  default:
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:
    if (Idx == 1) {
      int NumConstants = (BitSize + 63) / 64;
    }
    break;
  case Intrinsic::experimental_stackmap:
    if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
    break;
  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint:
    if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
    break;
  case Intrinsic::experimental_gc_statepoint:
    if ((Idx < 5) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
    break;
  }
}

  if (TyWidth == 32 || TyWidth == 64)

}

}

  Type *BucketPtrsTy = ICA.getArgTypes()[0];
  Type *EltTy = ICA.getArgTypes()[1];
  unsigned TotalHistCnts = 1;

  if (VectorType *VTy = dyn_cast<VectorType>(BucketPtrsTy)) {
    unsigned EC = VTy->getElementCount().getKnownMinValue();

    unsigned LegalEltSize = EltSize <= 32 ? 32 : 64;

    if (EC == 2 || (LegalEltSize == 32 && EC == 4))

    TotalHistCnts = EC / NaturalVectorWidth;
  }

}
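// Illustrative only (an assumption based on the argument handling above, not
// part of the original file): this cost query is for calls shaped roughly like
// the histogram-add intrinsic, where the first argument is a vector of bucket
// pointers and the second is the increment's element type, e.g.
//   call void @llvm.experimental.vector.histogram.add.nxv2p0.i64(
//       <vscale x 2 x ptr> %buckets, i64 1, <vscale x 2 x i1> %mask)
// Wider element counts are split into several legal "histcnt" operations via
// TotalHistCnts = EC / NaturalVectorWidth.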

561

565

566

567

568

570 if (auto *VTy = dyn_cast(RetTy))

573

574 switch (ICA.getID()) {

575 case Intrinsic::experimental_vector_histogram_add:

576 if (!ST->hasSVE2())

579 case Intrinsic::umin:

580 case Intrinsic::umax:

581 case Intrinsic::smin:

582 case Intrinsic::smax: {

583 static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,

584 MVT::v8i16, MVT::v2i32, MVT::v4i32,

585 MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32,

586 MVT::nxv2i64};

588

589 if (LT.second == MVT::v2i64)

590 return LT.first * 2;

591 if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }))

592 return LT.first;

593 break;

594 }

595 case Intrinsic::sadd_sat:

596 case Intrinsic::ssub_sat:

597 case Intrinsic::uadd_sat:

598 case Intrinsic::usub_sat: {

599 static const auto ValidSatTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,

600 MVT::v8i16, MVT::v2i32, MVT::v4i32,

601 MVT::v2i64};

603

604

605 unsigned Instrs =

606 LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits() ? 1 : 4;

607 if (any_of(ValidSatTys, [&LT](MVT M) { return M == LT.second; }))

608 return LT.first * Instrs;

609 break;

610 }

611 case Intrinsic::abs: {

612 static const auto ValidAbsTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,

613 MVT::v8i16, MVT::v2i32, MVT::v4i32,

614 MVT::v2i64};

616 if (any_of(ValidAbsTys, [&LT](MVT M) { return M == LT.second; }))

617 return LT.first;

618 break;

619 }

620 case Intrinsic::bswap: {

621 static const auto ValidAbsTys = {MVT::v4i16, MVT::v8i16, MVT::v2i32,

622 MVT::v4i32, MVT::v2i64};

624 if (any_of(ValidAbsTys, [&LT](MVT M) { return M == LT.second; }) &&

625 LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits())

626 return LT.first;

627 break;

628 }

629 case Intrinsic::stepvector: {

632

633

634 if (LT.first > 1) {

638 Cost += AddCost * (LT.first - 1);

639 }

641 }

642 case Intrinsic::vector_extract:

643 case Intrinsic::vector_insert: {

644

645

646

647

648

649

652 break;

653

656 bool IsExtract = ICA.getID() == Intrinsic::vector_extract;

659

660

662 break;

663

673 break;

674 }

675 case Intrinsic::bitreverse: {

676 static const CostTblEntry BitreverseTbl[] = {

677 {Intrinsic::bitreverse, MVT::i32, 1},

678 {Intrinsic::bitreverse, MVT::i64, 1},

679 {Intrinsic::bitreverse, MVT::v8i8, 1},

680 {Intrinsic::bitreverse, MVT::v16i8, 1},

681 {Intrinsic::bitreverse, MVT::v4i16, 2},

682 {Intrinsic::bitreverse, MVT::v8i16, 2},

683 {Intrinsic::bitreverse, MVT::v2i32, 2},

684 {Intrinsic::bitreverse, MVT::v4i32, 2},

685 {Intrinsic::bitreverse, MVT::v1i64, 2},

686 {Intrinsic::bitreverse, MVT::v2i64, 2},

687 };

689 const auto *Entry =

691 if (Entry) {

692

693

696 return LegalisationCost.first * Entry->Cost + 1;

697

698 return LegalisationCost.first * Entry->Cost;

699 }

700 break;

701 }

702 case Intrinsic::ctpop: {

703 if (!ST->hasNEON()) {

704

706 }

717 };

719 MVT MTy = LT.second;

721

722

724 RetTy->getScalarSizeInBits()

725 ? 1

726 : 0;

727 return LT.first * Entry->Cost + ExtraCost;

728 }

729 break;

730 }

731 case Intrinsic::sadd_with_overflow:

732 case Intrinsic::uadd_with_overflow:

733 case Intrinsic::ssub_with_overflow:

734 case Intrinsic::usub_with_overflow:

735 case Intrinsic::smul_with_overflow:

736 case Intrinsic::umul_with_overflow: {

737 static const CostTblEntry WithOverflowCostTbl[] = {

738 {Intrinsic::sadd_with_overflow, MVT::i8, 3},

739 {Intrinsic::uadd_with_overflow, MVT::i8, 3},

740 {Intrinsic::sadd_with_overflow, MVT::i16, 3},

741 {Intrinsic::uadd_with_overflow, MVT::i16, 3},

742 {Intrinsic::sadd_with_overflow, MVT::i32, 1},

743 {Intrinsic::uadd_with_overflow, MVT::i32, 1},

744 {Intrinsic::sadd_with_overflow, MVT::i64, 1},

745 {Intrinsic::uadd_with_overflow, MVT::i64, 1},

746 {Intrinsic::ssub_with_overflow, MVT::i8, 3},

747 {Intrinsic::usub_with_overflow, MVT::i8, 3},

748 {Intrinsic::ssub_with_overflow, MVT::i16, 3},

749 {Intrinsic::usub_with_overflow, MVT::i16, 3},

750 {Intrinsic::ssub_with_overflow, MVT::i32, 1},

751 {Intrinsic::usub_with_overflow, MVT::i32, 1},

752 {Intrinsic::ssub_with_overflow, MVT::i64, 1},

753 {Intrinsic::usub_with_overflow, MVT::i64, 1},

754 {Intrinsic::smul_with_overflow, MVT::i8, 5},

755 {Intrinsic::umul_with_overflow, MVT::i8, 4},

756 {Intrinsic::smul_with_overflow, MVT::i16, 5},

757 {Intrinsic::umul_with_overflow, MVT::i16, 4},

758 {Intrinsic::smul_with_overflow, MVT::i32, 2},

759 {Intrinsic::umul_with_overflow, MVT::i32, 2},

760 {Intrinsic::smul_with_overflow, MVT::i64, 3},

761 {Intrinsic::umul_with_overflow, MVT::i64, 3},

762 };

767 return Entry->Cost;

768 break;

769 }

770 case Intrinsic::fptosi_sat:

771 case Intrinsic::fptoui_sat: {

773 break;

774 bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;

777

778

779 if ((LT.second == MVT::f32 || LT.second == MVT::f64 ||

780 LT.second == MVT::v2f32 || LT.second == MVT::v4f32 ||

781 LT.second == MVT::v2f64)) {

783 (LT.second == MVT::f64 && MTy == MVT::i32) ||

784 (LT.second == MVT::f32 && MTy == MVT::i64)))

785 return LT.first;

786

787 if (LT.second.getScalarType() == MVT::f32 && MTy.isFixedLengthVector() &&

790 }

791

792

793 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())

800 if ((LT.second == MVT::f16 && MTy == MVT::i32) ||

801 (LT.second == MVT::f16 && MTy == MVT::i64) ||

802 ((LT.second == MVT::v4f16 || LT.second == MVT::v8f16) &&

804 return LT.first;

805

809

810

811 if (LT.second.getScalarType() == MVT::f16 && MTy.isFixedLengthVector() &&

814

815

816 if ((LT.second.getScalarType() == MVT::f32 ||

817 LT.second.getScalarType() == MVT::f64 ||

818 LT.second.getScalarType() == MVT::f16) &&

820 Type *LegalTy =

822 if (LT.second.isVector())

823 LegalTy = VectorType::get(LegalTy, LT.second.getVectorElementCount());

826 LegalTy, {LegalTy, LegalTy});

829 LegalTy, {LegalTy, LegalTy});

831 return LT.first * Cost +

832 ((LT.second.getScalarType() != MVT::f16 || ST->hasFullFP16()) ? 0

833 : 1);

834 }

835

836

839 if (LT.second.isVector()) {

840 FPTy = VectorType::get(FPTy, LT.second.getVectorElementCount());

842 }

848 getCastInstrCost(IsSigned ? Instruction::FPToSI : Instruction::FPToUI,

850 if (IsSigned) {

851 Type *CondTy = RetTy->getWithNewBitWidth(1);

856 }

857 return LT.first * Cost;

858 }

859 case Intrinsic::fshl:

860 case Intrinsic::fshr: {

862 break;

863

864

867 break;

868

871

873 {Intrinsic::fshl, MVT::v4i32, 3},

874 {Intrinsic::fshl, MVT::v2i64, 3}, {Intrinsic::fshl, MVT::v16i8, 4},

875 {Intrinsic::fshl, MVT::v8i16, 4}, {Intrinsic::fshl, MVT::v2i32, 3},

876 {Intrinsic::fshl, MVT::v8i8, 4}, {Intrinsic::fshl, MVT::v4i16, 4}};

877

878

879 const auto *Entry =

880 CostTableLookup(FshlTbl, Intrinsic::fshl, LegalisationCost.second);

881 if (Entry)

882 return LegalisationCost.first * Entry->Cost;

883 }

884

886 if (RetTy->isIntegerTy())

887 break;

888

889

890

891 bool HigherCost = (RetTy->getScalarSizeInBits() != 32 &&

892 RetTy->getScalarSizeInBits() < 64) ||

893 (RetTy->getScalarSizeInBits() % 64 != 0);

894 unsigned ExtraCost = HigherCost ? 1 : 0;

895 if (RetTy->getScalarSizeInBits() == 32 ||

896 RetTy->getScalarSizeInBits() == 64)

897 ExtraCost = 0;

898

899 else if (HigherCost)

900 ExtraCost = 1;

901 else

902 break;

903 return TyL.first + ExtraCost;

904 }

905 case Intrinsic::get_active_lane_mask: {

910 if (!getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT) &&

912

913

914

915

916

917

918

919

920

921 return RetTy->getNumElements() * 2;

922 }

923 }

924 break;

925 }

926 case Intrinsic::experimental_vector_match: {

927 auto *NeedleTy = cast(ICA.getArgTypes()[1]);

929 unsigned SearchSize = NeedleTy->getNumElements();

930 if (!getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize)) {

931

932

933

934

935

937 if (isa(RetTy))

940 }

941 break;

942 }

943 default:

944 break;

945 }

947}

  auto RequiredType = II.getType();

  auto *PN = dyn_cast<PHINode>(II.getArgOperand(0));
  assert(PN && "Expected Phi Node!");

  if (!PN->hasOneUse())
    return std::nullopt;

  for (Value *IncValPhi : PN->incoming_values()) {
    auto *Reinterpret = dyn_cast<IntrinsicInst>(IncValPhi);
    if (!Reinterpret ||
        Reinterpret->getIntrinsicID() !=
            Intrinsic::aarch64_sve_convert_to_svbool ||
        RequiredType != Reinterpret->getArgOperand(0)->getType())
      return std::nullopt;
  }

  for (unsigned I = 0; I < PN->getNumIncomingValues(); I++) {
    auto *Reinterpret = cast(PN->getIncomingValue(I));
    NPN->addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(I));
  }

}

986

987

988

989

990

991

992

993

994

995

996static std::optional<Instruction *>

998 auto BinOp = dyn_cast(II.getOperand(0));

999 if (!BinOp)

1000 return std::nullopt;

1001

1002 auto IntrinsicID = BinOp->getIntrinsicID();

1003 switch (IntrinsicID) {

1004 case Intrinsic::aarch64_sve_and_z:

1005 case Intrinsic::aarch64_sve_bic_z:

1006 case Intrinsic::aarch64_sve_eor_z:

1007 case Intrinsic::aarch64_sve_nand_z:

1008 case Intrinsic::aarch64_sve_nor_z:

1009 case Intrinsic::aarch64_sve_orn_z:

1010 case Intrinsic::aarch64_sve_orr_z:

1011 break;

1012 default:

1013 return std::nullopt;

1014 }

1015

1016 auto BinOpPred = BinOp->getOperand(0);

1017 auto BinOpOp1 = BinOp->getOperand(1);

1018 auto BinOpOp2 = BinOp->getOperand(2);

1019

1020 auto PredIntr = dyn_cast(BinOpPred);

1021 if (!PredIntr ||

1022 PredIntr->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)

1023 return std::nullopt;

1024

1025 auto PredOp = PredIntr->getOperand(0);

1026 auto PredOpTy = cast(PredOp->getType());

1027 if (PredOpTy != II.getType())

1028 return std::nullopt;

1029

1032 Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp1});

1033 NarrowedBinOpArgs.push_back(NarrowBinOpOp1);

1034 if (BinOpOp1 == BinOpOp2)

1035 NarrowedBinOpArgs.push_back(NarrowBinOpOp1);

1036 else

1038 Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp2}));

1039

1040 auto NarrowedBinOp =

1043}

1044

1045static std::optional<Instruction *>

1047

1048 if (isa(II.getArgOperand(0)))

1050

1052 return BinOpCombine;

1053

1054

1055 if (isa(II.getArgOperand(0)->getType()) ||

1056 isa(II.getType()))

1057 return std::nullopt;

1058

1060 Value *Cursor = II.getOperand(0), *EarliestReplacement = nullptr;

1061

1062 const auto *IVTy = cast(II.getType());

1063

1064

1065 while (Cursor) {

1066

1067

1068 const auto *CursorVTy = cast(Cursor->getType());

1069 if (CursorVTy->getElementCount().getKnownMinValue() <

1070 IVTy->getElementCount().getKnownMinValue())

1071 break;

1072

1073

1074 if (Cursor->getType() == IVTy)

1075 EarliestReplacement = Cursor;

1076

1077 auto *IntrinsicCursor = dyn_cast(Cursor);

1078

1079

1080 if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() ==

1081 Intrinsic::aarch64_sve_convert_to_svbool ||

1082 IntrinsicCursor->getIntrinsicID() ==

1083 Intrinsic::aarch64_sve_convert_from_svbool))

1084 break;

1085

1086 CandidatesForRemoval.insert(CandidatesForRemoval.begin(), IntrinsicCursor);

1087 Cursor = IntrinsicCursor->getOperand(0);

1088 }

1089

1090

1091

1092 if (!EarliestReplacement)

1093 return std::nullopt;

1094

1096}

  Value *UncastedPred;
  if (match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_convert_from_svbool>(
                      m_Intrinsic<Intrinsic::aarch64_sve_convert_to_svbool>(
                          m_Value(UncastedPred)))))

    if (cast(Pred->getType())->getMinNumElements() <=
        cast(UncastedPred->getType())->getMinNumElements())
      Pred = UncastedPred;

  return match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
                         m_ConstantInt<AArch64SVEPredPattern::all>()));
}
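// Illustrative only (an assumption, not taken from the original file): the
// helper above answers "is this predicate known all-active?", looking through
// a convert_from_svbool(convert_to_svbool(P)) round-trip when that cast does
// not widen the predicate, e.g. IR shaped like:
//   %p  = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
//   %b  = call <vscale x 16 x i1>
//             @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %p)
//   %p2 = call <vscale x 4 x i1>
//             @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %b)
// Here %p2 is recognised as an all-active predicate (ptrue with pattern "all",
// i.e. immediate 31).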

1113

1114

1115

1116static std::optional<Instruction *>

1118 bool hasInactiveVector) {

1119 int PredOperand = hasInactiveVector ? 1 : 0;

1120 int ReplaceOperand = hasInactiveVector ? 0 : 1;

1124 }

1125 return std::nullopt;

1126}

1127

1128

1129

1130static std::optional<Instruction *>

1133 !isallvm::UndefValue(II.getOperand(0)) &&

1134 !isallvm::PoisonValue(II.getOperand(0))) {

1137 }

1139}

1140

1141

1142static std::optional<Instruction *>

1144 int PredPos) {

1147 }

1148 return std::nullopt;

1149}

1150

1151

1152

1153static std::optional<Instruction *>

1158 if (RetTy->isStructTy()) {

1159 auto StructT = cast(RetTy);

1160 auto VecT = StructT->getElementType(0);

1162 for (unsigned i = 0; i < StructT->getNumElements(); i++) {

1163 ZerVec.push_back(VecT->isFPOrFPVectorTy() ? ConstantFP::get(VecT, 0.0)

1164 : ConstantInt::get(VecT, 0));

1165 }

1167 } else

1168 Node = RetTy->isFPOrFPVectorTy() ? ConstantFP::get(RetTy, 0.0)

1169 : ConstantInt::get(II.getType(), 0);

1170

1173 }

1174 return std::nullopt;

1175}

1176

1179

1180 auto *OpPredicate = II.getOperand(0);

1183

1187}

1188

1191 IntrinsicInst *Pg = dyn_cast(II.getArgOperand(1));

1192 if (!Pg)

1193 return std::nullopt;

1194

1195 if (Pg->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)

1196 return std::nullopt;

1197

1198 const auto PTruePattern =

1199 cast(Pg->getOperand(0))->getZExtValue();

1200 if (PTruePattern != AArch64SVEPredPattern::vl1)

1201 return std::nullopt;

1202

1203

1206 II.getArgOperand(0), II.getArgOperand(2), ConstantInt::get(IdxTy, 0));

1207 Insert->insertBefore(&II);

1208 Insert->takeName(&II);

1209

1211}

1212

1215

1216 auto *RetTy = cast(II.getType());

1218 II.getArgOperand(0));

1221}

1222

1226

1227

1229 return II_NA;

1230

1231

1232 auto *Pg = dyn_cast(II.getArgOperand(0));

1233 if (!Pg || Pg->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)

1234 return std::nullopt;

1235

1236 const auto PTruePattern =

1237 cast(Pg->getOperand(0))->getZExtValue();

1238 if (PTruePattern != AArch64SVEPredPattern::all)

1239 return std::nullopt;

1240

1241

1242 auto *SplatValue =

1243 dyn_cast_or_null(getSplatValue(II.getArgOperand(2)));

1244 if (!SplatValue || !SplatValue->isZero())

1245 return std::nullopt;

1246

1247

1248 auto *DupQLane = dyn_cast(II.getArgOperand(1));

1249 if (!DupQLane ||

1250 DupQLane->getIntrinsicID() != Intrinsic::aarch64_sve_dupq_lane)

1251 return std::nullopt;

1252

1253

1254 auto *DupQLaneIdx = dyn_cast(DupQLane->getArgOperand(1));

1255 if (!DupQLaneIdx || !DupQLaneIdx->isZero())

1256 return std::nullopt;

1257

1258 auto *VecIns = dyn_cast(DupQLane->getArgOperand(0));

1259 if (!VecIns || VecIns->getIntrinsicID() != Intrinsic::vector_insert)

1260 return std::nullopt;

1261

1262

1263

1264 if (!isa(VecIns->getArgOperand(0)))

1265 return std::nullopt;

1266

1267 if (!cast(VecIns->getArgOperand(2))->isZero())

1268 return std::nullopt;

1269

1270 auto *ConstVec = dyn_cast(VecIns->getArgOperand(1));

1271 if (!ConstVec)

1272 return std::nullopt;

1273

1274 auto *VecTy = dyn_cast(ConstVec->getType());

1275 auto *OutTy = dyn_cast(II.getType());

1276 if (!VecTy || !OutTy || VecTy->getNumElements() != OutTy->getMinNumElements())

1277 return std::nullopt;

1278

1279 unsigned NumElts = VecTy->getNumElements();

1280 unsigned PredicateBits = 0;

1281

1282

1283 for (unsigned I = 0; I < NumElts; ++I) {

1284 auto *Arg = dyn_cast(ConstVec->getAggregateElement(I));

1285 if (!Arg)

1286 return std::nullopt;

1287 if (!Arg->isZero())

1288 PredicateBits |= 1 << (I * (16 / NumElts));

1289 }

1290

1291

1292 if (PredicateBits == 0) {

1294 PFalse->takeName(&II);

1296 }

1297

1298

1299 unsigned Mask = 8;

1300 for (unsigned I = 0; I < 16; ++I)

1301 if ((PredicateBits & (1 << I)) != 0)

1302 Mask |= (I % 8);

1303

1304 unsigned PredSize = Mask & -Mask;

1307

1308

1309 for (unsigned I = 0; I < 16; I += PredSize)

1310 if ((PredicateBits & (1 << I)) == 0)

1311 return std::nullopt;

1312

1313 auto *PTruePat =

1314 ConstantInt::get(Type::getInt32Ty(Ctx), AArch64SVEPredPattern::all);

1316 {PredType}, {PTruePat});

1318 Intrinsic::aarch64_sve_convert_to_svbool, {PredType}, {PTrue});

1319 auto *ConvertFromSVBool =

1321 {II.getType()}, {ConvertToSVBool});

1322

1325}

1326

1329 Value *Pg = II.getArgOperand(0);

1330 Value *Vec = II.getArgOperand(1);

1331 auto IntrinsicID = II.getIntrinsicID();

1332 bool IsAfter = IntrinsicID == Intrinsic::aarch64_sve_lasta;

1333

1334

1337

1338

1339

1343 auto *OldBinOp = cast(Vec);

1344 auto OpC = OldBinOp->getOpcode();

1345 auto *NewLHS =

1347 auto *NewRHS =

1350 OpC, NewLHS, NewRHS, OldBinOp, OldBinOp->getName(), II.getIterator());

1352 }

1353 }

1354

1355 auto *C = dyn_cast(Pg);

1356 if (IsAfter && C && C->isNullValue()) {

1357

1360 Extract->insertBefore(&II);

1361 Extract->takeName(&II);

1363 }

1364

1365 auto *IntrPG = dyn_cast(Pg);

1366 if (!IntrPG)

1367 return std::nullopt;

1368

1369 if (IntrPG->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)

1370 return std::nullopt;

1371

1372 const auto PTruePattern =

1373 cast(IntrPG->getOperand(0))->getZExtValue();

1374

1375

1377 if (!MinNumElts)

1378 return std::nullopt;

1379

1380 unsigned Idx = MinNumElts - 1;

1381

1382

1383 if (IsAfter)

1385

1386

1387

1388

1389 auto *PgVTy = cast(Pg->getType());

1390 if (Idx >= PgVTy->getMinNumElements())

1391 return std::nullopt;

1392

1393

1396 Extract->insertBefore(&II);

1397 Extract->takeName(&II);

1399}

1400

1403

1404

1405

1406

1407

1408

1409

1410 Value *Pg = II.getArgOperand(0);

1412 Value *Vec = II.getArgOperand(2);

1413 Type *Ty = II.getType();

1414

1416 return std::nullopt;

1417

1419 switch (cast(Ty)->getBitWidth()) {

1420 default:

1421 return std::nullopt;

1422 case 16:

1424 break;

1425 case 32:

1427 break;

1428 case 64:

1430 break;

1431 }

1432

1435 FPTy, cast(Vec->getType())->getElementCount());

1438 II.getIntrinsicID(), {FPVec->getType()}, {Pg, FPFallBack, FPVec});

1441}

1442

1446

1447

1448 auto *AllPat =

1449 ConstantInt::get(Type::getInt32Ty(Ctx), AArch64SVEPredPattern::all);

1451 {II.getType()}, {AllPat});

1452 auto *RDFFR =

1456}

1457

1458static std::optional<Instruction *>

1460 const auto Pattern = cast(II.getArgOperand(0))->getZExtValue();

1461

1462 if (Pattern == AArch64SVEPredPattern::all) {

1463 Constant *StepVal = ConstantInt::get(II.getType(), NumElts);

1467 }

1468

1470

1471 return MinNumElts && NumElts >= MinNumElts

1473 II, ConstantInt::get(II.getType(), MinNumElts)))

1474 : std::nullopt;

1475}

1476

1479 Value *PgVal = II.getArgOperand(0);

1480 Value *OpVal = II.getArgOperand(1);

1481

1482

1483

1484 if (PgVal == OpVal &&

1485 (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_first ||

1486 II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_last)) {

1487 Value *Ops[] = {PgVal, OpVal};

1489

1490 auto *PTest =

1493

1495 }

1496

1497 IntrinsicInst *Pg = dyn_cast(PgVal);

1499

1500 if (!Pg || Op)

1501 return std::nullopt;

1502

1504

1505 if (Pg->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&

1506 OpIID == Intrinsic::aarch64_sve_convert_to_svbool &&

1510

1512

1515 }

1516

1517

1518

1519

1520 if ((Pg == Op) && (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) &&

1521 ((OpIID == Intrinsic::aarch64_sve_brka_z) ||

1522 (OpIID == Intrinsic::aarch64_sve_brkb_z) ||

1523 (OpIID == Intrinsic::aarch64_sve_brkpa_z) ||

1524 (OpIID == Intrinsic::aarch64_sve_brkpb_z) ||

1525 (OpIID == Intrinsic::aarch64_sve_rdffr_z) ||

1526 (OpIID == Intrinsic::aarch64_sve_and_z) ||

1527 (OpIID == Intrinsic::aarch64_sve_bic_z) ||

1528 (OpIID == Intrinsic::aarch64_sve_eor_z) ||

1529 (OpIID == Intrinsic::aarch64_sve_nand_z) ||

1530 (OpIID == Intrinsic::aarch64_sve_nor_z) ||

1531 (OpIID == Intrinsic::aarch64_sve_orn_z) ||

1532 (OpIID == Intrinsic::aarch64_sve_orr_z))) {

1535

1538

1540 }

1541

1542 return std::nullopt;

1543}

1544

1545template <Intrinsic::ID MulOpc, typename Intrinsic::ID FuseOpc>

1546static std::optional<Instruction *>

1548 bool MergeIntoAddendOp) {

1549 Value *P = II.getOperand(0);

1550 Value *MulOp0, *MulOp1, *AddendOp, *Mul;

1551 if (MergeIntoAddendOp) {

1552 AddendOp = II.getOperand(1);

1553 Mul = II.getOperand(2);

1554 } else {

1555 AddendOp = II.getOperand(2);

1556 Mul = II.getOperand(1);

1557 }

1558

1561 return std::nullopt;

1562

1563 if (Mul->hasOneUse())

1564 return std::nullopt;

1565

1567 if (II.getType()->isFPOrFPVectorTy()) {

1569

1570

1571 if (FAddFlags != cast(Mul)->getFastMathFlags())

1572 return std::nullopt;

1574 return std::nullopt;

1576 }

1577

1579 if (MergeIntoAddendOp)

1581 {P, AddendOp, MulOp0, MulOp1}, FMFSource);

1582 else

1584 {P, MulOp0, MulOp1, AddendOp}, FMFSource);

1585

1587}

1588

1589static std::optional<Instruction *>

1591 Value *Pred = II.getOperand(0);

1592 Value *PtrOp = II.getOperand(1);

1593 Type *VecTy = II.getType();

1594

1595

1597 return II_NA;

1598

1601 Load->copyMetadata(II);

1603 }

1604

1610}

1611

1612static std::optional<Instruction *>

1614 Value *VecOp = II.getOperand(0);

1615 Value *Pred = II.getOperand(1);

1616 Value *PtrOp = II.getOperand(2);

1617

1620 Store->copyMetadata(II);

1622 }

1623

1628}

  switch (Intrinsic) {
  case Intrinsic::aarch64_sve_fmul_u:
    return Instruction::BinaryOps::FMul;
  case Intrinsic::aarch64_sve_fadd_u:
    return Instruction::BinaryOps::FAdd;
  case Intrinsic::aarch64_sve_fsub_u:
    return Instruction::BinaryOps::FSub;
  default:
    return Instruction::BinaryOpsEnd;
  }
}

1642

1643static std::optional<Instruction *>

1645

1646 if (II.isStrictFP())

1647 return std::nullopt;

1648

1649 auto *OpPredicate = II.getOperand(0);

1651 if (BinOpCode == Instruction::BinaryOpsEnd ||

1652 match(OpPredicate, m_IntrinsicIntrinsic::aarch64\_sve\_ptrue(

1653 m_ConstantIntAArch64SVEPredPattern::all())))

1654 return std::nullopt;

1656 BinOpCode, II.getOperand(1), II.getOperand(2), II.getFastMathFlags());

1658}

1659

1660

1661

1664 auto *OpPredicate = II.getOperand(0);

1665 if (match(OpPredicate, m_IntrinsicIntrinsic::aarch64\_sve\_ptrue(

1666 m_ConstantIntAArch64SVEPredPattern::all())))

1667 return std::nullopt;

1668

1669 auto *Mod = II.getModule();

1671 II.setCalledFunction(NewDecl);

1672

1673 return &II;

1674}

1675

1676

1677

1678static std::optional<Instruction *>

1682

1683

1685 }

1687}

1688

1691 if (auto II_U =

1693 return II_U;

1695 Intrinsic::aarch64_sve_mla>(

1696 IC, II, true))

1697 return MLA;

1699 Intrinsic::aarch64_sve_mad>(

1700 IC, II, false))

1701 return MAD;

1702 return std::nullopt;

1703}

1704

1705static std::optional<Instruction *>

1707 if (auto II_U =

1709 return II_U;

1710 if (auto FMLA =

1712 Intrinsic::aarch64_sve_fmla>(IC, II,

1713 true))

1714 return FMLA;

1715 if (auto FMAD =

1717 Intrinsic::aarch64_sve_fmad>(IC, II,

1718 false))

1719 return FMAD;

1720 if (auto FMLA =

1722 Intrinsic::aarch64_sve_fmla>(IC, II,

1723 true))

1724 return FMLA;

1725 return std::nullopt;

1726}

1727

1728static std::optional<Instruction *>

1730 if (auto FMLA =

1732 Intrinsic::aarch64_sve_fmla>(IC, II,

1733 true))

1734 return FMLA;

1735 if (auto FMAD =

1737 Intrinsic::aarch64_sve_fmad>(IC, II,

1738 false))

1739 return FMAD;

1740 if (auto FMLA_U =

1742 Intrinsic::aarch64_sve_fmla_u>(

1743 IC, II, true))

1744 return FMLA_U;

1746}

1747

1748static std::optional<Instruction *>

1750 if (auto II_U =

1752 return II_U;

1753 if (auto FMLS =

1755 Intrinsic::aarch64_sve_fmls>(IC, II,

1756 true))

1757 return FMLS;

1758 if (auto FMSB =

1760 Intrinsic::aarch64_sve_fnmsb>(

1761 IC, II, false))

1762 return FMSB;

1763 if (auto FMLS =

1765 Intrinsic::aarch64_sve_fmls>(IC, II,

1766 true))

1767 return FMLS;

1768 return std::nullopt;

1769}

1770

1771static std::optional<Instruction *>

1773 if (auto FMLS =

1775 Intrinsic::aarch64_sve_fmls>(IC, II,

1776 true))

1777 return FMLS;

1778 if (auto FMSB =

1780 Intrinsic::aarch64_sve_fnmsb>(

1781 IC, II, false))

1782 return FMSB;

1783 if (auto FMLS_U =

1785 Intrinsic::aarch64_sve_fmls_u>(

1786 IC, II, true))

1787 return FMLS_U;

1789}

1790

1793 if (auto II_U =

1795 return II_U;

1797 Intrinsic::aarch64_sve_mls>(

1798 IC, II, true))

1799 return MLS;

1800 return std::nullopt;

1801}

1802

1806 auto *OpPredicate = II.getOperand(0);

1807 auto *OpMultiplicand = II.getOperand(1);

1808 auto *OpMultiplier = II.getOperand(2);

1809

1810

1811 auto IsUnitSplat = [](auto *I) {

1813 if (!SplatValue)

1814 return false;

1816 };

1817

1818

1819

1820 auto IsUnitDup = [](auto *I) {

1821 auto *IntrI = dyn_cast(I);

1822 if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::aarch64_sve_dup)

1823 return false;

1824

1825 auto *SplatValue = IntrI->getOperand(2);

1827 };

1828

1829 if (IsUnitSplat(OpMultiplier)) {

1830

1831 OpMultiplicand->takeName(&II);

1833 } else if (IsUnitDup(OpMultiplier)) {

1834

1835 auto *DupInst = cast(OpMultiplier);

1836 auto *DupPg = DupInst->getOperand(1);

1837

1838

1839 if (OpPredicate == DupPg) {

1840 OpMultiplicand->takeName(&II);

1842 }

1843 }

1844

1846}

1847

1850 Value *UnpackArg = II.getArgOperand(0);

1851 auto *RetTy = cast(II.getType());

1852 bool IsSigned = II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpkhi ||

1853 II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpklo;

1854

1855

1856

1857 if (auto *ScalarArg = getSplatValue(UnpackArg)) {

1858 ScalarArg =

1864 }

1865

1866 return std::nullopt;

1867}

1870 auto *OpVal = II.getOperand(0);

1871 auto *OpIndices = II.getOperand(1);

1872 VectorType *VTy = cast(II.getType());

1873

1874

1875

1876 auto *SplatValue = dyn_cast_or_null(getSplatValue(OpIndices));

1877 if (!SplatValue ||

1878 SplatValue->getValue().uge(VTy->getElementCount().getKnownMinValue()))

1879 return std::nullopt;

1880

1881

1882

1884 auto *VectorSplat =

1886

1889}

1890

1895 constexpr Intrinsic::ID FromSVB = Intrinsic::aarch64_sve_convert_from_svbool;

1896 constexpr Intrinsic::ID ToSVB = Intrinsic::aarch64_sve_convert_to_svbool;

1897

1898

1899

1900 if ((match(II.getArgOperand(0),

1901 m_Intrinsic(m_Intrinsic(m_Value(A)))) &&

1902 match(II.getArgOperand(1),

1903 m_Intrinsic(m_Intrinsic(m_Value(B))))) ||

1904 (match(II.getArgOperand(0), m_Intrinsic(m_Value(A))) &&

1905 match(II.getArgOperand(1), m_Intrinsic(m_Value(B))))) {

1906 auto *TyA = cast(A->getType());

1907 if (TyA == B->getType() &&

1915 }

1916 }

1917

1918 return std::nullopt;

1919}

1920

1923

1924

1926 if (match(II.getArgOperand(0),

1927 m_IntrinsicIntrinsic::aarch64\_sve\_uzp1(m_Value(A), m_Value(B))) &&

1928 match(II.getArgOperand(1), m_IntrinsicIntrinsic::aarch64\_sve\_uzp2(

1931 II, (II.getIntrinsicID() == Intrinsic::aarch64_sve_zip1 ? A : B));

1932

1933 return std::nullopt;

1934}

1935

1936static std::optional<Instruction *>

1938 Value *Mask = II.getOperand(0);

1939 Value *BasePtr = II.getOperand(1);

1940 Value *Index = II.getOperand(2);

1941 Type *Ty = II.getType();

1943

1944

1946 return II_NA;

1947

1948

1949

1950

1951 Value *IndexBase;

1952 if (match(Index, m_IntrinsicIntrinsic::aarch64\_sve\_index(

1954 Align Alignment =

1955 BasePtr->getPointerAlignment(II.getDataLayout());

1956

1959 BasePtr, IndexBase);

1965 }

1966

1967 return std::nullopt;

1968}

1969

1970static std::optional<Instruction *>

1972 Value *Val = II.getOperand(0);

1973 Value *Mask = II.getOperand(1);

1974 Value *BasePtr = II.getOperand(2);

1975 Value *Index = II.getOperand(3);

1977

1978

1979

1980

1981 Value *IndexBase;

1982 if (match(Index, m_IntrinsicIntrinsic::aarch64\_sve\_index(

1984 Align Alignment =

1985 BasePtr->getPointerAlignment(II.getDataLayout());

1986

1988 BasePtr, IndexBase);

1991

1993

1995 }

1996

1997 return std::nullopt;

1998}

1999

2003 Value *Pred = II.getOperand(0);

2004 Value *Vec = II.getOperand(1);

2005 Value *DivVec = II.getOperand(2);

2006

2008 ConstantInt *SplatConstantInt = dyn_cast_or_null(SplatValue);

2009 if (!SplatConstantInt)

2010 return std::nullopt;

2011

2013 const int64_t DivisorValue = Divisor.getSExtValue();

2014 if (DivisorValue == -1)

2015 return std::nullopt;

2016 if (DivisorValue == 1)

2018

2020 Constant *DivisorLog2 = ConstantInt::get(Int32Ty, Divisor.logBase2());

2022 Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});

2024 }

2027 Constant *DivisorLog2 = ConstantInt::get(Int32Ty, Divisor.logBase2());

2029 Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});

2031 Intrinsic::aarch64_sve_neg, {ASRD->getType()}, {ASRD, Pred, ASRD});

2033 }

2034

2035 return std::nullopt;

2036}

2037

2039 size_t VecSize = Vec.size();

2040 if (VecSize == 1)

2041 return true;

2043 return false;

2044 size_t HalfVecSize = VecSize / 2;

2045

2046 for (auto LHS = Vec.begin(), RHS = Vec.begin() + HalfVecSize;

2048 if (*LHS != nullptr && *RHS != nullptr) {

2050 continue;

2051 else

2052 return false;

2053 }

2054 if (!AllowPoison)

2055 return false;

2056 if (*LHS == nullptr && *RHS != nullptr)

2058 }

2059

2060 Vec.resize(HalfVecSize);

2062 return true;

2063}

2064

2065

2066

2069 Value *CurrentInsertElt = nullptr, *Default = nullptr;

2070 if (match(II.getOperand(0),

2071 m_IntrinsicIntrinsic::vector\_insert(

2073 !isa(CurrentInsertElt->getType()))

2074 return std::nullopt;

2075 auto IIScalableTy = cast(II.getType());

2076

2077

2079 while (auto InsertElt = dyn_cast(CurrentInsertElt)) {

2080 auto Idx = cast(InsertElt->getOperand(2));

2081 Elts[Idx->getValue().getZExtValue()] = InsertElt->getOperand(1);

2082 CurrentInsertElt = InsertElt->getOperand(0);

2083 }

2084

2085 bool AllowPoison =

2086 isa(CurrentInsertElt) && isa(Default);

2088 return std::nullopt;

2089

2090

2092 for (size_t I = 0; I < Elts.size(); I++) {

2093 if (Elts[I] == nullptr)

2094 continue;

2097 }

2098 if (InsertEltChain == nullptr)

2099 return std::nullopt;

2100

2101

2102

2103

2104

2105 unsigned PatternWidth = IIScalableTy->getScalarSizeInBits() * Elts.size();

2106 unsigned PatternElementCount = IIScalableTy->getScalarSizeInBits() *

2107 IIScalableTy->getMinNumElements() /

2108 PatternWidth;

2109

2112 auto *WideShuffleMaskTy =

2114

2118 auto WideBitcast =

2122 WideBitcast, PoisonValue::get(WideScalableTy), WideShuffleMask);

2123 auto NarrowBitcast =

2125

2127}

2128

2131 Value *A = II.getArgOperand(0);

2132 Value *B = II.getArgOperand(1);

2133 if (A == B)

2135

2136 return std::nullopt;

2137}

2138

2141 Value *Pred = II.getOperand(0);

2142 Value *Vec = II.getOperand(1);

2143 Value *Shift = II.getOperand(2);

2144

2145

2146 Value *AbsPred, *MergedValue;

2147 if (match(Vec, m_IntrinsicIntrinsic::aarch64\_sve\_sqabs(

2149 match(Vec, m_IntrinsicIntrinsic::aarch64\_sve\_abs(

2151

2152 return std::nullopt;

2153

2154

2155

2156

2157

2158 if (!isa(MergedValue) && match(MergedValue, m_NonNegative()) &&

2160 return std::nullopt;

2161

2162

2163

2165 return std::nullopt;

2166

2168 {II.getType()}, {Pred, Vec, Shift});

2169

2171}

2172

2175 Value *Vec = II.getOperand(0);

2176

2179

2180 return std::nullopt;

2181}

2182

2185

2186 auto *NI = II.getNextNonDebugInstruction();

2189 return I->mayReadOrWriteMemory() && I->mayHaveSideEffects();

2190 };

2191 while (LookaheadThreshold-- && CanSkipOver(NI)) {

2192 auto *NIBB = NI->getParent();

2193 NI = NI->getNextNonDebugInstruction();

2194 if (!NI) {

2195 if (auto *SuccBB = NIBB->getUniqueSuccessor())

2196 NI = SuccBB->getFirstNonPHIOrDbgOrLifetime();

2197 else

2198 break;

2199 }

2200 }

2201 auto *NextII = dyn_cast_or_null(NI);

2202 if (NextII && II.isIdenticalTo(NextII))

2204

2205 return std::nullopt;

2206}

2207

2208std::optional<Instruction *>

2212 switch (IID) {

2213 default:

2214 break;

2215 case Intrinsic::aarch64_dmb:

2217 case Intrinsic::aarch64_sve_fcvt_bf16f32_v2:

2218 case Intrinsic::aarch64_sve_fcvt_f16f32:

2219 case Intrinsic::aarch64_sve_fcvt_f16f64:

2220 case Intrinsic::aarch64_sve_fcvt_f32f16:

2221 case Intrinsic::aarch64_sve_fcvt_f32f64:

2222 case Intrinsic::aarch64_sve_fcvt_f64f16:

2223 case Intrinsic::aarch64_sve_fcvt_f64f32:

2224 case Intrinsic::aarch64_sve_fcvtlt_f32f16:

2225 case Intrinsic::aarch64_sve_fcvtlt_f64f32:

2226 case Intrinsic::aarch64_sve_fcvtx_f32f64:

2227 case Intrinsic::aarch64_sve_fcvtzs:

2228 case Intrinsic::aarch64_sve_fcvtzs_i32f16:

2229 case Intrinsic::aarch64_sve_fcvtzs_i32f64:

2230 case Intrinsic::aarch64_sve_fcvtzs_i64f16:

2231 case Intrinsic::aarch64_sve_fcvtzs_i64f32:

2232 case Intrinsic::aarch64_sve_fcvtzu:

2233 case Intrinsic::aarch64_sve_fcvtzu_i32f16:

2234 case Intrinsic::aarch64_sve_fcvtzu_i32f64:

2235 case Intrinsic::aarch64_sve_fcvtzu_i64f16:

2236 case Intrinsic::aarch64_sve_fcvtzu_i64f32:

2237 case Intrinsic::aarch64_sve_scvtf:

2238 case Intrinsic::aarch64_sve_scvtf_f16i32:

2239 case Intrinsic::aarch64_sve_scvtf_f16i64:

2240 case Intrinsic::aarch64_sve_scvtf_f32i64:

2241 case Intrinsic::aarch64_sve_scvtf_f64i32:

2242 case Intrinsic::aarch64_sve_ucvtf:

2243 case Intrinsic::aarch64_sve_ucvtf_f16i32:

2244 case Intrinsic::aarch64_sve_ucvtf_f16i64:

2245 case Intrinsic::aarch64_sve_ucvtf_f32i64:

2246 case Intrinsic::aarch64_sve_ucvtf_f64i32:

2248 case Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2:

2249 case Intrinsic::aarch64_sve_fcvtnt_f16f32:

2250 case Intrinsic::aarch64_sve_fcvtnt_f32f64:

2251 case Intrinsic::aarch64_sve_fcvtxnt_f32f64:

2253 case Intrinsic::aarch64_sve_st1_scatter:

2254 case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:

2255 case Intrinsic::aarch64_sve_st1_scatter_sxtw:

2256 case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:

2257 case Intrinsic::aarch64_sve_st1_scatter_uxtw:

2258 case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:

2259 case Intrinsic::aarch64_sve_st1dq:

2260 case Intrinsic::aarch64_sve_st1q_scatter_index:

2261 case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset:

2262 case Intrinsic::aarch64_sve_st1q_scatter_vector_offset:

2263 case Intrinsic::aarch64_sve_st1wq:

2264 case Intrinsic::aarch64_sve_stnt1:

2265 case Intrinsic::aarch64_sve_stnt1_scatter:

2266 case Intrinsic::aarch64_sve_stnt1_scatter_index:

2267 case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:

2268 case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:

2270 case Intrinsic::aarch64_sve_st2:

2271 case Intrinsic::aarch64_sve_st2q:

2273 case Intrinsic::aarch64_sve_st3:

2274 case Intrinsic::aarch64_sve_st3q:

2276 case Intrinsic::aarch64_sve_st4:

2277 case Intrinsic::aarch64_sve_st4q:

2279 case Intrinsic::aarch64_sve_addqv:

2280 case Intrinsic::aarch64_sve_and_z:

2281 case Intrinsic::aarch64_sve_bic_z:

2282 case Intrinsic::aarch64_sve_brka_z:

2283 case Intrinsic::aarch64_sve_brkb_z:

2284 case Intrinsic::aarch64_sve_brkn_z:

2285 case Intrinsic::aarch64_sve_brkpa_z:

2286 case Intrinsic::aarch64_sve_brkpb_z:

2287 case Intrinsic::aarch64_sve_cntp:

2288 case Intrinsic::aarch64_sve_compact:

2289 case Intrinsic::aarch64_sve_eor_z:

2290 case Intrinsic::aarch64_sve_eorv:

2291 case Intrinsic::aarch64_sve_eorqv:

2292 case Intrinsic::aarch64_sve_nand_z:

2293 case Intrinsic::aarch64_sve_nor_z:

2294 case Intrinsic::aarch64_sve_orn_z:

2295 case Intrinsic::aarch64_sve_orr_z:

2296 case Intrinsic::aarch64_sve_orv:

2297 case Intrinsic::aarch64_sve_orqv:

2298 case Intrinsic::aarch64_sve_pnext:

2299 case Intrinsic::aarch64_sve_rdffr_z:

2300 case Intrinsic::aarch64_sve_saddv:

2301 case Intrinsic::aarch64_sve_uaddv:

2302 case Intrinsic::aarch64_sve_umaxv:

2303 case Intrinsic::aarch64_sve_umaxqv:

2304 case Intrinsic::aarch64_sve_cmpeq:

2305 case Intrinsic::aarch64_sve_cmpeq_wide:

2306 case Intrinsic::aarch64_sve_cmpge:

2307 case Intrinsic::aarch64_sve_cmpge_wide:

2308 case Intrinsic::aarch64_sve_cmpgt:

2309 case Intrinsic::aarch64_sve_cmpgt_wide:

2310 case Intrinsic::aarch64_sve_cmphi:

2311 case Intrinsic::aarch64_sve_cmphi_wide:

2312 case Intrinsic::aarch64_sve_cmphs:

2313 case Intrinsic::aarch64_sve_cmphs_wide:

2314 case Intrinsic::aarch64_sve_cmple_wide:

2315 case Intrinsic::aarch64_sve_cmplo_wide:

2316 case Intrinsic::aarch64_sve_cmpls_wide:

2317 case Intrinsic::aarch64_sve_cmplt_wide:

2318 case Intrinsic::aarch64_sve_facge:

2319 case Intrinsic::aarch64_sve_facgt:

2320 case Intrinsic::aarch64_sve_fcmpeq:

2321 case Intrinsic::aarch64_sve_fcmpge:

2322 case Intrinsic::aarch64_sve_fcmpgt:

2323 case Intrinsic::aarch64_sve_fcmpne:

2324 case Intrinsic::aarch64_sve_fcmpuo:

2325 case Intrinsic::aarch64_sve_ld1_gather:

2326 case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:

2327 case Intrinsic::aarch64_sve_ld1_gather_sxtw:

2328 case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:

2329 case Intrinsic::aarch64_sve_ld1_gather_uxtw:

2330 case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:

2331 case Intrinsic::aarch64_sve_ld1q_gather_index:

2332 case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:

2333 case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:

2334 case Intrinsic::aarch64_sve_ld1ro:

2335 case Intrinsic::aarch64_sve_ld1rq:

2336 case Intrinsic::aarch64_sve_ld1udq:

2337 case Intrinsic::aarch64_sve_ld1uwq:

2338 case Intrinsic::aarch64_sve_ld2_sret:

2339 case Intrinsic::aarch64_sve_ld2q_sret:

2340 case Intrinsic::aarch64_sve_ld3_sret:

2341 case Intrinsic::aarch64_sve_ld3q_sret:

2342 case Intrinsic::aarch64_sve_ld4_sret:

2343 case Intrinsic::aarch64_sve_ld4q_sret:

2344 case Intrinsic::aarch64_sve_ldff1:

2345 case Intrinsic::aarch64_sve_ldff1_gather:

2346 case Intrinsic::aarch64_sve_ldff1_gather_index:

2347 case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:

2348 case Intrinsic::aarch64_sve_ldff1_gather_sxtw:

2349 case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:

2350 case Intrinsic::aarch64_sve_ldff1_gather_uxtw:

2351 case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:

2352 case Intrinsic::aarch64_sve_ldnf1:

2353 case Intrinsic::aarch64_sve_ldnt1:

2354 case Intrinsic::aarch64_sve_ldnt1_gather:

2355 case Intrinsic::aarch64_sve_ldnt1_gather_index:

2356 case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:

2357 case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:

2359 case Intrinsic::aarch64_sve_prf:

2360 case Intrinsic::aarch64_sve_prfb_gather_index:

2361 case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:

2362 case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:

2363 case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:

2364 case Intrinsic::aarch64_sve_prfd_gather_index:

2365 case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:

2366 case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:

2367 case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:

2368 case Intrinsic::aarch64_sve_prfh_gather_index:

2369 case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:

2370 case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:

2371 case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:

2372 case Intrinsic::aarch64_sve_prfw_gather_index:

2373 case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:

2374 case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:

2375 case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:

2377 case Intrinsic::aarch64_neon_fmaxnm:

2378 case Intrinsic::aarch64_neon_fminnm:

2380 case Intrinsic::aarch64_sve_convert_from_svbool:

2382 case Intrinsic::aarch64_sve_dup:

2384 case Intrinsic::aarch64_sve_dup_x:

2386 case Intrinsic::aarch64_sve_cmpne:

2387 case Intrinsic::aarch64_sve_cmpne_wide:

2389 case Intrinsic::aarch64_sve_rdffr:

2391 case Intrinsic::aarch64_sve_lasta:

2392 case Intrinsic::aarch64_sve_lastb:

2394 case Intrinsic::aarch64_sve_clasta_n:

2395 case Intrinsic::aarch64_sve_clastb_n:

2397 case Intrinsic::aarch64_sve_cntd:

2399 case Intrinsic::aarch64_sve_cntw:

2401 case Intrinsic::aarch64_sve_cnth:

2403 case Intrinsic::aarch64_sve_cntb:

2405 case Intrinsic::aarch64_sve_ptest_any:

2406 case Intrinsic::aarch64_sve_ptest_first:

2407 case Intrinsic::aarch64_sve_ptest_last:

2409 case Intrinsic::aarch64_sve_fabd:

2411 case Intrinsic::aarch64_sve_fadd:

2413 case Intrinsic::aarch64_sve_fadd_u:

2415 case Intrinsic::aarch64_sve_fdiv:

2417 case Intrinsic::aarch64_sve_fmax:

2419 case Intrinsic::aarch64_sve_fmaxnm:

2421 case Intrinsic::aarch64_sve_fmin:

2423 case Intrinsic::aarch64_sve_fminnm:

2425 case Intrinsic::aarch64_sve_fmla:

2427 case Intrinsic::aarch64_sve_fmls:

2429 case Intrinsic::aarch64_sve_fmul:

2430 if (auto II_U =

2432 return II_U;

2434 case Intrinsic::aarch64_sve_fmul_u:

2436 case Intrinsic::aarch64_sve_fmulx:

2438 case Intrinsic::aarch64_sve_fnmla:

2440 case Intrinsic::aarch64_sve_fnmls:

2442 case Intrinsic::aarch64_sve_fsub:

2444 case Intrinsic::aarch64_sve_fsub_u:

2446 case Intrinsic::aarch64_sve_add:

2448 case Intrinsic::aarch64_sve_add_u:

2450 Intrinsic::aarch64_sve_mla_u>(

2451 IC, II, true);

2452 case Intrinsic::aarch64_sve_mla:

2454 case Intrinsic::aarch64_sve_mls:

2456 case Intrinsic::aarch64_sve_mul:

2457 if (auto II_U =

2459 return II_U;

2461 case Intrinsic::aarch64_sve_mul_u:

2463 case Intrinsic::aarch64_sve_sabd:

2465 case Intrinsic::aarch64_sve_smax:

2467 case Intrinsic::aarch64_sve_smin:

2469 case Intrinsic::aarch64_sve_smulh:

2471 case Intrinsic::aarch64_sve_sub:

2473 case Intrinsic::aarch64_sve_sub_u:

2475 Intrinsic::aarch64_sve_mls_u>(

2476 IC, II, true);

2477 case Intrinsic::aarch64_sve_uabd:

2479 case Intrinsic::aarch64_sve_umax:

2481 case Intrinsic::aarch64_sve_umin:

2483 case Intrinsic::aarch64_sve_umulh:

2485 case Intrinsic::aarch64_sve_asr:

2487 case Intrinsic::aarch64_sve_lsl:

2489 case Intrinsic::aarch64_sve_lsr:

2491 case Intrinsic::aarch64_sve_and:

2493 case Intrinsic::aarch64_sve_bic:

2495 case Intrinsic::aarch64_sve_eor:

2497 case Intrinsic::aarch64_sve_orr:

2499 case Intrinsic::aarch64_sve_sqsub:

2501 case Intrinsic::aarch64_sve_uqsub:

2503 case Intrinsic::aarch64_sve_tbl:

2505 case Intrinsic::aarch64_sve_uunpkhi:

2506 case Intrinsic::aarch64_sve_uunpklo:

2507 case Intrinsic::aarch64_sve_sunpkhi:

2508 case Intrinsic::aarch64_sve_sunpklo:

2510 case Intrinsic::aarch64_sve_uzp1:

2512 case Intrinsic::aarch64_sve_zip1:

2513 case Intrinsic::aarch64_sve_zip2:

2515 case Intrinsic::aarch64_sve_ld1_gather_index:

2517 case Intrinsic::aarch64_sve_st1_scatter_index:

2519 case Intrinsic::aarch64_sve_ld1:

2521 case Intrinsic::aarch64_sve_st1:

2523 case Intrinsic::aarch64_sve_sdiv:

2525 case Intrinsic::aarch64_sve_sel:

2527 case Intrinsic::aarch64_sve_srshl:

2529 case Intrinsic::aarch64_sve_dupq_lane:

2531 case Intrinsic::aarch64_sve_insr:

2533 }

2534

2535 return std::nullopt;

2536}

    SimplifyAndSetOp) const {
  switch (II.getIntrinsicID()) {
  default:
    break;
  case Intrinsic::aarch64_neon_fcvtxn:
  case Intrinsic::aarch64_neon_rshrn:
  case Intrinsic::aarch64_neon_sqrshrn:
  case Intrinsic::aarch64_neon_sqrshrun:
  case Intrinsic::aarch64_neon_sqshrn:
  case Intrinsic::aarch64_neon_sqshrun:
  case Intrinsic::aarch64_neon_sqxtn:
  case Intrinsic::aarch64_neon_sqxtun:
  case Intrinsic::aarch64_neon_uqrshrn:
  case Intrinsic::aarch64_neon_uqshrn:
  case Intrinsic::aarch64_neon_uqxtn:
    SimplifyAndSetOp(&II, 0, OrigDemandedElts, UndefElts);
    break;
  }

  return std::nullopt;
}

2563

2567}

2568

2571 switch (K) {

2581 else

2587 else

2589 }

2591}

2592

2593bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,

2595 Type *SrcOverrideTy) {

2596

2597

2600 cast(DstTy)->getElementCount());

2601 };

2602

2603

2604

2605

2606

2607

2609 if (useNeonVector(DstTy) || Args.size() != 2 ||

2610 (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))

2611 return false;

2612

2613

2614

2615

2616

2617

2618

2619

2620 Type *SrcTy = SrcOverrideTy;

2621 switch (Opcode) {

2622 case Instruction::Add:

2623 case Instruction::Sub:

2624

2625 if (isa(Args[1]) || isa(Args[1])) {

2626 if (!SrcTy)

2627 SrcTy =

2628 toVectorTy(cast(Args[1])->getOperand(0)->getType());

2629 } else

2630 return false;

2631 break;

2632 case Instruction::Mul: {

2633

2634 if ((isa(Args[0]) && isa(Args[1])) ||

2635 (isa(Args[0]) && isa(Args[1]))) {

2636 if (!SrcTy)

2637 SrcTy =

2638 toVectorTy(cast(Args[0])->getOperand(0)->getType());

2639 } else if (isa(Args[0]) || isa(Args[1])) {

2640

2641

2642

2648 return false;

2649 if (!SrcTy)

2652 } else

2653 return false;

2654 break;

2655 }

2656 default:

2657 return false;

2658 }

2659

2660

2661

2663 if (!DstTyL.second.isVector() || DstEltSize != DstTy->getScalarSizeInBits())

2664 return false;

2665

2666

2667

2668 assert(SrcTy && "Expected some SrcTy");

2670 unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();

2671 if (!SrcTyL.second.isVector() || SrcElTySize != SrcTy->getScalarSizeInBits())

2672 return false;

2673

2674

2676 DstTyL.first * DstTyL.second.getVectorMinNumElements();

2678 SrcTyL.first * SrcTyL.second.getVectorMinNumElements();

2679

2680

2681

2682 return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstEltSize;

2683}

2684

2685

2686

2687

2688

2689

2690

2692 Type *Src) {

2693

2695 (Src->isScalableTy() && !ST->hasSVE2()))

2696 return false;

2697

2698 if (ExtUser->getOpcode() != Instruction::Add || !ExtUser->hasOneUse())

2699 return false;

2700

2701

2703 auto *AddUser =

2704 dyn_cast_or_null(Add->getUniqueUndroppableUser());

2705 if (AddUser && AddUser->getOpcode() == Instruction::Add)

2706 Add = AddUser;

2707

2708 auto *Shr = dyn_cast_or_null(Add->getUniqueUndroppableUser());

2709 if (!Shr || Shr->getOpcode() != Instruction::LShr)

2710 return false;

2711

2712 auto *Trunc = dyn_cast_or_null(Shr->getUniqueUndroppableUser());

2713 if (!Trunc || Trunc->getOpcode() != Instruction::Trunc ||

2714 Src->getScalarSizeInBits() !=

2715 cast(Trunc)->getDestTy()->getScalarSizeInBits())

2716 return false;

2717

2718

2719

2723 return false;

2724

2725

2728 return true;

2729

2730 return false;

2731}

2732

2739 assert(ISD && "Invalid opcode");

2740

2741

2742 if (I && I->hasOneUser()) {

2743 auto *SingleUser = cast(*I->user_begin());

2745 if (isWideningInstruction(Dst, SingleUser->getOpcode(), Operands, Src)) {

2746

2747

2748

2749 if (SingleUser->getOpcode() == Instruction::Add) {

2750 if (I == SingleUser->getOperand(1) ||

2751 (isa(SingleUser->getOperand(1)) &&

2752 cast(SingleUser->getOperand(1))->getOpcode() == Opcode))

2753 return 0;

2754 } else

2755 return 0;

2756 }

2757

2758

2759 if ((isa(I) || isa(I)) &&

2761 return 0;

2762 }

2763

2764

2767 return Cost == 0 ? 0 : 1;

2768 return Cost;

2769 };

2770

2773

2775 return AdjustCost(

2777

2779 {ISD::FP_ROUND, MVT::bf16, MVT::f32, 1},

2780 {ISD::FP_ROUND, MVT::bf16, MVT::f64, 1},

2781 {ISD::FP_ROUND, MVT::v4bf16, MVT::v4f32, 1},

2782 {ISD::FP_ROUND, MVT::v8bf16, MVT::v8f32, 2},

2783 {ISD::FP_ROUND, MVT::v2bf16, MVT::v2f64, 2},

2784 {ISD::FP_ROUND, MVT::v4bf16, MVT::v4f64, 3},

2785 {ISD::FP_ROUND, MVT::v8bf16, MVT::v8f64, 6},

2786 };

2787

2788 if (ST->hasBF16())

2791 return AdjustCost(Entry->Cost);

2792

2794 {ISD::TRUNCATE, MVT::v2i8, MVT::v2i64, 1},

2795 {ISD::TRUNCATE, MVT::v2i16, MVT::v2i64, 1},

2796 {ISD::TRUNCATE, MVT::v2i32, MVT::v2i64, 1},

2797 {ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 1},

2798 {ISD::TRUNCATE, MVT::v4i8, MVT::v4i64, 3},

2799 {ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1},

2800 {ISD::TRUNCATE, MVT::v4i16, MVT::v4i64, 2},

2801 {ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1},

2802 {ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 1},

2803 {ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2},

2804 {ISD::TRUNCATE, MVT::v8i8, MVT::v8i64, 4},

2805 {ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1},

2806 {ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 3},

2807 {ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 2},

2808 {ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 1},

2809 {ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 3},

2810 {ISD::TRUNCATE, MVT::v16i8, MVT::v16i64, 7},

2811 {ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 2},

2812 {ISD::TRUNCATE, MVT::v16i16, MVT::v16i64, 6},

2813 {ISD::TRUNCATE, MVT::v16i32, MVT::v16i64, 4},

2814

2815

2846 {ISD::TRUNCATE, MVT::nxv16i8, MVT::nxv16i16, 1},

2847 {ISD::TRUNCATE, MVT::nxv16i8, MVT::nxv16i32, 3},

2848 {ISD::TRUNCATE, MVT::nxv16i8, MVT::nxv16i64, 7},

2849

2850

2867

2868

2870 {ISD::FP_EXTEND, MVT::v2f64, MVT::v2f32, 1},

2871 {ISD::FP_EXTEND, MVT::v4f64, MVT::v4f32, 2},

2872

2875 {ISD::FP_EXTEND, MVT::v4f32, MVT::v4f16, 1},

2876 {ISD::FP_EXTEND, MVT::v8f32, MVT::v8f16, 2},

2877 {ISD::FP_EXTEND, MVT::v2f64, MVT::v2f16, 2},

2878 {ISD::FP_EXTEND, MVT::v4f64, MVT::v4f16, 3},

2879 {ISD::FP_EXTEND, MVT::v8f64, MVT::v8f16, 6},

2880

2882 {ISD::FP_EXTEND, MVT::f64, MVT::bf16, 2},

2883 {ISD::FP_EXTEND, MVT::v4f32, MVT::v4bf16, 1},

2884 {ISD::FP_EXTEND, MVT::v8f32, MVT::v8bf16, 2},

2885 {ISD::FP_EXTEND, MVT::v2f64, MVT::v2bf16, 2},

2886 {ISD::FP_EXTEND, MVT::v4f64, MVT::v4bf16, 3},

2887 {ISD::FP_EXTEND, MVT::v8f64, MVT::v8bf16, 6},

2888

2890 {ISD::FP_ROUND, MVT::v2f32, MVT::v2f64, 1},

2891 {ISD::FP_ROUND, MVT::v4f32, MVT::v4f64, 2},

2892

2895 {ISD::FP_ROUND, MVT::v4f16, MVT::v4f32, 1},

2896 {ISD::FP_ROUND, MVT::v8f16, MVT::v8f32, 2},

2897 {ISD::FP_ROUND, MVT::v2f16, MVT::v2f64, 2},

2898 {ISD::FP_ROUND, MVT::v4f16, MVT::v4f64, 3},

2899 {ISD::FP_ROUND, MVT::v8f16, MVT::v8f64, 6},

2900

2901 {ISD::FP_ROUND, MVT::bf16, MVT::f32, 8},

2902 {ISD::FP_ROUND, MVT::bf16, MVT::f64, 9},

2909

2910

2917

2918

2925

2926

2931

2932

2937

2938

2941

2942

2949

2950

2953

2954

2961

2962

2969

2970

2975

2976

2985

2986

2993

2994

3003

3004

3013

3014

3019

3020

3027

3028

3033

3034

3043

3044

3053

3054

3063

3064

3068

3069

3073

3074

3078

3079

3083

3084

3088

3089

3093

3094

3095 {ISD::BITCAST, MVT::nxv2f16, MVT::nxv2i16, 0},

3096 {ISD::BITCAST, MVT::nxv4f16, MVT::nxv4i16, 0},

3097 {ISD::BITCAST, MVT::nxv2f32, MVT::nxv2i32, 0},

3098

3099

3100 {ISD::BITCAST, MVT::nxv2i16, MVT::nxv2f16, 0},

3101 {ISD::BITCAST, MVT::nxv4i16, MVT::nxv4f16, 0},

3102 {ISD::BITCAST, MVT::nxv2i32, MVT::nxv2f32, 0},

3103

3104

3105

3106

3113

3120 };

3121

3122

3123

3124

3125 EVT WiderTy = SrcTy.bitsGT(DstTy) ? SrcTy : DstTy;

3129 std::pair<InstructionCost, MVT> LT =

3131 unsigned NumElements =

3133 return AdjustCost(

3134 LT.first *

3139 }

3140

3143 return AdjustCost(Entry->Cost);

3144

3150 {ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f16, 2},

3152 {ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f16, 2},

3156 {ISD::FP_TO_SINT, MVT::v8i32, MVT::v8f16, 4},

3158 {ISD::FP_TO_SINT, MVT::v16i8, MVT::v16f16, 3},

3160 {ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f16, 2},

3162 {ISD::FP_TO_SINT, MVT::v16i32, MVT::v16f16, 8},

3164 {ISD::UINT_TO_FP, MVT::v8f16, MVT::v8i8, 2},

3165 {ISD::SINT_TO_FP, MVT::v8f16, MVT::v8i8, 2},

3166 {ISD::UINT_TO_FP, MVT::v16f16, MVT::v16i8, 4},

3167 {ISD::SINT_TO_FP, MVT::v16f16, MVT::v16i8, 4},

3168 };

3169

3170 if (ST->hasFullFP16())

3173 return AdjustCost(Entry->Cost);

3174

3182

3183

3184

3185

3189 Opcode, LegalTy, Src, CCH, CostKind, I);

3192 return Part1 + Part2;

3193 }

3194

3195

3196

3201

3202 return AdjustCost(

3204}
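// Illustrative note (not part of the upstream listing): getCastInstrCost above
// is driven by CostTableLookup over (ISD opcode, destination MVT, source MVT).
// For example, the entry {ISD::TRUNCATE, MVT::v16i8, MVT::v16i64, 7} models
// "trunc <16 x i64> to <16 x i8>" as roughly seven narrowing instructions.
// For cost kinds other than reciprocal throughput, AdjustCost collapses any
// non-zero table cost to 1.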

3205

3209 unsigned Index) {

3210

3211

3212 assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&

3213 "Invalid opcode");

3214

3215

3216

3218

3219

3220 assert(isa(Dst) && isa(Src) && "Invalid type");

3221

3222

3223

3226 CostKind, Index, nullptr, nullptr);

3227

3228

3232

3233

3234

3235

3236 if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))

3239

3240

3241

3242 if (DstVT.getFixedSizeInBits() < SrcVT.getFixedSizeInBits())

3245

3246 switch (Opcode) {

3247 default:

3249

3250

3251

3252 case Instruction::SExt:

3253 return Cost;

3254

3255

3256

3257 case Instruction::ZExt:

3258 if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)

3259 return Cost;

3260 }

3261

3262

3265}
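// Illustrative note (not part of the upstream listing): the logic above
// reflects how extract+extend pairs map to AArch64 moves. A sign-extending
// extract uses SMOV, which performs the extension for free. A zero-extending
// extract uses UMOV; writing a W register implicitly zeroes the upper 32 bits,
// so the extend is only charged when an i8/i16 lane is widened all the way
// to i64.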

3266

3271 return Opcode == Instruction::PHI ? 0 : 1;

3273

3274 return 0;

3275}

3276

3277InstructionCost AArch64TTIImpl::getVectorInstrCostHelper(

3278 unsigned Opcode, Type *Val, unsigned Index, bool HasRealUse,

3280 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) {

3282

3283 if (Index != -1U) {

3284

3286

3287

3288 if (!LT.second.isVector())

3289 return 0;

3290

3291

3292

3293 if (LT.second.isFixedLengthVector()) {

3294 unsigned Width = LT.second.getVectorNumElements();

3295 Index = Index % Width;

3296 }

3297

3298

3299

3300

3301

3302

3303

3305 return 0;

3306

3307

3308

3309

3310

3311 if (I && dyn_cast(I->getOperand(1)))

3313

3314

3315

3318

3319

3320

3321

3322

3323

3324 }

3325

3326

3327

3328

3329

3330

3331

3332

3333

3334

3335

3336

3337

3338

3339

3340

3341 auto ExtractCanFuseWithFmul = [&]() {

3342

3343 if (Index == 0)

3344 return false;

3345

3346

3347

3348 auto IsAllowedScalarTy = [&](const Type *T) {

3349 return T->isFloatTy() || T->isDoubleTy() ||

3350 (T->isHalfTy() && ST->hasFullFP16());

3351 };

3352

3353

3354 auto IsUserFMulScalarTy = [](const Value *EEUser) {

3355

3356 const auto *BO = dyn_cast(EEUser);

3357 return BO && BO->getOpcode() == BinaryOperator::FMul &&

3358 !BO->getType()->isVectorTy();

3359 };

3360

3361

3362

3363 auto IsExtractLaneEquivalentToZero = [&](unsigned Idx, unsigned EltSz) {

3364 auto RegWidth =

3367 return Idx == 0 || (RegWidth != 0 && (Idx * EltSz) % RegWidth == 0);

3368 };

3369

3370

3371

3372 if (!isa(Val) || !IsAllowedScalarTy(Val->getScalarType()))

3373 return false;

3374

3375 if (Scalar) {

3377 for (auto *U : Scalar->users()) {

3378 if (!IsUserFMulScalarTy(U))

3379 return false;

3380

3381

3382 UserToExtractIdx[U];

3383 }

3384 if (UserToExtractIdx.empty())

3385 return false;

3386 for (auto &[S, U, L] : ScalarUserAndIdx) {

3387 for (auto *U : S->users()) {

3388 if (UserToExtractIdx.find(U) != UserToExtractIdx.end()) {

3389 auto *FMul = cast(U);

3390 auto *Op0 = FMul->getOperand(0);

3391 auto *Op1 = FMul->getOperand(1);

3392 if ((Op0 == S && Op1 == S) || Op0 != S || Op1 != S) {

3393 UserToExtractIdx[U] = L;

3394 break;

3395 }

3396 }

3397 }

3398 }

3399 for (auto &[U, L] : UserToExtractIdx) {

3400 if (!IsExtractLaneEquivalentToZero(Index, Val->getScalarSizeInBits()) &&

3402 return false;

3403 }

3404 } else {

3405 const auto *EE = cast(I);

3406

3407 const auto *IdxOp = dyn_cast(EE->getIndexOperand());

3408 if (!IdxOp)

3409 return false;

3410

3411 return !EE->users().empty() && all_of(EE->users(), [&](const User *U) {

3412 if (!IsUserFMulScalarTy(U))

3413 return false;

3414

3415

3416

3417 const auto *BO = cast(U);

3418 const auto *OtherEE = dyn_cast(

3419 BO->getOperand(0) == EE ? BO->getOperand(1) : BO->getOperand(0));

3420 if (OtherEE) {

3421 const auto *IdxOp = dyn_cast(OtherEE->getIndexOperand());

3422 if (!IdxOp)

3423 return false;

3424 return IsExtractLaneEquivalentToZero(

3425 cast(OtherEE->getIndexOperand())

3426 ->getValue()

3427 .getZExtValue(),

3428 OtherEE->getType()->getScalarSizeInBits());

3429 }

3430 return true;

3431 });

3432 }

3433 return true;

3434 };

3435

3436 if (Opcode == Instruction::ExtractElement && (I || Scalar) &&

3437 ExtractCanFuseWithFmul())

3438 return 0;

3439

3440

3441 return ST->getVectorInsertExtractBaseCost();

3442}
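// Illustrative note (not part of the upstream listing): in the helper above,
// lane-0 extracts are already free unless an FPR->GPR move is really needed,
// and non-zero-lane FP extracts whose only users are scalar fmuls can also be
// free, because FMUL (by element) reads the lane directly, e.g.
//   fmul s0, s1, v2.s[3]
// Everything else falls back to the subtarget's insert/extract base cost.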

3443

3446 unsigned Index, Value *Op0,

3448 bool HasRealUse =

3449 Opcode == Instruction::InsertElement && Op0 && !isa(Op0);

3450 return getVectorInstrCostHelper(Opcode, Val, Index, HasRealUse);

3451}

3452

3456 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) {

3457 return getVectorInstrCostHelper(Opcode, Val, Index, false, nullptr, Scalar,

3458 ScalarUserAndIdx);

3459}

3460

3464 unsigned Index) {

3465 return getVectorInstrCostHelper(I.getOpcode(), Val, Index,

3466 true , &I);

3467}

3468

3470 VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract,

3472 if (isa(Ty))

3477 return DemandedElts.popcount() * (Insert + Extract) *

3479}

3480

3486

3487

3488

3489

3490

3491 if (auto *VTy = dyn_cast(Ty))

3494

3495

3498 Op2Info, Args, CxtI);

3499

3500

3503

3504 switch (ISD) {

3505 default:

3507 Op2Info);

3510

3511

3512

3513

3515 Instruction::Add, Ty, CostKind,

3520 Instruction::Select, Ty, CostKind,

3524 return Cost;

3525 }

3526 [[fallthrough]];

3531

3532

3533

3540 return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1;

3541 }

3542 }

3543

3544

3545

3546

3547 if (!VT.isVector() && VT.getSizeInBits() > 64)

3549

3551 Opcode, Ty, CostKind, Op1Info, Op2Info);

3554

3555

3556 if (isa(Ty) && cast(Ty)

3557 ->getPrimitiveSizeInBits()

3558 .getFixedValue() < 128) {

3567

3569 if (nullptr != Entry)

3570 return Entry->Cost;

3571 }

3572

3573

3574 if (LT.second.getScalarType() == MVT::i8)

3576 else if (LT.second.getScalarType() == MVT::i16)

3578 return Cost;

3579 } else {

3580

3581

3582

3583

3586 if (auto *VTy = dyn_cast(Ty)) {

3589 return (4 + DivCost) * VTy->getNumElements();

3590 }

3591 }

3592

3593

3595 CostKind, Op1Info, Op2Info);

3597 Op1Info, Op2Info);

3598 }

3599

3600

3601

3603 }

3604 return Cost;

3605 }

3607

3608

3609 if (LT.second == MVT::v2i64 && ST->hasSVE())

3610 return LT.first;

3611

3612

3613

3614

3615

3616

3617

3618

3619

3620

3621

3622

3623

3624 if (LT.second != MVT::v2i64 || isWideningInstruction(Ty, Opcode, Args))

3625 return LT.first;

3626 return cast(Ty)->getElementCount().getKnownMinValue() *

3629 nullptr, nullptr) *

3630 2 +

3632 nullptr, nullptr));

3640

3641

3642 return LT.first;

3643

3645

3647 (Ty->isHalfTy() && ST->hasFullFP16())) &&

3648 CxtI &&

3652 return 0;

3653 [[fallthrough]];

3656

3657

3660 return 2 * LT.first;

3662 return LT.first;

3663 [[fallthrough]];

3666

3667

3669 return 2 * LT.first;

3670

3672 Op2Info);

3674

3675

3679 Op2Info);

3680 }

3681}

3682

3686

3687

3688

3689

3691 int MaxMergeDistance = 64;

3692

3695 return NumVectorInstToHideOverhead;

3696

3697

3698

3699 return 1;

3700}

3701

3706

3709 Op1Info, Op2Info, I);

3710

3712

3713

3714 if (isa(ValTy) && ISD == ISD::SELECT) {

3715

3716 const int AmortizationCost = 20;

3717

3718

3719

3724 VecPred = CurrentPred;

3725 }

3726

3727

3732 static const auto ValidMinMaxTys = {

3733 MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,

3734 MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32, MVT::v2f64};

3735 static const auto ValidFP16MinMaxTys = {MVT::v4f16, MVT::v8f16};

3736

3738 if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }) ||

3739 (ST->hasFullFP16() &&

3740 any_of(ValidFP16MinMaxTys, [&LT](MVT M) { return M == LT.second; })))

3741 return LT.first;

3742 }

3743

3745 VectorSelectTbl[] = {

3746 { ISD::SELECT, MVT::v2i1, MVT::v2f32, 2 },

3747 { ISD::SELECT, MVT::v2i1, MVT::v2f64, 2 },

3748 { ISD::SELECT, MVT::v4i1, MVT::v4f32, 2 },

3749 { ISD::SELECT, MVT::v4i1, MVT::v4f16, 2 },

3750 { ISD::SELECT, MVT::v8i1, MVT::v8f16, 2 },

3751 { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 },

3752 { ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 },

3753 { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 },

3754 { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost },

3755 { ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost },

3756 { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }

3757 };

3758

3765 return Entry->Cost;

3766 }

3767 }

3768

3769 if (isa(ValTy) && ISD == ISD::SETCC) {

3771

3772 if (LT.second == MVT::v4f16 && !ST->hasFullFP16())

3773 return LT.first * 4;

3774 }

3775

3776

3777

3778

3784 return 0;

3785

3786

3787

3789 Op1Info, Op2Info, I);

3790}

3791

3795 if (ST->requiresStrictAlign()) {

3796

3797

3799 }

3800 Options.AllowOverlappingLoads = true;

3803

3804

3805

3806 Options.LoadSizes = {8, 4, 2, 1};

3807 Options.AllowedTailExpansions = {3, 5, 6};

3809}
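// Illustrative note (not part of the upstream listing): with these options a
// memcmp expansion uses overlapping 8/4/2/1-byte loads, and odd tail lengths
// of 3, 5 or 6 bytes are allowed because each can be covered by two loads
// (2+1, 4+1 and 4+2 bytes respectively), so no byte-by-byte loop is needed.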

3810

3812 return ST->hasSVE();

3813}

3814

3823 if (!LT.first.isValid())

3825

3826

3827 auto *VT = cast(Src);

3828 if (VT->getElementType()->isIntegerTy(1))

3830

3831

3832

3833

3834

3837

3838 return LT.first;

3839}

3840

3841

3842

3845 assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&

3846 "Should be called on only load or stores.");

3847 switch (Opcode) {

3848 case Instruction::Load:

3851 return ST->getGatherOverhead();

3852 break;

3853 case Instruction::Store:

3856 return ST->getScatterOverhead();

3857 break;

3858 default:

3860 }

3861}

3862

3864 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,

3869 auto *VT = cast(DataTy);

3871 if (!LT.first.isValid())

3873

3874

3875 if (!LT.second.isVector() ||

3877 VT->getElementType()->isIntegerTy(1))

3879

3880

3881

3882

3883

3886

3887 ElementCount LegalVF = LT.second.getVectorElementCount();

3890 {TTI::OK_AnyValue, TTI::OP_None}, I);

3891

3894}
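// Illustrative note (not part of the upstream listing): the gather/scatter
// cost above is roughly
//   LT.first * (scalar load/store cost * gather-or-scatter overhead) * lanes
// where the overhead comes from the -sve-gather-overhead /
// -sve-scatter-overhead options when given, or the subtarget's tuned value
// otherwise. This makes gathers markedly more expensive than contiguous
// accesses, which steers the vectorizer away from them when possible.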

3895

3898}

3899

3907

3908 if (VT == MVT::Other)

3911

3913 if (!LT.first.isValid())

3915

3916

3917

3918

3919

3920

3921 if (auto *VTy = dyn_cast(Ty))

3923 (VTy->getElementType()->isIntegerTy(1) &&

3924 !VTy->getElementCount().isKnownMultipleOf(

3927

3928

3930 return LT.first;

3931

3933 return 1;

3934

3935 if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&

3936 LT.second.is128BitVector() && (!Alignment || *Alignment < Align(16))) {

3937

3938

3939

3940

3941

3942 const int AmortizationCost = 6;

3943

3944 return LT.first * 2 * AmortizationCost;

3945 }

3946

3947

3949 return LT.first;

3950

3952

3954

3955 if (VT == MVT::v4i8)

3956 return 2;

3957

3958 return cast(Ty)->getNumElements() * 2;

3959 }

3962 if (!isPowerOf2_32(EltSize) || EltSize < 8 || EltSize > 64 ||

3964 *Alignment != Align(1))

3965 return LT.first;

3966

3967

3969 return LT.first;

3970

3971

3972

3973

3978 while (!TypeWorklist.empty()) {

3983 continue;

3984 }

3985

3986 unsigned PrevPow2 = NextPowerOf2(CurrNumElements) / 2;

3990 }

3991 return Cost;

3992 }

3993

3994 return LT.first;

3995}
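// Illustrative note (not part of the upstream listing): the worklist loop
// above splits a non-power-of-2 NEON vector memory op into power-of-2 pieces.
// For example, an align-1 store of <7 x i8> would be modelled as
// v4i8 + v2i8 + v1i8, i.e. a cost of 3, rather than being scalarized element
// by element.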

3996

4000 bool UseMaskForCond, bool UseMaskForGaps) {

4001 assert(Factor >= 2 && "Invalid interleave factor");

4002 auto *VecVTy = cast(VecTy);

4003

4006

4007

4008

4009 if (!VecTy->isScalableTy() && (UseMaskForCond || UseMaskForGaps))

4011

4012 if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {

4013 unsigned MinElts = VecVTy->getElementCount().getKnownMinValue();

4014 auto *SubVecTy =

4016 VecVTy->getElementCount().divideCoefficientBy(Factor));

4017

4018

4019

4020

4021 bool UseScalable;

4022 if (MinElts % Factor == 0 &&

4025 }

4026

4029 UseMaskForCond, UseMaskForGaps);

4030}
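// Illustrative note (not part of the upstream listing): for interleave factors
// the target supports directly (up to getMaxSupportedInterleaveFactor(), i.e.
// 4 on AArch64) and a legal sub-vector type, the cost above reflects a single
// ld2/ld3/ld4 or st2/st3/st4 family lowering; anything else falls back to the
// generic interleaved-access cost in BasicTTIImpl.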

4031

4036 for (auto *I : Tys) {

4037 if (!I->isVectorTy())

4038 continue;

4039 if (I->getScalarSizeInBits() * cast(I)->getNumElements() ==

4040 128)

4043 }

4044 return Cost;

4045}

4046

4049}

4050

4051

4052

4053

4054

4055static void

4058 enum { MaxStridedLoads = 7 };

4060 int StridedLoads = 0;

4061

4062

4063 for (const auto BB : L->blocks()) {

4064 for (auto &I : *BB) {

4065 LoadInst *LMemI = dyn_cast(&I);

4066 if (!LMemI)

4067 continue;

4068

4070 if (L->isLoopInvariant(PtrValue))

4071 continue;

4072

4073 const SCEV *LSCEV = SE.getSCEV(PtrValue);

4074 const SCEVAddRecExpr *LSCEVAddRec = dyn_cast(LSCEV);

4075 if (!LSCEVAddRec || !LSCEVAddRec->isAffine())

4076 continue;

4077

4078

4079

4080

4081 ++StridedLoads;

4082

4083

4084 if (StridedLoads > MaxStridedLoads / 2)

4085 return StridedLoads;

4086 }

4087 }

4088 return StridedLoads;

4089 };

4090

4091 int StridedLoads = countStridedLoads(L, SE);

4092 LLVM_DEBUG(dbgs() << "falkor-hwpf: detected " << StridedLoads

4093 << " strided loads\n");

4094

4095

4096 if (StridedLoads) {

4097 UP.MaxCount = 1 << Log2_32(MaxStridedLoads / StridedLoads);

4098 LLVM_DEBUG(dbgs() << "falkor-hwpf: setting unroll MaxCount to "

4100 }

4101}
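// Illustrative note (not part of the upstream listing): Falkor's hardware
// prefetcher can only track a small number of strided load streams
// (MaxStridedLoads above), so runtime unrolling is capped such that
// strided-loads * unroll-count stays within that budget. For example, a loop
// with two strided loads gets MaxCount = 1 << Log2_32(7 / 2) = 2.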

4102

4103

4104

4105static void

4109

4110

4111

4112

4113

4114

4115 if (!L->isInnermost() || !L->getExitBlock() || L->getNumBlocks() > 8)

4116 return;

4117

4119 if (isa(BTC) || isa(BTC) ||

4122 return;

4124 return;

4125

4126 int64_t Size = 0;

4127 for (auto *BB : L->getBlocks()) {

4128 for (auto &I : *BB) {

4129 if (!isa(&I) && isa(&I))

4130 return;

4134 }

4135 }

4136

4137

4139

4140

4141

4142 BasicBlock *Header = L->getHeader();

4143 if (Header == L->getLoopLatch()) {

4144 if (Size > 8)

4145 return;

4146

4149 for (auto *BB : L->blocks()) {

4150 for (auto &I : *BB) {

4152 if (Ptr)

4153 continue;

4156 continue;

4157 if (isa(&I))

4158 LoadedValues.insert(&I);

4159 else

4160 Stores.push_back(cast(&I));

4161 }

4162 }

4163

4164

4165

4166 unsigned MaxInstsPerLine = 16;

4167 unsigned UC = 1;

4168 unsigned BestUC = 1;

4169 unsigned SizeWithBestUC = BestUC * Size;

4170 while (UC <= 8) {

4171 unsigned SizeWithUC = UC * Size;

4172 if (SizeWithUC > 48)

4173 break;

4174 if ((SizeWithUC % MaxInstsPerLine) == 0 ||

4175 (SizeWithBestUC % MaxInstsPerLine) < (SizeWithUC % MaxInstsPerLine)) {

4176 BestUC = UC;

4177 SizeWithBestUC = BestUC * Size;

4178 }

4179 UC++;

4180 }

4181

4182 if (BestUC == 1 || none_of(Stores, [&LoadedValues](StoreInst *SI) {

4183 return LoadedValues.contains(SI->getOperand(0));

4184 }))

4185 return;

4186

4189 return;

4190 }

4191

4192

4193

4194 auto *Term = dyn_cast(Header->getTerminator());

4195 auto *Latch = L->getLoopLatch();

4197 if (!Term || !Term->isConditional() || Preds.size() == 1 ||

4198 none_of(Preds, [Header](BasicBlock *Pred) { return Header == Pred; }) ||

4199 none_of(Preds, [L](BasicBlock *Pred) { return L->contains(Pred); }))

4200 return;

4201

4202 std::function<bool(Instruction *, unsigned)> DependsOnLoopLoad =

4204 if (isa(I) || L->isLoopInvariant(I) || Depth > 8)

4205 return false;

4206

4207 if (isa(I))

4208 return true;

4209

4210 return any_of(I->operands(), [&](Value *V) {

4211 auto *I = dyn_cast(V);

4212 return I && DependsOnLoopLoad(I, Depth + 1);

4213 });

4214 };

4219 DependsOnLoopLoad(I, 0)) {

4221 }

4222}

4223

4227

4229

4231

4232

4233

4234

4235 if (L->getLoopDepth() > 1)

4237

4238

4240

4241

4243 case AArch64Subtarget::AppleA14:

4244 case AArch64Subtarget::AppleA15:

4245 case AArch64Subtarget::AppleA16:

4246 case AArch64Subtarget::AppleM4:

4248 break;

4249 case AArch64Subtarget::Falkor:

4252 break;

4253 default:

4254 break;

4255 }

4256

4257

4258

4259

4260 for (auto *BB : L->getBlocks()) {

4261 for (auto &I : *BB) {

4262

4263 if (I.getType()->isVectorTy())

4264 return;

4265

4266 if (isa(I) || isa(I)) {

4269 continue;

4270 }

4271 return;

4272 }

4273 }

4274 }

4275

4276

4277

4278

4279

4281 !ST->getSchedModel().isOutOfOrder()) {

4286

4289 }

4290}

4291

4295}

4296

4298 Type *ExpectedType) {

4300 default:

4301 return nullptr;

4302 case Intrinsic::aarch64_neon_st2:

4303 case Intrinsic::aarch64_neon_st3:

4304 case Intrinsic::aarch64_neon_st4: {

4305

4306 StructType *ST = dyn_cast(ExpectedType);

4307 if (!ST)

4308 return nullptr;

4309 unsigned NumElts = Inst->arg_size() - 1;

4310 if (ST->getNumElements() != NumElts)

4311 return nullptr;

4312 for (unsigned i = 0, e = NumElts; i != e; ++i) {

4314 return nullptr;

4315 }

4318 for (unsigned i = 0, e = NumElts; i != e; ++i) {

4321 }

4322 return Res;

4323 }

4324 case Intrinsic::aarch64_neon_ld2:

4325 case Intrinsic::aarch64_neon_ld3:

4326 case Intrinsic::aarch64_neon_ld4:

4327 if (Inst->getType() == ExpectedType)

4328 return Inst;

4329 return nullptr;

4330 }

4331}

4332

4336 default:

4337 break;

4338 case Intrinsic::aarch64_neon_ld2:

4339 case Intrinsic::aarch64_neon_ld3:

4340 case Intrinsic::aarch64_neon_ld4:

4341 Info.ReadMem = true;

4342 Info.WriteMem = false;

4344 break;

4345 case Intrinsic::aarch64_neon_st2:

4346 case Intrinsic::aarch64_neon_st3:

4347 case Intrinsic::aarch64_neon_st4:

4348 Info.ReadMem = false;

4349 Info.WriteMem = true;

4351 break;

4352 }

4353

4355 default:

4356 return false;

4357 case Intrinsic::aarch64_neon_ld2:

4358 case Intrinsic::aarch64_neon_st2:

4359 Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;

4360 break;

4361 case Intrinsic::aarch64_neon_ld3:

4362 case Intrinsic::aarch64_neon_st3:

4363 Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;

4364 break;

4365 case Intrinsic::aarch64_neon_ld4:

4366 case Intrinsic::aarch64_neon_st4:

4367 Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;

4368 break;

4369 }

4370 return true;

4371}
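// Illustrative note (not part of the upstream listing): getTgtMemIntrinsic
// marks the NEON ldN intrinsics as read-only and the stN intrinsics as
// write-only, and gives matching ld2/st2, ld3/st3 and ld4/st4 pairs the same
// MatchingId. Passes such as EarlyCSE use this, together with
// getOrCreateResultFromMemIntrinsic above, to forward a previously loaded
// ldN result to a later compatible access.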

4372

4373

4374

4375

4376

4377

4379 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {

4380 bool Considerable = false;

4381 AllowPromotionWithoutCommonHeader = false;

4382 if (!isa(&I))

4383 return false;

4384 Type *ConsideredSExtType =

4386 if (I.getType() != ConsideredSExtType)

4387 return false;

4388

4389

4390 for (const User *U : I.users()) {

4391 if (const GetElementPtrInst *GEPInst = dyn_cast(U)) {

4392 Considerable = true;

4393

4394

4395

4396 if (GEPInst->getNumOperands() > 2) {

4397 AllowPromotionWithoutCommonHeader = true;

4398 break;

4399 }

4400 }

4401 }

4402 return Considerable;

4403}

4404

4408 return true;

4409

4412 return false;

4413

4429 return true;

4430 default:

4431 return false;

4432 }

4433}

4434

4439

4440

4441

4442

4443 if (auto *VTy = dyn_cast(Ty))

4446

4448

4449 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())

4451

4453 if (LT.first > 1) {

4457 }

4458

4459 return LegalizationCost + 2;

4460}

4461

4466 if (LT.first > 1) {

4469 LegalizationCost *= LT.first - 1;

4470 }

4471

4473 assert(ISD && "Invalid opcode");

4474

4475 switch (ISD) {

4481 return LegalizationCost + 2;

4482 default:

4484 }

4485}

4486

4489 std::optional FMF,

4491

4492

4493

4494

4495 if (auto *VTy = dyn_cast(ValTy))

4498

4500 if (auto *FixedVTy = dyn_cast(ValTy)) {

4503

4504

4505 return BaseCost + FixedVTy->getNumElements();

4506 }

4507

4508 if (Opcode != Instruction::FAdd)

4510

4511 auto *VTy = cast(ValTy);

4515 return Cost;

4516 }

4517

4518 if (isa(ValTy))

4520

4522 MVT MTy = LT.second;

4524 assert(ISD && "Invalid opcode");

4525

4526

4527

4528

4529

4530

4531

4532

4533

4534 static const CostTblEntry CostTblNoPairwise[]{

4541 {ISD::OR, MVT::v8i8, 15},

4542 {ISD::OR, MVT::v16i8, 17},

4543 {ISD::OR, MVT::v4i16, 7},

4544 {ISD::OR, MVT::v8i16, 9},

4545 {ISD::OR, MVT::v2i32, 3},

4546 {ISD::OR, MVT::v4i32, 5},

4547 {ISD::OR, MVT::v2i64, 3},

4562 };

4563 switch (ISD) {

4564 default:

4565 break;

4568

4569

4570 MTy.isVector() && (EltTy->isFloatTy() || EltTy->isDoubleTy() ||

4571 (EltTy->isHalfTy() && ST->hasFullFP16()))) {

4575

4576

4577

4578

4579

4580

4581

4582

4583 return (LT.first - 1) + Log2_32(NElts);

4584 }

4585 break;

4587 if (const auto *Entry = CostTableLookup(CostTblNoPairwise, ISD, MTy))

4588 return (LT.first - 1) + Entry->Cost;

4589 break;

4593 const auto *Entry = CostTableLookup(CostTblNoPairwise, ISD, MTy);

4594 if (!Entry)

4595 break;

4596 auto *ValVTy = cast(ValTy);

4600 if (LT.first != 1) {

4601

4602

4606 ExtraCost *= LT.first - 1;

4607 }

4608

4609 auto Cost = ValVTy->getElementType()->isIntegerTy(1) ? 2 : Entry->Cost;

4610 return Cost + ExtraCost;

4611 }

4612 break;

4613 }

4615}

4616

4632 };

4633

4634

4635

4636

4637

4640

4644 EVT PromotedVT = LT.second.getScalarType() == MVT::i1

4646 : LT.second;

4649 if (Index < 0) {

4650 LegalizationCost =

4655 }

4656

4657

4658

4659 if (LT.second.getScalarType() == MVT::i1) {

4660 LegalizationCost +=

4665 }

4666 const auto *Entry =

4668 assert(Entry && "Illegal Type for Splice");

4669 LegalizationCost += Entry->Cost;

4670 return LegalizationCost * LT.first;

4671}

4672

4674 unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,

4677 std::optional BinOp) const {

4680

4681 if (Opcode != Instruction::Add)

4683

4684 if (InputTypeA != InputTypeB)

4686

4689

4694

4695 if (InputEVT == MVT::i8) {

4697 default:

4699 case 8:

4700 if (AccumEVT == MVT::i32)

4702 else if (AccumEVT != MVT::i64)

4704 break;

4705 case 16:

4706 if (AccumEVT == MVT::i64)

4708 else if (AccumEVT != MVT::i32)

4710 break;

4711 }

4712 } else if (InputEVT == MVT::i16) {

4713

4714

4717 } else

4719

4720

4721

4723 (OpAExtend != OpBExtend && !ST->hasMatMulInt8() &&

4726

4727 if (!BinOp || *BinOp != Instruction::Mul)

4729

4730 return Cost;

4731}
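// Illustrative note (not part of the upstream listing): the partial-reduction
// cost above models the dot-product style instructions: i8 inputs multiplied
// and accumulated into i32 (or, with wider forms, i64) map to udot/sdot, and
// mixing a sign- and a zero-extended input additionally requires the I8MM
// usdot/sudot instructions. Combinations without such an instruction return
// an invalid cost so the vectorizer will not form a partial reduction there.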

4732

4738

4739

4740

4741 if (!Mask.empty() && isa(Tp) && LT.second.isVector() &&

4743 Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) {

4744

4745

4746

4747

4748

4749 if (Args.size() >= 1 && isa(Args[0]) &&

4752 return std::max(1, LT.first / 4);

4753

4754

4755

4756

4757

4763 return LT.first;

4764

4765 unsigned TpNumElts = Mask.size();

4766 unsigned LTNumElts = LT.second.getVectorNumElements();

4767 unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts;

4771 for (unsigned N = 0; N < NumVecs; N++) {

4773

4774

4775 unsigned Source1, Source2;

4776 unsigned NumSources = 0;

4777 for (unsigned E = 0; E < LTNumElts; E++) {

4778 int MaskElt = (N * LTNumElts + E < TpNumElts) ? Mask[N * LTNumElts + E]

4780 if (MaskElt < 0) {

4782 continue;

4783 }

4784

4785

4786

4787 unsigned Source = MaskElt / LTNumElts;

4788 if (NumSources == 0) {

4789 Source1 = Source;

4790 NumSources = 1;

4791 } else if (NumSources == 1 && Source != Source1) {

4792 Source2 = Source;

4793 NumSources = 2;

4794 } else if (NumSources >= 2 && Source != Source1 && Source != Source2) {

4795 NumSources++;

4796 }

4797

4798

4799

4800 if (Source == Source1)

4801 NMask.push_back(MaskElt % LTNumElts);

4802 else if (Source == Source2)

4803 NMask.push_back(MaskElt % LTNumElts + LTNumElts);

4804 else

4805 NMask.push_back(MaskElt % LTNumElts);

4806 }

4807

4808

4809

4810 if (NumSources <= 2)

4813 NTp, NMask, CostKind, 0, nullptr, Args, CxtI);

4814 else

4815 Cost += LTNumElts;

4816 }

4817 return Cost;

4818 }

4819

4822

4823

4824

4825 if (IsExtractSubvector && LT.second.isFixedLengthVector()) {

4826 if (LT.second.is128BitVector() &&

4827 cast(SubTp)->getNumElements() ==

4828 LT.second.getVectorNumElements() / 2) {

4829 if (Index == 0)

4830 return 0;

4831 if (Index == (int)LT.second.getVectorNumElements() / 2)

4832 return 1;

4833 }

4835 }

4836

4837

4838

4839

4840

4841

4842

4843

4845 bool IsLoad = !Args.empty() && isa(Args[0]);

4846 if (IsLoad && LT.second.isVector() &&

4848 LT.second.getVectorElementCount()))

4849 return 0;

4850 }

4851

4852

4853

4856 all_of(Mask, [](int E) { return E < 8; }))

4858

4859

4860 if (!Mask.empty() && LT.second.isFixedLengthVector() &&

4863 return M.value() < 0 || M.value() == (int)M.index();

4864 }))

4865 return 0;

4866

4867

4868

4869 unsigned Unused;

4870 if (LT.second.isFixedLengthVector() &&

4871 LT.second.getVectorNumElements() == Mask.size() &&

4873 (isZIPMask(Mask, LT.second.getVectorNumElements(), Unused) ||

4874 isUZPMask(Mask, LT.second.getVectorNumElements(), Unused) ||

4875

4877 [&Mask](int M) { return M < 0 || M == Mask[0]; })))

4878 return 1;

4879

4884

4897

4898

4911

4912

4914 {TTI::SK_Select, MVT::v4i32, 2},

4917 {TTI::SK_Select, MVT::v4f32, 2},

4919

4934

4947

4962

4980

4998 };

4999 if (const auto *Entry = CostTableLookup(ShuffleTbl, Kind, LT.second))

5000 return LT.first * Entry->Cost;

5001 }

5002

5003 if (Kind == TTI::SK_Splice && isa(Tp))

5005

5006

5007

5009 LT.second.getSizeInBits() <= 128 && SubTp) {

5011 if (SubLT.second.isVector()) {

5012 int NumElts = LT.second.getVectorNumElements();

5013 int NumSubElts = SubLT.second.getVectorNumElements();

5014 if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)

5015 return SubLT.first;

5016 }

5017 }

5018

5019

5020 if (IsExtractSubvector)

5023 CxtI);

5024}
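// Illustrative note (not part of the upstream listing): in getShuffleCost
// above, a fixed-length mask that is an identity (possibly with undef lanes)
// is free, ZIP/UZP-style masks and splats of a single lane cost one
// instruction, extracting the low or high half of a 128-bit vector costs 0 or
// 1, and oversized shuffles are decomposed into per-128-bit-chunk shuffles
// whose costs are summed.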

5025

5030

5031

5033 if (isa(&I) || isa(&I)) {

5036 if (getPtrStride(*PSE, AccessTy, Ptr, TheLoop, Strides, true,

5037 false)

5038 .value_or(0) < 0)

5039 return true;

5040 }

5041 }

5042 }

5043 return false;

5044}

5045

5049 return ST->useFixedOverScalableIfEqualCost();

5050}

5051

5054}

5055

5057 if (!ST->hasSVE())

5058 return false;

5059

5060

5061

5062

5064 return false;

5065

5071

5072

5073

5074

5080

5082 Required))

5083 return false;

5084

5085

5086

5087 unsigned NumInsns = 0;

5089 NumInsns += BB->sizeWithoutDebug();

5090 }

5091

5092

5094}

5095

5098 StackOffset BaseOffset, bool HasBaseReg,

5099 int64_t Scale, unsigned AddrSpace) const {

5100

5101

5102

5103

5104

5105

5106

5111 AM.Scale = Scale;

5114

5115

5116 return AM.Scale != 0 && AM.Scale != 1;

5117 return -1;

5118}

5119

5122

5123

5124

5125

5126 if (I->getOpcode() == Instruction::Or &&

5127 isa(I->getNextNode()) &&

5128 cast(I->getNextNode())->isUnconditional())

5129 return true;

5130

5131 if (I->getOpcode() == Instruction::Add ||

5132 I->getOpcode() == Instruction::Sub)

5133 return true;

5134 }

5136}

5137

5140

5141

5142

5143

5144

5150

5152}

5153

5155 if (auto *Shuf = dyn_cast(V))

5156 return all_equal(Shuf->getShuffleMask());

5157 return false;

5158}

5159

5160

5161

5163 bool AllowSplat = false) {

5164

5166 return false;

5167

5168 auto areTypesHalfed = [](Value *FullV, Value *HalfV) {

5169 auto *FullTy = FullV->getType();

5170 auto *HalfTy = HalfV->getType();

5172 2 * HalfTy->getPrimitiveSizeInBits().getFixedValue();

5173 };

5174

5175 auto extractHalf = [](Value *FullV, Value *HalfV) {

5176 auto *FullVT = cast(FullV->getType());

5177 auto *HalfVT = cast(HalfV->getType());

5178 return FullVT->getNumElements() == 2 * HalfVT->getNumElements();

5179 };

5180

5182 Value *S1Op1 = nullptr, *S2Op1 = nullptr;

5185 return false;

5186

5187

5188

5190 S1Op1 = nullptr;

5192 S2Op1 = nullptr;

5193

5194

5195

5196 if ((S1Op1 && (!areTypesHalfed(S1Op1, Op1) || !extractHalf(S1Op1, Op1))) ||

5197 (S2Op1 && (!areTypesHalfed(S2Op1, Op2) || !extractHalf(S2Op1, Op2))))

5198 return false;

5199

5200

5201

5202 int M1Start = 0;

5203 int M2Start = 0;

5204 int NumElements = cast(Op1->getType())->getNumElements() * 2;

5205 if ((S1Op1 &&

5207 (S2Op1 &&

5209 return false;

5210

5211 if ((M1Start != 0 && M1Start != (NumElements / 2)) ||

5212 (M2Start != 0 && M2Start != (NumElements / 2)))

5213 return false;

5214 if (S1Op1 && S2Op1 && M1Start != M2Start)

5215 return false;

5216

5217 return true;

5218}

5219

5220

5221

5223 auto areExtDoubled = [](Instruction *Ext) {

5224 return Ext->getType()->getScalarSizeInBits() ==

5225 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();

5226 };

5227

5230 !areExtDoubled(cast(Ext1)) ||

5231 !areExtDoubled(cast(Ext2)))

5232 return false;

5233

5234 return true;

5235}

5236

5237

5239 Value *VectorOperand = nullptr;

5243 ElementIndex->getValue() == 1 &&

5244 isa(VectorOperand->getType()) &&

5245 cast(VectorOperand->getType())->getNumElements() == 2;

5246}

5247

5248

5251}

5252

5254

5255 auto *GEP = dyn_cast(Ptrs);

5256 if (!GEP || GEP->getNumOperands() != 2)

5257 return false;

5258

5260 Value *Offsets = GEP->getOperand(1);

5261

5262

5263 if (Base->getType()->isVectorTy() || !Offsets->getType()->isVectorTy())

5264 return false;

5265

5266

5267 if (isa(Offsets) || isa(Offsets)) {

5268 auto *OffsetsInst = cast(Offsets);

5269 if (OffsetsInst->getType()->getScalarSizeInBits() > 32 &&

5270 OffsetsInst->getOperand(0)->getType()->getScalarSizeInBits() <= 32)

5272 }

5273

5274

5275 return true;

5276}

5277

5278

5279

5280

5283 return true;

5286 Ops.push_back(&cast(Op)->getOperandUse(0));

5287 return true;

5288 }

5291 Value *ZExtOp = cast(Op)->getOperand(0);

5292 Ops.push_back(&cast(ZExtOp)->getOperandUse(0));

5293 Ops.push_back(&cast(Op)->getOperandUse(0));

5294 return true;

5295 }

5296 return false;

5297}

5298

5299

5300

5301

5305 switch (II->getIntrinsicID()) {

5306 case Intrinsic::aarch64_neon_smull:

5307 case Intrinsic::aarch64_neon_umull:

5309 true)) {

5312 return true;

5313 }

5314 [[fallthrough]];

5315

5316 case Intrinsic::fma:

5317 case Intrinsic::fmuladd:

5318 if (isa(I->getType()) &&

5319 cast(I->getType())->getElementType()->isHalfTy() &&

5320 !ST->hasFullFP16())

5321 return false;

5322 [[fallthrough]];

5323 case Intrinsic::aarch64_neon_sqdmull:

5324 case Intrinsic::aarch64_neon_sqdmulh:

5325 case Intrinsic::aarch64_neon_sqrdmulh:

5326

5331 return !Ops.empty();

5332 case Intrinsic::aarch64_neon_fmlal:

5333 case Intrinsic::aarch64_neon_fmlal2:

5334 case Intrinsic::aarch64_neon_fmlsl:

5335 case Intrinsic::aarch64_neon_fmlsl2:

5336

5341 return !Ops.empty();

5342 case Intrinsic::aarch64_sve_ptest_first:

5343 case Intrinsic::aarch64_sve_ptest_last:

5344 if (auto *IIOp = dyn_cast(II->getOperand(0)))

5345 if (IIOp->getIntrinsicID() == Intrinsic::aarch64_sve_ptrue)

5347 return !Ops.empty();

5348 case Intrinsic::aarch64_sme_write_horiz:

5349 case Intrinsic::aarch64_sme_write_vert:

5350 case Intrinsic::aarch64_sme_writeq_horiz:

5351 case Intrinsic::aarch64_sme_writeq_vert: {

5352 auto *Idx = dyn_cast(II->getOperand(1));

5353 if (!Idx || Idx->getOpcode() != Instruction::Add)

5354 return false;

5356 return true;

5357 }

5358 case Intrinsic::aarch64_sme_read_horiz:

5359 case Intrinsic::aarch64_sme_read_vert:

5360 case Intrinsic::aarch64_sme_readq_horiz:

5361 case Intrinsic::aarch64_sme_readq_vert:

5362 case Intrinsic::aarch64_sme_ld1b_vert:

5363 case Intrinsic::aarch64_sme_ld1h_vert:

5364 case Intrinsic::aarch64_sme_ld1w_vert:

5365 case Intrinsic::aarch64_sme_ld1d_vert:

5366 case Intrinsic::aarch64_sme_ld1q_vert:

5367 case Intrinsic::aarch64_sme_st1b_vert:

5368 case Intrinsic::aarch64_sme_st1h_vert:

5369 case Intrinsic::aarch64_sme_st1w_vert:

5370 case Intrinsic::aarch64_sme_st1d_vert:

5371 case Intrinsic::aarch64_sme_st1q_vert:

5372 case Intrinsic::aarch64_sme_ld1b_horiz:

5373 case Intrinsic::aarch64_sme_ld1h_horiz:

5374 case Intrinsic::aarch64_sme_ld1w_horiz:

5375 case Intrinsic::aarch64_sme_ld1d_horiz:

5376 case Intrinsic::aarch64_sme_ld1q_horiz:

5377 case Intrinsic::aarch64_sme_st1b_horiz:

5378 case Intrinsic::aarch64_sme_st1h_horiz:

5379 case Intrinsic::aarch64_sme_st1w_horiz:

5380 case Intrinsic::aarch64_sme_st1d_horiz:

5381 case Intrinsic::aarch64_sme_st1q_horiz: {

5382 auto *Idx = dyn_cast(II->getOperand(3));

5383 if (!Idx || Idx->getOpcode() != Instruction::Add)

5384 return false;

5386 return true;

5387 }

5388 case Intrinsic::aarch64_neon_pmull:

5390 return false;

5393 return true;

5394 case Intrinsic::aarch64_neon_pmull64:

5396 II->getArgOperand(1)))

5397 return false;

5398 Ops.push_back(&II->getArgOperandUse(0));

5399 Ops.push_back(&II->getArgOperandUse(1));

5400 return true;

5401 case Intrinsic::masked_gather:

5403 return false;

5404 Ops.push_back(&II->getArgOperandUse(0));

5405 return true;

5406 case Intrinsic::masked_scatter:

5408 return false;

5409 Ops.push_back(&II->getArgOperandUse(1));

5410 return true;

5411 default:

5412 return false;

5413 }

5414 }

5415

5416 auto ShouldSinkCondition = [](Value *Cond) -> bool {

5417 auto *II = dyn_cast(Cond);

5418 return II && II->getIntrinsicID() == Intrinsic::vector_reduce_or &&

5419 isa(II->getOperand(0)->getType());

5420 };

5421

5422 switch (I->getOpcode()) {

5423 case Instruction::GetElementPtr:

5424 case Instruction::Add:

5425 case Instruction::Sub:

5426

5427 for (unsigned Op = 0; Op < I->getNumOperands(); ++Op) {

5430 return true;

5431 }

5432 }

5433 break;

5434 case Instruction::Select: {

5435 if (!ShouldSinkCondition(I->getOperand(0)))

5436 return false;

5437

5438 Ops.push_back(&I->getOperandUse(0));

5439 return true;

5440 }

5441 case Instruction::Br: {

5442 if (cast(I)->isUnconditional())

5443 return false;

5444

5445 if (!ShouldSinkCondition(cast(I)->getCondition()))

5446 return false;

5447

5448 Ops.push_back(&I->getOperandUse(0));

5449 return true;

5450 }

5451 default:

5452 break;

5453 }

5454

5455 if (I->getType()->isVectorTy())

5456 return false;

5457

5458 switch (I->getOpcode()) {

5459 case Instruction::Sub:

5460 case Instruction::Add: {

5462 return false;

5463

5464

5465

5466 auto Ext1 = cast(I->getOperand(0));

5467 auto Ext2 = cast(I->getOperand(1));

5469 Ops.push_back(&Ext1->getOperandUse(0));

5470 Ops.push_back(&Ext2->getOperandUse(0));

5471 }

5472

5473 Ops.push_back(&I->getOperandUse(0));

5474 Ops.push_back(&I->getOperandUse(1));

5475

5476 return true;

5477 }

5478 case Instruction::Or: {

5479

5480

5481 if (ST->hasNEON()) {

5483 Value *MaskValue;

5484

5488 if (match(OtherAnd,

5490 Instruction *MainAnd = I->getOperand(0) == OtherAnd

5491 ? cast(I->getOperand(1))

5492 : cast(I->getOperand(0));

5493

5494

5495 if (I->getParent() != MainAnd->getParent() ||

5496 I->getParent() != OtherAnd->getParent())

5497 return false;

5498

5499

5500 if (I->getParent() != IA->getParent() ||

5501 I->getParent() != IB->getParent())

5502 return false;

5503

5506 Ops.push_back(&I->getOperandUse(0));

5507 Ops.push_back(&I->getOperandUse(1));

5508

5509 return true;

5510 }

5511 }

5512 }

5513

5514 return false;

5515 }

5516 case Instruction::Mul: {

5517 auto ShouldSinkSplatForIndexedVariant = [](Value *V) {

5518 auto *Ty = cast(V->getType());

5519

5520 if (Ty->isScalableTy())

5521 return false;

5522

5523

5524 return Ty->getScalarSizeInBits() == 16 || Ty->getScalarSizeInBits() == 32;

5525 };

5526

5527 int NumZExts = 0, NumSExts = 0;

5528 for (auto &Op : I->operands()) {

5529

5530 if (any_of(Ops, [&](Use *U) { return U->get() == Op; }))

5531 continue;

5532

5534 auto *Ext = cast(Op);

5535 auto *ExtOp = Ext->getOperand(0);

5536 if (isSplatShuffle(ExtOp) && ShouldSinkSplatForIndexedVariant(ExtOp))

5537 Ops.push_back(&Ext->getOperandUse(0));

5539

5540 if (isa(Ext))

5541 NumSExts++;

5542 else

5543 NumZExts++;

5544

5545 continue;

5546 }

5547

5549 if (!Shuffle)

5550 continue;

5551

5552

5553

5554

5560 NumSExts++;

5561 else

5562 NumZExts++;

5563 continue;

5564 }

5565

5567 InsertElementInst *Insert = dyn_cast(ShuffleOperand);

5568 if (!Insert)

5569 continue;

5570

5571 Instruction *OperandInstr = dyn_cast(Insert->getOperand(1));

5572 if (!OperandInstr)

5573 continue;

5574

5576 dyn_cast(Insert->getOperand(2));

5577

5578 if (!ElementConstant || !ElementConstant->isZero())

5579 continue;

5580

5581 unsigned Opcode = OperandInstr->getOpcode();

5582 if (Opcode == Instruction::SExt)

5583 NumSExts++;

5584 else if (Opcode == Instruction::ZExt)

5585 NumZExts++;

5586 else {

5587

5588

5589 unsigned Bitwidth = I->getType()->getScalarSizeInBits();

5593 continue;

5594 NumZExts++;

5595 }

5596

5597

5598

5600 Ops.push_back(&Insert->getOperandUse(1));

5603 }

5604

5605

5606 if (!Ops.empty() && (NumSExts == 2 || NumZExts == 2))

5607 return true;

5608

5609

5610 if (!ShouldSinkSplatForIndexedVariant(I))

5611 return false;

5612

5615 Ops.push_back(&I->getOperandUse(0));

5617 Ops.push_back(&I->getOperandUse(1));

5618

5619 return !Ops.empty();

5620 }

5621 case Instruction::FMul: {

5622

5623 if (I->getType()->isScalableTy())

5624 return false;

5625

5626 if (cast(I->getType())->getElementType()->isHalfTy() &&

5627 !ST->hasFullFP16())

5628 return false;

5629

5630

5632 Ops.push_back(&I->getOperandUse(0));

5634 Ops.push_back(&I->getOperandUse(1));

5635 return !Ops.empty();

5636 }

5637 default:

5638 return false;

5639 }

5640 return false;

5641}
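// Illustrative note (not part of the upstream listing): the operand-sinking
// logic above exists so that instruction selection in the user's block can
// fold the sunk value directly. For example, sinking a splat operand of a
// 32-bit vector multiply lets ISel pick the indexed form
//   mul v0.4s, v1.4s, v2.s[0]
// and sinking the extends that feed smull/umull-style intrinsics lets the
// widening multiply be selected as a single instruction.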

static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N)

SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn

static std::optional< Instruction * > instCombineSVEVectorMul(InstCombiner &IC, IntrinsicInst &II, Intrinsic::ID IID)

TailFoldingOption TailFoldingOptionLoc

static std::optional< Instruction * > instCombineSVEVectorFAdd(InstCombiner &IC, IntrinsicInst &II)

static std::optional< Instruction * > instCombineSVEVectorFuseMulAddSub(InstCombiner &IC, IntrinsicInst &II, bool MergeIntoAddendOp)

static void getFalkorUnrollingPreferences(Loop *L, ScalarEvolution &SE, TargetTransformInfo::UnrollingPreferences &UP)

bool SimplifyValuePattern(SmallVector< Value * > &Vec, bool AllowPoison)

static std::optional< Instruction * > instCombineSVESel(InstCombiner &IC, IntrinsicInst &II)

static bool shouldSinkVScale(Value *Op, SmallVectorImpl< Use * > &Ops)

We want to sink following cases: (add|sub|gep) A, ((mul|shl) vscale, imm); (add|sub|gep) A,...

static std::optional< Instruction * > tryCombineFromSVBoolBinOp(InstCombiner &IC, IntrinsicInst &II)

static std::optional< Instruction * > instCombineSVEUnpack(InstCombiner &IC, IntrinsicInst &II)

static cl::opt< unsigned > SVETailFoldInsnThreshold("sve-tail-folding-insn-threshold", cl::init(15), cl::Hidden)

static cl::opt< bool > EnableFixedwidthAutovecInStreamingMode("enable-fixedwidth-autovec-in-streaming-mode", cl::init(false), cl::Hidden)

static std::optional< Instruction * > instCombineSVEAllOrNoActive(InstCombiner &IC, IntrinsicInst &II, Intrinsic::ID IID)

static std::optional< Instruction * > instCombineSVEVectorFAddU(InstCombiner &IC, IntrinsicInst &II)

static bool areExtractExts(Value *Ext1, Value *Ext2)

Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth of the vector elements.

static cl::opt< bool > EnableLSRCostOpt("enable-aarch64-lsr-cost-opt", cl::init(true), cl::Hidden)

static bool shouldSinkVectorOfPtrs(Value *Ptrs, SmallVectorImpl< Use * > &Ops)

static std::optional< Instruction * > instCombineSVEVectorSub(InstCombiner &IC, IntrinsicInst &II)

static std::optional< Instruction * > instCombineLD1GatherIndex(InstCombiner &IC, IntrinsicInst &II)

static std::optional< Instruction * > instCombineSVEVectorFSub(InstCombiner &IC, IntrinsicInst &II)

static std::optional< Instruction * > processPhiNode(InstCombiner &IC, IntrinsicInst &II)

The function will remove redundant reinterprets casting in the presence of the control flow.

static std::optional< Instruction * > instCombineSVENoActiveUnaryErase(InstCombiner &IC, IntrinsicInst &II, int PredPos)

static std::optional< Instruction * > instCombineSVEInsr(InstCombiner &IC, IntrinsicInst &II)

static std::optional< Instruction * > instCombineST1ScatterIndex(InstCombiner &IC, IntrinsicInst &II)

static std::optional< Instruction * > instCombineSVESDIV(InstCombiner &IC, IntrinsicInst &II)

static std::optional< Instruction * > instCombineSVEST1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL)

static bool containsDecreasingPointers(Loop *TheLoop, PredicatedScalarEvolution *PSE)

static std::optional< Instruction * > instCombineSVEAllActive(IntrinsicInst &II, Intrinsic::ID IID)

static std::optional< Instruction * > instCombineSVENoActiveZero(InstCombiner &IC, IntrinsicInst &II)

static cl::opt< bool > SVEPreferFixedOverScalableIfEqualCost("sve-prefer-fixed-over-scalable-if-equal", cl::Hidden)

static bool isUnpackedVectorVT(EVT VecVT)

static std::optional< Instruction * > instCombineSVEDupX(InstCombiner &IC, IntrinsicInst &II)

static void getAppleRuntimeUnrollPreferences(Loop *L, ScalarEvolution &SE, TargetTransformInfo::UnrollingPreferences &UP, AArch64TTIImpl &TTI)

For Apple CPUs, we want to runtime-unroll loops to make better use if the OOO engine's wide instructi...

static std::optional< Instruction * > instCombineSVECmpNE(InstCombiner &IC, IntrinsicInst &II)

static std::optional< Instruction * > instCombineDMB(InstCombiner &IC, IntrinsicInst &II)

static std::optional< Instruction * > instCombineSVEVectorFSubU(InstCombiner &IC, IntrinsicInst &II)

static std::optional< Instruction * > instCombineRDFFR(InstCombiner &IC, IntrinsicInst &II)

static std::optional< Instruction * > instCombineMaxMinNM(InstCombiner &IC, IntrinsicInst &II)

static InstructionCost getHistogramCost(const IntrinsicCostAttributes &ICA)

static cl::opt< unsigned > SVEGatherOverhead("sve-gather-overhead", cl::init(10), cl::Hidden)

static std::optional< Instruction * > instCombineSVECondLast(InstCombiner &IC, IntrinsicInst &II)

static std::optional< Instruction * > instCombineSVEPTest(InstCombiner &IC, IntrinsicInst &II)

static std::optional< Instruction * > instCombineSVEZip(InstCombiner &IC, IntrinsicInst &II)

static std::optional< Instruction * > instCombineSVEDup(InstCombiner &IC, IntrinsicInst &II)

static cl::opt< unsigned > BaseHistCntCost("aarch64-base-histcnt-cost", cl::init(8), cl::Hidden, cl::desc("The cost of a histcnt instruction"))

static std::optional< Instruction * > instCombineConvertFromSVBool(InstCombiner &IC, IntrinsicInst &II)

static cl::opt< unsigned > CallPenaltyChangeSM("call-penalty-sm-change", cl::init(5), cl::Hidden, cl::desc("Penalty of calling a function that requires a change to PSTATE.SM"))

static std::optional< Instruction * > instCombineSVEUzp1(InstCombiner &IC, IntrinsicInst &II)

static std::optional< Instruction * > instCombineSVEVectorBinOp(InstCombiner &IC, IntrinsicInst &II)

static cl::opt< bool > EnableScalableAutovecInStreamingMode("enable-scalable-autovec-in-streaming-mode", cl::init(false), cl::Hidden)

static std::optional< Instruction * > instCombineSVETBL(InstCombiner &IC, IntrinsicInst &II)

static bool areOperandsOfVmullHighP64(Value *Op1, Value *Op2)

Check if Op1 and Op2 could be used with vmull_high_p64 intrinsic.

static Instruction::BinaryOps intrinsicIDToBinOpCode(unsigned Intrinsic)

static bool isSplatShuffle(Value *V)

static cl::opt< unsigned > InlineCallPenaltyChangeSM("inline-call-penalty-sm-change", cl::init(10), cl::Hidden, cl::desc("Penalty of inlining a call that requires a change to PSTATE.SM"))

static std::optional< Instruction * > instCombineSVEAllOrNoActiveUnary(InstCombiner &IC, IntrinsicInst &II)

static std::optional< Instruction * > instCombineSVELD1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL)

static std::optional< Instruction * > instCombineSVENoActiveReplace(InstCombiner &IC, IntrinsicInst &II, bool hasInactiveVector)

static std::optional< Instruction * > instCombineSVESrshl(InstCombiner &IC, IntrinsicInst &II)

static bool isSMEABIRoutineCall(const CallInst &CI)

static cl::opt< unsigned > DMBLookaheadThreshold("dmb-lookahead-threshold", cl::init(10), cl::Hidden, cl::desc("The number of instructions to search for a redundant dmb"))

static unsigned getSVEGatherScatterOverhead(unsigned Opcode, const AArch64Subtarget *ST)

static bool isOperandOfVmullHighP64(Value *Op)

Check if Op could be used with vmull_high_p64 intrinsic.

static std::optional< Instruction * > instCombineSVELast(InstCombiner &IC, IntrinsicInst &II)

static cl::opt< unsigned > NeonNonConstStrideOverhead("neon-nonconst-stride-overhead", cl::init(10), cl::Hidden)

static cl::opt< bool > EnableFalkorHWPFUnrollFix("enable-falkor-hwpf-unroll-fix", cl::init(true), cl::Hidden)

static std::optional< Instruction * > instCombineSVECntElts(InstCombiner &IC, IntrinsicInst &II, unsigned NumElts)

static bool hasPossibleIncompatibleOps(const Function *F)

Returns true if the function has explicit operations that can only be lowered using incompatible inst...

cl::opt< TailFoldingOption, true, cl::parser< std::string > > SVETailFolding("sve-tail-folding", cl::desc("Control the use of vectorisation using tail-folding for SVE where the" " option is specified in the form (Initial)[+(Flag1|Flag2|...)]:" "\ndisabled (Initial) No loop types will vectorize using " "tail-folding" "\ndefault (Initial) Uses the default tail-folding settings for " "the target CPU" "\nall (Initial) All legal loop types will vectorize using " "tail-folding" "\nsimple (Initial) Use tail-folding for simple loops (not " "reductions or recurrences)" "\nreductions Use tail-folding for loops containing reductions" "\nnoreductions Inverse of above" "\nrecurrences Use tail-folding for loops containing fixed order " "recurrences" "\nnorecurrences Inverse of above" "\nreverse Use tail-folding for loops requiring reversed " "predicates" "\nnoreverse Inverse of above"), cl::location(TailFoldingOptionLoc))

static bool areExtractShuffleVectors(Value *Op1, Value *Op2, bool AllowSplat=false)

Check if both Op1 and Op2 are shufflevector extracts of either the lower or upper half of the vector ...

static std::optional< Instruction * > instCombineSVEVectorAdd(InstCombiner &IC, IntrinsicInst &II)

static cl::opt< bool > EnableOrLikeSelectOpt("enable-aarch64-or-like-select", cl::init(true), cl::Hidden)

static cl::opt< unsigned > SVEScatterOverhead("sve-scatter-overhead", cl::init(10), cl::Hidden)

static std::optional< Instruction * > instCombineSVEDupqLane(InstCombiner &IC, IntrinsicInst &II)

This file a TargetTransformInfo::Concept conforming object specific to the AArch64 target machine.

AMDGPU Register Bank Select

MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL

This file provides a helper that implements much of the TTI interface in terms of the target-independ...

static Error reportError(StringRef Message)

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

Analysis containing CSE Info

static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))

Cost tables and simple lookup functions.

Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx

This file defines the DenseMap class.

This file provides the interface for the instcombine pass implementation.

This file defines the LoopVectorizationLegality class.

mir Rename Register Operands

static const Function * getCalledFunction(const Value *V)

uint64_t IntrinsicInst * II

const SmallVectorImpl< MachineOperand > & Cond

static uint64_t getBits(uint64_t Val, int Start, int End)

assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

static unsigned getScalarSizeInBits(Type *Ty)

static SymbolRef::Type getType(const Symbol *Sym)

This file describes how to lower LLVM code to machine code.

This pass exposes codegen information to IR-level passes.

static unsigned getBitWidth(Type *Ty, const DataLayout &DL)

Returns the bitwidth of the given scalar or pointer type.

bool isNeonAvailable() const

Returns true if the target has NEON and the function at runtime is known to have NEON enabled (e....

unsigned getVectorInsertExtractBaseCost() const

ARMProcFamilyEnum getProcFamily() const

Returns ARM processor family.

unsigned getMaxInterleaveFactor() const

bool isSVEorStreamingSVEAvailable() const

Returns true if the target has access to either the full range of SVE instructions,...

TailFoldingOpts getSVETailFoldingDefaultOpts() const

bool useSVEForFixedLengthVectors() const

unsigned getEpilogueVectorizationMinVF() const

unsigned getMinSVEVectorSizeInBits() const

bool isSVEAvailable() const

Returns true if the target has SVE and can use the full range of SVE instructions,...

InstructionCost getSpliceCost(VectorType *Tp, int Index)

InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)

InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr)

InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const

Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...

InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)

bool shouldTreatInstructionLikeSelect(const Instruction *I)

InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr)

bool prefersVectorizedAddressing() const

InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)

InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)

InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp) const

InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index)

bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const

Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...

unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const

bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const

unsigned getEpilogueVectorizationMinVF() const

Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType)

bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const

bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader)

See if I should be considered for address type promotion.

InstructionCost getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy, TTI::TargetCostKind CostKind)

InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)

InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)

uint64_t getFeatureMask(const Function &F) const

std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const

bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const

bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2)

InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)

bool isElementTypeLegalForScalableVector(Type *Ty) const

InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys)

bool preferFixedOverScalableIfEqualCost() const

bool areInlineCompatible(const Function *Caller, const Function *Callee) const

bool enableScalableVectorization() const

bool useNeonVector(const Type *Ty) const

InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)

TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth)

bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const

TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const

InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)

bool isMultiversionedFunction(const Function &F) const

unsigned getMaxNumElements(ElementCount VF) const

Try to return an estimate cost factor that can be used as a multiplier when scalarizing an operation ...

InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, ArrayRef< Value * > VL={})

InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)

bool preferPredicateOverEpilogue(TailFoldingInfo *TFI)

bool isLegalMaskedGatherScatter(Type *DataType) const

unsigned getMaxInterleaveFactor(ElementCount VF)

InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)

void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)

InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)

InstructionCost getIntImmCost(int64_t Val)

Calculate the cost of materializing a 64-bit value.

std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const

void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)

bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info)

TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const

InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)

bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst, Type *Src)

InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)

EVT getPromotedVTForPredicate(EVT VT) const

unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL, bool UseScalable) const

Returns the number of interleaved accesses that will be generated when lowering accesses of the given...

bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL, bool &UseScalable) const

Returns true if VecTy is a legal interleaved access type.

Class for arbitrary precision integers.

bool isNegatedPowerOf2() const

Check if this APInt's negated value is a power of two greater than zero.

unsigned popcount() const

Count the number of bits set.

unsigned countLeadingOnes() const

void negate()

Negate this APInt in place.

APInt sextOrTrunc(unsigned width) const

Sign extend or truncate to width.

unsigned logBase2() const

APInt ashr(unsigned ShiftAmt) const

Arithmetic right-shift function.

bool isPowerOf2() const

Check if this APInt's value is a power of two greater than zero.

static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)

Constructs an APInt value that has the top hiBitsSet bits set.

int64_t getSExtValue() const

Get sign extended value.
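The APInt helpers listed above compose as in the following minimal, self-contained sketch; the bit widths and constants are arbitrary examples.

#include "llvm/ADT/APInt.h"
#include <cassert>
using namespace llvm;

int main() {
  APInt A(64, 16);                        // 64-bit value 16
  assert(A.isPowerOf2() && A.logBase2() == 4);
  assert(A.popcount() == 1);              // exactly one bit set

  APInt B(64, 8);
  B.negate();                             // B is now -8, negated in place
  assert(B.isNegatedPowerOf2());          // -(-8) == 8 is a power of two

  APInt C = A.sextOrTrunc(32);            // sign-extend or truncate to 32 bits
  assert(C.getSExtValue() == 16);

  APInt Hi = APInt::getHighBitsSet(8, 2); // 0b1100'0000
  assert(Hi.countLeadingOnes() == 2);
  assert(Hi.ashr(6).getSExtValue() == -1); // arithmetic shift keeps the sign
  return 0;
}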

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

LLVM Basic Block Representation.

bool isTypeLegal(Type *Ty)

InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)

Get intrinsic cost based on arguments.

InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)

void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)

InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)

TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *Ty, int &Index, VectorType *&SubTy) const

InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)

Try to calculate op costs for min/max reduction operations.

InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)

InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)

InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)

InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)

bool areInlineCompatible(const Function *Caller, const Function *Callee) const

void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)

InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind)

Compute a cost of the given call instruction.

InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)

std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const

Estimate the cost of type-legalization and the legalized type.

InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)

InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, ArrayRef< Value * > VL={})

Estimate the overhead of scalarizing an instruction.

InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)

bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0)

static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)

Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...

Function * getCalledFunction() const

Returns the function called, or null if this is an indirect function invocation or the function signa...

Value * getArgOperand(unsigned i) const

unsigned arg_size() const

This class represents a function call, abstracting a target machine's calling convention.

Predicate

This enumeration lists the possible predicates for CmpInst subclasses.

@ FCMP_OEQ

0 0 0 1 True if ordered and equal

@ FCMP_OLT

0 1 0 0 True if ordered and less than

@ FCMP_OGT

0 0 1 0 True if ordered and greater than

@ FCMP_OGE

0 0 1 1 True if ordered and greater than or equal

@ FCMP_OLE

0 1 0 1 True if ordered and less than or equal

@ FCMP_UNE

1 1 1 0 True if unordered or not equal

@ FCMP_UNO

1 0 0 0 True if unordered: isnan(X) | isnan(Y)

bool isIntPredicate() const

An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...

static ConstantAggregateZero * get(Type *Ty)

This is the shared class of boolean and integer constants.

bool isZero() const

This is just a convenience method to make client code smaller for a common case.

const APInt & getValue() const

Return the constant as an APInt value reference.

static Constant * get(StructType *T, ArrayRef< Constant * > V)

This is an important base class in LLVM.

static Constant * getNullValue(Type *Ty)

Constructor to create a '0' constant of arbitrary type.
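A small sketch of the constant factories referenced here (null values, aggregate zero, and integer constants); the chosen types and values are illustrative only.

#include "llvm/ADT/APInt.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  IntegerType *I32 = Type::getInt32Ty(Ctx);
  auto *VecTy = FixedVectorType::get(I32, 4);

  Constant *ScalarZero = Constant::getNullValue(I32);    // i32 0
  Constant *VecZero = ConstantAggregateZero::get(VecTy); // <4 x i32> zeroinitializer
  ConstantInt *C40 = ConstantInt::get(I32, 40);
  const APInt &V = C40->getValue();                      // 40 as an APInt

  bool OK = ScalarZero && VecZero && !C40->isZero() && V == 40;
  return OK ? 0 : 1;
}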

This class represents an Operation in the Expression.

A parsed version of the target data layout string and methods for querying it.

iterator find(const_arg_type_t< KeyT > Val)

static constexpr ElementCount getScalable(ScalarTy MinVal)

static constexpr ElementCount getFixed(ScalarTy MinVal)
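The two ElementCount factories are what distinguish fixed-width from scalable vector shapes. A minimal sketch follows, using VectorType::get as documented further down this index; the element count of 4 is arbitrary.

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/TypeSize.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  ElementCount Fixed = ElementCount::getFixed(4);       // exactly 4 lanes
  ElementCount Scalable = ElementCount::getScalable(4); // vscale x 4 lanes

  auto *V4 = VectorType::get(Type::getInt32Ty(Ctx), Fixed);      // <4 x i32>
  auto *NxV4 = VectorType::get(Type::getInt32Ty(Ctx), Scalable); // <vscale x 4 x i32>

  bool OK = !Fixed.isScalable() && Scalable.isScalable() &&
            Scalable.getKnownMinValue() == 4 && V4 && NxV4;
  return OK ? 0 : 1;
}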

This provides a helper for copying FMF from an instruction or setting specified flags.

Convenience struct for specifying and reasoning about fast-math flags.

bool allowContract() const

static FixedVectorType * get(Type *ElementType, unsigned NumElts)

an instruction for type-safe pointer arithmetic to access elements of arrays and structs

bool isEquality() const

Return true if this predicate is either EQ or NE.

Value * CreateVScale(Constant *Scaling, const Twine &Name="")

Create a call to llvm.vscale, multiplied by Scaling.

Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")

Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")

CallInst * CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec, Value *Idx, const Twine &Name="")

Create a call to the vector.insert intrinsic.

Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")

IntegerType * getIntNTy(unsigned N)

Fetch the type representing an N-bit integer.

Type * getDoubleTy()

Fetch the type representing a 64-bit floating point value.

Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")

Return a vector value that contains V broadcasted to NumElts elements.

CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")

Create a call to Masked Load intrinsic.

Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)

IntegerType * getInt32Ty()

Fetch the type representing a 32-bit integer.

Type * getHalfTy()

Fetch the type representing a 16-bit floating point value.

IntegerType * getInt64Ty()

Fetch the type representing a 64-bit integer.

Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())

ConstantInt * getInt64(uint64_t C)

Get a constant 64-bit value.

CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")

Create a call to intrinsic ID with Args, mangled using Types.

Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")

PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")

Value * CreateBinOpFMF(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)

Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")

LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)

Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...

Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")

StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)

CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)

Create a call to Masked Store intrinsic.

Type * getFloatTy()

Fetch the type representing a 32-bit floating point value.

Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")

void SetInsertPoint(BasicBlock *TheBB)

This specifies that created instructions should be appended to the end of the specified block.

This provides a uniform API for creating instructions and inserting them into a basic block: either a...
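A minimal IRBuilder sketch using a few of the Create* helpers listed above. The module, function, and value names are placeholders, and the generated IR is only meant to show the insertion-point workflow, not anything produced by this file.

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);                       // placeholder module name

  Type *I32 = Type::getInt32Ty(Ctx);
  Type *PtrTy = PointerType::getUnqual(I32);
  auto *FnTy = FunctionType::get(I32, {PtrTy}, /*isVarArg=*/false);
  Function *F =
      Function::Create(FnTy, Function::ExternalLinkage, "load_add", &M);
  BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);

  IRBuilder<> B(Ctx);
  B.SetInsertPoint(BB);                        // append to the end of "entry"
  Value *Ptr = F->getArg(0);
  Value *Val = B.CreateLoad(I32, Ptr, "val");  // load i32 from the argument
  Value *Sum = B.CreateAdd(Val, B.getInt32(1), "sum");
  B.CreateStore(Sum, Ptr);
  B.CreateRet(Sum);

  // verifyFunction returns true when the IR is broken.
  return verifyFunction(*F, &errs()) ? 1 : 0;
}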

This instruction inserts a single (scalar) element into a VectorType value.

static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)

The core instruction combiner logic.

virtual Instruction * eraseInstFromFunction(Instruction &I)=0

Combiner aware instruction erasure.

Instruction * replaceInstUsesWith(Instruction &I, Value *V)

A combiner-aware RAUW-like routine.

Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)

Replace operand of instruction and add old operand to the worklist.

static InstructionCost getInvalid(CostType Val=0)

std::optional< CostType > getValue() const

This function is intended to be used as sparingly as possible, since the class provides the full rang...

unsigned getOpcode() const

Returns a member of one of the enums like Instruction::Add.

void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())

Copy metadata from SrcInst to this instruction.

Class to represent integer types.

bool hasGroups() const

Returns true if we have any interleave groups.

const SmallVectorImpl< Type * > & getArgTypes() const

Type * getReturnType() const

const SmallVectorImpl< const Value * > & getArgs() const

Intrinsic::ID getID() const

A wrapper class for inspecting calls to intrinsic functions.

Intrinsic::ID getIntrinsicID() const

Return the intrinsic ID of this intrinsic.

This is an important class for using LLVM in a threaded context.

An instruction for reading from memory.

Value * getPointerOperand()

iterator_range< block_iterator > blocks() const

RecurrenceSet & getFixedOrderRecurrences()

Return the fixed-order recurrences found in the loop.

PredicatedScalarEvolution * getPredicatedScalarEvolution() const

const ReductionList & getReductionVars() const

Returns the reduction variables found in the loop.

Represents a single loop in the control flow graph.

uint64_t getScalarSizeInBits() const

unsigned getVectorNumElements() const

bool isVector() const

Return true if this is a vector value type.

void addIncoming(Value *V, BasicBlock *BB)

Add an incoming value to the end of the PHI list.

static PointerType * getUnqual(Type *ElementType)

This constructs a pointer to an object of the specified type in the default address space (address sp...

static PoisonValue * get(Type *T)

Static factory methods - Return an 'poison' object of the specified type.

An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...

The RecurrenceDescriptor is used to identify recurrence variables in a loop.

Type * getRecurrenceType() const

Returns the type of the recurrence.

RecurKind getRecurrenceKind() const

This node represents a polynomial recurrence on the trip count of the specified loop.

bool isAffine() const

Return true if this represents an expression A + B*x where A and B are loop invariant values.

This class represents an analyzed expression in the program.

SMEAttrs is a utility class to parse the SME ACLE attributes on functions.

bool requiresSMChange(const SMEAttrs &Callee) const

void set(unsigned M, bool Enable=true)

bool hasStreamingBody() const

static ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)

static ScalableVectorType * getDoubleElementsVectorType(ScalableVectorType *VTy)

The main scalar evolution driver.

const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)

If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...

const SCEV * getSCEV(Value *V)

Return a SCEV expression for the full generality of the specified expression.

unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)

Returns the upper bound of the loop trip count as a normal unsigned value.

bool isLoopInvariant(const SCEV *S, const Loop *L)

Return true if the value of the given SCEV is unchanging in the specified loop.

This instruction constructs a fixed permutation of two input vectors.

static bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)

Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...

static bool isExtractSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &Index)

Return true if this shuffle mask is an extract subvector mask.

static bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)

Return true if the mask interleaves one or more input vectors together.

std::pair< iterator, bool > insert(PtrType Ptr)

Inserts Ptr if and only if there is no element in the container equal to Ptr.

bool contains(ConstPtrType Ptr) const

SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

iterator insert(iterator I, T &&Elt)

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
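A short, self-contained sketch of the SmallVector and SmallPtrSet operations listed above; the element values are arbitrary.

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include <cassert>
using namespace llvm;

int main() {
  SmallVector<int, 4> V;               // inline storage for 4 elements
  V.push_back(1);
  V.push_back(3);
  V.insert(V.begin() + 1, 2);          // V == {1, 2, 3}
  assert(V.size() == 3 && V[1] == 2);

  int A = 0, B = 0;
  SmallPtrSet<int *, 4> S;
  assert(S.insert(&A).second);         // newly inserted
  assert(!S.insert(&A).second);        // already present
  assert(S.contains(&A) && !S.contains(&B));
  return 0;
}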

StackOffset holds a fixed and a scalable offset in bytes.

static StackOffset getScalable(int64_t Scalable)

static StackOffset getFixed(int64_t Fixed)

An instruction for storing to memory.

StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.

std::pair< StringRef, StringRef > split(char Separator) const

Split into two substrings around the first occurrence of a separator character.

A switch()-like statement whose cases are string literals.

StringSwitch & Case(StringLiteral S, T Value)
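StringRef::split and StringSwitch are commonly combined to parse '+'-separated option strings. A sketch follows; the Mode enum, the option names, and the helper function are illustrative stand-ins, not anything defined in this file.

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include <cassert>
using namespace llvm;

enum class Mode { Disabled, All, Simple, Unknown };   // stand-in enum

static Mode parseLeadingMode(StringRef Opt) {
  // Split "all+reductions" into "all" and "reductions" at the first '+'.
  auto [Head, Rest] = Opt.split('+');
  (void)Rest;                                         // remainder unused here
  return StringSwitch<Mode>(Head)
      .Case("disabled", Mode::Disabled)
      .Case("all", Mode::All)
      .Case("simple", Mode::Simple)
      .Default(Mode::Unknown);
}

int main() {
  assert(parseLeadingMode("all+reductions") == Mode::All);
  assert(parseLeadingMode("bogus") == Mode::Unknown);
  return 0;
}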

Class to represent struct types.

int InstructionOpcodeToISD(unsigned Opcode) const

Get the ISD node that corresponds to the Instruction class opcode.

EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const

Return the EVT corresponding to this LLVM type.

unsigned getMaxExpandSizeMemcmp(bool OptSize) const

Get maximum # of load operations permitted for memcmp.

bool isTypeLegal(EVT VT) const

Return true if the target has native support for the specified value type.

bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const

Return true if the specified operation is legal on this target or can be made legal with custom lower...

LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const

Return pair that represents the legalization kind (first) that needs to happen to EVT (second) in ord...

LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const

Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...

std::pair< LegalizeTypeAction, EVT > LegalizeKind

LegalizeKind holds the legalization kind that needs to happen to EVT in order to type-legalize it.

bool shouldTreatInstructionLikeSelect(const Instruction *I)

bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const

bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const

bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr, int64_t MergeDistance) const

bool isLoweredToCall(const Function *F) const

This pass provides access to the codegen interfaces that are needed for IR-level transformations.

static OperandValueInfo getOperandInfo(const Value *V)

Collect properties of V used in cost analysis, e.g. OP_PowerOf2.

TargetCostKind

The kind of cost model.

@ TCK_RecipThroughput

Reciprocal throughput.

@ TCK_CodeSize

Instruction code size.

@ TCK_SizeAndLatency

The weighted sum of size and latency.

static bool requiresOrderedReduction(std::optional< FastMathFlags > FMF)

A helper function to determine the type of reduction algorithm used for a given Opcode and set of Fas...

PopcntSupportKind

Flags indicating the kind of support for population count.

PartialReductionExtendKind

@ TCC_Free

Expected to fold away in lowering.

@ TCC_Basic

The cost of a typical 'add' instruction.

InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TargetCostKind CostKind) const

Estimate the cost of a given IR user when lowered.

ShuffleKind

The various kinds of shuffle patterns for vector queries.

@ SK_InsertSubvector

InsertSubvector. Index indicates start offset.

@ SK_Select

Selects elements from the corresponding lane of either source operand.

@ SK_PermuteSingleSrc

Shuffle elements of single source vector with any shuffle mask.

@ SK_Transpose

Transpose two vectors.

@ SK_Splice

Concatenates elements from the first input vector with elements of the second input vector.

@ SK_Broadcast

Broadcast element 0 to all other elements.

@ SK_PermuteTwoSrc

Merge elements from two source vectors into one with any shuffle mask.

@ SK_Reverse

Reverse the order of the vector.

@ SK_ExtractSubvector

ExtractSubvector. Index indicates start offset.

CastContextHint

Represents a hint about the context in which a cast is used.

@ Masked

The cast is used with a masked load/store.

@ None

The cast is not used with a load/store of any kind.

@ Normal

The cast is used with a normal load/store.

static constexpr TypeSize getFixed(ScalarTy ExactSize)

static constexpr TypeSize getScalable(ScalarTy MinimumSize)
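A minimal sketch contrasting fixed and scalable TypeSize values; the 128-bit figure is arbitrary.

#include "llvm/Support/TypeSize.h"
#include <cassert>
using namespace llvm;

int main() {
  TypeSize Fixed = TypeSize::getFixed(128);       // exactly 128 bits
  TypeSize Scalable = TypeSize::getScalable(128); // vscale x 128 bits

  assert(Fixed.isFixed() && Fixed.getFixedValue() == 128);
  assert(Scalable.isScalable() && Scalable.getKnownMinValue() == 128);
  return 0;
}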

The instances of the Type class are immutable: once they are created, they are never changed.

bool isVectorTy() const

True if this is an instance of VectorType.

bool isPointerTy() const

True if this is an instance of PointerType.

static IntegerType * getInt1Ty(LLVMContext &C)

bool isFloatTy() const

Return true if this is 'float', a 32-bit IEEE fp type.

bool isBFloatTy() const

Return true if this is 'bfloat', a 16-bit bfloat type.

static IntegerType * getIntNTy(LLVMContext &C, unsigned N)

bool isFP128Ty() const

Return true if this is 'fp128'.

unsigned getScalarSizeInBits() const LLVM_READONLY

If this is a vector type, return the getPrimitiveSizeInBits value for the element type.

bool isHalfTy() const

Return true if this is 'half', a 16-bit IEEE fp type.

bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const

Return true if this is a type whose size is a known multiple of vscale.

LLVMContext & getContext() const

Return the LLVMContext in which this type was uniqued.

bool isDoubleTy() const

Return true if this is 'double', a 64-bit IEEE fp type.

bool isFloatingPointTy() const

Return true if this is one of the floating-point types.

bool isPtrOrPtrVectorTy() const

Return true if this is a pointer type or a vector of pointer types.

static IntegerType * getInt32Ty(LLVMContext &C)

static IntegerType * getInt64Ty(LLVMContext &C)

static Type * getFloatTy(LLVMContext &C)

bool isIntegerTy() const

True if this is an instance of IntegerType.

TypeSize getPrimitiveSizeInBits() const LLVM_READONLY

Return the basic size of this type if it is a primitive type.

Type * getScalarType() const

If this is a vector type, return the element type, otherwise return 'this'.
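The Type predicates and accessors above can be exercised as in this small sketch; the chosen types are arbitrary.

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include <cassert>
using namespace llvm;

int main() {
  LLVMContext Ctx;
  Type *F32 = Type::getFloatTy(Ctx);
  auto *V8F32 = FixedVectorType::get(F32, 8);       // <8 x float>

  assert(F32->isFloatTy() && F32->isFloatingPointTy());
  assert(V8F32->isVectorTy());
  assert(V8F32->getScalarType() == F32);            // element type of a vector
  assert(V8F32->getScalarSizeInBits() == 32);       // bit width of the element
  assert(Type::getInt64Ty(Ctx)->isIntegerTy());
  return 0;
}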

static UndefValue * get(Type *T)

Static factory methods - Return an 'undef' object of the specified type.

A Use represents the edge between a Value definition and its users.

const Use & getOperandUse(unsigned i) const

Value * getOperand(unsigned i) const

LLVM Value Representation.

Type * getType() const

All values are typed, get the type of this value.

user_iterator user_begin()

bool hasOneUse() const

Return true if there is exactly one use of this value.

Align getPointerAlignment(const DataLayout &DL) const

Returns an alignment of the pointer value.

void takeName(Value *V)

Transfer the name from V to this value.

Base class of all SIMD vector types.

ElementCount getElementCount() const

Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...

static VectorType * get(Type *ElementType, ElementCount EC)

This static method is the primary way to construct a VectorType.

Type * getElementType() const

int getNumOccurrences() const

constexpr ScalarTy getFixedValue() const

constexpr bool isScalable() const

Returns whether the quantity is scaled by a runtime quantity (vscale).

constexpr bool isFixed() const

Returns true if the quantity is not scaled by vscale.

constexpr ScalarTy getKnownMinValue() const

Returns the minimum value this quantity can represent.

const ParentTy * getParent() const

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

static bool isLogicalImmediate(uint64_t imm, unsigned regSize)

isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...

void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)

Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...

uint64_t getFMVPriority(ArrayRef< StringRef > Features)

static constexpr unsigned SVEBitsPerBlock

@ C

The default llvm calling convention, compatible with C.

@ SETCC

SetCC operator - This evaluates to a true value iff the condition is true.

@ ADD

Simple integer binary arithmetic operators.

@ SINT_TO_FP

[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...

@ FADD

Simple binary floating point operators.

@ BITCAST

BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...

@ SIGN_EXTEND

Conversion operators.

@ FNEG

Perform various unary floating-point operations inspired by libm.

@ SELECT

Select(COND, TRUEVAL, FALSEVAL).

@ MULHU

MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...

@ SHL

Shift and rotation operations.

@ ZERO_EXTEND

ZERO_EXTEND - Used for integer types, zeroing the new bits.

@ FP_EXTEND

X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.

@ FP_TO_SINT

FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.

@ AND

Bitwise operators - logical and, logical or, logical xor.

@ FP_ROUND

X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...

@ TRUNCATE

TRUNCATE - Completely drop the high bits.

Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})

Look up the Function declaration of the intrinsic id in the Module M.

BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)

class_match< BinaryOperator > m_BinOp()

Match an arbitrary binary operation and ignore it.

BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)

Matches an And with LHS and RHS in either order.

specific_intval< false > m_SpecificInt(const APInt &V)

Match a specific integer value or vector with all elements equal to the value.

BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)

bool match(Val *V, const Pattern &P)

bind_ty< Instruction > m_Instruction(Instruction *&I)

Match an instruction, capturing it if we match.

specificval_ty m_Specific(const Value *V)

Match if we have a specific specified value.

TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)

Matches ExtractElementInst.

cst_pred_ty< is_nonnegative > m_NonNegative()

Match an integer or vector of non-negative values.

class_match< ConstantInt > m_ConstantInt()

Match an arbitrary ConstantInt and ignore it.

cst_pred_ty< is_one > m_One()

Match an integer 1 or a vector with all elements equal to 1.

ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)

Matches SelectInst.

BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)

cst_pred_ty< is_zero_int > m_ZeroInt()

Match an integer 0 or a vector with all elements equal to 0.

OneUse_match< T > m_OneUse(const T &SubPattern)

TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)

Matches ShuffleVectorInst independently of mask value.

OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)

Matches LoadInst.

CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)

Matches ZExt.

class_match< CmpInst > m_Cmp()

Matches any compare instruction and ignore it.

brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)

specific_fpval m_FPOne()

Match a float 1.0 or vector with all elements equal to 1.0.

BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)

Matches a Add with LHS and RHS in either order.

VScaleVal_match m_VScale()

class_match< Value > m_Value()

Match an arbitrary value and ignore it.

CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)

match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)

BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)

auto m_Undef()

Match an arbitrary undef constant.

BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)

Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.

CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)

Matches SExt.

is_zero m_Zero()

Match any null constant or a vector with all elements equal to 0.

BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)

Matches an Or with LHS and RHS in either order.
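A sketch of how the m_* combinators above compose with match(). The helper names and matched shapes are illustrative, and both functions assume an existing Value* supplied by the caller.

#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"

using namespace llvm;
using namespace llvm::PatternMatch;

// Hypothetical helper: matches "add (zext X), C" in either operand order,
// where C is an arbitrary ConstantInt, and captures X on success.
static bool matchZExtPlusConst(Value *V, Value *&X) {
  return match(V, m_c_Add(m_ZExt(m_Value(X)), m_ConstantInt()));
}

// Hypothetical helper: matches "icmp <pred> A, 0"; Pred receives the
// predicate on success (the m_ICmp form listed above).
static bool matchCompareWithZero(Value *V) {
  CmpPredicate Pred;
  Value *A = nullptr;
  return match(V, m_ICmp(Pred, m_Value(A), m_Zero()));
}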

initializer< Ty > init(const Ty &Val)

LocationClass< Ty > location(Ty &L)

This is an optimization pass for GlobalISel generic memory operations.

auto drop_begin(T &&RangeOrContainer, size_t N=1)

Return a range covering RangeOrContainer with the first N elements excluded.

bool all_of(R &&range, UnaryPredicate P)

Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.

const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)

Find in cost table.
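The usual cost-table idiom pairs a static array of entries with CostTableLookup. In the sketch below the ISD opcodes and MVTs are real enumerators, but the cost numbers and the fallback are made up for illustration, not AArch64's actual costs.

#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
using namespace llvm;

// Illustrative table: the costs are placeholders.
static unsigned lookupMulCost(MVT VT) {
  static const CostTblEntry MulTbl[] = {
      {ISD::MUL, MVT::v4i32, 2},
      {ISD::MUL, MVT::v2i64, 4},
  };
  if (const auto *Entry = CostTableLookup(MulTbl, ISD::MUL, VT))
    return Entry->Cost;
  return 1; // fall back to a unit cost when the type is not in the table
}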

TailFoldingOpts

An enum to describe what types of loops we should attempt to tail-fold: Disabled: None Reductions: Lo...

auto enumerate(FirstRange &&First, RestRanges &&...Rest)

Given two or more input ranges, returns a new range whose values are tuples (A, B,...

std::optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)

Find string metadata for loop.

const Value * getLoadStorePointerOperand(const Value *V)

A helper function that returns the pointer operand of a load or store instruction.

constexpr bool isPowerOf2_64(uint64_t Value)

Return true if the argument is a power of two > 0 (64 bit edition.)

Value * getSplatValue(const Value *V)

Get splat value if the input is a splat vector or return nullptr.

bool MaskedValueIsZero(const Value *V, const APInt &Mask, const SimplifyQuery &SQ, unsigned Depth=0)

Return true if 'V & Mask' is known to be zero.

unsigned M1(unsigned Val)

bool any_of(R &&range, UnaryPredicate P)

Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.

bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)

Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...

unsigned getPerfectShuffleCost(llvm::ArrayRef< int > M)

unsigned Log2_32(uint32_t Value)

Return the floor log base 2 of the specified value, -1 if the value is zero.

constexpr bool isPowerOf2_32(uint32_t Value)

Return true if the argument is a power of two > 0.
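A few of the MathExtras helpers above in a self-contained sketch (NextPowerOf2, documented later in this index, is included for contrast); the values are arbitrary.

#include "llvm/Support/MathExtras.h"
#include <cassert>
using namespace llvm;

int main() {
  assert(isPowerOf2_32(64u) && !isPowerOf2_32(65u));
  assert(isPowerOf2_64(1ull << 40));
  assert(Log2_32(32u) == 5);          // floor log base 2
  assert(NextPowerOf2(33u) == 64);    // strictly greater than the argument
  return 0;
}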

raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

bool none_of(R &&Range, UnaryPredicate P)

Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.

void report_fatal_error(Error Err, bool gen_crash_diag=true)

Report a serious error, calling any installed error handler.

std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)

If the pointer has a constant stride return it in units of the access type size.

bool isUZPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)

Return true for uzp1 or uzp2 masks of the form: <0, 2, 4, 6, 8, 10, 12, 14> or <1,...

constexpr int PoisonMaskElem

raw_fd_ostream & errs()

This returns a reference to a raw_ostream for standard error.

@ Mod

The access may modify the value stored in memory.

bool isZIPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)

Return true for zip1 or zip2 masks of the form: <0, 8, 1, 9, 2, 10, 3, 11> or <4, 12,...

@ UMin

Unsigned integer min implemented in terms of select(cmp()).

@ FAnyOf

Any_of reduction with select(fcmp(),x,y) where one of (x,y) is loop invariant, and both x and y are i...

@ Or

Bitwise or logical OR of integers.

@ Mul

Product of integers.

@ Xor

Bitwise or logical XOR of integers.

@ FMax

FP max implemented in terms of select(cmp()).

@ FMulAdd

Sum of float products with llvm.fmuladd(a * b + sum).

@ SMax

Signed integer max implemented in terms of select(cmp()).

@ And

Bitwise or logical AND of integers.

@ SMin

Signed integer min implemented in terms of select(cmp()).

@ FMin

FP min implemented in terms of select(cmp()).

@ IAnyOf

Any_of reduction with select(icmp(),x,y) where one of (x,y) is loop invariant, and both x and y are i...

@ UMax

Unsigned integer max implemented in terms of select(cmp()).

void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)

Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
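A sketch of querying known bits inside a pass. The helper name is hypothetical, V and DL are assumed to come from the enclosing code, no analysis caches are passed (so the answer is conservative), and the check assumes an integer or vector-of-integer typed value.

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

// Returns true if the low NumBits bits of V are known to be zero.
static bool lowBitsKnownZero(const Value *V, const DataLayout &DL,
                             unsigned NumBits) {
  unsigned BitWidth = V->getType()->getScalarSizeInBits();
  KnownBits Known(BitWidth);
  computeKnownBits(V, Known, DL);                  // fills Known.Zero / Known.One
  return Known.countMinTrailingZeros() >= NumBits; // low bits proven zero
}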

DWARFExpression::Operation Op

unsigned getNumElementsFromSVEPredPattern(unsigned Pattern)

Return the number of active elements for VL1 to VL256 predicate pattern, zero for all other patterns.

auto predecessors(const MachineBasicBlock *BB)

Type * getLoadStoreType(const Value *I)

A helper function that returns the type of a load or store instruction.

bool all_equal(std::initializer_list< T > Values)

Returns true if all Values in the initializer lists are equal or the list is empty.

Type * toVectorTy(Type *Scalar, ElementCount EC)

A helper function for converting Scalar types to vector types.

@ Default

The result values are uniform if and only if all operands are uniform.

const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)

Find in type conversion cost table.

constexpr uint64_t NextPowerOf2(uint64_t A)

Returns the next power of two (in 64-bits) that is strictly greater than A.

This struct is a compact representation of a valid (non-zero power of two) alignment.

bool isSimple() const

Test if the given EVT is simple (as opposed to being extended).

static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)

Returns the EVT that represents a vector NumElements in length, where each element is of type VT.

bool bitsGT(EVT VT) const

Return true if this has more bits than VT.

TypeSize getSizeInBits() const

Return the size of the specified value type in bits.

uint64_t getScalarSizeInBits() const

static EVT getEVT(Type *Ty, bool HandleUnknown=false)

Return the value type corresponding to the specified type.

MVT getSimpleVT() const

Return the SimpleValueType held in the specified simple EVT.

bool isFixedLengthVector() const

Type * getTypeForEVT(LLVMContext &Context) const

This method returns an LLVM type corresponding to the specified EVT.

bool isScalableVector() const

Return true if this is a vector type where the runtime length is machine dependent.

EVT getVectorElementType() const

Given a vector type, return the type of each element.

unsigned getVectorNumElements() const

Given a vector type, return the number of elements it contains.

This struct is a compact representation of a valid (power of two) or undefined (0) alignment.

Information about a load/store intrinsic defined by the target.

InterleavedAccessInfo * IAI

LoopVectorizationLegality * LVL

This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...

unsigned Insns

TODO: Some of these could be merged.

Returns options for expansion of memcmp. IsZeroCmp is true if this is the expansion of memcmp(p1, p2, s) == 0.

OperandValueInfo getNoProps() const

Parameters that control the generic loop unrolling transformation.

bool UpperBound

Allow using trip count upper bound to unroll loops.

unsigned PartialOptSizeThreshold

The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...

unsigned DefaultUnrollRuntimeCount

Default unroll count for loops with run-time trip count.

unsigned SCEVExpansionBudget

Don't allow runtime unrolling if expanding the trip count takes more than SCEVExpansionBudget.

unsigned UnrollAndJamInnerLoopThreshold

Threshold for unroll and jam, for inner loop size.

bool UnrollAndJam

Allow unroll and jam. Used to enable unroll and jam for the target.

bool UnrollRemainder

Allow unrolling of all the iterations of the runtime loop remainder.

unsigned PartialThreshold

The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...

bool Runtime

Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...

bool Partial

Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...

Type Conversion Cost Table.