LLVM: lib/Target/AArch64/AArch64TargetTransformInfo.cpp Source File


#include "llvm/IR/IntrinsicsAArch64.h"
#include <algorithm>
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "aarch64tti"


    "sve-prefer-fixed-over-scalable-if-equal", cl::Hidden);

        "Penalty of calling a function that requires a change to PSTATE.SM"));

    cl::desc("Penalty of inlining a call that requires a change to PSTATE.SM"));

    cl::desc("The cost of a histcnt instruction"));

    cl::desc("The number of instructions to search for a redundant dmb"));

    cl::desc("Threshold for forced unrolling of small loops in AArch64"));

namespace {
class TailFoldingOption {

  bool NeedsDefault = true;

  void setInitialBits(TailFoldingOpts Bits) { InitialBits = Bits; }

  void setNeedsDefault(bool V) { NeedsDefault = V; }

  void setEnableBit(TailFoldingOpts Bit) {
    EnableBits |= Bit;
    DisableBits &= ~Bit;
  }

  void setDisableBit(TailFoldingOpts Bit) {
    EnableBits &= ~Bit;
    DisableBits |= Bit;
  }

  TailFoldingOpts getBits(TailFoldingOpts DefaultBits) const {
    TailFoldingOpts Bits = TailFoldingOpts::Disabled;

    assert((InitialBits == TailFoldingOpts::Disabled || !NeedsDefault) &&
           "Initial bits should only include one of "
           "(disabled|all|simple|default)");
    Bits = NeedsDefault ? DefaultBits : InitialBits;
    Bits |= EnableBits;
    Bits &= ~DisableBits;

    return Bits;
  }

  void reportError(std::string Opt) {
    errs() << "invalid argument '" << Opt
           << "' to -sve-tail-folding=; the option should be of the form\n"
              " (disabled|all|default|simple)[+(reductions|recurrences"
              "|reverse|noreductions|norecurrences|noreverse)]\n";
  }

public:

  void operator=(const std::string &Val) {
    if (Val.empty()) {
      return;
    }

    setNeedsDefault(false);

    SmallVector<StringRef, 4> TailFoldTypes;
    StringRef(Val).split(TailFoldTypes, '+', -1, false);

    unsigned StartIdx = 1;
    if (TailFoldTypes[0] == "disabled")
      setInitialBits(TailFoldingOpts::Disabled);
    else if (TailFoldTypes[0] == "all")
      setInitialBits(TailFoldingOpts::All);
    else if (TailFoldTypes[0] == "default")
      setNeedsDefault(true);
    else if (TailFoldTypes[0] == "simple")
      setInitialBits(TailFoldingOpts::Simple);
    else {
      StartIdx = 0;
      setInitialBits(TailFoldingOpts::Disabled);
    }

    for (unsigned I = StartIdx; I < TailFoldTypes.size(); I++) {
      if (TailFoldTypes[I] == "reductions")
        setEnableBit(TailFoldingOpts::Reductions);
      else if (TailFoldTypes[I] == "recurrences")
        setEnableBit(TailFoldingOpts::Recurrences);
      else if (TailFoldTypes[I] == "reverse")
        setEnableBit(TailFoldingOpts::Reverse);
      else if (TailFoldTypes[I] == "noreductions")
        setDisableBit(TailFoldingOpts::Reductions);
      else if (TailFoldTypes[I] == "norecurrences")
        setDisableBit(TailFoldingOpts::Recurrences);
      else if (TailFoldTypes[I] == "noreverse")
        setDisableBit(TailFoldingOpts::Reverse);
      else
        reportError(Val);
    }
  }

  }
};
} // namespace
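// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original source): TailFoldingOption
// resolves the option string into a bitmask by starting from an initial mode
// and then applying the '+'-separated enable/disable flags. A minimal
// standalone model of that resolution, using an invented uint8_t bitmask in
// place of TailFoldingOpts:
//
//   #include <cstdint>
//
//   enum : uint8_t { Simple = 1, Reductions = 2, Recurrences = 4, Reverse = 8,
//                    All = Simple | Reductions | Recurrences | Reverse };
//
//   uint8_t resolve(uint8_t Initial, uint8_t Enable, uint8_t Disable) {
//     uint8_t Bits = Initial;  // or the target default when "default" is used
//     Bits |= Enable;          // flags such as "+reductions"
//     Bits &= ~Disable;        // flags such as "+noreverse"
//     return Bits;
//   }
//
//   // "all+noreverse"  ->  resolve(All, 0, Reverse)
//   //                  ==  Simple | Reductions | Recurrences
// ---------------------------------------------------------------------------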


    "sve-tail-folding",
    cl::desc(
        "Control the use of vectorisation using tail-folding for SVE where the"
        " option is specified in the form (Initial)[+(Flag1|Flag2|...)]:"
        "\ndisabled (Initial) No loop types will vectorize using "
        "tail-folding"
        "\ndefault (Initial) Uses the default tail-folding settings for "
        "the target CPU"
        "\nall (Initial) All legal loop types will vectorize using "
        "tail-folding"
        "\nsimple (Initial) Use tail-folding for simple loops (not "
        "reductions or recurrences)"
        "\nreductions Use tail-folding for loops containing reductions"
        "\nnoreductions Inverse of above"
        "\nrecurrences Use tail-folding for loops containing fixed order "
        "recurrences"
        "\nnorecurrences Inverse of above"
        "\nreverse Use tail-folding for loops requiring reversed "
        "predicates"
        "\nnoreverse Inverse of above"),


static cl::opt<bool> EnableFixedwidthAutovecInStreamingMode(
    "enable-fixedwidth-autovec-in-streaming-mode", cl::init(false), cl::Hidden);

static cl::opt<bool> EnableScalableAutovecInStreamingMode(
    "enable-scalable-autovec-in-streaming-mode", cl::init(false), cl::Hidden);





        return true;
      }
    }
  }
  return false;
}

  StringRef FeatureStr = F.getFnAttribute(AttributeStr).getValueAsString();
  FeatureStr.split(Features, ",");
}

  return F.hasFnAttribute("fmv-features");
}


const FeatureBitset AArch64TTIImpl::InlineInverseFeatures = {
    AArch64::FeatureExecuteOnly,
};

bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
                                         const Function *Callee) const {

    return false;


  }

    return false;


    return false;
  }

  const TargetMachine &TM = getTLI()->getTargetMachine();

  FeatureBitset EffectiveCallerBits = CallerBits ^ InlineInverseFeatures;
  FeatureBitset EffectiveCalleeBits = CalleeBits ^ InlineInverseFeatures;

  return (EffectiveCallerBits & EffectiveCalleeBits) == EffectiveCalleeBits;
}
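// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original source): areInlineCompatible
// is a feature-superset test, except that the bits named in
// InlineInverseFeatures are flipped on both sides first, so for those
// features the requirement runs the other way (e.g. a callee built *without*
// execute-only must not be inlined into an execute-only caller). A minimal
// standalone model with std::bitset (names invented for the example):
//
//   #include <bitset>
//
//   bool callerCoversCallee(std::bitset<8> Caller, std::bitset<8> Callee,
//                           std::bitset<8> Inverse) {
//     Caller ^= Inverse;
//     Callee ^= Inverse;
//     return (Caller & Callee) == Callee; // callee requires nothing extra
//   }
// ---------------------------------------------------------------------------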


bool AArch64TTIImpl::areTypesABICompatible(
    const Function *Caller, const Function *Callee,
    const ArrayRef<Type *> &Types) const {
  if (!BaseT::areTypesABICompatible(Caller, Callee, Types))
    return false;

  if (ST->useSVEForFixedLengthVectors() && llvm::any_of(Types, [](Type *Ty) {
        auto FVTy = dyn_cast<FixedVectorType>(Ty);
        return FVTy &&
               FVTy->getScalarSizeInBits() * FVTy->getNumElements() > 128;
      }))
    return false;

  return true;
}


unsigned
AArch64TTIImpl::getInlineCallPenalty(const Function *F, const CallBase &Call,
                                     unsigned DefaultCallPenalty) const {

  SMECallAttrs CallAttrs(Call, &getTLI()->getRuntimeLibcallsInfo());

    if (F == Call.getCaller())

  }

  return DefaultCallPenalty;
}
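// Illustrative note (not part of the original source, numbers assumed): if
// the default call penalty were 12 and this call required a PSTATE.SM
// (streaming-mode) change, the penalty returned here would be the default
// scaled up by the corresponding change-penalty factor described in the
// cl::opt strings near the top of the file, making such inlining much less
// attractive; calls needing no mode change fall through to the plain
// DefaultCallPenalty.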



    return true;

         ST->isSVEorStreamingSVEAvailable() &&
         !ST->disableMaximizeScalableBandwidth();
}


/// Calculate the cost of materializing a 64-bit value. This helper
/// method might only calculate a fraction of a larger immediate. Therefore it
/// is valid to return a cost of ZERO.
InstructionCost AArch64TTIImpl::getIntImmCost(int64_t Val) const {
  // Check if the immediate can be encoded within an instruction.
  if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64))
    return 0;

  if (Val < 0)
    Val = ~Val;

  // Calculate how many moves we will need to materialize this constant.
  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(Val, 64, Insn);
  return Insn.size();
}

/// Calculate the cost of materializing the given constant.
InstructionCost
AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
                              TTI::TargetCostKind CostKind) const {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  if (BitSize == 0)
    return ~0U;

  // Sign-extend all constants to a multiple of 64-bit.
  APInt ImmVal = Imm;
  if (BitSize & 0x3f)
    ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);

  // Split the constant into 64-bit chunks and calculate the cost for each
  // chunk.
  InstructionCost Cost = 0;
  for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
    APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
    int64_t Val = Tmp.getSExtValue();
    Cost += getIntImmCost(Val);
  }
  return std::max<InstructionCost>(1, Cost);
}
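// Worked example (illustrative): for an i128 constant the loop above splits
// the sign-extended value into two 64-bit chunks, costs each chunk with the
// 64-bit helper (zero when the chunk is free to materialize, otherwise the
// number of move instructions required), sums the results, and clamps the
// total to a minimum cost of 1.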


InstructionCost AArch64TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                                  const APInt &Imm, Type *Ty,
                                                  TTI::TargetCostKind CostKind,
                                                  Instruction *Inst) const {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  // There is no cost model for constants with a bit size of 0. Identify them
  // as a free constant.
  if (BitSize == 0)
    return TTI::TCC_Free;

  unsigned ImmIdx = ~0U;
  switch (Opcode) {
  default:
    return TTI::TCC_Free;
  case Instruction::GetElementPtr:
    // Always hoist the base address of a GetElementPtr.
    if (Idx == 0)
      return 2 * TTI::TCC_Basic;
    return TTI::TCC_Free;
  case Instruction::Store:
    ImmIdx = 0;
    break;
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:
    ImmIdx = 1;
    break;
  // Always return TCC_Free for the shift value of a shift instruction.
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    if (Idx == 1)
      return TTI::TCC_Free;
    break;
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::IntToPtr:
  case Instruction::PtrToInt:
  case Instruction::BitCast:
  case Instruction::PHI:
  case Instruction::Call:
  case Instruction::Select:
  case Instruction::Ret:
  case Instruction::Load:
    break;
  }

  if (Idx == ImmIdx) {
    int NumConstants = (BitSize + 63) / 64;
    InstructionCost Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind);
    return (Cost <= NumConstants * TTI::TCC_Basic)
               ? static_cast<int>(TTI::TCC_Free)
               : Cost;
  }
  return AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind);
}


InstructionCost
AArch64TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind) const {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  // There is no cost model for constants with a bit size of 0. Identify them
  // as a free constant.
  if (BitSize == 0)
    return TTI::TCC_Free;

  // Most (all?) AArch64 intrinsics do not support folding immediates into the
  // selected instruction, so we compute the materialization cost for the
  // immediate directly.
  if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)
    return AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind);

  switch (IID) {
  default:
    return TTI::TCC_Free;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:
    if (Idx == 1) {
      int NumConstants = (BitSize + 63) / 64;
      InstructionCost Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind);
      return (Cost <= NumConstants * TTI::TCC_Basic)
                 ? static_cast<int>(TTI::TCC_Free)
                 : Cost;
    }
    break;
  case Intrinsic::experimental_stackmap:
    if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TTI::TCC_Free;
    break;
  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint:
    if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TTI::TCC_Free;
    break;
  case Intrinsic::experimental_gc_statepoint:
    if ((Idx < 5) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TTI::TCC_Free;
    break;
  }
  return AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind);
}


TTI::PopcntSupportKind
AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) const {
  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
  if (TyWidth == 32 || TyWidth == 64)
    return TTI::PSK_FastHardware;
  return TTI::PSK_Software;
}
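// Illustrative note (not part of the original source): 32- and 64-bit
// population counts are reported as fast because AArch64 can lower them to a
// short NEON sequence (move to a vector register, CNT, then an add-across),
// so popcount-based idioms are not penalized at these widths.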




  if (!ST->hasSVE2())

  Type *BucketPtrsTy = ICA.getArgTypes()[0];
  Type *EltTy = ICA.getArgTypes()[1];
  unsigned TotalHistCnts = 1;



    unsigned EC = VTy->getElementCount().getKnownMinValue();

    unsigned LegalEltSize = EltSize <= 32 ? 32 : 64;

    if (EC == 2 || (LegalEltSize == 32 && EC == 4))

      TotalHistCnts = EC / NaturalVectorWidth;

  }

}
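// Illustrative note (not part of the original source): the histogram cost
// scales with how many native-width histcnt operations the bucket vector
// needs. Assuming a natural vector width of 128 / LegalEltSize lanes (4 for
// 32-bit elements), a 16-element bucket vector gives TotalHistCnts = 16 / 4
// = 4, and each of those is charged at the configurable histcnt cost.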

606

610

611

612

613

618

619 switch (ICA.getID()) {

620 case Intrinsic::experimental_vector_histogram_add: {

622

624 return HistCost;

625 break;

626 }

627 case Intrinsic::umin:

628 case Intrinsic::umax:

629 case Intrinsic::smin:

630 case Intrinsic::smax: {

631 static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,

632 MVT::v8i16, MVT::v2i32, MVT::v4i32,

633 MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32,

634 MVT::nxv2i64};

636

637 if (LT.second == MVT::v2i64)

638 return LT.first * 2;

639 if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }))

640 return LT.first;

641 break;

642 }

643 case Intrinsic::sadd_sat:

644 case Intrinsic::ssub_sat:

645 case Intrinsic::uadd_sat:

646 case Intrinsic::usub_sat: {

647 static const auto ValidSatTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,

648 MVT::v8i16, MVT::v2i32, MVT::v4i32,

649 MVT::v2i64};

651

652

653 unsigned Instrs =

654 LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits() ? 1 : 4;

655 if (any_of(ValidSatTys, [&LT](MVT M) { return M == LT.second; }))

656 return LT.first * Instrs;

657

660

661 if (ST->isSVEAvailable() && VectorSize >= 128 && isPowerOf2_64(VectorSize))

662 return LT.first * Instrs;

663

664 break;

665 }

666 case Intrinsic::abs: {

667 static const auto ValidAbsTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,

668 MVT::v8i16, MVT::v2i32, MVT::v4i32,

669 MVT::v2i64};

671 if (any_of(ValidAbsTys, [&LT](MVT M) { return M == LT.second; }))

672 return LT.first;

673 break;

674 }

675 case Intrinsic::bswap: {

676 static const auto ValidAbsTys = {MVT::v4i16, MVT::v8i16, MVT::v2i32,

677 MVT::v4i32, MVT::v2i64};

679 if (any_of(ValidAbsTys, [&LT](MVT M) { return M == LT.second; }) &&

680 LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits())

681 return LT.first;

682 break;

683 }

684 case Intrinsic::fma:

685 case Intrinsic::fmuladd: {

686

687

690 (EltTy->isHalfTy() && ST->hasFullFP16()))

692 break;

693 }

694 case Intrinsic::stepvector: {

697

698

699 if (LT.first > 1) {

703 Cost += AddCost * (LT.first - 1);

704 }

706 }

707 case Intrinsic::vector_extract:

708 case Intrinsic::vector_insert: {

709

710

711

712

713

714

717 break;

718

720 EVT VecVT = getTLI()->getValueType(DL, ICA.getArgTypes()[0]);

721 bool IsExtract = ICA.getID() == Intrinsic::vector_extract;

722 EVT SubVecVT = IsExtract ? getTLI()->getValueType(DL, RetTy)

723 : getTLI()->getValueType(DL, ICA.getArgTypes()[1]);

724

725

727 break;

728

730 getTLI()->getTypeConversion(C, SubVecVT);

732 getTLI()->getTypeConversion(C, VecVT);

738 break;

739 }

740 case Intrinsic::bitreverse: {

741 static const CostTblEntry BitreverseTbl[] = {

742 {Intrinsic::bitreverse, MVT::i32, 1},

743 {Intrinsic::bitreverse, MVT::i64, 1},

744 {Intrinsic::bitreverse, MVT::v8i8, 1},

745 {Intrinsic::bitreverse, MVT::v16i8, 1},

746 {Intrinsic::bitreverse, MVT::v4i16, 2},

747 {Intrinsic::bitreverse, MVT::v8i16, 2},

748 {Intrinsic::bitreverse, MVT::v2i32, 2},

749 {Intrinsic::bitreverse, MVT::v4i32, 2},

750 {Intrinsic::bitreverse, MVT::v1i64, 2},

751 {Intrinsic::bitreverse, MVT::v2i64, 2},

752 };

754 const auto *Entry =

756 if (Entry) {

757

758

759 if (TLI->getValueType(DL, RetTy, true) == MVT::i8 ||

760 TLI->getValueType(DL, RetTy, true) == MVT::i16)

761 return LegalisationCost.first * Entry->Cost + 1;

762

763 return LegalisationCost.first * Entry->Cost;

764 }

765 break;

766 }

767 case Intrinsic::ctpop: {

768 if (!ST->hasNEON()) {

769

771 }

782 };

784 MVT MTy = LT.second;

786

787

789 RetTy->getScalarSizeInBits()

790 ? 1

791 : 0;

792 return LT.first * Entry->Cost + ExtraCost;

793 }

794 break;

795 }

796 case Intrinsic::sadd_with_overflow:

797 case Intrinsic::uadd_with_overflow:

798 case Intrinsic::ssub_with_overflow:

799 case Intrinsic::usub_with_overflow:

800 case Intrinsic::smul_with_overflow:

801 case Intrinsic::umul_with_overflow: {

802 static const CostTblEntry WithOverflowCostTbl[] = {

803 {Intrinsic::sadd_with_overflow, MVT::i8, 3},

804 {Intrinsic::uadd_with_overflow, MVT::i8, 3},

805 {Intrinsic::sadd_with_overflow, MVT::i16, 3},

806 {Intrinsic::uadd_with_overflow, MVT::i16, 3},

807 {Intrinsic::sadd_with_overflow, MVT::i32, 1},

808 {Intrinsic::uadd_with_overflow, MVT::i32, 1},

809 {Intrinsic::sadd_with_overflow, MVT::i64, 1},

810 {Intrinsic::uadd_with_overflow, MVT::i64, 1},

811 {Intrinsic::ssub_with_overflow, MVT::i8, 3},

812 {Intrinsic::usub_with_overflow, MVT::i8, 3},

813 {Intrinsic::ssub_with_overflow, MVT::i16, 3},

814 {Intrinsic::usub_with_overflow, MVT::i16, 3},

815 {Intrinsic::ssub_with_overflow, MVT::i32, 1},

816 {Intrinsic::usub_with_overflow, MVT::i32, 1},

817 {Intrinsic::ssub_with_overflow, MVT::i64, 1},

818 {Intrinsic::usub_with_overflow, MVT::i64, 1},

819 {Intrinsic::smul_with_overflow, MVT::i8, 5},

820 {Intrinsic::umul_with_overflow, MVT::i8, 4},

821 {Intrinsic::smul_with_overflow, MVT::i16, 5},

822 {Intrinsic::umul_with_overflow, MVT::i16, 4},

823 {Intrinsic::smul_with_overflow, MVT::i32, 2},

824 {Intrinsic::umul_with_overflow, MVT::i32, 2},

825 {Intrinsic::smul_with_overflow, MVT::i64, 3},

826 {Intrinsic::umul_with_overflow, MVT::i64, 3},

827 };

828 EVT MTy = TLI->getValueType(DL, RetTy->getContainedType(0), true);

832 return Entry->Cost;

833 break;

834 }

835 case Intrinsic::fptosi_sat:

836 case Intrinsic::fptoui_sat: {

838 break;

839 bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;

841 EVT MTy = TLI->getValueType(DL, RetTy);

842

843

844 if ((LT.second == MVT::f32 || LT.second == MVT::f64 ||

845 LT.second == MVT::v2f32 || LT.second == MVT::v4f32 ||

846 LT.second == MVT::v2f64)) {

848 (LT.second == MVT::f64 && MTy == MVT::i32) ||

849 (LT.second == MVT::f32 && MTy == MVT::i64)))

850 return LT.first;

851

852 if (LT.second.getScalarType() == MVT::f32 && MTy.isFixedLengthVector() &&

855 }

856

857

858 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())

861 RetTy,

865 if ((LT.second == MVT::f16 && MTy == MVT::i32) ||

866 (LT.second == MVT::f16 && MTy == MVT::i64) ||

867 ((LT.second == MVT::v4f16 || LT.second == MVT::v8f16) &&

869 return LT.first;

870

871 if (LT.second.getScalarType() == MVT::f16 && MTy.isFixedLengthVector() &&

874

875

876 if (LT.second.getScalarType() == MVT::f16 && MTy.isFixedLengthVector() &&

879

880

881 if ((LT.second.getScalarType() == MVT::f32 ||

882 LT.second.getScalarType() == MVT::f64 ||

883 LT.second.getScalarType() == MVT::f16) &&

885 Type *LegalTy =

886 Type::getIntNTy(RetTy->getContext(), LT.second.getScalarSizeInBits());

887 if (LT.second.isVector())

888 LegalTy = VectorType::get(LegalTy, LT.second.getVectorElementCount());

891 LegalTy, {LegalTy, LegalTy});

894 LegalTy, {LegalTy, LegalTy});

896 return LT.first * Cost +

897 ((LT.second.getScalarType() != MVT::f16 || ST->hasFullFP16()) ? 0

898 : 1);

899 }

900

901

903 RetTy = RetTy->getScalarType();

904 if (LT.second.isVector()) {

905 FPTy = VectorType::get(FPTy, LT.second.getVectorElementCount());

906 RetTy = VectorType::get(RetTy, LT.second.getVectorElementCount());

907 }

913 getCastInstrCost(IsSigned ? Instruction::FPToSI : Instruction::FPToUI,

915 if (IsSigned) {

921 }

922 return LT.first * Cost;

923 }

924 case Intrinsic::fshl:

925 case Intrinsic::fshr: {

927 break;

928

930

931

932

933

934 if (RetTy->isIntegerTy() && ICA.getArgs()[0] == ICA.getArgs()[1] &&

935 (RetTy->getPrimitiveSizeInBits() == 32 ||

936 RetTy->getPrimitiveSizeInBits() == 64)) {

938 (ICA.getID() == Intrinsic::fshl && !OpInfoZ.isConstant()) ? 1 : 0;

939 return 1 + NegCost;

940 }

941

942

944 break;

945

949 {Intrinsic::fshl, MVT::v4i32, 2},

950 {Intrinsic::fshl, MVT::v2i64, 2}, {Intrinsic::fshl, MVT::v16i8, 2},

951 {Intrinsic::fshl, MVT::v8i16, 2}, {Intrinsic::fshl, MVT::v2i32, 2},

952 {Intrinsic::fshl, MVT::v8i8, 2}, {Intrinsic::fshl, MVT::v4i16, 2}};

953

954

955 const auto *Entry =

956 CostTableLookup(FshlTbl, Intrinsic::fshl, LegalisationCost.second);

957 if (Entry)

958 return LegalisationCost.first * Entry->Cost;

959 }

960

962 if (!RetTy->isIntegerTy())

963 break;

964

965

966

967 bool HigherCost = (RetTy->getScalarSizeInBits() != 32 &&

968 RetTy->getScalarSizeInBits() < 64) ||

969 (RetTy->getScalarSizeInBits() % 64 != 0);

970 unsigned ExtraCost = HigherCost ? 1 : 0;

971 if (RetTy->getScalarSizeInBits() == 32 ||

972 RetTy->getScalarSizeInBits() == 64)

973 ExtraCost = 0;

974

975 else if (HigherCost)

976 ExtraCost = 1;

977 else

978 break;

979 return TyL.first + ExtraCost;

980 }

981 case Intrinsic::get_active_lane_mask: {

983 EVT RetVT = getTLI()->getValueType(DL, RetTy);

985 if (getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT))

986 break;

987

988 if (RetTy->isScalableTy()) {

989 if (TLI->getTypeAction(RetTy->getContext(), RetVT) !=

991 break;

992

995

996

997

998

999 if (ST->hasSVE2p1() || ST->hasSME2()) {

1001 if (Cost == 1)

1002 return Cost;

1003 }

1004

1005

1006

1007

1014 return Cost + (SplitCost * (Cost - 1));

1015 } else if (!getTLI()->isTypeLegal(RetVT)) {

1016

1017

1018

1019

1020

1021

1022

1023

1024

1026 }

1027 break;

1028 }

1029 case Intrinsic::experimental_vector_match: {

1031 EVT SearchVT = getTLI()->getValueType(DL, ICA.getArgTypes()[0]);

1032 unsigned SearchSize = NeedleTy->getNumElements();

1033 if (!getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize)) {

1034

1035

1036

1037

1038

1042 return Cost;

1043 }

1044 break;

1045 }

1046 case Intrinsic::experimental_cttz_elts: {

1047 EVT ArgVT = getTLI()->getValueType(DL, ICA.getArgTypes()[0]);

1048 if (!getTLI()->shouldExpandCttzElements(ArgVT)) {

1049

1050

1051

1052 return 4;

1053 }

1054 break;

1055 }

1056 case Intrinsic::experimental_vector_extract_last_active:

1057 if (ST->isSVEorStreamingSVEAvailable()) {

1059

1060 return LegalCost;

1061 }

1062 break;

1063 default:

1064 break;

1065 }

1067}

1068

1069

1070

1074 auto RequiredType = II.getType();

1075

1077 assert(PN && "Expected Phi Node!");

1078

1079

1080 if (!PN->hasOneUse())

1081 return std::nullopt;

1082

1083 for (Value *IncValPhi : PN->incoming_values()) {

1085 if (!Reinterpret ||

1086 Reinterpret->getIntrinsicID() !=

1087 Intrinsic::aarch64_sve_convert_to_svbool ||

1088 RequiredType != Reinterpret->getArgOperand(0)->getType())

1089 return std::nullopt;

1090 }

1091

1092

1096

1097 for (unsigned I = 0; I < PN->getNumIncomingValues(); I++) {

1099 NPN->addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(I));

1100 Worklist.push_back(Reinterpret);

1101 }

1102

1103

1105}

1106

1107

1108

1110

1111

1112

1113

1114

1115

1123

1124

1131

1132

1138

1139

1145

1146

1147

1153

1154

1155

1162

1163

1164

1166

1167

1168

1169

1170

1172 return GoverningPredicateIdx != std::numeric_limits::max();

1173 }

1174

1177 return GoverningPredicateIdx;

1178 }

1179

1182 GoverningPredicateIdx = Index;

1183 return *this;

1184 }

1185

1186

1187

1188

1189

1190

1191

1192

1193

1197

1200 return UndefIntrinsic;

1201 }

1202

1205 UndefIntrinsic = IID;

1206 return *this;

1207 }

1208

1210

1213 return IROpcode;

1214 }

1215

1218 IROpcode = Opcode;

1219 return *this;

1220 }

1221

1222

1223

1224

1225

1227 return ResultLanes == InactiveLanesTakenFromOperand;

1228 }

1229

1232 return OperandIdxForInactiveLanes;

1233 }

1234

1236 assert(ResultLanes == Uninitialized && "Cannot set property twice!");

1237 ResultLanes = InactiveLanesTakenFromOperand;

1238 OperandIdxForInactiveLanes = Index;

1239 return *this;

1240 }

1241

1243 return ResultLanes == InactiveLanesAreNotDefined;

1244 }

1245

1247 assert(ResultLanes == Uninitialized && "Cannot set property twice!");

1248 ResultLanes = InactiveLanesAreNotDefined;

1249 return *this;

1250 }

1251

1253 return ResultLanes == InactiveLanesAreUnused;

1254 }

1255

1257 assert(ResultLanes == Uninitialized && "Cannot set property twice!");

1258 ResultLanes = InactiveLanesAreUnused;

1259 return *this;

1260 }

1261

1262

1263

1264

1266

1268 ResultIsZeroInitialized = true;

1269 return *this;

1270 }

1271

1272

1273

1274

1275

1276

1277

1279 return OperandIdxWithNoActiveLanes != std::numeric_limits::max();

1280 }

1281

1284 return OperandIdxWithNoActiveLanes;

1285 }

1286

1289 OperandIdxWithNoActiveLanes = Index;

1290 return *this;

1291 }

1292

1293private:

1294 unsigned GoverningPredicateIdx = std::numeric_limits::max();

1295

1297 unsigned IROpcode = 0;

1298

1299 enum PredicationStyle {

1301 InactiveLanesTakenFromOperand,

1302 InactiveLanesAreNotDefined,

1303 InactiveLanesAreUnused

1305

1306 bool ResultIsZeroInitialized = false;

1307 unsigned OperandIdxForInactiveLanes = std::numeric_limits::max();

1308 unsigned OperandIdxWithNoActiveLanes = std::numeric_limits::max();

1309};

1310

1312

1313

1316 return !isa(V->getType());

1317 }))

1319

1321 switch (IID) {

1322 default:

1323 break;

1324 case Intrinsic::aarch64_sve_fcvt_bf16f32_v2:

1325 case Intrinsic::aarch64_sve_fcvt_f16f32:

1326 case Intrinsic::aarch64_sve_fcvt_f16f64:

1327 case Intrinsic::aarch64_sve_fcvt_f32f16:

1328 case Intrinsic::aarch64_sve_fcvt_f32f64:

1329 case Intrinsic::aarch64_sve_fcvt_f64f16:

1330 case Intrinsic::aarch64_sve_fcvt_f64f32:

1331 case Intrinsic::aarch64_sve_fcvtlt_f32f16:

1332 case Intrinsic::aarch64_sve_fcvtlt_f64f32:

1333 case Intrinsic::aarch64_sve_fcvtx_f32f64:

1334 case Intrinsic::aarch64_sve_fcvtzs:

1335 case Intrinsic::aarch64_sve_fcvtzs_i32f16:

1336 case Intrinsic::aarch64_sve_fcvtzs_i32f64:

1337 case Intrinsic::aarch64_sve_fcvtzs_i64f16:

1338 case Intrinsic::aarch64_sve_fcvtzs_i64f32:

1339 case Intrinsic::aarch64_sve_fcvtzu:

1340 case Intrinsic::aarch64_sve_fcvtzu_i32f16:

1341 case Intrinsic::aarch64_sve_fcvtzu_i32f64:

1342 case Intrinsic::aarch64_sve_fcvtzu_i64f16:

1343 case Intrinsic::aarch64_sve_fcvtzu_i64f32:

1344 case Intrinsic::aarch64_sve_scvtf:

1345 case Intrinsic::aarch64_sve_scvtf_f16i32:

1346 case Intrinsic::aarch64_sve_scvtf_f16i64:

1347 case Intrinsic::aarch64_sve_scvtf_f32i64:

1348 case Intrinsic::aarch64_sve_scvtf_f64i32:

1349 case Intrinsic::aarch64_sve_ucvtf:

1350 case Intrinsic::aarch64_sve_ucvtf_f16i32:

1351 case Intrinsic::aarch64_sve_ucvtf_f16i64:

1352 case Intrinsic::aarch64_sve_ucvtf_f32i64:

1353 case Intrinsic::aarch64_sve_ucvtf_f64i32:

1355

1356 case Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2:

1357 case Intrinsic::aarch64_sve_fcvtnt_f16f32:

1358 case Intrinsic::aarch64_sve_fcvtnt_f32f64:

1359 case Intrinsic::aarch64_sve_fcvtxnt_f32f64:

1361

1362 case Intrinsic::aarch64_sve_fabd:

1364 case Intrinsic::aarch64_sve_fadd:

1367 case Intrinsic::aarch64_sve_fdiv:

1370 case Intrinsic::aarch64_sve_fmax:

1372 case Intrinsic::aarch64_sve_fmaxnm:

1374 case Intrinsic::aarch64_sve_fmin:

1376 case Intrinsic::aarch64_sve_fminnm:

1378 case Intrinsic::aarch64_sve_fmla:

1380 case Intrinsic::aarch64_sve_fmls:

1382 case Intrinsic::aarch64_sve_fmul:

1385 case Intrinsic::aarch64_sve_fmulx:

1387 case Intrinsic::aarch64_sve_fnmla:

1389 case Intrinsic::aarch64_sve_fnmls:

1391 case Intrinsic::aarch64_sve_fsub:

1394 case Intrinsic::aarch64_sve_add:

1397 case Intrinsic::aarch64_sve_mla:

1399 case Intrinsic::aarch64_sve_mls:

1401 case Intrinsic::aarch64_sve_mul:

1404 case Intrinsic::aarch64_sve_sabd:

1406 case Intrinsic::aarch64_sve_sdiv:

1409 case Intrinsic::aarch64_sve_smax:

1411 case Intrinsic::aarch64_sve_smin:

1413 case Intrinsic::aarch64_sve_smulh:

1415 case Intrinsic::aarch64_sve_sub:

1418 case Intrinsic::aarch64_sve_uabd:

1420 case Intrinsic::aarch64_sve_udiv:

1423 case Intrinsic::aarch64_sve_umax:

1425 case Intrinsic::aarch64_sve_umin:

1427 case Intrinsic::aarch64_sve_umulh:

1429 case Intrinsic::aarch64_sve_asr:

1432 case Intrinsic::aarch64_sve_lsl:

1435 case Intrinsic::aarch64_sve_lsr:

1438 case Intrinsic::aarch64_sve_and:

1441 case Intrinsic::aarch64_sve_bic:

1443 case Intrinsic::aarch64_sve_eor:

1446 case Intrinsic::aarch64_sve_orr:

1449 case Intrinsic::aarch64_sve_sqrshl:

1451 case Intrinsic::aarch64_sve_sqshl:

1453 case Intrinsic::aarch64_sve_sqsub:

1455 case Intrinsic::aarch64_sve_srshl:

1457 case Intrinsic::aarch64_sve_uqrshl:

1459 case Intrinsic::aarch64_sve_uqshl:

1461 case Intrinsic::aarch64_sve_uqsub:

1463 case Intrinsic::aarch64_sve_urshl:

1465

1466 case Intrinsic::aarch64_sve_add_u:

1468 Instruction::Add);

1469 case Intrinsic::aarch64_sve_and_u:

1471 Instruction::And);

1472 case Intrinsic::aarch64_sve_asr_u:

1474 Instruction::AShr);

1475 case Intrinsic::aarch64_sve_eor_u:

1477 Instruction::Xor);

1478 case Intrinsic::aarch64_sve_fadd_u:

1480 Instruction::FAdd);

1481 case Intrinsic::aarch64_sve_fdiv_u:

1483 Instruction::FDiv);

1484 case Intrinsic::aarch64_sve_fmul_u:

1486 Instruction::FMul);

1487 case Intrinsic::aarch64_sve_fsub_u:

1489 Instruction::FSub);

1490 case Intrinsic::aarch64_sve_lsl_u:

1492 Instruction::Shl);

1493 case Intrinsic::aarch64_sve_lsr_u:

1495 Instruction::LShr);

1496 case Intrinsic::aarch64_sve_mul_u:

1498 Instruction::Mul);

1499 case Intrinsic::aarch64_sve_orr_u:

1501 Instruction::Or);

1502 case Intrinsic::aarch64_sve_sdiv_u:

1504 Instruction::SDiv);

1505 case Intrinsic::aarch64_sve_sub_u:

1507 Instruction::Sub);

1508 case Intrinsic::aarch64_sve_udiv_u:

1510 Instruction::UDiv);

1511

1512 case Intrinsic::aarch64_sve_addqv:

1513 case Intrinsic::aarch64_sve_and_z:

1514 case Intrinsic::aarch64_sve_bic_z:

1515 case Intrinsic::aarch64_sve_brka_z:

1516 case Intrinsic::aarch64_sve_brkb_z:

1517 case Intrinsic::aarch64_sve_brkn_z:

1518 case Intrinsic::aarch64_sve_brkpa_z:

1519 case Intrinsic::aarch64_sve_brkpb_z:

1520 case Intrinsic::aarch64_sve_cntp:

1521 case Intrinsic::aarch64_sve_compact:

1522 case Intrinsic::aarch64_sve_eor_z:

1523 case Intrinsic::aarch64_sve_eorv:

1524 case Intrinsic::aarch64_sve_eorqv:

1525 case Intrinsic::aarch64_sve_nand_z:

1526 case Intrinsic::aarch64_sve_nor_z:

1527 case Intrinsic::aarch64_sve_orn_z:

1528 case Intrinsic::aarch64_sve_orr_z:

1529 case Intrinsic::aarch64_sve_orv:

1530 case Intrinsic::aarch64_sve_orqv:

1531 case Intrinsic::aarch64_sve_pnext:

1532 case Intrinsic::aarch64_sve_rdffr_z:

1533 case Intrinsic::aarch64_sve_saddv:

1534 case Intrinsic::aarch64_sve_uaddv:

1535 case Intrinsic::aarch64_sve_umaxv:

1536 case Intrinsic::aarch64_sve_umaxqv:

1537 case Intrinsic::aarch64_sve_cmpeq:

1538 case Intrinsic::aarch64_sve_cmpeq_wide:

1539 case Intrinsic::aarch64_sve_cmpge:

1540 case Intrinsic::aarch64_sve_cmpge_wide:

1541 case Intrinsic::aarch64_sve_cmpgt:

1542 case Intrinsic::aarch64_sve_cmpgt_wide:

1543 case Intrinsic::aarch64_sve_cmphi:

1544 case Intrinsic::aarch64_sve_cmphi_wide:

1545 case Intrinsic::aarch64_sve_cmphs:

1546 case Intrinsic::aarch64_sve_cmphs_wide:

1547 case Intrinsic::aarch64_sve_cmple_wide:

1548 case Intrinsic::aarch64_sve_cmplo_wide:

1549 case Intrinsic::aarch64_sve_cmpls_wide:

1550 case Intrinsic::aarch64_sve_cmplt_wide:

1551 case Intrinsic::aarch64_sve_cmpne:

1552 case Intrinsic::aarch64_sve_cmpne_wide:

1553 case Intrinsic::aarch64_sve_facge:

1554 case Intrinsic::aarch64_sve_facgt:

1555 case Intrinsic::aarch64_sve_fcmpeq:

1556 case Intrinsic::aarch64_sve_fcmpge:

1557 case Intrinsic::aarch64_sve_fcmpgt:

1558 case Intrinsic::aarch64_sve_fcmpne:

1559 case Intrinsic::aarch64_sve_fcmpuo:

1560 case Intrinsic::aarch64_sve_ld1:

1561 case Intrinsic::aarch64_sve_ld1_gather:

1562 case Intrinsic::aarch64_sve_ld1_gather_index:

1563 case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:

1564 case Intrinsic::aarch64_sve_ld1_gather_sxtw:

1565 case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:

1566 case Intrinsic::aarch64_sve_ld1_gather_uxtw:

1567 case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:

1568 case Intrinsic::aarch64_sve_ld1q_gather_index:

1569 case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:

1570 case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:

1571 case Intrinsic::aarch64_sve_ld1ro:

1572 case Intrinsic::aarch64_sve_ld1rq:

1573 case Intrinsic::aarch64_sve_ld1udq:

1574 case Intrinsic::aarch64_sve_ld1uwq:

1575 case Intrinsic::aarch64_sve_ld2_sret:

1576 case Intrinsic::aarch64_sve_ld2q_sret:

1577 case Intrinsic::aarch64_sve_ld3_sret:

1578 case Intrinsic::aarch64_sve_ld3q_sret:

1579 case Intrinsic::aarch64_sve_ld4_sret:

1580 case Intrinsic::aarch64_sve_ld4q_sret:

1581 case Intrinsic::aarch64_sve_ldff1:

1582 case Intrinsic::aarch64_sve_ldff1_gather:

1583 case Intrinsic::aarch64_sve_ldff1_gather_index:

1584 case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:

1585 case Intrinsic::aarch64_sve_ldff1_gather_sxtw:

1586 case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:

1587 case Intrinsic::aarch64_sve_ldff1_gather_uxtw:

1588 case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:

1589 case Intrinsic::aarch64_sve_ldnf1:

1590 case Intrinsic::aarch64_sve_ldnt1:

1591 case Intrinsic::aarch64_sve_ldnt1_gather:

1592 case Intrinsic::aarch64_sve_ldnt1_gather_index:

1593 case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:

1594 case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:

1596

1597 case Intrinsic::aarch64_sve_prf:

1598 case Intrinsic::aarch64_sve_prfb_gather_index:

1599 case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:

1600 case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:

1601 case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:

1602 case Intrinsic::aarch64_sve_prfd_gather_index:

1603 case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:

1604 case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:

1605 case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:

1606 case Intrinsic::aarch64_sve_prfh_gather_index:

1607 case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:

1608 case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:

1609 case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:

1610 case Intrinsic::aarch64_sve_prfw_gather_index:

1611 case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:

1612 case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:

1613 case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:

1615

1616 case Intrinsic::aarch64_sve_st1_scatter:

1617 case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:

1618 case Intrinsic::aarch64_sve_st1_scatter_sxtw:

1619 case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:

1620 case Intrinsic::aarch64_sve_st1_scatter_uxtw:

1621 case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:

1622 case Intrinsic::aarch64_sve_st1dq:

1623 case Intrinsic::aarch64_sve_st1q_scatter_index:

1624 case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset:

1625 case Intrinsic::aarch64_sve_st1q_scatter_vector_offset:

1626 case Intrinsic::aarch64_sve_st1wq:

1627 case Intrinsic::aarch64_sve_stnt1:

1628 case Intrinsic::aarch64_sve_stnt1_scatter:

1629 case Intrinsic::aarch64_sve_stnt1_scatter_index:

1630 case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:

1631 case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:

1633 case Intrinsic::aarch64_sve_st2:

1634 case Intrinsic::aarch64_sve_st2q:

1636 case Intrinsic::aarch64_sve_st3:

1637 case Intrinsic::aarch64_sve_st3q:

1639 case Intrinsic::aarch64_sve_st4:

1640 case Intrinsic::aarch64_sve_st4q:

1642 }

1643

1645}

1646

1648 Value *UncastedPred;

1649

1650

1652 m_Value(UncastedPred)))) {

1654 Pred = UncastedPred;

1655

1657 m_Value(UncastedPred))))

1658

1659

1660 if (OrigPredTy->getMinNumElements() <=

1662 ->getMinNumElements())

1663 Pred = UncastedPred;

1664 }

1665

1667 return C && C->isAllOnesValue();

1668}

1669

1670

1671

1674 if (Dup && Dup->getIntrinsicID() == Intrinsic::aarch64_sve_dup &&

1675 Dup->getOperand(1) == Pg && isa(Dup->getOperand(2)))

1679

1680 return V;

1681}

1682

1683static std::optional<Instruction *>

1688

1689 Value *Pg = II.getOperand(0);

1690 Value *Op1 = II.getOperand(1);

1691 Value *Op2 = II.getOperand(2);

1693

1694

1699 return &II;

1700 }

1701

1702

1705

1706 Value *SimpleII;

1708 SimpleII = simplifyBinOp(Opc, Op1, Op2, FII->getFastMathFlags(), DL);

1709 else

1711

1712

1713

1714

1715

1717 return std::nullopt;

1718

1721

1723

1724

1725 if (SimpleII == Inactive)

1727

1728

1731}

1732

1733

1734

1735static std::optional<Instruction *>

1739 return std::nullopt;

1740

1742

1743

1748

1752

1754 }

1755 }

1756

1757

1763 }

1764

1768 II.setCalledFunction(NewDecl);

1769 return &II;

1770 }

1771 }

1772

1773

1777

1778 return std::nullopt;

1779}

1780

1781

1782

1783

1784

1785

1786

1787

1788

1789

1790static std::optional<Instruction *>

1793 if (!BinOp)

1794 return std::nullopt;

1795

1796 auto IntrinsicID = BinOp->getIntrinsicID();

1797 switch (IntrinsicID) {

1798 case Intrinsic::aarch64_sve_and_z:

1799 case Intrinsic::aarch64_sve_bic_z:

1800 case Intrinsic::aarch64_sve_eor_z:

1801 case Intrinsic::aarch64_sve_nand_z:

1802 case Intrinsic::aarch64_sve_nor_z:

1803 case Intrinsic::aarch64_sve_orn_z:

1804 case Intrinsic::aarch64_sve_orr_z:

1805 break;

1806 default:

1807 return std::nullopt;

1808 }

1809

1810 auto BinOpPred = BinOp->getOperand(0);

1811 auto BinOpOp1 = BinOp->getOperand(1);

1812 auto BinOpOp2 = BinOp->getOperand(2);

1813

1815 if (!PredIntr ||

1816 PredIntr->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)

1817 return std::nullopt;

1818

1819 auto PredOp = PredIntr->getOperand(0);

1821 if (PredOpTy != II.getType())

1822 return std::nullopt;

1823

1826 Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp1});

1827 NarrowedBinOpArgs.push_back(NarrowBinOpOp1);

1828 if (BinOpOp1 == BinOpOp2)

1829 NarrowedBinOpArgs.push_back(NarrowBinOpOp1);

1830 else

1832 Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp2}));

1833

1834 auto NarrowedBinOp =

1837}

1838

1839static std::optional<Instruction *>

1841

1844

1846 return BinOpCombine;

1847

1848

1851 return std::nullopt;

1852

1854 Value *Cursor = II.getOperand(0), *EarliestReplacement = nullptr;

1855

1857

1858

1859 while (Cursor) {

1860

1861

1862 const auto *CursorVTy = cast(Cursor->getType());

1863 if (CursorVTy->getElementCount().getKnownMinValue() <

1864 IVTy->getElementCount().getKnownMinValue())

1865 break;

1866

1867

1868 if (Cursor->getType() == IVTy)

1869 EarliestReplacement = Cursor;

1870

1872

1873

1874 if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() ==

1875 Intrinsic::aarch64_sve_convert_to_svbool ||

1876 IntrinsicCursor->getIntrinsicID() ==

1877 Intrinsic::aarch64_sve_convert_from_svbool))

1878 break;

1879

1880 CandidatesForRemoval.insert(CandidatesForRemoval.begin(), IntrinsicCursor);

1881 Cursor = IntrinsicCursor->getOperand(0);

1882 }

1883

1884

1885

1886 if (!EarliestReplacement)

1887 return std::nullopt;

1888

1890}

1891

1894

1895 auto *OpPredicate = II.getOperand(0);

1898

1902}

1903

1906 Value *Pg = II.getOperand(1);

1907

1908

1912 II.getArgOperand(2));

1914 }

1915

1918 return std::nullopt;

1919

1920

1922 II.getArgOperand(0), II.getArgOperand(2), uint64_t(0));

1924}

1925

1928

1931 II.getArgOperand(0));

1934}

1935

1939

1941 return std::nullopt;

1942

1943

1944 auto *SplatValue =

1946 if (!SplatValue || !SplatValue->isZero())

1947 return std::nullopt;

1948

1949

1951 if (!DupQLane ||

1952 DupQLane->getIntrinsicID() != Intrinsic::aarch64_sve_dupq_lane)

1953 return std::nullopt;

1954

1955

1957 if (!DupQLaneIdx || !DupQLaneIdx->isZero())

1958 return std::nullopt;

1959

1961 if (!VecIns || VecIns->getIntrinsicID() != Intrinsic::vector_insert)

1962 return std::nullopt;

1963

1964

1965

1967 return std::nullopt;

1968

1970 return std::nullopt;

1971

1973 if (!ConstVec)

1974 return std::nullopt;

1975

1978 if (!VecTy || !OutTy || VecTy->getNumElements() != OutTy->getMinNumElements())

1979 return std::nullopt;

1980

1981 unsigned NumElts = VecTy->getNumElements();

1982 unsigned PredicateBits = 0;

1983

1984

1985 for (unsigned I = 0; I < NumElts; ++I) {

1987 if (!Arg)

1988 return std::nullopt;

1989 if (!Arg->isZero())

1990 PredicateBits |= 1 << (I * (16 / NumElts));

1991 }

1992

1993

1994 if (PredicateBits == 0) {

1996 PFalse->takeName(&II);

1998 }

1999

2000

2001 unsigned Mask = 8;

2002 for (unsigned I = 0; I < 16; ++I)

2003 if ((PredicateBits & (1 << I)) != 0)

2004 Mask |= (I % 8);

2005

2006 unsigned PredSize = Mask & -Mask;

2009

2010

2011 for (unsigned I = 0; I < 16; I += PredSize)

2012 if ((PredicateBits & (1 << I)) == 0)

2013 return std::nullopt;

2014

2015 auto *PTruePat =

2016 ConstantInt::get(Type::getInt32Ty(Ctx), AArch64SVEPredPattern::all);

2018 {PredType}, {PTruePat});

2020 Intrinsic::aarch64_sve_convert_to_svbool, {PredType}, {PTrue});

2021 auto *ConvertFromSVBool =

2023 {II.getType()}, {ConvertToSVBool});

2024

2027}

2028

2031 Value *Pg = II.getArgOperand(0);

2032 Value *Vec = II.getArgOperand(1);

2033 auto IntrinsicID = II.getIntrinsicID();

2034 bool IsAfter = IntrinsicID == Intrinsic::aarch64_sve_lasta;

2035

2036

2039

2040

2041

2046 auto OpC = OldBinOp->getOpcode();

2047 auto *NewLHS =

2049 auto *NewRHS =

2052 OpC, NewLHS, NewRHS, OldBinOp, OldBinOp->getName(), II.getIterator());

2054 }

2055 }

2056

2058 if (IsAfter && C && C->isNullValue()) {

2059

2062 Extract->insertBefore(II.getIterator());

2063 Extract->takeName(&II);

2065 }

2066

2068 if (!IntrPG)

2069 return std::nullopt;

2070

2071 if (IntrPG->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)

2072 return std::nullopt;

2073

2074 const auto PTruePattern =

2076

2077

2079 if (!MinNumElts)

2080 return std::nullopt;

2081

2082 unsigned Idx = MinNumElts - 1;

2083

2084

2085 if (IsAfter)

2086 ++Idx;

2087

2088

2089

2090

2092 if (Idx >= PgVTy->getMinNumElements())

2093 return std::nullopt;

2094

2095

2098 Extract->insertBefore(II.getIterator());

2099 Extract->takeName(&II);

2101}

2102

2105

2106

2107

2108

2109

2110

2111

2112 Value *Pg = II.getArgOperand(0);

2114 Value *Vec = II.getArgOperand(2);

2115 Type *Ty = II.getType();

2116

2117 if (!Ty->isIntegerTy())

2118 return std::nullopt;

2119

2122 default:

2123 return std::nullopt;

2124 case 16:

2126 break;

2127 case 32:

2129 break;

2130 case 64:

2132 break;

2133 }

2134

2140 II.getIntrinsicID(), {FPVec->getType()}, {Pg, FPFallBack, FPVec});

2143}

2144

2148

2149

2150 auto *AllPat =

2151 ConstantInt::get(Type::getInt32Ty(Ctx), AArch64SVEPredPattern::all);

2153 {II.getType()}, {AllPat});

2154 auto *RDFFR =

2158}

2159

2160static std::optional<Instruction *>

2163

2164 if (Pattern == AArch64SVEPredPattern::all) {

2169 }

2170

2172

2173 return MinNumElts && NumElts >= MinNumElts

2175 II, ConstantInt::get(II.getType(), MinNumElts)))

2176 : std::nullopt;

2177}

2178

2179static std::optional<Instruction *>

2182 if (!ST->isStreaming())

2183 return std::nullopt;

2184

2185

2186

2191}

2192

2195 Value *PgVal = II.getArgOperand(0);

2196 Value *OpVal = II.getArgOperand(1);

2197

2198

2199

2200 if (PgVal == OpVal &&

2201 (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_first ||

2202 II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_last)) {

2203 Value *Ops[] = {PgVal, OpVal};

2205

2206 auto *PTest =

2209

2211 }

2212

2215

2216 if (!Pg || Op)

2217 return std::nullopt;

2218

2220

2221 if (Pg->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&

2222 OpIID == Intrinsic::aarch64_sve_convert_to_svbool &&

2226

2228

2231 }

2232

2233

2234

2235

2236 if ((Pg == Op) && (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) &&

2237 ((OpIID == Intrinsic::aarch64_sve_brka_z) ||

2238 (OpIID == Intrinsic::aarch64_sve_brkb_z) ||

2239 (OpIID == Intrinsic::aarch64_sve_brkpa_z) ||

2240 (OpIID == Intrinsic::aarch64_sve_brkpb_z) ||

2241 (OpIID == Intrinsic::aarch64_sve_rdffr_z) ||

2242 (OpIID == Intrinsic::aarch64_sve_and_z) ||

2243 (OpIID == Intrinsic::aarch64_sve_bic_z) ||

2244 (OpIID == Intrinsic::aarch64_sve_eor_z) ||

2245 (OpIID == Intrinsic::aarch64_sve_nand_z) ||

2246 (OpIID == Intrinsic::aarch64_sve_nor_z) ||

2247 (OpIID == Intrinsic::aarch64_sve_orn_z) ||

2248 (OpIID == Intrinsic::aarch64_sve_orr_z))) {

2251

2254

2256 }

2257

2258 return std::nullopt;

2259}

2260

2261template <Intrinsic::ID MulOpc, Intrinsic::ID FuseOpc>

2262static std::optional<Instruction *>

2264 bool MergeIntoAddendOp) {

2265 Value *P = II.getOperand(0);

2266 Value *MulOp0, *MulOp1, *AddendOp, *Mul;

2267 if (MergeIntoAddendOp) {

2268 AddendOp = II.getOperand(1);

2269 Mul = II.getOperand(2);

2270 } else {

2271 AddendOp = II.getOperand(2);

2272 Mul = II.getOperand(1);

2273 }

2274

2277 return std::nullopt;

2278

2279 if (Mul->hasOneUse())

2280 return std::nullopt;

2281

2283 if (II.getType()->isFPOrFPVectorTy()) {

2285

2286

2288 return std::nullopt;

2290 return std::nullopt;

2292 }

2293

2295 if (MergeIntoAddendOp)

2297 {P, AddendOp, MulOp0, MulOp1}, FMFSource);

2298 else

2300 {P, MulOp0, MulOp1, AddendOp}, FMFSource);

2301

2303}

2304

2305static std::optional<Instruction *>

2307 Value *Pred = II.getOperand(0);

2308 Value *PtrOp = II.getOperand(1);

2309 Type *VecTy = II.getType();

2310

2313 Load->copyMetadata(II);

2315 }

2316

2322}

2323

2324static std::optional<Instruction *>

2326 Value *VecOp = II.getOperand(0);

2327 Value *Pred = II.getOperand(1);

2328 Value *PtrOp = II.getOperand(2);

2329

2332 Store->copyMetadata(II);

2334 }

2335

2340}

2341

2344 case Intrinsic::aarch64_sve_fmul_u:

2345 return Instruction::BinaryOps::FMul;

2346 case Intrinsic::aarch64_sve_fadd_u:

2347 return Instruction::BinaryOps::FAdd;

2348 case Intrinsic::aarch64_sve_fsub_u:

2349 return Instruction::BinaryOps::FSub;

2350 default:

2351 return Instruction::BinaryOpsEnd;

2352 }

2353}

2354

2355static std::optional<Instruction *>

2357

2358 if (II.isStrictFP())

2359 return std::nullopt;

2360

2361 auto *OpPredicate = II.getOperand(0);

2363 if (BinOpCode == Instruction::BinaryOpsEnd ||

2365 return std::nullopt;

2367 BinOpCode, II.getOperand(1), II.getOperand(2), II.getFastMathFlags());

2369}

2370

2374 Intrinsic::aarch64_sve_mla>(

2375 IC, II, true))

2376 return MLA;

2378 Intrinsic::aarch64_sve_mad>(

2379 IC, II, false))

2380 return MAD;

2381 return std::nullopt;

2382}

2383

2384static std::optional<Instruction *>

2386 if (auto FMLA =

2388 Intrinsic::aarch64_sve_fmla>(IC, II,

2389 true))

2390 return FMLA;

2391 if (auto FMAD =

2393 Intrinsic::aarch64_sve_fmad>(IC, II,

2394 false))

2395 return FMAD;

2396 if (auto FMLA =

2398 Intrinsic::aarch64_sve_fmla>(IC, II,

2399 true))

2400 return FMLA;

2401 return std::nullopt;

2402}

2403

2404static std::optional<Instruction *>

2406 if (auto FMLA =

2408 Intrinsic::aarch64_sve_fmla>(IC, II,

2409 true))

2410 return FMLA;

2411 if (auto FMAD =

2413 Intrinsic::aarch64_sve_fmad>(IC, II,

2414 false))

2415 return FMAD;

2416 if (auto FMLA_U =

2418 Intrinsic::aarch64_sve_fmla_u>(

2419 IC, II, true))

2420 return FMLA_U;

2422}

2423

2424static std::optional<Instruction *>

2426 if (auto FMLS =

2428 Intrinsic::aarch64_sve_fmls>(IC, II,

2429 true))

2430 return FMLS;

2431 if (auto FMSB =

2433 Intrinsic::aarch64_sve_fnmsb>(

2434 IC, II, false))

2435 return FMSB;

2436 if (auto FMLS =

2438 Intrinsic::aarch64_sve_fmls>(IC, II,

2439 true))

2440 return FMLS;

2441 return std::nullopt;

2442}

2443

2444static std::optional<Instruction *>

2446 if (auto FMLS =

2448 Intrinsic::aarch64_sve_fmls>(IC, II,

2449 true))

2450 return FMLS;

2451 if (auto FMSB =

2453 Intrinsic::aarch64_sve_fnmsb>(

2454 IC, II, false))

2455 return FMSB;

2456 if (auto FMLS_U =

2458 Intrinsic::aarch64_sve_fmls_u>(

2459 IC, II, true))

2460 return FMLS_U;

2462}

2463

2467 Intrinsic::aarch64_sve_mls>(

2468 IC, II, true))

2469 return MLS;

2470 return std::nullopt;

2471}

2472

2475 Value *UnpackArg = II.getArgOperand(0);

2477 bool IsSigned = II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpkhi ||

2478 II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpklo;

2479

2480

2481

2482 if (auto *ScalarArg = getSplatValue(UnpackArg)) {

2483 ScalarArg =

2489 }

2490

2491 return std::nullopt;

2492}

2495 auto *OpVal = II.getOperand(0);

2496 auto *OpIndices = II.getOperand(1);

2498

2499

2500

2502 if (!SplatValue ||

2503 SplatValue->getValue().uge(VTy->getElementCount().getKnownMinValue()))

2504 return std::nullopt;

2505

2506

2507

2509 auto *VectorSplat =

2511

2514}

2515

2519 Type *RetTy = II.getType();

2520 constexpr Intrinsic::ID FromSVB = Intrinsic::aarch64_sve_convert_from_svbool;

2521 constexpr Intrinsic::ID ToSVB = Intrinsic::aarch64_sve_convert_to_svbool;

2522

2523

2524

2525 if ((match(II.getArgOperand(0),

2527 match(II.getArgOperand(1),

2532 if (TyA == B->getType() &&

2537 TyA->getMinNumElements());

2540 }

2541 }

2542

2543 return std::nullopt;

2544}

2545

2548

2549

2551 if (match(II.getArgOperand(0),

2556 II, (II.getIntrinsicID() == Intrinsic::aarch64_sve_zip1 ? A : B));

2557

2558 return std::nullopt;

2559}

2560

2561static std::optional<Instruction *>

2563 Value *Mask = II.getOperand(0);

2564 Value *BasePtr = II.getOperand(1);

2565 Value *Index = II.getOperand(2);

2566 Type *Ty = II.getType();

2568

2569

2570

2571

2572 Value *IndexBase;

2575 Align Alignment =

2576 BasePtr->getPointerAlignment(II.getDataLayout());

2577

2579 BasePtr, IndexBase);

2584 }

2585

2586 return std::nullopt;

2587}

2588

2589static std::optional<Instruction *>

2591 Value *Val = II.getOperand(0);

2592 Value *Mask = II.getOperand(1);

2593 Value *BasePtr = II.getOperand(2);

2594 Value *Index = II.getOperand(3);

2596

2597

2598

2599

2600 Value *IndexBase;

2603 Align Alignment =

2604 BasePtr->getPointerAlignment(II.getDataLayout());

2605

2607 BasePtr, IndexBase);

2609

2611 }

2612

2613 return std::nullopt;

2614}

2615

2619 Value *Pred = II.getOperand(0);

2620 Value *Vec = II.getOperand(1);

2621 Value *DivVec = II.getOperand(2);

2622

2625 if (!SplatConstantInt)

2626 return std::nullopt;

2627

2629 const int64_t DivisorValue = Divisor.getSExtValue();

2630 if (DivisorValue == -1)

2631 return std::nullopt;

2632 if (DivisorValue == 1)

2634

2638 Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});

2640 }

2645 Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});

2647 Intrinsic::aarch64_sve_neg, {ASRD->getType()}, {ASRD, Pred, ASRD});

2649 }

2650

2651 return std::nullopt;

2652}

2653

2655 size_t VecSize = Vec.size();

2656 if (VecSize == 1)

2657 return true;

2659 return false;

2660 size_t HalfVecSize = VecSize / 2;

2661

2662 for (auto LHS = Vec.begin(), RHS = Vec.begin() + HalfVecSize;

2664 if (*LHS != nullptr && *RHS != nullptr) {

2666 continue;

2667 else

2668 return false;

2669 }

2670 if (!AllowPoison)

2671 return false;

2672 if (*LHS == nullptr && *RHS != nullptr)

2674 }

2675

2676 Vec.resize(HalfVecSize);

2678 return true;

2679}

2680

2681

2682

2685 Value *CurrentInsertElt = nullptr, *Default = nullptr;

2686 if (match(II.getOperand(0),

2690 return std::nullopt;

2692

2693

2697 Elts[Idx->getValue().getZExtValue()] = InsertElt->getOperand(1);

2698 CurrentInsertElt = InsertElt->getOperand(0);

2699 }

2700

2701 bool AllowPoison =

2704 return std::nullopt;

2705

2706

2708 for (size_t I = 0; I < Elts.size(); I++) {

2709 if (Elts[I] == nullptr)

2710 continue;

2713 }

2714 if (InsertEltChain == nullptr)

2715 return std::nullopt;

2716

2717

2718

2719

2720

2721 unsigned PatternWidth = IIScalableTy->getScalarSizeInBits() * Elts.size();

2722 unsigned PatternElementCount = IIScalableTy->getScalarSizeInBits() *

2723 IIScalableTy->getMinNumElements() /

2724 PatternWidth;

2725

2728 auto *WideShuffleMaskTy =

2730

2734 auto WideBitcast =

2738 WideBitcast, PoisonValue::get(WideScalableTy), WideShuffleMask);

2739 auto NarrowBitcast =

2741

2743}

2744

2747 Value *A = II.getArgOperand(0);

2748 Value *B = II.getArgOperand(1);

2749 if (A == B)

2751

2752 return std::nullopt;

2753}

2754

2757 Value *Pred = II.getOperand(0);

2758 Value *Vec = II.getOperand(1);

2759 Value *Shift = II.getOperand(2);

2760

2761

2762 Value *AbsPred, *MergedValue;

2767

2768 return std::nullopt;

2769

2770

2771

2772

2773

2776 return std::nullopt;

2777

2778

2779

2781 return std::nullopt;

2782

2784 {II.getType()}, {Pred, Vec, Shift});

2785

2787}

2788

2791 Value *Vec = II.getOperand(0);

2792

2795

2796 return std::nullopt;

2797}

2798

2801

2802 auto *NI = II.getNextNode();

2805 return I->mayReadOrWriteMemory() && I->mayHaveSideEffects();

2806 };

2807 while (LookaheadThreshold-- && CanSkipOver(NI)) {

2808 auto *NIBB = NI->getParent();

2809 NI = NI->getNextNode();

2810 if (!NI) {

2811 if (auto *SuccBB = NIBB->getUniqueSuccessor())

2812 NI = &*SuccBB->getFirstNonPHIOrDbgOrLifetime();

2813 else

2814 break;

2815 }

2816 }

2818 if (NextII && II.isIdenticalTo(NextII))

2820

2821 return std::nullopt;

2822}

2823

2829 {II.getType(), II.getOperand(0)->getType()},

2830 {II.getOperand(0), II.getOperand(1)}));

2831}

2832

2837 return std::nullopt;

2838}

2839

2842 unsigned NumBits) {

2843 Value *Passthru = II.getOperand(0);

2844 Value *Pg = II.getOperand(1);

2846

2847

2851 auto *Mask = ConstantInt::get(Ty, MaskValue);

2853 {Pg, Op, Mask});

2855 }

2856

2857 return std::nullopt;

2858}

2859

2860static std::optional<Instruction *>

2862 SMEAttrs FnSMEAttrs(*II.getFunction());

2867 return std::nullopt;

2868}

2869

2870std::optional<Instruction *>

2875 return I;

2876

2878 switch (IID) {

2879 default:

2880 break;

2881 case Intrinsic::aarch64_dmb:

2883 case Intrinsic::aarch64_neon_fmaxnm:

2884 case Intrinsic::aarch64_neon_fminnm:

2886 case Intrinsic::aarch64_sve_convert_from_svbool:

2888 case Intrinsic::aarch64_sve_dup:

2890 case Intrinsic::aarch64_sve_dup_x:

2892 case Intrinsic::aarch64_sve_cmpne:

2893 case Intrinsic::aarch64_sve_cmpne_wide:

2895 case Intrinsic::aarch64_sve_rdffr:

2897 case Intrinsic::aarch64_sve_lasta:

2898 case Intrinsic::aarch64_sve_lastb:

2900 case Intrinsic::aarch64_sve_clasta_n:

2901 case Intrinsic::aarch64_sve_clastb_n:

2903 case Intrinsic::aarch64_sve_cntd:

2905 case Intrinsic::aarch64_sve_cntw:

2907 case Intrinsic::aarch64_sve_cnth:

2909 case Intrinsic::aarch64_sve_cntb:

2911 case Intrinsic::aarch64_sme_cntsd:

2913 case Intrinsic::aarch64_sve_ptest_any:

2914 case Intrinsic::aarch64_sve_ptest_first:

2915 case Intrinsic::aarch64_sve_ptest_last:

2917 case Intrinsic::aarch64_sve_fadd:

2919 case Intrinsic::aarch64_sve_fadd_u:

2921 case Intrinsic::aarch64_sve_fmul_u:

2923 case Intrinsic::aarch64_sve_fsub:

2925 case Intrinsic::aarch64_sve_fsub_u:

2927 case Intrinsic::aarch64_sve_add:

2929 case Intrinsic::aarch64_sve_add_u:

2931 Intrinsic::aarch64_sve_mla_u>(

2932 IC, II, true);

2933 case Intrinsic::aarch64_sve_sub:

2935 case Intrinsic::aarch64_sve_sub_u:

2937 Intrinsic::aarch64_sve_mls_u>(

2938 IC, II, true);

2939 case Intrinsic::aarch64_sve_tbl:

2941 case Intrinsic::aarch64_sve_uunpkhi:

2942 case Intrinsic::aarch64_sve_uunpklo:

2943 case Intrinsic::aarch64_sve_sunpkhi:

2944 case Intrinsic::aarch64_sve_sunpklo:

2946 case Intrinsic::aarch64_sve_uzp1:

2948 case Intrinsic::aarch64_sve_zip1:

2949 case Intrinsic::aarch64_sve_zip2:

2951 case Intrinsic::aarch64_sve_ld1_gather_index:

2953 case Intrinsic::aarch64_sve_st1_scatter_index:

2955 case Intrinsic::aarch64_sve_ld1:

2957 case Intrinsic::aarch64_sve_st1:

2959 case Intrinsic::aarch64_sve_sdiv:

2961 case Intrinsic::aarch64_sve_sel:

2963 case Intrinsic::aarch64_sve_srshl:

2965 case Intrinsic::aarch64_sve_dupq_lane:

2967 case Intrinsic::aarch64_sve_insr:

2969 case Intrinsic::aarch64_sve_whilelo:

2971 case Intrinsic::aarch64_sve_ptrue:

2973 case Intrinsic::aarch64_sve_uxtb:

2975 case Intrinsic::aarch64_sve_uxth:

2977 case Intrinsic::aarch64_sve_uxtw:

2979 case Intrinsic::aarch64_sme_in_streaming_mode:

2981 }

2982

2983 return std::nullopt;

2984}

2985

2990 SimplifyAndSetOp) const {

2991 switch (II.getIntrinsicID()) {

2992 default:

2993 break;

2994 case Intrinsic::aarch64_neon_fcvtxn:

2995 case Intrinsic::aarch64_neon_rshrn:

2996 case Intrinsic::aarch64_neon_sqrshrn:

2997 case Intrinsic::aarch64_neon_sqrshrun:

2998 case Intrinsic::aarch64_neon_sqshrn:

2999 case Intrinsic::aarch64_neon_sqshrun:

3000 case Intrinsic::aarch64_neon_sqxtn:

3001 case Intrinsic::aarch64_neon_sqxtun:

3002 case Intrinsic::aarch64_neon_uqrshrn:

3003 case Intrinsic::aarch64_neon_uqshrn:

3004 case Intrinsic::aarch64_neon_uqxtn:

3005 SimplifyAndSetOp(&II, 0, OrigDemandedElts, UndefElts);

3006 break;

3007 }

3008

3009 return std::nullopt;

3010}

3011

3013 return ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&

3015}

3016

3019 switch (K) {

3023 if (ST->useSVEForFixedLengthVectors() &&

3026 std::max(ST->getMinSVEVectorSizeInBits(), 128u));

3027 else if (ST->isNeonAvailable())

3029 else

3032 if (ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&

3035 else

3037 }

3039}

3040

3041bool AArch64TTIImpl::isSingleExtWideningInstruction(

3043 Type *SrcOverrideTy) const {

3044

3045

3049 };

3050

3051

3052

3053

3054

3055

3057 if (useNeonVector(DstTy) || Args.size() != 2 ||

3058 (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))

3059 return false;

3060

3061 Type *SrcTy = SrcOverrideTy;

3062 switch (Opcode) {

3063 case Instruction::Add:

3064 case Instruction::Sub: {

3065

3067 if (!SrcTy)

3068 SrcTy =

3070 break;

3071 }

3072

3073 if (Opcode == Instruction::Sub)

3074 return false;

3075

3076

3078 if (!SrcTy)

3079 SrcTy =

3081 break;

3082 }

3083 return false;

3084 }

3085 default:

3086 return false;

3087 }

3088

3089

3090

3092 if (!DstTyL.second.isVector() || DstEltSize != DstTy->getScalarSizeInBits())

3093 return false;

3094

3095

3096

3097 assert(SrcTy && "Expected some SrcTy");

3099 unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();

3100 if (!SrcTyL.second.isVector() || SrcElTySize != SrcTy->getScalarSizeInBits())

3101 return false;

3102

3103

3105 DstTyL.first * DstTyL.second.getVectorMinNumElements();

3107 SrcTyL.first * SrcTyL.second.getVectorMinNumElements();

3108

3109

3110

3111 return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstEltSize;

3112}

3113

3114Type *AArch64TTIImpl::isBinExtWideningInstruction(unsigned Opcode, Type *DstTy,

3116 Type *SrcOverrideTy) const {

3117 if (Opcode != Instruction::Add && Opcode != Instruction::Sub &&

3118 Opcode != Instruction::Mul)

3119 return nullptr;

3120

3121

3122

3123

3124

3125

3128 (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))

3129 return nullptr;

3130

3131 auto getScalarSizeWithOverride = [&](const Value *V) {

3132 if (SrcOverrideTy)

3135 ->getOperand(0)

3136 ->getType()

3137 ->getScalarSizeInBits();

3138 };

3139

3140 unsigned MaxEltSize = 0;

3143 unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);

3144 unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);

3145 MaxEltSize = std::max(EltSize0, EltSize1);

3148 unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);

3149 unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);

3150

3151

3152 if (EltSize0 >= DstEltSize / 2 || EltSize1 >= DstEltSize / 2)

3153 return nullptr;

3154 MaxEltSize = DstEltSize / 2;

3155 } else if (Opcode == Instruction::Mul &&

3157

3158

3159

3160 KnownBits Known =

3165 return nullptr;

3166

3167 MaxEltSize =

3168 getScalarSizeWithOverride(isa(Args[0]) ? Args[0] : Args[1]);

3169 } else

3170 return nullptr;

3171

3172 if (MaxEltSize * 2 > DstEltSize)

3173 return nullptr;

3174

3177 return nullptr;

3178 return ExtTy;

3179}

3180

3181

3182

3183

3184

3185

3186

3188 Type *Src) const {

3189

3190 if (!Src->isVectorTy() || !TLI->isTypeLegal(TLI->getValueType(DL, Src)) ||

3191 (Src->isScalableTy() && !ST->hasSVE2()))

3192 return false;

3193

3194 if (ExtUser->getOpcode() != Instruction::Add || !ExtUser->hasOneUse())

3195 return false;

3196

3197

3199 auto *AddUser =

3201 if (AddUser && AddUser->getOpcode() == Instruction::Add)

3202 Add = AddUser;

3203

3205 if (!Shr || Shr->getOpcode() != Instruction::LShr)

3206 return false;

3207

3209 if (!Trunc || Trunc->getOpcode() != Instruction::Trunc ||

3210 Src->getScalarSizeInBits() !=

3211 cast(Trunc)->getDestTy()->getScalarSizeInBits())

3212 return false;

3213

3214

3215

3219 return false;

3220

3221

3224 return true;

3225

3226 return false;

3227}
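Editor's note: the matcher above looks for the trunc(lshr(add(ext, ext) ..., 1)) pattern that AArch64 can lower to a halving-add instruction. A standalone sketch of the scalar computation that pattern performs, shown in the rounding variant (the extra +1); whether the rounding or non-rounding form applies depends on the matched IR, so the +1 here is an assumption for illustration.

// Illustrative sketch: rounding halving add of two u8 values, computed in a
// wider type to mirror the zext in the matched IR pattern.
#include <cstdint>

static uint8_t rounding_halving_add_u8(uint8_t A, uint8_t B) {
  uint16_t Wide = static_cast<uint16_t>(A) + static_cast<uint16_t>(B) + 1;
  return static_cast<uint8_t>(Wide >> 1); // trunc(lshr(add, 1))
}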

3228

3234 int ISD = TLI->InstructionOpcodeToISD(Opcode);

3235 assert(ISD && "Invalid opcode");

3236

3237

3238 if (I && I->hasOneUser()) {

3241 if (Type *ExtTy = isBinExtWideningInstruction(

3242 SingleUser->getOpcode(), Dst, Operands,

3243 Src != I->getOperand(0)->getType() ? Src : nullptr)) {

3244

3245

3247 Type *DoubleSrcTy =

3251 }

3252

3253 return 0;

3254 }

3255

3256 if (isSingleExtWideningInstruction(

3257 SingleUser->getOpcode(), Dst, Operands,

3258 Src != I->getOperand(0)->getType() ? Src : nullptr)) {

3259

3260

3261

3262 if (SingleUser->getOpcode() == Instruction::Add) {

3263 if (I == SingleUser->getOperand(1) ||

3265 cast(SingleUser->getOperand(1))->getOpcode() == Opcode))

3266 return 0;

3267 } else {

3268

3269

3270 return 0;

3271 }

3272 }

3273

3274

3277 return 0;

3278 }

3279

3280

3283 return Cost == 0 ? 0 : 1;

3284 return Cost;

3285 };

3286

3287 EVT SrcTy = TLI->getValueType(DL, Src);

3288 EVT DstTy = TLI->getValueType(DL, Dst);

3289

3290 if (!SrcTy.isSimple() || !DstTy.isSimple())

3291 return AdjustCost(

3293

3294

3295

3296 if (!ST->hasSVE2() && !ST->isStreamingSVEAvailable() &&

3300

3302 {ISD::FP_ROUND, MVT::bf16, MVT::f32, 1},

3303 {ISD::FP_ROUND, MVT::bf16, MVT::f64, 1},

3304 {ISD::FP_ROUND, MVT::v4bf16, MVT::v4f32, 1},

3305 {ISD::FP_ROUND, MVT::v8bf16, MVT::v8f32, 2},

3306 {ISD::FP_ROUND, MVT::v2bf16, MVT::v2f64, 2},

3307 {ISD::FP_ROUND, MVT::v4bf16, MVT::v4f64, 3},

3308 {ISD::FP_ROUND, MVT::v8bf16, MVT::v8f64, 6},

3309 {ISD::FP_ROUND, MVT::nxv2bf16, MVT::nxv2f32, 1},

3310 {ISD::FP_ROUND, MVT::nxv4bf16, MVT::nxv4f32, 1},

3311 {ISD::FP_ROUND, MVT::nxv8bf16, MVT::nxv8f32, 3},

3312 {ISD::FP_ROUND, MVT::nxv2bf16, MVT::nxv2f64, 2},

3313 {ISD::FP_ROUND, MVT::nxv4bf16, MVT::nxv4f64, 5},

3314 {ISD::FP_ROUND, MVT::nxv8bf16, MVT::nxv8f64, 11},

3315 };

3316

3317 if (ST->hasBF16())

3319 BF16Tbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()))

3320 return AdjustCost(Entry->Cost);

3321

3322

3323

3324

3325

3326

3327

3328 const unsigned int SVE_EXT_COST = 1;

3329 const unsigned int SVE_FCVT_COST = 1;

3330 const unsigned int SVE_UNPACK_ONCE = 4;

3331 const unsigned int SVE_UNPACK_TWICE = 16;

3332

3334 {ISD::TRUNCATE, MVT::v2i8, MVT::v2i64, 1},

3335 {ISD::TRUNCATE, MVT::v2i16, MVT::v2i64, 1},

3336 {ISD::TRUNCATE, MVT::v2i32, MVT::v2i64, 1},

3337 {ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 1},

3338 {ISD::TRUNCATE, MVT::v4i8, MVT::v4i64, 3},

3339 {ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1},

3340 {ISD::TRUNCATE, MVT::v4i16, MVT::v4i64, 2},

3341 {ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1},

3342 {ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 1},

3343 {ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2},

3344 {ISD::TRUNCATE, MVT::v8i8, MVT::v8i64, 4},

3345 {ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1},

3346 {ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 3},

3347 {ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 2},

3348 {ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 1},

3349 {ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 3},

3350 {ISD::TRUNCATE, MVT::v16i8, MVT::v16i64, 7},

3351 {ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 2},

3352 {ISD::TRUNCATE, MVT::v16i16, MVT::v16i64, 6},

3353 {ISD::TRUNCATE, MVT::v16i32, MVT::v16i64, 4},

3354

3355

3386 {ISD::TRUNCATE, MVT::nxv16i8, MVT::nxv16i16, 1},

3387 {ISD::TRUNCATE, MVT::nxv16i8, MVT::nxv16i32, 3},

3388 {ISD::TRUNCATE, MVT::nxv16i8, MVT::nxv16i64, 7},

3389

3390

3407

3408

3409 {ISD::FP_EXTEND, MVT::f64, MVT::f32, 1},

3410 {ISD::FP_EXTEND, MVT::v2f64, MVT::v2f32, 1},

3411 {ISD::FP_EXTEND, MVT::v4f64, MVT::v4f32, 2},

3412

3413 {ISD::FP_EXTEND, MVT::f32, MVT::f16, 1},

3414 {ISD::FP_EXTEND, MVT::f64, MVT::f16, 1},

3415 {ISD::FP_EXTEND, MVT::v4f32, MVT::v4f16, 1},

3416 {ISD::FP_EXTEND, MVT::v8f32, MVT::v8f16, 2},

3417 {ISD::FP_EXTEND, MVT::v2f64, MVT::v2f16, 2},

3418 {ISD::FP_EXTEND, MVT::v4f64, MVT::v4f16, 3},

3419 {ISD::FP_EXTEND, MVT::v8f64, MVT::v8f16, 6},

3420

3421 {ISD::FP_EXTEND, MVT::f32, MVT::bf16, 1},

3422 {ISD::FP_EXTEND, MVT::f64, MVT::bf16, 2},

3423 {ISD::FP_EXTEND, MVT::v4f32, MVT::v4bf16, 1},

3424 {ISD::FP_EXTEND, MVT::v8f32, MVT::v8bf16, 2},

3425 {ISD::FP_EXTEND, MVT::v2f64, MVT::v2bf16, 2},

3426 {ISD::FP_EXTEND, MVT::v4f64, MVT::v4bf16, 3},

3427 {ISD::FP_EXTEND, MVT::v8f64, MVT::v8bf16, 6},

3428

3430 {ISD::FP_ROUND, MVT::v2f32, MVT::v2f64, 1},

3431 {ISD::FP_ROUND, MVT::v4f32, MVT::v4f64, 2},

3432

3435 {ISD::FP_ROUND, MVT::v4f16, MVT::v4f32, 1},

3436 {ISD::FP_ROUND, MVT::v8f16, MVT::v8f32, 2},

3437 {ISD::FP_ROUND, MVT::v2f16, MVT::v2f64, 2},

3438 {ISD::FP_ROUND, MVT::v4f16, MVT::v4f64, 3},

3439 {ISD::FP_ROUND, MVT::v8f16, MVT::v8f64, 6},

3440

3441 {ISD::FP_ROUND, MVT::bf16, MVT::f32, 8},

3442 {ISD::FP_ROUND, MVT::bf16, MVT::f64, 9},

3449

3450

3457

3458

3460 SVE_EXT_COST + SVE_FCVT_COST},

3461 {ISD::SINT_TO_FP, MVT::nxv2f16, MVT::nxv2i16, SVE_FCVT_COST},

3462 {ISD::SINT_TO_FP, MVT::nxv2f16, MVT::nxv2i32, SVE_FCVT_COST},

3463 {ISD::SINT_TO_FP, MVT::nxv2f16, MVT::nxv2i64, SVE_FCVT_COST},

3465 SVE_EXT_COST + SVE_FCVT_COST},

3466 {ISD::UINT_TO_FP, MVT::nxv2f16, MVT::nxv2i16, SVE_FCVT_COST},

3467 {ISD::UINT_TO_FP, MVT::nxv2f16, MVT::nxv2i32, SVE_FCVT_COST},

3468 {ISD::UINT_TO_FP, MVT::nxv2f16, MVT::nxv2i64, SVE_FCVT_COST},

3469

3470

3472 SVE_EXT_COST + SVE_FCVT_COST},

3473 {ISD::SINT_TO_FP, MVT::nxv4f16, MVT::nxv4i16, SVE_FCVT_COST},

3474 {ISD::SINT_TO_FP, MVT::nxv4f16, MVT::nxv4i32, SVE_FCVT_COST},

3476 SVE_EXT_COST + SVE_FCVT_COST},

3477 {ISD::UINT_TO_FP, MVT::nxv4f16, MVT::nxv4i16, SVE_FCVT_COST},

3478 {ISD::UINT_TO_FP, MVT::nxv4f16, MVT::nxv4i32, SVE_FCVT_COST},

3479

3480

3482 SVE_EXT_COST + SVE_FCVT_COST},

3483 {ISD::SINT_TO_FP, MVT::nxv8f16, MVT::nxv8i16, SVE_FCVT_COST},

3485 SVE_EXT_COST + SVE_FCVT_COST},

3486 {ISD::UINT_TO_FP, MVT::nxv8f16, MVT::nxv8i16, SVE_FCVT_COST},

3487

3488

3490 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},

3492 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},

3493

3494

3499

3500

3502 SVE_EXT_COST + SVE_FCVT_COST},

3503 {ISD::SINT_TO_FP, MVT::nxv2f32, MVT::nxv2i16, SVE_FCVT_COST},

3504 {ISD::SINT_TO_FP, MVT::nxv2f32, MVT::nxv2i32, SVE_FCVT_COST},

3505 {ISD::SINT_TO_FP, MVT::nxv2f32, MVT::nxv2i64, SVE_FCVT_COST},

3507 SVE_EXT_COST + SVE_FCVT_COST},

3508 {ISD::UINT_TO_FP, MVT::nxv2f32, MVT::nxv2i16, SVE_FCVT_COST},

3509 {ISD::UINT_TO_FP, MVT::nxv2f32, MVT::nxv2i32, SVE_FCVT_COST},

3510 {ISD::UINT_TO_FP, MVT::nxv2f32, MVT::nxv2i64, SVE_FCVT_COST},

3511

3512

3517

3518

3520 SVE_EXT_COST + SVE_FCVT_COST},

3521 {ISD::SINT_TO_FP, MVT::nxv4f32, MVT::nxv4i16, SVE_FCVT_COST},

3522 {ISD::SINT_TO_FP, MVT::nxv4f32, MVT::nxv4i32, SVE_FCVT_COST},

3524 SVE_EXT_COST + SVE_FCVT_COST},

3525 {ISD::UINT_TO_FP, MVT::nxv4f32, MVT::nxv4i16, SVE_FCVT_COST},

3526 {ISD::SINT_TO_FP, MVT::nxv4f32, MVT::nxv4i32, SVE_FCVT_COST},

3527

3528

3533

3534

3536 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},

3538 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},

3540 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},

3542 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},

3543

3544

3546 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},

3548 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},

3549

3550

3553

3554

3561

3562

3564 SVE_EXT_COST + SVE_FCVT_COST},

3565 {ISD::SINT_TO_FP, MVT::nxv2f64, MVT::nxv2i16, SVE_FCVT_COST},

3566 {ISD::SINT_TO_FP, MVT::nxv2f64, MVT::nxv2i32, SVE_FCVT_COST},

3567 {ISD::SINT_TO_FP, MVT::nxv2f64, MVT::nxv2i64, SVE_FCVT_COST},

3569 SVE_EXT_COST + SVE_FCVT_COST},

3570 {ISD::UINT_TO_FP, MVT::nxv2f64, MVT::nxv2i16, SVE_FCVT_COST},

3571 {ISD::UINT_TO_FP, MVT::nxv2f64, MVT::nxv2i32, SVE_FCVT_COST},

3572 {ISD::UINT_TO_FP, MVT::nxv2f64, MVT::nxv2i64, SVE_FCVT_COST},

3573

3574

3577

3578

3580 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},

3582 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},

3584 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},

3586 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},

3588 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},

3590 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},

3591

3592

3594 SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},

3596 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},

3598 SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},

3600 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},

3601

3602

3609

3610

3617

3618

3623

3624

3633

3634

3641

3642

3653

3654

3665

3666

3671

3672

3679

3680

3685

3686

3697

3698

3707

3708

3717

3718

3722

3723

3724 {ISD::FP_ROUND, MVT::nxv2bf16, MVT::nxv2f32, 8},

3725 {ISD::FP_ROUND, MVT::nxv4bf16, MVT::nxv4f32, 8},

3726 {ISD::FP_ROUND, MVT::nxv8bf16, MVT::nxv8f32, 17},

3727

3728

3732

3733

3734 {ISD::FP_ROUND, MVT::nxv2bf16, MVT::nxv2f64, 9},

3735 {ISD::FP_ROUND, MVT::nxv4bf16, MVT::nxv4f64, 19},

3736 {ISD::FP_ROUND, MVT::nxv8bf16, MVT::nxv8f64, 39},

3737

3738

3742

3743

3744 {ISD::FP_EXTEND, MVT::nxv2f32, MVT::nxv2f16, 1},

3745 {ISD::FP_EXTEND, MVT::nxv4f32, MVT::nxv4f16, 1},

3746 {ISD::FP_EXTEND, MVT::nxv8f32, MVT::nxv8f16, 2},

3747

3748

3749 {ISD::FP_EXTEND, MVT::nxv2f32, MVT::nxv2bf16, 1},

3750 {ISD::FP_EXTEND, MVT::nxv4f32, MVT::nxv4bf16, 1},

3751 {ISD::FP_EXTEND, MVT::nxv8f32, MVT::nxv8bf16, 4},

3752

3753

3754 {ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f16, 1},

3755 {ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f16, 2},

3756 {ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8f16, 4},

3757

3758

3759 {ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2bf16, 2},

3760 {ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4bf16, 6},

3761 {ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8bf16, 14},

3762

3763

3764 {ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f32, 1},

3765 {ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f32, 2},

3766 {ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8f32, 6},

3767

3768

3769 {ISD::BITCAST, MVT::nxv2f16, MVT::nxv2i16, 0},

3770 {ISD::BITCAST, MVT::nxv4f16, MVT::nxv4i16, 0},

3771 {ISD::BITCAST, MVT::nxv2f32, MVT::nxv2i32, 0},

3772

3773

3774 {ISD::BITCAST, MVT::nxv2i16, MVT::nxv2f16, 0},

3775 {ISD::BITCAST, MVT::nxv4i16, MVT::nxv4f16, 0},

3776 {ISD::BITCAST, MVT::nxv2i32, MVT::nxv2f32, 0},

3777

3778

3779

3780

3787

3794 };

3795

3796

3797

3798

3799 EVT WiderTy = SrcTy.bitsGT(DstTy) ? SrcTy : DstTy;

3802 ST->useSVEForFixedLengthVectors(WiderTy)) {

3803 std::pair<InstructionCost, MVT> LT =

3805 unsigned NumElements =

3807 return AdjustCost(

3808 LT.first *

3813 }

3814

3816 ConversionTbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()))

3817 return AdjustCost(Entry->Cost);

3818

3824 {ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f16, 2},

3826 {ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f16, 2},

3830 {ISD::FP_TO_SINT, MVT::v8i32, MVT::v8f16, 4},

3832 {ISD::FP_TO_SINT, MVT::v16i8, MVT::v16f16, 3},

3834 {ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f16, 2},

3836 {ISD::FP_TO_SINT, MVT::v16i32, MVT::v16f16, 8},

3838 {ISD::UINT_TO_FP, MVT::v8f16, MVT::v8i8, 2},

3839 {ISD::SINT_TO_FP, MVT::v8f16, MVT::v8i8, 2},

3840 {ISD::UINT_TO_FP, MVT::v16f16, MVT::v16i8, 4},

3841 {ISD::SINT_TO_FP, MVT::v16f16, MVT::v16i8, 4},

3842 };

3843

3844 if (ST->hasFullFP16())

3846 FP16Tbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()))

3847 return AdjustCost(Entry->Cost);

3848

3849

3850

3854 return AdjustCost(

3856 getCastInstrCost(Opcode, Dst->getScalarType(), Src->getScalarType(),

3862

3865 ST->isSVEorStreamingSVEAvailable() &&

3866 TLI->getTypeAction(Src->getContext(), SrcTy) ==

3868 TLI->getTypeAction(Dst->getContext(), DstTy) ==

3870

3871

3872

3873

3877 Opcode, LegalTy, Src, CCH, CostKind, I);

3880 return Part1 + Part2;

3881 }

3882

3883

3884

3887 ST->isSVEorStreamingSVEAvailable() && TLI->isTypeLegal(DstTy))

3889

3890 return AdjustCost(

3892}

3893

3898

3899

3900 assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&

3901 "Invalid opcode");

3902

3903

3904

3906

3907

3909

3910

3911

3913 CostKind, Index, nullptr, nullptr);

3914

3915

3917 auto DstVT = TLI->getValueType(DL, Dst);

3918 auto SrcVT = TLI->getValueType(DL, Src);

3919

3920

3921

3922

3923 if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))

3926

3927

3928

3929 if (DstVT.getFixedSizeInBits() < SrcVT.getFixedSizeInBits())

3932

3933 switch (Opcode) {

3934 default:

3936

3937

3938

3939 case Instruction::SExt:

3940 return Cost;

3941

3942

3943

3944 case Instruction::ZExt:

3945 if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)

3946 return Cost;

3947 }

3948

3949

3952}

3953

3958 return Opcode == Instruction::PHI ? 0 : 1;

3960

3961 return 0;

3962}

3963

3964InstructionCost AArch64TTIImpl::getVectorInstrCostHelper(

3967 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) const {

3969

3970 if (Index != -1U) {

3971

3973

3974

3975 if (!LT.second.isVector())

3976 return 0;

3977

3978

3979

3980 if (LT.second.isFixedLengthVector()) {

3981 unsigned Width = LT.second.getVectorNumElements();

3982 Index = Index % Width;

3983 }

3984

3985

3986

3987

3988

3990 return 0;

3991

3992

3993

3994

3995

3998 ? 0

4000

4001

4002

4005 ? 2

4007

4008

4009

4010

4011

4012

4013 }

4014

4015

4016

4017

4018

4019

4020

4021

4022

4023

4024

4025

4026

4027

4028

4029

4030 auto ExtractCanFuseWithFmul = [&]() {

4031

4032 if (Index == 0)

4033 return false;

4034

4035

4036

4037 auto IsAllowedScalarTy = [&](const Type *T) {

4038 return T->isFloatTy() || T->isDoubleTy() ||

4039 (T->isHalfTy() && ST->hasFullFP16());

4040 };

4041

4042

4043 auto IsUserFMulScalarTy = [](const Value *EEUser) {

4044

4046 return BO && BO->getOpcode() == BinaryOperator::FMul &&

4047 !BO->getType()->isVectorTy();

4048 };

4049

4050

4051

4052 auto IsExtractLaneEquivalentToZero = [&](unsigned Idx, unsigned EltSz) {

4053 auto RegWidth =

4056 return Idx == 0 || (RegWidth != 0 && (Idx * EltSz) % RegWidth == 0);

4057 };

4058

4059

4060

4062 return false;

4063

4064 if (Scalar) {

4065 DenseMap<User *, unsigned> UserToExtractIdx;

4066 for (auto *U : Scalar->users()) {

4067 if (!IsUserFMulScalarTy(U))

4068 return false;

4069

4070

4071 UserToExtractIdx[U];

4072 }

4073 if (UserToExtractIdx.empty())

4074 return false;

4075 for (auto &[S, U, L] : ScalarUserAndIdx) {

4076 for (auto *U : S->users()) {

4077 if (UserToExtractIdx.contains(U)) {

4079 auto *Op0 = FMul->getOperand(0);

4080 auto *Op1 = FMul->getOperand(1);

4081 if ((Op0 == S && Op1 == S) || Op0 != S || Op1 != S) {

4082 UserToExtractIdx[U] = L;

4083 break;

4084 }

4085 }

4086 }

4087 }

4088 for (auto &[U, L] : UserToExtractIdx) {

4089 if (!IsExtractLaneEquivalentToZero(Index, Val->getScalarSizeInBits()) &&

4091 return false;

4092 }

4093 } else {

4095

4097 if (!IdxOp)

4098 return false;

4099

4100 return !EE->users().empty() && all_of(EE->users(), [&](const User *U) {

4101 if (!IsUserFMulScalarTy(U))

4102 return false;

4103

4104

4105

4106 const auto *BO = cast(U);

4107 const auto *OtherEE = dyn_cast(

4108 BO->getOperand(0) == EE ? BO->getOperand(1) : BO->getOperand(0));

4109 if (OtherEE) {

4110 const auto *IdxOp = dyn_cast(OtherEE->getIndexOperand());

4111 if (!IdxOp)

4112 return false;

4113 return IsExtractLaneEquivalentToZero(

4114 cast(OtherEE->getIndexOperand())

4115 ->getValue()

4116 .getZExtValue(),

4117 OtherEE->getType()->getScalarSizeInBits());

4118 }

4119 return true;

4120 });

4121 }

4122 return true;

4123 };

4124

4125 if (Opcode == Instruction::ExtractElement && (I || Scalar) &&

4126 ExtractCanFuseWithFmul())

4127 return 0;

4128

4129

4131 : ST->getVectorInsertExtractBaseCost();

4132}

4133

4136 unsigned Index,

4137 const Value *Op0,

4138 const Value *Op1) const {

4139

4140

4141

4142 if (Opcode == Instruction::InsertElement && Index == 0 && Op0 &&

4144 return 0;

4145 return getVectorInstrCostHelper(Opcode, Val, CostKind, Index);

4146}

4147

4151 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) const {

4152 return getVectorInstrCostHelper(Opcode, Val, CostKind, Index, nullptr, Scalar,

4153 ScalarUserAndIdx);

4154}

4155

4159 unsigned Index) const {

4160 return getVectorInstrCostHelper(I.getOpcode(), Val, CostKind, Index, &I);

4161}

4162

4166 unsigned Index) const {

4169 Index);

4170

4171

4172

4173

4174

4175

4177 ? 2

4178 : ST->getVectorInsertExtractBaseCost() + 1;

4179}

4180

4182 VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract,

4187 if (Ty->getElementType()->isFloatingPointTy())

4190 unsigned VecInstCost =

4192 return DemandedElts.popcount() * (Insert + Extract) * VecInstCost;

4193}

4194

4199 if (!Ty->getScalarType()->isHalfTy() && !Ty->getScalarType()->isBFloatTy())

4200 return std::nullopt;

4201 if (Ty->getScalarType()->isHalfTy() && ST->hasFullFP16())

4202 return std::nullopt;

4203 if (CanUseSVE && Ty->isScalableTy() && ST->hasSVEB16B16() &&

4204 ST->isNonStreamingSVEorSME2Available())

4205 return std::nullopt;

4206

4212 Cost += InstCost(PromotedTy);

4213 if (IncludeTrunc)

4216 return Cost;

4217}

4218

4223

4224

4225

4226

4227

4231

4232

4235 Op2Info, Args, CxtI);

4236

4237

4239 int ISD = TLI->InstructionOpcodeToISD(Opcode);

4240

4241

4242

4246 Ty, CostKind, Op1Info, Op2Info, true,

4247

4249 [&](Type *PromotedTy) {

4251 Op1Info, Op2Info);

4252 }))

4253 return *PromotedCost;

4254

4255

4256

4257

4258

4259 if (Type *ExtTy = isBinExtWideningInstruction(Opcode, Ty, Args)) {

4260 if (ExtTy != Ty)

4264 return LT.first;

4265 }

4266

4267 switch (ISD) {

4268 default:

4270 Op2Info);

4273

4274

4275

4276

4277

4278

4279

4280

4281

4282

4283

4284

4285

4286

4287

4288

4289

4290

4291

4292

4293

4294

4295

4296

4297

4298

4299

4300

4301

4302

4303

4304

4305

4306

4307

4308

4309

4310

4311

4312

4313

4324

4325

4326 auto VT = TLI->getValueType(DL, Ty);

4327 if (VT.isScalarInteger() && VT.getSizeInBits() <= 64) {

4329

4330 return ISD == ISD::SDIV ? (3 * AddCost + AsrCost)

4331 : (3 * AsrCost + AddCost);

4332 } else {

4333 return MulCost + AsrCost + 2 * AddCost;

4334 }

4335 } else if (VT.isVector()) {

4338

4339

4340

4341

4342

4345 if (Ty->isScalableTy() && ST->hasSVE())

4346 Cost += 2 * AsrCost;

4347 else {

4349 UsraCost +

4351 ? (LT.second.getScalarType() == MVT::i64 ? 1 : 2) * AsrCost

4352 : 2 * AddCost);

4353 }

4354 return Cost;

4355 } else if (LT.second == MVT::v2i64) {

4356 return VT.getVectorNumElements() *

4360 } else {

4361

4362

4363 if (Ty->isScalableTy() && ST->hasSVE())

4364 return MulCost + 2 * AddCost + 2 * AsrCost;

4365 return 2 * MulCost + AddCost + AsrCost + UsraCost;

4366 }

4367 }

4368 }

4370 LT.second.isFixedLengthVector()) {

4371

4372

4373

4374

4375 auto ExtractCost = 2 * getVectorInstrCost(Instruction::ExtractElement, Ty,

4376 CostKind, -1, nullptr, nullptr);

4377 auto InsertCost = getVectorInstrCost(Instruction::InsertElement, Ty,

4378 CostKind, -1, nullptr, nullptr);

4380 return ExtractCost + InsertCost +

4384 }

4385 [[fallthrough]];

4388 auto VT = TLI->getValueType(DL, Ty);

4390

4399

4401

4402

4403

4404 bool HasMULH = VT == MVT::i64 || LT.second == MVT::nxv2i64 ||

4405 LT.second == MVT::nxv4i32 || LT.second == MVT::nxv8i16 ||

4406 LT.second == MVT::nxv16i8;

4407 bool Is128bit = LT.second.is128BitVector();

4408

4418 InstructionCost DivCost = MulCost * (Is128bit ? 2 : 1) +

4419 (HasMULH ? 0 : ShrCost) +

4420 AddCost * 2 + ShrCost;

4421 return DivCost + (ISD == ISD::UREM ? MulCost + AddCost : 0);

4422 }

4423 }

4424

4425

4426

4427

4428 if (!VT.isVector() && VT.getSizeInBits() > 64)

4430

4432 Opcode, Ty, CostKind, Op1Info, Op2Info);

4434 if (TLI->isOperationLegalOrCustom(ISD, LT.second) && ST->hasSVE()) {

4435

4436

4438 Ty->getPrimitiveSizeInBits().getFixedValue() < 128) {

4446

4448 if (nullptr != Entry)

4449 return Entry->Cost;

4450 }

4451

4452

4453 if (LT.second.getScalarType() == MVT::i8)

4455 else if (LT.second.getScalarType() == MVT::i16)

4457 return Cost;

4458 } else {

4459

4460

4461

4462

4467 Opcode, Ty->getScalarType(), CostKind, Op1Info, Op2Info);

4468 return (4 + DivCost) * VTy->getNumElements();

4469 }

4470 }

4471

4472

4474 -1, nullptr, nullptr);

4476 nullptr, nullptr);

4477 }

4478

4479

4480

4482 }

4483 return Cost;

4484 }

4486

4487

4488 if (LT.second == MVT::v2i64 && ST->hasSVE())

4489 return LT.first;

4490

4491

4492

4493

4494

4495

4496

4497

4498

4499

4500

4501 if (LT.second != MVT::v2i64)

4502 return LT.first;

4503 return cast(Ty)->getElementCount().getKnownMinValue() *

4506 nullptr, nullptr) *

4507 2 +

4509 nullptr, nullptr));

4517

4518

4519 return LT.first;

4520

4521 case ISD::FNEG:

4522

4523 if ((Ty->isFloatTy() || Ty->isDoubleTy() ||

4524 (Ty->isHalfTy() && ST->hasFullFP16())) &&

4525 CxtI &&

4529 return 0;

4530 [[fallthrough]];

4533 if (!Ty->getScalarType()->isFP128Ty())

4534 return LT.first;

4535 [[fallthrough]];

4538

4539

4540 if (!Ty->getScalarType()->isFP128Ty())

4541 return 2 * LT.first;

4542

4544 Op2Info);

4546

4547

4548 if (!Ty->isVectorTy())

4551 Op2Info);

4552 }

4553}
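Editor's note: the scalar division-by-power-of-two estimates above (the "3 * AddCost + AsrCost" style returns) reflect the usual branch-free add-and-shift expansion rather than a real divide. A self-contained sketch of that expansion, assuming a 32-bit signed dividend and a constant divisor 2^K with 0 < K < 32; the exact instruction mix chosen by the backend may differ.

// Illustrative sketch: signed division by 2^K, truncating toward zero.
#include <cstdint>

static int32_t sdiv_by_pow2(int32_t X, unsigned K) {
  int32_t Sign = X >> 31;                        // all-ones if X is negative
  int32_t Bias = static_cast<int32_t>(
      static_cast<uint32_t>(Sign) >> (32 - K));  // 2^K - 1 for negative X
  return (X + Bias) >> static_cast<int32_t>(K);  // add + arithmetic shift
}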

4554

4557 const SCEV *Ptr,

4559

4560

4561

4562

4564 int MaxMergeDistance = 64;

4565

4568 return NumVectorInstToHideOverhead;

4569

4570

4571

4572 return 1;

4573}

4574

4575

4576

4578 unsigned Opcode1, unsigned Opcode2) const {

4581 if (Sched.hasInstrSchedModel())

4582 return false;

4583

4585 Sched.getSchedClassDesc(TII->get(Opcode1).getSchedClass());

4587 Sched.getSchedClassDesc(TII->get(Opcode2).getSchedClass());

4588

4589

4590

4591

4593 "Cannot handle variant scheduling classes without an MI");

4595 return false;

4596

4599}

4600

4605

4606

4608

4609 const int AmortizationCost = 20;

4610

4611

4612

4617 VecPred = CurrentPred;

4618 }

4619

4620

4625 static const auto ValidMinMaxTys = {

4626 MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,

4627 MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32, MVT::v2f64};

4628 static const auto ValidFP16MinMaxTys = {MVT::v4f16, MVT::v8f16};

4629

4631 if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }) ||

4632 (ST->hasFullFP16() &&

4633 any_of(ValidFP16MinMaxTys, [&LT](MVT M) { return M == LT.second; })))

4634 return LT.first;

4635 }

4636

4638 {Instruction::Select, MVT::v2i1, MVT::v2f32, 2},

4639 {Instruction::Select, MVT::v2i1, MVT::v2f64, 2},

4640 {Instruction::Select, MVT::v4i1, MVT::v4f32, 2},

4641 {Instruction::Select, MVT::v4i1, MVT::v4f16, 2},

4642 {Instruction::Select, MVT::v8i1, MVT::v8f16, 2},

4643 {Instruction::Select, MVT::v16i1, MVT::v16i16, 16},

4644 {Instruction::Select, MVT::v8i1, MVT::v8i32, 8},

4645 {Instruction::Select, MVT::v16i1, MVT::v16i32, 16},

4646 {Instruction::Select, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost},

4647 {Instruction::Select, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost},

4648 {Instruction::Select, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost}};

4649

4650 EVT SelCondTy = TLI->getValueType(DL, CondTy);

4651 EVT SelValTy = TLI->getValueType(DL, ValTy);

4656 return Entry->Cost;

4657 }

4658 }

4659

4660 if (Opcode == Instruction::FCmp) {

4662 ValTy, CostKind, Op1Info, Op2Info, false,

4663

4664 false, [&](Type *PromotedTy) {

4667 CostKind, Op1Info, Op2Info);

4670 Instruction::Trunc,

4674 return Cost;

4675 }))

4676 return *PromotedCost;

4677

4679

4680 if (LT.second.getScalarType() != MVT::f64 &&

4681 LT.second.getScalarType() != MVT::f32 &&

4682 LT.second.getScalarType() != MVT::f16)

4683 return LT.first * getCallInstrCost( nullptr, ValTy,

4685

4686

4687 unsigned Factor = 1;

4690 Factor = 2;

4694 Factor = 3;

4697 Factor = 3;

4698

4702 AArch64::FCMEQv4f32))

4703 Factor *= 2;

4704

4706 }

4707

4708

4709

4710

4711

4714 TLI->isTypeLegal(TLI->getValueType(DL, ValTy)) &&

4717 return 0;

4718

4719

4722 return 0;

4723

4724

4727 return 0;

4728 }

4729

4730

4731

4733 Op1Info, Op2Info, I);

4734}
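Editor's note: the large AmortizationCost-scaled entries in the select table above correspond to wide i64 selects that do not map onto a single vector bit-select and effectively get handled lane by lane. A minimal sketch of that per-lane fallback, using assumed fixed-size arrays purely for illustration.

// Illustrative sketch: lane-by-lane select for a v4i64-shaped operation.
#include <array>
#include <cstdint>

static std::array<int64_t, 4> select_v4i64(const std::array<bool, 4> &Cond,
                                           const std::array<int64_t, 4> &A,
                                           const std::array<int64_t, 4> &B) {
  std::array<int64_t, 4> R{};
  for (int I = 0; I < 4; ++I)
    R[I] = Cond[I] ? A[I] : B[I]; // one per-lane selection each iteration
  return R;
}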

4735

4739 if (ST->requiresStrictAlign()) {

4740

4741

4743 }

4744 Options.AllowOverlappingLoads = true;

4745 Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);

4747

4748

4749

4750 Options.LoadSizes = {8, 4, 2, 1};

4751 Options.AllowedTailExpansions = {3, 5, 6};

4753}

4754

4756 return ST->hasSVE();

4757}

4758

4762 switch (MICA.getID()) {

4763 case Intrinsic::masked_scatter:

4764 case Intrinsic::masked_gather:

4766 case Intrinsic::masked_load:

4767 case Intrinsic::masked_store:

4769 }

4771}

4772

4777

4781 if (!LT.first.isValid())

4783

4784

4786 if (VT->getElementType()->isIntegerTy(1))

4788

4789

4790

4791

4792

4795

4796 return LT.first;

4797}

4798

4799

4800

4803 assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&

4804 "Should be called on only load or stores.");

4805 switch (Opcode) {

4806 case Instruction::Load:

4809 return ST->getGatherOverhead();

4810 break;

4811 case Instruction::Store:

4814 return ST->getScatterOverhead();

4815 break;

4816 default:

4818 }

4819}

4820

4824

4825 unsigned Opcode = (MICA.getID() == Intrinsic::masked_gather ||

4826 MICA.getID() == Intrinsic::vp_gather)

4827 ? Instruction::Load

4828 : Instruction::Store;

4829

4833

4838 if (!LT.first.isValid())

4840

4841

4842 if (!LT.second.isVector() ||

4844 VT->getElementType()->isIntegerTy(1))

4846

4847

4848

4849

4850

4853

4854 ElementCount LegalVF = LT.second.getVectorElementCount();

4857 {TTI::OK_AnyValue, TTI::OP_None}, I);

4858

4861}

4862

4866

4868 Align Alignment,

4873 EVT VT = TLI->getValueType(DL, Ty, true);

4874

4875 if (VT == MVT::Other)

4878

4880 if (!LT.first.isValid())

4882

4883

4884

4885

4886

4887

4890 (VTy->getElementType()->isIntegerTy(1) &&

4891 !VTy->getElementCount().isKnownMultipleOf(

4894

4895

4897 return LT.first;

4898

4900 return 1;

4901

4902 if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&

4903 LT.second.is128BitVector() && Alignment < Align(16)) {

4904

4905

4906

4907

4908

4909 const int AmortizationCost = 6;

4910

4911 return LT.first * 2 * AmortizationCost;

4912 }

4913

4914

4915 if (Ty->isPtrOrPtrVectorTy())

4916 return LT.first;

4917

4919

4920 if (Ty->getScalarSizeInBits() != LT.second.getScalarSizeInBits()) {

4921

4922 if (VT == MVT::v4i8)

4923 return 2;

4924

4926 }

4929 if (isPowerOf2_32(EltSize) || EltSize < 8 || EltSize > 64 ||

4931 return LT.first;

4932

4933

4935 return LT.first;

4936

4937

4938

4939

4944 while (!TypeWorklist.empty()) {

4949 continue;

4950 }

4951

4952 unsigned PrevPow2 = NextPowerOf2(CurrNumElements) / 2;

4956 }

4957 return Cost;

4958 }

4959

4960 return LT.first;

4961}
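Editor's note: in the memory-cost function above, a misaligned 128-bit store on a subtarget where such stores are slow is charged LT.first * 2 * AmortizationCost with AmortizationCost = 6. A small worked sketch of that arithmetic, assuming a type that legalises to a single 128-bit store.

// Illustrative sketch: worked example of the misaligned 128-bit store cost.
static unsigned misalignedStoreCost(unsigned NumLegalizedStores) {
  const unsigned AmortizationCost = 6;              // constant used above
  return NumLegalizedStores * 2 * AmortizationCost; // e.g. 1 * 2 * 6 = 12
}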

4962

4966 bool UseMaskForCond, bool UseMaskForGaps) const {

4967 assert(Factor >= 2 && "Invalid interleave factor");

4969

4972

4973

4974

4975

4976

4979

4980

4981

4982 if (!VecTy->isScalableTy() && (UseMaskForCond || UseMaskForGaps))

4984

4985 if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {

4986 unsigned MinElts = VecVTy->getElementCount().getKnownMinValue();

4987 auto *SubVecTy =

4989 VecVTy->getElementCount().divideCoefficientBy(Factor));

4990

4991

4992

4993

4994 bool UseScalable;

4995 if (MinElts % Factor == 0 &&

4996 TLI->isLegalInterleavedAccessType(SubVecTy, DL, UseScalable))

4997 return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL, UseScalable);

4998 }

4999

5002 UseMaskForCond, UseMaskForGaps);

5003}
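Editor's note: the cheap path above charges Factor legal accesses for an interleaved group that fits the ld2/ld3/ld4 and st2/st3/st4 forms. A standalone sketch of the factor-2 access pattern being priced, splitting consecutive {x, y} pairs into two streams; the function name and float element type are illustrative assumptions.

// Illustrative sketch: what a factor-2 interleaved load (NEON ld2) covers.
#include <cstddef>

static void deinterleave2(const float *Src, float *X, float *Y, size_t N) {
  for (size_t I = 0; I < N; ++I) {
    X[I] = Src[2 * I];     // lane stream 0
    Y[I] = Src[2 * I + 1]; // lane stream 1
  }
}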

5004

5009 for (auto *I : Tys) {

5010 if (I->isVectorTy())

5011 continue;

5013 128)

5016 }

5017 return Cost;

5018}

5019

5021 return ST->getMaxInterleaveFactor();

5022}

5023

5024

5025

5026

5027

5028static void

5031 enum { MaxStridedLoads = 7 };

5033 int StridedLoads = 0;

5034

5035

5036 for (const auto BB : L->blocks()) {

5037 for (auto &I : *BB) {

5039 if (!LMemI)

5040 continue;

5041

5043 if (L->isLoopInvariant(PtrValue))

5044 continue;

5045

5046 const SCEV *LSCEV = SE.getSCEV(PtrValue);

5048 if (!LSCEVAddRec || !LSCEVAddRec->isAffine())

5049 continue;

5050

5051

5052

5053

5054 ++StridedLoads;

5055

5056

5057 if (StridedLoads > MaxStridedLoads / 2)

5058 return StridedLoads;

5059 }

5060 }

5061 return StridedLoads;

5062 };

5063

5064 int StridedLoads = countStridedLoads(L, SE);

5065 LLVM_DEBUG(dbgs() << "falkor-hwpf: detected " << StridedLoads

5066 << " strided loads\n");

5067

5068

5069 if (StridedLoads) {

5070 UP.MaxCount = 1 << Log2_32(MaxStridedLoads / StridedLoads);

5071 LLVM_DEBUG(dbgs() << "falkor-hwpf: setting unroll MaxCount to "

5073 }

5074}
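Editor's note: the Falkor heuristic above caps the unroll count at 1 << Log2_32(MaxStridedLoads / StridedLoads) with MaxStridedLoads = 7, so that the unrolled loop does not exceed the prefetcher's strided-load budget. A standalone sketch with an explicit floor-log2 helper, assuming 1 <= StridedLoads <= 7 as guarded by the surrounding code.

// Illustrative sketch: e.g. 3 strided loads -> 7 / 3 = 2 -> floor_log2 = 1
// -> MaxCount = 2.
static unsigned falkorMaxUnroll(unsigned StridedLoads) {
  const unsigned MaxStridedLoads = 7;
  unsigned Ratio = MaxStridedLoads / StridedLoads;
  unsigned FloorLog2 = 0;
  while (Ratio >> (FloorLog2 + 1))
    ++FloorLog2;
  return 1u << FloorLog2;
}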

5075

5076

5077

5078

5079

5082 unsigned *FinalSize) {

5083

5085

5086 for (auto *BB : L->getBlocks()) {

5087 for (auto &I : *BB) {

5091

5092

5093 if (!Cost.isValid())

5094 return false;

5095

5096 LoopCost += Cost;

5097 if (LoopCost > Budget)

5098 return false;

5099 }

5100 }

5101

5102 if (FinalSize)

5103 *FinalSize = LoopCost.getValue();

5104 return true;

5105}

5106

5109

5110

5111

5114 return false;

5115

5116

5117

5119 if (MaxTC > 0 && MaxTC <= 32)

5120 return false;

5121

5122

5124 return false;

5125

5126

5127

5128

5130 if (Blocks.size() != 2)

5131 return false;

5132

5135 }))

5136 return false;

5137

5138 return true;

5139}

5140

5141

5142

5143static void

5147

5148

5149

5150

5151

5152 if (!L->isInnermost() || L->getNumBlocks() > 8)

5153 return;

5154

5155

5156 if (!L->getExitBlock())

5157 return;

5158

5159

5160

5161

5162 bool HasParellelizableReductions =

5163 L->getNumBlocks() == 1 &&

5164 any_of(L->getHeader()->phis(),

5165 [&SE, L](PHINode &Phi) {

5166 return canParallelizeReductionWhenUnrolling(Phi, L, &SE);

5167 }) &&

5169 if (HasParellelizableReductions &&

5174 }

5175

5180 return;

5181

5183 return;

5184

5186 return;

5187

5188

5190

5191 if (HasParellelizableReductions) {

5195 }

5196

5197

5198

5199

5200 BasicBlock *Header = L->getHeader();

5201 BasicBlock *Latch = L->getLoopLatch();

5202 if (Header == Latch) {

5203

5204 unsigned Size;

5205 unsigned Width = 10;

5207 return;

5208

5209

5210

5211 unsigned MaxInstsPerLine = 16;

5212 unsigned UC = 1;

5213 unsigned BestUC = 1;

5214 unsigned SizeWithBestUC = BestUC * Size;

5215 while (UC <= 8) {

5216 unsigned SizeWithUC = UC * Size;

5217 if (SizeWithUC > 48)

5218 break;

5219 if ((SizeWithUC % MaxInstsPerLine) == 0 ||

5220 (SizeWithBestUC % MaxInstsPerLine) < (SizeWithUC % MaxInstsPerLine)) {

5221 BestUC = UC;

5222 SizeWithBestUC = BestUC * Size;

5223 }

5224 UC++;

5225 }

5226

5227 if (BestUC == 1)

5228 return;

5229

5232 for (auto *BB : L->blocks()) {

5233 for (auto &I : *BB) {

5235 if (!Ptr)

5236 continue;

5239 continue;

5241 LoadedValuesPlus.insert(&I);

5242

5243 for (auto *U : I.users())

5245 LoadedValuesPlus.insert(U);

5246 } else

5248 }

5249 }

5250

5252 return LoadedValuesPlus.contains(SI->getOperand(0));

5253 }))

5254 return;

5255

5258 return;

5259 }

5260

5261

5262

5265 if (!Term || !Term->isConditional() || Preds.size() == 1 ||

5267 none_of(Preds, [L](BasicBlock *Pred) { return L->contains(Pred); }))

5268 return;

5269

5270 std::function<bool(Instruction *, unsigned)> DependsOnLoopLoad =

5273 return false;

5274

5276 return true;

5277

5278 return any_of(I->operands(), [&](Value *V) {

5279 auto *I = dyn_cast(V);

5280 return I && DependsOnLoopLoad(I, Depth + 1);

5281 });

5282 };

5287 DependsOnLoopLoad(I, 0)) {

5289 }

5290}
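Editor's note: the single-block path of the Apple runtime-unroll heuristic above searches unroll counts up to 8, keeps the unrolled size at or below 48 instructions, and prefers counts whose unrolled size fills 16-instruction groups more evenly. A standalone restatement of that search loop, with an assumed function name; the constants 8, 48 and 16 are the ones visible in the listing.

// Illustrative sketch of the BestUC search above.
static unsigned pickUnrollCount(unsigned LoopSize) {
  const unsigned MaxInstsPerLine = 16;
  unsigned BestUC = 1;
  unsigned SizeWithBestUC = BestUC * LoopSize;
  for (unsigned UC = 1; UC <= 8; ++UC) {
    unsigned SizeWithUC = UC * LoopSize;
    if (SizeWithUC > 48)
      break;
    if ((SizeWithUC % MaxInstsPerLine) == 0 ||
        (SizeWithBestUC % MaxInstsPerLine) < (SizeWithUC % MaxInstsPerLine)) {
      BestUC = UC;
      SizeWithBestUC = SizeWithUC;
    }
  }
  return BestUC;
}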

5291

5295

5297

5299

5300

5301

5302

5303 if (L->getLoopDepth() > 1)

5305

5306

5308

5309

5310

5311

5314 for (auto *BB : L->getBlocks()) {

5315 for (auto &I : *BB) {

5316

5317

5318

5319 if (IsVectorized && I.getType()->isVectorTy())

5320 return;

5325 continue;

5326 return;

5327 }

5328

5332 }

5333 }

5334

5335

5336 if (ST->isAppleMLike())

5338 else if (ST->getProcFamily() == AArch64Subtarget::Falkor &&

5341

5342

5343

5347

5349

5350

5352 return;

5353 }

5354

5355

5356

5357

5358

5360 !ST->getSchedModel().isOutOfOrder()) {

5365

5368 }

5369

5370

5371

5373 UP.Force = true;

5374}

5375

5380

5382 Type *ExpectedType,

5383 bool CanCreate) const {

5385 default:

5386 return nullptr;

5387 case Intrinsic::aarch64_neon_st2:

5388 case Intrinsic::aarch64_neon_st3:

5389 case Intrinsic::aarch64_neon_st4: {

5390

5392 if (!CanCreate || !ST)

5393 return nullptr;

5394 unsigned NumElts = Inst->arg_size() - 1;

5395 if (ST->getNumElements() != NumElts)

5396 return nullptr;

5397 for (unsigned i = 0, e = NumElts; i != e; ++i) {

5399 return nullptr;

5400 }

5403 for (unsigned i = 0, e = NumElts; i != e; ++i) {

5405 Res = Builder.CreateInsertValue(Res, L, i);

5406 }

5407 return Res;

5408 }

5409 case Intrinsic::aarch64_neon_ld2:

5410 case Intrinsic::aarch64_neon_ld3:

5411 case Intrinsic::aarch64_neon_ld4:

5412 if (Inst->getType() == ExpectedType)

5413 return Inst;

5414 return nullptr;

5415 }

5416}

5417

5421 default:

5422 break;

5423 case Intrinsic::aarch64_neon_ld2:

5424 case Intrinsic::aarch64_neon_ld3:

5425 case Intrinsic::aarch64_neon_ld4:

5426 Info.ReadMem = true;

5427 Info.WriteMem = false;

5429 break;

5430 case Intrinsic::aarch64_neon_st2:

5431 case Intrinsic::aarch64_neon_st3:

5432 case Intrinsic::aarch64_neon_st4:

5433 Info.ReadMem = false;

5434 Info.WriteMem = true;

5436 break;

5437 }

5438

5440 default:

5441 return false;

5442 case Intrinsic::aarch64_neon_ld2:

5443 case Intrinsic::aarch64_neon_st2:

5444 Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;

5445 break;

5446 case Intrinsic::aarch64_neon_ld3:

5447 case Intrinsic::aarch64_neon_st3:

5448 Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;

5449 break;

5450 case Intrinsic::aarch64_neon_ld4:

5451 case Intrinsic::aarch64_neon_st4:

5452 Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;

5453 break;

5454 }

5455 return true;

5456}

5457

5458

5459

5460

5461

5462

5464 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {

5465 bool Considerable = false;

5466 AllowPromotionWithoutCommonHeader = false;

5468 return false;

5469 Type *ConsideredSExtType =

5471 if (I.getType() != ConsideredSExtType)

5472 return false;

5473

5474

5475 for (const User *U : I.users()) {

5477 Considerable = true;

5478

5479

5480

5481 if (GEPInst->getNumOperands() > 2) {

5482 AllowPromotionWithoutCommonHeader = true;

5483 break;

5484 }

5485 }

5486 }

5487 return Considerable;

5488}

5489

5493 return true;

5494

5497 return false;

5498

5515 return true;

5516 default:

5517 return false;

5518 }

5519}

5520

5525

5526

5527

5528

5532

5534

5535 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())

5537

5539 if (LT.first > 1) {

5543 }

5544

5545 return LegalizationCost + 2;

5546}

5547

5552 if (LT.first > 1) {

5555 LegalizationCost *= LT.first - 1;

5556 }

5557

5558 int ISD = TLI->InstructionOpcodeToISD(Opcode);

5559 assert(ISD && "Invalid opcode");

5560

5561 switch (ISD) {

5567 return LegalizationCost + 2;

5568 default:

5570 }

5571}

5572

5575 std::optional FMF,

5577

5578

5579

5580

5584

5589

5590

5591 return BaseCost + FixedVTy->getNumElements();

5592 }

5593

5594 if (Opcode != Instruction::FAdd)

5596

5601 return Cost;

5602 }

5603

5606

5608 MVT MTy = LT.second;

5609 int ISD = TLI->InstructionOpcodeToISD(Opcode);

5610 assert(ISD && "Invalid opcode");

5611

5612

5613

5614

5615

5616

5617

5618

5619

5620 static const CostTblEntry CostTblNoPairwise[]{

5628 {ISD::OR, MVT::v8i8, 5},

5629 {ISD::OR, MVT::v16i8, 7},

5630 {ISD::OR, MVT::v4i16, 4},

5631 {ISD::OR, MVT::v8i16, 6},

5632 {ISD::OR, MVT::v2i32, 3},

5633 {ISD::OR, MVT::v4i32, 5},

5634 {ISD::OR, MVT::v2i64, 3},

5635 {ISD::XOR, MVT::v8i8, 5},

5642 {ISD::AND, MVT::v8i8, 5},

5649 };

5650 switch (ISD) {

5651 default:

5652 break;

5655

5656

5657 MTy.isVector() && (EltTy->isFloatTy() || EltTy->isDoubleTy() ||

5658 (EltTy->isHalfTy() && ST->hasFullFP16()))) {

5660 if (ValTy->getElementCount().getFixedValue() >= 2 && NElts >= 2 &&

5662

5663

5664

5665

5666

5667

5668

5669

5670 return (LT.first - 1) + Log2_32(NElts);

5671 }

5672 break;

5674 if (const auto *Entry = CostTableLookup(CostTblNoPairwise, ISD, MTy))

5675 return (LT.first - 1) + Entry->Cost;

5676 break;

5681 if (!Entry)

5682 break;

5687 if (LT.first != 1) {

5688

5689

5693 ExtraCost *= LT.first - 1;

5694 }

5695

5696 auto Cost = ValVTy->getElementType()->isIntegerTy(1) ? 2 : Entry->Cost;

5697 return Cost + ExtraCost;

5698 }

5699 break;

5700 }

5702}
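Editor's note: the floating-point add reduction above is costed as roughly (LT.first - 1) + log2(NElts) because a reassociating reduction can be done as a tree of pairwise adds that halves the live lanes each step (3 steps for v8f32). A standalone sketch of that tree shape, assuming N is a power of two and that reassociation is allowed; the sequential, strictly-ordered reduction is a different (more expensive) case.

// Illustrative sketch: log2(N)-deep pairwise-add reduction.
#include <cstddef>

static float tree_reduce_add(float *V, size_t N) { // N: power of two, N >= 1
  for (size_t Width = N / 2; Width >= 1; Width /= 2)
    for (size_t I = 0; I < Width; ++I)
      V[I] = V[I] + V[I + Width]; // one pairwise-add level per Width
  return V[0];
}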

5703

5705 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *VecTy,

5707 EVT VecVT = TLI->getValueType(DL, VecTy);

5708 EVT ResVT = TLI->getValueType(DL, ResTy);

5709

5710 if (Opcode == Instruction::Add && VecVT.isSimple() && ResVT.isSimple() &&

5713

5714

5715

5716

5718 if (((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&

5719 RevVTSize <= 32) ||

5720 ((LT.second == MVT::v4i16 || LT.second == MVT::v8i16) &&

5721 RevVTSize <= 32) ||

5722 ((LT.second == MVT::v2i32 || LT.second == MVT::v4i32) &&

5723 RevVTSize <= 64))

5724 return (LT.first - 1) * 2 + 2;

5725 }

5726

5729}

5730

5735 EVT VecVT = TLI->getValueType(DL, VecTy);

5736 EVT ResVT = TLI->getValueType(DL, ResTy);

5737

5738 if (ST->hasDotProd() && VecVT.isSimple() && ResVT.isSimple() &&

5739 RedOpcode == Instruction::Add) {

5741

5742

5743

5744

5745 if ((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&

5746 ResVT == MVT::i32)

5747 return LT.first + 2;

5748 }

5749

5752}
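Editor's note: the dot-product path above recognises an 8-bit multiply feeding a 32-bit add reduction, the shape that sdot/udot handles four lanes at a time, and prices it at LT.first + 2. A standalone sketch of the scalar computation being accelerated; the function name is illustrative.

// Illustrative sketch: 8-bit products accumulated into a 32-bit sum.
#include <cstddef>
#include <cstdint>

static int32_t dot_i8(const int8_t *A, const int8_t *B, size_t N) {
  int32_t Sum = 0;
  for (size_t I = 0; I < N; ++I)
    Sum += static_cast<int32_t>(A[I]) * static_cast<int32_t>(B[I]);
  return Sum;
}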

5753

5771 };

5772

5773

5774

5775

5776

5779

5782 EVT PromotedVT = LT.second.getScalarType() == MVT::i1

5783 ? TLI->getPromotedVTForPredicate(EVT(LT.second))

5784 : LT.second;

5787 if (Index < 0) {

5788 LegalizationCost =

5793 }

5794

5795

5796

5797 if (LT.second.getScalarType() == MVT::i1) {

5798 LegalizationCost +=

5803 }

5804 const auto *Entry =

5806 assert(Entry && "Illegal Type for Splice");

5807 LegalizationCost += Entry->Cost;

5808 return LegalizationCost * LT.first;

5809}

5810

5812 unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,

5817

5820

5821 if (VF.isFixed() && !ST->isSVEorStreamingSVEAvailable() &&

5822 (!ST->isNeonAvailable() || !ST->hasDotProd()))

5824

5825 if ((Opcode != Instruction::Add && Opcode != Instruction::Sub) ||

5828

5830 (!BinOp || (OpBExtend != TTI::PR_None && InputTypeB)) &&

5831 "Unexpected values for OpBExtend or InputTypeB");

5832

5833

5834

5835 if (BinOp && (*BinOp != Instruction::Mul || InputTypeA != InputTypeB))

5837

5838 bool IsUSDot = OpBExtend != TTI::PR_None && OpAExtend != OpBExtend;

5839 if (IsUSDot && !ST->hasMatMulInt8())

5841

5842 unsigned Ratio =

5846

5850

5851 auto TC = TLI->getTypeConversion(AccumVectorType->getContext(),

5853 switch (TC.first) {

5854 default:

5859

5860 if (TLI->getTypeAction(AccumVectorType->getContext(), TC.second) !=

5863 break;

5864 }

5865

5866 std::pair<InstructionCost, MVT> AccumLT =

5868 std::pair<InstructionCost, MVT> InputLT =

5870

5872

5873

5876

5877

5878

5880

5881 if (ST->isSVEorStreamingSVEAvailable() && !IsUSDot) {

5882

5883 if (AccumLT.second.getScalarType() == MVT::i64 &&

5884 InputLT.second.getScalarType() == MVT::i16)

5885 return Cost;

5886

5887 if (AccumLT.second.getScalarType() == MVT::i64 &&

5888 InputLT.second.getScalarType() == MVT::i8)

5889

5890

5891

5892

5893

5894 return Cost;

5895 }

5896

5897

5898 if (ST->isSVEorStreamingSVEAvailable() ||

5899 (AccumLT.second.isFixedLengthVector() && ST->isNeonAvailable() &&

5900 ST->hasDotProd())) {

5901 if (AccumLT.second.getScalarType() == MVT::i32 &&

5902 InputLT.second.getScalarType() == MVT::i8)

5903 return Cost;

5904 }

5905

5906

5907 return Cost + 2;

5908}

5909

5918 "Expected the Mask to match the return size if given");

5920 "Expected the same scalar types");

5922

5923

5924

5926 LT.second.getScalarSizeInBits() * Mask.size() > 128 &&

5927 SrcTy->getScalarSizeInBits() == LT.second.getScalarSizeInBits() &&

5928 Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) {

5929

5930

5931

5932

5933 if (Args.size() >= 1 && isa(Args[0]) &&

5936 return std::max(1, LT.first / 4);

5937

5938

5939

5940

5941

5944 Mask, 4, SrcTy->getElementCount().getKnownMinValue() * 2) ||

5946 Mask, 3, SrcTy->getElementCount().getKnownMinValue() * 2)))

5947 return LT.first;

5948

5949 unsigned TpNumElts = Mask.size();

5950 unsigned LTNumElts = LT.second.getVectorNumElements();

5951 unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts;

5953 LT.second.getVectorElementCount());

5955 std::map<std::tuple<unsigned, unsigned, SmallVector>, InstructionCost>

5956 PreviousCosts;

5957 for (unsigned N = 0; N < NumVecs; N++) {

5959

5960

5961 unsigned Source1 = -1U, Source2 = -1U;

5962 unsigned NumSources = 0;

5963 for (unsigned E = 0; E < LTNumElts; E++) {

5964 int MaskElt = (N * LTNumElts + E < TpNumElts) ? Mask[N * LTNumElts + E]

5966 if (MaskElt < 0) {

5968 continue;

5969 }

5970

5971

5972

5973 unsigned Source = MaskElt / LTNumElts;

5974 if (NumSources == 0) {

5975 Source1 = Source;

5976 NumSources = 1;

5977 } else if (NumSources == 1 && Source != Source1) {

5978 Source2 = Source;

5979 NumSources = 2;

5980 } else if (NumSources >= 2 && Source != Source1 && Source != Source2) {

5981 NumSources++;

5982 }

5983

5984

5985

5986 if (Source == Source1)

5987 NMask.push_back(MaskElt % LTNumElts);

5988 else if (Source == Source2)

5989 NMask.push_back(MaskElt % LTNumElts + LTNumElts);

5990 else

5991 NMask.push_back(MaskElt % LTNumElts);

5992 }

5993

5994

5995

5996

5997 auto Result =

5998 PreviousCosts.insert({std::make_tuple(Source1, Source2, NMask), 0});

5999

6000 if (!Result.second)

6001 continue;

6002

6003

6004

6006 NumSources <= 2

6009 NTp, NTp, NMask, CostKind, 0, nullptr, Args,

6010 CxtI)

6011 : LTNumElts;

6012 Result.first->second = NCost;

6013 Cost += NCost;

6014 }

6015 return Cost;

6016 }

6017

6020

6021

6022

6023

6024

6025

6026 if (IsExtractSubvector && LT.second.isFixedLengthVector()) {

6027 if (LT.second.getFixedSizeInBits() >= 128 &&

6029 LT.second.getVectorNumElements() / 2) {

6030 if (Index == 0)

6031 return 0;

6032 if (Index == (int)LT.second.getVectorNumElements() / 2)

6033 return 1;

6034 }

6036 }

6037

6038

6041 SrcTy = DstTy;

6042 }

6043

6044

6045

6046 if (!Mask.empty() && LT.second.isFixedLengthVector() &&

6049 return M.value() < 0 || M.value() == (int)M.index();

6050 }))

6051 return 0;

6052

6053

6055 !Mask.empty() && SrcTy->getPrimitiveSizeInBits().isNonZero() &&

6056 SrcTy->getPrimitiveSizeInBits().isKnownMultipleOf(

6058

6060 unsigned Segments =

6062 unsigned SegmentElts = VTy->getNumElements() / Segments;

6063

6064

6065 if ((ST->hasSVE2p1() || ST->hasSME2p1()) &&

6066 ST->isSVEorStreamingSVEAvailable() &&

6067 isDUPQMask(Mask, Segments, SegmentElts))

6068 return LT.first;

6069

6070

6071 if (ST->isSVEorStreamingSVEAvailable() &&

6073 return LT.first;

6074 }

6075

6076

6077

6078

6079

6080

6081

6082

6084 bool IsLoad = !Args.empty() && isa(Args[0]);

6085 if (IsLoad && LT.second.isVector() &&

6087 LT.second.getVectorElementCount()))

6088 return 0;

6089 }

6090

6091

6092

6093 if (Mask.size() == 4 &&

6095 (SrcTy->getScalarSizeInBits() == 16 ||

6096 SrcTy->getScalarSizeInBits() == 32) &&

6097 all_of(Mask, [](int E) { return E < 8; }))

6099

6100

6101

6102 unsigned Unused;

6103 if (LT.second.isFixedLengthVector() &&

6104 LT.second.getVectorNumElements() == Mask.size() &&

6106 (isZIPMask(Mask, LT.second.getVectorNumElements(), Unused, Unused) ||

6107 isUZPMask(Mask, LT.second.getVectorNumElements(), Unused) ||

6108 isREVMask(Mask, LT.second.getScalarSizeInBits(),

6109 LT.second.getVectorNumElements(), 16) ||

6110 isREVMask(Mask, LT.second.getScalarSizeInBits(),

6111 LT.second.getVectorNumElements(), 32) ||

6112 isREVMask(Mask, LT.second.getScalarSizeInBits(),

6113 LT.second.getVectorNumElements(), 64) ||

6114

6116 [&Mask](int M) { return M < 0 || M == Mask[0]; })))

6117 return 1;

6118

6123

6138

6139

6154

6155

6157 {TTI::SK_Select, MVT::v4i32, 2},

6160 {TTI::SK_Select, MVT::v4f32, 2},

6162

6177

6192

6207

6225

6243 };

6244 if (const auto *Entry = CostTableLookup(ShuffleTbl, Kind, LT.second))

6245 return LT.first * Entry->Cost;

6246 }

6247

6250

6251

6252

6254 LT.second.getSizeInBits() <= 128 && SubTp) {

6256 if (SubLT.second.isVector()) {

6257 int NumElts = LT.second.getVectorNumElements();

6258 int NumSubElts = SubLT.second.getVectorNumElements();

6259 if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)

6260 return SubLT.first;

6261 }

6262 }

6263

6264

6265 if (IsExtractSubvector)

6268 Args, CxtI);

6269}
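Editor's note: two of the cheap-mask cases visible in the shuffle-cost function above are an identity mask (every defined element selects its own lane, cost 0) and a splat-like mask (every defined element selects the same lane as element 0). A small sketch of those two classifications with assumed helper names; real shuffle masks in LLVM use -1 for undefined elements, which is preserved here.

// Illustrative sketch of the identity and splat-like mask checks.
#include <cstddef>
#include <vector>

static bool isIdentityMask(const std::vector<int> &Mask) {
  for (size_t I = 0; I < Mask.size(); ++I)
    if (Mask[I] >= 0 && Mask[I] != static_cast<int>(I))
      return false;
  return true;
}

static bool isSplatLikeMask(const std::vector<int> &Mask) {
  for (int M : Mask)
    if (!(M < 0 || M == Mask[0]))
      return false;
  return !Mask.empty();
}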

6270

6276

6277

6282 if (getPtrStride(*PSE, AccessTy, Ptr, TheLoop, DT, Strides,

6283 true, false)

6284 .value_or(0) < 0)

6285 return true;

6286 }

6287 }

6288 }

6289 return false;

6290}

6291

6295

6296

6297

6298

6299 if (IsEpilogue)

6300 return true;

6301 return ST->useFixedOverScalableIfEqualCost();

6302}

6303

6305 return ST->getEpilogueVectorizationMinVF();

6306}

6307

6309 if (!ST->hasSVE())

6310 return false;

6311

6312

6313

6314

6316 return false;

6317

6323

6324

6325

6326

6333

6336 return false;

6337

6338

6339

6340 unsigned NumInsns = 0;

6342 NumInsns += BB->sizeWithoutDebug();

6343 }

6344

6345

6347}

6348

6351 StackOffset BaseOffset, bool HasBaseReg,

6352 int64_t Scale, unsigned AddrSpace) const {

6353

6354

6355

6356

6357

6358

6359

6364 AM.Scale = Scale;

6367

6368

6369 return AM.Scale != 0 && AM.Scale != 1;

6371}

6372

6376

6377

6378

6379

6380 if (I->getOpcode() == Instruction::Or &&

6383 return true;

6384

6385 if (I->getOpcode() == Instruction::Add ||

6386 I->getOpcode() == Instruction::Sub)

6387 return true;

6388 }

6390}

6391

6395

6396

6397

6398

6399

6405

6407}

6408

6411 return all_equal(Shuf->getShuffleMask());

6412 return false;

6413}

6414

6415

6416

6418 bool AllowSplat = false) {

6419

6421 return false;

6422

6423 auto areTypesHalfed = [](Value *FullV, Value *HalfV) {

6424 auto *FullTy = FullV->getType();

6425 auto *HalfTy = HalfV->getType();

6427 2 * HalfTy->getPrimitiveSizeInBits().getFixedValue();

6428 };

6429

6430 auto extractHalf = [](Value *FullV, Value *HalfV) {

6433 return FullVT->getNumElements() == 2 * HalfVT->getNumElements();

6434 };

6435

6437 Value *S1Op1 = nullptr, *S2Op1 = nullptr;

6440 return false;

6441

6442

6443

6445 S1Op1 = nullptr;

6447 S2Op1 = nullptr;

6448

6449

6450

6451 if ((S1Op1 && (!areTypesHalfed(S1Op1, Op1) || !extractHalf(S1Op1, Op1))) ||

6452 (S2Op1 && (!areTypesHalfed(S2Op1, Op2) || !extractHalf(S2Op1, Op2))))

6453 return false;

6454

6455

6456

6457 int M1Start = 0;

6458 int M2Start = 0;

6460 if ((S1Op1 &&

6462 (S2Op1 &&

6464 return false;

6465

6466 if ((M1Start != 0 && M1Start != (NumElements / 2)) ||

6467 (M2Start != 0 && M2Start != (NumElements / 2)))

6468 return false;

6469 if (S1Op1 && S2Op1 && M1Start != M2Start)

6470 return false;

6471

6472 return true;

6473}

6474

6475

6476

6478 auto areExtDoubled = [](Instruction *Ext) {

6479 return Ext->getType()->getScalarSizeInBits() ==

6480 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();

6481 };

6482

6487 return false;

6488

6489 return true;

6490}

6491

6492

6494 Value *VectorOperand = nullptr;

6498 ElementIndex->getValue() == 1 &&

6501}

6502

6503

6507

6509

6511 if (GEP || GEP->getNumOperands() != 2)

6512 return false;

6513

6515 Value *Offsets = GEP->getOperand(1);

6516

6517

6518 if (Base->getType()->isVectorTy() || !Offsets->getType()->isVectorTy())

6519 return false;

6520

6521

6524 if (OffsetsInst->getType()->getScalarSizeInBits() > 32 &&

6525 OffsetsInst->getOperand(0)->getType()->getScalarSizeInBits() <= 32)

6526 Ops.push_back(&GEP->getOperandUse(1));

6527 }

6528

6529

6530 return true;

6531}

6532

6533

6534

6535

6538 return true;

6542 return true;

6543 }

6549 return true;

6550 }

6551 return false;

6552}

6553

6554

6555

6556

6560 switch (II->getIntrinsicID()) {

6561 case Intrinsic::aarch64_neon_smull:

6562 case Intrinsic::aarch64_neon_umull:

6564 true)) {

6565 Ops.push_back(&II->getOperandUse(0));

6566 Ops.push_back(&II->getOperandUse(1));

6567 return true;

6568 }

6569 [[fallthrough]];

6570

6571 case Intrinsic::fma:

6572 case Intrinsic::fmuladd:

6575 !ST->hasFullFP16())

6576 return false;

6577 [[fallthrough]];

6578 case Intrinsic::aarch64_neon_sqdmull:

6579 case Intrinsic::aarch64_neon_sqdmulh:

6580 case Intrinsic::aarch64_neon_sqrdmulh:

6581

6583 Ops.push_back(&II->getOperandUse(0));

6585 Ops.push_back(&II->getOperandUse(1));

6586 return Ops.empty();

6587 case Intrinsic::aarch64_neon_fmlal:

6588 case Intrinsic::aarch64_neon_fmlal2:

6589 case Intrinsic::aarch64_neon_fmlsl:

6590 case Intrinsic::aarch64_neon_fmlsl2:

6591

6593 Ops.push_back(&II->getOperandUse(1));

6595 Ops.push_back(&II->getOperandUse(2));

6596 return Ops.empty();

6597 case Intrinsic::aarch64_sve_ptest_first:

6598 case Intrinsic::aarch64_sve_ptest_last:

6600 if (IIOp->getIntrinsicID() == Intrinsic::aarch64_sve_ptrue)

6601 Ops.push_back(&II->getOperandUse(0));

6602 return Ops.empty();

6603 case Intrinsic::aarch64_sme_write_horiz:

6604 case Intrinsic::aarch64_sme_write_vert:

6605 case Intrinsic::aarch64_sme_writeq_horiz:

6606 case Intrinsic::aarch64_sme_writeq_vert: {

6608 if (!Idx || Idx->getOpcode() != Instruction::Add)

6609 return false;

6610 Ops.push_back(&II->getOperandUse(1));

6611 return true;

6612 }

6613 case Intrinsic::aarch64_sme_read_horiz:

6614 case Intrinsic::aarch64_sme_read_vert:

6615 case Intrinsic::aarch64_sme_readq_horiz:

6616 case Intrinsic::aarch64_sme_readq_vert:

6617 case Intrinsic::aarch64_sme_ld1b_vert:

6618 case Intrinsic::aarch64_sme_ld1h_vert:

6619 case Intrinsic::aarch64_sme_ld1w_vert:

6620 case Intrinsic::aarch64_sme_ld1d_vert:

6621 case Intrinsic::aarch64_sme_ld1q_vert:

6622 case Intrinsic::aarch64_sme_st1b_vert:

6623 case Intrinsic::aarch64_sme_st1h_vert:

6624 case Intrinsic::aarch64_sme_st1w_vert:

6625 case Intrinsic::aarch64_sme_st1d_vert:

6626 case Intrinsic::aarch64_sme_st1q_vert:

6627 case Intrinsic::aarch64_sme_ld1b_horiz:

6628 case Intrinsic::aarch64_sme_ld1h_horiz:

6629 case Intrinsic::aarch64_sme_ld1w_horiz:

6630 case Intrinsic::aarch64_sme_ld1d_horiz:

6631 case Intrinsic::aarch64_sme_ld1q_horiz:

6632 case Intrinsic::aarch64_sme_st1b_horiz:

6633 case Intrinsic::aarch64_sme_st1h_horiz:

6634 case Intrinsic::aarch64_sme_st1w_horiz:

6635 case Intrinsic::aarch64_sme_st1d_horiz:

6636 case Intrinsic::aarch64_sme_st1q_horiz: {

6638 if (!Idx || Idx->getOpcode() != Instruction::Add)

6639 return false;

6640 Ops.push_back(&II->getOperandUse(3));

6641 return true;

6642 }

6643 case Intrinsic::aarch64_neon_pmull:

6645 return false;

6646 Ops.push_back(&II->getOperandUse(0));

6647 Ops.push_back(&II->getOperandUse(1));

6648 return true;

6649 case Intrinsic::aarch64_neon_pmull64:

6651 II->getArgOperand(1)))

6652 return false;

6653 Ops.push_back(&II->getArgOperandUse(0));

6654 Ops.push_back(&II->getArgOperandUse(1));

6655 return true;

6656 case Intrinsic::masked_gather:

6658 return false;

6659 Ops.push_back(&II->getArgOperandUse(0));

6660 return true;

6661 case Intrinsic::masked_scatter:

6663 return false;

6664 Ops.push_back(&II->getArgOperandUse(1));

6665 return true;

6666 default:

6667 return false;

6668 }

6669 }

6670

6671 auto ShouldSinkCondition = [](Value *Cond,

6674 return false;

6676 if (II->getIntrinsicID() != Intrinsic::vector_reduce_or ||

6678 return false;

6680 Ops.push_back(&II->getOperandUse(0));

6681 return true;

6682 };

6683

6684 switch (I->getOpcode()) {

6685 case Instruction::GetElementPtr:

6686 case Instruction::Add:

6687 case Instruction::Sub:

6688

6689 for (unsigned Op = 0; Op < I->getNumOperands(); ++Op) {

6691 Ops.push_back(&I->getOperandUse(Op));

6692 return true;

6693 }

6694 }

6695 break;

6696 case Instruction::Select: {

6697 if (!ShouldSinkCondition(I->getOperand(0), Ops))

6698 return false;

6699

6700 Ops.push_back(&I->getOperandUse(0));

6701 return true;

6702 }

6703 case Instruction::Br: {

6705 return false;

6706

6708 return false;

6709

6710 Ops.push_back(&I->getOperandUse(0));

6711 return true;

6712 }

6713 default:

6714 break;

6715 }

6716

6717 if (I->getType()->isVectorTy())

6718 return false;

6719

6720 switch (I->getOpcode()) {

6721 case Instruction::Sub:

6722 case Instruction::Add: {

6724 return false;

6725

6726

6727

6731 Ops.push_back(&Ext1->getOperandUse(0));

6732 Ops.push_back(&Ext2->getOperandUse(0));

6733 }

6734

6735 Ops.push_back(&I->getOperandUse(0));

6736 Ops.push_back(&I->getOperandUse(1));

6737

6738 return true;

6739 }

6740 case Instruction::Or: {

6741

6742

6743 if (ST->hasNEON()) {

6745 Value *MaskValue;

6746

6750 if (match(OtherAnd,

6752 Instruction *MainAnd = I->getOperand(0) == OtherAnd

6755

6756

6757 if (I->getParent() != MainAnd->getParent() ||

6758 I->getParent() != OtherAnd->getParent())

6759 return false;

6760

6761

6762 if (I->getParent() != IA->getParent() ||

6763 I->getParent() != IB->getParent())

6764 return false;

6765

6766 Ops.push_back(

6768 Ops.push_back(&I->getOperandUse(0));

6769 Ops.push_back(&I->getOperandUse(1));

6770

6771 return true;

6772 }

6773 }

6774 }

6775

6776 return false;

6777 }

6778 case Instruction::Mul: {

6779 auto ShouldSinkSplatForIndexedVariant = [](Value *V) {

6781

6782 if (Ty->isScalableTy())

6783 return false;

6784

6785

6786 return Ty->getScalarSizeInBits() == 16 || Ty->getScalarSizeInBits() == 32;

6787 };

6788

6789 int NumZExts = 0, NumSExts = 0;

6790 for (auto &Op : I->operands()) {

6791

6792 if (any_of(Ops, [&](Use *U) { return U->get() == Op; }))

6793 continue;

6794

6797 auto *ExtOp = Ext->getOperand(0);

6798 if (isSplatShuffle(ExtOp) && ShouldSinkSplatForIndexedVariant(ExtOp))

6799 Ops.push_back(&Ext->getOperandUse(0));

6800 Ops.push_back(&Op);

6801

6803 NumSExts++;

6804 } else {

6805 NumZExts++;

6806

6807 if (Ext->getOperand(0)->getType()->getScalarSizeInBits() * 2 <

6808 I->getType()->getScalarSizeInBits())

6809 NumSExts++;

6810 }

6811

6812 continue;

6813 }

6814

6816 if (!Shuffle)

6817 continue;

6818

6819

6820

6821

6825 Ops.push_back(&Op);

6827 NumSExts++;

6828 else

6829 NumZExts++;

6830 continue;

6831 }

6832

6835 if (!Insert)

6836 continue;

6837

6839 if (!OperandInstr)

6840 continue;

6841

6844

6845 if (!ElementConstant || !ElementConstant->isZero())

6846 continue;

6847

6848 unsigned Opcode = OperandInstr->getOpcode();

6849 if (Opcode == Instruction::SExt)

6850 NumSExts++;

6851 else if (Opcode == Instruction::ZExt)

6852 NumZExts++;

6853 else {

6854

6855

6856 unsigned Bitwidth = I->getType()->getScalarSizeInBits();

6859 continue;

6860 NumZExts++;

6861 }

6862

6863

6864

6866 Ops.push_back(&Insert->getOperandUse(1));

6868 Ops.push_back(&Op);

6869 }

6870

6871

6872 if (Ops.empty() && (NumSExts == 2 || NumZExts == 2))

6873 return true;

6874

6875

6876 if (!ShouldSinkSplatForIndexedVariant(I))

6877 return false;

6878

6879 Ops.clear();

6881 Ops.push_back(&I->getOperandUse(0));

6883 Ops.push_back(&I->getOperandUse(1));

6884

6885 return Ops.empty();

6886 }

6887 case Instruction::FMul: {

6888

6889 if (I->getType()->isScalableTy())

6890 return false;

6891

6892 if (cast(I->getType())->getElementType()->isHalfTy() &&

6893 !ST->hasFullFP16())

6894 return false;

6895

6896

6898 Ops.push_back(&I->getOperandUse(0));

6900 Ops.push_back(&I->getOperandUse(1));

6901 return Ops.empty();

6902 }

6903 default:

6904 return false;

6905 }

6906 return false;

6907}
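Editor's note: the Mul case of the operand-sinking logic above counts sext/zext (and zero-insert) operands so that the extends and splats can be sunk next to the multiply and matched into a widening multiply such as smull/umull. A minimal sketch of the per-lane computation that instruction performs; the function name is illustrative only.

// Illustrative sketch: 16-bit inputs extended and multiplied into 32 bits.
#include <cstdint>

static int32_t widening_mul_i16(int16_t A, int16_t B) {
  return static_cast<int32_t>(A) * static_cast<int32_t>(B);
}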

static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N)

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

const TargetInstrInfo & TII

static std::optional< Instruction * > instCombinePTrue(InstCombiner &IC, IntrinsicInst &II)

Definition AArch64TargetTransformInfo.cpp:2833

TailFoldingOption TailFoldingOptionLoc

Definition AArch64TargetTransformInfo.cpp:191

static std::optional< Instruction * > instCombineSVEVectorFAdd(InstCombiner &IC, IntrinsicInst &II)

Definition AArch64TargetTransformInfo.cpp:2385

static std::optional< Instruction * > instCombineSVEVectorFuseMulAddSub(InstCombiner &IC, IntrinsicInst &II, bool MergeIntoAddendOp)

Definition AArch64TargetTransformInfo.cpp:2263

static void getFalkorUnrollingPreferences(Loop *L, ScalarEvolution &SE, TargetTransformInfo::UnrollingPreferences &UP)

Definition AArch64TargetTransformInfo.cpp:5029

bool SimplifyValuePattern(SmallVector< Value * > &Vec, bool AllowPoison)

Definition AArch64TargetTransformInfo.cpp:2654

static std::optional< Instruction * > instCombineSVESel(InstCombiner &IC, IntrinsicInst &II)

Definition AArch64TargetTransformInfo.cpp:1892

static bool hasPossibleIncompatibleOps(const Function *F, const AArch64TargetLowering &TLI)

Returns true if the function has explicit operations that can only be lowered using incompatible inst...

Definition AArch64TargetTransformInfo.cpp:238

static bool shouldSinkVScale(Value *Op, SmallVectorImpl< Use * > &Ops)

We want to sink the following cases: (add|sub|gep) A, ((mul|shl) vscale, imm); (add|sub|gep) A,...

Definition AArch64TargetTransformInfo.cpp:6536
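
As a rough illustration of the pattern described above (not the exact body at line 6536), a scaled-vscale operand can be recognised with the PatternMatch helpers that appear later in this index (m_VScale, m_Mul, m_Shl, m_ConstantInt); the function name below is illustrative only.

#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"

using namespace llvm;
using namespace llvm::PatternMatch;

// Illustrative only: does Op look like "vscale * C" or "vscale << C", the
// shape that shouldSinkVScale wants to sink next to its add/sub/gep user?
static bool looksLikeScaledVScale(Value *Op) {
  return match(Op, m_Mul(m_VScale(), m_ConstantInt())) ||
         match(Op, m_Shl(m_VScale(), m_ConstantInt()));
}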

static InstructionCost getHistogramCost(const AArch64Subtarget *ST, const IntrinsicCostAttributes &ICA)

Definition AArch64TargetTransformInfo.cpp:565

static std::optional< Instruction * > tryCombineFromSVBoolBinOp(InstCombiner &IC, IntrinsicInst &II)

Definition AArch64TargetTransformInfo.cpp:1791

static std::optional< Instruction * > instCombineSVEUnpack(InstCombiner &IC, IntrinsicInst &II)

Definition AArch64TargetTransformInfo.cpp:2473

static cl::opt< unsigned > SVETailFoldInsnThreshold("sve-tail-folding-insn-threshold", cl::init(15), cl::Hidden)

static cl::opt< bool > EnableFixedwidthAutovecInStreamingMode("enable-fixedwidth-autovec-in-streaming-mode", cl::init(false), cl::Hidden)

static void getAppleRuntimeUnrollPreferences(Loop *L, ScalarEvolution &SE, TargetTransformInfo::UnrollingPreferences &UP, const AArch64TTIImpl &TTI)

For Apple CPUs, we want to runtime-unroll loops to make better use of the OOO engine's wide instructi...

Definition AArch64TargetTransformInfo.cpp:5144

static std::optional< Instruction * > instCombineWhilelo(InstCombiner &IC, IntrinsicInst &II)

Definition AArch64TargetTransformInfo.cpp:2824

static std::optional< Instruction * > instCombineSVEVectorFAddU(InstCombiner &IC, IntrinsicInst &II)

Definition AArch64TargetTransformInfo.cpp:2405

static bool areExtractExts(Value *Ext1, Value *Ext2)

Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth of the vector elements.

Definition AArch64TargetTransformInfo.cpp:6477

static cl::opt< bool > EnableLSRCostOpt("enable-aarch64-lsr-cost-opt", cl::init(true), cl::Hidden)

static bool shouldSinkVectorOfPtrs(Value *Ptrs, SmallVectorImpl< Use * > &Ops)

Definition AArch64TargetTransformInfo.cpp:6508

static bool shouldUnrollMultiExitLoop(Loop *L, ScalarEvolution &SE, const AArch64TTIImpl &TTI)

Definition AArch64TargetTransformInfo.cpp:5107

static std::optional< Instruction * > simplifySVEIntrinsicBinOp(InstCombiner &IC, IntrinsicInst &II, const SVEIntrinsicInfo &IInfo)

Definition AArch64TargetTransformInfo.cpp:1684

static std::optional< Instruction * > instCombineSVEVectorSub(InstCombiner &IC, IntrinsicInst &II)

Definition AArch64TargetTransformInfo.cpp:2464

static bool isLoopSizeWithinBudget(Loop *L, const AArch64TTIImpl &TTI, InstructionCost Budget, unsigned *FinalSize)

Definition AArch64TargetTransformInfo.cpp:5080

static std::optional< Instruction * > instCombineLD1GatherIndex(InstCombiner &IC, IntrinsicInst &II)

Definition AArch64TargetTransformInfo.cpp:2562

static std::optional< Instruction * > instCombineSVEVectorFSub(InstCombiner &IC, IntrinsicInst &II)

Definition AArch64TargetTransformInfo.cpp:2425

static std::optional< Instruction * > processPhiNode(InstCombiner &IC, IntrinsicInst &II)

The function removes redundant reinterpret casts in the presence of control flow.

Definition AArch64TargetTransformInfo.cpp:1071

static std::optional< Instruction * > instCombineSVEInsr(InstCombiner &IC, IntrinsicInst &II)

Definition AArch64TargetTransformInfo.cpp:2789

static std::optional< Instruction * > instCombineSMECntsd(InstCombiner &IC, IntrinsicInst &II, const AArch64Subtarget *ST)

Definition AArch64TargetTransformInfo.cpp:2180

static std::optional< Instruction * > instCombineST1ScatterIndex(InstCombiner &IC, IntrinsicInst &II)

Definition AArch64TargetTransformInfo.cpp:2590

static bool isSMEABIRoutineCall(const CallInst &CI, const AArch64TargetLowering &TLI)

Definition AArch64TargetTransformInfo.cpp:228

static std::optional< Instruction * > instCombineSVESDIV(InstCombiner &IC, IntrinsicInst &II)

Definition AArch64TargetTransformInfo.cpp:2616

static std::optional< Instruction * > instCombineSVEST1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL)

Definition AArch64TargetTransformInfo.cpp:2325

static Value * stripInactiveLanes(Value *V, const Value *Pg)

Definition AArch64TargetTransformInfo.cpp:1672

static cl::opt< bool > SVEPreferFixedOverScalableIfEqualCost("sve-prefer-fixed-over-scalable-if-equal", cl::Hidden)

static bool isUnpackedVectorVT(EVT VecVT)

Definition AArch64TargetTransformInfo.cpp:560

static std::optional< Instruction * > instCombineSVEDupX(InstCombiner &IC, IntrinsicInst &II)

Definition AArch64TargetTransformInfo.cpp:1926

static std::optional< Instruction * > instCombineSVECmpNE(InstCombiner &IC, IntrinsicInst &II)

Definition AArch64TargetTransformInfo.cpp:1936

static std::optional< Instruction * > instCombineDMB(InstCombiner &IC, IntrinsicInst &II)

Definition AArch64TargetTransformInfo.cpp:2799

static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II)

Definition AArch64TargetTransformInfo.cpp:1311

static std::optional< Instruction * > instCombineSVEVectorFSubU(InstCombiner &IC, IntrinsicInst &II)

Definition AArch64TargetTransformInfo.cpp:2445

static std::optional< Instruction * > instCombineRDFFR(InstCombiner &IC, IntrinsicInst &II)

Definition AArch64TargetTransformInfo.cpp:2145

static std::optional< Instruction * > instCombineMaxMinNM(InstCombiner &IC, IntrinsicInst &II)

Definition AArch64TargetTransformInfo.cpp:2745

static cl::opt< unsigned > SVEGatherOverhead("sve-gather-overhead", cl::init(10), cl::Hidden)

static std::optional< Instruction * > instCombineSVECondLast(InstCombiner &IC, IntrinsicInst &II)

Definition AArch64TargetTransformInfo.cpp:2103

static std::optional< Instruction * > instCombineSVEPTest(InstCombiner &IC, IntrinsicInst &II)

Definition AArch64TargetTransformInfo.cpp:2193

static std::optional< Instruction * > instCombineSVEZip(InstCombiner &IC, IntrinsicInst &II)

Definition AArch64TargetTransformInfo.cpp:2546

static cl::opt< int > Aarch64ForceUnrollThreshold("aarch64-force-unroll-threshold", cl::init(0), cl::Hidden, cl::desc("Threshold for forced unrolling of small loops in AArch64"))

static std::optional< Instruction * > instCombineSVEDup(InstCombiner &IC, IntrinsicInst &II)

Definition AArch64TargetTransformInfo.cpp:1904

static cl::opt< unsigned > BaseHistCntCost("aarch64-base-histcnt-cost", cl::init(8), cl::Hidden, cl::desc("The cost of a histcnt instruction"))

static std::optional< Instruction * > instCombineConvertFromSVBool(InstCombiner &IC, IntrinsicInst &II)

Definition AArch64TargetTransformInfo.cpp:1840

static cl::opt< unsigned > CallPenaltyChangeSM("call-penalty-sm-change", cl::init(5), cl::Hidden, cl::desc("Penalty of calling a function that requires a change to PSTATE.SM"))

static std::optional< Instruction * > instCombineSVEUzp1(InstCombiner &IC, IntrinsicInst &II)

Definition AArch64TargetTransformInfo.cpp:2516

static std::optional< Instruction * > instCombineSVEVectorBinOp(InstCombiner &IC, IntrinsicInst &II)

Definition AArch64TargetTransformInfo.cpp:2356

static cl::opt< bool > EnableScalableAutovecInStreamingMode("enable-scalable-autovec-in-streaming-mode", cl::init(false), cl::Hidden)

static std::optional< Instruction * > instCombineSVETBL(InstCombiner &IC, IntrinsicInst &II)

Definition AArch64TargetTransformInfo.cpp:2493

static bool areOperandsOfVmullHighP64(Value *Op1, Value *Op2)

Check if Op1 and Op2 could be used with vmull_high_p64 intrinsic.

Definition AArch64TargetTransformInfo.cpp:6504

static Instruction::BinaryOps intrinsicIDToBinOpCode(unsigned Intrinsic)

Definition AArch64TargetTransformInfo.cpp:2342

static bool containsDecreasingPointers(Loop *TheLoop, PredicatedScalarEvolution *PSE, const DominatorTree &DT)

Definition AArch64TargetTransformInfo.cpp:6271

static bool isSplatShuffle(Value *V)

Definition AArch64TargetTransformInfo.cpp:6409

static cl::opt< unsigned > InlineCallPenaltyChangeSM("inline-call-penalty-sm-change", cl::init(10), cl::Hidden, cl::desc("Penalty of inlining a call that requires a change to PSTATE.SM"))

static std::optional< Instruction * > instCombineSVELD1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL)

Definition AArch64TargetTransformInfo.cpp:2306

static std::optional< Instruction * > instCombineSVESrshl(InstCombiner &IC, IntrinsicInst &II)

Definition AArch64TargetTransformInfo.cpp:2755

static cl::opt< unsigned > DMBLookaheadThreshold("dmb-lookahead-threshold", cl::init(10), cl::Hidden, cl::desc("The number of instructions to search for a redundant dmb"))

static std::optional< Instruction * > simplifySVEIntrinsic(InstCombiner &IC, IntrinsicInst &II, const SVEIntrinsicInfo &IInfo)

Definition AArch64TargetTransformInfo.cpp:1736

static unsigned getSVEGatherScatterOverhead(unsigned Opcode, const AArch64Subtarget *ST)

Definition AArch64TargetTransformInfo.cpp:4801

static bool isOperandOfVmullHighP64(Value *Op)

Check if Op could be used with vmull_high_p64 intrinsic.

Definition AArch64TargetTransformInfo.cpp:6493

static std::optional< Instruction * > instCombineInStreamingMode(InstCombiner &IC, IntrinsicInst &II)

Definition AArch64TargetTransformInfo.cpp:2861

static std::optional< Instruction * > instCombineSVELast(InstCombiner &IC, IntrinsicInst &II)

Definition AArch64TargetTransformInfo.cpp:2029

static cl::opt< unsigned > NeonNonConstStrideOverhead("neon-nonconst-stride-overhead", cl::init(10), cl::Hidden)

static cl::opt< bool > EnableFalkorHWPFUnrollFix("enable-falkor-hwpf-unroll-fix", cl::init(true), cl::Hidden)

static std::optional< Instruction * > instCombineSVECntElts(InstCombiner &IC, IntrinsicInst &II, unsigned NumElts)

Definition AArch64TargetTransformInfo.cpp:2161

static std::optional< Instruction * > instCombineSVEUxt(InstCombiner &IC, IntrinsicInst &II, unsigned NumBits)

Definition AArch64TargetTransformInfo.cpp:2840

static cl::opt< TailFoldingOption, true, cl::parser< std::string > > SVETailFolding("sve-tail-folding", cl::desc("Control the use of vectorisation using tail-folding for SVE where the" " option is specified in the form (Initial)[+(Flag1|Flag2|...)]:" "\ndisabled (Initial) No loop types will vectorize using " "tail-folding" "\ndefault (Initial) Uses the default tail-folding settings for " "the target CPU" "\nall (Initial) All legal loop types will vectorize using " "tail-folding" "\nsimple (Initial) Use tail-folding for simple loops (not " "reductions or recurrences)" "\nreductions Use tail-folding for loops containing reductions" "\nnoreductions Inverse of above" "\nrecurrences Use tail-folding for loops containing fixed order " "recurrences" "\nnorecurrences Inverse of above" "\nreverse Use tail-folding for loops requiring reversed " "predicates" "\nnoreverse Inverse of above"), cl::location(TailFoldingOptionLoc))
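
The option string is parsed as an initial setting optionally followed by '+'-separated flags, so invocations such as the following are accepted (hedged examples of the syntax only, not recommended settings; with clang the flag is forwarded via -mllvm, while opt and llc take it directly):

  -mllvm -sve-tail-folding=default
  -mllvm -sve-tail-folding=all+noreverse
  -mllvm -sve-tail-folding=disabled+reductions+recurrences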

static bool areExtractShuffleVectors(Value *Op1, Value *Op2, bool AllowSplat=false)

Check if both Op1 and Op2 are shufflevector extracts of either the lower or upper half of the vector ...

Definition AArch64TargetTransformInfo.cpp:6417
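
To make the brief above concrete (an illustration, not text from the source): for an 8-element vector such as <8 x i16>, the two shufflevector masks below extract the low and high halves, which is the shape areExtractShuffleVectors looks for in both operands (AllowSplat, per its name, presumably permits one operand to be a splat instead).

// Illustrative masks: the first extracts elements 0..3 (the low half), the
// second elements 4..7 (the high half) of an 8-element vector.
static const int LowHalfMask[]  = {0, 1, 2, 3};
static const int HighHalfMask[] = {4, 5, 6, 7};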

static std::optional< Instruction * > instCombineSVEVectorAdd(InstCombiner &IC, IntrinsicInst &II)

Definition AArch64TargetTransformInfo.cpp:2371

static cl::opt< bool > EnableOrLikeSelectOpt("enable-aarch64-or-like-select", cl::init(true), cl::Hidden)

static cl::opt< unsigned > SVEScatterOverhead("sve-scatter-overhead", cl::init(10), cl::Hidden)

static std::optional< Instruction * > instCombineSVEDupqLane(InstCombiner &IC, IntrinsicInst &II)

Definition AArch64TargetTransformInfo.cpp:2683

This file provides a TargetTransformInfoImplBase-conforming object specific to the AArch64 target machine.

This file provides a helper that implements much of the TTI interface in terms of the target-independ...

static Error reportError(StringRef Message)

static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))

Cost tables and simple lookup functions.

This file defines the DenseMap class.

static Value * getCondition(Instruction *I)

This file provides the interface for the instcombine pass implementation.

This file defines the LoopVectorizationLegality class.

static const Function * getCalledFunction(const Value *V)

static uint64_t getBits(uint64_t Val, int Start, int End)

static unsigned getNumElements(Type *Ty)

static unsigned getScalarSizeInBits(Type *Ty)

static SymbolRef::Type getType(const Symbol *Sym)

This file describes how to lower LLVM code to machine code.

This pass exposes codegen information to IR-level passes.

static unsigned getBitWidth(Type *Ty, const DataLayout &DL)

Returns the bitwidth of the given scalar or pointer type.

unsigned getVectorInsertExtractBaseCost() const

InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind) const override

Definition AArch64TargetTransformInfo.cpp:5811

InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override

Definition AArch64TargetTransformInfo.cpp:5574

InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override

Definition AArch64TargetTransformInfo.cpp:4219

InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const override

Definition AArch64TargetTransformInfo.cpp:5006

unsigned getMaxInterleaveFactor(ElementCount VF) const override

Definition AArch64TargetTransformInfo.cpp:5020

InstructionCost getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const

Definition AArch64TargetTransformInfo.cpp:4774

InstructionCost getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const

Definition AArch64TargetTransformInfo.cpp:4822

bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const override

InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr, TTI::TargetCostKind CostKind) const override

Definition AArch64TargetTransformInfo.cpp:4556

bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst, Type *Src) const

Definition AArch64TargetTransformInfo.cpp:3187

InstructionCost getIntImmCost(int64_t Val) const

Calculate the cost of materializing a 64-bit value.

Definition AArch64TargetTransformInfo.cpp:390

std::optional< InstructionCost > getFP16BF16PromoteCost(Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, bool IncludeTrunc, bool CanUseSVE, std::function< InstructionCost(Type *)> InstCost) const

FP16 and BF16 operations are lowered to fptrunc(op(fpext, fpext)) if the architecture features are not...

Definition AArch64TargetTransformInfo.cpp:4195

bool prefersVectorizedAddressing() const override

Definition AArch64TargetTransformInfo.cpp:4755

InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override

Definition AArch64TargetTransformInfo.cpp:4164

InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override

Definition AArch64TargetTransformInfo.cpp:608

InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const override

Definition AArch64TargetTransformInfo.cpp:5732

InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override

Definition AArch64TargetTransformInfo.cpp:4134

InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const override

Definition AArch64TargetTransformInfo.cpp:431

bool isElementTypeLegalForScalableVector(Type *Ty) const override

void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override

Definition AArch64TargetTransformInfo.cpp:5376

InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override

Definition AArch64TargetTransformInfo.cpp:3229

void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override

Definition AArch64TargetTransformInfo.cpp:5292

bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const override

Definition AArch64TargetTransformInfo.cpp:5418

InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override

Definition AArch64TargetTransformInfo.cpp:5522

InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override

Definition AArch64TargetTransformInfo.cpp:4867

bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const override

Definition AArch64TargetTransformInfo.cpp:375

bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const override

Definition AArch64TargetTransformInfo.cpp:6392

InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override

Definition AArch64TargetTransformInfo.cpp:4181

InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override

Definition AArch64TargetTransformInfo.cpp:3954

bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override

Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...

Definition AArch64TargetTransformInfo.cpp:6557

std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override

Definition AArch64TargetTransformInfo.cpp:2986

bool useNeonVector(const Type *Ty) const

Definition AArch64TargetTransformInfo.cpp:4863

std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override

Definition AArch64TargetTransformInfo.cpp:2871

InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override

Definition AArch64TargetTransformInfo.cpp:4601

InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override

Definition AArch64TargetTransformInfo.cpp:5911

bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const override

Definition AArch64TargetTransformInfo.cpp:6308

InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override

Definition AArch64TargetTransformInfo.cpp:5704

TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override

Definition AArch64TargetTransformInfo.cpp:552

InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index, TTI::TargetCostKind CostKind) const override

Definition AArch64TargetTransformInfo.cpp:3895

unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const override

Definition AArch64TargetTransformInfo.cpp:340

bool areInlineCompatible(const Function *Caller, const Function *Callee) const override

Definition AArch64TargetTransformInfo.cpp:272

unsigned getMaxNumElements(ElementCount VF) const

Try to return an estimate cost factor that can be used as a multiplier when scalarizing an operation ...

bool shouldTreatInstructionLikeSelect(const Instruction *I) const override

Definition AArch64TargetTransformInfo.cpp:6373

bool isMultiversionedFunction(const Function &F) const override

Definition AArch64TargetTransformInfo.cpp:264

TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override

Definition AArch64TargetTransformInfo.cpp:3018

bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const override

Definition AArch64TargetTransformInfo.cpp:5490

TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override

Definition AArch64TargetTransformInfo.cpp:4737

InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override

Definition AArch64TargetTransformInfo.cpp:500

bool isLegalMaskedGatherScatter(Type *DataType) const

bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const override

See if I should be considered for address type promotion.

Definition AArch64TargetTransformInfo.cpp:5463

APInt getFeatureMask(const Function &F) const override

Definition AArch64TargetTransformInfo.cpp:255

InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override

Definition AArch64TargetTransformInfo.cpp:4963

bool areTypesABICompatible(const Function *Caller, const Function *Callee, ArrayRef< Type * > Types) const override

Definition AArch64TargetTransformInfo.cpp:315

bool enableScalableVectorization() const override

Definition AArch64TargetTransformInfo.cpp:3012

InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override

Definition AArch64TargetTransformInfo.cpp:4760

Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType, bool CanCreate=true) const override

Definition AArch64TargetTransformInfo.cpp:5381

bool hasKnownLowerThroughputFromSchedulingModel(unsigned Opcode1, unsigned Opcode2) const

Check whether Opcode1 has lower throughput than Opcode2 according to the scheduling model.

Definition AArch64TargetTransformInfo.cpp:4577

unsigned getEpilogueVectorizationMinVF() const override

Definition AArch64TargetTransformInfo.cpp:6304

InstructionCost getSpliceCost(VectorType *Tp, int Index, TTI::TargetCostKind CostKind) const

Definition AArch64TargetTransformInfo.cpp:5755

InstructionCost getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy, TTI::TargetCostKind CostKind) const

Definition AArch64TargetTransformInfo.cpp:5548

InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const override

Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...

Definition AArch64TargetTransformInfo.cpp:6350

bool preferFixedOverScalableIfEqualCost(bool IsEpilogue) const override

Definition AArch64TargetTransformInfo.cpp:6292

Class for arbitrary precision integers.

bool isNegatedPowerOf2() const

Check if this APInt's negated value is a power of two greater than zero.

unsigned popcount() const

Count the number of bits set.

unsigned countLeadingOnes() const

void negate()

Negate this APInt in place.

LLVM_ABI APInt sextOrTrunc(unsigned width) const

Sign extend or truncate to width.

unsigned logBase2() const

APInt ashr(unsigned ShiftAmt) const

Arithmetic right-shift function.

bool isPowerOf2() const

Check if this APInt's value is a power of two greater than zero.

static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)

Constructs an APInt value that has the bottom loBitsSet bits set.

static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)

Constructs an APInt value that has the top hiBitsSet bits set.

int64_t getSExtValue() const

Get sign extended value.

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

size_t size() const

size - Get the array size.

LLVM Basic Block Representation.

const Instruction * getTerminator() const LLVM_READONLY

Returns the terminator instruction if the block is well formed or null if the block is not well forme...

InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override

InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override

InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override

InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override

TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const

bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0) const override

InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override

InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override

InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override

InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const override

void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override

void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override

InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind) const override

InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override

InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override

std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const

InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override

InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override

InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override

InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override

bool isTypeLegal(Type *Ty) const override

static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)

Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...

Function * getCalledFunction() const

Returns the function called, or null if this is an indirect function invocation or the function signa...

Value * getArgOperand(unsigned i) const

unsigned arg_size() const

This class represents a function call, abstracting a target machine's calling convention.

Predicate

This enumeration lists the possible predicates for CmpInst subclasses.

@ FCMP_OEQ

0 0 0 1 True if ordered and equal

@ ICMP_SLT

signed less than

@ ICMP_SLE

signed less or equal

@ FCMP_OLT

0 1 0 0 True if ordered and less than

@ FCMP_OGT

0 0 1 0 True if ordered and greater than

@ FCMP_OGE

0 0 1 1 True if ordered and greater than or equal

@ ICMP_UGT

unsigned greater than

@ ICMP_SGT

signed greater than

@ FCMP_ONE

0 1 1 0 True if ordered and operands are unequal

@ FCMP_UEQ

1 0 0 1 True if unordered or equal

@ FCMP_OLE

0 1 0 1 True if ordered and less than or equal

@ FCMP_ORD

0 1 1 1 True if ordered (no nans)

@ ICMP_SGE

signed greater or equal

@ FCMP_UNE

1 1 1 0 True if unordered or not equal

@ FCMP_UNO

1 0 0 0 True if unordered: isnan(X) | isnan(Y)

static bool isIntPredicate(Predicate P)

An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...

static LLVM_ABI ConstantAggregateZero * get(Type *Ty)

This is the shared class of boolean and integer constants.

bool isZero() const

This is just a convenience method to make client code smaller for a common case.

const APInt & getValue() const

Return the constant as an APInt value reference.

static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)

static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)

Return a ConstantVector with the specified constant in each element.

This is an important base class in LLVM.

static LLVM_ABI Constant * getAllOnesValue(Type *Ty)

static LLVM_ABI Constant * getNullValue(Type *Ty)

Constructor to create a '0' constant of arbitrary type.

A parsed version of the target data layout string in and methods for querying it.

TypeSize getTypeSizeInBits(Type *Ty) const

Size examples:

bool contains(const_arg_type_t< KeyT > Val) const

Return true if the specified key is in the map, false otherwise.

Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.

static constexpr ElementCount getScalable(ScalarTy MinVal)

static constexpr ElementCount getFixed(ScalarTy MinVal)

This provides a helper for copying FMF from an instruction or setting specified flags.

Convenience struct for specifying and reasoning about fast-math flags.

bool allowContract() const

Container class for subtarget features.

Class to represent fixed width SIMD vectors.

unsigned getNumElements() const

static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)

an instruction for type-safe pointer arithmetic to access elements of arrays and structs

Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")

CallInst * CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec, Value *Idx, const Twine &Name="")

Create a call to the vector.insert intrinsic.

Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")

IntegerType * getIntNTy(unsigned N)

Fetch the type representing an N-bit integer.

Type * getDoubleTy()

Fetch the type representing a 64-bit floating point value.

LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")

Return a vector value that contains V broadcast to NumElts elements.

LLVM_ABI CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")

Create a call to Masked Load intrinsic.

LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)

IntegerType * getInt32Ty()

Fetch the type representing a 32-bit integer.

Type * getHalfTy()

Fetch the type representing a 16-bit floating point value.

Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())

ConstantInt * getInt64(uint64_t C)

Get a constant 64-bit value.

LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")

Create a call to intrinsic ID with Args, mangled using Types.

Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")

PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")

Value * CreateBinOpFMF(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)

Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")

LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)

Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...

Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")

StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)

LLVM_ABI CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)

Create a call to Masked Store intrinsic.

Type * getFloatTy()

Fetch the type representing a 32-bit floating point value.

Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")

void SetInsertPoint(BasicBlock *TheBB)

This specifies that created instructions should be appended to the end of the specified block.

LLVM_ABI Value * CreateElementCount(Type *Ty, ElementCount EC)

Create an expression which evaluates to the number of elements in EC at runtime.

This provides a uniform API for creating instructions and inserting them into a basic block: either a...

This instruction inserts a single (scalar) element into a VectorType value.

The core instruction combiner logic.

virtual Instruction * eraseInstFromFunction(Instruction &I)=0

Combiner aware instruction erasure.

Instruction * replaceInstUsesWith(Instruction &I, Value *V)

A combiner-aware RAUW-like routine.

Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)

Replace operand of instruction and add old operand to the worklist.

static InstructionCost getInvalid(CostType Val=0)

CostType getValue() const

This function is intended to be used as sparingly as possible, since the class provides the full rang...

LLVM_ABI bool isCommutative() const LLVM_READONLY

Return true if the instruction is commutative:

unsigned getOpcode() const

Returns a member of one of the enums like Instruction::Add.

LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())

Copy metadata from SrcInst to this instruction.

Class to represent integer types.

bool hasGroups() const

Returns true if we have any interleave groups.

const SmallVectorImpl< Type * > & getArgTypes() const

Type * getReturnType() const

const SmallVectorImpl< const Value * > & getArgs() const

Intrinsic::ID getID() const

A wrapper class for inspecting calls to intrinsic functions.

Intrinsic::ID getIntrinsicID() const

Return the intrinsic ID of this intrinsic.

This is an important class for using LLVM in a threaded context.

An instruction for reading from memory.

Value * getPointerOperand()

iterator_range< block_iterator > blocks() const

RecurrenceSet & getFixedOrderRecurrences()

Return the fixed-order recurrences found in the loop.

DominatorTree * getDominatorTree() const

PredicatedScalarEvolution * getPredicatedScalarEvolution() const

const ReductionList & getReductionVars() const

Returns the reduction variables found in the loop.

Represents a single loop in the control flow graph.

const FeatureBitset & getFeatureBits() const

uint64_t getScalarSizeInBits() const

unsigned getVectorNumElements() const

bool isVector() const

Return true if this is a vector value type.

Information for memory intrinsic cost model.

Align getAlignment() const

Type * getDataType() const

Intrinsic::ID getID() const

const Instruction * getInst() const

void addIncoming(Value *V, BasicBlock *BB)

Add an incoming value to the end of the PHI list.

static LLVM_ABI PoisonValue * get(Type *T)

Static factory methods - Return a 'poison' object of the specified type.

An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...

The RecurrenceDescriptor is used to identify recurrences variables in a loop.

Type * getRecurrenceType() const

Returns the type of the recurrence.

RecurKind getRecurrenceKind() const

This node represents a polynomial recurrence on the trip count of the specified loop.

bool isAffine() const

Return true if this represents an expression A + B*x where A and B are loop invariant values.

This class represents an analyzed expression in the program.

SMEAttrs is a utility class to parse the SME ACLE attributes on functions.

bool hasNonStreamingInterfaceAndBody() const

bool hasStreamingCompatibleInterface() const

bool hasStreamingInterfaceOrBody() const

bool isSMEABIRoutine() const

bool hasStreamingBody() const

void set(unsigned M, bool Enable=true)

SMECallAttrs is a utility class to hold the SMEAttrs for a callsite.

bool requiresPreservingZT0() const

bool requiresSMChange() const

bool requiresLazySave() const

bool requiresPreservingAllZAState() const

static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)

static ScalableVectorType * getDoubleElementsVectorType(ScalableVectorType *VTy)

The main scalar evolution driver.

LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)

If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCould...

LLVM_ABI unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)

Returns the largest constant divisor of the trip count as a normal unsigned value,...

LLVM_ABI const SCEV * getSCEV(Value *V)

Return a SCEV expression for the full generality of the specified expression.

LLVM_ABI unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)

Returns the upper bound of the loop trip count as a normal unsigned value.

LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)

Return true if the value of the given SCEV is unchanging in the specified loop.

const SCEV * getSymbolicMaxBackedgeTakenCount(const Loop *L)

When successful, this returns a SCEV that is greater than or equal to (i.e.

This instruction constructs a fixed permutation of two input vectors.

static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)

Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...

static LLVM_ABI bool isExtractSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &Index)

Return true if this shuffle mask is an extract subvector mask.

static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)

Return true if the mask interleaves one or more input vectors together.

std::pair< iterator, bool > insert(PtrType Ptr)

Inserts Ptr if and only if there is no element in the container equal to Ptr.

bool contains(ConstPtrType Ptr) const

SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

iterator insert(iterator I, T &&Elt)

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

StackOffset holds a fixed and a scalable offset in bytes.

static StackOffset getScalable(int64_t Scalable)

static StackOffset getFixed(int64_t Fixed)

An instruction for storing to memory.

StringRef - Represent a constant reference to a string, i.e.

std::pair< StringRef, StringRef > split(char Separator) const

Split into two substrings around the first occurrence of a separator character.

Class to represent struct types.

TargetInstrInfo - Interface to description of machine instruction set.

std::pair< LegalizeTypeAction, EVT > LegalizeKind

LegalizeKind holds the legalization kind that needs to happen to EVT in order to type-legalize it.

const RTLIB::RuntimeLibcallsInfo & getRuntimeLibcallsInfo() const

Primary interface to the complete machine description for the target machine.

virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const

Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...

virtual const DataLayout & getDataLayout() const

virtual bool shouldTreatInstructionLikeSelect(const Instruction *I) const

virtual bool isLoweredToCall(const Function *F) const

virtual bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const

bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr, int64_t MergeDistance) const

virtual bool areTypesABICompatible(const Function *Caller, const Function *Callee, ArrayRef< Type * > Types) const

InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind) const override

static LLVM_ABI OperandValueInfo getOperandInfo(const Value *V)

Collect properties of V used in cost analysis, e.g. OP_PowerOf2.

TargetCostKind

The kind of cost model.

@ TCK_RecipThroughput

Reciprocal throughput.

@ TCK_CodeSize

Instruction code size.

@ TCK_SizeAndLatency

The weighted sum of size and latency.

@ TCK_Latency

The latency of instruction.

static bool requiresOrderedReduction(std::optional< FastMathFlags > FMF)

A helper function to determine the type of reduction algorithm used for a given Opcode and set of Fas...

PopcntSupportKind

Flags indicating the kind of support for population count.

PartialReductionExtendKind

@ TCC_Free

Expected to fold away in lowering.

@ TCC_Basic

The cost of a typical 'add' instruction.

ShuffleKind

The various kinds of shuffle patterns for vector queries.

@ SK_InsertSubvector

InsertSubvector. Index indicates start offset.

@ SK_Select

Selects elements from the corresponding lane of either source operand.

@ SK_PermuteSingleSrc

Shuffle elements of single source vector with any shuffle mask.

@ SK_Transpose

Transpose two vectors.

@ SK_Splice

Concatenates elements from the first input vector with elements of the second input vector.

@ SK_Broadcast

Broadcast element 0 to all other elements.

@ SK_PermuteTwoSrc

Merge elements from two source vectors into one with any shuffle mask.

@ SK_Reverse

Reverse the order of the vector.

@ SK_ExtractSubvector

ExtractSubvector Index indicates start offset.

CastContextHint

Represents a hint about the context in which a cast is used.

@ Masked

The cast is used with a masked load/store.

@ None

The cast is not used with a load/store of any kind.

@ Normal

The cast is used with a normal load/store.

static constexpr TypeSize getFixed(ScalarTy ExactSize)

static constexpr TypeSize getScalable(ScalarTy MinimumSize)

The instances of the Type class are immutable: once they are created, they are never changed.

static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)

bool isVectorTy() const

True if this is an instance of VectorType.

LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const

Return true if this is a type whose size is a known multiple of vscale.

static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)

bool isPointerTy() const

True if this is an instance of PointerType.

bool isFloatTy() const

Return true if this is 'float', a 32-bit IEEE fp type.

Type * getScalarType() const

If this is a vector type, return the element type, otherwise return 'this'.

LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY

Return the basic size of this type if it is a primitive type.

LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const

Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...

bool isHalfTy() const

Return true if this is 'half', a 16-bit IEEE fp type.

LLVM_ABI Type * getWithNewType(Type *EltTy) const

Given vector type, change the element type, whilst keeping the old number of elements.

LLVMContext & getContext() const

Return the LLVMContext in which this type was uniqued.

LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY

If this is a vector type, return the getPrimitiveSizeInBits value for the element type.

bool isDoubleTy() const

Return true if this is 'double', a 64-bit IEEE fp type.

static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)

bool isIntegerTy() const

True if this is an instance of IntegerType.

static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)

static LLVM_ABI Type * getFloatTy(LLVMContext &C)

static LLVM_ABI UndefValue * get(Type *T)

Static factory methods - Return an 'undef' object of the specified type.

A Use represents the edge between a Value definition and its users.

const Use & getOperandUse(unsigned i) const

Value * getOperand(unsigned i) const

LLVM Value Representation.

Type * getType() const

All values are typed, get the type of this value.

user_iterator user_begin()

bool hasOneUse() const

Return true if there is exactly one use of this value.

LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const

Returns an alignment of the pointer value.

LLVM_ABI void takeName(Value *V)

Transfer the name from V to this value.

Base class of all SIMD vector types.

ElementCount getElementCount() const

Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...

static VectorType * getInteger(VectorType *VTy)

This static method gets a VectorType with the same number of elements as the input type,...

static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)

This static method is the primary way to construct a VectorType.

Type * getElementType() const

constexpr ScalarTy getFixedValue() const

static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)

constexpr bool isScalable() const

Returns whether the quantity is scaled by a runtime quantity (vscale).

constexpr bool isFixed() const

Returns true if the quantity is not scaled by vscale.

constexpr ScalarTy getKnownMinValue() const

Returns the minimum value this quantity can represent.

constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const

We do not provide the '/' operator here because division for polynomial types does not work in the sa...

const ParentTy * getParent() const

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

static bool isLogicalImmediate(uint64_t imm, unsigned regSize)

isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...

void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)

Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...

static constexpr unsigned SVEBitsPerBlock

LLVM_ABI APInt getFMVPriority(ArrayRef< StringRef > Features)

@ C

The default llvm calling convention, compatible with C.

ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types a...

@ ADD

Simple integer binary arithmetic operators.

@ SINT_TO_FP

[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...

@ FADD

Simple binary floating point operators.

@ SIGN_EXTEND

Conversion operators.

@ SHL

Shift and rotation operations.

@ ZERO_EXTEND

ZERO_EXTEND - Used for integer types, zeroing the new bits.

@ FP_TO_SINT

FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.

@ AND

Bitwise operators - logical and, logical or, logical xor.

@ FP_ROUND

X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...

@ TRUNCATE

TRUNCATE - Completely drop the high bits.

This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.

LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})

Look up the Function declaration of the intrinsic id in the Module M.

SpecificConstantMatch m_ZeroInt()

Convenience matchers for specific integer values.

BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)

Matches a register not-ed by a G_XOR.

OneUse_match< SubPat > m_OneUse(const SubPat &SP)

cst_pred_ty< is_all_ones > m_AllOnes()

Match an integer or vector with all bits set.

BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)

class_match< BinaryOperator > m_BinOp()

Match an arbitrary binary operation and ignore it.

BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)

Matches an And with LHS and RHS in either order.

specific_intval< false > m_SpecificInt(const APInt &V)

Match a specific integer value or vector with all elements equal to the value.

BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)

bool match(Val *V, const Pattern &P)

bind_ty< Instruction > m_Instruction(Instruction *&I)

Match an instruction, capturing it if we match.

specificval_ty m_Specific(const Value *V)

Match if we have a specific specified value.

TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)

Matches ExtractElementInst.

cst_pred_ty< is_nonnegative > m_NonNegative()

Match an integer or vector of non-negative values.

class_match< ConstantInt > m_ConstantInt()

Match an arbitrary ConstantInt and ignore it.

cst_pred_ty< is_one > m_One()

Match an integer 1 or a vector with all elements equal to 1.

IntrinsicID_match m_Intrinsic()

Match intrinsic calls like this: m_IntrinsicIntrinsic::fabs(m_Value(X))

ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)

Matches SelectInst.

IntrinsicID_match m_VScale()

Matches a call to llvm.vscale().

BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)

TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)

Matches ShuffleVectorInst independently of mask value.

OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)

Matches LoadInst.

CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)

Matches ZExt.

class_match< CmpInst > m_Cmp()

Matches any compare instruction and ignore it.

brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)

BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)

Matches a Add with LHS and RHS in either order.

class_match< Value > m_Value()

Match an arbitrary value and ignore it.

CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)

match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)

BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)

auto m_Undef()

Match an arbitrary undef constant.

CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)

Matches SExt.

is_zero m_Zero()

Match any null constant or a vector with all elements equal to 0.

BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)

Matches an Or with LHS and RHS in either order.

initializer< Ty > init(const Ty &Val)

LocationClass< Ty > location(Ty &L)

This is an optimization pass for GlobalISel generic memory operations.

auto drop_begin(T &&RangeOrContainer, size_t N=1)

Return a range covering RangeOrContainer with the first N elements excluded.

std::optional< unsigned > isDUPQMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)

isDUPQMask - matches a splat of equivalent lanes within segments of a given number of elements.

bool all_of(R &&range, UnaryPredicate P)

Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.

const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)

Find in cost table.

LLVM_ABI bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)

Returns true if Name is applied to TheLoop and enabled.

bool isZIPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)

Return true for zip1 or zip2 masks of the form: <0, 8, 1, 9, 2, 10, 3, 11> (WhichResultOut = 0,...

TailFoldingOpts

An enum to describe what types of loops we should attempt to tail-fold: Disabled: None Reductions: Lo...

constexpr bool isInt(int64_t x)

Checks if an integer fits into the given bit width.

auto enumerate(FirstRange &&First, RestRanges &&...Rest)

Given two or more input ranges, returns a new range whose values are tuples (A, B,...

bool isDUPFirstSegmentMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)

isDUPFirstSegmentMask - matches a splat of the first 128b segment.

TypeConversionCostTblEntryT< unsigned > TypeConversionCostTblEntry

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

LLVM_ABI std::optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)

Find string metadata for loop.

const Value * getLoadStorePointerOperand(const Value *V)

A helper function that returns the pointer operand of a load or store instruction.

constexpr bool isPowerOf2_64(uint64_t Value)

Return true if the argument is a power of two > 0 (64 bit edition.)

LLVM_ABI Value * getSplatValue(const Value *V)

Get splat value if the input is a splat vector or return nullptr.

LLVM_ABI bool MaskedValueIsZero(const Value *V, const APInt &Mask, const SimplifyQuery &SQ, unsigned Depth=0)

Return true if 'V & Mask' is known to be zero.

unsigned M1(unsigned Val)

auto dyn_cast_or_null(const Y &Val)

bool any_of(R &&range, UnaryPredicate P)

Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.

LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)

Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...

unsigned getPerfectShuffleCost(llvm::ArrayRef< int > M)

unsigned Log2_32(uint32_t Value)

Return the floor log base 2 of the specified value, -1 if the value is zero.

constexpr bool isPowerOf2_32(uint32_t Value)

Return true if the argument is a power of two > 0.
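
Illustrative values for the bit-math helpers indexed in this section (isInt, isPowerOf2_32/isPowerOf2_64, Log2_32, and NextPowerOf2 further below), all from llvm/Support/MathExtras.h:

#include "llvm/Support/MathExtras.h"
using namespace llvm;

static_assert(isPowerOf2_32(64) && !isPowerOf2_32(0));
static_assert(isPowerOf2_64(uint64_t(1) << 40));
static_assert(isInt<8>(127) && !isInt<8>(128));
static_assert(NextPowerOf2(16) == 32);  // strictly greater than the argument
// Log2_32(32) == 5 and Log2_32(1) == 0 (floor log base 2).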

LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)

Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
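
A hedged sketch of the known-bits queries above; the function, `V` (assumed to be an i64 Value) and `DL` are hypothetical:

#include "llvm/ADT/APInt.h"
#include "llvm/Analysis/SimplifyQuery.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

static bool isKnownMultipleOf4(const Value *V, const DataLayout &DL) {
  KnownBits Known(64);              // the width must match V's type
  computeKnownBits(V, Known, DL);   // AC/CxtI/DT default to nullptr
  if (Known.countMinTrailingZeros() >= 2)
    return true;
  // MaskedValueIsZero asks the same question in a single call.
  return MaskedValueIsZero(V, APInt::getLowBitsSet(64, 2), SimplifyQuery(DL));
}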

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

bool none_of(R &&Range, UnaryPredicate P)

Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.

LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)

bool isUZPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)

Return true for uzp1 or uzp2 masks of the form: <0, 2, 4, 6, 8, 10, 12, 14> or <1,...

bool isREVMask(ArrayRef< int > M, unsigned EltSize, unsigned NumElts, unsigned BlockSize)

isREVMask - Check if a vector shuffle corresponds to a REV instruction with the specified blocksize.

class LLVM_GSL_OWNER SmallVector

Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...

bool isa(const From &Val)

isa - Return true if the parameter to the template is an instance of one of the template type argu...

constexpr int PoisonMaskElem

LLVM_ABI raw_fd_ostream & errs()

This returns a reference to a raw_ostream for standard error.

LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)

Given operands for a BinaryOperator, fold the result or return null.

@ UMin

Unsigned integer min implemented in terms of select(cmp()).

@ Or

Bitwise or logical OR of integers.

@ AnyOf

AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...

@ Xor

Bitwise or logical XOR of integers.

@ FMax

FP max implemented in terms of select(cmp()).

@ FMulAdd

Sum of float products with llvm.fmuladd(a * b + sum).

@ SMax

Signed integer max implemented in terms of select(cmp()).

@ And

Bitwise or logical AND of integers.

@ SMin

Signed integer min implemented in terms of select(cmp()).

@ FMin

FP min implemented in terms of select(cmp()).

@ Sub

Subtraction of integers.

@ AddChainWithSubs

A chain of adds and subs.

@ UMax

Unsigned integer max implemented in terms of select(cmp()).

DWARFExpression::Operation Op

CostTblEntryT< unsigned > CostTblEntry

decltype(auto) cast(const From &Val)

cast - Return the argument parameter cast to the specified type.

unsigned getNumElementsFromSVEPredPattern(unsigned Pattern)

Return the number of active elements for VL1 to VL256 predicate pattern, zero for all other patterns.
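
A sketch, assuming the AArch64SVEPredPattern enumerators (AArch64BaseInfo.h) are visible as they are in this file:

unsigned N = getNumElementsFromSVEPredPattern(AArch64SVEPredPattern::vl8);  // N == 8
unsigned Z = getNumElementsFromSVEPredPattern(AArch64SVEPredPattern::all);  // Z == 0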

auto predecessors(const MachineBasicBlock *BB)

bool is_contained(R &&Range, const E &Element)

Returns true if Element is found in Range.

Type * getLoadStoreType(const Value *I)

A helper function that returns the type of a load or store instruction.
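
Together with getLoadStorePointerOperand above, a typical guard looks like the following sketch, where `I` is a hypothetical Instruction pointer:

if (isa<LoadInst>(I) || isa<StoreInst>(I)) {
  Type *AccessTy = getLoadStoreType(I);              // the loaded or stored type
  const Value *Ptr = getLoadStorePointerOperand(I);  // the address operand
  (void)AccessTy;
  (void)Ptr;
}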

bool all_equal(std::initializer_list< T > Values)

Returns true if all Values in the initializer lists are equal or the list is empty.

Type * toVectorTy(Type *Scalar, ElementCount EC)

A helper function for converting Scalar types to vector types.

LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DominatorTree &DT, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)

If the pointer has a constant stride return it in units of the access type size.

const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)

Find in type conversion cost table.

constexpr uint64_t NextPowerOf2(uint64_t A)

Returns the next power of two (in 64-bits) that is strictly greater than A.

unsigned getMatchingIROpode() const

Definition AArch64TargetTransformInfo.cpp:1211

bool inactiveLanesAreUnused() const

Definition AArch64TargetTransformInfo.cpp:1252

bool inactiveLanesAreNotDefined() const

Definition AArch64TargetTransformInfo.cpp:1242

bool hasMatchingUndefIntrinsic() const

Definition AArch64TargetTransformInfo.cpp:1194

static SVEIntrinsicInfo defaultMergingUnaryNarrowingTopOp()

Definition AArch64TargetTransformInfo.cpp:1133

static SVEIntrinsicInfo defaultZeroingOp()

Definition AArch64TargetTransformInfo.cpp:1156

bool hasGoverningPredicate() const

Definition AArch64TargetTransformInfo.cpp:1171

SVEIntrinsicInfo & setOperandIdxInactiveLanesTakenFrom(unsigned Index)

Definition AArch64TargetTransformInfo.cpp:1235

static SVEIntrinsicInfo defaultMergingOp(Intrinsic::ID IID=Intrinsic::not_intrinsic)

Definition AArch64TargetTransformInfo.cpp:1117

SVEIntrinsicInfo & setOperandIdxWithNoActiveLanes(unsigned Index)

Definition AArch64TargetTransformInfo.cpp:1287

unsigned getOperandIdxWithNoActiveLanes() const

Definition AArch64TargetTransformInfo.cpp:1282

SVEIntrinsicInfo & setInactiveLanesAreUnused()

Definition AArch64TargetTransformInfo.cpp:1256

SVEIntrinsicInfo & setInactiveLanesAreNotDefined()

Definition AArch64TargetTransformInfo.cpp:1246

SVEIntrinsicInfo & setGoverningPredicateOperandIdx(unsigned Index)

Definition AArch64TargetTransformInfo.cpp:1180

bool inactiveLanesTakenFromOperand() const

Definition AArch64TargetTransformInfo.cpp:1226

static SVEIntrinsicInfo defaultUndefOp()

Definition AArch64TargetTransformInfo.cpp:1140

bool hasOperandWithNoActiveLanes() const

Definition AArch64TargetTransformInfo.cpp:1278

Intrinsic::ID getMatchingUndefIntrinsic() const

Definition AArch64TargetTransformInfo.cpp:1198

SVEIntrinsicInfo & setResultIsZeroInitialized()

Definition AArch64TargetTransformInfo.cpp:1267

static SVEIntrinsicInfo defaultMergingUnaryOp()

Definition AArch64TargetTransformInfo.cpp:1125

SVEIntrinsicInfo & setMatchingUndefIntrinsic(Intrinsic::ID IID)

Definition AArch64TargetTransformInfo.cpp:1203

unsigned getGoverningPredicateOperandIdx() const

Definition AArch64TargetTransformInfo.cpp:1175

bool hasMatchingIROpode() const

Definition AArch64TargetTransformInfo.cpp:1209

bool resultIsZeroInitialized() const

Definition AArch64TargetTransformInfo.cpp:1265

SVEIntrinsicInfo & setMatchingIROpcode(unsigned Opcode)

Definition AArch64TargetTransformInfo.cpp:1216

unsigned getOperandIdxInactiveLanesTakenFrom() const

Definition AArch64TargetTransformInfo.cpp:1230

static SVEIntrinsicInfo defaultVoidOp(unsigned GPIndex)

Definition AArch64TargetTransformInfo.cpp:1148
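
A hedged sketch of the builder-style construction these accessors imply for the file-local SVEIntrinsicInfo helper; the chosen intrinsic and opcode are illustrative:

SVEIntrinsicInfo Info =
    SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_add_u)
        .setMatchingIROpcode(Instruction::Add);
// Later queries: Info.hasMatchingUndefIntrinsic(), Info.getMatchingIROpode(), ...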

This struct is a compact representation of a valid (non-zero power of two) alignment.

bool isSimple() const

Test if the given EVT is simple (as opposed to being extended).

static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)

Returns the EVT that represents a vector NumElements in length, where each element is of type VT.

bool bitsGT(EVT VT) const

Return true if this has more bits than VT.

TypeSize getSizeInBits() const

Return the size of the specified value type in bits.

uint64_t getScalarSizeInBits() const

static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)

Return the value type corresponding to the specified type.

MVT getSimpleVT() const

Return the SimpleValueType held in the specified simple EVT.

bool isFixedLengthVector() const

EVT getScalarType() const

If this is a vector type, return the element type, otherwise return this.

LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const

This method returns an LLVM type corresponding to the specified EVT.

bool isScalableVector() const

Return true if this is a vector type where the runtime length is machine dependent.

EVT getVectorElementType() const

Given a vector type, return the type of each element.

unsigned getVectorNumElements() const

Given a vector type, return the number of elements it contains.
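
A short sketch exercising the EVT queries listed above; the function name is hypothetical and an LLVMContext is assumed to be available:

#include <cassert>
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Type.h"
using namespace llvm;

static void evtExample(LLVMContext &Ctx) {
  EVT EltVT = EVT::getEVT(Type::getInt32Ty(Ctx));
  EVT FixedVT = EVT::getVectorVT(Ctx, EltVT, 4);                         // v4i32
  EVT ScalableVT = EVT::getVectorVT(Ctx, EltVT, 4, /*IsScalable=*/true); // nxv4i32
  assert(FixedVT.isFixedLengthVector() && FixedVT.getVectorNumElements() == 4);
  assert(ScalableVT.isScalableVector() &&
         ScalableVT.getVectorElementType() == EltVT);
}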

Summarize the scheduling resources required for an instruction of a particular scheduling class.

Machine model for scheduling, bundling, and heuristics.

static LLVM_ABI double getReciprocalThroughput(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)

Information about a load/store intrinsic defined by the target.

InterleavedAccessInfo * IAI

LoopVectorizationLegality * LVL

This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...

unsigned Insns

TODO: Some of these could be merged.

Returns options for expansion of memcmp. IsZeroCmp is true if this is the expansion of memcmp(p1, p2, s) == 0.

bool isNegatedPowerOf2() const

OperandValueInfo getNoProps() const

Parameters that control the generic loop unrolling transformation.

bool UpperBound

Allow using trip count upper bound to unroll loops.

bool Force

Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).

unsigned PartialOptSizeThreshold

The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...

unsigned DefaultUnrollRuntimeCount

Default unroll count for loops with run-time trip count.

bool RuntimeUnrollMultiExit

Allow runtime unrolling multi-exit loops.

unsigned SCEVExpansionBudget

Don't allow runtime unrolling if expanding the trip count takes more than SCEVExpansionBudget.

bool AddAdditionalAccumulators

Allow unrolling to add parallel reduction phis.

unsigned UnrollAndJamInnerLoopThreshold

Threshold for unroll and jam, for inner loop size.

bool UnrollAndJam

Allow unroll and jam. Used to enable unroll and jam for the target.

bool UnrollRemainder

Allow unrolling of all the iterations of the runtime loop remainder.

unsigned PartialThreshold

The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...

bool Runtime

Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...

bool Partial

Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
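
A hedged sketch of how a target hook typically fills these fields; the values are illustrative, not the ones this file chooses:

#include "llvm/Analysis/TargetTransformInfo.h"
using namespace llvm;

static void tuneUnrolling(TargetTransformInfo::UnrollingPreferences &UP) {
  UP.Partial = true;                 // allow partial unrolling
  UP.Runtime = true;                 // allow runtime unrolling
  UP.DefaultUnrollRuntimeCount = 4;  // default factor for runtime trip counts
  UP.UpperBound = true;              // may unroll using the trip-count upper bound
  UP.UnrollRemainder = false;        // keep the runtime remainder loop rolled
}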