LLVM: lib/Analysis/VectorUtils.cpp Source File

1

2

3

4

5

6

7

8

9

10

11

12

30

31#define DEBUG_TYPE "vectorutils"

32

33using namespace llvm;

35

36

38 "max-interleave-group-factor", cl::Hidden,

39 cl::desc("Maximum factor for an interleaved access group (default = 8)"),

41

42

43

44

45

47 switch (ID) {

48 case Intrinsic::abs:

49 case Intrinsic::bswap:

50 case Intrinsic::bitreverse:

51 case Intrinsic::ctpop:

52 case Intrinsic::ctlz:

53 case Intrinsic::cttz:

54 case Intrinsic::fshl:

55 case Intrinsic::fshr:

56 case Intrinsic::smax:

57 case Intrinsic::smin:

58 case Intrinsic::umax:

59 case Intrinsic::umin:

60 case Intrinsic::sadd_sat:

61 case Intrinsic::ssub_sat:

62 case Intrinsic::uadd_sat:

63 case Intrinsic::usub_sat:

64 case Intrinsic::smul_fix:

65 case Intrinsic::smul_fix_sat:

66 case Intrinsic::umul_fix:

67 case Intrinsic::umul_fix_sat:

68 case Intrinsic::sqrt:

69 case Intrinsic::asin:

70 case Intrinsic::acos:

71 case Intrinsic::atan:

72 case Intrinsic::atan2:

73 case Intrinsic::sin:

74 case Intrinsic::cos:

75 case Intrinsic::tan:

76 case Intrinsic::sinh:

77 case Intrinsic::cosh:

78 case Intrinsic::tanh:

79 case Intrinsic::exp:

80 case Intrinsic::exp10:

81 case Intrinsic::exp2:

82 case Intrinsic::log:

83 case Intrinsic::log10:

84 case Intrinsic::log2:

85 case Intrinsic::fabs:

86 case Intrinsic::minnum:

87 case Intrinsic::maxnum:

88 case Intrinsic::minimum:

89 case Intrinsic::maximum:

90 case Intrinsic::copysign:

91 case Intrinsic::floor:

92 case Intrinsic::ceil:

93 case Intrinsic::trunc:

94 case Intrinsic::rint:

95 case Intrinsic::nearbyint:

96 case Intrinsic::round:

97 case Intrinsic::roundeven:

98 case Intrinsic::pow:

99 case Intrinsic::fma:

100 case Intrinsic::fmuladd:

101 case Intrinsic::is_fpclass:

102 case Intrinsic::powi:

103 case Intrinsic::canonicalize:

104 case Intrinsic::fptosi_sat:

105 case Intrinsic::fptoui_sat:

106 case Intrinsic::lrint:

107 case Intrinsic::llrint:

108 case Intrinsic::ucmp:

109 case Intrinsic::scmp:

110 return true;

111 default:

112 return false;

113 }

114}

115

119 return true;

120

123

124

125

126 switch (ID) {

127 case Intrinsic::frexp:

128 return true;

129 }

130 return false;

131}

132

133

135 unsigned ScalarOpdIdx,

137

140

141 switch (ID) {

142 case Intrinsic::abs:

143 case Intrinsic::vp_abs:

144 case Intrinsic::ctlz:

145 case Intrinsic::vp_ctlz:

146 case Intrinsic::cttz:

147 case Intrinsic::vp_cttz:

148 case Intrinsic::is_fpclass:

149 case Intrinsic::vp_is_fpclass:

150 case Intrinsic::powi:

151 return (ScalarOpdIdx == 1);

152 case Intrinsic::smul_fix:

153 case Intrinsic::smul_fix_sat:

154 case Intrinsic::umul_fix:

155 case Intrinsic::umul_fix_sat:

156 return (ScalarOpdIdx == 2);

157 default:

158 return false;

159 }

160}

161

165

168

170 return OpdIdx == -1 || OpdIdx == 0;

171

172 switch (ID) {

173 case Intrinsic::fptosi_sat:

174 case Intrinsic::fptoui_sat:

175 case Intrinsic::lrint:

176 case Intrinsic::llrint:

177 case Intrinsic::vp_lrint:

178 case Intrinsic::vp_llrint:

179 case Intrinsic::ucmp:

180 case Intrinsic::scmp:

181 return OpdIdx == -1 || OpdIdx == 0;

182 case Intrinsic::is_fpclass:

183 case Intrinsic::vp_is_fpclass:

184 return OpdIdx == 0;

185 case Intrinsic::powi:

186 return OpdIdx == -1 || OpdIdx == 1;

187 default:

188 return OpdIdx == -1;

189 }

190}

191

194

197

198 switch (ID) {

199 case Intrinsic::frexp:

200 return RetIdx == 0 || RetIdx == 1;

201 default:

202 return RetIdx == 0;

203 }

204}
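
The four classification helpers above are usually consulted together when deciding how to widen a call. A minimal illustrative sketch (not part of this file; it only assumes the declarations from llvm/Analysis/VectorUtils.h, and classifyOperands is a hypothetical helper):

// Illustrative only: classify each call operand before widening.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Intrinsics.h"

using namespace llvm;

static void classifyOperands(Intrinsic::ID ID, unsigned NumArgs,
                             const TargetTransformInfo *TTI) {
  if (!isTriviallyVectorizable(ID))
    return; // needs a library mapping or scalarization instead
  for (unsigned Idx = 0; Idx != NumArgs; ++Idx) {
    // True e.g. for powi's exponent or abs's is-int-min-poison flag: the
    // operand is kept scalar even when the call is widened.
    bool KeepScalar = isVectorIntrinsicWithScalarOpAtArg(ID, Idx, TTI);
    // True when this operand's type participates in the overloaded signature
    // and therefore has to be mangled into the vector intrinsic's name.
    bool Overloaded =
        isVectorIntrinsicWithOverloadTypeAtArg(ID, static_cast<int>(Idx), TTI);
    (void)KeepScalar;
    (void)Overloaded;
  }
}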

205

206

207

208

214

216 ID == Intrinsic::lifetime_end || ID == Intrinsic::assume ||

217 ID == Intrinsic::experimental_noalias_scope_decl ||

218 ID == Intrinsic::sideeffect || ID == Intrinsic::pseudoprobe)

219 return ID;

221}

222

223

224

225

227 assert(V->getType()->isVectorTy() && "Not looking at a vector?");

228 VectorType *VTy = cast<VectorType>(V->getType());

229

230 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {

231 unsigned Width = FVTy->getNumElements();

232 if (EltNo >= Width)

234 }

235

236 if (Constant *C = dyn_cast<Constant>(V))

237 return C->getAggregateElement(EltNo);

238

240

241 if (!isa<ConstantInt>(III->getOperand(2)))

242 return nullptr;

243 unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();

244

245

246

247 if (EltNo == IIElt)

248 return III->getOperand(1);

249

250

251 if (III == III->getOperand(0))

252 return nullptr;

253

254

255

257 }

258

260

261 if (SVI && isa<FixedVectorType>(SVI->getType())) {

262 unsigned LHSWidth =

263 cast<FixedVectorType>(SVI->getOperand(0)->getType())->getNumElements();

265 if (InEl < 0)

267 if (InEl < (int)LHSWidth)

270 }

271

272

273

276 if (Constant *Elt = C->getAggregateElement(EltNo))

277 if (Elt->isNullValue())

279

280

281 if (isa<ScalableVectorType>(VTy))

283 if (EltNo < VTy->getElementCount().getKnownMinValue())

285

286

287 return nullptr;

288}
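
A typical caller of findScalarElement() is extractelement simplification. The sketch below is illustrative only; it assumes the declaration from llvm/Analysis/VectorUtils.h, and simplifyExtract is a hypothetical helper name:

// Illustrative only: fold an extractelement when the scalar already exists.
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

static Value *simplifyExtract(ExtractElementInst *EEI) {
  auto *IdxC = dyn_cast<ConstantInt>(EEI->getIndexOperand());
  if (!IdxC)
    return nullptr;
  // Returns e.g. the scalar fed into a matching insertelement, a constant
  // aggregate element, or the splatted value; nullptr if nothing is known.
  return findScalarElement(EEI->getVectorOperand(), IdxC->getZExtValue());
}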

289

291 int SplatIndex = -1;

292 for (int M : Mask) {

293

294 if (M < 0)

295 continue;

296

297

298 if (SplatIndex != -1 && SplatIndex != M)

299 return -1;

300

301

302 SplatIndex = M;

303 }

304 assert((SplatIndex == -1 || SplatIndex >= 0) && "Negative index?");

305 return SplatIndex;

306}

307

308

309

310

311

313 if (isa<VectorType>(V->getType()))

314 if (auto *C = dyn_cast<Constant>(V))

315 return C->getSplatValue();

316

317

323

324 return nullptr;

325}

326

329

330 if (isa<VectorType>(V->getType())) {

331 if (isa<UndefValue>(V))

332 return true;

333

334

335 if (auto *C = dyn_cast<Constant>(V))

336 return C->getSplatValue() != nullptr;

337 }

338

339 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V)) {

340

341

342 if (!all_equal(Shuf->getShuffleMask()))

343 return false;

344

345

346 if (Index == -1)

347 return true;

348

349

350

351 return Shuf->getMaskValue(Index) == Index;

352 }

353

354

356 return false;

357

358

362

363

367

368

369

370 return false;

371}
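
getSplatValue() and isSplatValue() answer slightly different questions; the following sketch (illustrative only, hypothetical helper name) makes the distinction concrete:

// Illustrative only: the two splat queries.
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Value.h"

using namespace llvm;

static void splatQueries(Value *V) {
  // getSplatValue() hands back the broadcast scalar (constant splats and the
  // canonical insertelement+shufflevector pattern), or nullptr.
  if (Value *Scalar = getSplatValue(V))
    (void)Scalar;
  // isSplatValue() only answers yes/no, also covering cases where the scalar
  // itself is not materialized, and it can check one specific lane.
  bool AllLanesEqual = isSplatValue(V);
  bool SplatIsLane2 = isSplatValue(V, /*Index=*/2);
  (void)AllLanesEqual;
  (void)SplatIsLane2;
}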

372

374 const APInt &DemandedElts, APInt &DemandedLHS,

375 APInt &DemandedRHS, bool AllowUndefElts) {

376 DemandedLHS = DemandedRHS = APInt::getZero(SrcWidth);

377

378

379 if (DemandedElts.isZero())

380 return true;

381

382

383 if (all_of(Mask, [](int Elt) { return Elt == 0; })) {

384 DemandedLHS.setBit(0);

385 return true;

386 }

387

388 for (unsigned I = 0, E = Mask.size(); I != E; ++I) {

389 int M = Mask[I];

390 assert((-1 <= M) && (M < (SrcWidth * 2)) &&

391 "Invalid shuffle mask constant");

392

393 if (!DemandedElts[I] || (AllowUndefElts && (M < 0)))

394 continue;

395

396

397

398 if (M < 0)

399 return false;

400

401 if (M < SrcWidth)

402 DemandedLHS.setBit(M);

403 else

404 DemandedRHS.setBit(M - SrcWidth);

405 }

406

407 return true;

408}
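
A small worked example of getShuffleDemandedElts(); the values follow directly from the loop above and the snippet is illustrative only:

// Illustrative only: for SrcWidth = 4 and Mask = {0, 5, -1, 7}, demanding all
// four output lanes gives DemandedLHS = {lane 0} and DemandedRHS = {lanes 1, 3}
// when undef elements are allowed.
#include "llvm/ADT/APInt.h"
#include "llvm/Analysis/VectorUtils.h"

using namespace llvm;

static void demandedEltsExample() {
  int Mask[] = {0, 5, -1, 7};
  APInt DemandedElts = APInt::getAllOnes(4);
  APInt DemandedLHS, DemandedRHS;
  bool Ok = getShuffleDemandedElts(/*SrcWidth=*/4, Mask, DemandedElts,
                                   DemandedLHS, DemandedRHS,
                                   /*AllowUndefElts=*/true);
  // Ok is true; with AllowUndefElts=false the demanded undef lane (-1) would
  // make the call return false instead.
  (void)Ok;
}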

409

412 assert(Scale > 0 && "Unexpected scaling factor");

413

414

415 if (Scale == 1) {

416 ScaledMask.assign(Mask.begin(), Mask.end());

417 return;

418 }

419

420 ScaledMask.clear();

421 for (int MaskElt : Mask) {

422 if (MaskElt >= 0) {

423 assert(((uint64_t)Scale * MaskElt + (Scale - 1)) <= INT32_MAX &&

424 "Overflowed 32-bits");

425 }

426 for (int SliceElt = 0; SliceElt != Scale; ++SliceElt)

427 ScaledMask.push_back(MaskElt < 0 ? MaskElt : Scale * MaskElt + SliceElt);

428 }

429}

430

433 assert(Scale > 0 && "Unexpected scaling factor");

434

435

436 if (Scale == 1) {

437 ScaledMask.assign(Mask.begin(), Mask.end());

438 return true;

439 }

440

441

442 int NumElts = Mask.size();

443 if (NumElts % Scale != 0)

444 return false;

445

446 ScaledMask.clear();

447 ScaledMask.reserve(NumElts / Scale);

448

449

450 do {

451 ArrayRef MaskSlice = Mask.take_front(Scale);

452 assert((int)MaskSlice.size() == Scale && "Expected Scale-sized slice.");

453

454

455 int SliceFront = MaskSlice.front();

456 if (SliceFront < 0) {

457

458

460 return false;

461 ScaledMask.push_back(SliceFront);

462 } else {

463

464 if (SliceFront % Scale != 0)

465 return false;

466

467 for (int i = 1; i < Scale; ++i)

468 if (MaskSlice[i] != SliceFront + i)

469 return false;

470 ScaledMask.push_back(SliceFront / Scale);

471 }

472 Mask = Mask.drop_front(Scale);

473 } while (!Mask.empty());

474

475 assert((int)ScaledMask.size() * Scale == NumElts && "Unexpected scaled mask");

476

477

478

479 return true;

480}
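
narrowShuffleMaskElts() and widenShuffleMaskElts() are inverses whenever widening succeeds. A worked sketch (illustrative only):

// Illustrative only: the two scaling helpers; narrowShuffleMaskElts() itself
// cannot fail, widening fails if the mask cannot be regrouped.
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/VectorUtils.h"

using namespace llvm;

static void maskScalingExamples() {
  SmallVector<int, 8> Narrow, Wide, Tmp;

  // {1, 0} over wide elements becomes {2, 3, 0, 1} over elements half as wide.
  narrowShuffleMaskElts(/*Scale=*/2, {1, 0}, Narrow);

  // {2, 3, 0, 1} regroups cleanly into runs of 2, giving back {1, 0}.
  bool Widened = widenShuffleMaskElts(/*Scale=*/2, {2, 3, 0, 1}, Wide);

  // {0, 2, 1, 3} does not: the first pair {0, 2} is not a contiguous run.
  bool Fails = widenShuffleMaskElts(/*Scale=*/2, {0, 2, 1, 3}, Tmp);
  (void)Widened; // true
  (void)Fails;   // false
}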

481

484 unsigned NumElts = M.size();

485 if (NumElts % 2 != 0)

486 return false;

487

489 for (unsigned i = 0; i < NumElts; i += 2) {

490 int M0 = M[i];

491 int M1 = M[i + 1];

492

493

494 if (M0 == -1 && M1 == -1) {

496 continue;

497 }

498

499 if (M0 == -1 && M1 != -1 && (M1 % 2) == 1) {

501 continue;

502 }

503

504 if (M0 != -1 && (M0 % 2) == 0 && ((M0 + 1) == M1 || M1 == -1)) {

506 continue;

507 }

508

510 return false;

511 }

512

513 assert(NewMask.size() == NumElts / 2 && "Incorrect size for mask!");

514 return true;

515}

516

519 unsigned NumSrcElts = Mask.size();

520 assert(NumSrcElts > 0 && NumDstElts > 0 && "Unexpected scaling factor");

521

522

523 if (NumSrcElts == NumDstElts) {

524 ScaledMask.assign(Mask.begin(), Mask.end());

525 return true;

526 }

527

528

529 assert(((NumSrcElts % NumDstElts) == 0 || (NumDstElts % NumSrcElts) == 0) &&

530 "Unexpected scaling factor");

531

532 if (NumSrcElts > NumDstElts) {

533 int Scale = NumSrcElts / NumDstElts;

535 }

536

537 int Scale = NumDstElts / NumSrcElts;

539 return true;

540}

541

544 std::array<SmallVector<int, 16>, 2> TmpMasks;

547 for (unsigned Scale = 2; Scale <= InputMask.size(); ++Scale) {

549 InputMask = *Output;

551 }

552 }

553 ScaledMask.assign(InputMask.begin(), InputMask.end());

554}

555

557 ArrayRef Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,

558 unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,

561 ManyInputsAction) {

563

564

565

566

567 int Sz = Mask.size();

568 unsigned SzDest = Sz / NumOfDestRegs;

569 unsigned SzSrc = Sz / NumOfSrcRegs;

570 for (unsigned I = 0; I < NumOfDestRegs; ++I) {

571 auto &RegMasks = Res[I];

572 RegMasks.assign(2 * NumOfSrcRegs, {});

573

574

575 for (unsigned K = 0; K < SzDest; ++K) {

576 int Idx = I * SzDest + K;

577 if (Idx == Sz)

578 break;

580 continue;

581 int MaskIdx = Mask[Idx] % Sz;

582 int SrcRegIdx = MaskIdx / SzSrc + (Mask[Idx] >= Sz ? NumOfSrcRegs : 0);

583

584

585 if (RegMasks[SrcRegIdx].empty())

587 RegMasks[SrcRegIdx][K] = MaskIdx % SzSrc;

588 }

589 }

590

591 for (unsigned I : seq<unsigned>(NumOfUsedRegs)) {

592 auto &Dest = Res[I];

593 int NumSrcRegs =

595 switch (NumSrcRegs) {

596 case 0:

597

598 NoInputAction();

599 break;

600 case 1: {

601

602 auto *It =

604 unsigned SrcReg = std::distance(Dest.begin(), It);

605 SingleInputAction(*It, SrcReg, I);

606 break;

607 }

608 default: {

609

610

611

612

613

614

617 for (int Idx = 0, VF = FirstMask.size(); Idx < VF; ++Idx) {

620 "Expected undefined mask element.");

621 FirstMask[Idx] = SecondMask[Idx] + VF;

622 }

623 }

624 };

626 for (int Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) {

629 }

630 };

631 int SecondIdx;

632 bool NewReg = true;

633 do {

634 int FirstIdx = -1;

635 SecondIdx = -1;

637 for (unsigned I : seq<unsigned>(2 * NumOfSrcRegs)) {

639 if (RegMask.empty())

640 continue;

641

642 if (FirstIdx == SecondIdx) {

643 FirstIdx = I;

644 FirstMask = RegMask;

645 continue;

646 }

647 SecondIdx = I;

648 SecondMask = RegMask;

649 CombineMasks(FirstMask, SecondMask);

650 ManyInputsAction(FirstMask, FirstIdx, SecondIdx, NewReg);

651 NewReg = false;

652 NormalizeMask(FirstMask);

654 SecondMask = FirstMask;

655 SecondIdx = FirstIdx;

656 }

657 if (FirstIdx != SecondIdx && SecondIdx >= 0) {

658 CombineMasks(SecondMask, FirstMask);

659 ManyInputsAction(SecondMask, SecondIdx, FirstIdx, NewReg);

660 NewReg = false;

661 Dest[FirstIdx].clear();

662 NormalizeMask(SecondMask);

663 }

664 } while (SecondIdx >= 0);

665 break;

666 }

667 }

668 }

669}

670

672 const APInt &DemandedElts,

673 APInt &DemandedLHS,

674 APInt &DemandedRHS) {

675 assert(VectorBitWidth >= 128 && "Vectors smaller than 128 bit not supported");

676 int NumLanes = VectorBitWidth / 128;

677 int NumElts = DemandedElts.getBitWidth();

678 int NumEltsPerLane = NumElts / NumLanes;

679 int HalfEltsPerLane = NumEltsPerLane / 2;

680

683

684

685 for (int Idx = 0; Idx != NumElts; ++Idx) {

686 if (!DemandedElts[Idx])

687 continue;

688 int LaneIdx = (Idx / NumEltsPerLane) * NumEltsPerLane;

689 int LocalIdx = Idx % NumEltsPerLane;

690 if (LocalIdx < HalfEltsPerLane) {

691 DemandedLHS.setBit(LaneIdx + 2 * LocalIdx);

692 } else {

693 LocalIdx -= HalfEltsPerLane;

694 DemandedRHS.setBit(LaneIdx + 2 * LocalIdx);

695 }

696 }

697}

698

702

703

704

705

713

714

715 bool SeenExtFromIllegalType = false;

716 for (auto *BB : Blocks)

717 for (auto &I : *BB) {

718 InstructionSet.insert(&I);

719

720 if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&

722 SeenExtFromIllegalType = true;

723

724

725 if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&

726 !I.getType()->isVectorTy() &&

727 I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {

728

729

731 continue;

732

735 }

736 }

737

738 if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))

739 return MinBWs;

740

741

742 while (!Worklist.empty()) {

745

746 if (!Visited.insert(Val).second)

747 continue;

748

749

750 if (!isa<Instruction>(Val))

751 continue;

753

754

755

756 if (DB.getDemandedBits(I).getBitWidth() > 64)

758

759 uint64_t V = DB.getDemandedBits(I).getZExtValue();

760 DBits[Leader] |= V;

761 DBits[I] = V;

762

763

764

765 if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) ||

766 !InstructionSet.count(I))

767 continue;

768

769

770

771

772 if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I) ||

773 !I->getType()->isIntegerTy()) {

774 DBits[Leader] |= ~0ULL;

775 continue;

776 }

777

778

779

780

781 if (isa<PHINode>(I))

782 continue;

783

784 if (DBits[Leader] == ~0ULL)

785

786 continue;

787

788 for (Value *O : cast<User>(I)->operands()) {

791 }

792 }

793

794

795

796

797 for (auto &I : DBits)

798 for (auto *U : I.first->users())

799 if (U->getType()->isIntegerTy() && DBits.count(U) == 0)

801

802 for (auto I = ECs.begin(), E = ECs.end(); I != E; ++I) {

803 uint64_t LeaderDemandedBits = 0;

805 LeaderDemandedBits |= DBits[M];

806

808

810

811

812

813

814

815 bool Abort = false;

818 Abort = true;

819 break;

820 }

821 if (Abort)

822 continue;

823

825 auto *MI = dyn_cast<Instruction>(M);

826 if (!MI)

827 continue;

828 Type *Ty = M->getType();

829 if (Roots.count(M))

830 Ty = MI->getOperand(0)->getType();

831

833 continue;

834

835

836

837 if (any_of(MI->operands(), [&DB, MinBW](Use &U) {

838 auto *CI = dyn_cast<ConstantInt>(U);

839

840

841 if (CI &&

842 isa<ShlOperator, LShrOperator, AShrOperator>(U.getUser()) &&

843 U.getOperandNo() == 1)

844 return CI->uge(MinBW);

845 uint64_t BW = bit_width(DB.getDemandedBits(&U).getZExtValue());

846 return bit_ceil(BW) > MinBW;

847 }))

848 continue;

849

850 MinBWs[MI] = MinBW;

851 }

852 }

853

854 return MinBWs;

855}

856

857

858template <typename ListT>

860

863 List.insert(AccGroups);

864 return;

865 }

866

867 for (const auto &AccGroupListOp : AccGroups->operands()) {

868 auto *Item = cast<MDNode>(AccGroupListOp.get());

870 List.insert(Item);

871 }

872}

873

875 if (!AccGroups1)

876 return AccGroups2;

877 if (!AccGroups2)

878 return AccGroups1;

879 if (AccGroups1 == AccGroups2)

880 return AccGroups1;

881

885

886 if (Union.size() == 0)

887 return nullptr;

888 if (Union.size() == 1)

889 return cast<MDNode>(Union.front());

890

892 return MDNode::get(Ctx, Union.getArrayRef());

893}

894

899

900 if (!MayAccessMem1 && !MayAccessMem2)

901 return nullptr;

902 if (!MayAccessMem1)

903 return Inst2->getMetadata(LLVMContext::MD_access_group);

904 if (!MayAccessMem2)

905 return Inst1->getMetadata(LLVMContext::MD_access_group);

906

909 if (!MD1 || !MD2)

910 return nullptr;

911 if (MD1 == MD2)

912 return MD1;

913

914

917

921 if (AccGroupSet2.count(MD1))

923 } else {

925 auto *Item = cast<MDNode>(Node.get());

927 if (AccGroupSet2.count(Item))

929 }

930 }

931

932 if (Intersection.size() == 0)

933 return nullptr;

934 if (Intersection.size() == 1)

935 return cast<MDNode>(Intersection.front());

936

939}

940

941

944 return Inst;

945 Instruction *I0 = cast<Instruction>(VL[0]);

948

949 for (auto Kind : {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,

950 LLVMContext::MD_noalias, LLVMContext::MD_fpmath,

951 LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load,

952 LLVMContext::MD_access_group, LLVMContext::MD_mmra}) {

954 for (int J = 1, E = VL.size(); MD && J != E; ++J) {

955 const Instruction *IJ = cast<Instruction>(VL[J]);

957

958 switch (Kind) {

959 case LLVMContext::MD_mmra: {

961 break;

962 }

963 case LLVMContext::MD_tbaa:

965 break;

966 case LLVMContext::MD_alias_scope:

968 break;

969 case LLVMContext::MD_fpmath:

971 break;

972 case LLVMContext::MD_noalias:

973 case LLVMContext::MD_nontemporal:

974 case LLVMContext::MD_invariant_load:

976 break;

977 case LLVMContext::MD_access_group:

979 break;

980 default:

982 }

983 }

984

986 }

987

988 return Inst;

989}
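
propagateMetadata() is normally called right after several scalar memory instructions have been replaced by one wide instruction. Illustrative sketch only (tagWideAccess is a hypothetical helper):

// Illustrative only: intersect the scalar instructions' metadata onto the
// single wide instruction that replaces them.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

static void tagWideAccess(Instruction *WideAccess,
                          ArrayRef<Instruction *> ScalarAccesses) {
  SmallVector<Value *, 8> VL(ScalarAccesses.begin(), ScalarAccesses.end());
  // Keeps only metadata (e.g. !tbaa, !alias.scope, !access.group) that can be
  // merged conservatively across every instruction in VL.
  propagateMetadata(WideAccess, VL);
}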

990

994

996 return nullptr;

997

998

999 assert(!Group.isReverse() && "Reversed group not supported.");

1000

1002 for (unsigned i = 0; i < VF; i++)

1003 for (unsigned j = 0; j < Group.getFactor(); ++j) {

1004 unsigned HasMember = Group.getMember(j) ? 1 : 0;

1005 Mask.push_back(Builder.getInt1(HasMember));

1006 }

1007

1009}

1010

1014 for (unsigned i = 0; i < VF; i++)

1015 for (unsigned j = 0; j < ReplicationFactor; j++)

1017

1018 return MaskVec;

1019}

1020

1022 unsigned NumVecs) {

1024 for (unsigned i = 0; i < VF; i++)

1025 for (unsigned j = 0; j < NumVecs; j++)

1026 Mask.push_back(j * VF + i);

1027

1028 return Mask;

1029}

1030

1034 for (unsigned i = 0; i < VF; i++)

1035 Mask.push_back(Start + i * Stride);

1036

1037 return Mask;

1038}

1039

1041 unsigned NumInts,

1042 unsigned NumUndefs) {

1044 for (unsigned i = 0; i < NumInts; i++)

1045 Mask.push_back(Start + i);

1046

1047 for (unsigned i = 0; i < NumUndefs; i++)

1048 Mask.push_back(-1);

1049

1050 return Mask;

1051}

1052

1054 unsigned NumElts) {

1055

1056 int NumEltsSigned = NumElts;

1057 assert(NumEltsSigned > 0 && "Expected smaller or non-zero element count");

1058

1059

1060

1062 for (int MaskElt : Mask) {

1063 assert((MaskElt < NumEltsSigned * 2) && "Expected valid shuffle mask");

1064 int UnaryElt = MaskElt >= NumEltsSigned ? MaskElt - NumEltsSigned : MaskElt;

1066 }

1067 return UnaryMask;

1068}
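
Worked values for the mask factories above; they follow directly from the loops and the snippet itself is illustrative only:

// Illustrative only: concrete masks produced by the factory helpers.
#include "llvm/Analysis/VectorUtils.h"

using namespace llvm;

static void maskFactoryExamples() {
  // Replicate each of 3 lanes 2 times: {0, 0, 1, 1, 2, 2}.
  auto Rep = createReplicatedMask(/*ReplicationFactor=*/2, /*VF=*/3);

  // Interleave 2 vectors of VF 4: {0, 4, 1, 5, 2, 6, 3, 7}.
  auto Ilv = createInterleaveMask(/*VF=*/4, /*NumVecs=*/2);

  // Every 2nd lane starting at 0, VF 4: {0, 2, 4, 6}.
  auto Str = createStrideMask(/*Start=*/0, /*Stride=*/2, /*VF=*/4);

  // 3 consecutive indices from 5, padded with 1 undef: {5, 6, 7, -1}.
  auto Seq = createSequentialMask(/*Start=*/5, /*NumInts=*/3, /*NumUndefs=*/1);

  // Fold a two-operand mask onto a single operand: {0, 5, 2, 7} with
  // NumElts = 4 becomes {0, 1, 2, 3}.
  auto Una = createUnaryMask({0, 5, 2, 7}, /*NumElts=*/4);
  (void)Rep; (void)Ilv; (void)Str; (void)Seq; (void)Una;
}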

1069

1070

1071

1072

1076 VectorType *VecTy2 = dyn_cast<VectorType>(V2->getType());

1077 assert(VecTy1 && VecTy2 &&

1078 VecTy1->getScalarType() == VecTy2->getScalarType() &&

1079 "Expect two vectors with the same element type");

1080

1081 unsigned NumElts1 = cast<FixedVectorType>(VecTy1)->getNumElements();

1082 unsigned NumElts2 = cast<FixedVectorType>(VecTy2)->getNumElements();

1083 assert(NumElts1 >= NumElts2 && "Unexpect the first vector has less elements");

1084

1085 if (NumElts1 > NumElts2) {

1086

1089 }

1090

1093}

1094

1097 unsigned NumVecs = Vecs.size();

1098 assert(NumVecs > 1 && "Should be at least two vectors");

1099

1102 do {

1104 for (unsigned i = 0; i < NumVecs - 1; i += 2) {

1105 Value *V0 = ResList[i], *V1 = ResList[i + 1];

1106 assert((V0->getType() == V1->getType() || i == NumVecs - 2) &&

1107 "Only the last vector may have a different type");

1108

1110 }

1111

1112

1113 if (NumVecs % 2 != 0)

1114 TmpList.push_back(ResList[NumVecs - 1]);

1115

1116 ResList = TmpList;

1117 NumVecs = ResList.size();

1118 } while (NumVecs > 1);

1119

1120 return ResList[0];

1121}
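
An illustrative use of concatenateVectors() with an IRBuilder; concatThree is a hypothetical helper and the particular vector types mentioned are assumptions of the example, not requirements of this file:

// Illustrative only: concatenating three <4 x float> values into one
// <12 x float>.
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/IRBuilder.h"

using namespace llvm;

static Value *concatThree(IRBuilderBase &Builder, Value *V0, Value *V1,
                          Value *V2) {
  // Pairs are folded with shuffles round by round; a shorter trailing vector
  // (if any) is widened with undefined lanes before the final concatenation.
  Value *Vecs[] = {V0, V1, V2};
  return concatenateVectors(Builder, Vecs);
}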

1122

1124 assert(isa<VectorType>(Mask->getType()) &&

1125 isa<IntegerType>(Mask->getType()->getScalarType()) &&

1126 cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==

1127 1 &&

1128 "Mask must be a vector of i1");

1129

1130 auto *ConstMask = dyn_cast<Constant>(Mask);

1131 if (!ConstMask)

1132 return false;

1133 if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask))

1134 return true;

1135 if (isa<ScalableVectorType>(ConstMask->getType()))

1136 return false;

1137 for (unsigned

1138 I = 0,

1139 E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();

1140 I != E; ++I) {

1141 if (auto *MaskElt = ConstMask->getAggregateElement(I))

1142 if (MaskElt->isNullValue() || isa<UndefValue>(MaskElt))

1143 continue;

1144 return false;

1145 }

1146 return true;

1147}

1148

1150 assert(isa<VectorType>(Mask->getType()) &&

1151 isa<IntegerType>(Mask->getType()->getScalarType()) &&

1152 cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==

1153 1 &&

1154 "Mask must be a vector of i1");

1155

1156 auto *ConstMask = dyn_cast<Constant>(Mask);

1157 if (!ConstMask)

1158 return false;

1159 if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))

1160 return true;

1161 if (isa<ScalableVectorType>(ConstMask->getType()))

1162 return false;

1163 for (unsigned

1164 I = 0,

1165 E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();

1166 I != E; ++I) {

1167 if (auto *MaskElt = ConstMask->getAggregateElement(I))

1168 if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))

1169 continue;

1170 return false;

1171 }

1172 return true;

1173}

1174

1176 assert(isa<VectorType>(Mask->getType()) &&

1177 isa<IntegerType>(Mask->getType()->getScalarType()) &&

1178 cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==

1179 1 &&

1180 "Mask must be a vector of i1");

1181

1182 auto *ConstMask = dyn_cast<Constant>(Mask);

1183 if (!ConstMask)

1184 return false;

1185 if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))

1186 return true;

1187 if (isa<ScalableVectorType>(ConstMask->getType()))

1188 return false;

1189 for (unsigned

1190 I = 0,

1191 E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();

1192 I != E; ++I) {

1193 if (auto *MaskElt = ConstMask->getAggregateElement(I))

1194 if (MaskElt->isAllOnesValue() || isa(MaskElt))

1195 return true;

1196 }

1197 return false;

1198}

1199

1200

1201

1203 assert(isa<FixedVectorType>(Mask->getType()) &&

1204 isa<IntegerType>(Mask->getType()->getScalarType()) &&

1205 cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==

1206 1 &&

1207 "Mask must be a fixed width vector of i1");

1208

1209 const unsigned VWidth =

1210 cast<FixedVectorType>(Mask->getType())->getNumElements();

1212 if (auto *CV = dyn_cast<ConstantVector>(Mask))

1213 for (unsigned i = 0; i < VWidth; i++)

1214 if (CV->getAggregateElement(i)->isNullValue())

1216 return DemandedElts;

1217}
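
The three mask predicates plus possiblyDemandedEltsInMask() are typically used together when simplifying a masked memory operation whose mask is a constant. Illustrative sketch only; the enum and helper are hypothetical:

// Illustrative only: deciding what to do with a constant-mask masked op.
#include "llvm/ADT/APInt.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Value.h"

using namespace llvm;

enum class MaskedOpAction { DropIt, UseUnmaskedOp, KeepMasked };

static MaskedOpAction classifyMask(Value *Mask) {
  if (maskIsAllZeroOrUndef(Mask))
    return MaskedOpAction::DropIt;        // no lane is ever executed
  if (maskIsAllOneOrUndef(Mask))
    return MaskedOpAction::UseUnmaskedOp; // every lane is executed
  // Otherwise possiblyDemandedEltsInMask() narrows which lanes can matter;
  // note it requires a fixed-width vector mask.
  APInt Demanded = possiblyDemandedEltsInMask(Mask);
  (void)Demanded;
  return MaskedOpAction::KeepMasked;
}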

1218

1219bool InterleavedAccessInfo::isStrided(int Stride) {

1220 unsigned Factor = std::abs(Stride);

1222}

1223

1224void InterleavedAccessInfo::collectConstStrideAccesses(

1228

1229

1230

1231

1232

1233

1234

1236 DFS.perform(LI);

1238 for (auto &I : *BB) {

1240 if (!Ptr)

1241 continue;

1243

1244

1245

1247 if (Size * 8 != DL.getTypeSizeInBits(ElementTy))

1248 continue;

1249

1250

1251

1252

1253

1254

1255

1256

1257 int64_t Stride =

1259 true, false).value_or(0);

1260

1262 AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size,

1264 }

1265}

1266

1267

1268

1269

1270

1271

1272

1273

1274

1275

1276

1277

1278

1279

1280

1281

1282

1283

1284

1285

1286

1287

1288

1289

1290

1291

1292

1293

1294

1295

1296

1297

1298

1299

1300

1301

1302

1304 bool EnablePredicatedInterleavedMemAccesses) {

1305 LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");

1307

1308

1310 collectConstStrideAccesses(AccessStrideInfo, Strides);

1311

1312 if (AccessStrideInfo.empty())

1313 return;

1314

1315

1316 collectDependences();

1317

1318

1320

1322

1324

1325

1326

1327

1328

1329

1330

1331

1332

1333

1334

1335

1336

1337 for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();

1338 BI != E; ++BI) {

1340 StrideDescriptor DesB = BI->second;

1341

1342

1343

1344

1346 if (isStrided(DesB.Stride) &&

1347 (!isPredicated(B->getParent()) || EnablePredicatedInterleavedMemAccesses)) {

1349 if (!GroupB) {

1350 LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B

1351 << '\n');

1352 GroupB = createInterleaveGroup(B, DesB.Stride, DesB.Alignment);

1353 if (B->mayWriteToMemory())

1354 StoreGroups.insert(GroupB);

1355 else

1356 LoadGroups.insert(GroupB);

1357 }

1358 }

1359

1360 for (auto AI = std::next(BI); AI != E; ++AI) {

1362 StrideDescriptor DesA = AI->second;

1363

1364

1365

1366

1367

1368

1369

1370

1371

1372

1373

1374

1375

1376

1377

1378

1379

1380

1381

1382

1387 if (MemberOfGroupB && !canReorderMemAccessesForInterleavedGroups(

1388 A, &*AccessStrideInfo.find(MemberOfGroupB)))

1389 return MemberOfGroupB;

1390 }

1391 return nullptr;

1392 };

1393

1395

1396

1397

1398

1399 if (A->mayWriteToMemory() && GroupA != GroupB) {

1401

1402

1403

1404

1405

1406

1407 if (GroupB && LoadGroups.contains(GroupB))

1408 DependentInst = DependentMember(GroupB, &*AI);

1409 else if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI))

1410 DependentInst = B;

1411

1412 if (DependentInst) {

1413

1414

1415

1416

1417 if (GroupA && StoreGroups.contains(GroupA)) {

1418 LLVM_DEBUG(dbgs() << "LV: Invalidated store group due to "

1419 "dependence between "

1420 << *A << " and " << *DependentInst << '\n');

1421 StoreGroups.remove(GroupA);

1422 releaseGroup(GroupA);

1423 }

1424

1425

1426

1427

1428 if (GroupB && LoadGroups.contains(GroupB)) {

1429 LLVM_DEBUG(dbgs() << "LV: Marking interleave group for " << *B

1430 << " as complete.\n");

1431 CompletedLoadGroups.insert(GroupB);

1432 }

1433 }

1434 }

1435 if (CompletedLoadGroups.contains(GroupB)) {

1436

1437

1438 continue;

1439 }

1440

1441

1442

1443 if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))

1444 continue;

1445

1446

1447

1448

1449

1450

1451

1453 (A->mayReadFromMemory() != B->mayReadFromMemory()) ||

1454 (A->mayWriteToMemory() != B->mayWriteToMemory()))

1455 continue;

1456

1457

1458

1459 if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)

1460 continue;

1461

1462

1463

1465 continue;

1466

1467

1468 const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(

1470 if (!DistToB)

1471 continue;

1473

1474

1475

1476 if (DistanceToB % static_cast<int64_t>(DesB.Size))

1477 continue;

1478

1479

1480

1483 if ((isPredicated(BlockA) || isPredicated(BlockB)) &&

1484 (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))

1485 continue;

1486

1487

1488

1489 int IndexA =

1490 GroupB->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);

1491

1492

1493 if (GroupB->insertMember(A, IndexA, DesA.Alignment)) {

1495 << " into the interleave group with" << *B

1496 << '\n');

1497 InterleaveGroupMap[A] = GroupB;

1498

1499

1500 if (A->mayReadFromMemory())

1502 }

1503 }

1504 }

1505

1507 int Index,

1508 const char *FirstOrLast) -> bool {

1510 assert(Member && "Group member does not exist");

1513 if (getPtrStride(PSE, AccessTy, MemberPtr, TheLoop, Strides,

1514 false, true).value_or(0))

1515 return false;

1516 LLVM_DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to "

1517 << FirstOrLast

1518 << " group member potentially pointer-wrapping.\n");

1519 releaseGroup(Group);

1520 return true;

1521 };

1522

1523

1524

1525

1526

1527

1528

1529

1530

1531

1532

1533

1534

1535

1536

1537 for (auto *Group : LoadGroups) {

1538

1539

1540

1542 continue;

1543

1544

1545

1546

1547

1548

1549 if (InvalidateGroupIfMemberMayWrap(Group, 0, "first"))

1550 continue;

1552 InvalidateGroupIfMemberMayWrap(Group, Group->getFactor() - 1, "last");

1553 else {

1554

1555

1556

1557

1558

1561 dbgs() << "LV: Invalidate candidate interleaved group due to "

1562 "a reverse access with gaps.\n");

1563 releaseGroup(Group);

1564 continue;

1565 }

1567 dbgs() << "LV: Interleaved group requires epilogue iteration.\n");

1568 RequiresScalarEpilogue = true;

1569 }

1570 }

1571

1572 for (auto *Group : StoreGroups) {

1573

1574

1575

1577 continue;

1578

1579

1580

1581

1582 if (!EnablePredicatedInterleavedMemAccesses) {

1584 dbgs() << "LV: Invalidate candidate interleaved store group due "

1585 "to gaps.\n");

1586 releaseGroup(Group);

1587 continue;

1588 }

1589

1590

1591

1592

1593

1594

1595

1596 if (InvalidateGroupIfMemberMayWrap(Group, 0, "first"))

1597 continue;

1598 for (int Index = Group->getFactor() - 1; Index > 0; Index--)

1600 InvalidateGroupIfMemberMayWrap(Group, Index, "last");

1601 break;

1602 }

1603 }

1604}
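
Once analyzeInterleaving() has run, clients query the InterleavedAccessInfo for the groups it built. The sketch below is illustrative only; how the InterleavedAccessInfo object itself is constructed depends on the client and is not shown:

// Illustrative only: querying the groups that analyzeInterleaving() built.
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

static void walkGroup(InterleavedAccessInfo &IAI, Instruction *MemAccess) {
  if (!IAI.isInterleaved(MemAccess))
    return;
  const InterleaveGroup<Instruction> *Group = IAI.getInterleaveGroup(MemAccess);
  // getFactor() is the group's stride; members may be missing (gaps), in which
  // case getMember() returns nullptr.
  for (unsigned I = 0, F = Group->getFactor(); I != F; ++I)
    if (Instruction *Member = Group->getMember(I))
      (void)Member; // emit one wide access covering all members instead
}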

1605

1607

1608

1610 return;

1611

1612

1613

1614 bool ReleasedGroup = InterleaveGroups.remove_if([&](auto *Group) {

1615 if (!Group->requiresScalarEpilogue())

1616 return false;

1619 << "LV: Invalidate candidate interleaved group due to gaps that "

1620 "require a scalar epilogue (not allowed under optsize) and cannot "

1621 "be masked (not enabled). \n");

1622 releaseGroupWithoutRemovingFromSet(Group);

1623 return true;

1624 });

1625 assert(ReleasedGroup && "At least one group must be invalidated, as a "

1626 "scalar epilogue was required");

1627 (void)ReleasedGroup;

1628 RequiresScalarEpilogue = false;

1629}

1630

1631template <typename InstT>

1633 llvm_unreachable("addMetadata can only be used for Instruction");

1634}

1635

1636namespace llvm {

1637template <>

1640 std::transform(Members.begin(), Members.end(), std::back_inserter(VL),

1641 [](std::pair<int, Instruction *> p) { return p.second; });

1643}

1644}
