LLVM: lib/Analysis/VectorUtils.cpp Source File

//===----------- VectorUtils.cpp - Vectorizer utility functions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines vectorizer utilities.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/VectorUtils.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/CommandLine.h"

#define DEBUG_TYPE "vectorutils"

using namespace llvm;
using namespace llvm::PatternMatch;

38 "max-interleave-group-factor", cl::Hidden,

39 cl::desc("Maximum factor for an interleaved access group (default = 8)"),

41

42

43

44

45

  switch (ID) {
  case Intrinsic::abs:
  case Intrinsic::bswap:
  case Intrinsic::bitreverse:
  case Intrinsic::ctpop:
  case Intrinsic::ctlz:
  case Intrinsic::cttz:
  case Intrinsic::fshl:
  case Intrinsic::fshr:
  case Intrinsic::smax:
  case Intrinsic::smin:
  case Intrinsic::umax:
  case Intrinsic::umin:
  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat:
  case Intrinsic::smul_fix:
  case Intrinsic::smul_fix_sat:
  case Intrinsic::umul_fix:
  case Intrinsic::umul_fix_sat:
  case Intrinsic::sqrt:
  case Intrinsic::asin:
  case Intrinsic::acos:
  case Intrinsic::atan:
  case Intrinsic::atan2:
  case Intrinsic::sin:
  case Intrinsic::cos:
  case Intrinsic::sincos:
  case Intrinsic::sincospi:
  case Intrinsic::tan:
  case Intrinsic::sinh:
  case Intrinsic::cosh:
  case Intrinsic::tanh:
  case Intrinsic::exp:
  case Intrinsic::exp10:
  case Intrinsic::exp2:
  case Intrinsic::ldexp:
  case Intrinsic::log:
  case Intrinsic::log10:
  case Intrinsic::log2:
  case Intrinsic::fabs:
  case Intrinsic::minnum:
  case Intrinsic::maxnum:
  case Intrinsic::minimum:
  case Intrinsic::maximum:
  case Intrinsic::minimumnum:
  case Intrinsic::maximumnum:
  case Intrinsic::modf:
  case Intrinsic::copysign:
  case Intrinsic::floor:
  case Intrinsic::ceil:
  case Intrinsic::trunc:
  case Intrinsic::rint:
  case Intrinsic::nearbyint:
  case Intrinsic::round:
  case Intrinsic::roundeven:
  case Intrinsic::pow:
  case Intrinsic::fma:
  case Intrinsic::fmuladd:
  case Intrinsic::is_fpclass:
  case Intrinsic::powi:
  case Intrinsic::canonicalize:
  case Intrinsic::fptosi_sat:
  case Intrinsic::fptoui_sat:
  case Intrinsic::lround:
  case Intrinsic::llround:
  case Intrinsic::lrint:
  case Intrinsic::llrint:
  case Intrinsic::ucmp:
  case Intrinsic::scmp:
    return true;
  default:
    return false;
  }
}
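// Illustrative note (not part of the original source): a transform can use
// this predicate to widen a scalar intrinsic call element-wise. For example,
// a call to llvm.fabs.f32 in a loop body may be replaced by a single
// llvm.fabs.v4f32 on a <4 x float> operand, since the semantics are purely
// lane-wise:
//
//   if (isTriviallyVectorizable(II->getIntrinsicID()))
//     ; // re-create the call with vector operands and a vector return type.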

/// Identify if the intrinsic is trivially scalarizable.
bool llvm::isTriviallyScalarizable(Intrinsic::ID ID,
                                   const TargetTransformInfo *TTI) {
  if (isTriviallyVectorizable(ID))
    return true;

  if (TTI && Intrinsic::isTargetIntrinsic(ID))
    return TTI->isTargetIntrinsicTriviallyScalarizable(ID);

  // TODO: Move frexp to isTriviallyVectorizable.
  switch (ID) {

  case Intrinsic::frexp:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::umul_with_overflow:
  case Intrinsic::smul_with_overflow:
    return true;
  }
  return false;
}
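// Illustrative note (not part of the original source): this is the reverse
// query of isTriviallyVectorizable. E.g. scalarizing a call to
// llvm.uadd.with.overflow.v4i32 into four scalar llvm.uadd.with.overflow.i32
// calls is valid because each result lane depends only on its operand lanes.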

/// Identifies if the vector form of the intrinsic has a scalar operand.
bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
                                              unsigned ScalarOpdIdx,
                                              const TargetTransformInfo *TTI) {
  if (TTI && Intrinsic::isTargetIntrinsic(ID))
    return TTI->isTargetIntrinsicWithScalarOpAtArg(ID, ScalarOpdIdx);

  // Vector predication intrinsics keep the vector length operand scalar.
  if (VPIntrinsic::getVectorLengthParamPos(ID) == ScalarOpdIdx)
    return true;

  switch (ID) {

  case Intrinsic::abs:
  case Intrinsic::vp_abs:
  case Intrinsic::ctlz:
  case Intrinsic::vp_ctlz:
  case Intrinsic::cttz:
  case Intrinsic::vp_cttz:
  case Intrinsic::is_fpclass:
  case Intrinsic::vp_is_fpclass:
  case Intrinsic::powi:
  case Intrinsic::vector_extract:
    return (ScalarOpdIdx == 1);
  case Intrinsic::smul_fix:
  case Intrinsic::smul_fix_sat:
  case Intrinsic::umul_fix:
  case Intrinsic::umul_fix_sat:
    return (ScalarOpdIdx == 2);
  case Intrinsic::experimental_vp_splice:
    return ScalarOpdIdx == 2 || ScalarOpdIdx == 4;
  default:
    return false;
  }
}
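// Illustrative note (not part of the original source): for example, in
// llvm.powi.v4f32.i32(<4 x float> %x, i32 %n) the exponent (operand index 1)
// stays scalar in the vector form, which is exactly what
// isVectorIntrinsicWithScalarOpAtArg(Intrinsic::powi, 1, TTI) reports.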

/// Identifies if the vector form of the intrinsic is overloaded on the type
/// of the operand at index OpdIdx, or on the return type if OpdIdx is -1.
bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(
    Intrinsic::ID ID, int OpdIdx, const TargetTransformInfo *TTI) {
  if (TTI && Intrinsic::isTargetIntrinsic(ID))
    return TTI->isTargetIntrinsicWithOverloadTypeAtArg(ID, OpdIdx);

  if (VPCastIntrinsic::isVPCast(ID))
    return OpdIdx == -1 || OpdIdx == 0;

  switch (ID) {

  case Intrinsic::fptosi_sat:
  case Intrinsic::fptoui_sat:
  case Intrinsic::lround:
  case Intrinsic::llround:
  case Intrinsic::lrint:
  case Intrinsic::llrint:
  case Intrinsic::vp_lrint:
  case Intrinsic::vp_llrint:
  case Intrinsic::ucmp:
  case Intrinsic::scmp:
  case Intrinsic::vector_extract:
    return OpdIdx == -1 || OpdIdx == 0;
  case Intrinsic::modf:
  case Intrinsic::sincos:
  case Intrinsic::sincospi:
  case Intrinsic::is_fpclass:
  case Intrinsic::vp_is_fpclass:
    return OpdIdx == 0;
  case Intrinsic::powi:
  case Intrinsic::ldexp:
    return OpdIdx == -1 || OpdIdx == 1;
  default:
    return OpdIdx == -1;
  }
}
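// Illustrative note (not part of the original source): OpdIdx == -1 denotes
// the return type. For llvm.lrint the vector form is overloaded on both the
// result and operand 0 (e.g. llvm.lrint.v4i64.v4f64), so this returns true
// for OpdIdx -1 and 0; for llvm.powi only the return type and the integer
// exponent (operand 1) participate in the overload.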

/// Identifies if the vector form of the intrinsic that returns a struct is
/// overloaded at the struct element index RetIdx.
bool llvm::isVectorIntrinsicWithStructReturnOverloadAtField(
    Intrinsic::ID ID, int RetIdx, const TargetTransformInfo *TTI) {
  if (TTI && Intrinsic::isTargetIntrinsic(ID))
    return TTI->isTargetIntrinsicWithStructReturnOverloadAtField(ID, RetIdx);

  switch (ID) {

  case Intrinsic::frexp:
    return RetIdx == 0 || RetIdx == 1;
  default:
    return RetIdx == 0;
  }
}

/// Returns intrinsic ID for call.
/// For the input call instruction it finds the mapped intrinsic and returns
/// its ID; if no mapping exists, it returns not_intrinsic.
Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI,
                                                const TargetLibraryInfo *TLI) {
  Intrinsic::ID ID = getIntrinsicForCallSite(*CI, TLI);
  if (ID == Intrinsic::not_intrinsic)
    return Intrinsic::not_intrinsic;

  if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start ||
      ID == Intrinsic::lifetime_end || ID == Intrinsic::assume ||
      ID == Intrinsic::experimental_noalias_scope_decl ||
      ID == Intrinsic::sideeffect || ID == Intrinsic::pseudoprobe)
    return ID;
  return Intrinsic::not_intrinsic;
}
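// Illustrative note (not part of the original source): besides trivially
// vectorizable intrinsics, a few side-effect-modeling "ignorable" intrinsics
// (assume, lifetime markers, pseudoprobe, ...) are passed through so the
// vectorizer does not have to treat them as opaque calls.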

/// Returns the corresponding factor of llvm.vector.interleaveN intrinsics.
unsigned llvm::getInterleaveIntrinsicFactor(Intrinsic::ID ID) {
  switch (ID) {
  case Intrinsic::vector_interleave2:
    return 2;
  case Intrinsic::vector_interleave3:
    return 3;
  case Intrinsic::vector_interleave4:
    return 4;
  case Intrinsic::vector_interleave5:
    return 5;
  case Intrinsic::vector_interleave6:
    return 6;
  case Intrinsic::vector_interleave7:
    return 7;
  case Intrinsic::vector_interleave8:
    return 8;
  default:
    return 0;
  }
}

/// Returns the corresponding factor of llvm.vector.deinterleaveN intrinsics.
unsigned llvm::getDeinterleaveIntrinsicFactor(Intrinsic::ID ID) {
  switch (ID) {
  case Intrinsic::vector_deinterleave2:
    return 2;
  case Intrinsic::vector_deinterleave3:
    return 3;
  case Intrinsic::vector_deinterleave4:
    return 4;
  case Intrinsic::vector_deinterleave5:
    return 5;
  case Intrinsic::vector_deinterleave6:
    return 6;
  case Intrinsic::vector_deinterleave7:
    return 7;
  case Intrinsic::vector_deinterleave8:
    return 8;
  default:
    return 0;
  }
}

/// Given a deinterleaveN intrinsic, return the (narrow) vector type of each
/// factor.
VectorType *llvm::getDeinterleavedVectorType(IntrinsicInst *DI) {
  [[maybe_unused]] unsigned Factor =
      getDeinterleaveIntrinsicFactor(DI->getIntrinsicID());
  ArrayRef<Type *> DISubtypes = DI->getType()->subtypes();
  assert(Factor && Factor == DISubtypes.size() &&
         "unexpected deinterleave factor or result type");
  return cast<VectorType>(DISubtypes[0]);
}
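// Illustrative note (not part of the original source): for
//   %r = call {<4 x i32>, <4 x i32>}
//            @llvm.vector.deinterleave2.v8i32(<8 x i32> %v)
// the factor is 2 and this helper returns the <4 x i32> factor type.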

/// Given a vector and an element number, see if the scalar value is already
/// around as a register, for example if it was inserted then extracted from
/// the vector.
Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
  assert(V->getType()->isVectorTy() && "Not looking at a vector?");
  VectorType *VTy = cast<VectorType>(V->getType());
  // For fixed-length vector, return poison for out of range access.
  if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
    unsigned Width = FVTy->getNumElements();
    if (EltNo >= Width)
      return PoisonValue::get(FVTy->getElementType());
  }

  if (Constant *C = dyn_cast<Constant>(V))
    return C->getAggregateElement(EltNo);

  if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
    // If this is an insert to a variable element, we don't know what it is.
    if (!isa<ConstantInt>(III->getOperand(2)))
      return nullptr;
    unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();

    // If this is an insert to the element we are looking for, return the
    // inserted value.
    if (EltNo == IIElt)
      return III->getOperand(1);

    // Guard against infinite loop on malformed, unreachable IR.
    if (III == III->getOperand(0))
      return nullptr;

    // Otherwise, the insertelement doesn't modify the value, recurse on its
    // vector input.
    return findScalarElement(III->getOperand(0), EltNo);
  }

  ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V);
  // Restrict the following transformation to fixed-length vector.
  if (SVI && isa<FixedVectorType>(SVI->getType())) {
    unsigned LHSWidth =
        cast<FixedVectorType>(SVI->getOperand(0)->getType())->getNumElements();
    int InEl = SVI->getMaskValue(EltNo);
    if (InEl < 0)
      return PoisonValue::get(VTy->getElementType());
    if (InEl < (int)LHSWidth)
      return findScalarElement(SVI->getOperand(0), InEl);
    return findScalarElement(SVI->getOperand(1), InEl - LHSWidth);
  }

  // Extract a value from a vector add operation with constant zero operand.
  Value *Val;
  Constant *C;
  if (match(V, m_Add(m_Value(Val), m_Constant(C))))
    if (Constant *Elt = C->getAggregateElement(EltNo))
      if (Elt->isNullValue())
        return findScalarElement(Val, EltNo);

  // If the vector is a splat then we can trivially find the scalar element.
  if (isa<ScalableVectorType>(VTy))
    if (Value *Splat = getSplatValue(V))
      if (EltNo < VTy->getElementCount().getKnownMinValue())
        return Splat;

  return nullptr;
}

/// If all non-negative Mask elements are the same value, return that value.
int llvm::getSplatIndex(ArrayRef<int> Mask) {
  int SplatIndex = -1;
  for (int M : Mask) {
    // Ignore invalid (undefined) mask elements.
    if (M < 0)
      continue;

    // There can be only 1 non-negative mask element value if this is a splat.
    if (SplatIndex != -1 && SplatIndex != M)
      return -1;

    // Initialize the splat index to the 1st non-negative mask element.
    SplatIndex = M;
  }
  assert((SplatIndex == -1 || SplatIndex >= 0) && "Negative index?");
  return SplatIndex;
}
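// Worked example (not part of the original source): for mask <2, -1, 2, 2>
// every defined element is 2, so getSplatIndex returns 2; for <0, 1> it
// returns -1 because two different source lanes are referenced.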

/// Get splat value if the input is a splat vector or return nullptr.
/// This function is not fully general. It checks only 2 cases:
/// the input value is (1) a splat constant vector or (2) a sequence
/// of instructions that broadcasts a scalar at element 0.
Value *llvm::getSplatValue(const Value *V) {
  if (isa<VectorType>(V->getType()))
    if (auto *C = dyn_cast<Constant>(V))
      return C->getSplatValue();

  // shuf (inselt ?, Splat, 0), ?, <0, undef, 0, ...>
  Value *Splat;
  if (match(V,
            m_Shuffle(m_InsertElt(m_Value(), m_Value(Splat), m_ZeroInt()),
                      m_Value(), m_ZeroMask())))
    return Splat;

  return nullptr;
}

/// Return true if each element of the vector value V is poisoned or equal to
/// every other non-poisoned element.
bool llvm::isSplatValue(const Value *V, int Index, unsigned Depth) {
  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");

  if (isa<VectorType>(V->getType())) {
    if (isa<UndefValue>(V))
      return true;
    // FIXME: We can allow undefs, but if Index was specified, we may want to
    //        check that the constant is defined at that index.
    if (auto *C = dyn_cast<Constant>(V))
      return C->getSplatValue() != nullptr;
  }

  if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V)) {
    // FIXME: We can safely allow undefs here. If Index was specified, we will
    //        check that the mask elt is defined at the required index.
    if (!all_equal(Shuf->getShuffleMask()))
      return false;

    // Match any index.
    if (Index == -1)
      return true;

    // Match a specific element. The mask should be defined at and match the
    // specified index.
    return Shuf->getMaskValue(Index) == Index;
  }

  // The remaining tests are all recursive, so bail out if we hit the limit.
  if (Depth++ == MaxAnalysisRecursionDepth)
    return false;

  // If both operands of a binop are splats, the result is a splat.
  Value *X, *Y, *Z;
  if (match(V, m_BinOp(m_Value(X), m_Value(Y))))
    return isSplatValue(X, Index, Depth) && isSplatValue(Y, Index, Depth);

  // If all operands of a select are splats, the result is a splat.
  if (match(V, m_Select(m_Value(X), m_Value(Y), m_Value(Z))))
    return isSplatValue(X, Index, Depth) && isSplatValue(Y, Index, Depth) &&
           isSplatValue(Z, Index, Depth);

  // TODO: Add support for unary ops (fneg), casts, intrinsics (overflow ops).

  return false;
}
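// Illustrative note (not part of the original source): a splat produced by
// the canonical insertelement + shufflevector idiom,
//   %ins = insertelement <4 x i32> poison, i32 %x, i64 0
//   %splat = shufflevector <4 x i32> %ins, <4 x i32> poison,
//                          <4 x i32> zeroinitializer
// is recognized above via the all-equal shuffle mask check.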

/// Transform a shuffle mask's output demanded element mask into demanded
/// element masks for the two operands; returns false if the mask isn't valid.
bool llvm::getShuffleDemandedElts(int SrcWidth, ArrayRef<int> Mask,
                                  const APInt &DemandedElts, APInt &DemandedLHS,
                                  APInt &DemandedRHS, bool AllowUndefElts) {
  DemandedLHS = DemandedRHS = APInt::getZero(SrcWidth);

  // Early out if we don't demand any elements.
  if (DemandedElts.isZero())
    return true;

  // Simple case of a shuffle with zeroinitializer.
  if (all_of(Mask, [](int Elt) { return Elt == 0; })) {
    DemandedLHS.setBit(0);
    return true;
  }

  for (unsigned I = 0, E = Mask.size(); I != E; ++I) {
    int M = Mask[I];
    assert((-1 <= M) && (M < (SrcWidth * 2)) &&
           "Invalid shuffle mask constant");

    if (!DemandedElts[I] || (AllowUndefElts && (M < 0)))
      continue;

    // For undef elements, we don't know anything about the common state of
    // the shuffle result.
    if (M < 0)
      return false;

    if (M < SrcWidth)
      DemandedLHS.setBit(M);
    else
      DemandedRHS.setBit(M - SrcWidth);
  }

  return true;
}

/// Does this shuffle mask represent either one slide shuffle or a pair of
/// two slide shuffles, combined with a select on some constant vector mask?
bool llvm::isMaskedSlidePair(ArrayRef<int> Mask, int NumElts,
                             std::array<std::pair<int, int>, 2> &SrcInfo) {
  const int SignalValue = NumElts * 2;
  SrcInfo[0] = {-1, SignalValue};
  SrcInfo[1] = {-1, SignalValue};
  for (auto [i, M] : enumerate(Mask)) {
    if (M < 0)
      continue;
    int Src = M >= NumElts;
    int Diff = (int)i - (M % NumElts);
    bool Match = false;
    for (int j = 0; j < 2; j++) {
      auto &[SrcE, DiffE] = SrcInfo[j];
      if (SrcE == -1) {
        assert(DiffE == SignalValue);
        SrcE = Src;
        DiffE = Diff;
      }
      if (SrcE == Src && DiffE == Diff) {
        Match = true;
        break;
      }
    }
    if (!Match)
      return false;
  }
  // Avoid all-undef masks.
  return SrcInfo[0].first != -1;
}

/// Replace each shuffle mask index with the scaled sequential indices for an
/// equivalent mask of narrowed elements.
void llvm::narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask,
                                 SmallVectorImpl<int> &ScaledMask) {
  assert(Scale > 0 && "Unexpected scaling factor");

  // Fast-path: if no scaling, then it is just a copy.
  if (Scale == 1) {
    ScaledMask.assign(Mask.begin(), Mask.end());
    return;
  }

  ScaledMask.clear();
  for (int MaskElt : Mask) {
    if (MaskElt >= 0) {
      assert(((uint64_t)Scale * MaskElt + (Scale - 1)) <= INT32_MAX &&
             "Overflowed 32-bits");
    }
    for (int SliceElt = 0; SliceElt != Scale; ++SliceElt)
      ScaledMask.push_back(MaskElt < 0 ? MaskElt : Scale * MaskElt + SliceElt);
  }
}
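// Worked example (not part of the original source): narrowing with Scale = 2
// maps mask <1, -1> to <2, 3, -1, -1> -- element 1 becomes the pair {2, 3}
// and the undef element is replicated.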

/// Try to transform a shuffle mask by replacing elements with the scaled index
/// for an equivalent mask of widened elements.
bool llvm::widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
                                SmallVectorImpl<int> &ScaledMask) {
  assert(Scale > 0 && "Unexpected scaling factor");

  // Fast-path: if no scaling, then it is just a copy.
  if (Scale == 1) {
    ScaledMask.assign(Mask.begin(), Mask.end());
    return true;
  }

  // We must map the original elements down evenly to a type with fewer
  // elements.
  int NumElts = Mask.size();
  if (NumElts % Scale != 0)
    return false;

  ScaledMask.clear();
  ScaledMask.reserve(NumElts / Scale);

  // Step through the input mask by splitting into Scale-sized slices.
  do {
    ArrayRef<int> MaskSlice = Mask.take_front(Scale);
    assert((int)MaskSlice.size() == Scale && "Expected Scale-sized slice.");

    // The first element of the slice determines how we evaluate this slice.
    int SliceFront = MaskSlice.front();
    if (SliceFront < 0) {
      // Negative values (undef or other "sentinel" values) must be equal
      // across the entire slice.
      if (!all_of(MaskSlice, [&](int M) { return M == SliceFront; }))
        return false;
      ScaledMask.push_back(SliceFront);
    } else {
      // A positive mask element must be cleanly divisible.
      if (SliceFront % Scale != 0)
        return false;
      // Elements of the slice must be consecutive.
      for (int i = 1; i < Scale; ++i)
        if (MaskSlice[i] != SliceFront + i)
          return false;
      ScaledMask.push_back(SliceFront / Scale);
    }
    Mask = Mask.drop_front(Scale);
  } while (!Mask.empty());

  assert((int)ScaledMask.size() * Scale == NumElts && "Unexpected scaled mask");

  // All elements of the original mask can be scaled down to map to the
  // elements of a mask with wider elements.
  return true;
}
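// Worked example (not part of the original source): widening with Scale = 2
// maps <2, 3, -1, -1> back to <1, -1>, but fails (returns false) on
// <2, 3, -1, 0> because the last slice is neither uniformly negative nor a
// consecutive, Scale-aligned run.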

bool llvm::widenShuffleMaskElts(ArrayRef<int> M,
                                SmallVectorImpl<int> &NewMask) {
  unsigned NumElts = M.size();
  if (NumElts % 2 != 0)
    return false;

  NewMask.clear();
  for (unsigned i = 0; i < NumElts; i += 2) {
    int M0 = M[i];
    int M1 = M[i + 1];

    // If both elements are undef, new mask is undef too.
    if (M0 == -1 && M1 == -1) {
      NewMask.push_back(-1);
      continue;
    }

    if (M0 == -1 && M1 != -1 && (M1 % 2) == 1) {
      NewMask.push_back(M1 / 2);
      continue;
    }

    if (M0 != -1 && (M0 % 2) == 0 && ((M0 + 1) == M1 || M1 == -1)) {
      NewMask.push_back(M0 / 2);
      continue;
    }

    NewMask.clear();
    return false;
  }

  assert(NewMask.size() == NumElts / 2 && "Incorrect size for mask!");
  return true;
}

/// Attempt to narrow/widen the Mask shuffle mask to the NumDstElts target
/// width.
bool llvm::scaleShuffleMaskElts(unsigned NumDstElts, ArrayRef<int> Mask,
                                SmallVectorImpl<int> &ScaledMask) {
  unsigned NumSrcElts = Mask.size();
  assert(NumSrcElts > 0 && NumDstElts > 0 && "Unexpected scaling factor");

  // Fast-path: if no scaling, then it is just a copy.
  if (NumSrcElts == NumDstElts) {
    ScaledMask.assign(Mask.begin(), Mask.end());
    return true;
  }

  // Ensure we can find a whole scale factor.
  assert(((NumSrcElts % NumDstElts) == 0 || (NumDstElts % NumSrcElts) == 0) &&
         "Unexpected scaling factor");

  if (NumSrcElts > NumDstElts) {
    int Scale = NumSrcElts / NumDstElts;
    return widenShuffleMaskElts(Scale, Mask, ScaledMask);
  }

  int Scale = NumDstElts / NumSrcElts;
  narrowShuffleMaskElts(Scale, Mask, ScaledMask);
  return true;
}

/// Repetitively apply widenShuffleMaskElts() for as long as it succeeds, to
/// get the shuffle mask with the widest possible elements.
void llvm::getShuffleMaskWithWidestElts(ArrayRef<int> Mask,
                                        SmallVectorImpl<int> &ScaledMask) {
  std::array<SmallVector<int, 16>, 2> TmpMasks;
  SmallVectorImpl<int> *Output = &TmpMasks[0], *Tmp = &TmpMasks[1];
  ArrayRef<int> InputMask = Mask;
  for (unsigned Scale = 2; Scale <= InputMask.size(); ++Scale) {
    while (widenShuffleMaskElts(Scale, InputMask, *Output)) {
      InputMask = *Output;
      std::swap(Output, Tmp);
    }
  }
  ScaledMask.assign(InputMask.begin(), InputMask.end());
}

/// Splits and processes shuffle mask depending on the number of input and
/// output registers.
void llvm::processShuffleMasks(
    ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
    unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
    function_ref<void(ArrayRef<int>, unsigned, unsigned)> SingleInputAction,
    function_ref<void(ArrayRef<int>, unsigned, unsigned, bool)>
        ManyInputsAction) {
  SmallVector<SmallVector<SmallVector<int>>> Res(NumOfDestRegs);

  // Try to perform better estimation of the permutation.
  // 1. Split sources of the shuffle into independent subregisters.
  int Sz = Mask.size();
  unsigned SzDest = Sz / NumOfDestRegs;
  unsigned SzSrc = Sz / NumOfSrcRegs;
  for (unsigned I = 0; I < NumOfDestRegs; ++I) {
    auto &RegMasks = Res[I];
    RegMasks.assign(2 * NumOfSrcRegs, {});
    // Check which source elements this destination register takes and where.
    for (unsigned K = 0; K < SzDest; ++K) {
      int Idx = I * SzDest + K;
      if (Idx == Sz)
        break;
      if (Mask[Idx] >= 2 * Sz || Mask[Idx] == PoisonMaskElem)
        continue;
      int MaskIdx = Mask[Idx] % Sz;
      int SrcRegIdx = MaskIdx / SzSrc + (Mask[Idx] >= Sz ? NumOfSrcRegs : 0);
      if (RegMasks[SrcRegIdx].empty())
        RegMasks[SrcRegIdx].assign(SzDest, PoisonMaskElem);
      RegMasks[SrcRegIdx][K] = MaskIdx % SzSrc;
    }
  }

  // 2. Process the per-destination-register masks.
  for (unsigned I : seq<unsigned>(NumOfUsedRegs)) {
    auto &Dest = Res[I];
    int NumSrcRegs =
        count_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });
    switch (NumSrcRegs) {
    case 0:
      // No input vectors were used!
      NoInputAction();
      break;
    case 1: {
      // Find the only non-empty source mask.
      auto *It =
          find_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });
      unsigned SrcReg = std::distance(Dest.begin(), It);
      SingleInputAction(*It, SrcReg, I);
      break;
    }
    default: {
      // The destination register pulls from more than one source register:
      // repeatedly combine pairs of source masks into two-register shuffles.
      auto &&CombineMasks = [](MutableArrayRef<int> FirstMask,
                               ArrayRef<int> SecondMask) {
        for (int Idx = 0, VF = FirstMask.size(); Idx < VF; ++Idx) {
          if (SecondMask[Idx] != PoisonMaskElem) {
            assert(FirstMask[Idx] == PoisonMaskElem &&
                   "Expected undefined mask element.");
            FirstMask[Idx] = SecondMask[Idx] + VF;
          }
        }
      };
      auto &&NormalizeMask = [](MutableArrayRef<int> Mask) {
        for (int Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) {
          if (Mask[Idx] != PoisonMaskElem)
            Mask[Idx] = Idx;
        }
      };
      int SecondIdx;
      bool NewReg = true;
      do {
        int FirstIdx = -1;
        SecondIdx = -1;
        MutableArrayRef<int> FirstMask, SecondMask;
        for (unsigned J : seq<unsigned>(2 * NumOfSrcRegs)) {
          SmallVectorImpl<int> &RegMask = Dest[J];
          if (RegMask.empty())
            continue;

          if (FirstIdx == SecondIdx) {
            FirstIdx = J;
            FirstMask = RegMask;
            continue;
          }
          SecondIdx = J;
          SecondMask = RegMask;
          CombineMasks(FirstMask, SecondMask);
          ManyInputsAction(FirstMask, FirstIdx, SecondIdx, NewReg);
          NewReg = false;
          NormalizeMask(FirstMask);
          RegMask.clear();
          SecondMask = FirstMask;
          SecondIdx = FirstIdx;
        }
        if (FirstIdx != SecondIdx && SecondIdx >= 0) {
          CombineMasks(SecondMask, FirstMask);
          ManyInputsAction(SecondMask, SecondIdx, FirstIdx, NewReg);
          NewReg = false;
          Dest[FirstIdx].clear();
          NormalizeMask(SecondMask);
        }
      } while (SecondIdx >= 0);
      break;
    }
    }
  }
}

/// Compute the demanded elements mask of horizontal binary operations.
void llvm::getHorizDemandedEltsForFirstOperand(unsigned VectorBitWidth,
                                               const APInt &DemandedElts,
                                               APInt &DemandedLHS,
                                               APInt &DemandedRHS) {
  assert(VectorBitWidth >= 128 && "Vectors smaller than 128 bit not supported");
  int NumLanes = VectorBitWidth / 128;
  int NumElts = DemandedElts.getBitWidth();
  int NumEltsPerLane = NumElts / NumLanes;
  int HalfEltsPerLane = NumEltsPerLane / 2;

  DemandedLHS = APInt::getZero(NumElts);
  DemandedRHS = APInt::getZero(NumElts);

  // Map DemandedElts to the horizontal operands.
  for (int Idx = 0; Idx != NumElts; ++Idx) {
    if (!DemandedElts[Idx])
      continue;
    int LaneIdx = (Idx / NumEltsPerLane) * NumEltsPerLane;
    int LocalIdx = Idx % NumEltsPerLane;
    if (LocalIdx < HalfEltsPerLane) {
      DemandedLHS.setBit(LaneIdx + 2 * LocalIdx);
    } else {
      LocalIdx -= HalfEltsPerLane;
      DemandedRHS.setBit(LaneIdx + 2 * LocalIdx);
    }
  }
}

/// Compute a map of integer instructions to their minimum legal type size.
MapVector<Instruction *, uint64_t>
llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
                               const TargetTransformInfo *TTI) {
  // DemandedBits will give us every value's live-out bits. But we want
  // to ensure no extra casts would need to be inserted, so every DAG
  // of connected values must have the same minimum bitwidth.
  EquivalenceClasses<Value *> ECs;
  SmallVector<Instruction *, 16> Worklist;
  SmallPtrSet<Value *, 4> Roots;
  SmallPtrSet<Instruction *, 16> Visited;
  DenseMap<Value *, uint64_t> DBits;
  SmallPtrSet<Instruction *, 4> InstructionSet;
  MapVector<Instruction *, uint64_t> MinBWs;

  // Determine the roots. We work bottom-up, from truncs or icmps.
  bool SeenExtFromIllegalType = false;
  for (auto *BB : Blocks)
    for (auto &I : *BB) {
      InstructionSet.insert(&I);

      if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&
          !TTI->isTypeLegal(I.getOperand(0)->getType()))
        SeenExtFromIllegalType = true;

      // Only deal with non-vector integers up to 64-bits wide.
      if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&
          !I.getType()->isVectorTy() &&
          I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {
        // Don't make work for ourselves. If we know the loaded type is legal,
        // don't add it to the worklist.
        if (TTI && isa<TruncInst>(&I) && TTI->isTypeLegal(I.getType()))
          continue;

        Worklist.push_back(&I);
        Roots.insert(&I);
      }
    }
  // Early exit.
  if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))
    return MinBWs;

  // Now proceed breadth-first, unioning values together.
  while (!Worklist.empty()) {
    Instruction *I = Worklist.pop_back_val();
    Value *Leader = ECs.getOrInsertLeaderValue(I);

    if (!Visited.insert(I).second)
      continue;

    // If we encounter a type that is larger than 64 bits, we can't represent
    // it so bail out.
    if (DB.getDemandedBits(I).getBitWidth() > 64)
      return MapVector<Instruction *, uint64_t>();

    uint64_t V = DB.getDemandedBits(I).getZExtValue();
    DBits[Leader] |= V;
    DBits[I] = V;

    // Casts, loads and instructions outside of our range terminate a chain
    // successfully.
    if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) ||
        !InstructionSet.count(I))
      continue;

    // Unsafe casts terminate a chain unsuccessfully. We can't do anything
    // useful with bitcasts, ptrtoints or inttoptrs and it'd be unsafe to
    // transform anything that relies on them.
    if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I) ||
        !I->getType()->isIntegerTy()) {
      DBits[Leader] |= ~0ULL;
      continue;
    }

    // We don't modify the types of PHIs. Reductions will already have been
    // truncated if possible, and inductions' sizes will have been chosen by
    // indvars.
    if (isa<PHINode>(I))
      continue;

    if (DBits[Leader] == ~0ULL)
      // All bits demanded, no point continuing.
      continue;

    for (Value *O : I->operands()) {
      if (auto *OI = dyn_cast<Instruction>(O)) {
        ECs.unionSets(Leader, OI);
        Worklist.push_back(OI);
      }
    }
  }

  // Now we've discovered all values, walk them to see if there are
  // any users we didn't see. If there are, we can't optimize that
  // chain.
  for (auto &I : DBits)
    for (auto *U : I.first->users())
      if (U->getType()->isIntegerTy() && DBits.count(U) == 0)
        DBits[ECs.getOrInsertLeaderValue(I.first)] |= ~0ULL;

  for (const auto &E : ECs) {
    if (!E->isLeader())
      continue;
    uint64_t LeaderDemandedBits = 0;
    for (Value *M : ECs.members(*E))
      LeaderDemandedBits |= DBits[M];

    uint64_t MinBW = llvm::bit_width(LeaderDemandedBits);
    // Round up to a power of 2.
    MinBW = llvm::bit_ceil(MinBW);

    // We don't modify the types of PHIs. Reductions will already have been
    // truncated if possible, and inductions' sizes will have been chosen by
    // indvars. If we are required to shrink a PHI, abandon this entire
    // equivalence class.
    bool Abort = false;
    for (Value *M : ECs.members(*E))
      if (isa<PHINode>(M) && MinBW < M->getType()->getScalarSizeInBits()) {
        Abort = true;
        break;
      }
    if (Abort)
      continue;

    for (Value *M : ECs.members(*E)) {
      auto *MI = dyn_cast<Instruction>(M);
      if (!MI)
        continue;
      Type *Ty = M->getType();
      if (Roots.count(M))
        Ty = MI->getOperand(0)->getType();

      if (MinBW >= Ty->getScalarSizeInBits())
        continue;

      // If any of M's operands demand more bits than MinBW then M cannot be
      // performed safely in MinBW bits.
      if (any_of(MI->operands(), [&DB, MinBW](Use &U) {
            auto *CI = dyn_cast<ConstantInt>(U);
            // For constant shift amounts, check if the shift would result in
            // poison.
            if (CI &&
                isa<ShlOperator, LShrOperator, AShrOperator>(U.getUser()) &&
                U.getOperandNo() == 1)
              return CI->uge(MinBW);
            uint64_t BW = bit_width(DB.getDemandedBits(&U).getZExtValue());
            return bit_ceil(BW) > MinBW;
          }))
        continue;

      MinBWs[MI] = MinBW;
    }
  }

  return MinBWs;
}

/// Add all access groups in AccGroups to List.
template <typename ListT>
static void addToAccessGroupList(ListT &List, MDNode *AccGroups) {
  // Interpret an access group as a list containing itself.
  if (AccGroups->getNumOperands() == 0) {
    assert(isValidAsAccessGroup(AccGroups) && "Node must be an access group");
    List.insert(AccGroups);
    return;
  }

  for (const auto &AccGroupListOp : AccGroups->operands()) {
    auto *Item = cast<MDNode>(AccGroupListOp.get());
    assert(isValidAsAccessGroup(Item) && "List item must be an access group");
    List.insert(Item);
  }
}

/// Compute the union of two access-group lists.
MDNode *llvm::uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2) {
  if (!AccGroups1)
    return AccGroups2;
  if (!AccGroups2)
    return AccGroups1;
  if (AccGroups1 == AccGroups2)
    return AccGroups1;

  SmallSetVector<Metadata *, 4> Union;
  addToAccessGroupList(Union, AccGroups1);
  addToAccessGroupList(Union, AccGroups2);

  if (Union.size() == 0)
    return nullptr;
  if (Union.size() == 1)
    return cast<MDNode>(Union.front());

  LLVMContext &Ctx = AccGroups1->getContext();
  return MDNode::get(Ctx, Union.getArrayRef());
}

/// Compute the access-group list of access groups that Inst1 and Inst2 are
/// both in.
MDNode *llvm::intersectAccessGroups(const Instruction *Inst1,
                                    const Instruction *Inst2) {
  bool MayAccessMem1 = Inst1->mayReadOrWriteMemory();
  bool MayAccessMem2 = Inst2->mayReadOrWriteMemory();

  if (!MayAccessMem1 && !MayAccessMem2)
    return nullptr;
  if (!MayAccessMem1)
    return Inst2->getMetadata(LLVMContext::MD_access_group);
  if (!MayAccessMem2)
    return Inst1->getMetadata(LLVMContext::MD_access_group);

  MDNode *MD1 = Inst1->getMetadata(LLVMContext::MD_access_group);
  MDNode *MD2 = Inst2->getMetadata(LLVMContext::MD_access_group);
  if (!MD1 || !MD2)
    return nullptr;
  if (MD1 == MD2)
    return MD1;

  // Use a set for the fast membership test below.
  SmallPtrSet<Metadata *, 4> AccGroupSet2;
  addToAccessGroupList(AccGroupSet2, MD2);

  SmallVector<Metadata *, 4> Intersection;
  if (MD1->getNumOperands() == 0) {
    assert(isValidAsAccessGroup(MD1) && "Node must be an access group");
    if (AccGroupSet2.count(MD1))
      Intersection.push_back(MD1);
  } else {
    for (const MDOperand &Node : MD1->operands()) {
      auto *Item = cast<MDNode>(Node.get());
      assert(isValidAsAccessGroup(Item) && "List item must be an access group");
      if (AccGroupSet2.count(Item))
        Intersection.push_back(Item);
    }
  }

  if (Intersection.size() == 0)
    return nullptr;
  if (Intersection.size() == 1)
    return cast<MDNode>(Intersection.front());

  LLVMContext &Ctx = Inst1->getContext();
  return MDNode::get(Ctx, Intersection);
}

/// Add metadata from Inst to Metadata, if it can be preserved after
/// vectorization.
void llvm::getMetadataToPropagate(
    Instruction *Inst,
    SmallVectorImpl<std::pair<unsigned, MDNode *>> &Metadata) {
  Inst->getAllMetadataOtherThanDebugLoc(Metadata);
  static const unsigned SupportedIDs[] = {
      LLVMContext::MD_tbaa,         LLVMContext::MD_alias_scope,
      LLVMContext::MD_noalias,      LLVMContext::MD_fpmath,
      LLVMContext::MD_nontemporal,  LLVMContext::MD_invariant_load,
      LLVMContext::MD_access_group, LLVMContext::MD_mmra};

  // Remove any unsupported metadata kinds from Metadata.
  for (unsigned Idx = 0; Idx != Metadata.size();) {
    if (is_contained(SupportedIDs, Metadata[Idx].first)) {
      ++Idx;
    } else {
      // Swap the unsupported entry with the last element and pop it.
      std::swap(Metadata[Idx], Metadata.back());
      Metadata.pop_back();
    }
  }
}

/// Propagate metadata that is common to all elements of VL onto Inst.
Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
  if (VL.empty())
    return Inst;
  SmallVector<std::pair<unsigned, MDNode *>> Metadata;
  getMetadataToPropagate(cast<Instruction>(VL[0]), Metadata);

  for (auto &[Kind, MD] : Metadata) {
    for (int J = 1, E = VL.size(); MD && J != E; ++J) {
      const Instruction *IJ = cast<Instruction>(VL[J]);
      MDNode *IMD = IJ->getMetadata(Kind);

      switch (Kind) {
      case LLVMContext::MD_mmra: {
        MD = MMRAMetadata::combine(Inst->getContext(), MD, IMD);
        break;
      }
      case LLVMContext::MD_tbaa:
        MD = MDNode::getMostGenericTBAA(MD, IMD);
        break;
      case LLVMContext::MD_alias_scope:
        MD = MDNode::getMostGenericAliasScope(MD, IMD);
        break;
      case LLVMContext::MD_fpmath:
        MD = MDNode::getMostGenericFPMath(MD, IMD);
        break;
      case LLVMContext::MD_noalias:
      case LLVMContext::MD_nontemporal:
      case LLVMContext::MD_invariant_load:
        MD = MDNode::intersect(MD, IMD);
        break;
      case LLVMContext::MD_access_group:
        MD = intersectAccessGroups(Inst, IJ);
        break;
      default:
        llvm_unreachable("unhandled metadata");
      }
    }

    Inst->setMetadata(Kind, MD);
  }

  return Inst;
}

/// Create a mask that filters the members of an interleave group where there
/// are gaps.
Constant *
llvm::createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF,
                           const InterleaveGroup<Instruction> &Group) {
  // All 1's means mask is not needed.
  if (Group.getNumMembers() == Group.getFactor())
    return nullptr;

  // TODO: support reversed access.
  assert(!Group.isReverse() && "Reversed group not supported.");

  SmallVector<Constant *, 16> Mask;
  for (unsigned i = 0; i < VF; i++)
    for (unsigned j = 0; j < Group.getFactor(); ++j) {
      unsigned HasMember = Group.getMember(j) ? 1 : 0;
      Mask.push_back(Builder.getInt1(HasMember));
    }

  return ConstantVector::get(Mask);
}

/// Create a mask with replicated elements.
llvm::SmallVector<int, 16>
llvm::createReplicatedMask(unsigned ReplicationFactor, unsigned VF) {
  SmallVector<int, 16> MaskVec;
  for (unsigned i = 0; i < VF; i++)
    for (unsigned j = 0; j < ReplicationFactor; j++)
      MaskVec.push_back(i);

  return MaskVec;
}

/// Create an interleave shuffle mask.
llvm::SmallVector<int, 16> llvm::createInterleaveMask(unsigned VF,
                                                      unsigned NumVecs) {
  SmallVector<int, 16> Mask;
  for (unsigned i = 0; i < VF; i++)
    for (unsigned j = 0; j < NumVecs; j++)
      Mask.push_back(j * VF + i);

  return Mask;
}
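// Worked example (not part of the original source):
// createInterleaveMask(/*VF=*/4, /*NumVecs=*/2) yields
// <0, 4, 1, 5, 2, 6, 3, 7>, which interleaves two concatenated 4-element
// vectors lane by lane.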

/// Create a stride shuffle mask.
llvm::SmallVector<int, 16>
llvm::createStrideMask(unsigned Start, unsigned Stride, unsigned VF) {
  SmallVector<int, 16> Mask;
  for (unsigned i = 0; i < VF; i++)
    Mask.push_back(Start + i * Stride);

  return Mask;
}
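// Worked example (not part of the original source):
// createStrideMask(/*Start=*/0, /*Stride=*/2, /*VF=*/4) yields <0, 2, 4, 6>,
// i.e. it selects every other element -- the deinterleaving counterpart of
// createInterleaveMask above.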

/// Create a sequential shuffle mask.
llvm::SmallVector<int, 16> llvm::createSequentialMask(unsigned Start,
                                                      unsigned NumInts,
                                                      unsigned NumUndefs) {
  SmallVector<int, 16> Mask;
  for (unsigned i = 0; i < NumInts; i++)
    Mask.push_back(Start + i);

  for (unsigned i = 0; i < NumUndefs; i++)
    Mask.push_back(-1);

  return Mask;
}
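// Worked example (not part of the original source):
// createSequentialMask(/*Start=*/0, /*NumInts=*/4, /*NumUndefs=*/4) yields
// <0, 1, 2, 3, -1, -1, -1, -1>, as used below in concatenateTwoVectors to pad
// the narrower vector with undefs.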

/// Given a shuffle mask for a binary shuffle, create the equivalent shuffle
/// mask assuming both operands are the same vector.
llvm::SmallVector<int, 16> llvm::createUnaryMask(ArrayRef<int> Mask,
                                                 unsigned NumElts) {
  // Avoid casts in the loop and make sure we have a reasonable number.
  int NumEltsSigned = NumElts;
  assert(NumEltsSigned > 0 && "Expected smaller or non-zero element count");

  // If the mask chooses an element from operand 1, reduce it to choose from
  // the corresponding element of operand 0. Undef mask elements are unchanged.
  SmallVector<int, 16> UnaryMask;
  for (int MaskElt : Mask) {
    assert((MaskElt < NumEltsSigned * 2) && "Expected valid shuffle mask");
    int UnaryElt = MaskElt >= NumEltsSigned ? MaskElt - NumEltsSigned : MaskElt;
    UnaryMask.push_back(UnaryElt);
  }
  return UnaryMask;
}

/// A helper function for concatenating vectors. This function concatenates two
/// vectors having the same element type. If the second vector has fewer
/// elements than the first, it is padded with undefs.
static Value *concatenateTwoVectors(IRBuilderBase &Builder, Value *V1,
                                    Value *V2) {
  VectorType *VecTy1 = dyn_cast<VectorType>(V1->getType());
  VectorType *VecTy2 = dyn_cast<VectorType>(V2->getType());
  assert(VecTy1 && VecTy2 &&
         VecTy1->getScalarType() == VecTy2->getScalarType() &&
         "Expect two vectors with the same element type");

  unsigned NumElts1 = cast<FixedVectorType>(VecTy1)->getNumElements();
  unsigned NumElts2 = cast<FixedVectorType>(VecTy2)->getNumElements();
  assert(NumElts1 >= NumElts2 && "Unexpect the first vector has less elements");

  if (NumElts1 > NumElts2) {
    // Extend with UNDEFs.
    V2 = Builder.CreateShuffleVector(
        V2, createSequentialMask(0, NumElts2, NumElts1 - NumElts2));
  }

  return Builder.CreateShuffleVector(
      V1, V2, createSequentialMask(0, NumElts1 + NumElts2, 0));
}

/// Concatenate a list of vectors.
Value *llvm::concatenateVectors(IRBuilderBase &Builder,
                                ArrayRef<Value *> Vecs) {
  unsigned NumVecs = Vecs.size();
  assert(NumVecs > 1 && "Should be at least two vectors");

  SmallVector<Value *, 8> ResList;
  ResList.append(Vecs.begin(), Vecs.end());
  do {
    SmallVector<Value *, 8> TmpList;
    for (unsigned i = 0; i < NumVecs - 1; i += 2) {
      Value *V0 = ResList[i], *V1 = ResList[i + 1];
      assert((V0->getType() == V1->getType() || i == NumVecs - 2) &&
             "Only the last vector may have a different type");

      TmpList.push_back(concatenateTwoVectors(Builder, V0, V1));
    }

    // Push the last vector if the total number of vectors is odd.
    if (NumVecs % 2 != 0)
      TmpList.push_back(ResList[NumVecs - 1]);

    ResList = TmpList;
    NumVecs = ResList.size();
  } while (NumVecs > 1);

  return ResList[0];
}
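// Illustrative note (not part of the original source): the list is reduced
// pairwise, so concatenating {A, B, C} first builds AB = concat(A, B), keeps C
// for the next round, and then builds concat(AB, C); only the last vector in
// the list may have a narrower type.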

/// Given a mask vector of i1, return true if all of the elements of this
/// predicate mask are known to be false or undef.
bool llvm::maskIsAllZeroOrUndef(Value *Mask) {
  assert(isa<VectorType>(Mask->getType()) &&
         isa<IntegerType>(Mask->getType()->getScalarType()) &&
         cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
             1 &&
         "Mask must be a vector of i1");

  auto *ConstMask = dyn_cast<Constant>(Mask);
  if (!ConstMask)
    return false;
  if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask))
    return true;
  if (isa<ScalableVectorType>(ConstMask->getType()))
    return false;
  for (unsigned
           I = 0,
           E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
       I != E; ++I) {
    if (auto *MaskElt = ConstMask->getAggregateElement(I))
      if (MaskElt->isNullValue() || isa<UndefValue>(MaskElt))
        continue;
    return false;
  }
  return true;
}
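// Illustrative note (not part of the original source): for a constant mask
// such as <i1 0, i1 undef, i1 0, i1 0> this returns true, letting transforms
// treat the masked operation as a no-op; a non-trivial scalable-vector mask
// conservatively returns false.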

/// Given a mask vector of i1, return true if all of the elements of this
/// predicate mask are known to be true or undef.
bool llvm::maskIsAllOneOrUndef(Value *Mask) {
  assert(isa<VectorType>(Mask->getType()) &&
         isa<IntegerType>(Mask->getType()->getScalarType()) &&
         cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
             1 &&
         "Mask must be a vector of i1");

  auto *ConstMask = dyn_cast<Constant>(Mask);
  if (!ConstMask)
    return false;
  if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
    return true;
  if (isa<ScalableVectorType>(ConstMask->getType()))
    return false;
  for (unsigned
           I = 0,
           E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
       I != E; ++I) {
    if (auto *MaskElt = ConstMask->getAggregateElement(I))
      if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
        continue;
    return false;
  }
  return true;
}

/// Given a mask vector of i1, return true if any of the elements of this
/// predicate mask are known to be true or undef.
bool llvm::maskContainsAllOneOrUndef(Value *Mask) {
  assert(isa<VectorType>(Mask->getType()) &&
         isa<IntegerType>(Mask->getType()->getScalarType()) &&
         cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
             1 &&
         "Mask must be a vector of i1");

  auto *ConstMask = dyn_cast<Constant>(Mask);
  if (!ConstMask)
    return false;
  if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
    return true;
  if (isa<ScalableVectorType>(ConstMask->getType()))
    return false;
  for (unsigned
           I = 0,
           E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
       I != E; ++I) {
    if (auto *MaskElt = ConstMask->getAggregateElement(I))
      if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
        return true;
  }
  return false;
}

/// Given a mask vector <Y x i1>, return an APInt (of bitwidth Y) for each lane
/// which may be active. TODO: This is a lot like known bits, but for vectors.
/// Is there something we can common this with?
APInt llvm::possiblyDemandedEltsInMask(Value *Mask) {
  assert(isa<FixedVectorType>(Mask->getType()) &&
         isa<IntegerType>(Mask->getType()->getScalarType()) &&
         cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
             1 &&
         "Mask must be a fixed width vector of i1");

  const unsigned VWidth =
      cast<FixedVectorType>(Mask->getType())->getNumElements();
  APInt DemandedElts = APInt::getAllOnes(VWidth);
  if (auto *CV = dyn_cast<ConstantVector>(Mask))
    for (unsigned i = 0; i < VWidth; i++)
      if (CV->getAggregateElement(i)->isNullValue())
        DemandedElts.clearBit(i);
  return DemandedElts;
}

bool InterleavedAccessInfo::isStrided(int Stride) {
  unsigned Factor = std::abs(Stride);
  return Factor >= 2 && Factor <= MaxInterleaveGroupFactor;
}
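// Illustrative note (not part of the original source): with the default
// max-interleave-group-factor of 8, strides -8..-2 and 2..8 qualify; stride 1
// (consecutive) and stride 0 (uniform) accesses are not interleave-group
// candidates.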

void InterleavedAccessInfo::collectConstStrideAccesses(
    MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
    const DenseMap<Value *, const SCEV *> &Strides) {
  auto &DL = TheLoop->getHeader()->getDataLayout();

  // Since it's desired that the load/store instructions be maintained in
  // "program order" for the interleaved access analysis, we have to visit the
  // blocks in the loop in reverse postorder (i.e., in a topological order).
  // Such an ordering will ensure that any load/store that may be executed
  // before a second load/store will precede the second load/store in
  // AccessStrideInfo.
  LoopBlocksDFS DFS(TheLoop);
  DFS.perform(LI);
  for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
    for (auto &I : *BB) {
      Value *Ptr = getLoadStorePointerOperand(&I);
      if (!Ptr)
        continue;
      Type *ElementTy = getLoadStoreType(&I);

      // Currently, codegen doesn't support cases where the type size doesn't
      // match the alloc size. Skip them.
      uint64_t Size = DL.getTypeAllocSize(ElementTy);
      if (Size * 8 != DL.getTypeSizeInBits(ElementTy))
        continue;

      // We don't check wrapping here because we don't know yet if Ptr will be
      // part of a full group or a group with gaps. Checking wrapping for all
      // pointers (even those that end up in groups with no gaps) would be
      // overly conservative; the wrap checks are deferred until after the
      // interleaved groups are formed.
      int64_t Stride = getPtrStride(PSE, ElementTy, Ptr, TheLoop, *DT, Strides,
                                    /*Assume=*/true, /*ShouldCheckWrap=*/false)
                           .value_or(0);

      const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
      AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size,
                                              getLoadStoreAlignment(&I));
    }
}

/// Analyze the interleaved accesses and collect them in interleave groups.
void InterleavedAccessInfo::analyzeInterleaving(
    bool EnablePredicatedInterleavedMemAccesses) {
  LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
  const auto &Strides = LAI->getSymbolicStrides();

  // Holds all accesses with a constant stride.
  MapVector<Instruction *, StrideDescriptor> AccessStrideInfo;
  collectConstStrideAccesses(AccessStrideInfo, Strides);

  if (AccessStrideInfo.empty())
    return;

  // Collect the dependences in the loop.
  collectDependences();

1450

1451

1453

1455

1457

1458

1459

1460

1461

1462

1463

1464

1465

1466

1467

1468

1469

  for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
       BI != E; ++BI) {
    Instruction *B = BI->first;
    StrideDescriptor DesB = BI->second;

    // Initialize a group for B if it has an allowable stride. Even if we don't
    // create a group for B, we continue with the bottom-up algorithm to ensure
    // we don't break any of B's dependences.
    InterleaveGroup<Instruction> *GroupB = nullptr;
    if (isStrided(DesB.Stride) &&
        (!isPredicated(B->getParent()) ||
         EnablePredicatedInterleavedMemAccesses)) {
      GroupB = getInterleaveGroup(B);
      if (!GroupB) {
        LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
                          << '\n');
        GroupB = createInterleaveGroup(B, DesB.Stride, DesB.Alignment);
        if (B->mayWriteToMemory())
          StoreGroups.insert(GroupB);
        else
          LoadGroups.insert(GroupB);
      }
    }

    for (auto AI = std::next(BI); AI != E; ++AI) {
      Instruction *A = AI->first;
      StrideDescriptor DesA = AI->second;

      // Our code-motion strategy implies that we can't have dependences
      // between accesses in an interleaved group and other accesses located
      // between the first and last member of the group.
      auto DependentMember = [&](InterleaveGroup<Instruction> *Group,
                                 StrideEntry *A) -> Instruction * {
        for (uint32_t Index = 0; Index < Group->getFactor(); ++Index) {
          Instruction *MemberOfGroupB = Group->getMember(Index);
          if (MemberOfGroupB && !canReorderMemAccessesForInterleavedGroups(
                                    A, &*AccessStrideInfo.find(MemberOfGroupB)))
            return MemberOfGroupB;
        }
        return nullptr;
      };

      auto *GroupA = getInterleaveGroup(A);
      // If A is a load, dependencies are tolerable; there's nothing to do
      // here. If both A and B belong to the same (store) group, they are
      // independent, even if dependencies have not been recorded.
      if (A->mayWriteToMemory() && GroupA != GroupB) {
        Instruction *DependentInst = nullptr;
        // If GroupB is a load group, we have to compare AI against all members
        // of GroupB: if any load within GroupB has a dependency on AI, GroupB
        // must be marked complete and the store group of A (if any) released.
        if (GroupB && LoadGroups.contains(GroupB))
          DependentInst = DependentMember(GroupB, &*AI);
        else if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI))
          DependentInst = B;

        if (DependentInst) {
          // A has a store dependence on B (or on some load within GroupB) and
          // is part of a store group. Release A's group to prevent illegal
          // sinking of A below B.
          if (GroupA && StoreGroups.contains(GroupA)) {
            LLVM_DEBUG(dbgs() << "LV: Invalidated store group due to "
                                 "dependence between "
                              << *A << " and " << *DependentInst << '\n');
            StoreGroups.remove(GroupA);
            releaseGroup(GroupA);
          }
          // If B is a load and part of an interleave group, no earlier loads
          // can be added to B's interleave group, because this would mean the
          // dependent instruction would move across store A. Mark the
          // interleave group as complete.
          if (GroupB && LoadGroups.contains(GroupB)) {
            LLVM_DEBUG(dbgs() << "LV: Marking interleave group for " << *B
                              << " as complete.\n");
            CompletedLoadGroups.insert(GroupB);
          }
        }
      }
      if (CompletedLoadGroups.contains(GroupB)) {
        // Skip trying to add A to B; continue to look for other conflicting
        // A's in groups to be released.
        continue;
      }

      // At this point, we've checked for illegal code motion. If either A or B
      // isn't strided, there's nothing left to do.
      if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
        continue;

      // Ignore A if it's already in a group or isn't the same kind of memory
      // operation as B.
      // Note that mayReadFromMemory() isn't mutually exclusive to
      // mayWriteToMemory in the case of atomic loads. We shouldn't see those
      // here, canVectorizeMemory() should have returned false - except for the
      // case we asked for optimization remarks.
      if (isInterleaved(A) ||
          (A->mayReadFromMemory() != B->mayReadFromMemory()) ||
          (A->mayWriteToMemory() != B->mayWriteToMemory()))
        continue;

      // Check rules 1 and 2. Ignore A if its stride or size is different from
      // that of B.
      if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
        continue;

      // Ignore A if the memory objects of A and B don't belong to the same
      // address space.
      if (getLoadStoreAddressSpace(A) != getLoadStoreAddressSpace(B))
        continue;

      // Calculate the distance from A to B.
      const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
          PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
      if (!DistToB)
        continue;
      int64_t DistanceToB = DistToB->getAPInt().getSExtValue();

      // Check rule 3. Ignore A if its distance to B is not a multiple of the
      // size.
      if (DistanceToB % static_cast<int64_t>(DesB.Size))
        continue;

      // All members of a predicated interleave-group must have the same
      // predicate, and currently must reside in the same BB.
      BasicBlock *BlockA = A->getParent();
      BasicBlock *BlockB = B->getParent();
      if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
          (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
        continue;

      // The index of A is the index of B plus A's distance to B in multiples
      // of the size.
      int IndexA =
          GroupB->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);

      // Try to insert A into B's group.
      if (GroupB->insertMember(A, IndexA, DesA.Alignment)) {
        LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
                          << " into the interleave group with" << *B
                          << '\n');
        InterleaveGroupMap[A] = GroupB;

        // Set the first load in program order as the insert position.
        if (A->mayReadFromMemory())
          GroupB->setInsertPos(A);
      }
    } // Iteration over A accesses.
  } // Iteration over B accesses.

  auto InvalidateGroupIfMemberMayWrap = [&](InterleaveGroup<Instruction> *Group,
                                            int Index,
                                            const char *FirstOrLast) -> bool {
    Instruction *Member = Group->getMember(Index);
    assert(Member && "Group member does not exist");
    Value *MemberPtr = getLoadStorePointerOperand(Member);
    Type *AccessTy = getLoadStoreType(Member);
    if (getPtrStride(PSE, AccessTy, MemberPtr, TheLoop, *DT, Strides,
                     /*Assume=*/false, /*ShouldCheckWrap=*/true)
            .value_or(0))
      return false;
    LLVM_DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to "
                      << FirstOrLast
                      << " group member potentially pointer-wrapping.\n");
    releaseGroup(Group);
    return true;
  };

  // To avoid accessing memory locations outside the bounds of the object being
  // accessed, remove (or flag for a scalar epilogue) any load groups whose
  // first or last member may wrap over the address space.
  for (auto *Group : LoadGroups) {
    // Case 1: A full group. Can skip the checks; for full groups, if the wide
    // load would wrap around the address space we would do a memory access at
    // nullptr even without the transformation.
    if (Group->isFull())
      continue;

    // Case 2: If first and last members of the group don't wrap this implies
    // that all the pointers in the group don't wrap. So we check only group
    // member 0 (which is always guaranteed to exist), and group member
    // Factor - 1; if the latter doesn't exist we rely on peeling (if it is a
    // non-reversed access -- see Case 3).
    if (InvalidateGroupIfMemberMayWrap(Group, 0, "first"))
      continue;
    if (Group->getMember(Group->getFactor() - 1))
      InvalidateGroupIfMemberMayWrap(Group, Group->getFactor() - 1, "last");
    else {
      // Case 3: A non-reversed interleaved load group with gaps: we need
      // to execute at least one scalar epilogue iteration, so we don't
      // speculatively access memory out-of-bounds. Only a member at index
      // factor - 1 needs to be considered, since every group has a member at
      // index zero.
      if (Group->isReverse()) {
        LLVM_DEBUG(
            dbgs() << "LV: Invalidate candidate interleaved group due to "
                      "a reverse access with gaps.\n");
        releaseGroup(Group);
        continue;
      }
      LLVM_DEBUG(
          dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
      RequiresScalarEpilogue = true;
    }
  }

  for (auto *Group : StoreGroups) {
    // Case 1: A full group. Can skip the checks; for full groups, if the wide
    // store would wrap around the address space we would do a memory access at
    // nullptr even without the transformation.
    if (Group->isFull())
      continue;

    // Interleave-store-groups with gaps are implemented using masked wide
    // stores. Remove interleaved store groups with gaps if
    // masked-interleaved-accesses are not enabled by the target.
    if (!EnablePredicatedInterleavedMemAccesses) {
      LLVM_DEBUG(
          dbgs() << "LV: Invalidate candidate interleaved store group due "
                    "to gaps.\n");
      releaseGroup(Group);
      continue;
    }

    // Case 2: If first and last members of the group don't wrap this implies
    // that all the pointers in the group don't wrap. So we check only group
    // member 0 (which is always guaranteed to exist), and the last group
    // member. Case 3 (scalar epilogue) is not relevant for stores with gaps,
    // which are implemented with masked stores (rather than speculative
    // access, as in loads).
    if (InvalidateGroupIfMemberMayWrap(Group, 0, "first"))
      continue;
    for (int Index = Group->getFactor() - 1; Index > 0; Index--)
      if (Group->getMember(Index)) {
        InvalidateGroupIfMemberMayWrap(Group, Index, "last");
        break;
      }
  }
}

/// Invalidate groups that require a scalar epilogue (due to gaps).
void InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue() {
  // If no group had triggered the requirement to create an epilogue loop,
  // there is nothing to do.
  if (!requiresScalarEpilogue())
    return;

  // Release groups requiring scalar epilogues. Note that this also removes
  // them from InterleaveGroups.
  bool ReleasedGroup = InterleaveGroups.remove_if([&](auto *Group) {
    if (!Group->requiresScalarEpilogue())
      return false;
    LLVM_DEBUG(
        dbgs()
        << "LV: Invalidate candidate interleaved group due to gaps that "
           "require a scalar epilogue (not allowed under optsize) and cannot "
           "be masked (not enabled). \n");
    releaseGroupWithoutRemovingFromSet(Group);
    return true;
  });
  assert(ReleasedGroup && "At least one group must be invalidated, as a "
                          "scalar epilogue was required");
  (void)ReleasedGroup;
  RequiresScalarEpilogue = false;
}

template <typename InstT>
void InterleaveGroup<InstT>::addMetadata(InstT *NewInst) const {
  llvm_unreachable("addMetadata can only be used for Instruction");
}

namespace llvm {
template <>
void InterleaveGroup<Instruction>::addMetadata(Instruction *NewInst) const {
  SmallVector<Value *, 4> VL;
  std::transform(Members.begin(), Members.end(), std::back_inserter(VL),
                 [](std::pair<int, Instruction *> p) { return p.second; });
  propagateMetadata(NewInst, VL);
}
} // namespace llvm
