LLVM: lib/Target/Hexagon/HexagonVectorCombine.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

34#include "llvm/IR/IntrinsicsHexagon.h"

45

49

50#include

51#include

52#include

53#include

54#include

55#include

56#include

57

58#define DEBUG_TYPE "hexagon-vc"

59

60

61

62

63#define DEFAULT_HVX_VTCM_PAGE_SIZE 131072

64

65using namespace llvm;

66

67namespace {

72

77

78class HexagonVectorCombine {

79public:

83 : F(F_), DL(F.getDataLayout()), AA(AA_), AC(AC_), DT(DT_),

84 SE(SE_), TLI(TLI_),

86

87 bool run();

88

89

91

92

93 Type *getByteTy(int ElemCount = 0) const;

94

95

96 Type *getBoolTy(int ElemCount = 0) const;

97

99

100 std::optional getIntValue(const Value *Val) const;

101

103

105

106 bool isTrue(const Value *Val) const;

107

108 bool isFalse(const Value *Val) const;

109

110

111 VectorType *getHvxTy(Type *ElemTy, bool Pair = false) const;

112

113 enum SizeKind {

114 Store,

115 Alloc,

116 };

117 int getSizeOf(const Value *Val, SizeKind Kind = Store) const;

118 int getSizeOf(const Type *Ty, SizeKind Kind = Store) const;

119 int getTypeAlignment(Type *Ty) const;

120 size_t length(Value *Val) const;

121 size_t length(Type *Ty) const;

122

124

126 int Length, int Where) const;

128 Value *Amt) const;

130 Value *Amt) const;

133 Value *Pad) const;

135 Type *ToTy) const;

139 unsigned Length) const;

144

150 unsigned ToWidth) const;

153

154 std::optional calculatePointerDifference(Value *Ptr0, Value *Ptr1) const;

155

156 unsigned getNumSignificantBits(const Value *V,

157 const Instruction *CtxI = nullptr) const;

159 const Instruction *CtxI = nullptr) const;

160

161 bool isSafeToClone(const Instruction &In) const;

162

163 template <typename T = std::vector<Instruction *>>

164 bool isSafeToMoveBeforeInBB(const Instruction &In,

166 const T &IgnoreInsts = {}) const;

167

168

169 [[maybe_unused]] bool isByteVecTy(Type *Ty) const;

170

179

180private:

182 int Start, int Length) const;

183};

184

185class AlignVectors {

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200public:

  // Keeps a reference to the shared HexagonVectorCombine context; all
  // target/IR queries (sizes, alignments, HVX legality) go through HVC.
  AlignVectors(const HexagonVectorCombine &HVC_) : HVC(HVC_) {}

202

203 bool run();

204

205private:

206 using InstList = std::vector<Instruction *>;

208

209 struct AddrInfo {

210 AddrInfo(const AddrInfo &) = default;

213 : Inst(I), Addr(A), ValTy(T), HaveAlign(H),

214 NeedAlign(HVC.getTypeAlignment(ValTy)) {}

215 AddrInfo &operator=(const AddrInfo &) = default;

216

217

223 int Offset = 0;

224

225 };

226 using AddrList = std::vector;

227

228 struct InstrLess {

230 return A->comesBefore(B);

231 }

232 };

233 using DepList = std::set<Instruction *, InstrLess>;

234

  // A group of loads or stores (plus their dependencies) that will be moved
  // next to each other so the whole group can be realigned as a unit.
  struct MoveGroup {
    MoveGroup(const AddrInfo &AI, Instruction *B, bool Hvx, bool Load)
        : Base(B), Main{AI.Inst}, Clones{}, IsHvx(Hvx), IsLoad(Load) {}
    MoveGroup() = default;
    Instruction *Base;  // Base (first) instruction of the address group.
    InstList Main;      // The memory instructions forming the group.
    InstList Deps;      // Upward dependencies of the Main instructions.
    InstMap Clones;     // Original dependency -> its clone (see moveTogether).
    bool IsHvx;         // True if the group consists of HVX-typed accesses.
    bool IsLoad;        // True for a load group, false for a store group.
  };

246 using MoveList = std::vector;

247

248 struct ByteSpan {

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264 struct Segment {

265

266 Segment(Value *Val, int Begin, int Len)

267 : Val(Val), Start(Begin), Size(Len) {}

268 Segment(const Segment &Seg) = default;

269 Segment &operator=(const Segment &Seg) = default;

270 Value *Val;

271 int Start;

272 int Size;

273 };

274

276 Block(Value *Val, int Len, int Pos) : Seg(Val, 0, Len), Pos(Pos) {}

277 Block(Value *Val, int Off, int Len, int Pos)

278 : Seg(Val, Off, Len), Pos(Pos) {}

280 Block &operator=(const Block &Blk) = default;

281 Segment Seg;

282 int Pos;

283 };

284

285 int extent() const;

286 ByteSpan section(int Start, int Length) const;

287 ByteSpan &shift(int Offset);

289

290 int size() const { return Blocks.size(); }

291 Block &operator[](int i) { return Blocks[i]; }

292 const Block &operator[](int i) const { return Blocks[i]; }

293

294 std::vector Blocks;

295

297 iterator begin() { return Blocks.begin(); }

298 iterator end() { return Blocks.end(); }

300 const_iterator begin() const { return Blocks.begin(); }

302 };

303

304 std::optional getAddrInfo(Instruction &In) const;

305 bool isHvx(const AddrInfo &AI) const;

306

307 [[maybe_unused]] bool isSectorTy(Type *Ty) const;

308

311 Value *getPassThrough(Value *Val) const;

312

314 int Adjust,

315 const InstMap &CloneMap = InstMap()) const;

317 int Alignment,

318 const InstMap &CloneMap = InstMap()) const;

319

324 int Alignment,

326

331 int Alignment,

333

340

342 bool createAddressGroups();

343 MoveList createLoadGroups(const AddrList &Group) const;

344 MoveList createStoreGroups(const AddrList &Group) const;

345 bool moveTogether(MoveGroup &Move) const;

346 template

348

349 void realignLoadGroup(IRBuilderBase &Builder, const ByteSpan &VSpan,

350 int ScLen, Value *AlignVal, Value *AlignAddr) const;

351 void realignStoreGroup(IRBuilderBase &Builder, const ByteSpan &VSpan,

352 int ScLen, Value *AlignVal, Value *AlignAddr) const;

353 bool realignGroup(const MoveGroup &Move) const;

354

356 int Alignment) const;

357

362

363 std::map<Instruction *, AddrList> AddrGroups;

364 const HexagonVectorCombine &HVC;

365};

366

368 const AlignVectors::AddrInfo &AI) {

369 OS << "Inst: " << AI.Inst << " " << *AI.Inst << '\n';

370 OS << "Addr: " << *AI.Addr << '\n';

371 OS << "Type: " << *AI.ValTy << '\n';

372 OS << "HaveAlign: " << AI.HaveAlign.value() << '\n';

373 OS << "NeedAlign: " << AI.NeedAlign.value() << '\n';

374 OS << "Offset: " << AI.Offset;

375 return OS;

376}

377

379 const AlignVectors::MoveGroup &MG) {

380 OS << "IsLoad:" << (MG.IsLoad ? "yes" : "no");

381 OS << ", IsHvx:" << (MG.IsHvx ? "yes" : "no") << '\n';

382 OS << "Main\n";

384 OS << " " << *I << '\n';

385 OS << "Deps\n";

387 OS << " " << *I << '\n';

388 OS << "Clones\n";

389 for (auto [K, V] : MG.Clones) {

390 OS << " ";

391 K->printAsOperand(OS, false);

392 OS << "\t-> " << *V << '\n';

393 }

394 return OS;

395}

396

399 OS << " @" << B.Pos << " [" << B.Seg.Start << ',' << B.Seg.Size << "] ";

400 if (B.Seg.Val == reinterpret_cast<const Value *>(&B)) {

401 OS << "(self:" << B.Seg.Val << ')';

402 } else if (B.Seg.Val != nullptr) {

403 OS << *B.Seg.Val;

404 } else {

405 OS << "(null)";

406 }

407 return OS;

408}

409

411 const AlignVectors::ByteSpan &BS) {

412 OS << "ByteSpan[size=" << BS.size() << ", extent=" << BS.extent() << '\n';

413 for (const AlignVectors::ByteSpan::Block &B : BS)

414 OS << B << '\n';

415 OS << ']';

416 return OS;

417}

418

419class HvxIdioms {

420public:

421 enum DstQualifier {

422 Undefined = 0,

423 Arithmetic,

424 LdSt,

425 LLVM_Gather,

426 LLVM_Scatter,

427 HEX_Gather_Scatter,

428 HEX_Gather,

429 HEX_Scatter,

431 };

432

  // Cache the HVX vector types with i32 elements, since idiom lowering
  // uses them repeatedly: a single HVX vector and an HVX vector pair
  // (second argument of getHvxTy selects single vs pair).
  HvxIdioms(const HexagonVectorCombine &HVC_) : HVC(HVC_) {
    auto *Int32Ty = HVC.getIntTy(32);
    HvxI32Ty = HVC.getHvxTy(Int32Ty, false); // single HVX vector of i32
    HvxP32Ty = HVC.getHvxTy(Int32Ty, true);  // HVX vector pair of i32
  }

438

439 bool run();

440

441private:

443

444

445

446

447 struct SValue {

449 Signedness Sgn;

450 };

451

452 struct FxpOp {

453 unsigned Opcode;

454 unsigned Frac;

455 SValue X, Y;

456

457 std::optional RoundAt;

459 };

460

462 -> std::pair<unsigned, Signedness>;

463 auto canonSgn(SValue X, SValue Y) const -> std::pair<SValue, SValue>;

464

465 auto matchFxpMul(Instruction &In) const -> std::optional;

466 auto processFxpMul(Instruction &In, const FxpOp &Op) const -> Value *;

467

469 const FxpOp &Op) const -> Value *;

470 auto createMulQ15(IRBuilderBase &Builder, SValue X, SValue Y,

471 bool Rounding) const -> Value *;

472 auto createMulQ31(IRBuilderBase &Builder, SValue X, SValue Y,

473 bool Rounding) const -> Value *;

474

476 Value *CarryIn = nullptr) const

477 -> std::pair<Value *, Value *>;

478 auto createMul16(IRBuilderBase &Builder, SValue X, SValue Y) const -> Value *;

479 auto createMulH16(IRBuilderBase &Builder, SValue X, SValue Y) const

481 auto createMul32(IRBuilderBase &Builder, SValue X, SValue Y) const

482 -> std::pair<Value *, Value *>;

488

489 bool matchScatter(Instruction &In) const;

493

496 const HexagonVectorCombine &HVC;

497

499};

500

502 const HvxIdioms::FxpOp &Op) {

503 static const char *SgnNames[] = {"Positive", "Signed", "Unsigned"};

505 if (Op.RoundAt.has_value()) {

506 if (Op.Frac != 0 && *Op.RoundAt == Op.Frac - 1) {

507 OS << ":rnd";

508 } else {

509 OS << " + 1<<" << *Op.RoundAt;

510 }

511 }

512 OS << "\n X:(" << SgnNames[Op.X.Sgn] << ") " << *Op.X.Val << "\n"

513 << " Y:(" << SgnNames[Op.Y.Sgn] << ") " << *Op.Y.Val;

514 return OS;

515}

516

517}

518

519namespace {

520

// Return MaybeT if it is non-null and an unordered (non-atomic or
// monotonic-at-most) memory access, otherwise return nullptr.
//
// Note: the template parameter list was missing in the damaged source
// (`template T *...` does not parse); restored to `template <typename T>`.
template <typename T> T *getIfUnordered(T *MaybeT) {
  return MaybeT && MaybeT->isUnordered() ? MaybeT : nullptr;
}

526}

529}

532}

533

// Remove from the map every element for which the predicate returns true.
// (Pre-C++20 replacement for std::erase_if on std::map.)
//
// Visual Studio older than 2019 16.6 (_MSC_VER 1926) miscompiles the
// variadic form, so spell out the two template parameters there.
#if !defined(_MSC_VER) || _MSC_VER >= 1926
template <typename Pred, typename... Ts>
void erase_if(std::map<Ts...> &map, Pred p)
#else
template <typename Pred, typename T, typename U>
void erase_if(std::map<T, U> &map, Pred p)
#endif
{
  // std::map::erase invalidates only the erased iterator, so advancing
  // via the returned iterator is safe.
  auto It = map.begin();
  while (It != map.end()) {
    if (p(*It))
      It = map.erase(It);
    else
      ++It;
  }
}

552

553

554template <typename Pred, typename T> void erase_if(T &&container, Pred p) {

556}

557

558}

559

560

561

562

563

564

565

566

567

568

569

570

571

572

573

574

575

576

577

578

579

580

581

582

583

584

585

586

587

588

589

590

591

592auto AlignVectors::ByteSpan::extent() const -> int {

593 if (size() == 0)

594 return 0;

595 int Min = Blocks[0].Pos;

596 int Max = Blocks[0].Pos + Blocks[0].Seg.Size;

597 for (int i = 1, e = size(); i != e; ++i) {

598 Min = std::min(Min, Blocks[i].Pos);

599 Max = std::max(Max, Blocks[i].Pos + Blocks[i].Seg.Size);

600 }

601 return Max - Min;

602}

603

604auto AlignVectors::ByteSpan::section(int Start, int Length) const -> ByteSpan {

606 for (const ByteSpan::Block &B : Blocks) {

607 int L = std::max(B.Pos, Start);

608 int R = std::min(B.Pos + B.Seg.Size, Start + Length);

609 if (L < R) {

610

611 int Off = L > B.Pos ? L - B.Pos : 0;

612 Section.Blocks.emplace_back(B.Seg.Val, B.Seg.Start + Off, R - L, L);

613 }

614 }

616}

617

618auto AlignVectors::ByteSpan::shift(int Offset) -> ByteSpan & {

619 for (Block &B : Blocks)

621 return *this;

622}

623

625 SmallVector<Value *, 8> Values(Blocks.size());

626 for (int i = 0, e = Blocks.size(); i != e; ++i)

627 Values[i] = Blocks[i].Seg.Val;

628 return Values;

629}

630

631auto AlignVectors::getAddrInfo(Instruction &In) const

632 -> std::optional {

634 return AddrInfo(HVC, L, L->getPointerOperand(), L->getType(),

635 L->getAlign());

637 return AddrInfo(HVC, S, S->getPointerOperand(),

638 S->getValueOperand()->getType(), S->getAlign());

641 switch (ID) {

642 case Intrinsic::masked_load:

643 return AddrInfo(HVC, II, II->getArgOperand(0), II->getType(),

644 II->getParamAlign(0).valueOrOne());

645 case Intrinsic::masked_store:

646 return AddrInfo(HVC, II, II->getArgOperand(1),

647 II->getArgOperand(0)->getType(),

648 II->getParamAlign(1).valueOrOne());

649 }

650 }

651 return std::nullopt;

652}

653

654auto AlignVectors::isHvx(const AddrInfo &AI) const -> bool {

656}

657

658auto AlignVectors::getPayload(Value *Val) const -> Value * {

662 ID = II->getIntrinsicID();

664 return In->getOperand(0);

665 }

666 return Val;

667}

668

669auto AlignVectors::getMask(Value *Val) const -> Value * {

671 switch (II->getIntrinsicID()) {

672 case Intrinsic::masked_load:

673 return II->getArgOperand(1);

674 case Intrinsic::masked_store:

675 return II->getArgOperand(2);

676 }

677 }

678

679 Type *ValTy = getPayload(Val)->getType();

683}

684

685auto AlignVectors::getPassThrough(Value *Val) const -> Value * {

687 if (II->getIntrinsicID() == Intrinsic::masked_load)

688 return II->getArgOperand(2);

689 }

691}

692

693auto AlignVectors::createAdjustedPointer(IRBuilderBase &Builder, Value *Ptr,

694 Type *ValTy, int Adjust,

695 const InstMap &CloneMap) const

698 if (Instruction *New = CloneMap.lookup(I))

699 Ptr = New;

700 return Builder.CreatePtrAdd(Ptr, HVC.getConstInt(Adjust), "gep");

701}

702

703auto AlignVectors::createAlignedPointer(IRBuilderBase &Builder, Value *Ptr,

704 Type *ValTy, int Alignment,

705 const InstMap &CloneMap) const

709 for (auto [Old, New] : CloneMap)

710 I->replaceUsesOfWith(Old, New);

711 return I;

712 }

713 return V;

714 };

715 Value *AsInt = Builder.CreatePtrToInt(Ptr, HVC.getIntTy(), "pti");

716 Value *Mask = HVC.getConstInt(-Alignment);

717 Value *And = Builder.CreateAnd(remap(AsInt), Mask, "and");

718 return Builder.CreateIntToPtr(

719 And, PointerType::getUnqual(ValTy->getContext()), "itp");

720}

721

722auto AlignVectors::createLoad(IRBuilderBase &Builder, Type *ValTy, Value *Ptr,

723 Value *Predicate, int Alignment, Value *Mask,

726 bool HvxHasPredLoad = HVC.HST.useHVXV62Ops();

727

728 if (Predicate) {

730 "Expectning scalar predicate");

731 if (HVC.isFalse(Predicate))

733 if (!HVC.isTrue(Predicate) && HvxHasPredLoad) {

734 Value *Load = createPredicatedLoad(Builder, ValTy, Ptr, Predicate,

735 Alignment, MDSources);

736 return Builder.CreateSelect(Mask, Load, PassThru);

737 }

738

739 }

740 assert(!HVC.isUndef(Mask));

741 if (HVC.isZero(Mask))

742 return PassThru;

743 if (HVC.isTrue(Mask))

744 return createSimpleLoad(Builder, ValTy, Ptr, Alignment, MDSources);

745

747 Mask, PassThru, "mld");

750}

751

752auto AlignVectors::createSimpleLoad(IRBuilderBase &Builder, Type *ValTy,

753 Value *Ptr, int Alignment,

757 Builder.CreateAlignedLoad(ValTy, Ptr, Align(Alignment), "ald");

760}

761

762auto AlignVectors::createPredicatedLoad(IRBuilderBase &Builder, Type *ValTy,

764 int Alignment,

768 "Predicates 'scalar' vector loads not yet supported");

770 assert(Predicate->getType()->isVectorTy() && "Expectning scalar predicate");

771 assert(HVC.getSizeOf(ValTy, HVC.Alloc) % Alignment == 0);

772 if (HVC.isFalse(Predicate))

774 if (HVC.isTrue(Predicate))

775 return createSimpleLoad(Builder, ValTy, Ptr, Alignment, MDSources);

776

777 auto V6_vL32b_pred_ai = HVC.HST.getIntrinsicId(Hexagon::V6_vL32b_pred_ai);

778

779 return HVC.createHvxIntrinsic(Builder, V6_vL32b_pred_ai, ValTy,

780 {Predicate, Ptr, HVC.getConstInt(0)}, {},

781 MDSources);

782}

783

784auto AlignVectors::createStore(IRBuilderBase &Builder, Value *Val, Value *Ptr,

785 Value *Predicate, int Alignment, Value *Mask,

787 if (HVC.isZero(Mask) || HVC.isUndef(Val) || HVC.isUndef(Mask))

789 assert(!Predicate || (Predicate->getType()->isVectorTy() &&

790 "Expectning scalar predicate"));

791 if (Predicate) {

792 if (HVC.isFalse(Predicate))

794 if (HVC.isTrue(Predicate))

796 }

797

798

799 if (HVC.isTrue(Mask)) {

800 if (Predicate) {

801 return createPredicatedStore(Builder, Val, Ptr, Predicate, Alignment,

802 MDSources);

803 }

804

805 return createSimpleStore(Builder, Val, Ptr, Alignment, MDSources);

806 }

807

808

809 if (!Predicate) {

811 Builder.CreateMaskedStore(Val, Ptr, Align(Alignment), Mask);

814 }

815

816

817

818 Value *PredLoad = createPredicatedLoad(Builder, Val->getType(), Ptr,

819 Predicate, Alignment, MDSources);

820 Value *Mux = Builder.CreateSelect(Mask, Val, PredLoad);

821 return createPredicatedStore(Builder, Mux, Ptr, Predicate, Alignment,

822 MDSources);

823}

824

825auto AlignVectors::createSimpleStore(IRBuilderBase &Builder, Value *Val,

826 Value *Ptr, int Alignment,

832}

833

834auto AlignVectors::createPredicatedStore(IRBuilderBase &Builder, Value *Val,

836 int Alignment,

840 "Predicates 'scalar' vector stores not yet supported");

842 if (HVC.isFalse(Predicate))

844 if (HVC.isTrue(Predicate))

845 return createSimpleStore(Builder, Val, Ptr, Alignment, MDSources);

846

847 assert(HVC.getSizeOf(Val, HVC.Alloc) % Alignment == 0);

848 auto V6_vS32b_pred_ai = HVC.HST.getIntrinsicId(Hexagon::V6_vS32b_pred_ai);

849

850 return HVC.createHvxIntrinsic(Builder, V6_vS32b_pred_ai, nullptr,

851 {Predicate, Ptr, HVC.getConstInt(0), Val}, {},

852 MDSources);

853}

854

855auto AlignVectors::getUpwardDeps(Instruction *In, Instruction *Base) const

856 -> DepList {

858 assert(In->getParent() == Parent &&

859 "Base and In should be in the same block");

860 assert(Base->comesBefore(In) && "Base should come before In");

861

862 DepList Deps;

863 std::deque<Instruction *> WorkQ = {In};

864 while (!WorkQ.empty()) {

866 WorkQ.pop_front();

867 if (D != In)

868 Deps.insert(D);

869 for (Value *Op : D->operands()) {

871 if (I->getParent() == Parent && Base->comesBefore(I))

872 WorkQ.push_back(I);

873 }

874 }

875 }

876 return Deps;

877}

878

879auto AlignVectors::createAddressGroups() -> bool {

880

881

882 AddrList WorkStack;

883

884 auto findBaseAndOffset = [&](AddrInfo &AI) -> std::pair<Instruction *, int> {

885 for (AddrInfo &W : WorkStack) {

886 if (auto D = HVC.calculatePointerDifference(AI.Addr, W.Addr))

887 return std::make_pair(W.Inst, *D);

888 }

889 return std::make_pair(nullptr, 0);

890 };

891

892 auto traverseBlock = [&](DomTreeNode *DomN, auto Visit) -> void {

894 for (Instruction &I : Block) {

895 auto AI = this->getAddrInfo(I);

896 if (!AI)

897 continue;

898 auto F = findBaseAndOffset(*AI);

900 if (Instruction *BI = F.first) {

901 AI->Offset = F.second;

902 GroupInst = BI;

903 } else {

904 WorkStack.push_back(*AI);

905 GroupInst = AI->Inst;

906 }

907 AddrGroups[GroupInst].push_back(*AI);

908 }

909

911 Visit(C, Visit);

912

913 while (!WorkStack.empty() && WorkStack.back().Inst->getParent() == &Block)

914 WorkStack.pop_back();

915 };

916

917 traverseBlock(HVC.DT.getRootNode(), traverseBlock);

918 assert(WorkStack.empty());

919

920

921

922

923 erase_if(AddrGroups, [](auto &G) { return G.second.size() == 1; });

924

925 erase_if(AddrGroups, [&](auto &G) {

927 G.second, [&](auto &I) { return HVC.HST.isTypeForHVX(I.ValTy); });

928 });

929

930 return !AddrGroups.empty();

931}

932

933auto AlignVectors::createLoadGroups(const AddrList &Group) const -> MoveList {

934

935

936

937 unsigned SizeLimit = VAGroupSizeLimit;

939 return {};

940

941 auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {

942 assert(!Move.Main.empty() && "Move group should have non-empty Main");

943 if (Move.Main.size() >= SizeLimit)

944 return false;

945

946 if (Move.IsHvx != isHvx(Info))

947 return false;

948

950 if (Base->getParent() != Info.Inst->getParent())

951 return false;

952

953 if (!HVC.isSafeToMoveBeforeInBB(*Info.Inst, Base->getIterator()))

954 return false;

955

956 auto isSafeToCopyAtBase = [&](const Instruction *I) {

957 return HVC.isSafeToMoveBeforeInBB(*I, Base->getIterator()) &&

958 HVC.isSafeToClone(*I);

959 };

960 DepList Deps = getUpwardDeps(Info.Inst, Base);

962 return false;

963

964 Move.Main.push_back(Info.Inst);

966 return true;

967 };

968

969 MoveList LoadGroups;

970

971 for (const AddrInfo &Info : Group) {

972 if (Info.Inst->mayReadFromMemory())

973 continue;

974 if (LoadGroups.empty() || !tryAddTo(Info, LoadGroups.back()))

975 LoadGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), true);

976 }

977

978

979 erase_if(LoadGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });

980

981

983 erase_if(LoadGroups, [](const MoveGroup &G) { return G.IsHvx; });

984

985 return LoadGroups;

986}

987

988auto AlignVectors::createStoreGroups(const AddrList &Group) const -> MoveList {

989

990

991

992 unsigned SizeLimit = VAGroupSizeLimit;

994 return {};

995

996 auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {

997 assert(!Move.Main.empty() && "Move group should have non-empty Main");

998 if (Move.Main.size() >= SizeLimit)

999 return false;

1000

1001

1002 assert(Info.Inst->getType()->isVoidTy() &&

1003 "Not handling stores with return values");

1004

1005 if (Move.IsHvx != isHvx(Info))

1006 return false;

1007

1008

1009

1011 if (Base->getParent() != Info.Inst->getParent())

1012 return false;

1013 if (!HVC.isSafeToMoveBeforeInBB(*Info.Inst, Base->getIterator(), Move.Main))

1014 return false;

1015 Move.Main.push_back(Info.Inst);

1016 return true;

1017 };

1018

1019 MoveList StoreGroups;

1020

1021 for (auto I = Group.rbegin(), E = Group.rend(); I != E; ++I) {

1022 const AddrInfo &Info = *I;

1023 if (Info.Inst->mayWriteToMemory())

1024 continue;

1025 if (StoreGroups.empty() || !tryAddTo(Info, StoreGroups.back()))

1026 StoreGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), false);

1027 }

1028

1029

1030 erase_if(StoreGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });

1031

1032

1034 erase_if(StoreGroups, [](const MoveGroup &G) { return G.IsHvx; });

1035

1036

1037

1038

1039 if (!VADoFullStores) {

1040 erase_if(StoreGroups, [this](const MoveGroup &G) {

1041 return G.IsHvx && llvm::all_of(G.Main, [this](Instruction *S) {

1042 auto MaybeInfo = this->getAddrInfo(*S);

1043 assert(MaybeInfo.has_value());

1044 return HVC.HST.isHVXVectorType(

1045 EVT::getEVT(MaybeInfo->ValTy, false));

1046 });

1047 });

1048 }

1049

1050 return StoreGroups;

1051}

1052

1053auto AlignVectors::moveTogether(MoveGroup &Move) const -> bool {

1054

1055 assert(!Move.Main.empty() && "Move group should have non-empty Main");

1057

1058 if (Move.IsLoad) {

1059

1060

1061 Move.Clones = cloneBefore(Where->getIterator(), Move.Deps);

1062

1064 for (Instruction *M : Main) {

1065 if (M != Where)

1066 M->moveAfter(Where);

1067 for (auto [Old, New] : Move.Clones)

1068 M->replaceUsesOfWith(Old, New);

1069 Where = M;

1070 }

1071

1072 for (int i = 0, e = Move.Deps.size(); i != e; ++i)

1073 Move.Deps[i] = Move.Clones[Move.Deps[i]];

1074 } else {

1075

1076

1077

1078 assert(Move.Deps.empty());

1079

1081 for (Instruction *M : Main.drop_front(1)) {

1083 Where = M;

1084 }

1085 }

1086

1087 return Move.Main.size() + Move.Deps.size() > 1;

1088}

1089

1090template

1092 -> InstMap {

1093 InstMap Map;

1094

1095 for (Instruction *I : Insts) {

1096 assert(HVC.isSafeToClone(*I));

1098 C->setName(Twine("c.") + I->getName() + ".");

1099 C->insertBefore(To);

1100

1101 for (auto [Old, New] : Map)

1102 C->replaceUsesOfWith(Old, New);

1103 Map.insert(std::make_pair(I, C));

1104 }

1105 return Map;

1106}

1107

1108auto AlignVectors::realignLoadGroup(IRBuilderBase &Builder,

1109 const ByteSpan &VSpan, int ScLen,

1110 Value *AlignVal, Value *AlignAddr) const

1111 -> void {

1113

1114 Type *SecTy = HVC.getByteTy(ScLen);

1115 int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;

1116 bool DoAlign = !HVC.isZero(AlignVal);

1118 BasicBlock *BaseBlock = Builder.GetInsertBlock();

1119

1120 ByteSpan ASpan;

1123

1124

1126

1127

1128

1129

1130

1131

1132

1133

1134

1135

1136

1137

1138

1139

1140

1141

1142

1143

1144

1145

1146

1147

1148 for (int Index = 0; Index != NumSectors; ++Index)

1149 ASpan.Blocks.emplace_back(nullptr, ScLen, Index * ScLen);

1150 for (int Index = 0; Index != NumSectors; ++Index) {

1151 ASpan.Blocks[Index].Seg.Val =

1152 reinterpret_cast<Value *>(&ASpan.Blocks[Index]);

1153 }

1154

1155

1156

1157

1158 DenseMap<void *, Instruction *> EarliestUser;

1160 if (B == nullptr)

1161 return true;

1162 if (A == nullptr)

1163 return false;

1164 assert(A->getParent() == B->getParent());

1165 return A->comesBefore(B);

1166 };

1167 auto earliestUser = [&](const auto &Uses) {

1169 for (const Use &U : Uses) {

1171 assert(I != nullptr && "Load used in a non-instruction?");

1172

1173

1174

1175 if (I->getParent() == BaseBlock) {

1177 User = std::min(User, I, isEarlier);

1178 } else {

1180 }

1181 }

1182 return User;

1183 };

1184

1185 for (const ByteSpan::Block &B : VSpan) {

1186 ByteSpan ASection = ASpan.section(B.Pos, B.Seg.Size);

1187 for (const ByteSpan::Block &S : ASection) {

1188 auto &EU = EarliestUser[S.Seg.Val];

1189 EU = std::min(EU, earliestUser(B.Seg.Val->uses()), isEarlier);

1190 }

1191 }

1192

1194 dbgs() << "ASpan:\n" << ASpan << '\n';

1195 dbgs() << "Earliest users of ASpan:\n";

1196 for (auto &[Val, User] : EarliestUser) {

1197 dbgs() << Val << "\n ->" << *User << '\n';

1198 }

1199 });

1200

1201 auto createLoad = [&](IRBuilderBase &Builder, const ByteSpan &VSpan,

1202 int Index, bool MakePred) {

1204 createAdjustedPointer(Builder, AlignAddr, SecTy, Index * ScLen);

1206 MakePred ? makeTestIfUnaligned(Builder, AlignVal, ScLen) : nullptr;

1207

1208

1209

1210 int Start = (Index - DoAlign) * ScLen;

1211 int Width = (1 + DoAlign) * ScLen;

1212 return this->createLoad(Builder, SecTy, Ptr, Predicate, ScLen, True, Undef,

1213 VSpan.section(Start, Width).values());

1214 };

1215

1217

1218 assert(In->getParent() == To->getParent());

1219 DepList Deps = getUpwardDeps(&*In, &*To);

1220 In->moveBefore(To);

1221

1222 InstMap Map = cloneBefore(In, Deps);

1223 for (auto [Old, New] : Map)

1224 In->replaceUsesOfWith(Old, New);

1225 };

1226

1227

1228 LLVM_DEBUG(dbgs() << "Creating loads for ASpan sectors\n");

1229 for (int Index = 0; Index != NumSectors + 1; ++Index) {

1230

1231

1232

1233

1234

1235

1237 DoAlign && Index > 0 ? EarliestUser[&ASpan[Index - 1]] : nullptr;

1239 Index < NumSectors ? EarliestUser[&ASpan[Index]] : nullptr;

1240 if (auto *Where = std::min(PrevAt, ThisAt, isEarlier)) {

1243 createLoad(Builder, VSpan, Index, DoAlign && Index == NumSectors);

1244

1245

1246

1247

1248

1249

1251 if (!HVC.isSafeToMoveBeforeInBB(*Load, BasePos))

1252 moveBefore(Load->getIterator(), BasePos);

1253 }

1254 LLVM_DEBUG(dbgs() << "Loads[" << Index << "]:" << *Loads[Index] << '\n');

1255 }

1256 }

1257

1258

1259 LLVM_DEBUG(dbgs() << "Creating values for ASpan sectors\n");

1260 for (int Index = 0; Index != NumSectors; ++Index) {

1261 ASpan[Index].Seg.Val = nullptr;

1262 if (auto *Where = EarliestUser[&ASpan[Index]]) {

1265 assert(Val != nullptr);

1266 if (DoAlign) {

1268 assert(NextLoad != nullptr);

1269 Val = HVC.vralignb(Builder, Val, NextLoad, AlignVal);

1270 }

1271 ASpan[Index].Seg.Val = Val;

1272 LLVM_DEBUG(dbgs() << "ASpan[" << Index << "]:" << *Val << '\n');

1273 }

1274 }

1275

1276 for (const ByteSpan::Block &B : VSpan) {

1277 ByteSpan ASection = ASpan.section(B.Pos, B.Seg.Size).shift(-B.Pos);

1280

1281

1282

1283

1284 std::vector<ByteSpan::Block *> ABlocks;

1285 for (ByteSpan::Block &S : ASection) {

1286 if (S.Seg.Val != nullptr)

1287 ABlocks.push_back(&S);

1288 }

1290 [&](const ByteSpan::Block *A, const ByteSpan::Block *B) {

1293 });

1294 for (ByteSpan::Block *S : ABlocks) {

1295

1296

1299 Value *Pay = HVC.vbytes(Builder, getPayload(S->Seg.Val));

1300 Accum =

1301 HVC.insertb(Builder, Accum, Pay, S->Seg.Start, S->Seg.Size, S->Pos);

1302 }

1303

1304

1305

1306

1307

1308

1309 Type *ValTy = getPayload(B.Seg.Val)->getType();

1312 getPassThrough(B.Seg.Val), "sel");

1314 }

1315}

1316

1317auto AlignVectors::realignStoreGroup(IRBuilderBase &Builder,

1318 const ByteSpan &VSpan, int ScLen,

1319 Value *AlignVal, Value *AlignAddr) const

1320 -> void {

1322

1323 Type *SecTy = HVC.getByteTy(ScLen);

1324 int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;

1325 bool DoAlign = !HVC.isZero(AlignVal);

1326

1327

1328 ByteSpan ASpanV, ASpanM;

1329

1330

1331

1332 auto MakeVec = [](IRBuilderBase &Builder, Value *Val) -> Value * {

1335 return Val;

1336 auto *VecTy = VectorType::get(Ty, 1, false);

1337 return Builder.CreateBitCast(Val, VecTy, "cst");

1338 };

1339

1340

1341

1342 for (int Index = (DoAlign ? -1 : 0); Index != NumSectors + DoAlign; ++Index) {

1343

1344

1345 ByteSpan VSection =

1346 VSpan.section(Index * ScLen, ScLen).shift(-Index * ScLen);

1351 for (ByteSpan::Block &S : VSection) {

1352 Value *Pay = getPayload(S.Seg.Val);

1353 Value *Mask = HVC.rescale(Builder, MakeVec(Builder, getMask(S.Seg.Val)),

1354 Pay->getType(), HVC.getByteTy());

1355 Value *PartM = HVC.insertb(Builder, Zero, HVC.vbytes(Builder, Mask),

1356 S.Seg.Start, S.Seg.Size, S.Pos);

1357 AccumM = Builder.CreateOr(AccumM, PartM);

1358

1359 Value *PartV = HVC.insertb(Builder, Undef, HVC.vbytes(Builder, Pay),

1360 S.Seg.Start, S.Seg.Size, S.Pos);

1361

1364 }

1365 ASpanV.Blocks.emplace_back(AccumV, ScLen, Index * ScLen);

1366 ASpanM.Blocks.emplace_back(AccumM, ScLen, Index * ScLen);

1367 }

1368

1370 dbgs() << "ASpanV before vlalign:\n" << ASpanV << '\n';

1371 dbgs() << "ASpanM before vlalign:\n" << ASpanM << '\n';

1372 });

1373

1374

1375 if (DoAlign) {

1376 for (int Index = 1; Index != NumSectors + 2; ++Index) {

1377 Value *PrevV = ASpanV[Index - 1].Seg.Val, *ThisV = ASpanV[Index].Seg.Val;

1378 Value *PrevM = ASpanM[Index - 1].Seg.Val, *ThisM = ASpanM[Index].Seg.Val;

1380 ASpanV[Index - 1].Seg.Val = HVC.vlalignb(Builder, PrevV, ThisV, AlignVal);

1381 ASpanM[Index - 1].Seg.Val = HVC.vlalignb(Builder, PrevM, ThisM, AlignVal);

1382 }

1383 }

1384

1386 dbgs() << "ASpanV after vlalign:\n" << ASpanV << '\n';

1387 dbgs() << "ASpanM after vlalign:\n" << ASpanM << '\n';

1388 });

1389

1390 auto createStore = [&](IRBuilderBase &Builder, const ByteSpan &ASpanV,

1391 const ByteSpan &ASpanM, int Index, bool MakePred) {

1394 if (HVC.isUndef(Val) || HVC.isZero(Mask))

1395 return;

1397 createAdjustedPointer(Builder, AlignAddr, SecTy, Index * ScLen);

1399 MakePred ? makeTestIfUnaligned(Builder, AlignVal, ScLen) : nullptr;

1400

1401

1402

1403 int Start = (Index - DoAlign) * ScLen;

1404 int Width = (1 + DoAlign) * ScLen;

1405 this->createStore(Builder, Val, Ptr, Predicate, ScLen,

1406 HVC.vlsb(Builder, Mask),

1407 VSpan.section(Start, Width).values());

1408 };

1409

1410 for (int Index = 0; Index != NumSectors + DoAlign; ++Index) {

1411 createStore(Builder, ASpanV, ASpanM, Index, DoAlign && Index == NumSectors);

1412 }

1413}

1414

// Realign one MoveGroup of HVX loads/stores so the group can be serviced by
// aligned vector accesses: pick the member with the best known alignment (or
// compute the address alignment dynamically), build AlignAddr/AlignVal, map
// the members into a ByteSpan, and hand off to realignLoadGroup /
// realignStoreGroup. Returns true when the group was rewritten (the original
// instructions are erased at the end).
// NOTE(review): this rendering dropped several source lines (e.g. the
// max_element call inside getMaxOf at 1425, the llvm::copy_if populating
// MoveInfos at 1447, the IRBuilder declaration at 1474, and the loop header
// enclosing the replaceUsesOfWith call at 1508) — verify against the
// upstream file before editing.
1415auto AlignVectors::realignGroup(const MoveGroup &Move) const -> bool {

1416 LLVM_DEBUG(dbgs() << "Realigning group:\n" << Move << '\n');

1417

1418

1419 if (!Move.IsHvx)

1420 return false;

1421

1422

1423

// Helper: element of Range maximizing GetValue (the max_element call on the
// next original line is missing from this rendering).
1424 auto getMaxOf = [](auto Range, auto GetValue) {

1426 return GetValue(A) < GetValue(B);

1427 });

1428 };

1429

1430 const AddrList &BaseInfos = AddrGroups.at(Move.Base);

1431

1432

1433

1434

1435

1436

1437

1438

1439

1440

1441

1442

1443

1444

// Collect the AddrInfos of this group's members (the copy_if call header at
// line 1447 is missing from this rendering).
1445 std::set<Instruction *> TestSet(Move.Main.begin(), Move.Main.end());

1446 AddrList MoveInfos;

1448 BaseInfos, std::back_inserter(MoveInfos),

1449 [&TestSet](const AddrInfo &AI) { return TestSet.count(AI.Inst); });

1450

1451

1452 const AddrInfo &WithMaxAlign =

1453 getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.HaveAlign; });

1454 Align MaxGiven = WithMaxAlign.HaveAlign;

1455

1456

// Member with the smallest offset (maximizing -Offset).
1457 const AddrInfo &WithMinOffset =

1458 getMaxOf(MoveInfos, [](const AddrInfo &AI) { return -AI.Offset; });

1459

1460 const AddrInfo &WithMaxNeeded =

1461 getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.NeedAlign; });

1462 Align MinNeeded = WithMaxNeeded.NeedAlign;

1463

1464

1465

1466

// For stores, new code is inserted after the first member, not before it.
1467 Instruction *InsertAt = Move.Main.front();

1468 if (!Move.IsLoad) {

1469

1471 InsertAt = &*std::next(InsertAt->getIterator());

1472 }

1473

1475 InstSimplifyFolder(HVC.DL));

1476 Value *AlignAddr = nullptr;

1477 Value *AlignVal = nullptr;

1478

// Fast path: some member already carries enough known alignment, so the
// aligned base address can be computed statically.
1479 if (MinNeeded <= MaxGiven) {

1480 int Start = WithMinOffset.Offset;

1481 int OffAtMax = WithMaxAlign.Offset;

1482

1483

1484

1485

1486

1487

1488 int Adjust = -alignTo(OffAtMax - Start, MinNeeded.value());

1489 AlignAddr = createAdjustedPointer(Builder, WithMaxAlign.Addr,

1490 WithMaxAlign.ValTy, Adjust, Move.Clones);

1491 int Diff = Start - (OffAtMax + Adjust);

1492 AlignVal = HVC.getConstInt(Diff);

1494 assert(static_cast<decltype(MinNeeded.value())>(Diff) < MinNeeded.value());

// Slow path: align the pointer at run time; AlignVal is the raw address so
// the realignment code can extract the misalignment from it.
1495 } else {

1496

1497

1498

1499

1500

1501

1502

1503 AlignAddr =

1504 createAlignedPointer(Builder, WithMinOffset.Addr, WithMinOffset.ValTy,

1505 MinNeeded.value(), Move.Clones);

1506 AlignVal =

1507 Builder.CreatePtrToInt(WithMinOffset.Addr, HVC.getIntTy(), "pti");

// NOTE(review): the loop header enclosing this clone-rewrite (defining `I`,
// original line 1508) is missing from this rendering.
1509 for (auto [Old, New] : Move.Clones)

1510 I->replaceUsesOfWith(Old, New);

1511 }

1512 }

1513

// Describe every member as (instruction, size, offset-from-group-start).
1514 ByteSpan VSpan;

1515 for (const AddrInfo &AI : MoveInfos) {

1516 VSpan.Blocks.emplace_back(AI.Inst, HVC.getSizeOf(AI.ValTy),

1517 AI.Offset - WithMinOffset.Offset);

1518 }

1519

1520

1521

1522

1524 : std::max(MinNeeded.value(), 4);

1525 assert(!Move.IsHvx || ScLen == 64 || ScLen == 128);

1526 assert(Move.IsHvx || ScLen == 4 || ScLen == 8);

1527

1529 dbgs() << "ScLen: " << ScLen << "\n";

1530 dbgs() << "AlignVal:" << *AlignVal << "\n";

1531 dbgs() << "AlignAddr:" << *AlignAddr << "\n";

1532 dbgs() << "VSpan:\n" << VSpan << '\n';

1533 });

1534

1535 if (Move.IsLoad)

1536 realignLoadGroup(Builder, VSpan, ScLen, AlignVal, AlignAddr);

1537 else

1538 realignStoreGroup(Builder, VSpan, ScLen, AlignVal, AlignAddr);

1539

// The realigned code fully replaces the original accesses.
1540 for (auto *Inst : Move.Main)

1541 Inst->eraseFromParent();

1542

1543 return true;

1544}

1545

// Emit a runtime misalignment test: (AlignVal & (Alignment-1)) != 0.
// NOTE(review): lines 1549 (the CreateAnd call header) and 1552 (the
// CreateICmpNE return) are missing from this rendering — verify upstream.
1546auto AlignVectors::makeTestIfUnaligned(IRBuilderBase &Builder, Value *AlignVal,

1547 int Alignment) const -> Value * {

1548 auto *AlignTy = AlignVal->getType();

1550 AlignVal, ConstantInt::get(AlignTy, Alignment - 1), "and");

1551 Value *Zero = ConstantInt::get(AlignTy, 0);

1553}

1554

// Whether Ty is a "sector" type: a byte vector of scalar-sector size
// (4 or 8 bytes) — the HVX-vector-length case on the dropped lines
// 1559-1560 is missing from this rendering.
1555auto AlignVectors::isSectorTy(Type *Ty) const -> bool {

1556 if (!HVC.isByteVecTy(Ty))

1557 return false;

1558 int Size = HVC.getSizeOf(Ty);

1561 return Size == 4 || Size == 8;

1562}

1563

// Driver for the AlignVectors transformation: build address groups, split
// them into load/store MoveGroups, cap the number of groups processed
// (VAGroupCountLimit), move each group's members together, then realign
// each group. Returns true if the function was changed.
// NOTE(review): several lines are missing from this rendering (e.g. the
// declaration of `Changed` and the createLoadGroups/createStoreGroups calls
// inside the loop at 1582-1583) — verify against upstream.
1564auto AlignVectors::run() -> bool {

1566 << '\n');

1567 if (!createAddressGroups())

1568 return false;

1569

1571 dbgs() << "Address groups(" << AddrGroups.size() << "):\n";

1572 for (auto &[In, AL] : AddrGroups) {

1573 for (const AddrInfo &AI : AL)

1574 dbgs() << "---\n" << AI << '\n';

1575 }

1576 });

1577

1579 MoveList LoadGroups, StoreGroups;

1580

1581 for (auto &G : AddrGroups) {

1584 }

1585

1587 dbgs() << "\nLoad groups(" << LoadGroups.size() << "):\n";

1588 for (const MoveGroup &G : LoadGroups)

1589 dbgs() << G << "\n";

1590 dbgs() << "Store groups(" << StoreGroups.size() << "):\n";

1591 for (const MoveGroup &G : StoreGroups)

1592 dbgs() << G << "\n";

1593 });

1594

// Debug-aid limit on how many groups are processed; 0 disables the pass.
1595

1596 unsigned CountLimit = VAGroupCountLimit;

1597 if (CountLimit == 0)

1598 return false;

1599

1600 if (LoadGroups.size() > CountLimit) {

1601 LoadGroups.resize(CountLimit);

1602 StoreGroups.clear();

1603 } else {

1604 unsigned StoreLimit = CountLimit - LoadGroups.size();

1605 if (StoreGroups.size() > StoreLimit)

1606 StoreGroups.resize(StoreLimit);

1607 }

1608

1609 for (auto &M : LoadGroups)

1610 Changed |= moveTogether(M);

1611 for (auto &M : StoreGroups)

1612 Changed |= moveTogether(M);

1613

1614 LLVM_DEBUG(dbgs() << "After moveTogether:\n" << HVC.F);

1615

1616 for (auto &M : LoadGroups)

1617 Changed |= realignGroup(M);

1618 for (auto &M : StoreGroups)

1619 Changed |= realignGroup(M);

1620

1622}

1623

1624

1625

1626

1627

// Compute the number of significant bits of V (in the context of In) and
// classify its signedness via known bits. Starts from the conservative
// HVC.getNumSignificantBits answer and tries to trim it with KnownBits.
// NOTE(review): the conditions guarding NumToTest (lines 1639/1641) and the
// trimming/positivity tests (1644-1645, 1651-1652) are missing from this
// rendering — verify against upstream before editing.
1628auto HvxIdioms::getNumSignificantBits(Value *V, Instruction *In) const

1629 -> std::pair<unsigned, Signedness> {

1630 unsigned Bits = HVC.getNumSignificantBits(V, In);

1631

1632

1633

1634

1635

1636 KnownBits Known = HVC.getKnownBits(V, In);

1637 Signedness Sign = Signed;

1638 unsigned NumToTest = 0;

1640 NumToTest = Bits;

1642 NumToTest = Bits - 1;

1643

1646 Bits = NumToTest;

1647 }

1648

1649

1650

1653 Sign = Positive;

1654 }

1655 return {Bits, Sign};

1656}

1657

// Canonicalize the signedness order of a pair of operands.
// NOTE(review): the actual swap logic (lines 1664-1665) is missing from this
// rendering; only the final return of the (possibly swapped) pair is visible.
1658auto HvxIdioms::canonSgn(SValue X, SValue Y) const

1659 -> std::pair<SValue, SValue> {

1660

1661

1662

1663

1666 return {X, Y};

1667}

1668

1669

1670

1671

// Match a fixed-point multiplication idiom rooted at In:
// (X * Y [+ round]) >> Frac, recording operands, fractional shift, and
// rounding in an FxpOp. Returns std::nullopt when the pattern doesn't match.
// NOTE(review): the HTML extraction stripped template arguments (the return
// type reads `std::optional` without `<FxpOp>`) and dropped the
// PatternMatch lines that bind Op.X/Op.Y, Qn, CV and Op.RoundAt — verify
// against upstream before editing.
1672auto HvxIdioms::matchFxpMul(Instruction &In) const -> std::optional {

1673 using namespace PatternMatch;

1674 auto *Ty = In.getType();

1675

1677 return std::nullopt;

1678

1680

1681 FxpOp Op;

1683

1684

1685

// Matches either an arithmetic or logical right shift by S.
1686 auto m_Shr = [](auto &&V, auto &&S) {

1688 };

1689

1690 uint64_t Qn = 0;

1692 Op.Frac = Qn;

1694 } else {

1695 Op.Frac = 0;

1696 }

1697

1698 if (Op.Frac > Width)

1699 return std::nullopt;

1700

// Match an optional rounding addend; nonzero CV records Op.RoundAt.
1701

1702 uint64_t CV;

1706 return std::nullopt;

1707 if (CV != 0)

1710 }

1711

1712

1714 Op.Opcode = Instruction::Mul;

1715

1716 Op.X.Sgn = getNumSignificantBits(Op.X.Val, &In).second;

1717 Op.Y.Sgn = getNumSignificantBits(Op.Y.Val, &In).second;

1719 return Op;

1720 }

1721

1722 return std::nullopt;

1723}

1724

// Lower a matched fixed-point multiply: bail out for unsupported widths,
// resize operands to the working width, chop the computation into
// HVX-register-sized pieces, process each piece via processFxpMulChopped,
// and concatenate/extend the results back to the original type. Returns the
// replacement value or nullptr if lowering is not possible.
// NOTE(review): this rendering is missing multiple lines (VecTy/ElemTy
// definitions at 1729/1732, the sext/zext selection heads at 1784/1786, the
// Results vector at 1795, the nullptr check at 1803/1807, and the concat at
// 1810-1811) — verify against upstream before editing.
1725auto HvxIdioms::processFxpMul(Instruction &In, const FxpOp &Op) const

1727 assert(Op.X.Val->getType() == Op.Y.Val->getType());

1728

1730 if (VecTy == nullptr)

1731 return nullptr;

1733 unsigned ElemWidth = ElemTy->getBitWidth();

1734

// Only handle vectors that are a whole multiple of the HVX register size.
1735

1736 if ((HVC.length(VecTy) * ElemWidth) % (8 * HVC.HST.getVectorLength()) != 0)

1737 return nullptr;

1738

1739

1740

1741

1742 if (ElemWidth <= 8)

1743 return nullptr;

1744

1745

1746 if (ElemWidth <= 32 && Op.Frac == 0)

1747 return nullptr;

1748

1749 auto [BitsX, SignX] = getNumSignificantBits(Op.X.Val, &In);

1750 auto [BitsY, SignY] = getNumSignificantBits(Op.Y.Val, &In);

1751

1752

1753

1755 IRBuilder Builder(In.getParent(), In.getIterator(),

1756 InstSimplifyFolder(HVC.DL));

1757

// Round a significant-bit count up to a supported element width.
1758 auto roundUpWidth = [](unsigned Width) -> unsigned {

1760

1761

1763 }

1764 if (Width > 32 && Width % 32 != 0) {

1765

1766 return alignTo(Width, 32u);

1767 }

1768 return Width;

1769 };

1770

1771 BitsX = roundUpWidth(BitsX);

1772 BitsY = roundUpWidth(BitsY);

1773

1774

1775

1776

1777 unsigned Width = std::max(BitsX, BitsY);

1778

1779 auto *ResizeTy = VectorType::get(HVC.getIntTy(Width), VecTy);

1780 if (Width < ElemWidth) {

1783 } else if (Width > ElemWidth) {

1785 : Builder.CreateZExt(X, ResizeTy, "zxt");

1787 : Builder.CreateZExt(Y, ResizeTy, "zxt");

1788 };

1789

1790 assert(X->getType() == Y->getType() && X->getType() == ResizeTy);

1791

// Chop into pieces of ChopLen elements (one HVX register per piece).
1792 unsigned VecLen = HVC.length(ResizeTy);

1793 unsigned ChopLen = (8 * HVC.HST.getVectorLength()) / std::min(Width, 32u);

1794

1796 FxpOp ChopOp = Op;

1797 ChopOp.ResTy = VectorType::get(Op.ResTy->getElementType(), ChopLen, false);

1798

1799 for (unsigned V = 0; V != VecLen / ChopLen; ++V) {

1800 ChopOp.X.Val = HVC.subvector(Builder, X, V * ChopLen, ChopLen);

1801 ChopOp.Y.Val = HVC.subvector(Builder, Y, V * ChopLen, ChopLen);

1802 Results.push_back(processFxpMulChopped(Builder, In, ChopOp));

1804 break;

1805 }

1806

1808 return nullptr;

1809

1812 ? Builder.CreateSExt(Cat, VecTy, "sxt")

1813 : Builder.CreateZExt(Cat, VecTy, "zxt");

1814 return Ext;

1815}

1816

1817inline bool HvxIdioms::matchScatter(Instruction &In) const {

1819 if (II)

1820 return false;

1821 return (II->getIntrinsicID() == Intrinsic::masked_scatter);

1822}

1823

1824inline bool HvxIdioms::matchGather(Instruction &In) const {

1826 if (II)

1827 return false;

1828 return (II->getIntrinsicID() == Intrinsic::masked_gather);

1829}

1830

// --- Static helper fragments for the HVX gather/scatter idiom lowering ---
// NOTE(review): the HTML extraction dropped every function signature in this
// region (they contained template arguments / links), so the bodies below
// are headerless fragments. From the visible bodies these appear to be, in
// order: an opcode classifier switch; a pointer extractor for load/store/
// masked_store; locateDestination(In, Qual) which classifies the consumer of
// a gather; a use-walking destination finder; a GEP-index extractor for
// masked gather/scatter; an address locator that peels zext/shuffle/
// insertelement; getLoadStoreType; an index locator; two bitcast-through-
// identity-shuffle reinterpret helpers; and get_i32_Mask which builds a
// 32xi32 predicate via V6_vandvrt. Verify every signature against the
// upstream file before editing anything here.
1832

1833

// Classifies integer binary opcodes that the idiom recognizer treats as
// plain arithmetic destinations.
1835 switch (Opc) {

1836 case Instruction::Add:

1837 case Instruction::Sub:

1838 case Instruction::Mul:

1839 case Instruction::And:

1840 case Instruction::Or:

1841 case Instruction::Xor:

1842 case Instruction::AShr:

1843 case Instruction::LShr:

1844 case Instruction::Shl:

1845 case Instruction::UDiv:

1846 return true;

1847 }

1848 return false;

1849}

1850

// Pointer-operand extractor (signature missing in this rendering); handles
// masked_store's pointer operand (operand 1) explicitly.
1851

1853 assert(Ptr && "Unable to extract pointer");

1855 return Ptr;

1859 if (II->getIntrinsicID() == Intrinsic::masked_store)

1860 return II->getOperand(1);

1861 }

1862 return nullptr;

1863}

1864

// locateDestination: classify the instruction consuming a gather result and
// report the kind in Qual (LdSt, LLVM_Gather/Scatter, HEX_* intrinsics,
// Call, or Arithmetic). Returns the destination instruction or null.
1866 HvxIdioms::DstQualifier &Qual) {

1868 if (!In)

1869 return Destination;

1871 Destination = In;

1872 Qual = HvxIdioms::LdSt;

1874 if (II->getIntrinsicID() == Intrinsic::masked_gather) {

1875 Destination = In;

1876 Qual = HvxIdioms::LLVM_Gather;

1877 } else if (II->getIntrinsicID() == Intrinsic::masked_scatter) {

1878 Destination = In;

1879 Qual = HvxIdioms::LLVM_Scatter;

1880 } else if (II->getIntrinsicID() == Intrinsic::masked_store) {

1881 Destination = In;

1882 Qual = HvxIdioms::LdSt;

1883 } else if (II->getIntrinsicID() ==

1884 Intrinsic::hexagon_V6_vgather_vscattermh) {

1885 Destination = In;

1886 Qual = HvxIdioms::HEX_Gather_Scatter;

1887 } else if (II->getIntrinsicID() == Intrinsic::hexagon_V6_vscattermh_128B) {

1888 Destination = In;

1889 Qual = HvxIdioms::HEX_Scatter;

1890 } else if (II->getIntrinsicID() == Intrinsic::hexagon_V6_vgathermh_128B) {

1891 Destination = In;

1892 Qual = HvxIdioms::HEX_Gather;

1893 }

1899 Destination = In;

1900 Qual = HvxIdioms::Call;

1904 Destination = In;

1905 Qual = HvxIdioms::Arithmetic;

1906 } else {

1907 LLVM_DEBUG(dbgs() << "Unhandled destination : " << *In << "\n");

1908 }

1909 return Destination;

1910}

1911

1912

1913

1914

1915

1916

1917

// Use-walking destination finder: collect classified destinations over In's
// uses and return the first suitable one (selection condition at line 1934
// is missing from this rendering).
1920 if (!In)

1921 return Destination;

1922

1924

1925 for (auto &U : In->uses()) {

1928 if (Destination)

1929 Users.push_back(Destination);

1930 }

1931 }

1932

1933 for (auto *I : Users)

1935 return I;

1936 return Destination;

1937}

1938

1939

// GEP-index extractor for masked gather/scatter: the index is taken from the
// gather's operand-0 GEP or the scatter's operand-1 GEP (dyn_cast lines
// 1946/1948/1950 missing from this rendering).
1941 assert(In && "Bad instruction");

1944 IIn->getIntrinsicID() == Intrinsic::masked_scatter)) &&

1945 "Not a gather Intrinsic");

1947 if (IIn->getIntrinsicID() == Intrinsic::masked_gather)

1949 else

1951 return GEPIndex;

1952}

1953

1954

1955

1956

// Address locator: starting from the GEP index, peel through load, zext,
// shufflevector/insertelement and alloca to find the scalar base address.
1959 if (!GEPIndex) {

1961 return nullptr;

1962 }

1965 if (IndexLoad)

1966 return IndexLoad;

1967

1969 if (IndexZEx) {

1971 if (IndexLoad)

1972 return IndexLoad;

1974 if (II && II->getIntrinsicID() == Intrinsic::masked_gather)

1976 }

1978 if (BaseShuffle) {

1980 if (IndexLoad)

1981 return IndexLoad;

1983 if (IE) {

1984 auto *Src = IE->getOperand(1);

1986 if (IndexLoad)

1987 return IndexLoad;

1989 if (Alloca)

1990 return Alloca;

1992 return Src;

1993 }

1995 return Src;

1996 }

1997 }

1998 }

1999 LLVM_DEBUG(dbgs() << " Unable to locate Address from intrinsic\n");

2000 return nullptr;

2001}

2002

// getLoadStoreType: the accessed value type of a (masked) load/store, or
// the instruction's own type otherwise.
2004 if (!In)

2005 return nullptr;

2006

2009

2011 if (II->getIntrinsicID() == Intrinsic::masked_load)

2012 return II->getType();

2013 if (II->getIntrinsicID() == Intrinsic::masked_store)

2014 return II->getOperand(0)->getType();

2015 }

2016 return In->getType();

2017}

2018

// Index-value locator (signature missing): returns the instruction, a
// constant-data vector of indexes, or the GEP's base operand.
2020 if (!In)

2021 return nullptr;

2023 return In;

2025 if (II->getIntrinsicID() == Intrinsic::masked_load)

2026 return In;

2027 if (II->getIntrinsicID() == Intrinsic::masked_gather)

2028 return In;

2029 }

2039 return cstDataVector;

2041 return GEPIndex->getOperand(0);

2042 return nullptr;

2043}

2044

2045

2046

// Index locator wrapper used by processVScatter/processVGather.
2049 if (!GEPIndex) {

2051 return nullptr;

2052 }

2055 return IndexLoad;

2056

2057 LLVM_DEBUG(dbgs() << " Unable to locate Index from intrinsic\n");

2058 return nullptr;

2059}

2060

2061

2062

2063

// Reinterpret a 64 x i16 value as 32 x i32 via an identity shuffle followed
// by a bitcast (the ConstantDataVector Mask construction at line 2072 is
// missing from this rendering).
2067 assert(I && "Unable to reinterprete cast");

2068 Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);

2069 std::vector shuffleMask;

2070 for (unsigned i = 0; i < 64; ++i)

2071 shuffleMask.push_back(i);

2073 Value *CastShuffle =

2074 Builder.CreateShuffleVector(I, I, Mask, "identity_shuffle");

2075 return Builder.CreateBitCast(CastShuffle, NT, "cst64_i16_to_32_i32");

2076}

2077

2078

// Same reinterpretation for a 128 x i8 value.
2082 assert(I && "Unable to reinterprete cast");

2083 Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);

2084 std::vector shuffleMask;

2085 for (unsigned i = 0; i < 128; ++i)

2086 shuffleMask.push_back(i);

2088 Value *CastShuffle =

2089 Builder.CreateShuffleVector(I, I, Mask, "identity_shuffle");

2090 return Builder.CreateBitCast(CastShuffle, NT, "cst128_i8_to_32_i32");

2091}

2092

2093

// get_i32_Mask: build a 128-lane predicate from a repeated 32-bit byte
// pattern using the V6_vandvrt intrinsic.
2096 unsigned int pattern) {

2097 std::vector byteMask;

2098 for (unsigned i = 0; i < 32; ++i)

2099 byteMask.push_back(pattern);

2100

2101 return Builder.CreateIntrinsic(

2102 HVC.getBoolTy(128), HVC.HST.getIntrinsicId(Hexagon::V6_vandvrt),

2103 {llvm::ConstantDataVector::get(Ctx, byteMask), HVC.getConstInt(~0)},

2104 nullptr);

2105}

// Lower an llvm.masked.scatter into Hexagon V6 vscatter intrinsics.
// Dispatches on the scattered element width: 1 byte (unpack to two
// half-vectors and use vscattermhq with a 0x00ff00ff mask), 2 bytes
// (vscattermh), 4 bytes (vscattermw); anything else is rejected.
// NOTE(review): this rendering dropped many lines, including the success
// `return` statements at the end of each branch — as shown, a non-void
// function falls off the end. Verify the returns against upstream.
2107Value *HvxIdioms::processVScatter(Instruction &In) const {

2109 assert(InpTy && "Cannot handle no vector type for llvm.scatter/gather");

2110 unsigned InpSize = HVC.getSizeOf(InpTy);

2111 auto *F = In.getFunction();

2112 LLVMContext &Ctx = F->getContext();

2114 assert(ElemTy && "llvm.scatter needs integer type argument");

2117 unsigned Elements = HVC.length(InpTy);

2118 dbgs() << "\n[Process scatter](" << In << ")\n" << *In.getParent() << "\n";

2119 dbgs() << " Input type(" << *InpTy << ") elements(" << Elements

2120 << ") VecLen(" << InpSize << ") type(" << *ElemTy << ") ElemWidth("

2121 << ElemWidth << ")\n";

2122 });

2123

2124 IRBuilder Builder(In.getParent(), In.getIterator(),

2125 InstSimplifyFolder(HVC.DL));

2126

2127 auto *ValueToScatter = In.getOperand(0);

2128 LLVM_DEBUG(dbgs() << " ValueToScatter : " << *ValueToScatter << "\n");

2129

// Reject vectors that do not match the HVX register size (condition line
// missing from this rendering).
2131 LLVM_DEBUG(dbgs() << "Unhandled vector size(" << InpSize

2132 << ") for vscatter\n");

2133 return nullptr;

2134 }

2135

2136

2138 if (!IndexLoad)

2139 return nullptr;

2140 LLVM_DEBUG(dbgs() << " IndexLoad : " << *IndexLoad << "\n");

2141

2142

2144 if (!Ptr)

2145 return nullptr;

2147

2149 if (!Indexes)

2150 return nullptr;

2151 LLVM_DEBUG(dbgs() << " Indexes : " << *Indexes << "\n");

2153 "cst_ptr_to_i32");

2154 LLVM_DEBUG(dbgs() << " CastedDst : " << *CastedDst << "\n");

2155

// Constant index vectors are spilled to a stack slot and reloaded so the
// gather/scatter unit sees them as a vector register value.
2157 Value *CastIndex = nullptr;

2158 if (cstDataVector) {

2159

2160 AllocaInst *IndexesAlloca =

2161 Builder.CreateAlloca(HVC.getHvxTy(HVC.getIntTy(32), false));

2162 [[maybe_unused]] auto *StoreIndexes =

2163 Builder.CreateStore(cstDataVector, IndexesAlloca);

2164 LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n");

2166 IndexesAlloca, "reload_index");

2167 } else {

2168 if (ElemWidth == 2)

2170 else

2171 CastIndex = Indexes;

2172 }

2173 LLVM_DEBUG(dbgs() << " Cast index : " << *CastIndex << ")\n");

2174

2175 if (ElemWidth == 1) {

2176

2177

2178 Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);

2179

2180

2181 Value *CastIndexes = Builder.CreateBitCast(CastIndex, NT, "cast_to_32i32");

2182 auto V6_vunpack = HVC.HST.getIntrinsicId(Hexagon::V6_vunpackub);

2184 HVC.getHvxTy(HVC.getIntTy(32), true), V6_vunpack, CastIndexes, nullptr);

2185 LLVM_DEBUG(dbgs() << " UnpackedIndexes : " << *UnpackedIndexes << ")\n");

2186

2187 auto V6_hi = HVC.HST.getIntrinsicId(Hexagon::V6_hi);

2188 auto V6_lo = HVC.HST.getIntrinsicId(Hexagon::V6_lo);

2189 [[maybe_unused]] Value *IndexHi =

2190 HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedIndexes);

2191 [[maybe_unused]] Value *IndexLo =

2192 HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedIndexes);

2193 LLVM_DEBUG(dbgs() << " UnpackedIndHi : " << *IndexHi << ")\n");

2194 LLVM_DEBUG(dbgs() << " UnpackedIndLo : " << *IndexLo << ")\n");

2195

2196 Value *CastSrc =

2198 LLVM_DEBUG(dbgs() << " CastSrc : " << *CastSrc << ")\n");

2200 HVC.getHvxTy(HVC.getIntTy(32), true), V6_vunpack, CastSrc, nullptr);

2201 LLVM_DEBUG(dbgs() << " UnpackedValToScat: " << *UnpackedValueToScatter

2202 << ")\n");

2203

2204 [[maybe_unused]] Value *UVSHi =

2205 HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedValueToScatter);

2206 [[maybe_unused]] Value *UVSLo =

2207 HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedValueToScatter);

2208 LLVM_DEBUG(dbgs() << " UVSHi : " << *UVSHi << ")\n");

2209 LLVM_DEBUG(dbgs() << " UVSLo : " << *UVSLo << ")\n");

2210

// Predicate selecting the low byte of each halfword lane.
2211

2212 auto *QByteMask = get_i32_Mask(HVC, Builder, Ctx, 0x00ff00ff);

2213 LLVM_DEBUG(dbgs() << " QByteMask : " << *QByteMask << "\n");

2215 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermhq_128B,

2217 IndexHi, UVSHi},

2218 nullptr);

2219 LLVM_DEBUG(dbgs() << " ResHi : " << *ResHi << ")\n");

2221 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermhq_128B,

2223 IndexLo, UVSLo},

2224 nullptr);

2225 } else if (ElemWidth == 2) {

2226 Value *CastSrc =

2228 LLVM_DEBUG(dbgs() << " CastSrc : " << *CastSrc << ")\n");

2230 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermh_128B,

2232 CastSrc},

2233 nullptr);

2234 } else if (ElemWidth == 4) {

2236 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermw_128B,

2238 ValueToScatter},

2239 nullptr);

2240 } else {

2241 LLVM_DEBUG(dbgs() << "Unhandled element type for vscatter\n");

2242 return nullptr;

2243 }

2244}

2245

// Lower an llvm.masked.gather into Hexagon V6 vgather intrinsics. The
// strategy depends on how the gather result is consumed (Qual from
// locateDestination): plain load/store or arithmetic uses emit vgathermhq/
// vgathermh/vgathermw by element width; a gather feeding a scatter emits the
// fused vgather/vscatter forms; HEX_* destinations re-emit through stack
// temporaries. Returns the created intrinsic (or nullptr on any unsupported
// shape).
// NOTE(review): this rendering is missing a large number of lines
// (declarations of Dst, Ptr, DstType, IndexLoad, Indexes, CastedPtr, Gather,
// DstElemTy/ElemWidth, several intrinsic argument lists, and the final else
// diagnostic) — treat every detail below as needing verification against
// the upstream file.
2246Value *HvxIdioms::processVGather(Instruction &In) const {

2247 [[maybe_unused]] auto *InpTy =

2249 assert(InpTy && "Cannot handle no vector type for llvm.gather");

2250 [[maybe_unused]] auto *ElemTy =

2252 assert(ElemTy && "llvm.gather needs vector of ptr argument");

2253 auto *F = In.getFunction();

2254 LLVMContext &Ctx = F->getContext();

2255 LLVM_DEBUG(dbgs() << "\n[Process gather](" << In << ")\n"

2256 << *In.getParent() << "\n");

2257 LLVM_DEBUG(dbgs() << " Input type(" << *InpTy << ") elements("

2258 << HVC.length(InpTy) << ") VecLen(" << HVC.getSizeOf(InpTy)

2259 << ") type(" << *ElemTy << ") Access alignment("

2260 << *In.getOperand(1) << ") AddressSpace("

2261 << ElemTy->getAddressSpace() << ")\n");

2262

2263

2265 "llvm.gather needs vector for mask");

2266 IRBuilder Builder(In.getParent(), In.getIterator(),

2267 InstSimplifyFolder(HVC.DL));

2268

2269

2270

// Classify where the gathered value flows; this selects the lowering.
2271

2272 HvxIdioms::DstQualifier Qual = HvxIdioms::Undefined;

2274 if (!Dst) {

2275 LLVM_DEBUG(dbgs() << " Unable to locate vgather destination\n");

2276 return nullptr;

2277 }

2278 LLVM_DEBUG(dbgs() << " Destination : " << *Dst << " Qual(" << Qual

2279 << ")\n");

2280

2281

2283 if (!Ptr) {

2284 LLVM_DEBUG(dbgs() << "Could not locate vgather destination ptr\n");

2285 return nullptr;

2286 }

2287

2288

2290 assert(DstType && "Cannot handle non vector dst type for llvm.gather");

2291

2292

2294 if (!IndexLoad)

2295 return nullptr;

2296 LLVM_DEBUG(dbgs() << " IndexLoad : " << *IndexLoad << "\n");

2297

2298

2300 if (!Indexes)

2301 return nullptr;

2302 LLVM_DEBUG(dbgs() << " Indexes : " << *Indexes << "\n");

2303

2305 Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);

2306 if (Qual == HvxIdioms::LdSt || Qual == HvxIdioms::Arithmetic) {

2307

2308

2309

2310 unsigned OutputSize = HVC.getSizeOf(DstType);

2314 << " Address space ("

2316 << " Result type : " << *DstType

2317 << "\n Size in bytes : " << OutputSize

2318 << " element type(" << *DstElemTy

2319 << ")\n ElemWidth : " << ElemWidth << " bytes\n");

2320

2322 assert(IndexType && "Cannot handle non vector index type for llvm.gather");

2323 unsigned IndexWidth = HVC.DL.getTypeAllocSize(IndexType->getElementType());

2324 LLVM_DEBUG(dbgs() << " IndexWidth(" << IndexWidth << ")\n");

2325

2326

2328 IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");

2329

2330

2331

2332

2333

2334

2335

// Byte gather: unpack byte indexes to two halfword vectors, gather each
// half with vgathermhq under a 0x00ff00ff lane mask, then pack back.
2337 if (ElemWidth == 1) {

2338

2339

2340

2341

2342 Value *CastIndexes =

2343 Builder.CreateBitCast(Indexes, NT, "cast_to_32i32");

2344 auto V6_vunpack = HVC.HST.getIntrinsicId(Hexagon::V6_vunpackub);

2345 auto *UnpackedIndexes =

2346 Builder.CreateIntrinsic(HVC.getHvxTy(HVC.getIntTy(32), true),

2347 V6_vunpack, CastIndexes, nullptr);

2348 LLVM_DEBUG(dbgs() << " UnpackedIndexes : " << *UnpackedIndexes

2349 << ")\n");

2350

2351 auto V6_hi = HVC.HST.getIntrinsicId(Hexagon::V6_hi);

2352 auto V6_lo = HVC.HST.getIntrinsicId(Hexagon::V6_lo);

2353 [[maybe_unused]] Value *IndexHi =

2354 HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedIndexes);

2355 [[maybe_unused]] Value *IndexLo =

2356 HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedIndexes);

2357 LLVM_DEBUG(dbgs() << " UnpackedIndHi : " << *IndexHi << ")\n");

2358 LLVM_DEBUG(dbgs() << " UnpackedIndLo : " << *IndexLo << ")\n");

2359

2360 auto *QByteMask = get_i32_Mask(HVC, Builder, Ctx, 0x00ff00ff);

2361 LLVM_DEBUG(dbgs() << " QByteMask : " << *QByteMask << "\n");

2362

2363

2364 auto V6_vgather = HVC.HST.getIntrinsicId(Hexagon::V6_vgathermhq);

2365 [[maybe_unused]] auto GatherHi = Builder.CreateIntrinsic(

2366 Type::getVoidTy(Ctx), V6_vgather,

2367 {Ptr, QByteMask, CastedPtr,

2369 nullptr);

2370 LLVM_DEBUG(dbgs() << " GatherHi : " << *GatherHi << ")\n");

2371

2372 [[maybe_unused]] Value *LoadedResultHi = Builder.CreateLoad(

2373 HVC.getHvxTy(HVC.getIntTy(32), false), Ptr, "temp_result_hi");

2374 LLVM_DEBUG(dbgs() << " LoadedResultHi : " << *LoadedResultHi << "\n");

2375

2376

2377

2379 Type::getVoidTy(Ctx), V6_vgather,

2380 {Ptr, QByteMask, CastedPtr,

2382 nullptr);

2383 LLVM_DEBUG(dbgs() << " GatherLo : " << *Gather << ")\n");

2385 HVC.getHvxTy(HVC.getIntTy(32), false), Ptr, "temp_result_lo");

2386 LLVM_DEBUG(dbgs() << " LoadedResultLo : " << *LoadedResultLo << "\n");

2387

2388

2389

2390

2391 auto V6_vpackeb = HVC.HST.getIntrinsicId(Hexagon::V6_vpackeb);

2393 NT, V6_vpackeb, {LoadedResultHi, LoadedResultLo}, nullptr);

2394 LLVM_DEBUG(dbgs() << " ScaledRes : " << *Res << "\n");

2395 [[maybe_unused]] auto *StoreRes = Builder.CreateStore(Res, Ptr);

2396 LLVM_DEBUG(dbgs() << " StoreRes : " << *StoreRes << "\n");

// Halfword gather: indexes are shifted left by 1 (byte offsets) and fed to
// vgathermh; the result is reloaded from Ptr and replaces In.
2397 } else if (ElemWidth == 2) {

2398

2399 if (IndexWidth == 2) {

2400

2401 Value *CastIndex =

2403 LLVM_DEBUG(dbgs() << " Cast index: " << *CastIndex << ")\n");

2404

2405

2406 auto V6_vaslh = HVC.HST.getIntrinsicId(Hexagon::V6_vaslh);

2407 Value *AdjustedIndex = HVC.createHvxIntrinsic(

2408 Builder, V6_vaslh, NT, {CastIndex, HVC.getConstInt(1)});

2410 << " Shifted half index: " << *AdjustedIndex << ")\n");

2411

2412 auto V6_vgather = HVC.HST.getIntrinsicId(Hexagon::V6_vgathermh);

2413

2414

2416 Type::getVoidTy(Ctx), V6_vgather,

2418 AdjustedIndex},

2419 nullptr);

2420 for (auto &U : Dst->uses()) {

2422 dbgs() << " dst used by: " << *UI << "\n";

2423 }

2424 for (auto &U : In.uses()) {

2426 dbgs() << " In used by : " << *UI << "\n";

2427 }

2428

2429

2431 HVC.getHvxTy(HVC.getIntTy(16), false), Ptr, "temp_result");

2432 LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");

2433 In.replaceAllUsesWith(LoadedResult);

2434 } else {

2435 dbgs() << " Unhandled index type for vgather\n";

2436 return nullptr;

2437 }

// Word gather: indexes shifted left by 2 and fed to vgathermw_128B.
2438 } else if (ElemWidth == 4) {

2439 if (IndexWidth == 4) {

2440

2441 auto V6_vaslh = HVC.HST.getIntrinsicId(Hexagon::V6_vaslh);

2442 Value *AdjustedIndex = HVC.createHvxIntrinsic(

2443 Builder, V6_vaslh, NT, {Indexes, HVC.getConstInt(2)});

2445 << " Shifted word index: " << *AdjustedIndex << ")\n");

2447 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermw_128B,

2449 AdjustedIndex},

2450 nullptr);

2451 } else {

2452 LLVM_DEBUG(dbgs() << " Unhandled index type for vgather\n");

2453 return nullptr;

2454 }

2455 } else {

2456 LLVM_DEBUG(dbgs() << " Unhandled element type for vgather\n");

2457 return nullptr;

2458 }

2459 } else if (HVC.HST.getVectorLength() == OutputSize * 2) {

2460

2461 LLVM_DEBUG(dbgs() << " Unhandled half of register size\n");

2462 return nullptr;

2463 } else if (HVC.HST.getVectorLength() * 2 == OutputSize) {

2464 LLVM_DEBUG(dbgs() << " Unhandle twice the register size\n");

2465 return nullptr;

2466 }

2467

2468

// The original consumer is now dead: its value was replaced above.
2469

2470 Dst->eraseFromParent();

// Gather feeding a scatter: emit vgathermh into the scatter's destination.
2471 } else if (Qual == HvxIdioms::LLVM_Scatter) {

2472

2474 auto *DstInpTy = cast(Dst->getOperand(1)->getType());

2475 assert(DstInpTy && "Cannot handle no vector type for llvm.scatter");

2476 unsigned DstInpSize = HVC.getSizeOf(DstInpTy);

2477 unsigned DstElements = HVC.length(DstInpTy);

2478 auto *DstElemTy = cast(DstInpTy->getElementType());

2479 assert(DstElemTy && "llvm.scatter needs vector of ptr argument");

2480 dbgs() << " Gather feeds into scatter\n Values to scatter : "

2481 << *Dst->getOperand(0) << "\n";

2482 dbgs() << " Dst type(" << *DstInpTy << ") elements(" << DstElements

2483 << ") VecLen(" << DstInpSize << ") type(" << *DstElemTy

2484 << ") Access alignment(" << *Dst->getOperand(2) << ")\n";

2485 });

2486

2488 if (!Src)

2489 return nullptr;

2491

2493 LLVM_DEBUG(dbgs() << " Source is not a pointer type...\n");

2494 return nullptr;

2495 }

2496

2498 Src, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");

2499 LLVM_DEBUG(dbgs() << " CastedSrc: " << *CastedSrc << "\n");

2500

2502 if (!DstLoad) {

2503 LLVM_DEBUG(dbgs() << " Unable to locate DstLoad\n");

2504 return nullptr;

2505 }

2506 LLVM_DEBUG(dbgs() << " DstLoad : " << *DstLoad << "\n");

2507

2509 if (!Ptr)

2510 return nullptr;

2512 Value *CastIndex =

2514 LLVM_DEBUG(dbgs() << " Cast index: " << *CastIndex << ")\n");

2515

2516

2517 auto V6_vaslh = HVC.HST.getIntrinsicId(Hexagon::V6_vaslh);

2518 Value *AdjustedIndex = HVC.createHvxIntrinsic(

2519 Builder, V6_vaslh, NT, {CastIndex, HVC.getConstInt(1)});

2520 LLVM_DEBUG(dbgs() << " Shifted half index: " << *AdjustedIndex << ")\n");

2521

2523 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,

2525 AdjustedIndex},

2526 nullptr);

// Destination is the fused Hexagon vgather/vscatter intrinsic.
2527 } else if (Qual == HvxIdioms::HEX_Gather_Scatter) {

2528

2529

2530

2531

2532

2535 if (cstDataVector) {

2536

2537

2538

2539 AllocaInst *IndexesAlloca = Builder.CreateAlloca(NT);

2540 [[maybe_unused]] auto *StoreIndexes =

2541 Builder.CreateStore(cstDataVector, IndexesAlloca);

2542 LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n");

2544 IndexesAlloca->getAllocatedType(), IndexesAlloca, "reload_index");

2545 AllocaInst *ResultAlloca = Builder.CreateAlloca(NT);

2546 LLVM_DEBUG(dbgs() << " ResultAlloca : " << *ResultAlloca << "\n");

2547

2549 IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");

2550 LLVM_DEBUG(dbgs() << " CastedSrc : " << *CastedSrc << "\n");

2551

2553 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,

2554 {ResultAlloca, CastedSrc,

2556 nullptr);

2558 HVC.getHvxTy(HVC.getIntTy(16), false), ResultAlloca, "temp_result");

2559 LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");

2560 LLVM_DEBUG(dbgs() << " Gather : " << *Gather << "\n");

2561 In.replaceAllUsesWith(LoadedResult);

2562 }

2563 } else {

2564

2566 if (!Src)

2567 return nullptr;

2569

2571 Src, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");

2572 LLVM_DEBUG(dbgs() << " CastedSrc: " << *CastedSrc << "\n");

2573

2575 if (!DstLoad)

2576 return nullptr;

2577 LLVM_DEBUG(dbgs() << " DstLoad : " << *DstLoad << "\n");

2579 if (!Ptr)

2580 return nullptr;

2582

2584 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgather_vscattermh,

2586 Indexes},

2587 nullptr);

2588 }

2589 return Gather;

// Destination already a Hexagon scatter: gather into a stack temporary.
2590 } else if (Qual == HvxIdioms::HEX_Scatter) {

2591

2592

2593

2594

2595 AllocaInst *ResultAlloca = Builder.CreateAlloca(NT);

2597 IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");

2598 LLVM_DEBUG(dbgs() << " CastedSrc : " << *CastedSrc << "\n");

2599 Value *CastIndex =

2601 LLVM_DEBUG(dbgs() << " Cast index : " << *CastIndex << ")\n");

2602

2604 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,

2606 CastIndex},

2607 nullptr);

2609 HVC.getHvxTy(HVC.getIntTy(16), false), ResultAlloca, "temp_result");

2610 LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");

2611 In.replaceAllUsesWith(LoadedResult);

// Destination already a Hexagon gather: spill constant indexes and re-emit.
2612 } else if (Qual == HvxIdioms::HEX_Gather) {

2613

2614

2617 if (cstDataVector) {

2618

2619 AllocaInst *IndexesAlloca = Builder.CreateAlloca(NT);

2620

2621 [[maybe_unused]] auto *StoreIndexes =

2622 Builder.CreateStore(cstDataVector, IndexesAlloca);

2623 LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n");

2625 IndexesAlloca->getAllocatedType(), IndexesAlloca, "reload_index");

2626 AllocaInst *ResultAlloca = Builder.CreateAlloca(NT);

2627 LLVM_DEBUG(dbgs() << " ResultAlloca : " << *ResultAlloca

2628 << "\n AddressSpace: "

2630

2632 IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");

2633 LLVM_DEBUG(dbgs() << " CastedSrc : " << *CastedSrc << "\n");

2634

2636 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,

2637 {ResultAlloca, CastedSrc,

2639 nullptr);

2641 HVC.getHvxTy(HVC.getIntTy(16), false), ResultAlloca, "temp_result");

2642 LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");

2643 LLVM_DEBUG(dbgs() << " Gather : " << *Gather << "\n");

2644 In.replaceAllUsesWith(LoadedResult);

2645 }

2646 }

2647 } else if (Qual == HvxIdioms::LLVM_Gather) {

2648

2649 errs() << " Underimplemented vgather to vgather sequence\n";

2650 return nullptr;

2651 } else

2653

2654 return Gather;

2655}

2656

// Lower one register-sized chunk of a fixed-point multiply: try Q15/Q31
// rounding multiplies first, then 16-bit paths (high-half multiply or
// widened 32-bit product plus shift), and finally the general wide path
// built from 32-bit word products with carry propagation and funnel shifts.
// NOTE(review): several lines are missing from this rendering (InpTy at
// 2660, the Q-format precondition at 2666, Shifted's head at 2703, RoundV's
// declaration at 2721-2722, Lo at 2736).
2657auto HvxIdioms::processFxpMulChopped(IRBuilderBase &Builder, Instruction &In,

2658 const FxpOp &Op) const -> Value * {

2659 assert(Op.X.Val->getType() == Op.Y.Val->getType());

2661 unsigned Width = InpTy->getScalarSizeInBits();

2662 bool Rounding = Op.RoundAt.has_value();

2663

// NOTE(review): as rendered this condition dereferences Op.RoundAt even
// when it is empty — upstream reads `if (!Op.RoundAt || *Op.RoundAt ==
// Op.Frac - 1)`. The `!` appears to have been lost in extraction; confirm
// against the upstream file.
2664 if (Op.RoundAt || *Op.RoundAt == Op.Frac - 1) {

2665

2667 Value *QMul = nullptr;

2668 if (Width == 16) {

2669 QMul = createMulQ15(Builder, Op.X, Op.Y, Rounding);

2670 } else if (Width == 32) {

2671 QMul = createMulQ31(Builder, Op.X, Op.Y, Rounding);

2672 }

2673 if (QMul != nullptr)

2674 return QMul;

2675 }

2676 }

2677

2678 assert(Width >= 32 || isPowerOf2_32(Width));

2679 assert(Width < 32 || Width % 32 == 0);

2680

2681

2682 if (Width < 32) {

2683 if (Width < 16)

2684 return nullptr;

2685

2686

2687

2688 assert(Width == 16);

2689 assert(Op.Frac != 0 && "Unshifted mul should have been skipped");

// Full 16-bit fractional shift == take the high half of the product.
2690 if (Op.Frac == 16) {

2691

2692 if (Value *MulH = createMulH16(Builder, Op.X, Op.Y))

2693 return MulH;

2694 }

2695

// General 16-bit path: widen to a 32-bit product, add the rounding bit,
// shift right by Frac, truncate back.
2696 Value *Prod32 = createMul16(Builder, Op.X, Op.Y);

2697 if (Rounding) {

2698 Value *RoundVal = ConstantInt::get(Prod32->getType(), 1 << *Op.RoundAt);

2699 Prod32 = Builder.CreateAdd(Prod32, RoundVal, "add");

2700 }

2701

2702 Value *ShiftAmt = ConstantInt::get(Prod32->getType(), Op.Frac);

2704 ? Builder.CreateAShr(Prod32, ShiftAmt, "asr")

2705 : Builder.CreateLShr(Prod32, ShiftAmt, "lsr");

2706 return Builder.CreateTrunc(Shifted, InpTy, "trn");

2707 }

2708

2709

2710

2711

// Wide path: split operands into 32-bit words, form the long product, add
// the rounding word, then shift the word array right by Frac using fshr.
2712

2713 auto WordX = HVC.splitVectorElements(Builder, Op.X.Val, 32);

2714 auto WordY = HVC.splitVectorElements(Builder, Op.Y.Val, 32);

2715 auto WordP = createMulLong(Builder, WordX, Op.X.Sgn, WordY, Op.Y.Sgn);

2716

2717 auto *HvxWordTy = cast(WordP.front()->getType());

2718

2719

2720 if (Op.RoundAt.has_value()) {

2723 RoundV[*Op.RoundAt / 32] =

2724 ConstantInt::get(HvxWordTy, 1 << (*Op.RoundAt % 32));

2725 WordP = createAddLong(Builder, WordP, RoundV);

2726 }

2727

2728

2729

2730

2731 unsigned SkipWords = Op.Frac / 32;

2732 Constant *ShiftAmt = ConstantInt::get(HvxWordTy, Op.Frac % 32);

2733

2734 for (int Dst = 0, End = WordP.size() - SkipWords; Dst != End; ++Dst) {

2735 int Src = Dst + SkipWords;

2737 if (Src + 1 < End) {

2738 Value *Hi = WordP[Src + 1];

2739 WordP[Dst] = Builder.CreateIntrinsic(HvxWordTy, Intrinsic::fshr,

2740 {Hi, Lo, ShiftAmt},

2741 nullptr, "int");

2742 } else {

2743

2744 WordP[Dst] = Builder.CreateAShr(Lo, ShiftAmt, "asr");

2745 }

2746 }

2747 if (SkipWords != 0)

2748 WordP.resize(WordP.size() - SkipWords);

2749

2750 return HVC.joinVectorElements(Builder, WordP, Op.ResTy);

2751}

2752

// Q15 multiply via V6_vmpyhvsrs (multiply, shift, round, saturate) for
// signed 16-bit operands. Returns nullptr when the preconditions on the
// dropped lines 2757/2760 (both operands signed, Rounding set) do not hold.
2753auto HvxIdioms::createMulQ15(IRBuilderBase &Builder, SValue X, SValue Y,

2754 bool Rounding) const -> Value * {

2755 assert(X.Val->getType() == Y.Val->getType());

2756 assert(X.Val->getType()->getScalarType() == HVC.getIntTy(16));

2758

2759

2761 return nullptr;

2762

2763 auto V6_vmpyhvsrs = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhvsrs);

2764 return HVC.createHvxIntrinsic(Builder, V6_vmpyhvsrs, X.Val->getType(),

2765 {X.Val, Y.Val});

2766}

2767

// Q31 multiply: V6_vmpyewuh (even words, unsigned-high) followed by the
// odd-word accumulate, with or without rounding depending on Rounding.
// NOTE(review): the 32-bit/signedness precondition lines (2772-2775) and
// the non-rounding vmpyowh_sacc alternative (2781-2782) are missing from
// this rendering.
2768auto HvxIdioms::createMulQ31(IRBuilderBase &Builder, SValue X, SValue Y,

2769 bool Rounding) const -> Value * {

2770 Type *InpTy = X.Val->getType();

2771 assert(InpTy == Y.Val->getType());

2774

2776 return nullptr;

2777

2778 auto V6_vmpyewuh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyewuh);

2779 auto V6_vmpyo_acc = Rounding

2780 ? HVC.HST.getIntrinsicId(Hexagon::V6_vmpyowh_rnd_sacc)

2783 HVC.createHvxIntrinsic(Builder, V6_vmpyewuh, InpTy, {X.Val, Y.Val});

2784 return HVC.createHvxIntrinsic(Builder, V6_vmpyo_acc, InpTy,

2785 {V1, X.Val, Y.Val});

2786}

2787

2788auto HvxIdioms::createAddCarry(IRBuilderBase &Builder, Value *X, Value *Y,

2789 Value *CarryIn) const

2790 -> std::pair<Value *, Value *> {

2791 assert(X->getType() == Y->getType());

2793 if (VecTy == HvxI32Ty && HVC.HST.useHVXV62Ops()) {

2796 if (CarryIn == nullptr && HVC.HST.useHVXV66Ops()) {

2797 AddCarry = HVC.HST.getIntrinsicId(Hexagon::V6_vaddcarryo);

2798 } else {

2799 AddCarry = HVC.HST.getIntrinsicId(Hexagon::V6_vaddcarry);

2800 if (CarryIn == nullptr)

2802 Args.push_back(CarryIn);

2803 }

2804 Value *Ret = HVC.createHvxIntrinsic(Builder, AddCarry,

2805 nullptr, Args);

2808 return {Result, CarryOut};

2809 }

2810

2811

2812

2813

2814 Value *Result1 = X;

2815 if (CarryIn != nullptr) {

2816 unsigned Width = VecTy->getScalarSizeInBits();

2817 uint32_t Mask = 1;

2818 if (Width < 32) {

2819 for (unsigned i = 0, e = 32 / Width; i != e; ++i)

2820 Mask = (Mask << Width) | 1;

2821 }

2822 auto V6_vandqrt = HVC.HST.getIntrinsicId(Hexagon::V6_vandqrt);

2823 Value *ValueIn =

2824 HVC.createHvxIntrinsic(Builder, V6_vandqrt, nullptr,

2825 {CarryIn, HVC.getConstInt(Mask)});

2826 Result1 = Builder.CreateAdd(X, ValueIn, "add");

2827 }

2828

2832 return {Result2, Builder.CreateOr(CarryOut1, CarryOut2, "orb")};

2833}

2834

2835auto HvxIdioms::createMul16(IRBuilderBase &Builder, SValue X, SValue Y) const

2838 std::tie(X, Y) = canonSgn(X, Y);

2839

2841 V6_vmpyh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhv);

2842 } else if (Y.Sgn == Signed) {

2843

2844 V6_vmpyh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhus);

2845 } else {

2846 V6_vmpyh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyuhv);

2847 }

2848

2849

2851 HVC.createHvxIntrinsic(Builder, V6_vmpyh, HvxP32Ty, {Y.Val, X.Val});

2852

2853 return HVC.vshuff(Builder, HVC.sublo(Builder, P), HVC.subhi(Builder, P));

2854}

2855

2856auto HvxIdioms::createMulH16(IRBuilderBase &Builder, SValue X, SValue Y) const

2858 Type *HvxI16Ty = HVC.getHvxTy(HVC.getIntTy(16), false);

2859

2862 auto V6_vmpyuhvs = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyuhvs);

2863 return HVC.createHvxIntrinsic(Builder, V6_vmpyuhvs, HvxI16Ty,

2864 {X.Val, Y.Val});

2865 }

2866 }

2867

2868 Type *HvxP16Ty = HVC.getHvxTy(HVC.getIntTy(16), true);

2870 Builder.CreateBitCast(createMul16(Builder, X, Y), HvxP16Ty, "cst");

2871 unsigned Len = HVC.length(HvxP16Ty) / 2;

2872

2873 SmallVector<int, 128> PickOdd(Len);

2874 for (int i = 0; i != static_cast<int>(Len); ++i)

2875 PickOdd[i] = 2 * i + 1;

2876

2878 HVC.sublo(Builder, Pair16), HVC.subhi(Builder, Pair16), PickOdd, "shf");

2879}

2880

2881auto HvxIdioms::createMul32(IRBuilderBase &Builder, SValue X, SValue Y) const

2882 -> std::pair<Value *, Value *> {

2883 assert(X.Val->getType() == Y.Val->getType());

2884 assert(X.Val->getType() == HvxI32Ty);

2885

2887 std::tie(X, Y) = canonSgn(X, Y);

2888

2890 V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyss_parts;

2891 } else if (Y.Sgn == Signed) {

2892 V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyus_parts;

2893 } else {

2894 V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyuu_parts;

2895 }

2896

2897 Value *Parts = HVC.createHvxIntrinsic(Builder, V6_vmpy_parts, nullptr,

2898 {X.Val, Y.Val}, {HvxI32Ty});

2901 return {Lo, Hi};

2902}

2903

2904auto HvxIdioms::createAddLong(IRBuilderBase &Builder, ArrayRef<Value *> WordX,

2907 assert(WordX.size() == WordY.size());

2908 unsigned Idx = 0, Length = WordX.size();

2910

2911 while (Idx != Length) {

2912 if (HVC.isZero(WordX[Idx]))

2913 Sum[Idx] = WordY[Idx];

2914 else if (HVC.isZero(WordY[Idx]))

2915 Sum[Idx] = WordX[Idx];

2916 else

2917 break;

2918 ++Idx;

2919 }

2920

2921 Value *Carry = nullptr;

2922 for (; Idx != Length; ++Idx) {

2923 std::tie(Sum[Idx], Carry) =

2924 createAddCarry(Builder, WordX[Idx], WordY[Idx], Carry);

2925 }

2926

2927

2928 return Sum;

2929}

2930

2931auto HvxIdioms::createMulLong(IRBuilderBase &Builder, ArrayRef<Value *> WordX,

2935

2936

2937

2938 for (int i = 0, e = WordX.size(); i != e; ++i) {

2939 for (int j = 0, f = WordY.size(); j != f; ++j) {

2940

2941 Signedness SX = (i + 1 == e) ? SgnX : Unsigned;

2942 Signedness SY = (j + 1 == f) ? SgnY : Unsigned;

2943 auto [Lo, Hi] = createMul32(Builder, {WordX[i], SX}, {WordY[j], SY});

2944 Products[i + j + 0].push_back(Lo);

2945 Products[i + j + 1].push_back(Hi);

2946 }

2947 }

2948

2950

2951 auto pop_back_or_zero = [Zero](auto &Vector) -> Value * {

2953 return Zero;

2956 return Last;

2957 };

2958

2959 for (int i = 0, e = Products.size(); i != e; ++i) {

2960 while (Products[i].size() > 1) {

2961 Value *Carry = nullptr;

2962 for (int j = i; j != e; ++j) {

2963 auto &ProdJ = Products[j];

2964 auto [Sum, CarryOut] = createAddCarry(Builder, pop_back_or_zero(ProdJ),

2965 pop_back_or_zero(ProdJ), Carry);

2966 ProdJ.insert(ProdJ.begin(), Sum);

2967 Carry = CarryOut;

2968 }

2969 }

2970 }

2971

2973 for (auto &P : Products) {

2974 assert(P.size() == 1 && "Should have been added together");

2976 }

2977

2978 return WordP;

2979}

2980

2981auto HvxIdioms::run() -> bool {

2983

2984 for (BasicBlock &B : HVC.F) {

2985 for (auto It = B.rbegin(); It != B.rend(); ++It) {

2986 if (auto Fxm = matchFxpMul(*It)) {

2987 Value *New = processFxpMul(*It, *Fxm);

2988

2990 if (!New)

2991 continue;

2993 It->replaceAllUsesWith(New);

2995 It = StartOver ? B.rbegin()

2998 } else if (matchGather(*It)) {

2999 Value *New = processVGather(*It);

3000 if (!New)

3001 continue;

3003

3004 It->eraseFromParent();

3008 } else if (matchScatter(*It)) {

3009 Value *New = processVScatter(*It);

3010 if (!New)

3011 continue;

3012 LLVM_DEBUG(dbgs() << " Scatter : " << *New << "\n");

3013

3014 It->eraseFromParent();

3018 }

3019 }

3020 }

3021

3023}

3024

3025

3026

3027auto HexagonVectorCombine::run() -> bool {

3028 if (DumpModule)

3029 dbgs() << "Module before HexagonVectorCombine\n" << *F.getParent();

3030

3032 if (HST.useHVXOps()) {

3033 if (VAEnabled)

3034 Changed |= AlignVectors(*this).run();

3035 if (VIEnabled)

3036 Changed |= HvxIdioms(*this).run();

3037 }

3038

3039 if (DumpModule) {

3040 dbgs() << "Module " << (Changed ? "(modified)" : "(unchanged)")

3041 << " after HexagonVectorCombine\n"

3042 << *F.getParent();

3043 }

3045}

3046

3047auto HexagonVectorCombine::getIntTy(unsigned Width) const -> IntegerType * {

3049}

3050

3051auto HexagonVectorCombine::getByteTy(int ElemCount) const -> Type * {

3052 assert(ElemCount >= 0);

3053 IntegerType *ByteTy = Type::getInt8Ty(F.getContext());

3054 if (ElemCount == 0)

3055 return ByteTy;

3056 return VectorType::get(ByteTy, ElemCount, false);

3057}

3058

3059auto HexagonVectorCombine::getBoolTy(int ElemCount) const -> Type * {

3060 assert(ElemCount >= 0);

3061 IntegerType *BoolTy = Type::getInt1Ty(F.getContext());

3062 if (ElemCount == 0)

3063 return BoolTy;

3064 return VectorType::get(BoolTy, ElemCount, false);

3065}

3066

3067auto HexagonVectorCombine::getConstInt(int Val, unsigned Width) const

3068 -> ConstantInt * {

3070}

3071

3072auto HexagonVectorCombine::isZero(const Value *Val) const -> bool {

3074 return C->isZeroValue();

3075 return false;

3076}

3077

3078auto HexagonVectorCombine::getIntValue(const Value *Val) const

3079 -> std::optional {

3081 return CI->getValue();

3082 return std::nullopt;

3083}

3084

3085auto HexagonVectorCombine::isUndef(const Value *Val) const -> bool {

3087}

3088

3089auto HexagonVectorCombine::isTrue(const Value *Val) const -> bool {

3091}

3092

3093auto HexagonVectorCombine::isFalse(const Value *Val) const -> bool {

3095}

3096

3097auto HexagonVectorCombine::getHvxTy(Type *ElemTy, bool Pair) const

3100 assert(ETy.isSimple() && "Invalid HVX element type");

3101

3102 assert(HST.isHVXElementType(ETy.getSimpleVT(), false) &&

3103 "Invalid HVX element type");

3104 unsigned HwLen = HST.getVectorLength();

3105 unsigned NumElems = (8 * HwLen) / ETy.getSizeInBits();

3106 return VectorType::get(ElemTy, Pair ? 2 * NumElems : NumElems,

3107 false);

3108}

3109

3110auto HexagonVectorCombine::getSizeOf(const Value *Val, SizeKind Kind) const

3111 -> int {

3112 return getSizeOf(Val->getType(), Kind);

3113}

3114

3115auto HexagonVectorCombine::getSizeOf(const Type *Ty, SizeKind Kind) const

3116 -> int {

3117 auto *NcTy = const_cast<Type *>(Ty);

3118 switch (Kind) {

3120 return DL.getTypeStoreSize(NcTy).getFixedValue();

3122 return DL.getTypeAllocSize(NcTy).getFixedValue();

3123 }

3125}

3126

3127auto HexagonVectorCombine::getTypeAlignment(Type *Ty) const -> int {

3128

3129

3130 if (HST.isTypeForHVX(Ty))

3131 return HST.getVectorLength();

3132 return DL.getABITypeAlign(Ty).value();

3133}

3134

3135auto HexagonVectorCombine::length(Value *Val) const -> size_t {

3136 return length(Val->getType());

3137}

3138

3139auto HexagonVectorCombine::length(Type *Ty) const -> size_t {

3141 assert(VecTy && "Must be a vector type");

3142 return VecTy->getElementCount().getFixedValue();

3143}

3144

3145auto HexagonVectorCombine::simplify(Value *V) const -> Value * {

3147 SimplifyQuery Q(DL, &TLI, &DT, &AC, In);

3149 }

3150 return nullptr;

3151}

3152

3153

3154auto HexagonVectorCombine::insertb(IRBuilderBase &Builder, Value *Dst,

3156 int Where) const -> Value * {

3157 assert(isByteVecTy(Dst->getType()) && isByteVecTy(Src->getType()));

3158 int SrcLen = getSizeOf(Src);

3159 int DstLen = getSizeOf(Dst);

3160 assert(0 <= Start && Start + Length <= SrcLen);

3161 assert(0 <= Where && Where + Length <= DstLen);

3162

3165 Value *P2Src = vresize(Builder, Src, P2Len, Poison);

3166 Value *P2Dst = vresize(Builder, Dst, P2Len, Poison);

3167

3169 for (int i = 0; i != P2Len; ++i) {

3170

3171

3172 SMask[i] =

3173 (Where <= i && i < Where + Length) ? P2Len + Start + (i - Where) : i;

3174 }

3175

3177 return vresize(Builder, P2Insert, DstLen, Poison);

3178}

3179

// Left-align-by-bytes: funnel shift {Hi:Lo} left by Amt bytes, returning the
// upper VecLen bytes. Constant amounts become a shuffle; HVX types use the
// V6_vlalignb instruction; small (4/8-byte) vectors fall back to scalar
// shifts / vralignb.
// NOTE(review): this listing is missing several interior lines (the isZero
// early-out condition and the 4-byte scalar path) — restore from upstream
// before compiling.
3180auto HexagonVectorCombine::vlalignb(IRBuilderBase &Builder, Value *Lo,

3182 assert(Lo->getType() == Hi->getType() && "Argument type mismatch");

3184 return Hi;

3185 int VecLen = getSizeOf(Hi);

// A constant byte amount can be done as a single shuffle over {Lo, Hi}.
3186 if (auto IntAmt = getIntValue(Amt))

3187 return getElementRange(Builder, Lo, Hi, VecLen - IntAmt->getSExtValue(),

3188 VecLen);

3189

// Full HVX vector: use the hardware align instruction.
3190 if (HST.isTypeForHVX(Hi->getType())) {

3191 assert(static_cast<unsigned>(VecLen) == HST.getVectorLength() &&

3192 "Expecting an exact HVX type");

3193 return createHvxIntrinsic(Builder, HST.getIntrinsicId(Hexagon::V6_vlalignb),

3194 Hi->getType(), {Hi, Lo, Amt});

3195 }

3196

// 4-byte vectors: scalarize via 64-bit shift (interior lines missing here).
3197 if (VecLen == 4) {

3202 Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext()), "trn");

3203 return Builder.CreateBitCast(Trunc, Hi->getType(), "cst");

3204 }

// 8-byte vectors: a left-align by Amt equals a right-align by VecLen-Amt.
3205 if (VecLen == 8) {

3207 return vralignb(Builder, Lo, Hi, Sub);

3208 }

3210}

3211

// Right-align-by-bytes: funnel shift {Hi:Lo} right by Amt bytes, returning
// the lower VecLen bytes. Constant amounts become a shuffle; HVX types use
// V6_valignb; 8-byte vectors use the scalar S2_valignrb intrinsic.
// NOTE(review): this listing is missing several interior lines (the isZero
// early-out condition and parts of the 4-/8-byte scalar paths) — restore
// from upstream before compiling.
3212auto HexagonVectorCombine::vralignb(IRBuilderBase &Builder, Value *Lo,

3214 assert(Lo->getType() == Hi->getType() && "Argument type mismatch");

3216 return Lo;

3217 int VecLen = getSizeOf(Lo);

// A constant byte amount can be done as a single shuffle over {Lo, Hi}.
3218 if (auto IntAmt = getIntValue(Amt))

3219 return getElementRange(Builder, Lo, Hi, IntAmt->getSExtValue(), VecLen);

3220

// Full HVX vector: use the hardware align instruction.
3221 if (HST.isTypeForHVX(Lo->getType())) {

3222 assert(static_cast<unsigned>(VecLen) == HST.getVectorLength() &&

3223 "Expecting an exact HVX type");

3224 return createHvxIntrinsic(Builder, HST.getIntrinsicId(Hexagon::V6_valignb),

3225 Lo->getType(), {Hi, Lo, Amt});

3226 }

3227

// 4-byte vectors: scalarize via 64-bit shift (interior lines missing here).
3228 if (VecLen == 4) {

3232 Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext()), "trn");

3233 return Builder.CreateBitCast(Trunc, Lo->getType(), "cst");

3234 }

// 8-byte vectors: bitcast both halves to i64 and use S2_valignrb.
3235 if (VecLen == 8) {

3236 Type *Int64Ty = Type::getInt64Ty(F.getContext());

3240 {Hi64, Lo64, Amt},

3241 nullptr, "cup");

3243 }

3245}

3246

3247

3248auto HexagonVectorCombine::concat(IRBuilderBase &Builder,

3250 assert(!Vecs.empty());

3252 std::vector<Value *> Work[2];

3253 int ThisW = 0, OtherW = 1;

3254

3255 Work[ThisW].assign(Vecs.begin(), Vecs.end());

3256 while (Work[ThisW].size() > 1) {

3258 SMask.resize(length(Ty) * 2);

3259 std::iota(SMask.begin(), SMask.end(), 0);

3260

3261 Work[OtherW].clear();

3262 if (Work[ThisW].size() % 2 != 0)

3264 for (int i = 0, e = Work[ThisW].size(); i < e; i += 2) {

3266 Work[ThisW][i], Work[ThisW][i + 1], SMask, "shf");

3267 Work[OtherW].push_back(Joined);

3268 }

3270 }

3271

3272

3273

3274

3275 SMask.resize(Vecs.size() * length(Vecs.front()->getType()));

3276 std::iota(SMask.begin(), SMask.end(), 0);

3279}

3280

3281auto HexagonVectorCombine::vresize(IRBuilderBase &Builder, Value *Val,

3282 int NewSize, Value *Pad) const -> Value * {

3285 assert(ValTy->getElementType() == Pad->getType());

3286

3287 int CurSize = length(ValTy);

3288 if (CurSize == NewSize)

3289 return Val;

3290

3291 if (CurSize > NewSize)

3292 return getElementRange(Builder, Val, Val, 0, NewSize);

3293

3294 SmallVector<int, 128> SMask(NewSize);

3295 std::iota(SMask.begin(), SMask.begin() + CurSize, 0);

3296 std::fill(SMask.begin() + CurSize, SMask.end(), CurSize);

3299}

3300

3301auto HexagonVectorCombine::rescale(IRBuilderBase &Builder, Value *Mask,

3303

3304

3305

3307

3308 Type *FromSTy = FromTy->getScalarType();

3309 Type *ToSTy = ToTy->getScalarType();

3310 if (FromSTy == ToSTy)

3311 return Mask;

3312

3313 int FromSize = getSizeOf(FromSTy);

3314 int ToSize = getSizeOf(ToSTy);

3315 assert(FromSize % ToSize == 0 || ToSize % FromSize == 0);

3316

3318 int FromCount = length(MaskTy);

3319 int ToCount = (FromCount * FromSize) / ToSize;

3320 assert((FromCount * FromSize) % ToSize == 0);

3321

3322 auto *FromITy = getIntTy(FromSize * 8);

3323 auto *ToITy = getIntTy(ToSize * 8);

3324

3325

3326

3328 Mask, VectorType::get(FromITy, FromCount, false), "sxt");

3330 Ext, VectorType::get(ToITy, ToCount, false), "cst");

3332 Cast, VectorType::get(getBoolTy(), ToCount, false), "trn");

3333}

3334

3335

3336auto HexagonVectorCombine::vlsb(IRBuilderBase &Builder, Value *Val) const

3339 if (ScalarTy == getBoolTy())

3340 return Val;

3341

3342 Value *Bytes = vbytes(Builder, Val);

3344 return Builder.CreateTrunc(Bytes, getBoolTy(getSizeOf(VecTy)), "trn");

3345

3346

3347 return Builder.CreateTrunc(Bytes, getBoolTy(), "trn");

3348}

3349

3350

3351auto HexagonVectorCombine::vbytes(IRBuilderBase &Builder, Value *Val) const

3354 if (ScalarTy == getByteTy())

3355 return Val;

3356

3357 if (ScalarTy != getBoolTy())

3358 return Builder.CreateBitCast(Val, getByteTy(getSizeOf(Val)), "cst");

3359

3361 return Builder.CreateSExt(Val, VectorType::get(getByteTy(), VecTy), "sxt");

3362 return Builder.CreateSExt(Val, getByteTy(), "sxt");

3363}

3364

3365auto HexagonVectorCombine::subvector(IRBuilderBase &Builder, Value *Val,

3366 unsigned Start, unsigned Length) const

3369 return getElementRange(Builder, Val, Val, Start, Length);

3370}

3371

3372auto HexagonVectorCombine::sublo(IRBuilderBase &Builder, Value *Val) const

3374 size_t Len = length(Val);

3375 assert(Len % 2 == 0 && "Length should be even");

3376 return subvector(Builder, Val, 0, Len / 2);

3377}

3378

3379auto HexagonVectorCombine::subhi(IRBuilderBase &Builder, Value *Val) const

3381 size_t Len = length(Val);

3382 assert(Len % 2 == 0 && "Length should be even");

3383 return subvector(Builder, Val, Len / 2, Len / 2);

3384}

3385

3386auto HexagonVectorCombine::vdeal(IRBuilderBase &Builder, Value *Val0,

3388 assert(Val0->getType() == Val1->getType());

3389 int Len = length(Val0);

3390 SmallVector<int, 128> Mask(2 * Len);

3391

3392 for (int i = 0; i != Len; ++i) {

3393 Mask[i] = 2 * i;

3394 Mask[i + Len] = 2 * i + 1;

3395 }

3397}

3398

3399auto HexagonVectorCombine::vshuff(IRBuilderBase &Builder, Value *Val0,

3401 assert(Val0->getType() == Val1->getType());

3402 int Len = length(Val0);

3403 SmallVector<int, 128> Mask(2 * Len);

3404

3405 for (int i = 0; i != Len; ++i) {

3406 Mask[2 * i + 0] = i;

3407 Mask[2 * i + 1] = i + Len;

3408 }

3410}

3411

3412auto HexagonVectorCombine::createHvxIntrinsic(IRBuilderBase &Builder,

3418 auto getCast = [&](IRBuilderBase &Builder, Value *Val,

3420 Type *SrcTy = Val->getType();

3421 if (SrcTy == DestTy)

3422 return Val;

3423

3424

3425

3426 assert(HST.isTypeForHVX(SrcTy, true));

3427

3428 Type *BoolTy = Type::getInt1Ty(F.getContext());

3430 return Builder.CreateBitCast(Val, DestTy, "cst");

3431

3432

3433 unsigned HwLen = HST.getVectorLength();

3434 Intrinsic::ID TC = HwLen == 64 ? Intrinsic::hexagon_V6_pred_typecast

3435 : Intrinsic::hexagon_V6_pred_typecast_128B;

3436 return Builder.CreateIntrinsic(TC, {DestTy, Val->getType()}, {Val},

3437 nullptr, "cup");

3438 };

3439

3443

3445 for (int i = 0, e = Args.size(); i != e; ++i) {

3447 Type *T = IntrTy->getParamType(i);

3448 if (A->getType() != T) {

3449 IntrArgs.push_back(getCast(Builder, A, T));

3450 } else {

3452 }

3453 }

3454 StringRef MaybeName = !IntrTy->getReturnType()->isVoidTy() ? "cup" : "";

3455 CallInst *Call = Builder.CreateCall(IntrFn, IntrArgs, MaybeName);

3456

3460

3462 if (RetTy == nullptr || CallTy == RetTy)

3463 return Call;

3464

3465 assert(HST.isTypeForHVX(CallTy, true));

3466 return getCast(Builder, Call, RetTy);

3467}

3468

3469auto HexagonVectorCombine::splitVectorElements(IRBuilderBase &Builder,

3471 unsigned ToWidth) const

3473

3474

3475

3476

3477

3478

3479

3480

3481

3482

3483

3484

3486 assert(VecTy->getElementType()->isIntegerTy());

3487 unsigned FromWidth = VecTy->getScalarSizeInBits();

3489 assert(ToWidth <= FromWidth && "Breaking up into wider elements?");

3490 unsigned NumResults = FromWidth / ToWidth;

3491

3494 unsigned Length = length(VecTy);

3495

3496

3497

3498 auto splitInHalf = [&](unsigned Begin, unsigned End, auto splitFunc) -> void {

3499

3500

3501

3502 if (Begin + 1 == End)

3503 return;

3504

3507

3508 auto *VTy = VectorType::get(getIntTy(Width / 2), 2 * Length, false);

3510

3511 Value *Res = vdeal(Builder, sublo(Builder, VVal), subhi(Builder, VVal));

3512

3513 unsigned Half = (Begin + End) / 2;

3514 Results[Begin] = sublo(Builder, Res);

3515 Results[Half] = subhi(Builder, Res);

3516

3517 splitFunc(Begin, Half, splitFunc);

3518 splitFunc(Half, End, splitFunc);

3519 };

3520

3521 splitInHalf(0, NumResults, splitInHalf);

3523}

3524

3525auto HexagonVectorCombine::joinVectorElements(IRBuilderBase &Builder,

3527 VectorType *ToType) const

3529 assert(ToType->getElementType()->isIntegerTy());

3530

3531

3532

3533

3534

3535

3536

3537

3539

3540 unsigned ToWidth = ToType->getScalarSizeInBits();

3541 unsigned Width = Inputs.front()->getType()->getScalarSizeInBits();

3542 assert(Width <= ToWidth);

3544 unsigned Length = length(Inputs.front()->getType());

3545

3546 unsigned NeedInputs = ToWidth / Width;

3547 if (Inputs.size() != NeedInputs) {

3548

3549

3552 Last, ConstantInt::get(Last->getType(), Width - 1), "asr");

3553 Inputs.resize(NeedInputs, Sign);

3554 }

3555

3556 while (Inputs.size() > 1) {

3557 Width *= 2;

3558 auto *VTy = VectorType::get(getIntTy(Width), Length, false);

3559 for (int i = 0, e = Inputs.size(); i < e; i += 2) {

3560 Value *Res = vshuff(Builder, Inputs[i], Inputs[i + 1]);

3561 Inputs[i / 2] = Builder.CreateBitCast(Res, VTy, "cst");

3562 }

3563 Inputs.resize(Inputs.size() / 2);

3564 }

3565

3566 assert(Inputs.front()->getType() == ToType);

3567 return Inputs.front();

3568}

3569

3570auto HexagonVectorCombine::calculatePointerDifference(Value *Ptr0,

3571 Value *Ptr1) const

3572 -> std::optional {

3573

3574 const SCEV *Scev0 = SE.getSCEV(Ptr0);

3575 const SCEV *Scev1 = SE.getSCEV(Ptr1);

3576 const SCEV *ScevDiff = SE.getMinusSCEV(Scev0, Scev1);

3578 APInt V = Const->getAPInt();

3579 if (V.isSignedIntN(8 * sizeof(int)))

3580 return static_cast<int>(V.getSExtValue());

3581 }

3582

3585 ~Builder() {

3587 I->eraseFromParent();

3588 }

3589 SmallVector<Instruction *, 8> ToErase;

3590 };

3591

3592#define CallBuilder(B, F) \

3593 [&](auto &B_) { \

3594 Value *V = B_.F; \

3595 if (auto *I = dyn_cast(V)) \

3596 B_.ToErase.push_back(I); \

3597 return V; \

3598 }(B)

3599

3600 auto Simplify = [this](Value *V) {

3602 return S;

3603 return V;

3604 };

3605

3606 auto StripBitCast = [](Value *V) {

3608 V = C->getOperand(0);

3609 return V;

3610 };

3611

3612 Ptr0 = StripBitCast(Ptr0);

3613 Ptr1 = StripBitCast(Ptr1);

3615 return std::nullopt;

3616

3619 if (Gep0->getPointerOperand() != Gep1->getPointerOperand())

3620 return std::nullopt;

3621 if (Gep0->getSourceElementType() != Gep1->getSourceElementType())

3622 return std::nullopt;

3623

3624 Builder B(Gep0->getParent());

3625 int Scale = getSizeOf(Gep0->getSourceElementType(), Alloc);

3626

3627

3628 if (Gep0->getNumOperands() != 2 || Gep1->getNumOperands() != 2)

3629 return std::nullopt;

3630

3631 Value *Idx0 = Gep0->getOperand(1);

3632 Value *Idx1 = Gep1->getOperand(1);

3633

3634

3636 Simplify(CallBuilder(B, CreateSub(Idx0, Idx1)))))

3637 return Diff->getSExtValue() * Scale;

3638

3639 KnownBits Known0 = getKnownBits(Idx0, Gep0);

3640 KnownBits Known1 = getKnownBits(Idx1, Gep1);

3642 if (Unknown.isAllOnes())

3643 return std::nullopt;

3644

3646 Value *AndU0 = Simplify(CallBuilder(B, CreateAnd(Idx0, MaskU)));

3647 Value *AndU1 = Simplify(CallBuilder(B, CreateAnd(Idx1, MaskU)));

3648 Value *SubU = Simplify(CallBuilder(B, CreateSub(AndU0, AndU1)));

3649 int Diff0 = 0;

3651 Diff0 = C->getSExtValue();

3652 } else {

3653 return std::nullopt;

3654 }

3655

3657 Value *AndK0 = Simplify(CallBuilder(B, CreateAnd(Idx0, MaskK)));

3658 Value *AndK1 = Simplify(CallBuilder(B, CreateAnd(Idx1, MaskK)));

3659 Value *SubK = Simplify(CallBuilder(B, CreateSub(AndK0, AndK1)));

3660 int Diff1 = 0;

3662 Diff1 = C->getSExtValue();

3663 } else {

3664 return std::nullopt;

3665 }

3666

3667 return (Diff0 + Diff1) * Scale;

3668

3669#undef CallBuilder

3670}

3671

3672auto HexagonVectorCombine::getNumSignificantBits(const Value *V,

3673 const Instruction *CtxI) const

3674 -> unsigned {

3676}

3677

3678auto HexagonVectorCombine::getKnownBits(const Value *V,

3679 const Instruction *CtxI) const

3680 -> KnownBits {

3682}

3683

3684auto HexagonVectorCombine::isSafeToClone(const Instruction &In) const -> bool {

3685 if (In.mayHaveSideEffects() || In.isAtomic() || In.isVolatile() ||

3686 In.isFenceLike() || In.mayReadOrWriteMemory()) {

3687 return false;

3688 }

3690 return false;

3691 return true;

3692}

3693

3694template

3695auto HexagonVectorCombine::isSafeToMoveBeforeInBB(const Instruction &In,

3697 const T &IgnoreInsts) const

3698 -> bool {

3699 auto getLocOrNone =

3700 [this](const Instruction &I) -> std::optional {

3702 switch (II->getIntrinsicID()) {

3703 case Intrinsic::masked_load:

3705 case Intrinsic::masked_store:

3707 }

3708 }

3710 };

3711

3712

3715

3717 return false;

3718

3720 return true;

3721 bool MayWrite = In.mayWriteToMemory();

3722 auto MaybeLoc = getLocOrNone(In);

3723

3724 auto From = In.getIterator();

3725 if (From == To)

3726 return true;

3727 bool MoveUp = (To != Block.end() && To->comesBefore(&In));

3729 MoveUp ? std::make_pair(To, From) : std::make_pair(std::next(From), To);

3730 for (auto It = Range.first; It != Range.second; ++It) {

3731 const Instruction &I = *It;

3732 if (llvm::is_contained(IgnoreInsts, &I))

3733 continue;

3734

3735 if (auto *II = dyn_cast(&I)) {

3736 if (II->getIntrinsicID() == Intrinsic::assume)

3737 continue;

3738 }

3739

3740 if (I.mayThrow())

3741 return false;

3743 if (!CB->hasFnAttr(Attribute::WillReturn))

3744 return false;

3745 if (!CB->hasFnAttr(Attribute::NoSync))

3746 return false;

3747 }

3748 if (I.mayReadOrWriteMemory()) {

3749 auto MaybeLocI = getLocOrNone(I);

3750 if (MayWrite || I.mayWriteToMemory()) {

3751 if (!MaybeLoc || !MaybeLocI)

3752 return false;

3753 if (!AA.isNoAlias(*MaybeLoc, *MaybeLocI))

3754 return false;

3755 }

3756 }

3757 }

3758 return true;

3759}

3760

3761auto HexagonVectorCombine::isByteVecTy(Type *Ty) const -> bool {

3763 return VecTy->getElementType() == getByteTy();

3764 return false;

3765}

3766

3767auto HexagonVectorCombine::getElementRange(IRBuilderBase &Builder, Value *Lo,

3770 assert(0 <= Start && size_t(Start + Length) < length(Lo) + length(Hi));

3771 SmallVector<int, 128> SMask(Length);

3772 std::iota(SMask.begin(), SMask.end(), Start);

3774}

3775

3776

3777

3778namespace {

3779class HexagonVectorCombineLegacy : public FunctionPass {

3780public:

3781 static char ID;

3782

3783 HexagonVectorCombineLegacy() : FunctionPass(ID) {}

3784

3785 StringRef getPassName() const override { return "Hexagon Vector Combine"; }

3786

3787 void getAnalysisUsage(AnalysisUsage &AU) const override {

3790 AU.addRequired();

3791 AU.addRequired();

3792 AU.addRequired();

3793 AU.addRequired();

3795 FunctionPass::getAnalysisUsage(AU);

3796 }

3797

3799 if (skipFunction(F))

3800 return false;

3801 AliasAnalysis &AA = getAnalysis().getAAResults();

3802 AssumptionCache &AC =

3803 getAnalysis().getAssumptionCache(F);

3804 DominatorTree &DT = getAnalysis().getDomTree();

3805 ScalarEvolution &SE = getAnalysis().getSE();

3806 TargetLibraryInfo &TLI =

3807 getAnalysis().getTLI(F);

3808 auto &TM = getAnalysis().getTM();

3809 HexagonVectorCombine HVC(F, AA, AC, DT, SE, TLI, TM);

3810 return HVC.run();

3811 }

3812};

3813}

3814

3815char HexagonVectorCombineLegacy::ID = 0;

3816

3818 "Hexagon Vector Combine", false, false)

3827

3829 return new HexagonVectorCombineLegacy();

3830}

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

AMDGPU Prepare AGPR Alloc

This file implements a class to represent arbitrary precision integral constant values and operations...

MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL

Function Alias Analysis Results

static IntegerType * getIntTy(IRBuilderBase &B, const TargetLibraryInfo *TLI)

static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")

static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

Analysis containing CSE Info

This file defines the DenseMap class.

static bool runOnFunction(Function &F, bool PostInlining)

static cl::opt< unsigned > SizeLimit("eif-limit", cl::init(6), cl::Hidden, cl::desc("Size limit in Hexagon early if-conversion"))

static Value * locateIndexesFromIntrinsic(Instruction *In)

Definition HexagonVectorCombine.cpp:2047

Instruction * locateDestination(Instruction *In, HvxIdioms::DstQualifier &Qual)

Definition HexagonVectorCombine.cpp:1918

Value * getReinterpretiveCast_i8_to_i32(const HexagonVectorCombine &HVC, IRBuilderBase &Builder, LLVMContext &Ctx, Value *I)

Definition HexagonVectorCombine.cpp:2079

static Value * locateIndexesFromGEP(Value *In)

Definition HexagonVectorCombine.cpp:2019

#define CallBuilder(B, F)

Value * getPointer(Value *Ptr)

Definition HexagonVectorCombine.cpp:1852

#define DEFAULT_HVX_VTCM_PAGE_SIZE

Definition HexagonVectorCombine.cpp:63

static Value * locateAddressFromIntrinsic(Instruction *In)

Definition HexagonVectorCombine.cpp:1957

static Instruction * selectDestination(Instruction *In, HvxIdioms::DstQualifier &Qual)

Definition HexagonVectorCombine.cpp:1865

Value * get_i32_Mask(const HexagonVectorCombine &HVC, IRBuilderBase &Builder, LLVMContext &Ctx, unsigned int pattern)

Definition HexagonVectorCombine.cpp:2094

bool isArithmetic(unsigned Opc)

Definition HexagonVectorCombine.cpp:1834

static Type * getIndexType(Value *In)

Definition HexagonVectorCombine.cpp:2003

GetElementPtrInst * locateGepFromIntrinsic(Instruction *In)

Definition HexagonVectorCombine.cpp:1940

Value * getReinterpretiveCast_i16_to_i32(const HexagonVectorCombine &HVC, IRBuilderBase &Builder, LLVMContext &Ctx, Value *I)

Definition HexagonVectorCombine.cpp:2064

iv Induction Variable Users

static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)

static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)

static bool isCandidate(const MachineInstr *MI, Register &DefedReg, Register FrameReg)

static bool isUndef(const MachineInstr &MI)

ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))

uint64_t IntrinsicInst * II

#define INITIALIZE_PASS_DEPENDENCY(depName)

#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)

#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)

Remove Loads Into Fake Uses

static ConstantInt * getConstInt(MDNode *MD, unsigned NumOp)

This file defines the SmallVector class.

static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")

static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")

static SymbolRef::Type getType(const Symbol *Sym)

Target-Independent Code Generator Pass Configuration Options pass.

A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.

bool isAllOnes() const

Determine if all bits are set. This is true for zero-width values.

APInt ashr(unsigned ShiftAmt) const

Arithmetic right-shift function.

Type * getAllocatedType() const

Return the type that is being allocated by the instruction.

unsigned getAddressSpace() const

Return the address space for the allocation.

AnalysisUsage & addRequired()

LLVM_ABI void setPreservesCFG()

This function should be called by the pass, iff they do not:

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

An immutable pass that tracks lazily created AssumptionCache objects.

A cache of @llvm.assume calls within a function.

InstListType::const_iterator const_iterator

InstListType::iterator iterator

Instruction iterators...

const Instruction * getTerminator() const LLVM_READONLY

Returns the terminator instruction if the block is well formed or null if the block is not well forme...

AttributeList getAttributes() const

Return the attributes for this call.

@ ICMP_ULT

unsigned less than

static LLVM_ABI Constant * get(LLVMContext &Context, ArrayRef< uint8_t > Elts)

get() constructors - Return a constant with vector type with an element count and element type matchi...

This is the shared class of boolean and integer constants.

static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)

static ConstantInt * getSigned(IntegerType *Ty, int64_t V)

Return a ConstantInt with the specified value for the specified type.

This is an important base class in LLVM.

static LLVM_ABI Constant * getAllOnesValue(Type *Ty)

static LLVM_ABI Constant * getNullValue(Type *Ty)

Constructor to create a '0' constant of arbitrary type.

A parsed version of the target data layout string in and methods for querying it.

LLVM_ABI TypeSize getTypeAllocSize(Type *Ty) const

Returns the offset in bytes between successive objects of the specified type, including alignment pad...

iterator_range< iterator > children()

DomTreeNodeBase< NodeT > * getRootNode()

getRootNode - This returns the entry node for the CFG of the function.

Legacy analysis pass which computes a DominatorTree.

Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.

FunctionPass class - This class is used to implement most global optimizations.

FunctionType * getFunctionType() const

Returns the FunctionType for me.

const BasicBlock & back() const

an instruction for type-safe pointer arithmetic to access elements of arrays and structs

Value * getPointerOperand()

bool isHVXVectorType(EVT VecTy, bool IncludeBool=false) const

bool useHVXV62Ops() const

bool useHVXV69Ops() const

unsigned getVectorLength() const

bool useHVXV66Ops() const

bool isTypeForHVX(Type *VecTy, bool IncludeBool=false) const

Intrinsic::ID getIntrinsicId(unsigned Opc) const

Common base class shared among various IRBuilders.

AllocaInst * CreateAlloca(Type *Ty, unsigned AddrSpace, Value *ArraySize=nullptr, const Twine &Name="")

LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")

Return a vector value that contains NumElts elements of the value V.

Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")

LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)

Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")

Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)

Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")

LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")

Create a call to intrinsic ID with Args, mangled using Types.

Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")

Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)

Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)

Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")

LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)

Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...

Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)

Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)

Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")

Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")

StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)

Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)

Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")

CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)

Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)

void SetInsertPoint(BasicBlock *TheBB)

This specifies that created instructions should be appended to the end of the specified block.

Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)

Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")

Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)

const char * getOpcodeName() const

Class to represent integer types.

static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)

This static method is the primary way of constructing an IntegerType.

A wrapper class for inspecting calls to intrinsic functions.

Intrinsic::ID getIntrinsicID() const

Return the intrinsic ID of this intrinsic.

This is an important class for using LLVM in a threaded context.

An instruction for reading from memory.

bool doesNotAccessMemory() const

Whether this function accesses no memory.

bool onlyAccessesInaccessibleMem() const

Whether this function only (at most) accesses inaccessible memory.

static LLVM_ABI std::optional< MemoryLocation > getOrNone(const Instruction *Inst)

static LLVM_ABI MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx, const TargetLibraryInfo *TLI)

Return a location representing a particular argument of a call.

static LLVM_ABI PoisonValue * get(Type *T)

Static factory methods - Return an 'poison' object of the specified type.

The main scalar evolution driver.

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

An instruction for storing to memory.

Provides information about what library functions are available for the current target.

Primary interface to the complete machine description for the target machine.

virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const

Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...

Target-Independent Code Generator Pass Configuration Options.

The instances of the Type class are immutable: once they are created, they are never changed.

bool isVectorTy() const

True if this is an instance of VectorType.

LLVM_ABI unsigned getPointerAddressSpace() const

Get the address space of this pointer or pointer vector type.

Type * getScalarType() const

If this is a vector type, return the element type, otherwise return 'this'.

LLVMContext & getContext() const

Return the LLVMContext in which this type was uniqued.

LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY

If this is a vector type, return the getPrimitiveSizeInBits value for the element type.

bool isIntegerTy() const

True if this is an instance of IntegerType.

static LLVM_ABI UndefValue * get(Type *T)

Static factory methods - Return an 'undef' object of the specified type.

Value * getOperand(unsigned i) const

LLVM Value Representation.

Type * getType() const

All values are typed, get the type of this value.

LLVM_ABI void replaceAllUsesWith(Value *V)

Change all uses of this to point to a new Value.

LLVM_ABI StringRef getName() const

Return a constant reference to the value's name.

const ParentTy * getParent() const

self_iterator getIterator()

This class implements an extremely fast bulk output stream that can only output to a stream.

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

Abstract Attribute helper functions.

Rounding

Possible values of current rounding mode, which is specified in bits 23:22 of FPCR.

constexpr char Align[]

Key for Kernel::Arg::Metadata::mAlign.

constexpr char Args[]

Key for Kernel::Metadata::mArgs.

constexpr std::underlying_type_t< E > Mask()

Get a bitmask with 1s in all places up to the high-order bit of E's largest value.

unsigned ID

LLVM IR allows to use arbitrary numbers as calling convention identifiers.

@ C

The default llvm calling convention, compatible with C.

@ BasicBlock

Various leaf nodes.

LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})

Look up the Function declaration of the intrinsic id in the Module M.

Predicate

Predicate - These are "(BI << 5) | BO" for various predicates.

BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)

BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)

bool match(Val *V, const Pattern &P)

BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)

Matches logical shift operations.

class_match< ConstantInt > m_ConstantInt()

Match an arbitrary ConstantInt and ignore it.

BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)

class_match< Value > m_Value()

Match an arbitrary value and ignore it.

BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)

match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)

Combine two pattern matchers matching L || R.

@ Undef

Value of the register doesn't matter.

initializer< Ty > init(const Ty &Val)

@ User

could "use" a pointer

friend class Instruction

Iterator for Instructions in a BasicBlock.

LLVM_ABI Instruction * getTerminator() const

LLVM_ABI Instruction & front() const

This is an optimization pass for GlobalISel generic memory operations.

FunctionPass * createHexagonVectorCombineLegacyPass()

Definition HexagonVectorCombine.cpp:3828

FunctionAddr VTableAddr Value

bool all_of(R &&range, UnaryPredicate P)

Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.

auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)

Get the size of a range.

LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())

If the specified value is a trivially dead instruction, delete it.

decltype(auto) dyn_cast(const From &Val)

dyn_cast - Return the argument parameter cast to the specified type.

FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty

const Value * getLoadStorePointerOperand(const Value *V)

A helper function that returns the pointer operand of a load or store instruction.

void append_range(Container &C, Range &&R)

Wrapper function to append range R to container C.

constexpr bool isPowerOf2_64(uint64_t Value)

Return true if the argument is a power of two > 0 (64 bit edition.)

MemoryEffectsBase< IRMemLocation > MemoryEffects

Summary of how a function affects memory in the program.

LLVM_ABI Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)

Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...

OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)

Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.

unsigned Log2_64(uint64_t Value)

Return the floor log base 2 of the specified value, -1 if the value is zero.

detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)

Returns a concatenated range across two or more ranges.

uint64_t PowerOf2Ceil(uint64_t A)

Returns the power of two which is greater than or equal to the given value.

LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)

See if we can compute a simplified version of this instruction.

DomTreeNodeBase< BasicBlock > DomTreeNode

auto reverse(ContainerTy &&C)

constexpr bool isPowerOf2_32(uint32_t Value)

Return true if the argument is a power of two > 0.

void sort(IteratorTy Start, IteratorTy End)

LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)

Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...

LLVM_ABI raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

bool none_of(R &&Range, UnaryPredicate P)

Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.

class LLVM_GSL_OWNER SmallVector

Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...

bool isa(const From &Val)

isa - Return true if the parameter to the template is an instance of one of the template type argu...

LLVM_ABI raw_fd_ostream & errs()

This returns a reference to a raw_ostream for standard error.

IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >

@ And

Bitwise or logical AND of integers.

@ Sub

Subtraction of integers.

uint64_t alignTo(uint64_t Size, Align A)

Returns a multiple of A needed to store Size bytes.

DWARFExpression::Operation Op

auto max_element(R &&Range)

Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...

raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)

ArrayRef(const T &OneElt) -> ArrayRef< T >

decltype(auto) cast(const From &Val)

cast - Return the argument parameter cast to the specified type.

void erase_if(Container &C, UnaryPredicate P)

Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...

LLVM_ABI unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)

Get the upper bound on bit size for this Value Op as a signed integer.

Type * getLoadStoreType(const Value *I)

A helper function that returns the type of a load or store instruction.

AAResults AliasAnalysis

Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.

LLVM_ABI bool mayHaveNonDefUseDependency(const Instruction &I)

Returns true if the result or effects of the given instructions I depend values not reachable through...

MaskT vshuff(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Size, bool TakeOdd)

MaskT vdeal(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Size, bool TakeOdd)

void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)

Implement std::swap in terms of BitVector swap.

This struct is a compact representation of a valid (non-zero power of two) alignment.

constexpr uint64_t value() const

This is a hole in the type system and should not be abused.

bool isSimple() const

Test if the given EVT is simple (as opposed to being extended).

TypeSize getSizeInBits() const

Return the size of the specified value type in bits.

static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)

Return the value type corresponding to the specified type.

MVT getSimpleVT() const

Return the SimpleValueType held in the specified simple EVT.