LLVM: lib/Target/Hexagon/HexagonVectorCombine.cpp Source File

//===-- HexagonVectorCombine.cpp ------------------------------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// HexagonVectorCombine is a utility class implementing a variety of functions
// that assist in vector-based optimizations.
//
// AlignVectors: replace unaligned vector loads and stores with aligned ones.
// HvxIdioms: recognize various opportunities to generate HVX intrinsic code.
//===----------------------------------------------------------------------===//

#include "llvm/IR/IntrinsicsHexagon.h"

#include <algorithm>
#include <deque>
#include <map>
#include <optional>
#include <set>
#include <utility>
#include <vector>

#define DEBUG_TYPE "hexagon-vc"

#define DEFAULT_HVX_VTCM_PAGE_SIZE 131072

using namespace llvm;

namespace {

class HexagonVectorCombine {
public:
  HexagonVectorCombine(Function &F_, AliasAnalysis &AA_, AssumptionCache &AC_,
                       DominatorTree &DT_, ScalarEvolution &SE_,
                       TargetLibraryInfo &TLI_, const TargetMachine &TM_)
      : F(F_), DL(F.getDataLayout()), AA(AA_), AC(AC_), DT(DT_), SE(SE_),
        TLI(TLI_),
        HST(static_cast<const HexagonSubtarget &>(*TM_.getSubtargetImpl(F))) {}

  bool run();

  // Common integer type.
  IntegerType *getIntTy(unsigned Width = 32) const;
  // Byte type: either scalar (when ElemCount == 0), or vector with the
  // given element count.
  Type *getByteTy(int ElemCount = 0) const;
  // Boolean type: either scalar (when ElemCount == 0), or vector with the
  // given element count.
  Type *getBoolTy(int ElemCount = 0) const;
  ConstantInt *getConstInt(int Val, unsigned Width = 32) const;
  std::optional<APInt> getIntValue(const Value *Val) const;
  bool isZero(const Value *Val) const;
  bool isUndef(const Value *Val) const;
  bool isTrue(const Value *Val) const;
  bool isFalse(const Value *Val) const;

  VectorType *getHvxTy(Type *ElemTy, bool Pair = false) const;

  enum SizeKind {
    Store, // Store size.
    Alloc, // Alloc size.
  };
  int getSizeOf(const Value *Val, SizeKind Kind = Store) const;
  int getSizeOf(const Type *Ty, SizeKind Kind = Store) const;
  int getTypeAlignment(Type *Ty) const;
  size_t length(Value *Val) const;
  size_t length(Type *Ty) const;

  Constant *getNullValue(Type *Ty) const;
  Constant *getFullValue(Type *Ty) const;
  Constant *getConstSplat(Type *Ty, int Val) const;

  Value *simplify(Value *Val) const;

  Value *insertb(IRBuilderBase &Builder, Value *Dest, Value *Src, int Start,
                 int Length, int Where) const;
  Value *vlalignb(IRBuilderBase &Builder, Value *Lo, Value *Hi,
                  Value *Amt) const;
  Value *vralignb(IRBuilderBase &Builder, Value *Lo, Value *Hi,
                  Value *Amt) const;
  Value *concat(IRBuilderBase &Builder, ArrayRef<Value *> Vecs) const;
  Value *vresize(IRBuilderBase &Builder, Value *Val, int NewSize,
                 Value *Pad) const;
  Value *rescale(IRBuilderBase &Builder, Value *Mask, Type *FromTy,
                 Type *ToTy) const;
  Value *vlsb(IRBuilderBase &Builder, Value *Val) const;
  Value *vbytes(IRBuilderBase &Builder, Value *Val) const;
  Value *subvector(IRBuilderBase &Builder, Value *Val, unsigned Start,
                   unsigned Length) const;

  Value *createHvxIntrinsic(IRBuilderBase &Builder, Intrinsic::ID IntID,
                            Type *RetTy, ArrayRef<Value *> Args,
                            ArrayRef<Type *> ArgTys = {},
                            ArrayRef<Value *> MDSources = {}) const;
  SmallVector<Value *> splitVectorElements(IRBuilderBase &Builder, Value *Vec,
                                           unsigned ToWidth) const;

  std::optional<int> calculatePointerDifference(Value *Ptr0, Value *Ptr1) const;

  unsigned getNumSignificantBits(const Value *V,
                                 const Instruction *CtxI = nullptr) const;
  KnownBits getKnownBits(const Value *V,
                         const Instruction *CtxI = nullptr) const;

  bool isSafeToClone(const Instruction &In) const;

  template <typename T = std::vector<Instruction *>>
  bool isSafeToMoveBeforeInBB(const Instruction &In,
                              BasicBlock::const_iterator To,
                              const T &IgnoreInsts = {}) const;

  // This function is only used for assertions at the moment.
  [[maybe_unused]] bool isByteVecTy(Type *Ty) const;

  Function &F;
  const DataLayout &DL;
  AliasAnalysis &AA;
  AssumptionCache &AC;
  DominatorTree &DT;
  ScalarEvolution &SE;
  TargetLibraryInfo &TLI;
  const HexagonSubtarget &HST;

private:
  Value *getElementRange(IRBuilderBase &Builder, Value *Lo, Value *Hi,
                         int Start, int Length) const;
};

class AlignVectors {
public:
  AlignVectors(const HexagonVectorCombine &HVC_) : HVC(HVC_) {}

  bool run();

private:
  using InstList = std::vector<Instruction *>;
  using InstMap = DenseMap<Instruction *, Instruction *>;

  struct AddrInfo {
    AddrInfo(const AddrInfo &) = default;
    AddrInfo(const HexagonVectorCombine &HVC, Instruction *I, Value *A,
             Type *T, Align H)
        : Inst(I), Addr(A), ValTy(T), HaveAlign(H),
          NeedAlign(HVC.getTypeAlignment(ValTy)) {}
    AddrInfo &operator=(const AddrInfo &) = default;

    Instruction *Inst;
    Value *Addr;
    Type *ValTy;
    Align HaveAlign;
    Align NeedAlign;
    int Offset = 0; // Offset (in bytes) from the first member of the
                    // containing AddrList.
  };
  using AddrList = std::vector<AddrInfo>;

  struct InstrLess {
    bool operator()(const Instruction *A, const Instruction *B) const {
      return A->comesBefore(B);
    }
  };
  using DepList = std::set<Instruction *, InstrLess>;

  struct MoveGroup {
    MoveGroup(const AddrInfo &AI, Instruction *B, bool Hvx, bool Load)
        : Base(B), Main{AI.Inst}, Clones{}, IsHvx(Hvx), IsLoad(Load) {}
    MoveGroup() = default;
    Instruction *Base; // Base instruction of the parent address group.
    InstList Main;     // Main group of instructions.
    InstList Deps;     // List of dependencies.
    InstMap Clones;    // Map from original deps to their clones.
    bool IsHvx;        // Is this group of HVX instructions?
    bool IsLoad;       // Is this group of loads?
  };
  using MoveList = std::vector<MoveGroup>;

  struct ByteSpan {
    // A representation of a vector as a sequence of byte segments placed
    // at byte positions within a conceptual contiguous memory region.
    struct Segment {
      // Segment of a Value: 'Len' bytes starting at byte 'Begin'.
      Segment(Value *Val, int Begin, int Len)
          : Val(Val), Start(Begin), Size(Len) {}
      Segment(const Segment &Seg) = default;
      Segment &operator=(const Segment &Seg) = default;
      Value *Val; // Value representable as a sequence of bytes.
      int Start;  // First byte of the value that belongs to the segment.
      int Size;   // Number of bytes in the segment.
    };

    struct Block {
      Block(Value *Val, int Len, int Pos) : Seg(Val, 0, Len), Pos(Pos) {}
      Block(Value *Val, int Off, int Len, int Pos)
          : Seg(Val, Off, Len), Pos(Pos) {}
      Block(const Block &Blk) = default;
      Block &operator=(const Block &Blk) = default;
      Segment Seg; // Value segment.
      int Pos;     // Position (offset) of the block in the span.
    };

    int extent() const;
    ByteSpan section(int Start, int Length) const;
    ByteSpan &shift(int Offset);
    SmallVector<Value *, 8> values() const;

    int size() const { return Blocks.size(); }
    Block &operator[](int i) { return Blocks[i]; }
    const Block &operator[](int i) const { return Blocks[i]; }

    std::vector<Block> Blocks;

    using iterator = decltype(Blocks)::iterator;
    iterator begin() { return Blocks.begin(); }
    iterator end() { return Blocks.end(); }
    using const_iterator = decltype(Blocks)::const_iterator;
    const_iterator begin() const { return Blocks.begin(); }
    const_iterator end() const { return Blocks.end(); }
  };

  std::optional<AddrInfo> getAddrInfo(Instruction &In) const;
  bool isHvx(const AddrInfo &AI) const;
  // This function is only used for assertions at the moment.
  [[maybe_unused]] bool isSectorTy(Type *Ty) const;

  Value *getPayload(Value *Val) const;
  Value *getMask(Value *Val) const;
  Value *getPassThrough(Value *Val) const;

  Value *createAdjustedPointer(IRBuilderBase &Builder, Value *Ptr, Type *ValTy,
                               int Adjust,
                               const InstMap &CloneMap = InstMap()) const;
  Value *createAlignedPointer(IRBuilderBase &Builder, Value *Ptr, Type *ValTy,
                              int Alignment,
                              const InstMap &CloneMap = InstMap()) const;

  Value *createLoad(IRBuilderBase &Builder, Type *ValTy, Value *Ptr,
                    Value *Predicate, int Alignment, Value *Mask,
                    Value *PassThru, ArrayRef<Value *> MDSources = {}) const;
  Value *createSimpleLoad(IRBuilderBase &Builder, Type *ValTy, Value *Ptr,
                          int Alignment,
                          ArrayRef<Value *> MDSources = {}) const;
  Value *createPredicatedLoad(IRBuilderBase &Builder, Type *ValTy, Value *Ptr,
                              Value *Predicate, int Alignment,
                              ArrayRef<Value *> MDSources = {}) const;
  Value *createStore(IRBuilderBase &Builder, Value *Val, Value *Ptr,
                     Value *Predicate, int Alignment, Value *Mask,
                     ArrayRef<Value *> MDSources = {}) const;
  Value *createSimpleStore(IRBuilderBase &Builder, Value *Val, Value *Ptr,
                           int Alignment,
                           ArrayRef<Value *> MDSources = {}) const;
  Value *createPredicatedStore(IRBuilderBase &Builder, Value *Val, Value *Ptr,
                               Value *Predicate, int Alignment,
                               ArrayRef<Value *> MDSources = {}) const;

  DepList getUpwardDeps(Instruction *In, Instruction *Base) const;
  bool createAddressGroups();
  MoveList createLoadGroups(const AddrList &Group) const;
  MoveList createStoreGroups(const AddrList &Group) const;
  bool moveTogether(MoveGroup &Move) const;
  template <typename T>
  InstMap cloneBefore(BasicBlock::iterator To, T &&Insts) const;

  void realignLoadGroup(IRBuilderBase &Builder, const ByteSpan &VSpan,
                        int ScLen, Value *AlignVal, Value *AlignAddr) const;
  void realignStoreGroup(IRBuilderBase &Builder, const ByteSpan &VSpan,
                         int ScLen, Value *AlignVal, Value *AlignAddr) const;
  bool realignGroup(const MoveGroup &Move) const;

  Value *makeTestIfUnaligned(IRBuilderBase &Builder, Value *AlignVal,
                             int Alignment) const;

  std::map<Instruction *, AddrList> AddrGroups;
  const HexagonVectorCombine &HVC;
};

[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS,
                                         const AlignVectors::AddrInfo &AI) {
  OS << "Inst: " << AI.Inst << "  " << *AI.Inst << '\n';
  OS << "Addr: " << *AI.Addr << '\n';
  OS << "Type: " << *AI.ValTy << '\n';
  OS << "HaveAlign: " << AI.HaveAlign.value() << '\n';
  OS << "NeedAlign: " << AI.NeedAlign.value() << '\n';
  OS << "Offset: " << AI.Offset;
  return OS;
}

[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS,
                                         const AlignVectors::MoveGroup &MG) {
  OS << "IsLoad:" << (MG.IsLoad ? "yes" : "no");
  OS << ", IsHvx:" << (MG.IsHvx ? "yes" : "no") << '\n';
  OS << "Main\n";
  for (Instruction *I : MG.Main)
    OS << "  " << *I << '\n';
  OS << "Deps\n";
  for (Instruction *I : MG.Deps)
    OS << "  " << *I << '\n';
  OS << "Clones\n";
  for (auto [K, V] : MG.Clones) {
    OS << "  ";
    K->printAsOperand(OS, false);
    OS << "\t-> " << *V << '\n';
  }
  return OS;
}

[[maybe_unused]] raw_ostream &
operator<<(raw_ostream &OS, const AlignVectors::ByteSpan::Block &B) {
  OS << "  @" << B.Pos << " [" << B.Seg.Start << ',' << B.Seg.Size << "] ";
  if (B.Seg.Val == reinterpret_cast<const Value *>(&B)) {
    OS << "(self:" << B.Seg.Val << ')';
  } else if (B.Seg.Val != nullptr) {
    OS << *B.Seg.Val;
  } else {
    OS << "(null)";
  }
  return OS;
}

[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS,
                                         const AlignVectors::ByteSpan &BS) {
  OS << "ByteSpan[size=" << BS.size() << ", extent=" << BS.extent() << '\n';
  for (const AlignVectors::ByteSpan::Block &B : BS)
    OS << B << '\n';
  OS << ']';
  return OS;
}

class HvxIdioms {
public:
  enum DstQualifier {
    Undefined = 0,
    Arithmetic,
    LdSt,
    LLVM_Gather,
    LLVM_Scatter,
    HEX_Gather_Scatter,
    HEX_Gather,
    HEX_Scatter,
    Call,
  };

  HvxIdioms(const HexagonVectorCombine &HVC_) : HVC(HVC_) {
    auto *Int32Ty = HVC.getIntTy(32);
    HvxI32Ty = HVC.getHvxTy(Int32Ty, /*Pair=*/false);
    HvxP32Ty = HVC.getHvxTy(Int32Ty, /*Pair=*/true);
  }

  bool run();

private:
  enum Signedness { Positive, Signed, Unsigned };

  // Value + signedness: tracks whether the value should be treated as
  // signed, unsigned, or is known to be positive.
  struct SValue {
    Value *Val;
    Signedness Sgn;
  };

  struct FxpOp {
    unsigned Opcode;
    unsigned Frac; // Number of fraction bits.
    SValue X, Y;
    // If present, add 1 << RoundAt before shift:
    std::optional<unsigned> RoundAt;
    VectorType *ResTy;
  };

  auto getNumSignificantBits(Value *V, Instruction *In) const
      -> std::pair<unsigned, Signedness>;
  auto canonSgn(SValue X, SValue Y) const -> std::pair<SValue, SValue>;

  auto matchFxpMul(Instruction &In) const -> std::optional<FxpOp>;
  auto processFxpMul(Instruction &In, const FxpOp &Op) const -> Value *;

  auto processFxpMulChopped(IRBuilderBase &Builder, Instruction &In,
                            const FxpOp &Op) const -> Value *;
  auto createMulQ15(IRBuilderBase &Builder, SValue X, SValue Y,
                    bool Rounding) const -> Value *;
  auto createMulQ31(IRBuilderBase &Builder, SValue X, SValue Y,
                    bool Rounding) const -> Value *;

  auto createAddCarry(IRBuilderBase &Builder, Value *X, Value *Y,
                      Value *CarryIn = nullptr) const
      -> std::pair<Value *, Value *>;
  auto createMul16(IRBuilderBase &Builder, SValue X, SValue Y) const -> Value *;
  auto createMulH16(IRBuilderBase &Builder, SValue X, SValue Y) const
      -> Value *;
  auto createMul32(IRBuilderBase &Builder, SValue X, SValue Y) const
      -> std::pair<Value *, Value *>;

  bool matchScatter(Instruction &In) const;
  bool matchGather(Instruction &In) const;
  Value *processVScatter(Instruction &In) const;
  Value *processVGather(Instruction &In) const;

  const HexagonVectorCombine &HVC;

  VectorType *HvxI32Ty;
  VectorType *HvxP32Ty;
};

[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS,
                                         const HvxIdioms::FxpOp &Op) {
  static const char *SgnNames[] = {"Positive", "Signed", "Unsigned"};
  OS << Instruction::getOpcodeName(Op.Opcode) << '.' << Op.Frac;
  if (Op.RoundAt.has_value()) {
    if (Op.Frac != 0 && *Op.RoundAt == Op.Frac - 1) {
      OS << ":rnd";
    } else {
      OS << " + 1<<" << *Op.RoundAt;
    }
  }
  OS << "\n  X:(" << SgnNames[Op.X.Sgn] << ") " << *Op.X.Val << "\n"
     << "  Y:(" << SgnNames[Op.Y.Sgn] << ") " << *Op.Y.Val;
  return OS;
}

} // namespace

namespace {

template <typename T> T *getIfUnordered(T *MaybeT) {
  return MaybeT && MaybeT->isUnordered() ? MaybeT : nullptr;
}
template <typename T> T *isCandidate(Instruction *In) {
  return dyn_cast<T>(In);
}
template <> LoadInst *isCandidate<LoadInst>(Instruction *In) {
  return getIfUnordered(dyn_cast<LoadInst>(In));
}
template <> StoreInst *isCandidate<StoreInst>(Instruction *In) {
  return getIfUnordered(dyn_cast<StoreInst>(In));
}

#if !defined(_MSC_VER) || _MSC_VER >= 1926
// Older versions of MSVC have trouble deducing the variadic std::map
// template arguments here, so use the explicit two-parameter overload
// below for them.
template <typename Pred, typename... Ts>
void erase_if(std::map<Ts...> &map, Pred p)
#else
template <typename Pred, typename T, typename U>
void erase_if(std::map<T, U> &map, Pred p)
#endif
{
  for (auto i = map.begin(), e = map.end(); i != e;) {
    if (p(*i))
      i = map.erase(i);
    else
      i = std::next(i);
  }
}

// Forward other erase_ifs to the LLVM implementations.
template <typename Pred, typename T> void erase_if(T &&container, Pred p) {
  llvm::erase_if(std::forward<T>(container), p);
}

} // namespace

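// --- Begin AlignVectors
// ByteSpan utilities. A ByteSpan models a collection of byte segments of
// IR values (Blocks), each placed at a byte position within a conceptual
// contiguous memory region.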

auto AlignVectors::ByteSpan::extent() const -> int {
  if (size() == 0)
    return 0;
  int Min = Blocks[0].Pos;
  int Max = Blocks[0].Pos + Blocks[0].Seg.Size;
  for (int i = 1, e = size(); i != e; ++i) {
    Min = std::min(Min, Blocks[i].Pos);
    Max = std::max(Max, Blocks[i].Pos + Blocks[i].Seg.Size);
  }
  return Max - Min;
}

auto AlignVectors::ByteSpan::section(int Start, int Length) const -> ByteSpan {
  ByteSpan Section;
  for (const ByteSpan::Block &B : Blocks) {
    int L = std::max(B.Pos, Start);                        // Left end.
    int R = std::min(B.Pos + B.Seg.Size, Start + Length);  // Right end + 1.
    if (L < R) {
      // How much to chop off the beginning of the segment:
      int Off = L > B.Pos ? L - B.Pos : 0;
      Section.Blocks.emplace_back(B.Seg.Val, B.Seg.Start + Off, R - L, L);
    }
  }
  return Section;
}

auto AlignVectors::ByteSpan::shift(int Offset) -> ByteSpan & {
  for (Block &B : Blocks)
    B.Pos += Offset;
  return *this;
}

auto AlignVectors::ByteSpan::values() const -> SmallVector<Value *, 8> {
  SmallVector<Value *, 8> Values(Blocks.size());
  for (int i = 0, e = Blocks.size(); i != e; ++i)
    Values[i] = Blocks[i].Seg.Val;
  return Values;
}
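// Classify an instruction as a load or store (plain or masked), extracting
// the address, the type of the accessed value, and the alignment known
// from the IR.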

auto AlignVectors::getAddrInfo(Instruction &In) const
    -> std::optional<AddrInfo> {
  if (auto *L = isCandidate<LoadInst>(&In))
    return AddrInfo(HVC, L, L->getPointerOperand(), L->getType(),
                    L->getAlign());
  if (auto *S = isCandidate<StoreInst>(&In))
    return AddrInfo(HVC, S, S->getPointerOperand(),
                    S->getValueOperand()->getType(), S->getAlign());
  if (auto *II = isCandidate<IntrinsicInst>(&In)) {
    Intrinsic::ID ID = II->getIntrinsicID();
    switch (ID) {
    case Intrinsic::masked_load:
      return AddrInfo(HVC, II, II->getArgOperand(0), II->getType(),
                      II->getParamAlign(0).valueOrOne());
    case Intrinsic::masked_store:
      return AddrInfo(HVC, II, II->getArgOperand(1),
                      II->getArgOperand(0)->getType(),
                      II->getParamAlign(1).valueOrOne());
    }
  }
  return std::nullopt;
}

auto AlignVectors::isHvx(const AddrInfo &AI) const -> bool {
  return HVC.HST.isTypeForHVX(AI.ValTy);
}

auto AlignVectors::getPayload(Value *Val) const -> Value * {
  if (auto *In = dyn_cast<Instruction>(Val)) {
    Intrinsic::ID ID = 0;
    if (auto *II = dyn_cast<IntrinsicInst>(In))
      ID = II->getIntrinsicID();
    if (isa<StoreInst>(In) || ID == Intrinsic::masked_store)
      return In->getOperand(0);
  }
  return Val;
}

auto AlignVectors::getMask(Value *Val) const -> Value * {
  if (auto *II = dyn_cast<IntrinsicInst>(Val)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::masked_load:
      return II->getArgOperand(1);
    case Intrinsic::masked_store:
      return II->getArgOperand(2);
    }
  }

  Type *ValTy = getPayload(Val)->getType();
  if (auto *VecTy = dyn_cast<VectorType>(ValTy))
    return HVC.getFullValue(HVC.getBoolTy(HVC.length(VecTy)));
  return HVC.getFullValue(HVC.getBoolTy());
}

auto AlignVectors::getPassThrough(Value *Val) const -> Value * {
  if (auto *II = dyn_cast<IntrinsicInst>(Val)) {
    if (II->getIntrinsicID() == Intrinsic::masked_load)
      return II->getArgOperand(2);
  }
  return UndefValue::get(getPayload(Val)->getType());
}

auto AlignVectors::createAdjustedPointer(IRBuilderBase &Builder, Value *Ptr,
                                         Type *ValTy, int Adjust,
                                         const InstMap &CloneMap) const
    -> Value * {
  if (auto *I = dyn_cast<Instruction>(Ptr))
    if (Instruction *New = CloneMap.lookup(I))
      Ptr = New;
  return Builder.CreatePtrAdd(Ptr, HVC.getConstInt(Adjust), "gep");
}

auto AlignVectors::createAlignedPointer(IRBuilderBase &Builder, Value *Ptr,
                                        Type *ValTy, int Alignment,
                                        const InstMap &CloneMap) const
    -> Value * {
  auto remap = [&](Value *V) -> Value * {
    if (auto *I = dyn_cast<Instruction>(V)) {
      for (auto [Old, New] : CloneMap)
        I->replaceUsesOfWith(Old, New);
      return I;
    }
    return V;
  };
  Value *AsInt = Builder.CreatePtrToInt(Ptr, HVC.getIntTy(), "pti");
  Value *Mask = HVC.getConstInt(-Alignment);
  Value *And = Builder.CreateAnd(remap(AsInt), Mask, "and");
  return Builder.CreateIntToPtr(
      And, PointerType::getUnqual(ValTy->getContext()), "itp");
}
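// Access-creation helpers. The "simple" versions emit ordinary aligned
// accesses, the "predicated" versions emit HVX predicated load/store
// intrinsics, and createLoad/createStore dispatch on the predicate and
// mask values, folding the known-true/false cases.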

auto AlignVectors::createLoad(IRBuilderBase &Builder, Type *ValTy, Value *Ptr,
                              Value *Predicate, int Alignment, Value *Mask,
                              Value *PassThru,
                              ArrayRef<Value *> MDSources) const -> Value * {
  bool HvxHasPredLoad = HVC.HST.useHVXV62Ops();
  // Predicate is nullptr if not creating a predicated load.
  if (Predicate) {
    assert(!Predicate->getType()->isVectorTy() &&
           "Expecting scalar predicate");
    if (HVC.isFalse(Predicate))
      return UndefValue::get(ValTy);
    if (!HVC.isTrue(Predicate) && HvxHasPredLoad) {
      Value *Load = createPredicatedLoad(Builder, ValTy, Ptr, Predicate,
                                         Alignment, MDSources);
      return Builder.CreateSelect(Mask, Load, PassThru);
    }
    // Predicate is true here.
  }
  assert(!HVC.isUndef(Mask)); // Should this be allowed?
  if (HVC.isZero(Mask))
    return PassThru;
  if (HVC.isTrue(Mask))
    return createSimpleLoad(Builder, ValTy, Ptr, Alignment, MDSources);

  Instruction *Load = Builder.CreateMaskedLoad(ValTy, Ptr, Align(Alignment),
                                               Mask, PassThru, "mld");
  propagateMetadata(Load, MDSources);
  return Load;
}

auto AlignVectors::createSimpleLoad(IRBuilderBase &Builder, Type *ValTy,
                                    Value *Ptr, int Alignment,
                                    ArrayRef<Value *> MDSources) const
    -> Value * {
  Instruction *Load =
      Builder.CreateAlignedLoad(ValTy, Ptr, Align(Alignment), "ald");
  propagateMetadata(Load, MDSources);
  return Load;
}

auto AlignVectors::createPredicatedLoad(IRBuilderBase &Builder, Type *ValTy,
                                        Value *Ptr, Value *Predicate,
                                        int Alignment,
                                        ArrayRef<Value *> MDSources) const
    -> Value * {
  assert(HVC.HST.isTypeForHVX(ValTy) &&
         "Predicates 'scalar' vector loads not yet supported");
  assert(Predicate);
  assert(!Predicate->getType()->isVectorTy() && "Expecting scalar predicate");
  assert(HVC.getSizeOf(ValTy, HVC.Alloc) % Alignment == 0);
  if (HVC.isFalse(Predicate))
    return UndefValue::get(ValTy);
  if (HVC.isTrue(Predicate))
    return createSimpleLoad(Builder, ValTy, Ptr, Alignment, MDSources);

  auto V6_vL32b_pred_ai = HVC.HST.getIntrinsicId(Hexagon::V6_vL32b_pred_ai);
  // FIXME: This may not put the metadata on the load.
  return HVC.createHvxIntrinsic(Builder, V6_vL32b_pred_ai, ValTy,
                                {Predicate, Ptr, HVC.getConstInt(0)}, {},
                                MDSources);
}

auto AlignVectors::createStore(IRBuilderBase &Builder, Value *Val, Value *Ptr,
                               Value *Predicate, int Alignment, Value *Mask,
                               ArrayRef<Value *> MDSources) const -> Value * {
  if (HVC.isZero(Mask) || HVC.isUndef(Val) || HVC.isUndef(Mask))
    return UndefValue::get(Val->getType());
  assert(!Predicate || (!Predicate->getType()->isVectorTy() &&
                        "Expecting scalar predicate"));
  if (Predicate) {
    if (HVC.isFalse(Predicate))
      return UndefValue::get(Val->getType());
    if (HVC.isTrue(Predicate))
      Predicate = nullptr;
  }
  // Here both Predicate and Mask are true or unknown.

  if (HVC.isTrue(Mask)) {
    if (Predicate) { // Predicate unknown.
      return createPredicatedStore(Builder, Val, Ptr, Predicate, Alignment,
                                   MDSources);
    }
    // Predicate is true.
    return createSimpleStore(Builder, Val, Ptr, Alignment, MDSources);
  }

  // Mask is unknown.
  if (!Predicate) {
    Instruction *Store =
        Builder.CreateMaskedStore(Val, Ptr, Align(Alignment), Mask);
    propagateMetadata(Store, MDSources);
    return Store;
  }

  // Both Mask and Predicate are unknown: emulate the masked store with a
  // predicated load, a select, and a predicated store.
  Value *PredLoad = createPredicatedLoad(Builder, Val->getType(), Ptr,
                                         Predicate, Alignment, MDSources);
  Value *Mux = Builder.CreateSelect(Mask, Val, PredLoad);
  return createPredicatedStore(Builder, Mux, Ptr, Predicate, Alignment,
                               MDSources);
}

auto AlignVectors::createSimpleStore(IRBuilderBase &Builder, Value *Val,
                                     Value *Ptr, int Alignment,
                                     ArrayRef<Value *> MDSources) const
    -> Value * {
  Instruction *Store = Builder.CreateAlignedStore(Val, Ptr, Align(Alignment));
  propagateMetadata(Store, MDSources);
  return Store;
}

auto AlignVectors::createPredicatedStore(IRBuilderBase &Builder, Value *Val,
                                         Value *Ptr, Value *Predicate,
                                         int Alignment,
                                         ArrayRef<Value *> MDSources) const
    -> Value * {
  assert(HVC.HST.isTypeForHVX(Val->getType()) &&
         "Predicates 'scalar' vector stores not yet supported");
  assert(Predicate);
  if (HVC.isFalse(Predicate))
    return UndefValue::get(Val->getType());
  if (HVC.isTrue(Predicate))
    return createSimpleStore(Builder, Val, Ptr, Alignment, MDSources);

  assert(HVC.getSizeOf(Val, HVC.Alloc) % Alignment == 0);
  auto V6_vS32b_pred_ai = HVC.HST.getIntrinsicId(Hexagon::V6_vS32b_pred_ai);
  // FIXME: This may not put the metadata on the store.
  return HVC.createHvxIntrinsic(Builder, V6_vS32b_pred_ai, nullptr,
                                {Predicate, Ptr, HVC.getConstInt(0), Val}, {},
                                MDSources);
}
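// Collect the transitive operand dependencies of In that lie strictly
// between Base and In in their common basic block.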

auto AlignVectors::getUpwardDeps(Instruction *In, Instruction *Base) const
    -> DepList {
  BasicBlock *Parent = Base->getParent();
  assert(In->getParent() == Parent &&
         "Base and In should be in the same block");
  assert(Base->comesBefore(In) && "Base should come before In");

  DepList Deps;
  std::deque<Instruction *> WorkQ = {In};
  while (!WorkQ.empty()) {
    Instruction *D = WorkQ.front();
    WorkQ.pop_front();
    if (D != In)
      Deps.insert(D);
    for (Value *Op : D->operands()) {
      if (auto *I = dyn_cast<Instruction>(Op)) {
        if (I->getParent() == Parent && Base->comesBefore(I))
          WorkQ.push_back(I);
      }
    }
  }
  return Deps;
}
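// Group loads/stores by base address: two accesses belong to the same
// group when their pointers differ by a compile-time constant.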

auto AlignVectors::createAddressGroups() -> bool {
  // An address group created here may contain instructions spanning
  // multiple basic blocks.
  AddrList WorkStack;

  auto findBaseAndOffset = [&](AddrInfo &AI) -> std::pair<Instruction *, int> {
    for (AddrInfo &W : WorkStack) {
      if (auto D = HVC.calculatePointerDifference(AI.Addr, W.Addr))
        return std::make_pair(W.Inst, *D);
    }
    return std::make_pair(nullptr, 0);
  };

  auto traverseBlock = [&](DomTreeNode *DomN, auto Visit) -> void {
    BasicBlock &Block = *DomN->getBlock();
    for (Instruction &I : Block) {
      auto AI = this->getAddrInfo(I); // Returns std::optional<AddrInfo>.
      if (!AI)
        continue;
      auto F = findBaseAndOffset(*AI);
      Instruction *GroupInst;
      if (Instruction *BI = F.first) {
        AI->Offset = F.second;
        GroupInst = BI;
      } else {
        WorkStack.push_back(*AI);
        GroupInst = AI->Inst;
      }
      AddrGroups[GroupInst].push_back(*AI);
    }

    for (DomTreeNode *C : DomN->children())
      Visit(C, Visit);

    while (!WorkStack.empty() && WorkStack.back().Inst->getParent() == &Block)
      WorkStack.pop_back();
  };

  traverseBlock(HVC.DT.getRootNode(), traverseBlock);
  assert(WorkStack.empty());

  // AddrGroups are formed.

  // Remove groups of size 1.
  erase_if(AddrGroups, [](auto &G) { return G.second.size() == 1; });
  // Remove groups that don't use HVX types.
  erase_if(AddrGroups, [&](auto &G) {
    return llvm::none_of(
        G.second, [&](auto &I) { return HVC.HST.isTypeForHVX(I.ValTy); });
  });

  return !AddrGroups.empty();
}
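// Form load groups: consecutive loads from one address group that are in
// the same block and can be moved (with cloned dependencies) next to the
// group's first load.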

auto AlignVectors::createLoadGroups(const AddrList &Group) const -> MoveList {
  // Form load groups.
  // To avoid complications with moving code across basic blocks, only form
  // groups that are contained within a single basic block.
  unsigned SizeLimit = VAGroupSizeLimit;
  if (SizeLimit == 0)
    return {};

  auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
    assert(!Move.Main.empty() && "Move group should have non-empty Main");
    if (Move.Main.size() >= SizeLimit)
      return false;
    // Don't mix HVX and non-HVX instructions.
    if (Move.IsHvx != isHvx(Info))
      return false;
    // Leading instruction in the load group.
    Instruction *Base = Move.Main.front();
    if (Base->getParent() != Info.Inst->getParent())
      return false;
    // Check if it's safe to move the load.
    if (!HVC.isSafeToMoveBeforeInBB(*Info.Inst, Base->getIterator()))
      return false;
    // ...and whether it's safe to clone the dependencies at the base.
    auto isSafeToCopyAtBase = [&](const Instruction *I) {
      return HVC.isSafeToMoveBeforeInBB(*I, Base->getIterator()) &&
             HVC.isSafeToClone(*I);
    };
    DepList Deps = getUpwardDeps(Info.Inst, Base);
    if (!llvm::all_of(Deps, isSafeToCopyAtBase))
      return false;

    Move.Main.push_back(Info.Inst);
    llvm::append_range(Move.Deps, Deps);
    return true;
  };

  MoveList LoadGroups;

  for (const AddrInfo &Info : Group) {
    if (!Info.Inst->mayReadFromMemory())
      continue;
    if (LoadGroups.empty() || !tryAddTo(Info, LoadGroups.back()))
      LoadGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), true);
  }

  // Erase singleton groups.
  erase_if(LoadGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });

  // Erase HVX groups on targets < HvxV62 (due to lack of predicated loads).
  if (!HVC.HST.useHVXV62Ops())
    erase_if(LoadGroups, [](const MoveGroup &G) { return G.IsHvx; });

  return LoadGroups;
}
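// Form store groups, scanning the address group in reverse program order
// so each group accumulates toward its last store.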

auto AlignVectors::createStoreGroups(const AddrList &Group) const -> MoveList {
  // Form store groups.
  // To avoid complications with moving code across basic blocks, only form
  // groups that are contained within a single basic block.
  unsigned SizeLimit = VAGroupSizeLimit;
  if (SizeLimit == 0)
    return {};

  auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
    assert(!Move.Main.empty() && "Move group should have non-empty Main");
    if (Move.Main.size() >= SizeLimit)
      return false;
    // For stores with return values we'd have to collect downward
    // dependencies. There are no such stores handled at the moment.
    assert(Info.Inst->getType()->isVoidTy() &&
           "Not handling stores with return values");
    // Don't mix HVX and non-HVX instructions.
    if (Move.IsHvx != isHvx(Info))
      return false;
    // For stores we need to be careful whether it's safe to move them.
    // Stores that are otherwise safe to move together may not appear safe
    // to move over one another (i.e. isSafeToMoveBefore may return false).
    Instruction *Base = Move.Main.front();
    if (Base->getParent() != Info.Inst->getParent())
      return false;
    if (!HVC.isSafeToMoveBeforeInBB(*Info.Inst, Base->getIterator(), Move.Main))
      return false;
    Move.Main.push_back(Info.Inst);
    return true;
  };

  MoveList StoreGroups;

  for (auto I = Group.rbegin(), E = Group.rend(); I != E; ++I) {
    const AddrInfo &Info = *I;
    if (!Info.Inst->mayWriteToMemory())
      continue;
    if (StoreGroups.empty() || !tryAddTo(Info, StoreGroups.back()))
      StoreGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), false);
  }

  // Erase singleton groups.
  erase_if(StoreGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });

  // Erase HVX groups on targets < HvxV62 (due to lack of predicated loads).
  if (!HVC.HST.useHVXV62Ops())
    erase_if(StoreGroups, [](const MoveGroup &G) { return G.IsHvx; });

  // Erase groups where every store is a full HVX vector. The reason is
  // that aligning predicated stores generates complex code that may be
  // less efficient than a sequence of unaligned vector stores.
  if (!VADoFullStores) {
    erase_if(StoreGroups, [this](const MoveGroup &G) {
      return G.IsHvx && llvm::all_of(G.Main, [this](Instruction *S) {
               auto MaybeInfo = this->getAddrInfo(*S);
               assert(MaybeInfo.has_value());
               return HVC.HST.isHVXVectorType(
                   EVT::getEVT(MaybeInfo->ValTy, false));
             });
    });
  }

  return StoreGroups;
}
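// Make the instructions of a move group physically adjacent: loads move
// down to the first load (their dependencies are cloned above it), and
// stores move up to the last store.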

auto AlignVectors::moveTogether(MoveGroup &Move) const -> bool {
  // Move all instructions to be adjacent.
  assert(!Move.Main.empty() && "Move group should have non-empty Main");
  Instruction *Where = Move.Main.front();

  if (Move.IsLoad) {
    // Move all the loads (and dependencies) to where the first load is.
    // Clone all deps to before Where, keeping order.
    Move.Clones = cloneBefore(Where->getIterator(), Move.Deps);
    // Move all main instructions to after Where, keeping order.
    ArrayRef<Instruction *> Main(Move.Main);
    for (Instruction *M : Main) {
      if (M != Where)
        M->moveAfter(Where);
      for (auto [Old, New] : Move.Clones)
        M->replaceUsesOfWith(Old, New);
      Where = M;
    }
    // Replace Deps with the cloned ones.
    for (int i = 0, e = Move.Deps.size(); i != e; ++i)
      Move.Deps[i] = Move.Clones[Move.Deps[i]];
  } else {
    // Move all the stores to where the last store is.
    // NOTE: Deps are empty for "store" groups. If they need to be
    // non-empty, decide on the order.
    assert(Move.Deps.empty());
    // Move all main instructions to before Where, inverting order.
    ArrayRef<Instruction *> Main(Move.Main);
    for (Instruction *M : Main.drop_front(1)) {
      M->moveBefore(Where);
      Where = M;
    }
  }

  return Move.Main.size() + Move.Deps.size() > 1;
}

template <typename T>
auto AlignVectors::cloneBefore(BasicBlock::iterator To, T &&Insts) const
    -> InstMap {
  InstMap Map;

  for (Instruction *I : Insts) {
    assert(HVC.isSafeToClone(*I));
    Instruction *C = I->clone();
    C->setName(Twine("c.") + I->getName() + ".");
    C->insertBefore(To);

    for (auto [Old, New] : Map)
      C->replaceUsesOfWith(Old, New);
    Map.insert(std::make_pair(I, C));
  }
  return Map;
}
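// Replace a group of loads with aligned sector loads plus vralign
// operations that reassemble the originally requested bytes.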

auto AlignVectors::realignLoadGroup(IRBuilderBase &Builder,
                                    const ByteSpan &VSpan, int ScLen,
                                    Value *AlignVal, Value *AlignAddr) const
    -> void {
  LLVM_DEBUG(dbgs() << __func__ << "\n");

  Type *SecTy = HVC.getByteTy(ScLen);
  int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;
  bool DoAlign = !HVC.isZero(AlignVal);
  BasicBlock::iterator BasePos = Builder.GetInsertPoint();
  BasicBlock *BaseBlock = Builder.GetInsertBlock();

  ByteSpan ASpan;
  auto *True = HVC.getFullValue(HVC.getBoolTy(ScLen));
  auto *Undef = UndefValue::get(SecTy);

  // A created load does not have to be an Instruction (e.g. it could be
  // an undef value), so keep the loads as Value*.
  SmallVector<Value *> Loads(NumSectors + DoAlign, nullptr);

  // Populate the ASpan with placeholder blocks, one per sector. Use the
  // block's own address as a unique placeholder value for the block.
  for (int Index = 0; Index != NumSectors; ++Index)
    ASpan.Blocks.emplace_back(nullptr, ScLen, Index * ScLen);
  for (int Index = 0; Index != NumSectors; ++Index) {
    ASpan.Blocks[Index].Seg.Val =
        reinterpret_cast<Value *>(&ASpan.Blocks[Index]);
  }

  // The loads and valigns are generated at the location of the earliest
  // user of each sector, so find those users first.
  DenseMap<void *, Instruction *> EarliestUser;
  auto isEarlier = [](Instruction *A, Instruction *B) {
    if (B == nullptr)
      return true;
    if (A == nullptr)
      return false;
    assert(A->getParent() == B->getParent());
    return A->comesBefore(B);
  };
  auto earliestUser = [&](const auto &Uses) {
    Instruction *User = nullptr;
    for (const Use &U : Uses) {
      auto *I = dyn_cast<Instruction>(U.getUser());
      assert(I != nullptr && "Load used in a non-instruction?");
      // Only consider users in BaseBlock, but remember if there were
      // users outside of it too: if there are no in-block users, the
      // aligned loads must still dominate the block's exit.
      if (I->getParent() == BaseBlock) {
        if (!isa<PHINode>(I))
          User = std::min(User, I, isEarlier);
      } else {
        User = std::min(User, BaseBlock->getTerminator(), isEarlier);
      }
    }
    return User;
  };

  for (const ByteSpan::Block &B : VSpan) {
    ByteSpan ASection = ASpan.section(B.Pos, B.Seg.Size);
    for (const ByteSpan::Block &S : ASection) {
      auto &EU = EarliestUser[S.Seg.Val];
      EU = std::min(EU, earliestUser(B.Seg.Val->uses()), isEarlier);
    }
  }

  LLVM_DEBUG({
    dbgs() << "ASpan:\n" << ASpan << '\n';
    dbgs() << "Earliest users of ASpan:\n";
    for (auto &[Val, User] : EarliestUser) {
      dbgs() << Val << "\n ->" << *User << '\n';
    }
  });

  auto createLoad = [&](IRBuilderBase &Builder, const ByteSpan &VSpan,
                        int Index, bool MakePred) {
    Value *Ptr =
        createAdjustedPointer(Builder, AlignAddr, SecTy, Index * ScLen);
    Value *Predicate =
        MakePred ? makeTestIfUnaligned(Builder, AlignVal, ScLen) : nullptr;

    // If vector shifting is potentially needed, accumulate metadata
    // from source sections of twice the load width.
    int Start = (Index - DoAlign) * ScLen;
    int Width = (1 + DoAlign) * ScLen;
    return this->createLoad(Builder, SecTy, Ptr, Predicate, ScLen, True, Undef,
                            VSpan.section(Start, Width).values());
  };

  auto moveBefore = [this](BasicBlock::iterator In, BasicBlock::iterator To) {
    // Move In and its upward dependencies to before To.
    assert(In->getParent() == To->getParent());
    DepList Deps = getUpwardDeps(&*In, &*To);
    In->moveBefore(To);
    // DepList is sorted with respect to positions in the basic block.
    InstMap Map = cloneBefore(In, Deps);
    for (auto [Old, New] : Map)
      In->replaceUsesOfWith(Old, New);
  };

  // Generate necessary loads at appropriate locations.
  LLVM_DEBUG(dbgs() << "Creating loads for ASpan sectors\n");
  for (int Index = 0; Index != NumSectors + 1; ++Index) {
    // In ASpan, each block will be either a single aligned load, or a
    // valign of a pair of loads. In the latter case, an aligned load j
    // is used by both block j-1 and block j (for j > 0), so place it at
    // the earlier of the two blocks' earliest users.
    Instruction *PrevAt =
        DoAlign && Index > 0 ? EarliestUser[&ASpan[Index - 1]] : nullptr;
    Instruction *ThisAt =
        Index < NumSectors ? EarliestUser[&ASpan[Index]] : nullptr;
    if (auto *Where = std::min(PrevAt, ThisAt, isEarlier)) {
      Builder.SetInsertPoint(Where);
      Loads[Index] =
          createLoad(Builder, VSpan, Index, DoAlign && Index == NumSectors);
      // We know it's safe to put the load at BasePos, but we'd prefer to
      // put it at "Where". If the load is not safe at that location, move
      // it (and its upward dependencies) back to BasePos.
      if (auto *Load = dyn_cast<Instruction>(Loads[Index])) {
        if (!HVC.isSafeToMoveBeforeInBB(*Load, BasePos))
          moveBefore(Load->getIterator(), BasePos);
      }
      LLVM_DEBUG(dbgs() << "Loads[" << Index << "]:" << *Loads[Index] << '\n');
    }
  }

  // Generate valigns if needed, and fill in proper values in ASpan.
  LLVM_DEBUG(dbgs() << "Creating values for ASpan sectors\n");
  for (int Index = 0; Index != NumSectors; ++Index) {
    ASpan[Index].Seg.Val = nullptr;
    if (auto *Where = EarliestUser[&ASpan[Index]]) {
      Builder.SetInsertPoint(Where);
      Value *Val = Loads[Index];
      assert(Val != nullptr);
      if (DoAlign) {
        Value *NextLoad = Loads[Index + 1];
        assert(NextLoad != nullptr);
        Val = HVC.vralignb(Builder, Val, NextLoad, AlignVal);
      }
      ASpan[Index].Seg.Val = Val;
      LLVM_DEBUG(dbgs() << "ASpan[" << Index << "]:" << *Val << '\n');
    }
  }

  for (const ByteSpan::Block &B : VSpan) {
    ByteSpan ASection = ASpan.section(B.Pos, B.Seg.Size).shift(-B.Pos);
    Value *Accum = UndefValue::get(HVC.getByteTy(B.Seg.Size));
    Builder.SetInsertPoint(cast<Instruction>(B.Seg.Val));

    // Walk the ASection blocks in the order in which their values were
    // created, so that each insertb sees its inputs already defined.
    std::vector<ByteSpan::Block *> ABlocks;
    for (ByteSpan::Block &S : ASection) {
      if (S.Seg.Val != nullptr)
        ABlocks.push_back(&S);
    }
    llvm::sort(ABlocks,
               [&](const ByteSpan::Block *A, const ByteSpan::Block *B) {
                 return isEarlier(cast<Instruction>(A->Seg.Val),
                                  cast<Instruction>(B->Seg.Val));
               });
    for (ByteSpan::Block *S : ABlocks) {
      // Insert the bytes of each aligned sector value into the
      // accumulated replacement value.
      Value *Pay = HVC.vbytes(Builder, getPayload(S->Seg.Val));
      Accum =
          HVC.insertb(Builder, Accum, Pay, S->Seg.Start, S->Seg.Size, S->Pos);
    }

    // Cast the accumulated bytes back to the original value type, apply
    // the mask, and replace all uses of the original access.
    Type *ValTy = getPayload(B.Seg.Val)->getType();
    Value *Cast = Builder.CreateBitCast(Accum, ValTy, "cst");
    Value *Sel = Builder.CreateSelect(getMask(B.Seg.Val), Cast,
                                      getPassThrough(B.Seg.Val), "sel");
    B.Seg.Val->replaceAllUsesWith(Sel);
  }
}
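// Replace a group of stores with aligned sector stores: vlalign shifts the
// values and the store masks into sector positions, and masked stores
// handle partially covered sectors.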

auto AlignVectors::realignStoreGroup(IRBuilderBase &Builder,
                                     const ByteSpan &VSpan, int ScLen,
                                     Value *AlignVal, Value *AlignAddr) const
    -> void {
  LLVM_DEBUG(dbgs() << __func__ << "\n");

  Type *SecTy = HVC.getByteTy(ScLen);
  int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;
  bool DoAlign = !HVC.isZero(AlignVal);

  // Values and masks for the output sectors.
  ByteSpan ASpanV, ASpanM;

  // Return a vector value corresponding to the input value Val:
  // either <1 x Val> for scalar Val, or Val itself for vector Val.
  auto MakeVec = [](IRBuilderBase &Builder, Value *Val) -> Value * {
    Type *Ty = Val->getType();
    if (Ty->isVectorTy())
      return Val;
    auto *VecTy = VectorType::get(Ty, 1, /*Scalable=*/false);
    return Builder.CreateBitCast(Val, VecTy, "cst");
  };

  // Create an extra "undef" sector at the beginning and at the end.
  // They will be used as the left/right filler in the vlalign step.
  for (int Index = (DoAlign ? -1 : 0); Index != NumSectors + DoAlign; ++Index) {
    // For stores, the size of each section is an aligned vector length.
    // Adjust the store offsets relative to the section start offset.
    ByteSpan VSection =
        VSpan.section(Index * ScLen, ScLen).shift(-Index * ScLen);
    Value *Undef = UndefValue::get(SecTy);
    Value *Zero = HVC.getNullValue(SecTy);
    Value *AccumV = Undef;
    Value *AccumM = Zero;
    for (ByteSpan::Block &S : VSection) {
      Value *Pay = getPayload(S.Seg.Val);
      Value *Mask = HVC.rescale(Builder, MakeVec(Builder, getMask(S.Seg.Val)),
                                Pay->getType(), HVC.getByteTy());
      Value *PartM = HVC.insertb(Builder, Zero, HVC.vbytes(Builder, Mask),
                                 S.Seg.Start, S.Seg.Size, S.Pos);
      AccumM = Builder.CreateOr(AccumM, PartM);

      Value *PartV = HVC.insertb(Builder, Undef, HVC.vbytes(Builder, Pay),
                                 S.Seg.Start, S.Seg.Size, S.Pos);

      AccumV = Builder.CreateSelect(
          Builder.CreateICmp(CmpInst::ICMP_NE, PartM, Zero), PartV, AccumV);
    }
    ASpanV.Blocks.emplace_back(AccumV, ScLen, Index * ScLen);
    ASpanM.Blocks.emplace_back(AccumM, ScLen, Index * ScLen);
  }

  LLVM_DEBUG({
    dbgs() << "ASpanV before vlalign:\n" << ASpanV << '\n';
    dbgs() << "ASpanM before vlalign:\n" << ASpanM << '\n';
  });

  // Shift each sector pair left-aligned into place.
  if (DoAlign) {
    for (int Index = 1; Index != NumSectors + 2; ++Index) {
      Value *PrevV = ASpanV[Index - 1].Seg.Val, *ThisV = ASpanV[Index].Seg.Val;
      Value *PrevM = ASpanM[Index - 1].Seg.Val, *ThisM = ASpanM[Index].Seg.Val;
      assert(isSectorTy(PrevV->getType()) && isSectorTy(PrevM->getType()));
      ASpanV[Index - 1].Seg.Val = HVC.vlalignb(Builder, PrevV, ThisV, AlignVal);
      ASpanM[Index - 1].Seg.Val = HVC.vlalignb(Builder, PrevM, ThisM, AlignVal);
    }
  }

  LLVM_DEBUG({
    dbgs() << "ASpanV after vlalign:\n" << ASpanV << '\n';
    dbgs() << "ASpanM after vlalign:\n" << ASpanM << '\n';
  });

  auto createStore = [&](IRBuilderBase &Builder, const ByteSpan &ASpanV,
                         const ByteSpan &ASpanM, int Index, bool MakePred) {
    Value *Val = ASpanV[Index].Seg.Val;
    Value *Mask = ASpanM[Index].Seg.Val; // Mask in bytes.
    if (HVC.isUndef(Val) || HVC.isZero(Mask))
      return;
    Value *Ptr =
        createAdjustedPointer(Builder, AlignAddr, SecTy, Index * ScLen);
    Value *Predicate =
        MakePred ? makeTestIfUnaligned(Builder, AlignVal, ScLen) : nullptr;

    // If vector shifting is potentially needed, accumulate metadata
    // from source sections of twice the store width.
    int Start = (Index - DoAlign) * ScLen;
    int Width = (1 + DoAlign) * ScLen;
    this->createStore(Builder, Val, Ptr, Predicate, ScLen,
                      HVC.vlsb(Builder, Mask),
                      VSpan.section(Start, Width).values());
  };

  for (int Index = 0; Index != NumSectors + DoAlign; ++Index) {
    createStore(Builder, ASpanV, ASpanM, Index, DoAlign && Index == NumSectors);
  }
}
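// Compute the aligned base address and the misalignment amount for a move
// group, build its ByteSpan, and dispatch to the load or store
// realignment routine.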

auto AlignVectors::realignGroup(const MoveGroup &Move) const -> bool {
  LLVM_DEBUG(dbgs() << "Realigning group:\n" << Move << '\n');

  // TODO: Needs support for masked loads/stores of "scalar" vectors.
  if (!Move.IsHvx)
    return false;

  // Return the element with the maximum value of GetValue from Range.
  auto getMaxOf = [](auto Range, auto GetValue) {
    return *llvm::max_element(Range, [&GetValue](auto &A, auto &B) {
      return GetValue(A) < GetValue(B);
    });
  };

  const AddrList &BaseInfos = AddrGroups.at(Move.Base);

  // Conceptually, there is a vector of N bytes covering the addresses
  // starting from the minimum offset (i.e. Base.Addr+Start). This vector
  // represents a contiguous memory region that spans all accessed memory
  // locations. The correspondence between loaded or stored values is
  // expressed in terms of this vector; it will be accessed starting at
  // the nearest down-aligned address, and the amount of down-alignment
  // will be AlignVal:
  //   valign(load_vector(align_down(Base+Start)), AlignVal)

  std::set<Instruction *> TestSet(Move.Main.begin(), Move.Main.end());
  AddrList MoveInfos;
  llvm::copy_if(
      BaseInfos, std::back_inserter(MoveInfos),
      [&TestSet](const AddrInfo &AI) { return TestSet.count(AI.Inst); });

  // Maximum alignment present in the whole address group.
  const AddrInfo &WithMaxAlign =
      getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.HaveAlign; });
  Align MaxGiven = WithMaxAlign.HaveAlign;

  // Minimum offset present in the move address group.
  const AddrInfo &WithMinOffset =
      getMaxOf(MoveInfos, [](const AddrInfo &AI) { return -AI.Offset; });

  const AddrInfo &WithMaxNeeded =
      getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.NeedAlign; });
  Align MinNeeded = WithMaxNeeded.NeedAlign;

  // Set the builder's insertion point right before the load group, or
  // immediately after the store group. (Instructions in a store group are
  // listed in reverse order.)
  Instruction *InsertAt = Move.Main.front();
  if (!Move.IsLoad) {
    // There should always be a next instruction (a store is never a
    // terminator), but check anyway.
    assert(InsertAt->getIterator() != InsertAt->getParent()->end());
    InsertAt = &*std::next(InsertAt->getIterator());
  }

  IRBuilder Builder(InsertAt->getParent(), InsertAt->getIterator(),
                    InstSimplifyFolder(HVC.DL));
  Value *AlignAddr = nullptr; // Actual aligned address.
  Value *AlignVal = nullptr;  // Right-shift amount (for valign).

  if (MinNeeded <= MaxGiven) {
    int Start = WithMinOffset.Offset;
    int OffAtMax = WithMaxAlign.Offset;
    // Calculate the address adjustment amount based on the address with
    // the maximum alignment: shift OffAtMax back by just enough multiples
    // of the required alignment to cover the distance from Start to
    // OffAtMax.
    int Adjust = -alignTo(OffAtMax - Start, MinNeeded.value());
    AlignAddr = createAdjustedPointer(Builder, WithMaxAlign.Addr,
                                      WithMaxAlign.ValTy, Adjust, Move.Clones);
    int Diff = Start - (OffAtMax + Adjust);
    AlignVal = HVC.getConstInt(Diff);
    assert(Diff >= 0);
    assert(static_cast<decltype(MinNeeded.value())>(Diff) < MinNeeded.value());
  } else {
    // None of the accesses is known to be sufficiently aligned, so align
    // the lowest address (WithMinOffset.Addr = Base+Start) down at
    // runtime, and take the misalignment amount from the (truncated)
    // address itself.
    AlignAddr =
        createAlignedPointer(Builder, WithMinOffset.Addr, WithMinOffset.ValTy,
                             MinNeeded.value(), Move.Clones);
    AlignVal =
        Builder.CreatePtrToInt(WithMinOffset.Addr, HVC.getIntTy(), "pti");
    if (auto *I = dyn_cast<Instruction>(AlignVal)) {
      for (auto [Old, New] : Move.Clones)
        I->replaceUsesOfWith(Old, New);
    }
  }

  ByteSpan VSpan;
  for (const AddrInfo &AI : MoveInfos) {
    VSpan.Blocks.emplace_back(AI.Inst, HVC.getSizeOf(AI.ValTy),
                              AI.Offset - WithMinOffset.Offset);
  }

  // The aligned loads/stores will use blocks that are either scalars,
  // or HVX vectors. Let "sector" be the unified term for such a block.
  int ScLen = Move.IsHvx ? HVC.HST.getVectorLength()
                         : std::max<int>(MinNeeded.value(), 4);
  assert(!Move.IsHvx || ScLen == 64 || ScLen == 128);
  assert(Move.IsHvx || ScLen == 4 || ScLen == 8);

  LLVM_DEBUG({
    dbgs() << "ScLen:  " << ScLen << "\n";
    dbgs() << "AlignVal:" << *AlignVal << "\n";
    dbgs() << "AlignAddr:" << *AlignAddr << "\n";
    dbgs() << "VSpan:\n" << VSpan << '\n';
  });

  if (Move.IsLoad)
    realignLoadGroup(Builder, VSpan, ScLen, AlignVal, AlignAddr);
  else
    realignStoreGroup(Builder, VSpan, ScLen, AlignVal, AlignAddr);

  for (auto *Inst : Move.Main)
    Inst->eraseFromParent();

  return true;
}

auto AlignVectors::makeTestIfUnaligned(IRBuilderBase &Builder, Value *AlignVal,
                                       int Alignment) const -> Value * {
  auto *AlignTy = AlignVal->getType();
  Value *And = Builder.CreateAnd(
      AlignVal, ConstantInt::get(AlignTy, Alignment - 1), "and");
  Value *Zero = ConstantInt::get(AlignTy, 0);
  return Builder.CreateICmpNE(And, Zero, "isz");
}

auto AlignVectors::isSectorTy(Type *Ty) const -> bool {
  if (!HVC.isByteVecTy(Ty))
    return false;
  int Size = HVC.getSizeOf(Ty);
  if (HVC.HST.isTypeForHVX(Ty))
    return Size == static_cast<int>(HVC.HST.getVectorLength());
  return Size == 4 || Size == 8;
}
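// Driver for the AlignVectors transformation.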

auto AlignVectors::run() -> bool {
  LLVM_DEBUG(dbgs() << "Running HVC::AlignVectors on " << HVC.F.getName()
                    << '\n');
  if (!createAddressGroups())
    return false;

  LLVM_DEBUG({
    dbgs() << "Address groups(" << AddrGroups.size() << "):\n";
    for (auto &[In, AL] : AddrGroups) {
      for (const AddrInfo &AI : AL)
        dbgs() << "---\n" << AI << '\n';
    }
  });

  bool Changed = false;
  MoveList LoadGroups, StoreGroups;

  for (auto &G : AddrGroups) {
    llvm::append_range(LoadGroups, createLoadGroups(G.second));
    llvm::append_range(StoreGroups, createStoreGroups(G.second));
  }

  LLVM_DEBUG({
    dbgs() << "\nLoad groups(" << LoadGroups.size() << "):\n";
    for (const MoveGroup &G : LoadGroups)
      dbgs() << G << "\n";
    dbgs() << "Store groups(" << StoreGroups.size() << "):\n";
    for (const MoveGroup &G : StoreGroups)
      dbgs() << G << "\n";
  });

  // Cumulative limit on the number of groups to realign.
  unsigned CountLimit = VAGroupCountLimit;
  if (CountLimit == 0)
    return false;

  if (LoadGroups.size() > CountLimit) {
    LoadGroups.resize(CountLimit);
    StoreGroups.clear();
  } else {
    unsigned StoreLimit = CountLimit - LoadGroups.size();
    if (StoreGroups.size() > StoreLimit)
      StoreGroups.resize(StoreLimit);
  }

  for (auto &M : LoadGroups)
    Changed |= moveTogether(M);
  for (auto &M : StoreGroups)
    Changed |= moveTogether(M);

  LLVM_DEBUG(dbgs() << "After moveTogether:\n" << HVC.F);

  for (auto &M : LoadGroups)
    Changed |= realignGroup(M);
  for (auto &M : StoreGroups)
    Changed |= realignGroup(M);

  return Changed;
}
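// --- End AlignVectors

// --- Begin HvxIdioms
// Fixed-point multiplication idioms. getNumSignificantBits determines how
// many bits of a value are significant and whether the value should be
// treated as signed, unsigned, or known-positive.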

auto HvxIdioms::getNumSignificantBits(Value *V, Instruction *In) const
    -> std::pair<unsigned, Signedness> {
  unsigned Bits = HVC.getNumSignificantBits(V, In);
  // The significant bits are calculated including the sign bit. This may
  // add an extra bit for zero-extended values, e.g. (zext i32 to i64) may
  // result in 33 significant bits. To avoid extra words, skip the extra
  // sign bit, but keep information that the value is to be treated as
  // unsigned.
  KnownBits Known = HVC.getKnownBits(V, In);
  Signedness Sign = Signed;
  unsigned NumToTest = 0; // Number of bits used in test for unsignedness.
  if (isPowerOf2_32(Bits))
    NumToTest = Bits;
  else if (Bits > 1 && isPowerOf2_32(Bits - 1))
    NumToTest = Bits - 1;

  if (NumToTest != 0 && Known.Zero.ashr(NumToTest).isAllOnes()) {
    Sign = Unsigned;
    Bits = NumToTest;
  }

  // If the top bit of the tested range is known to be zero, the value is
  // positive, and can be treated as either signed or unsigned.
  if (Bits > 0 && Known.Zero.ashr(Bits - 1).isAllOnes()) {
    Sign = Positive;
  }
  return {Bits, Sign};
}

auto HvxIdioms::canonSgn(SValue X, SValue Y) const
    -> std::pair<SValue, SValue> {
  // Canonicalize the signedness of X and Y, so that the result is one of:
  //   S, S
  //   U/P, S
  //   U/P, U/P
  if (X.Sgn == Signed && Y.Sgn != Signed)
    std::swap(X, Y);
  return {X, Y};
}
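// Match the fixed-point multiplication patterns
//   (X * Y) >> N, or
//   ((X * Y) + (1 << M)) >> N
// where the shift may be logical or arithmetic.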

auto HvxIdioms::matchFxpMul(Instruction &In) const -> std::optional<FxpOp> {
  using namespace PatternMatch;
  auto *Ty = In.getType();

  if (!Ty->isVectorTy() || !Ty->getScalarType()->isIntegerTy())
    return std::nullopt;

  unsigned Width = cast<IntegerType>(Ty->getScalarType())->getBitWidth();

  FxpOp Op;
  Value *Exp = &In;

  // Fixed-point multiplication is always shifted right (except when the
  // fraction is 0 bits).
  auto m_Shr = [](auto &&V, auto &&S) {
    return m_CombineOr(m_LShr(V, S), m_AShr(V, S));
  };

  uint64_t Qn = 0;
  if (Value *T; match(Exp, m_Shr(m_Value(T), m_ConstantInt(Qn)))) {
    Op.Frac = Qn;
    Exp = T;
  } else {
    Op.Frac = 0;
  }

  if (Op.Frac > Width)
    return std::nullopt;

  // Check if there is rounding added.
  uint64_t CV;
  if (Value *T;
      Op.Frac > 0 && match(Exp, m_Add(m_Value(T), m_ConstantInt(CV)))) {
    if (CV != 0 && !isPowerOf2_64(CV))
      return std::nullopt;
    if (CV != 0)
      Op.RoundAt = Log2_64(CV);
    Exp = T;
  }

  // Check if the remaining expression is a multiplication.
  if (match(Exp, m_Mul(m_Value(Op.X.Val), m_Value(Op.Y.Val)))) {
    Op.Opcode = Instruction::Mul;
    Op.X.Sgn = getNumSignificantBits(Op.X.Val, &In).second;
    Op.Y.Sgn = getNumSignificantBits(Op.Y.Val, &In).second;
    Op.ResTy = cast<VectorType>(Ty);
    return Op;
  }

  return std::nullopt;
}

auto HvxIdioms::processFxpMul(Instruction &In, const FxpOp &Op) const
    -> Value * {
  assert(Op.X.Val->getType() == Op.Y.Val->getType());

  auto *VecTy = dyn_cast<VectorType>(Op.X.Val->getType());
  if (VecTy == nullptr)
    return nullptr;
  auto *ElemTy = cast<IntegerType>(VecTy->getElementType());
  unsigned ElemWidth = ElemTy->getBitWidth();

  // TODO: This can be relaxed.
  // The total bit width of the vector must be a multiple of the HVX
  // vector length.
  if ((HVC.length(VecTy) * ElemWidth) % (8 * HVC.HST.getVectorLength()) != 0)
    return nullptr;

  // Elements that are at most 8 bits wide are not handled here.
  if (ElemWidth <= 8)
    return nullptr;

  // A multiplication of i32 or narrower elements without a fractional
  // shift is handled fine by the default lowering.
  if (ElemWidth <= 32 && Op.Frac == 0)
    return nullptr;

  auto [BitsX, SignX] = getNumSignificantBits(Op.X.Val, &In);
  auto [BitsY, SignY] = getNumSignificantBits(Op.Y.Val, &In);

  // TODO: Add multiplication of vectors by scalar registers.

  Value *X = Op.X.Val, *Y = Op.Y.Val;
  IRBuilder Builder(In.getParent(), In.getIterator(),
                    InstSimplifyFolder(HVC.DL));

  auto roundUpWidth = [](unsigned Width) -> unsigned {
    if (Width <= 32 && !isPowerOf2_32(Width)) {
      // If the element width is not a power of 2, round it up to the
      // next one. Do this for widths not exceeding 32.
      return PowerOf2Ceil(Width);
    }
    if (Width > 32 && Width % 32 != 0) {
      // For wider elements, round up to the next multiple of 32.
      return alignTo(Width, 32u);
    }
    return Width;
  };

  BitsX = roundUpWidth(BitsX);
  BitsY = roundUpWidth(BitsY);

  // For elementwise multiplication the vectors must have the same
  // lengths, so resize the elements of both inputs to the same width:
  // the maximum of the significant-bit counts.
  unsigned Width = std::max(BitsX, BitsY);

  auto *ResizeTy = VectorType::get(HVC.getIntTy(Width), VecTy);
  if (Width < ElemWidth) {
    X = Builder.CreateTrunc(X, ResizeTy, "trn");
    Y = Builder.CreateTrunc(Y, ResizeTy, "trn");
  } else if (Width > ElemWidth) {
    X = SignX == Signed ? Builder.CreateSExt(X, ResizeTy, "sxt")
                        : Builder.CreateZExt(X, ResizeTy, "zxt");
    Y = SignY == Signed ? Builder.CreateSExt(Y, ResizeTy, "sxt")
                        : Builder.CreateZExt(Y, ResizeTy, "zxt");
  };

  assert(X->getType() == Y->getType() && X->getType() == ResizeTy);

  unsigned VecLen = HVC.length(ResizeTy);
  unsigned ChopLen = (8 * HVC.HST.getVectorLength()) / std::min(Width, 32u);

  SmallVector<Value *> Results;
  FxpOp ChopOp = Op;
  ChopOp.ResTy = VectorType::get(Op.ResTy->getElementType(), ChopLen, false);

  for (unsigned V = 0; V != VecLen / ChopLen; ++V) {
    ChopOp.X.Val = HVC.subvector(Builder, X, V * ChopLen, ChopLen);
    ChopOp.Y.Val = HVC.subvector(Builder, Y, V * ChopLen, ChopLen);
    Results.push_back(processFxpMulChopped(Builder, In, ChopOp));
    if (Results.back() == nullptr)
      break;
  }

  if (Results.empty() || Results.back() == nullptr)
    return nullptr;

  Value *Cat = HVC.concat(Builder, Results);
  Value *Ext = SignX == Signed || SignY == Signed
                   ? Builder.CreateSExt(Cat, VecTy, "sxt")
                   : Builder.CreateZExt(Cat, VecTy, "zxt");
  return Ext;
}
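// Gather/scatter idioms: matchScatter/matchGather recognize the generic
// masked intrinsics, and the static helpers below walk the IR around them
// to recover the base pointer, the index vector, and the destination of
// the access pattern.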

inline bool HvxIdioms::matchScatter(Instruction &In) const {
  auto *II = dyn_cast<IntrinsicInst>(&In);
  if (!II)
    return false;
  return (II->getIntrinsicID() == Intrinsic::masked_scatter);
}

inline bool HvxIdioms::matchGather(Instruction &In) const {
  auto *II = dyn_cast<IntrinsicInst>(&In);
  if (!II)
    return false;
  return (II->getIntrinsicID() == Intrinsic::masked_gather);
}

// Opcodes of elementwise arithmetic operations that may consume a
// gathered value.
static bool isArithmetic(unsigned Opc) {
  switch (Opc) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::AShr:
  case Instruction::LShr:
  case Instruction::Shl:
  case Instruction::UDiv:
    return true;
  }
  return false;
}

// Extract the pointer operand from a memory-accessing instruction.
static Value *locatePointer(Instruction *In) {
  if (auto *Ld = dyn_cast<LoadInst>(In)) {
    Value *Ptr = Ld->getPointerOperand();
    assert(Ptr && "Unable to extract pointer");
    return Ptr;
  }
  if (auto *St = dyn_cast<StoreInst>(In)) {
    Value *Ptr = St->getPointerOperand();
    assert(Ptr && "Unable to extract pointer");
    return Ptr;
  }
  if (auto *II = dyn_cast<IntrinsicInst>(In)) {
    if (II->getIntrinsicID() == Intrinsic::masked_store)
      return II->getOperand(1);
  }
  return nullptr;
}

// Classify In as a destination for a gathered value, setting Qual
// accordingly.
static Instruction *qualifyDestination(Instruction *In,
                                       HvxIdioms::DstQualifier &Qual) {
  Instruction *Destination = nullptr;
  if (!In)
    return Destination;
  if (isa<StoreInst>(In)) {
    Destination = In;
    Qual = HvxIdioms::LdSt;
  } else if (auto *II = dyn_cast<IntrinsicInst>(In)) {
    if (II->getIntrinsicID() == Intrinsic::masked_gather) {
      Destination = In;
      Qual = HvxIdioms::LLVM_Gather;
    } else if (II->getIntrinsicID() == Intrinsic::masked_scatter) {
      Destination = In;
      Qual = HvxIdioms::LLVM_Scatter;
    } else if (II->getIntrinsicID() == Intrinsic::masked_store) {
      Destination = In;
      Qual = HvxIdioms::LdSt;
    } else if (II->getIntrinsicID() ==
               Intrinsic::hexagon_V6_vgather_vscattermh) {
      Destination = In;
      Qual = HvxIdioms::HEX_Gather_Scatter;
    } else if (II->getIntrinsicID() == Intrinsic::hexagon_V6_vscattermh_128B) {
      Destination = In;
      Qual = HvxIdioms::HEX_Scatter;
    } else if (II->getIntrinsicID() == Intrinsic::hexagon_V6_vgathermh_128B) {
      Destination = In;
      Qual = HvxIdioms::HEX_Gather;
    }
  } else if (isa<CallInst>(In)) {
    Destination = In;
    Qual = HvxIdioms::Call;
  } else if (isArithmetic(In->getOpcode())) {
    Destination = In;
    Qual = HvxIdioms::Arithmetic;
  } else {
    LLVM_DEBUG(dbgs() << "Unhandled destination : " << *In << "\n");
  }
  return Destination;
}

// Walk the users of In looking for an instruction that qualifies as the
// final destination of the gathered value.
static Instruction *locateDestination(Instruction *In,
                                      HvxIdioms::DstQualifier &Qual) {
  Instruction *Destination = nullptr;
  if (!In)
    return Destination;

  SmallVector<Instruction *> Users;
  for (auto &U : In->uses()) {
    if (auto *UI = dyn_cast<Instruction>(U.getUser())) {
      Destination = qualifyDestination(UI, Qual);
      if (Destination)
        Users.push_back(Destination);
    }
  }

  for (auto *I : Users)
    if (I)
      return I;
  return Destination;
}

// Return the vector-of-pointers operand of a masked gather/scatter.
static Value *getGatherScatterIndex(Instruction *In) {
  assert(In && "Bad instruction");
  auto *IIn = dyn_cast<IntrinsicInst>(In);
  assert((IIn && (IIn->getIntrinsicID() == Intrinsic::masked_gather ||
                  IIn->getIntrinsicID() == Intrinsic::masked_scatter)) &&
         "Not a gather Intrinsic");
  Value *GEPIndex = nullptr;
  if (IIn->getIntrinsicID() == Intrinsic::masked_gather)
    GEPIndex = IIn->getOperand(0);
  else
    GEPIndex = IIn->getOperand(1);
  return GEPIndex;
}

// Recover the scalar base address that the vector of pointers of a
// gather/scatter is derived from.
static Value *locateAddress(Instruction *In) {
  Value *GEPIndex = getGatherScatterIndex(In);
  if (!GEPIndex) {
    LLVM_DEBUG(dbgs() << "  Unable to locate GEP index\n");
    return nullptr;
  }
  auto *IndexLoad = dyn_cast<LoadInst>(GEPIndex);
  if (IndexLoad)
    return IndexLoad;

  if (auto *IndexZEx = dyn_cast<ZExtInst>(GEPIndex)) {
    IndexLoad = dyn_cast<LoadInst>(IndexZEx->getOperand(0));
    if (IndexLoad)
      return IndexLoad;
    auto *II = dyn_cast<IntrinsicInst>(IndexZEx->getOperand(0));
    if (II && II->getIntrinsicID() == Intrinsic::masked_gather)
      return II;
  }
  if (auto *BaseShuffle = dyn_cast<ShuffleVectorInst>(GEPIndex)) {
    IndexLoad = dyn_cast<LoadInst>(BaseShuffle->getOperand(0));
    if (IndexLoad)
      return IndexLoad;
    auto *IE = dyn_cast<InsertElementInst>(BaseShuffle->getOperand(0));
    if (IE) {
      auto *Src = IE->getOperand(1);
      IndexLoad = dyn_cast<LoadInst>(Src);
      if (IndexLoad)
        return IndexLoad;
      auto *Alloca = dyn_cast<AllocaInst>(Src);
      if (Alloca)
        return Alloca;
      return Src;
    }
  }
  LLVM_DEBUG(dbgs() << "  Unable to locate Address from intrinsic\n");
  return nullptr;
}

// Determine the type of the value flowing through the destination.
static Type *getDstType(Instruction *In) {
  if (!In)
    return nullptr;

  if (auto *St = dyn_cast<StoreInst>(In))
    return St->getValueOperand()->getType();
  if (auto *II = dyn_cast<IntrinsicInst>(In)) {
    if (II->getIntrinsicID() == Intrinsic::masked_load)
      return II->getType();
    if (II->getIntrinsicID() == Intrinsic::masked_store)
      return II->getOperand(0)->getType();
  }
  return In->getType();
}

// Return the value that supplies the indexes of a gather/scatter: a
// load, a masked load/gather, a constant vector of indexes, or the
// operand of the GEP computing the vector of addresses.
static Value *locateIndexes(Instruction *In) {
  if (!In)
    return nullptr;
  if (isa<LoadInst>(In))
    return In;
  if (auto *II = dyn_cast<IntrinsicInst>(In)) {
    if (II->getIntrinsicID() == Intrinsic::masked_load)
      return In;
    if (II->getIntrinsicID() == Intrinsic::masked_gather)
      return In;
  }
  if (auto *GEPIndex =
          dyn_cast<GetElementPtrInst>(getGatherScatterIndex(In))) {
    if (auto *cstDataVector =
            dyn_cast<ConstantDataVector>(GEPIndex->getOperand(1)))
      return cstDataVector;
    return GEPIndex->getOperand(0);
  }
  return nullptr;
}

// Locate the instruction (or constant) that produces the indexes of a
// gather/scatter.
static Value *locateIndex(Instruction *In) {
  Value *GEPIndex = getGatherScatterIndex(In);
  if (!GEPIndex) {
    LLVM_DEBUG(dbgs() << "  Unable to locate GEP index\n");
    return nullptr;
  }
  Value *IndexLoad = locateIndexes(dyn_cast<Instruction>(GEPIndex));
  if (IndexLoad)
    return IndexLoad;

  LLVM_DEBUG(dbgs() << "  Unable to locate Index from intrinsic\n");
  return nullptr;
}

// Reinterpret a 64 x i16 vector as 32 x i32, via an identity shuffle
// followed by a bitcast.
static Value *cast64i16To32i32(const HexagonVectorCombine &HVC,
                               IRBuilderBase &Builder, Value *I) {
  assert(I && "Unable to reinterpret cast");
  Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
  std::vector<int> shuffleMask;
  for (unsigned i = 0; i < 64; ++i)
    shuffleMask.push_back(i);
  ArrayRef<int> Mask(shuffleMask);
  Value *CastShuffle =
      Builder.CreateShuffleVector(I, I, Mask, "identity_shuffle");
  return Builder.CreateBitCast(CastShuffle, NT, "cst64_i16_to_32_i32");
}

// Reinterpret a 128 x i8 vector as 32 x i32, via an identity shuffle
// followed by a bitcast.
static Value *cast128i8To32i32(const HexagonVectorCombine &HVC,
                               IRBuilderBase &Builder, Value *I) {
  assert(I && "Unable to reinterpret cast");
  Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
  std::vector<int> shuffleMask;
  for (unsigned i = 0; i < 128; ++i)
    shuffleMask.push_back(i);
  ArrayRef<int> Mask(shuffleMask);
  Value *CastShuffle =
      Builder.CreateShuffleVector(I, I, Mask, "identity_shuffle");
  return Builder.CreateBitCast(CastShuffle, NT, "cst128_i8_to_32_i32");
}

// Build a 128-bit HVX predicate by replicating the given 32-bit pattern
// across a 32 x i32 vector and converting it with V6_vandvrt.
static Value *get_i32_Mask(const HexagonVectorCombine &HVC,
                           IRBuilderBase &Builder, LLVMContext &Ctx,
                           unsigned int pattern) {
  std::vector<uint32_t> byteMask;
  for (unsigned i = 0; i < 32; ++i)
    byteMask.push_back(pattern);

  return Builder.CreateIntrinsic(
      HVC.getBoolTy(128), HVC.HST.getIntrinsicId(Hexagon::V6_vandvrt),
      {llvm::ConstantDataVector::get(Ctx, byteMask), HVC.getConstInt(~0)},
      nullptr);
}
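// Lower llvm.masked.scatter to HVX vscatter intrinsics. Byte elements are
// unpacked to halfword pairs and scattered under a byte-lane predicate;
// halfword and word elements map directly to vscattermh/vscattermw.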

Value *HvxIdioms::processVScatter(Instruction &In) const {
  auto *InpTy = dyn_cast<VectorType>(In.getOperand(0)->getType());
  assert(InpTy && "Cannot handle no vector type for llvm.scatter/gather");
  unsigned InpSize = HVC.getSizeOf(InpTy);
  auto *F = In.getFunction();
  LLVMContext &Ctx = F->getContext();
  auto *ElemTy = dyn_cast<IntegerType>(InpTy->getElementType());
  assert(ElemTy && "llvm.scatter needs integer type argument");
  unsigned ElemWidth = HVC.DL.getTypeAllocSize(ElemTy);
  LLVM_DEBUG({
    unsigned Elements = HVC.length(InpTy);
    dbgs() << "\n[Process scatter](" << In << ")\n" << *In.getParent() << "\n";
    dbgs() << "  Input type(" << *InpTy << ") elements(" << Elements
           << ") VecLen(" << InpSize << ") type(" << *ElemTy << ") ElemWidth("
           << ElemWidth << ")\n";
  });

  IRBuilder Builder(In.getParent(), In.getIterator(),
                    InstSimplifyFolder(HVC.DL));

  auto *ValueToScatter = In.getOperand(0);
  LLVM_DEBUG(dbgs() << "  ValueToScatter : " << *ValueToScatter << "\n");

  if (InpSize != HVC.HST.getVectorLength()) {
    LLVM_DEBUG(dbgs() << "Unhandled vector size(" << InpSize
                      << ") for vscatter\n");
    return nullptr;
  }

  // Locate the value feeding the indexes of the scatter.
  Value *IndexLoad = locateIndex(&In);
  if (!IndexLoad)
    return nullptr;
  LLVM_DEBUG(dbgs() << "  IndexLoad : " << *IndexLoad << "\n");

  // Locate the base address the scatter stores to.
  Value *Ptr = locateAddress(&In);
  if (!Ptr)
    return nullptr;

  Value *Indexes = locateIndexes(&In);
  if (!Indexes)
    return nullptr;
  LLVM_DEBUG(dbgs() << "  Indexes : " << *Indexes << "\n");
  Value *CastedDst = Builder.CreatePtrToInt(Ptr, Type::getInt32Ty(Ctx),
                                            "cst_ptr_to_i32");
  LLVM_DEBUG(dbgs() << "  CastedDst : " << *CastedDst << "\n");

  auto *cstDataVector = dyn_cast<ConstantDataVector>(Indexes);
  Value *CastIndex = nullptr;
  if (cstDataVector) {
    // Constant indexes: store them to a stack slot and reload them as an
    // HVX vector.
    AllocaInst *IndexesAlloca =
        Builder.CreateAlloca(HVC.getHvxTy(HVC.getIntTy(32), false));
    [[maybe_unused]] auto *StoreIndexes =
        Builder.CreateStore(cstDataVector, IndexesAlloca);
    LLVM_DEBUG(dbgs() << "  StoreIndexes : " << *StoreIndexes << "\n");
    CastIndex = Builder.CreateLoad(HVC.getHvxTy(HVC.getIntTy(32), false),
                                   IndexesAlloca, "reload_index");
  } else {
    if (ElemWidth == 2)
      CastIndex = cast64i16To32i32(HVC, Builder, Indexes);
    else
      CastIndex = Indexes;
  }
  LLVM_DEBUG(dbgs() << "  Cast index : " << *CastIndex << ")\n");

  if (ElemWidth == 1) {
    // Byte elements: unpack the indexes and the value to scatter into
    // halfword pairs, and scatter each half under a byte-lane predicate.
    Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);

    Value *CastIndexes = Builder.CreateBitCast(CastIndex, NT, "cast_to_32i32");
    auto V6_vunpack = HVC.HST.getIntrinsicId(Hexagon::V6_vunpackub);
    auto *UnpackedIndexes = Builder.CreateIntrinsic(
        HVC.getHvxTy(HVC.getIntTy(32), true), V6_vunpack, CastIndexes,
        nullptr);
    LLVM_DEBUG(dbgs() << "  UnpackedIndexes : " << *UnpackedIndexes << ")\n");

    auto V6_hi = HVC.HST.getIntrinsicId(Hexagon::V6_hi);
    auto V6_lo = HVC.HST.getIntrinsicId(Hexagon::V6_lo);
    [[maybe_unused]] Value *IndexHi =
        HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedIndexes);
    [[maybe_unused]] Value *IndexLo =
        HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedIndexes);
    LLVM_DEBUG(dbgs() << "  UnpackedIndHi : " << *IndexHi << ")\n");
    LLVM_DEBUG(dbgs() << "  UnpackedIndLo : " << *IndexLo << ")\n");

    Value *CastSrc = cast128i8To32i32(HVC, Builder, ValueToScatter);
    LLVM_DEBUG(dbgs() << "  CastSrc : " << *CastSrc << ")\n");
    auto *UnpackedValueToScatter = Builder.CreateIntrinsic(
        HVC.getHvxTy(HVC.getIntTy(32), true), V6_vunpack, CastSrc, nullptr);
    LLVM_DEBUG(dbgs() << "  UnpackedValToScat: " << *UnpackedValueToScatter
                      << ")\n");

    [[maybe_unused]] Value *UVSHi =
        HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedValueToScatter);
    [[maybe_unused]] Value *UVSLo =
        HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedValueToScatter);
    LLVM_DEBUG(dbgs() << "  UVSHi : " << *UVSHi << ")\n");
    LLVM_DEBUG(dbgs() << "  UVSLo : " << *UVSLo << ")\n");

    // Predicate selecting the low byte of each halfword lane.
    auto *QByteMask = get_i32_Mask(HVC, Builder, Ctx, 0x00ff00ff);
    LLVM_DEBUG(dbgs() << "  QByteMask : " << *QByteMask << "\n");
    [[maybe_unused]] auto ResHi = Builder.CreateIntrinsic(
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermhq_128B,
        {QByteMask, CastedDst, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE),
         IndexHi, UVSHi},
        nullptr);
    LLVM_DEBUG(dbgs() << "  ResHi : " << *ResHi << ")\n");
    return Builder.CreateIntrinsic(
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermhq_128B,
        {QByteMask, CastedDst, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE),
         IndexLo, UVSLo},
        nullptr);
  } else if (ElemWidth == 2) {
    Value *CastSrc = cast64i16To32i32(HVC, Builder, ValueToScatter);
    LLVM_DEBUG(dbgs() << "  CastSrc : " << *CastSrc << ")\n");
    return Builder.CreateIntrinsic(
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermh_128B,
        {CastedDst, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), CastIndex,
         CastSrc},
        nullptr);
  } else if (ElemWidth == 4) {
    return Builder.CreateIntrinsic(
        Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermw_128B,
        {CastedDst, HVC.getConstInt(DEFAULT_HVX_VTCM_PAGE_SIZE), CastIndex,
         ValueToScatter},
        nullptr);
  } else {
    LLVM_DEBUG(dbgs() << "Unhandled element type for vscatter\n");
    return nullptr;
  }
}
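// Lower llvm.masked.gather to HVX vgather intrinsics. HVX vgather writes
// its result to memory, so the lowering depends on where the gathered
// value is consumed (a plain store, a scatter, or an existing Hexagon
// gather/scatter intrinsic).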

2250Value *HvxIdioms::processVGather(Instruction &In) const {

2251 [[maybe_unused]] auto *InpTy =

2253 assert(InpTy && "Cannot handle no vector type for llvm.gather");

2254 [[maybe_unused]] auto *ElemTy =

2256 assert(ElemTy && "llvm.gather needs vector of ptr argument");

2257 auto *F = In.getFunction();

2258 LLVMContext &Ctx = F->getContext();

2259 LLVM_DEBUG(dbgs() << "\n[Process gather](" << In << ")\n"

2260 << *In.getParent() << "\n");

2261 LLVM_DEBUG(dbgs() << " Input type(" << *InpTy << ") elements("

2262 << HVC.length(InpTy) << ") VecLen(" << HVC.getSizeOf(InpTy)

2263 << ") type(" << *ElemTy << ") Access alignment("

2264 << *In.getOperand(1) << ") AddressSpace("

2265 << ElemTy->getAddressSpace() << ")\n");

2266

2267

2269 "llvm.gather needs vector for mask");

2270 IRBuilder Builder(In.getParent(), In.getIterator(),

2271 InstSimplifyFolder(HVC.DL));

2272

2273

2274

2275

2276 HvxIdioms::DstQualifier Qual = HvxIdioms::Undefined;

2278 if (!Dst) {

2279 LLVM_DEBUG(dbgs() << " Unable to locate vgather destination\n");

2280 return nullptr;

2281 }

2282 LLVM_DEBUG(dbgs() << " Destination : " << *Dst << " Qual(" << Qual

2283 << ")\n");

2284

2285

2287 if (!Ptr) {

2288 LLVM_DEBUG(dbgs() << "Could not locate vgather destination ptr\n");

2289 return nullptr;

2290 }

2291

2292

2294 assert(DstType && "Cannot handle non vector dst type for llvm.gather");

2295

2296

2298 if (!IndexLoad)

2299 return nullptr;

2300 LLVM_DEBUG(dbgs() << " IndexLoad : " << *IndexLoad << "\n");

2301

2302

2304 if (!Indexes)

2305 return nullptr;

2306 LLVM_DEBUG(dbgs() << " Indexes : " << *Indexes << "\n");

2307

2309 Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);

2310 if (Qual == HvxIdioms::LdSt || Qual == HvxIdioms::Arithmetic) {

2311

2312

2313

2314 unsigned OutputSize = HVC.getSizeOf(DstType);

2318 << " Address space ("

2320 << " Result type : " << *DstType

2321 << "\n Size in bytes : " << OutputSize

2322 << " element type(" << *DstElemTy

2323 << ")\n ElemWidth : " << ElemWidth << " bytes\n");

2324

2326 assert(IndexType && "Cannot handle non vector index type for llvm.gather");

2327 unsigned IndexWidth = HVC.DL.getTypeAllocSize(IndexType->getElementType());

2328 LLVM_DEBUG(dbgs() << " IndexWidth(" << IndexWidth << ")\n");

2329

2330

2332 IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");

2333

2334

2335

2336

2337

2338

2339

2341 if (ElemWidth == 1) {

2342

2343

2344

2345

2346 Value *CastIndexes =

2347 Builder.CreateBitCast(Indexes, NT, "cast_to_32i32");

2348 auto V6_vunpack = HVC.HST.getIntrinsicId(Hexagon::V6_vunpackub);

2349 auto *UnpackedIndexes =

2350 Builder.CreateIntrinsic(HVC.getHvxTy(HVC.getIntTy(32), true),

2351 V6_vunpack, CastIndexes, nullptr);

2352 LLVM_DEBUG(dbgs() << " UnpackedIndexes : " << *UnpackedIndexes

2353 << ")\n");

2354

2355 auto V6_hi = HVC.HST.getIntrinsicId(Hexagon::V6_hi);

2356 auto V6_lo = HVC.HST.getIntrinsicId(Hexagon::V6_lo);

2357 [[maybe_unused]] Value *IndexHi =

2358 HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedIndexes);

2359 [[maybe_unused]] Value *IndexLo =

2360 HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedIndexes);

2361 LLVM_DEBUG(dbgs() << " UnpackedIndHi : " << *IndexHi << ")\n");

2362 LLVM_DEBUG(dbgs() << " UnpackedIndLo : " << *IndexLo << ")\n");

2363

2364 auto *QByteMask = get_i32_Mask(HVC, Builder, Ctx, 0x00ff00ff);

2365 LLVM_DEBUG(dbgs() << " QByteMask : " << *QByteMask << "\n");

2366

2367

2368 auto V6_vgather = HVC.HST.getIntrinsicId(Hexagon::V6_vgathermhq);

2369 [[maybe_unused]] auto GatherHi = Builder.CreateIntrinsic(

2370 Type::getVoidTy(Ctx), V6_vgather,

2371 {Ptr, QByteMask, CastedPtr,

2373 nullptr);

2374 LLVM_DEBUG(dbgs() << " GatherHi : " << *GatherHi << ")\n");

2375

2376 [[maybe_unused]] Value *LoadedResultHi = Builder.CreateLoad(

2377 HVC.getHvxTy(HVC.getIntTy(32), false), Ptr, "temp_result_hi");

2378 LLVM_DEBUG(dbgs() << " LoadedResultHi : " << *LoadedResultHi << "\n");

2379

2380

2381

2383 Type::getVoidTy(Ctx), V6_vgather,

2384 {Ptr, QByteMask, CastedPtr,

2386 nullptr);

2387 LLVM_DEBUG(dbgs() << " GatherLo : " << *Gather << ")\n");

2389 HVC.getHvxTy(HVC.getIntTy(32), false), Ptr, "temp_result_lo");

2390 LLVM_DEBUG(dbgs() << " LoadedResultLo : " << *LoadedResultLo << "\n");

2391

2392

2393

2394

2395 auto V6_vpackeb = HVC.HST.getIntrinsicId(Hexagon::V6_vpackeb);

2397 NT, V6_vpackeb, {LoadedResultHi, LoadedResultLo}, nullptr);

2398 LLVM_DEBUG(dbgs() << " ScaledRes : " << *Res << "\n");

2399 [[maybe_unused]] auto *StoreRes = Builder.CreateStore(Res, Ptr);

2400 LLVM_DEBUG(dbgs() << " StoreRes : " << *StoreRes << "\n");

2401 } else if (ElemWidth == 2) {

2402

2403 if (IndexWidth == 2) {

2404

2405 Value *CastIndex =

2407 LLVM_DEBUG(dbgs() << " Cast index: " << *CastIndex << ")\n");

2408

2409

2410 auto V6_vaslh = HVC.HST.getIntrinsicId(Hexagon::V6_vaslh);

2411 Value *AdjustedIndex = HVC.createHvxIntrinsic(

2412 Builder, V6_vaslh, NT, {CastIndex, HVC.getConstInt(1)});

2414 << " Shifted half index: " << *AdjustedIndex << ")\n");

2415

2416 auto V6_vgather = HVC.HST.getIntrinsicId(Hexagon::V6_vgathermh);

2417

2418

2420 Type::getVoidTy(Ctx), V6_vgather,

2422 AdjustedIndex},

2423 nullptr);

2424 for (auto &U : Dst->uses()) {

2426 dbgs() << " dst used by: " << *UI << "\n";

2427 }

2428 for (auto &U : In.uses()) {

2430 dbgs() << " In used by : " << *UI << "\n";

2431 }

2432

2433

2435 HVC.getHvxTy(HVC.getIntTy(16), false), Ptr, "temp_result");

2436 LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");

2437 In.replaceAllUsesWith(LoadedResult);

2438 } else {

2439 dbgs() << " Unhandled index type for vgather\n";

2440 return nullptr;

2441 }

2442 } else if (ElemWidth == 4) {

2443 if (IndexWidth == 4) {

2444

2445 auto V6_vaslh = HVC.HST.getIntrinsicId(Hexagon::V6_vaslh);

2446 Value *AdjustedIndex = HVC.createHvxIntrinsic(

2447 Builder, V6_vaslh, NT, {Indexes, HVC.getConstInt(2)});

2449 << " Shifted word index: " << *AdjustedIndex << ")\n");

2451 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermw_128B,

2453 AdjustedIndex},

2454 nullptr);

2455 } else {

2456 LLVM_DEBUG(dbgs() << " Unhandled index type for vgather\n");

2457 return nullptr;

2458 }

2459 } else {

2460 LLVM_DEBUG(dbgs() << " Unhandled element type for vgather\n");

2461 return nullptr;

2462 }

2463 } else if (HVC.HST.getVectorLength() == OutputSize * 2) {

2464

2465 LLVM_DEBUG(dbgs() << " Unhandled half of register size\n");

2466 return nullptr;

2467 } else if (HVC.HST.getVectorLength() * 2 == OutputSize) {

2468 LLVM_DEBUG(dbgs() << " Unhandle twice the register size\n");

2469 return nullptr;

2470 }

2471

2472

2473

2474 Dst->eraseFromParent();

2475 } else if (Qual == HvxIdioms::LLVM_Scatter) {

2476

2477 auto *DstInpTy = cast(Dst->getOperand(1)->getType());

2478 assert(DstInpTy && "Cannot handle no vector type for llvm.scatter");

2479 [[maybe_unused]] unsigned DstInpSize = HVC.getSizeOf(DstInpTy);

2480 [[maybe_unused]] unsigned DstElements = HVC.length(DstInpTy);

2481 [[maybe_unused]] auto *DstElemTy =

2483 assert(DstElemTy && "llvm.scatter needs vector of ptr argument");

2484 LLVM_DEBUG(dbgs() << " Gather feeds into scatter\n Values to scatter : "

2485 << *Dst->getOperand(0) << "\n");

2486 LLVM_DEBUG(dbgs() << " Dst type(" << *DstInpTy << ") elements("

2487 << DstElements << ") VecLen(" << DstInpSize << ") type("

2488 << *DstElemTy << ") Access alignment("

2489 << *Dst->getOperand(2) << ")\n");

2490

2492 if (!Src)

2493 return nullptr;

2495

2497 LLVM_DEBUG(dbgs() << " Source is not a pointer type...\n");

2498 return nullptr;

2499 }

2500

2502 Src, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");

2503 LLVM_DEBUG(dbgs() << " CastedSrc: " << *CastedSrc << "\n");

2504

2506 if (!DstLoad) {

2507 LLVM_DEBUG(dbgs() << " Unable to locate DstLoad\n");

2508 return nullptr;

2509 }

2510 LLVM_DEBUG(dbgs() << " DstLoad : " << *DstLoad << "\n");

2511

2513 if (!Ptr)

2514 return nullptr;

2516 Value *CastIndex =

2518 LLVM_DEBUG(dbgs() << " Cast index: " << *CastIndex << ")\n");

2519

2520

2521 auto V6_vaslh = HVC.HST.getIntrinsicId(Hexagon::V6_vaslh);

2522 Value *AdjustedIndex = HVC.createHvxIntrinsic(

2523 Builder, V6_vaslh, NT, {CastIndex, HVC.getConstInt(1)});

2524 LLVM_DEBUG(dbgs() << " Shifted half index: " << *AdjustedIndex << "\n");

2525

2527 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,

2529 AdjustedIndex},

2530 nullptr);

2531 } else if (Qual == HvxIdioms::HEX_Gather_Scatter) {

2532

2533

2534

2535

2536

2539 if (cstDataVector) {

2540

2541

2542

2543 AllocaInst *IndexesAlloca = Builder.CreateAlloca(NT);

2544 [[maybe_unused]] auto *StoreIndexes =

2545 Builder.CreateStore(cstDataVector, IndexesAlloca);

2546 LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n");

2548 IndexesAlloca->getAllocatedType(), IndexesAlloca, "reload_index");

2549 AllocaInst *ResultAlloca = Builder.CreateAlloca(NT);

2550 LLVM_DEBUG(dbgs() << " ResultAlloca : " << *ResultAlloca << "\n");

2551

2553 IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");

2554 LLVM_DEBUG(dbgs() << " CastedSrc : " << *CastedSrc << "\n");

2555

2557 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,

2558 {ResultAlloca, CastedSrc,

2560 nullptr);

2562 HVC.getHvxTy(HVC.getIntTy(16), false), ResultAlloca, "temp_result");

2563 LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");

2564 LLVM_DEBUG(dbgs() << " Gather : " << *Gather << "\n");

2565 In.replaceAllUsesWith(LoadedResult);

2566 }

2567 } else {

2568

2570 if (!Src)

2571 return nullptr;

2573

2575 Src, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");

2576 LLVM_DEBUG(dbgs() << " CastedSrc: " << *CastedSrc << "\n");

2577

2579 if (!DstLoad)

2580 return nullptr;

2581 LLVM_DEBUG(dbgs() << " DstLoad : " << *DstLoad << "\n");

2583 if (!Ptr)

2584 return nullptr;

2586

2588 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgather_vscattermh,

2590 Indexes},

2591 nullptr);

2592 }

2593 return Gather;

2594 } else if (Qual == HvxIdioms::HEX_Scatter) {

2595

2596

2597

2598

2599 AllocaInst *ResultAlloca = Builder.CreateAlloca(NT);

2601 IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");

2602 LLVM_DEBUG(dbgs() << " CastedSrc : " << *CastedSrc << "\n");

2603 Value *CastIndex =

2605 LLVM_DEBUG(dbgs() << " Cast index: " << *CastIndex << "\n");

2606

2608 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,

2610 CastIndex},

2611 nullptr);

2613 HVC.getHvxTy(HVC.getIntTy(16), false), ResultAlloca, "temp_result");

2614 LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");

2615 In.replaceAllUsesWith(LoadedResult);

2616 } else if (Qual == HvxIdioms::HEX_Gather) {

2617

2618

2621 if (cstDataVector) {

2622

2623 AllocaInst *IndexesAlloca = Builder.CreateAlloca(NT);

2624

2625 [[maybe_unused]] auto *StoreIndexes =

2626 Builder.CreateStore(cstDataVector, IndexesAlloca);

2627 LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n");

2629 IndexesAlloca->getAllocatedType(), IndexesAlloca, "reload_index");

2630 AllocaInst *ResultAlloca = Builder.CreateAlloca(NT);

2631 LLVM_DEBUG(dbgs() << " ResultAlloca : " << *ResultAlloca

2632 << "\n AddressSpace: "

2634

2636 IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");

2637 LLVM_DEBUG(dbgs() << " CastedSrc : " << *CastedSrc << "\n");

2638

2640 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,

2641 {ResultAlloca, CastedSrc,

2643 nullptr);

2645 HVC.getHvxTy(HVC.getIntTy(16), false), ResultAlloca, "temp_result");

2646 LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");

2647 LLVM_DEBUG(dbgs() << " Gather : " << *Gather << "\n");

2648 In.replaceAllUsesWith(LoadedResult);

2649 }

2650 }

2651 } else if (Qual == HvxIdioms::LLVM_Gather) {

2652

2653 errs() << " Unimplemented vgather to vgather sequence\n";

2654 return nullptr;

2655 } else

2657

2658 return Gather;

2659}

2660

2661auto HvxIdioms::processFxpMulChopped(IRBuilderBase &Builder, Instruction &In,

2662 const FxpOp &Op) const -> Value * {

2663 assert(Op.X.Val->getType() == Op.Y.Val->getType());

2664 auto *InpTy = cast<VectorType>(Op.X.Val->getType());

2665 unsigned Width = InpTy->getScalarSizeInBits();

2666 bool Rounding = Op.RoundAt.has_value();

2667

2668 if (!Op.RoundAt || *Op.RoundAt == Op.Frac - 1) {

2669 // The fixed-point intrinsics do signed multiplication.

2670 if (Width == Op.Frac + 1 && Op.X.Sgn != Unsigned && Op.Y.Sgn != Unsigned) {

2671 Value *QMul = nullptr;

2672 if (Width == 16) {

2673 QMul = createMulQ15(Builder, Op.X, Op.Y, Rounding);

2674 } else if (Width == 32) {

2675 QMul = createMulQ31(Builder, Op.X, Op.Y, Rounding);

2676 }

2677 if (QMul != nullptr)

2678 return QMul;

2679 }

2680 }

2681

2682 assert(Width >= 32 || isPowerOf2_32(Width));

2683 assert(Width < 32 || Width % 32 == 0);

2684

2685

2686 if (Width < 32) {

2687 if (Width < 16)

2688 return nullptr;

2689

2690

2691

2692 assert(Width == 16);

2693 assert(Op.Frac != 0 && "Unshifted mul should have been skipped");

2694 if (Op.Frac == 16) {

2695

2696 if (Value *MulH = createMulH16(Builder, Op.X, Op.Y))

2697 return MulH;

2698 }

2699

2700 Value *Prod32 = createMul16(Builder, Op.X, Op.Y);

2701 if (Rounding) {

2702 Value *RoundVal = HVC.getConstSplat(Prod32->getType(), 1 << *Op.RoundAt);

2703 Prod32 = Builder.CreateAdd(Prod32, RoundVal, "add");

2704 }

2705

2706 Value *ShiftAmt = HVC.getConstSplat(Prod32->getType(), Op.Frac);

2708 ? Builder.CreateAShr(Prod32, ShiftAmt, "asr")

2709 : Builder.CreateLShr(Prod32, ShiftAmt, "lsr");

2710 return Builder.CreateTrunc(Shifted, InpTy, "trn");

2711 }

2712

2713

2714

2715

2716

2717 auto WordX = HVC.splitVectorElements(Builder, Op.X.Val, 32);

2718 auto WordY = HVC.splitVectorElements(Builder, Op.Y.Val, 32);

2719 auto WordP = createMulLong(Builder, WordX, Op.X.Sgn, WordY, Op.Y.Sgn);

2720

2721 auto *HvxWordTy = cast(WordP.front()->getType());

2722

2723

2724 if (Op.RoundAt.has_value()) {

2727 RoundV[*Op.RoundAt / 32] =

2728 HVC.getConstSplat(HvxWordTy, 1 << (*Op.RoundAt % 32));

2729 WordP = createAddLong(Builder, WordP, RoundV);

2730 }

2731

2732

2733

2734

2735 unsigned SkipWords = Op.Frac / 32;

2736 Constant *ShiftAmt = HVC.getConstSplat(HvxWordTy, Op.Frac % 32);

2737

2738 for (int Dst = 0, End = WordP.size() - SkipWords; Dst != End; ++Dst) {

2739 int Src = Dst + SkipWords;

2741 if (Src + 1 < End) {

2742 Value *Hi = WordP[Src + 1];

2743 WordP[Dst] = Builder.CreateIntrinsic(HvxWordTy, Intrinsic::fshr,

2744 {Hi, Lo, ShiftAmt},

2745 nullptr, "int");

2746 } else {

2747

2748 WordP[Dst] = Builder.CreateAShr(Lo, ShiftAmt, "asr");

2749 }

2750 }

2751 if (SkipWords != 0)

2752 WordP.resize(WordP.size() - SkipWords);

2753

2754 return HVC.joinVectorElements(Builder, WordP, Op.ResTy);

2755}
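
Editor's note: for Width == 16 the path above widens to a 32-bit product, optionally adds a rounding bias at bit *Op.RoundAt, shifts right by Op.Frac, and truncates back. A minimal scalar sketch of that sequence, assuming signed operands; fxpMul16 is a hypothetical helper, not part of the pass:

#include <cstdint>

// Scalar model of the 16-bit fixed-point multiply above: widen,
// round at the last dropped bit, arithmetic-shift, truncate.
int16_t fxpMul16(int16_t X, int16_t Y, unsigned Frac, bool Round) {
  int32_t Prod = int32_t(X) * int32_t(Y); // createMul16: 16x16 -> 32
  if (Round)
    Prod += int32_t(1) << (Frac - 1);     // bias, RoundAt == Frac - 1
  return int16_t(Prod >> Frac);           // CreateAShr + CreateTrunc
}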

2756

2757auto HvxIdioms::createMulQ15(IRBuilderBase &Builder, SValue X, SValue Y,

2758 bool Rounding) const -> Value * {

2759 assert(X.Val->getType() == Y.Val->getType());

2760 assert(X.Val->getType()->getScalarType() == HVC.getIntTy(16));

2762

2763 // There is no non-rounding intrinsic for i16.

2764 if (!Rounding || X.Sgn == Unsigned || Y.Sgn == Unsigned)

2765 return nullptr;

2766

2767 auto V6_vmpyhvsrs = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhvsrs);

2768 return HVC.createHvxIntrinsic(Builder, V6_vmpyhvsrs, X.Val->getType(),

2769 {X.Val, Y.Val});

2770}
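
Editor's note: V6_vmpyhvsrs performs the rounding Q15 multiply in a single instruction. A scalar reference for what the transformation relies on; the saturation of the one overflowing case is stated here as an assumption about the instruction, and mulQ15 is an illustrative helper only:

#include <cstdint>

// Rounding Q15 multiply: (X*Y + 2^14) >> 15, saturated.
int16_t mulQ15(int16_t X, int16_t Y) {
  int32_t P = int32_t(X) * int32_t(Y) + (1 << 14);
  int32_t R = P >> 15;
  return int16_t(R > 32767 ? 32767 : R); // only -32768 * -32768 overflows
}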

2771

2772auto HvxIdioms::createMulQ31(IRBuilderBase &Builder, SValue X, SValue Y,

2773 bool Rounding) const -> Value * {

2774 Type *InpTy = X.Val->getType();

2775 assert(InpTy == Y.Val->getType());

2778

2779 if (X.Sgn == Unsigned || Y.Sgn == Unsigned)

2780 return nullptr;

2781

2782 auto V6_vmpyewuh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyewuh);

2783 auto V6_vmpyo_acc = Rounding

2784 ? HVC.HST.getIntrinsicId(Hexagon::V6_vmpyowh_rnd_sacc)

2785 : HVC.HST.getIntrinsicId(Hexagon::V6_vmpyowh_sacc);

2786 Value *V1 =

2787 HVC.createHvxIntrinsic(Builder, V6_vmpyewuh, InpTy, {X.Val, Y.Val});

2788 return HVC.createHvxIntrinsic(Builder, V6_vmpyo_acc, InpTy,

2789 {V1, X.Val, Y.Val});

2790}
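
Editor's note: the vmpyewuh partial product plus the accumulating vmpyowh step together produce the doubled high word of the 64-bit product, which is the Q31 result. A scalar reference for the non-rounding case (the rounding variant adds a 2^30 bias before the shift); this is a sketch of the intended arithmetic, not the intrinsic definition:

#include <cstdint>

// Q31 multiply: renormalize the Q62 product back to Q31.
int32_t mulQ31(int32_t X, int32_t Y) {
  int64_t P = int64_t(X) * int64_t(Y); // Q31 * Q31 = Q62
  return int32_t(P >> 31);
}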

2791

2792auto HvxIdioms::createAddCarry(IRBuilderBase &Builder, Value *X, Value *Y,

2793 Value *CarryIn) const

2794 -> std::pair<Value *, Value *> {

2795 assert(X->getType() == Y->getType());

2796 auto *VecTy = cast<VectorType>(X->getType());

2797 if (VecTy == HvxI32Ty && HVC.HST.useHVXV62Ops()) {

2798 SmallVector<Value *, 2> Args = {X, Y};

2799 Intrinsic::ID AddCarry;

2800 if (CarryIn == nullptr && HVC.HST.useHVXV66Ops()) {

2801 AddCarry = HVC.HST.getIntrinsicId(Hexagon::V6_vaddcarryo);

2802 } else {

2803 AddCarry = HVC.HST.getIntrinsicId(Hexagon::V6_vaddcarry);

2804 if (CarryIn == nullptr)

2805 CarryIn = HVC.getNullValue(HVC.getBoolTy(HVC.length(VecTy)));

2806 Args.push_back(CarryIn);

2807 }

2808 Value *Ret = HVC.createHvxIntrinsic(Builder, AddCarry,

2809 nullptr, Args);

2812 return {Result, CarryOut};

2813 }

2814

2815

2816

2817

2818 Value *Result1 = X;

2819 if (CarryIn != nullptr) {

2820 unsigned Width = VecTy->getScalarSizeInBits();

2821 uint32_t Mask = 1;

2822 if (Width < 32) {

2823 for (unsigned i = 0, e = 32 / Width; i != e; ++i)

2824 Mask = (Mask << Width) | 1;

2825 }

2826 auto V6_vandqrt = HVC.HST.getIntrinsicId(Hexagon::V6_vandqrt);

2827 Value *ValueIn =

2828 HVC.createHvxIntrinsic(Builder, V6_vandqrt, nullptr,

2829 {CarryIn, HVC.getConstInt(Mask)});

2830 Result1 = Builder.CreateAdd(X, ValueIn, "add");

2831 }

2832

2836 return {Result2, Builder.CreateOr(CarryOut1, CarryOut2, "orb")};

2837}
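
Editor's note: on targets without the vaddcarry intrinsics, the fallback above does two plain adds and derives the carry from unsigned wraparound (the elided lines produce Result2 and the CarryOut1/CarryOut2 bits, presumably via unsigned compares, which feed the CreateOr). A scalar model of that scheme; addCarry is an illustrative helper:

#include <cstdint>

// Word-wise add with carry-in and carry-out, generic path.
uint32_t addCarry(uint32_t X, uint32_t Y, bool CarryIn, bool &CarryOut) {
  uint32_t R1 = X + (CarryIn ? 1u : 0u);
  uint32_t R2 = R1 + Y;
  CarryOut = (R1 < X) || (R2 < R1); // at most one addition can wrap
  return R2;
}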

2838

2839auto HvxIdioms::createMul16(IRBuilderBase &Builder, SValue X, SValue Y) const

2840 -> Value * {

2841 Intrinsic::ID V6_vmpyh = 0;

2842 std::tie(X, Y) = canonSgn(X, Y);

2843

2844 if (X.Sgn == Signed) {

2845 V6_vmpyh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhv);

2846 } else if (Y.Sgn == Signed) {

2847

2848 V6_vmpyh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhus);

2849 } else {

2850 V6_vmpyh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyuhv);

2851 }

2852

2853

2855 HVC.createHvxIntrinsic(Builder, V6_vmpyh, HvxP32Ty, {Y.Val, X.Val});

2856

2857 return HVC.vshuff(Builder, HVC.sublo(Builder, P), HVC.subhi(Builder, P));

2858}

2859

2860auto HvxIdioms::createMulH16(IRBuilderBase &Builder, SValue X, SValue Y) const

2862 Type *HvxI16Ty = HVC.getHvxTy(HVC.getIntTy(16), false);

2863

2864 if (HVC.HST.useHVXV69Ops()) {

2865 if (X.Sgn != Signed && Y.Sgn != Signed) {

2866 auto V6_vmpyuhvs = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyuhvs);

2867 return HVC.createHvxIntrinsic(Builder, V6_vmpyuhvs, HvxI16Ty,

2868 {X.Val, Y.Val});

2869 }

2870 }

2871

2872 Type *HvxP16Ty = HVC.getHvxTy(HVC.getIntTy(16), true);

2874 Builder.CreateBitCast(createMul16(Builder, X, Y), HvxP16Ty, "cst");

2875 unsigned Len = HVC.length(HvxP16Ty) / 2;

2876

2877 SmallVector<int, 128> PickOdd(Len);

2878 for (int i = 0; i != static_cast<int>(Len); ++i)

2879 PickOdd[i] = 2 * i + 1;

2880

2882 HVC.sublo(Builder, Pair16), HVC.subhi(Builder, Pair16), PickOdd, "shf");

2883}
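
Editor's note: PickOdd selects lanes 2i+1 of the reinterpreted pair, i.e. the high halfword of each widened 16x16 product, so the shuffle realizes a multiply-high. Scalar equivalent for the signed case (sketch, hypothetical helper):

#include <cstdint>

// High 16 bits of the 32-bit product, the "odd" halfword lane.
int16_t mulH16(int16_t X, int16_t Y) {
  int32_t P = int32_t(X) * int32_t(Y);
  return int16_t(uint32_t(P) >> 16);
}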

2884

2885auto HvxIdioms::createMul32(IRBuilderBase &Builder, SValue X, SValue Y) const

2886 -> std::pair<Value *, Value *> {

2887 assert(X.Val->getType() == Y.Val->getType());

2888 assert(X.Val->getType() == HvxI32Ty);

2889

2891 std::tie(X, Y) = canonSgn(X, Y);

2892

2894 V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyss_parts;

2895 } else if (Y.Sgn == Signed) {

2896 V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyus_parts;

2897 } else {

2898 V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyuu_parts;

2899 }

2900

2901 Value *Parts = HVC.createHvxIntrinsic(Builder, V6_vmpy_parts, nullptr,

2902 {X.Val, Y.Val}, {HvxI32Ty});

2905 return {Lo, Hi};

2906}

2907

2908auto HvxIdioms::createAddLong(IRBuilderBase &Builder, ArrayRef<Value *> WordX,

2911 assert(WordX.size() == WordY.size());

2912 unsigned Idx = 0, Length = WordX.size();

2914

2915 while (Idx != Length) {

2916 if (HVC.isZero(WordX[Idx]))

2917 Sum[Idx] = WordY[Idx];

2918 else if (HVC.isZero(WordY[Idx]))

2919 Sum[Idx] = WordX[Idx];

2920 else

2921 break;

2922 ++Idx;

2923 }

2924

2925 Value *Carry = nullptr;

2926 for (; Idx != Length; ++Idx) {

2927 std::tie(Sum[Idx], Carry) =

2928 createAddCarry(Builder, WordX[Idx], WordY[Idx], Carry);

2929 }

2930

2931

2932 return Sum;

2933}

2934

2935auto HvxIdioms::createMulLong(IRBuilderBase &Builder, ArrayRef<Value *> WordX,

2939

2940

2941

2942 for (int i = 0, e = WordX.size(); i != e; ++i) {

2943 for (int j = 0, f = WordY.size(); j != f; ++j) {

2944

2945 Signedness SX = (i + 1 == e) ? SgnX : Unsigned;

2946 Signedness SY = (j + 1 == f) ? SgnY : Unsigned;

2947 auto [Lo, Hi] = createMul32(Builder, {WordX[i], SX}, {WordY[j], SY});

2948 Products[i + j + 0].push_back(Lo);

2949 Products[i + j + 1].push_back(Hi);

2950 }

2951 }

2952

2953 Value *Zero = HVC.getNullValue(WordX[0]->getType());

2954

2955 auto pop_back_or_zero = [Zero](auto &Vector) -> Value * {

2956 if (Vector.empty())

2957 return Zero;

2958 Value *Last = Vector.back();

2959 Vector.pop_back();

2960 return Last;

2961 };

2962

2963 for (int i = 0, e = Products.size(); i != e; ++i) {

2964 while (Products[i].size() > 1) {

2965 Value *Carry = nullptr;

2966 for (int j = i; j != e; ++j) {

2967 auto &ProdJ = Products[j];

2968 auto [Sum, CarryOut] = createAddCarry(Builder, pop_back_or_zero(ProdJ),

2969 pop_back_or_zero(ProdJ), Carry);

2970 ProdJ.insert(ProdJ.begin(), Sum);

2971 Carry = CarryOut;

2972 }

2973 }

2974 }

2975

2976 SmallVector<Value *> WordP;

2977 for (auto &P : Products) {

2978 assert(P.size() == 1 && "Should have been added together");

2979 WordP.push_back(P.front());

2980 }

2981

2982 return WordP;

2983}
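
Editor's note: createMulLong is schoolbook long multiplication over 32-bit limbs: the Lo/Hi halves of each partial product land in columns i+j and i+j+1, and each column stack is then folded with createAddCarry. The same scheme in scalar form for two limbs, unsigned only (the pass additionally tracks the signedness of the most significant limbs); mulLong is an illustrative helper:

#include <cstdint>

// 64x64 -> 128-bit multiply via 32-bit limbs and column sums.
void mulLong(const uint32_t X[2], const uint32_t Y[2], uint32_t P[4]) {
  uint64_t Col[4] = {};
  for (int i = 0; i != 2; ++i)
    for (int j = 0; j != 2; ++j) {
      uint64_t Prod = uint64_t(X[i]) * Y[j];
      Col[i + j] += uint32_t(Prod); // Lo -> Products[i + j]
      Col[i + j + 1] += Prod >> 32; // Hi -> Products[i + j + 1]
    }
  uint64_t Carry = 0;
  for (int k = 0; k != 4; ++k) { // fold columns, propagating carries
    uint64_t S = Col[k] + Carry;
    P[k] = uint32_t(S);
    Carry = S >> 32;
  }
}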

2984

2985auto HvxIdioms::run() -> bool {

2986 bool Changed = false;

2987

2988 for (BasicBlock &B : HVC.F) {

2989 for (auto It = B.rbegin(); It != B.rend(); ++It) {

2990 if (auto Fxm = matchFxpMul(*It)) {

2991 Value *New = processFxpMul(*It, *Fxm);

2992 // Always report "changed" for now.

2993 Changed = true;

2994 if (!New)

2995 continue;

2996 bool StartOver = !isa<Instruction>(New);

2997 It->replaceAllUsesWith(New);

2998 RecursivelyDeleteTriviallyDeadInstructions(&*It, &HVC.TLI);

2999 It = StartOver ? B.rbegin()

3000 : cast<Instruction>(New)->getReverseIterator();

3001 Changed = true;

3002 } else if (matchGather(*It)) {

3003 Value *New = processVGather(*It);

3004 if (!New)

3005 continue;

3007

3008 It->eraseFromParent();

3012 } else if (matchScatter(*It)) {

3013 Value *New = processVScatter(*It);

3014 if (!New)

3015 continue;

3016 LLVM_DEBUG(dbgs() << " Scatter : " << *New << "\n");

3017

3018 It->eraseFromParent();

3022 }

3023 }

3024 }

3025

3026 return Changed;

3027}

3028

3029

3030

3031auto HexagonVectorCombine::run() -> bool {

3032 if (DumpModule)

3033 dbgs() << "Module before HexagonVectorCombine\n" << *F.getParent();

3034

3035 bool Changed = false;

3036 if (HST.useHVXOps()) {

3037 if (VAEnabled)

3038 Changed |= AlignVectors(*this).run();

3039 if (VIEnabled)

3040 Changed |= HvxIdioms(*this).run();

3041 }

3042

3043 if (DumpModule) {

3044 dbgs() << "Module " << (Changed ? "(modified)" : "(unchanged)")

3045 << " after HexagonVectorCombine\n"

3046 << *F.getParent();

3047 }

3048 return Changed;

3049}

3050

3051auto HexagonVectorCombine::getIntTy(unsigned Width) const -> IntegerType * {

3052 return IntegerType::get(F.getContext(), Width);

3053}

3054

3055auto HexagonVectorCombine::getByteTy(int ElemCount) const -> Type * {

3056 assert(ElemCount >= 0);

3057 IntegerType *ByteTy = Type::getInt8Ty(F.getContext());

3058 if (ElemCount == 0)

3059 return ByteTy;

3060 return VectorType::get(ByteTy, ElemCount, false);

3061}

3062

3063auto HexagonVectorCombine::getBoolTy(int ElemCount) const -> Type * {

3064 assert(ElemCount >= 0);

3065 IntegerType *BoolTy = Type::getInt1Ty(F.getContext());

3066 if (ElemCount == 0)

3067 return BoolTy;

3068 return VectorType::get(BoolTy, ElemCount, false);

3069}

3070

3071auto HexagonVectorCombine::getConstInt(int Val, unsigned Width) const

3072 -> ConstantInt * {

3073 return ConstantInt::getSigned(getIntTy(Width), Val);

3074}

3075

3076auto HexagonVectorCombine::isZero(const Value *Val) const -> bool {

3077 if (auto *C = dyn_cast<Constant>(Val))

3078 return C->isZeroValue();

3079 return false;

3080}

3081

3082auto HexagonVectorCombine::getIntValue(const Value *Val) const

3083 -> std::optional<APInt> {

3084 if (auto *CI = dyn_cast<ConstantInt>(Val))

3085 return CI->getValue();

3086 return std::nullopt;

3087}

3088

3089auto HexagonVectorCombine::isUndef(const Value *Val) const -> bool {

3090 return isa<UndefValue>(Val);

3091}

3092

3093auto HexagonVectorCombine::isTrue(const Value *Val) const -> bool {

3095}

3096

3097auto HexagonVectorCombine::isFalse(const Value *Val) const -> bool {

3099}

3100

3101auto HexagonVectorCombine::getHvxTy(Type *ElemTy, bool Pair) const

3104 assert(ETy.isSimple() && "Invalid HVX element type");

3105

3106 assert(HST.isHVXElementType(ETy.getSimpleVT(), false) &&

3107 "Invalid HVX element type");

3108 unsigned HwLen = HST.getVectorLength();

3109 unsigned NumElems = (8 * HwLen) / ETy.getSizeInBits();

3110 return VectorType::get(ElemTy, Pair ? 2 * NumElems : NumElems,

3111 false);

3112}

3113

3114auto HexagonVectorCombine::getSizeOf(const Value *Val, SizeKind Kind) const

3115 -> int {

3116 return getSizeOf(Val->getType(), Kind);

3117}

3118

3119auto HexagonVectorCombine::getSizeOf(const Type *Ty, SizeKind Kind) const

3120 -> int {

3121 auto *NcTy = const_cast<Type *>(Ty);

3122 switch (Kind) {

3124 return DL.getTypeStoreSize(NcTy).getFixedValue();

3126 return DL.getTypeAllocSize(NcTy).getFixedValue();

3127 }

3129}

3130

3131auto HexagonVectorCombine::getTypeAlignment(Type *Ty) const -> int {

3132

3133

3134 if (HST.isTypeForHVX(Ty))

3135 return HST.getVectorLength();

3136 return DL.getABITypeAlign(Ty).value();

3137}

3138

3139auto HexagonVectorCombine::length(Value *Val) const -> size_t {

3140 return length(Val->getType());

3141}

3142

3143auto HexagonVectorCombine::length(Type *Ty) const -> size_t {

3145 assert(VecTy && "Must be a vector type");

3146 return VecTy->getElementCount().getFixedValue();

3147}

3148

3149auto HexagonVectorCombine::getNullValue(Type *Ty) const -> Constant * {

3154 return Zero;

3155}

3156

3157auto HexagonVectorCombine::getFullValue(Type *Ty) const -> Constant * {

3159 auto Minus1 = ConstantInt::get(Ty->getScalarType(), -1);

3162 return Minus1;

3163}

3164

3165auto HexagonVectorCombine::getConstSplat(Type *Ty, int Val) const

3169 Type *ElemTy = VecTy->getElementType();

3170

3172 ConstantInt::get(ElemTy, Val));

3174}

3175

3176auto HexagonVectorCombine::simplify(Value *V) const -> Value * {

3178 SimplifyQuery Q(DL, &TLI, &DT, &AC, In);

3180 }

3181 return nullptr;

3182}

3183

3184

3185auto HexagonVectorCombine::insertb(IRBuilderBase &Builder, Value *Dst,

3187 int Where) const -> Value * {

3188 assert(isByteVecTy(Dst->getType()) && isByteVecTy(Src->getType()));

3189 int SrcLen = getSizeOf(Src);

3190 int DstLen = getSizeOf(Dst);

3191 assert(0 <= Start && Start + Length <= SrcLen);

3192 assert(0 <= Where && Where + Length <= DstLen);

3193

3196 Value *P2Src = vresize(Builder, Src, P2Len, Poison);

3197 Value *P2Dst = vresize(Builder, Dst, P2Len, Poison);

3198

3200 for (int i = 0; i != P2Len; ++i) {

3201

3202

3203 SMask[i] =

3204 (Where <= i && i < Where + Length) ? P2Len + Start + (i - Where) : i;

3205 }

3206

3208 return vresize(Builder, P2Insert, DstLen, Poison);

3209}
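
Editor's note: the shuffle mask built above keeps Dst's byte at position i except inside the window [Where, Where+Length), where it selects the corresponding Src byte; padding to a power of two only satisfies the shuffle-width requirements. Element-wise effect (sketch, hypothetical helper):

#include <cstdint>

// Byte-level effect of insertb's single shuffle.
void insertBytes(uint8_t *Dst, int DstLen, const uint8_t *Src,
                 int Start, int Length, int Where) {
  for (int i = 0; i != DstLen; ++i)
    if (Where <= i && i < Where + Length)
      Dst[i] = Src[Start + (i - Where)]; // SMask[i] = P2Len + Start + (i - Where)
}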

3210

3211auto HexagonVectorCombine::vlalignb(IRBuilderBase &Builder, Value *Lo,

3213 assert(Lo->getType() == Hi->getType() && "Argument type mismatch");

3215 return Hi;

3216 int VecLen = getSizeOf(Hi);

3217 if (auto IntAmt = getIntValue(Amt))

3218 return getElementRange(Builder, Lo, Hi, VecLen - IntAmt->getSExtValue(),

3219 VecLen);

3220

3221 if (HST.isTypeForHVX(Hi->getType())) {

3222 assert(static_cast<unsigned>(VecLen) == HST.getVectorLength() &&

3223 "Expecting an exact HVX type");

3224 return createHvxIntrinsic(Builder, HST.getIntrinsicId(Hexagon::V6_vlalignb),

3225 Hi->getType(), {Hi, Lo, Amt});

3226 }

3227

3228 if (VecLen == 4) {

3233 Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext()), "trn");

3234 return Builder.CreateBitCast(Trunc, Hi->getType(), "cst");

3235 }

3236 if (VecLen == 8) {

3238 return vralignb(Builder, Lo, Hi, Sub);

3239 }

3241}

3242

3243auto HexagonVectorCombine::vralignb(IRBuilderBase &Builder, Value *Lo,

3245 assert(Lo->getType() == Hi->getType() && "Argument type mismatch");

3247 return Lo;

3248 int VecLen = getSizeOf(Lo);

3249 if (auto IntAmt = getIntValue(Amt))

3250 return getElementRange(Builder, Lo, Hi, IntAmt->getSExtValue(), VecLen);

3251

3252 if (HST.isTypeForHVX(Lo->getType())) {

3253 assert(static_cast<unsigned>(VecLen) == HST.getVectorLength() &&

3254 "Expecting an exact HVX type");

3255 return createHvxIntrinsic(Builder, HST.getIntrinsicId(Hexagon::V6_valignb),

3256 Lo->getType(), {Hi, Lo, Amt});

3257 }

3258

3259 if (VecLen == 4) {

3263 Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext()), "trn");

3264 return Builder.CreateBitCast(Trunc, Lo->getType(), "cst");

3265 }

3266 if (VecLen == 8) {

3267 Type *Int64Ty = Type::getInt64Ty(F.getContext());

3271 {Hi64, Lo64, Amt},

3272 nullptr, "cup");

3274 }

3276}
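
Editor's note: both alignment helpers behave like a byte-granular funnel shift over the 2*VecLen-byte concatenation Hi:Lo; vralignb extracts bytes Amt .. Amt+VecLen-1, and vlalignb the window starting at VecLen-Amt. For constant amounts this is exactly getElementRange; a scalar sketch with a hypothetical helper:

#include <cstdint>

// Right-align: VecLen bytes starting at Amt, low part from Lo, rest from Hi.
void ralignBytes(const uint8_t *Lo, const uint8_t *Hi, int VecLen, int Amt,
                 uint8_t *Out) {
  for (int i = 0; i != VecLen; ++i) {
    int Src = Amt + i;
    Out[i] = Src < VecLen ? Lo[Src] : Hi[Src - VecLen];
  }
}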

3277

3278

3279auto HexagonVectorCombine::concat(IRBuilderBase &Builder,

3281 assert(!Vecs.empty());

3283 std::vector<Value *> Work[2];

3284 int ThisW = 0, OtherW = 1;

3285

3286 Work[ThisW].assign(Vecs.begin(), Vecs.end());

3287 while (Work[ThisW].size() > 1) {

3289 SMask.resize(length(Ty) * 2);

3290 std::iota(SMask.begin(), SMask.end(), 0);

3291

3292 Work[OtherW].clear();

3293 if (Work[ThisW].size() % 2 != 0)

3295 for (int i = 0, e = Work[ThisW].size(); i < e; i += 2) {

3297 Work[ThisW][i], Work[ThisW][i + 1], SMask, "shf");

3298 Work[OtherW].push_back(Joined);

3299 }

3301 }

3302

3303

3304

3305

3306 SMask.resize(Vecs.size() * length(Vecs.front()->getType()));

3307 std::iota(SMask.begin(), SMask.end(), 0);

3310}

3311

3312auto HexagonVectorCombine::vresize(IRBuilderBase &Builder, Value *Val,

3313 int NewSize, Value *Pad) const -> Value * {

3316 assert(ValTy->getElementType() == Pad->getType());

3317

3318 int CurSize = length(ValTy);

3319 if (CurSize == NewSize)

3320 return Val;

3321

3322 if (CurSize > NewSize)

3323 return getElementRange(Builder, Val, Val, 0, NewSize);

3324

3325 SmallVector<int, 128> SMask(NewSize);

3326 std::iota(SMask.begin(), SMask.begin() + CurSize, 0);

3327 std::fill(SMask.begin() + CurSize, SMask.end(), CurSize);

3330}

3331

3332auto HexagonVectorCombine::rescale(IRBuilderBase &Builder, Value *Mask,

3334

3335

3336

3338

3339 Type *FromSTy = FromTy->getScalarType();

3340 Type *ToSTy = ToTy->getScalarType();

3341 if (FromSTy == ToSTy)

3342 return Mask;

3343

3344 int FromSize = getSizeOf(FromSTy);

3345 int ToSize = getSizeOf(ToSTy);

3346 assert(FromSize % ToSize == 0 || ToSize % FromSize == 0);

3347

3349 int FromCount = length(MaskTy);

3350 int ToCount = (FromCount * FromSize) / ToSize;

3351 assert((FromCount * FromSize) % ToSize == 0);

3352

3353 auto *FromITy = getIntTy(FromSize * 8);

3354 auto *ToITy = getIntTy(ToSize * 8);

3355

3356

3357

3359 Mask, VectorType::get(FromITy, FromCount, false), "sxt");

3361 Ext, VectorType::get(ToITy, ToCount, false), "cst");

3363 Cast, VectorType::get(getBoolTy(), ToCount, false), "trn");

3364}

3365

3366

3367auto HexagonVectorCombine::vlsb(IRBuilderBase &Builder, Value *Val) const

3370 if (ScalarTy == getBoolTy())

3371 return Val;

3372

3373 Value *Bytes = vbytes(Builder, Val);

3375 return Builder.CreateTrunc(Bytes, getBoolTy(getSizeOf(VecTy)), "trn");

3376

3377

3378 return Builder.CreateTrunc(Bytes, getBoolTy(), "trn");

3379}

3380

3381

3382auto HexagonVectorCombine::vbytes(IRBuilderBase &Builder, Value *Val) const

3385 if (ScalarTy == getByteTy())

3386 return Val;

3387

3388 if (ScalarTy != getBoolTy())

3389 return Builder.CreateBitCast(Val, getByteTy(getSizeOf(Val)), "cst");

3390

3392 return Builder.CreateSExt(Val, VectorType::get(getByteTy(), VecTy), "sxt");

3393 return Builder.CreateSExt(Val, getByteTy(), "sxt");

3394}

3395

3396auto HexagonVectorCombine::subvector(IRBuilderBase &Builder, Value *Val,

3397 unsigned Start, unsigned Length) const

3400 return getElementRange(Builder, Val, Val, Start, Length);

3401}

3402

3403auto HexagonVectorCombine::sublo(IRBuilderBase &Builder, Value *Val) const

3405 size_t Len = length(Val);

3406 assert(Len % 2 == 0 && "Length should be even");

3407 return subvector(Builder, Val, 0, Len / 2);

3408}

3409

3410auto HexagonVectorCombine::subhi(IRBuilderBase &Builder, Value *Val) const

3412 size_t Len = length(Val);

3413 assert(Len % 2 == 0 && "Length should be even");

3414 return subvector(Builder, Val, Len / 2, Len / 2);

3415}

3416

3417auto HexagonVectorCombine::vdeal(IRBuilderBase &Builder, Value *Val0,

3419 assert(Val0->getType() == Val1->getType());

3420 int Len = length(Val0);

3421 SmallVector<int, 128> Mask(2 * Len);

3422

3423 for (int i = 0; i != Len; ++i) {

3424 Mask[i] = 2 * i;

3425 Mask[i + Len] = 2 * i + 1;

3426 }

3428}

3429

3430auto HexagonVectorCombine::vshuff(IRBuilderBase &Builder, Value *Val0,

3432 assert(Val0->getType() == Val1->getType());

3433 int Len = length(Val0);

3434 SmallVector<int, 128> Mask(2 * Len);

3435

3436 for (int i = 0; i != Len; ++i) {

3437 Mask[2 * i + 0] = i;

3438 Mask[2 * i + 1] = i + Len;

3439 }

3441}
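
Editor's note: vdeal sends even lanes to the low half and odd lanes to the high half, while vshuff interleaves the two halves back; the masks are inverse permutations, which is what lets splitVectorElements and joinVectorElements round-trip. A quick self-check (sketch, hypothetical helper):

#include <vector>

// Confirm the vshuff mask inverts the vdeal mask for any Len.
bool masksAreInverse(int Len) {
  std::vector<int> D(2 * Len), S(2 * Len);
  for (int i = 0; i != Len; ++i) {
    D[i] = 2 * i;           // vdeal: evens low, odds high
    D[i + Len] = 2 * i + 1;
    S[2 * i + 0] = i;       // vshuff: interleave the halves
    S[2 * i + 1] = i + Len;
  }
  for (int i = 0; i != 2 * Len; ++i)
    if (D[S[i]] != i || S[D[i]] != i)
      return false;
  return true;
}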

3442

3443auto HexagonVectorCombine::createHvxIntrinsic(IRBuilderBase &Builder,

3449 auto getCast = [&](IRBuilderBase &Builder, Value *Val,

3451 Type *SrcTy = Val->getType();

3452 if (SrcTy == DestTy)

3453 return Val;

3454

3455

3456

3457 assert(HST.isTypeForHVX(SrcTy, true));

3458

3459 Type *BoolTy = Type::getInt1Ty(F.getContext());

3461 return Builder.CreateBitCast(Val, DestTy, "cst");

3462

3463

3464 unsigned HwLen = HST.getVectorLength();

3465 Intrinsic::ID TC = HwLen == 64 ? Intrinsic::hexagon_V6_pred_typecast

3466 : Intrinsic::hexagon_V6_pred_typecast_128B;

3467 return Builder.CreateIntrinsic(TC, {DestTy, Val->getType()}, {Val},

3468 nullptr, "cup");

3469 };

3470

3474

3476 for (int i = 0, e = Args.size(); i != e; ++i) {

3478 Type *T = IntrTy->getParamType(i);

3479 if (A->getType() != T) {

3480 IntrArgs.push_back(getCast(Builder, A, T));

3481 } else {

3483 }

3484 }

3485 StringRef MaybeName = !IntrTy->getReturnType()->isVoidTy() ? "cup" : "";

3486 CallInst *Call = Builder.CreateCall(IntrFn, IntrArgs, MaybeName);

3487

3491

3493 if (RetTy == nullptr || CallTy == RetTy)

3494 return Call;

3495

3496 assert(HST.isTypeForHVX(CallTy, true));

3497 return getCast(Builder, Call, RetTy);

3498}

3499

3500auto HexagonVectorCombine::splitVectorElements(IRBuilderBase &Builder,

3502 unsigned ToWidth) const

3504

3505

3506

3507

3508

3509

3510

3511

3512

3513

3514

3515

3517 assert(VecTy->getElementType()->isIntegerTy());

3518 unsigned FromWidth = VecTy->getScalarSizeInBits();

3520 assert(ToWidth <= FromWidth && "Breaking up into wider elements?");

3521 unsigned NumResults = FromWidth / ToWidth;

3522

3525 unsigned Length = length(VecTy);

3526

3527

3528

3529 auto splitInHalf = [&](unsigned Begin, unsigned End, auto splitFunc) -> void {

3530

3531

3532

3533 if (Begin + 1 == End)

3534 return;

3535

3538

3539 auto *VTy = VectorType::get(getIntTy(Width / 2), 2 * Length, false);

3541

3542 Value *Res = vdeal(Builder, sublo(Builder, VVal), subhi(Builder, VVal));

3543

3544 unsigned Half = (Begin + End) / 2;

3545 Results[Begin] = sublo(Builder, Res);

3546 Results[Half] = subhi(Builder, Res);

3547

3548 splitFunc(Begin, Half, splitFunc);

3549 splitFunc(Half, End, splitFunc);

3550 };

3551

3552 splitInHalf(0, NumResults, splitInHalf);

3554}
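
Editor's note: each recursion level bitcasts to half-width elements and vdeal-separates the low and high halves of every lane, so one level applied to 32-bit lanes acts like this scalar loop (sketch; assumes the little-endian lane layout the bitcast implies):

#include <cstdint>

// One split level: i32 lanes -> a vector of low and a vector of high i16s.
void splitWords(const uint32_t *In, unsigned N, uint16_t *Lo, uint16_t *Hi) {
  for (unsigned i = 0; i != N; ++i) {
    Lo[i] = uint16_t(In[i]);       // lands in Results[Begin]
    Hi[i] = uint16_t(In[i] >> 16); // lands in Results[Half]
  }
}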

3555

3556auto HexagonVectorCombine::joinVectorElements(IRBuilderBase &Builder,

3558 VectorType *ToType) const

3560 assert(ToType->getElementType()->isIntegerTy());

3561

3562

3563

3564

3565

3566

3567

3568

3570

3571 unsigned ToWidth = ToType->getScalarSizeInBits();

3572 unsigned Width = Inputs.front()->getType()->getScalarSizeInBits();

3573 assert(Width <= ToWidth);

3575 unsigned Length = length(Inputs.front()->getType());

3576

3577 unsigned NeedInputs = ToWidth / Width;

3578 if (Inputs.size() != NeedInputs) {

3579

3580

3583 Last, getConstSplat(Last->getType(), Width - 1), "asr");

3584 Inputs.resize(NeedInputs, Sign);

3585 }

3586

3587 while (Inputs.size() > 1) {

3588 Width *= 2;

3589 auto *VTy = VectorType::get(getIntTy(Width), Length, false);

3590 for (int i = 0, e = Inputs.size(); i < e; i += 2) {

3591 Value *Res = vshuff(Builder, Inputs[i], Inputs[i + 1]);

3592 Inputs[i / 2] = Builder.CreateBitCast(Res, VTy, "cst");

3593 }

3594 Inputs.resize(Inputs.size() / 2);

3595 }

3596

3597 assert(Inputs.front()->getType() == ToType);

3598 return Inputs.front();

3599}
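
Editor's note: the join is the inverse walk: vshuff interleaves a low-part and a high-part vector and the bitcast reads each adjacent pair as one double-width lane; when fewer inputs than needed are supplied, the missing high parts are sign-filled from the last input via the arithmetic shift above. One joining level in scalar form (sketch, hypothetical helper):

#include <cstdint>

// One join level: low/high i16 vectors -> i32 lanes.
void joinWords(const uint16_t *Lo, const uint16_t *Hi, unsigned N,
               uint32_t *Out) {
  for (unsigned i = 0; i != N; ++i)
    Out[i] = uint32_t(Lo[i]) | (uint32_t(Hi[i]) << 16); // vshuff + bitcast
}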

3600

3601auto HexagonVectorCombine::calculatePointerDifference(Value *Ptr0,

3602 Value *Ptr1) const

3603 -> std::optional {

3604

3605 const SCEV *Scev0 = SE.getSCEV(Ptr0);

3606 const SCEV *Scev1 = SE.getSCEV(Ptr1);

3607 const SCEV *ScevDiff = SE.getMinusSCEV(Scev0, Scev1);

3609 APInt V = Const->getAPInt();

3610 if (V.isSignedIntN(8 * sizeof(int)))

3611 return static_cast<int>(V.getSExtValue());

3612 }

3613

3616 ~Builder() {

3618 I->eraseFromParent();

3619 }

3620 SmallVector<Instruction *, 8> ToErase;

3621 };

3622

3623#define CallBuilder(B, F) \

3624 [&](auto &B_) { \

3625 Value *V = B_.F; \

3626 if (auto *I = dyn_cast(V)) \

3627 B_.ToErase.push_back(I); \

3628 return V; \

3629 }(B)

3630

3631 auto Simplify = [this](Value *V) {

3632 if (Value *S = simplify(V))

3633 return S;

3634 return V;

3635 };

3636

3637 auto StripBitCast = [](Value *V) {

3638 while (auto *C = dyn_cast<BitCastInst>(V))

3639 V = C->getOperand(0);

3640 return V;

3641 };

3642

3643 Ptr0 = StripBitCast(Ptr0);

3644 Ptr1 = StripBitCast(Ptr1);

3646 return std::nullopt;

3647

3650 if (Gep0->getPointerOperand() != Gep1->getPointerOperand())

3651 return std::nullopt;

3652 if (Gep0->getSourceElementType() != Gep1->getSourceElementType())

3653 return std::nullopt;

3654

3655 Builder B(Gep0->getParent());

3656 int Scale = getSizeOf(Gep0->getSourceElementType(), Alloc);

3657

3658

3659 if (Gep0->getNumOperands() != 2 || Gep1->getNumOperands() != 2)

3660 return std::nullopt;

3661

3662 Value *Idx0 = Gep0->getOperand(1);

3663 Value *Idx1 = Gep1->getOperand(1);

3664

3665

3667 Simplify(CallBuilder(B, CreateSub(Idx0, Idx1)))))

3668 return Diff->getSExtValue() * Scale;

3669

3670 KnownBits Known0 = getKnownBits(Idx0, Gep0);

3671 KnownBits Known1 = getKnownBits(Idx1, Gep1);

3672 APInt Unknown = ~(Known0.Zero | Known0.One) | ~(Known1.Zero | Known1.One);

3673 if (Unknown.isAllOnes())

3674 return std::nullopt;

3675

3677 Value *AndU0 = Simplify(CallBuilder(B, CreateAnd(Idx0, MaskU)));

3678 Value *AndU1 = Simplify(CallBuilder(B, CreateAnd(Idx1, MaskU)));

3679 Value *SubU = Simplify(CallBuilder(B, CreateSub(AndU0, AndU1)));

3680 int Diff0 = 0;

3682 Diff0 = C->getSExtValue();

3683 } else {

3684 return std::nullopt;

3685 }

3686

3688 Value *AndK0 = Simplify(CallBuilder(B, CreateAnd(Idx0, MaskK)));

3689 Value *AndK1 = Simplify(CallBuilder(B, CreateAnd(Idx1, MaskK)));

3690 Value *SubK = Simplify(CallBuilder(B, CreateSub(AndK0, AndK1)));

3691 int Diff1 = 0;

3693 Diff1 = C->getSExtValue();

3694 } else {

3695 return std::nullopt;

3696 }

3697

3698 return (Diff0 + Diff1) * Scale;

3699

3700#undef CallBuilder

3701}
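
Editor's note: when the index difference does not fold to a constant, the function splits each index by its known bits: with U the unknown-bit mask and K = ~U, Idx0 - Idx1 == ((Idx0 & U) - (Idx1 & U)) + ((Idx0 & K) - (Idx1 & K)), because U and K partition the bits and two's-complement addition just regroups; each half may then simplify to a constant on its own. The identity in checkable scalar form (sketch, hypothetical helper):

#include <cstdint>

// The bit-partition identity behind Diff0 + Diff1.
bool splitDiffHolds(uint32_t A, uint32_t B, uint32_t U) {
  uint32_t K = ~U; // (A & U) + (A & K) == A, the masks are disjoint
  uint32_t D = ((A & U) - (B & U)) + ((A & K) - (B & K));
  return D == A - B; // holds for any choice of U
}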

3702

3703auto HexagonVectorCombine::getNumSignificantBits(const Value *V,

3704 const Instruction *CtxI) const

3705 -> unsigned {

3707}

3708

3709auto HexagonVectorCombine::getKnownBits(const Value *V,

3710 const Instruction *CtxI) const

3711 -> KnownBits {

3713}

3714

3715auto HexagonVectorCombine::isSafeToClone(const Instruction &In) const -> bool {

3716 if (In.mayHaveSideEffects() || In.isAtomic() || In.isVolatile() ||

3717 In.isFenceLike() || In.mayReadOrWriteMemory()) {

3718 return false;

3719 }

3721 return false;

3722 return true;

3723}

3724

3725template

3726auto HexagonVectorCombine::isSafeToMoveBeforeInBB(const Instruction &In,

3728 const T &IgnoreInsts) const

3729 -> bool {

3730 auto getLocOrNone =

3731 [this](const Instruction &I) -> std::optional {

3733 switch (II->getIntrinsicID()) {

3734 case Intrinsic::masked_load:

3736 case Intrinsic::masked_store:

3738 }

3739 }

3741 };

3742

3743

3746

3748 return false;

3749

3751 return true;

3752 bool MayWrite = In.mayWriteToMemory();

3753 auto MaybeLoc = getLocOrNone(In);

3754

3755 auto From = In.getIterator();

3756 if (From == To)

3757 return true;

3758 bool MoveUp = (To != Block.end() && To->comesBefore(&In));

3760 MoveUp ? std::make_pair(To, From) : std::make_pair(std::next(From), To);

3761 for (auto It = Range.first; It != Range.second; ++It) {

3762 const Instruction &I = *It;

3763 if (llvm::is_contained(IgnoreInsts, &I))

3764 continue;

3765

3766 if (auto *II = dyn_cast(&I)) {

3767 if (II->getIntrinsicID() == Intrinsic::assume)

3768 continue;

3769 }

3770

3771 if (I.mayThrow())

3772 return false;

3774 if (!CB->hasFnAttr(Attribute::WillReturn))

3775 return false;

3776 if (!CB->hasFnAttr(Attribute::NoSync))

3777 return false;

3778 }

3779 if (I.mayReadOrWriteMemory()) {

3780 auto MaybeLocI = getLocOrNone(I);

3781 if (MayWrite || I.mayWriteToMemory()) {

3782 if (!MaybeLoc || !MaybeLocI)

3783 return false;

3784 if (!AA.isNoAlias(*MaybeLoc, *MaybeLocI))

3785 return false;

3786 }

3787 }

3788 }

3789 return true;

3790}

3791

3792auto HexagonVectorCombine::isByteVecTy(Type *Ty) const -> bool {

3793 if (auto *VecTy = dyn_cast<VectorType>(Ty))

3794 return VecTy->getElementType() == getByteTy();

3795 return false;

3796}

3797

3798auto HexagonVectorCombine::getElementRange(IRBuilderBase &Builder, Value *Lo,

3801 assert(0 <= Start && size_t(Start + Length) < length(Lo) + length(Hi));

3802 SmallVector<int, 128> SMask(Length);

3803 std::iota(SMask.begin(), SMask.end(), Start);

3805}

3806

3807

3808

3809namespace {

3810class HexagonVectorCombineLegacy : public FunctionPass {

3811public:

3812 static char ID;

3813

3814 HexagonVectorCombineLegacy() : FunctionPass(ID) {}

3815

3816 StringRef getPassName() const override { return "Hexagon Vector Combine"; }

3817

3818 void getAnalysisUsage(AnalysisUsage &AU) const override {

3819 AU.setPreservesCFG();

3820 AU.addRequired<AAResultsWrapperPass>();

3821 AU.addRequired<AssumptionCacheTracker>();

3822 AU.addRequired<DominatorTreeWrapperPass>();

3823 AU.addRequired<ScalarEvolutionWrapperPass>();

3824 AU.addRequired<TargetLibraryInfoWrapperPass>();

3825 AU.addRequired<TargetPassConfig>();

3826 FunctionPass::getAnalysisUsage(AU);

3827 }

3828

3829 bool runOnFunction(Function &F) override {

3830 if (skipFunction(F))

3831 return false;

3832 AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();

3833 AssumptionCache &AC =

3834 getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);

3835 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();

3836 ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();

3837 TargetLibraryInfo &TLI =

3838 getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);

3839 auto &TM = getAnalysis<TargetPassConfig>().getTM<HexagonTargetMachine>();

3840 HexagonVectorCombine HVC(F, AA, AC, DT, SE, TLI, TM);

3841 return HVC.run();

3842 }

3843};

3844}

3845

3846char HexagonVectorCombineLegacy::ID = 0;

3847

3849 "Hexagon Vector Combine", false, false)

3858

3859 FunctionPass *llvm::createHexagonVectorCombineLegacyPass() {

3860 return new HexagonVectorCombineLegacy();

3861}
