LLVM: lib/Target/Hexagon/HexagonVectorCombine.cpp Source File
1//===-- HexagonVectorCombine.cpp ------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// HexagonVectorCombine is a utility class implementing a variety of functions
9// that assist in vector-based optimizations.
10//
11// AlignVectors: replace unaligned vector loads and stores with aligned ones.
12// HvxIdioms: recognize various opportunities to generate HVX intrinsic code.
13//===----------------------------------------------------------------------===//
14
34#include "llvm/IR/IntrinsicsHexagon.h"
45
49
50#include <algorithm>
51#include <deque>
52#include <map>
53#include <optional>
54#include <set>
55#include <utility>
56#include <vector>
57
58#define DEBUG_TYPE "hexagon-vc"
59
60
61
62
63#define DEFAULT_HVX_VTCM_PAGE_SIZE 131072
64
65using namespace llvm;
66
67namespace {
72
77
78class HexagonVectorCombine {
79public:
80  HexagonVectorCombine(Function &F_, AliasAnalysis &AA_, AssumptionCache &AC_,
81                       DominatorTree &DT_, ScalarEvolution &SE_,
82                       TargetLibraryInfo &TLI_, const TargetMachine &TM_)
83      : F(F_), DL(F.getDataLayout()), AA(AA_), AC(AC_), DT(DT_),
84        SE(SE_), TLI(TLI_),
85        HST(static_cast<const HexagonSubtarget &>(*TM_.getSubtargetImpl(F))) {}
86
87 bool run();
88
89
91
92
93 Type *getByteTy(int ElemCount = 0) const;
94
95
96 Type *getBoolTy(int ElemCount = 0) const;
97
99
100 std::optional<APInt> getIntValue(const Value *Val) const;
101
103
105
106 bool isTrue(const Value *Val) const;
107
108 bool isFalse(const Value *Val) const;
109
110
111 VectorType *getHvxTy(Type *ElemTy, bool Pair = false) const;
112
113 enum SizeKind {
114 Store,
115 Alloc,
116 };
117 int getSizeOf(const Value *Val, SizeKind Kind = Store) const;
118 int getSizeOf(const Type *Ty, SizeKind Kind = Store) const;
119 int getTypeAlignment(Type *Ty) const;
120 size_t length(Value *Val) const;
121 size_t length(Type *Ty) const;
122
125 Constant *getConstSplat(Type *Ty, int Val) const;
126
128
130 int Length, int Where) const;
132 Value *Amt) const;
134 Value *Amt) const;
137 Value *Pad) const;
139 Type *ToTy) const;
143 unsigned Length) const;
148
154 unsigned ToWidth) const;
157
158 std::optional<int> calculatePointerDifference(Value *Ptr0, Value *Ptr1) const;
159
160 unsigned getNumSignificantBits(const Value *V,
161 const Instruction *CtxI = nullptr) const;
162 KnownBits getKnownBits(const Value *V,
163 const Instruction *CtxI = nullptr) const;
164
165 bool isSafeToClone(const Instruction &In) const;
166
167 template <typename T = std::vector<Instruction *>>
168 bool isSafeToMoveBeforeInBB(const Instruction &In,
169 BasicBlock::const_iterator To,
170 const T &IgnoreInsts = {}) const;
171
172
173 [[maybe_unused]] bool isByteVecTy(Type *Ty) const;
174
175 Function &F;
176 const DataLayout &DL;
177 AliasAnalysis &AA;
178 AssumptionCache &AC;
179 DominatorTree &DT;
180 ScalarEvolution &SE;
181 TargetLibraryInfo &TLI;
182 const HexagonSubtarget &HST;
183
184private:
185 Value *getElementRange(IRBuilderBase &Builder, Value *Lo, Value *Hi,
186 int Start, int Length) const;
187};
188
189class AlignVectors {
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204public:
205 AlignVectors(const HexagonVectorCombine &HVC_) : HVC(HVC_) {}
206
207 bool run();
208
209private:
210 using InstList = std::vector<Instruction *>;
211 using InstMap = DenseMap<Instruction *, Instruction *>;
212
213 struct AddrInfo {
214 AddrInfo(const AddrInfo &) = default;
215 AddrInfo(const HexagonVectorCombine &HVC, Instruction *I, Value *A,
216 Type *T, Align H)
217 : Inst(I), Addr(A), ValTy(T), HaveAlign(H),
218 NeedAlign(HVC.getTypeAlignment(ValTy)) {}
219 AddrInfo &operator=(const AddrInfo &) = default;
220
221 Instruction *Inst;
222 Value *Addr;
223 Type *ValTy;
224 Align HaveAlign;
225 Align NeedAlign;
227 int Offset = 0; // Offset (in bytes) from the first member of the
228 // containing AddrList.
229 };
230 using AddrList = std::vector<AddrInfo>;
231
232 struct InstrLess {
233 bool operator()(const Instruction *A, const Instruction *B) const {
234 return A->comesBefore(B);
235 }
236 };
237 using DepList = std::set<Instruction *, InstrLess>;
238
239 struct MoveGroup {
240 MoveGroup(const AddrInfo &AI, Instruction *B, bool Hvx, bool Load)
241 : Base(B), Main{AI.Inst}, Clones{}, IsHvx(Hvx), IsLoad(Load) {}
242 MoveGroup() = default;
243 Instruction *Base;
244 InstList Main;
245 InstList Deps;
246 InstMap Clones;
247 bool IsHvx;
248 bool IsLoad;
249 };
250 using MoveList = std::vector<MoveGroup>;
251
252 struct ByteSpan {
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268 struct Segment {
269
270 Segment(Value *Val, int Begin, int Len)
271 : Val(Val), Start(Begin), Size(Len) {}
272 Segment(const Segment &Seg) = default;
273 Segment &operator=(const Segment &Seg) = default;
274 Value *Val;
275 int Start;
276 int Size;
277 };
278
279 struct Block {
280 Block(Value *Val, int Len, int Pos) : Seg(Val, 0, Len), Pos(Pos) {}
281 Block(Value *Val, int Off, int Len, int Pos)
282 : Seg(Val, Off, Len), Pos(Pos) {}
283 Block(const Block &Blk) = default;
284 Block &operator=(const Block &Blk) = default;
285 Segment Seg;
286 int Pos;
287 };
288
289 int extent() const;
290 ByteSpan section(int Start, int Length) const;
291 ByteSpan &shift(int Offset);
292 SmallVector<Value *, 8> values() const;
293
294 int size() const { return Blocks.size(); }
295 Block &operator[](int i) { return Blocks[i]; }
296 const Block &operator[](int i) const { return Blocks[i]; }
297
298 std::vector Blocks;
299
300 using iterator = decltype(Blocks)::iterator;
301 iterator begin() { return Blocks.begin(); }
302 iterator end() { return Blocks.end(); }
303 using const_iterator = decltype(Blocks)::const_iterator;
304 const_iterator begin() const { return Blocks.begin(); }
305 const_iterator end() const { return Blocks.end(); }
306 };
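// NOTE (illustrative commentary, not from the original source): a ByteSpan
// records which bytes of a span of memory are "interesting". Each Block
// places Seg.Size bytes taken from value Seg.Val (starting at byte Seg.Start
// within that value) at absolute byte position Pos. For example, the blocks
//   {Val=V0, Start=0, Size=64, Pos=0} and {Val=V1, Start=0, Size=64, Pos=32}
// describe two overlapping 64-byte accesses 32 bytes apart: extent() of that
// span is 96, and section(32, 64) keeps the parts of both blocks that fall
// into bytes [32, 96).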
307
308 std::optional<AddrInfo> getAddrInfo(Instruction &In) const;
309 bool isHvx(const AddrInfo &AI) const;
310
311 [[maybe_unused]] bool isSectorTy(Type *Ty) const;
312
315 Value *getPassThrough(Value *Val) const;
316
318 int Adjust,
319 const InstMap &CloneMap = InstMap()) const;
321 int Alignment,
322 const InstMap &CloneMap = InstMap()) const;
323
328 int Alignment,
330
335 int Alignment,
337
344 DepList getUpwardDeps(Instruction *In, Instruction *Base) const;
345
346 bool createAddressGroups();
347 MoveList createLoadGroups(const AddrList &Group) const;
348 MoveList createStoreGroups(const AddrList &Group) const;
349 bool moveTogether(MoveGroup &Move) const;
350 template <typename T>
351 InstMap cloneBefore(BasicBlock::iterator To, T &&Insts) const;
352
353 void realignLoadGroup(IRBuilderBase &Builder, const ByteSpan &VSpan,
354 int ScLen, Value *AlignVal, Value *AlignAddr) const;
355 void realignStoreGroup(IRBuilderBase &Builder, const ByteSpan &VSpan,
356 int ScLen, Value *AlignVal, Value *AlignAddr) const;
357 bool realignGroup(const MoveGroup &Move) const;
358
359 Value *makeTestIfUnaligned(IRBuilderBase &Builder, Value *AlignVal,
360 int Alignment) const;
361
366
367 std::map<Instruction *, AddrList> AddrGroups;
368 const HexagonVectorCombine &HVC;
369};
370
371[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS,
372 const AlignVectors::AddrInfo &AI) {
373 OS << "Inst: " << AI.Inst << " " << *AI.Inst << '\n';
374 OS << "Addr: " << *AI.Addr << '\n';
375 OS << "Type: " << *AI.ValTy << '\n';
376 OS << "HaveAlign: " << AI.HaveAlign.value() << '\n';
377 OS << "NeedAlign: " << AI.NeedAlign.value() << '\n';
378 OS << "Offset: " << AI.Offset;
379 return OS;
380}
381
382[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS,
383 const AlignVectors::MoveGroup &MG) {
384 OS << "IsLoad:" << (MG.IsLoad ? "yes" : "no");
385 OS << ", IsHvx:" << (MG.IsHvx ? "yes" : "no") << '\n';
386 OS << "Main\n";
387 for (Instruction *I : MG.Main)
388 OS << " " << *I << '\n';
389 OS << "Deps\n";
390 for (Instruction *I : MG.Deps)
391 OS << " " << *I << '\n';
392 OS << "Clones\n";
393 for (auto [K, V] : MG.Clones) {
394 OS << " ";
395 K->printAsOperand(OS, false);
396 OS << "\t-> " << *V << '\n';
397 }
398 return OS;
399}
400
401[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS,
402 const AlignVectors::ByteSpan::Block &B) {
403 OS << " @" << B.Pos << " [" << B.Seg.Start << ',' << B.Seg.Size << "] ";
404 if (B.Seg.Val == reinterpret_cast<const Value *>(&B)) {
405 OS << "(self:" << B.Seg.Val << ')';
406 } else if (B.Seg.Val != nullptr) {
407 OS << *B.Seg.Val;
408 } else {
409 OS << "(null)";
410 }
411 return OS;
412}
413
414[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS,
415 const AlignVectors::ByteSpan &BS) {
416 OS << "ByteSpan[size=" << BS.size() << ", extent=" << BS.extent() << '\n';
417 for (const AlignVectors::ByteSpan::Block &B : BS)
418 OS << B << '\n';
419 OS << ']';
420 return OS;
421}
422
423class HvxIdioms {
424public:
425 enum DstQualifier {
426 Undefined = 0,
427 Arithmetic,
428 LdSt,
429 LLVM_Gather,
430 LLVM_Scatter,
431 HEX_Gather_Scatter,
432 HEX_Gather,
433 HEX_Scatter,
434 Call,
435 };
436
437 HvxIdioms(const HexagonVectorCombine &HVC_) : HVC(HVC_) {
438 auto *Int32Ty = HVC.getIntTy(32);
439 HvxI32Ty = HVC.getHvxTy(Int32Ty, false);
440 HvxP32Ty = HVC.getHvxTy(Int32Ty, true);
441 }
442
443 bool run();
444
445private:
447 // Value + sign. Keeps track of whether the value should be treated as
448 // signed or unsigned, or whether it is known to be positive.
449 enum Signedness { Positive, Signed, Unsigned };
450
451 struct SValue {
452 Value *Val;
453 Signedness Sgn;
454 };
455
456 struct FxpOp {
457 unsigned Opcode;
458 unsigned Frac; // Number of fraction bits
459 SValue X, Y;
460 // If present, add 1 << RoundAt before the final right shift:
461 std::optional<unsigned> RoundAt;
462 VectorType *ResTy;
463 };
464
465 auto getNumSignificantBits(Value *V, Instruction *In) const
466 -> std::pair<unsigned, Signedness>;
467 auto canonSgn(SValue X, SValue Y) const -> std::pair<SValue, SValue>;
468
469 auto matchFxpMul(Instruction &In) const -> std::optional<FxpOp>;
470 auto processFxpMul(Instruction &In, const FxpOp &Op) const -> Value *;
471
472 auto processFxpMulChopped(IRBuilderBase &Builder, Instruction &In,
473 const FxpOp &Op) const -> Value *;
474 auto createMulQ15(IRBuilderBase &Builder, SValue X, SValue Y,
475 bool Rounding) const -> Value *;
476 auto createMulQ31(IRBuilderBase &Builder, SValue X, SValue Y,
477 bool Rounding) const -> Value *;
478
480 Value *CarryIn = nullptr) const
481 -> std::pair<Value *, Value *>;
482 auto createMul16(IRBuilderBase &Builder, SValue X, SValue Y) const -> Value *;
483 auto createMulH16(IRBuilderBase &Builder, SValue X, SValue Y) const
485 auto createMul32(IRBuilderBase &Builder, SValue X, SValue Y) const
486 -> std::pair<Value *, Value *>;
492
493 bool matchScatter(Instruction &In) const;
494 bool matchGather(Instruction &In) const;
495 Value *processVScatter(Instruction &In) const;
496 Value *processVGather(Instruction &In) const;
497
500 const HexagonVectorCombine &HVC;
501
503};
504
505[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS,
506 const HvxIdioms::FxpOp &Op) {
507 static const char *SgnNames[] = {"Positive", "Signed", "Unsigned"};
508 OS << Instruction::getOpcodeName(Op.Opcode) << '.' << Op.Frac;
509 if (Op.RoundAt.has_value()) {
510 if (Op.Frac != 0 && *Op.RoundAt == Op.Frac - 1) {
511 OS << ":rnd";
512 } else {
513 OS << " + 1<<" << *Op.RoundAt;
514 }
515 }
516 OS << "\n X:(" << SgnNames[Op.X.Sgn] << ") " << *Op.X.Val << "\n"
517 << " Y:(" << SgnNames[Op.Y.Sgn] << ") " << *Op.Y.Val;
518 return OS;
519}
520
521}
522
523namespace {
524
525template <typename T> T *getIfUnordered(T *MaybeT) {
526 return MaybeT && MaybeT->isUnordered() ? MaybeT : nullptr;
527}
528template <typename T> T *isCandidate(Instruction *In) {
529 return dyn_cast<T>(In);
530}
531template <> LoadInst *isCandidate<LoadInst>(Instruction *In) {
532 return getIfUnordered(dyn_cast<LoadInst>(In));
533}
534template <> StoreInst *isCandidate<StoreInst>(Instruction *In) {
535 return getIfUnordered(dyn_cast<StoreInst>(In));
536}
537
538#if !defined(_MSC_VER) || _MSC_VER >= 1926
539
540
541
542template <typename Pred, typename... Ts>
543void erase_if(std::map<Ts...> &map, Pred p)
544#else
545template <typename Pred, typename T, typename U>
546void erase_if(std::map<T, U> &map, Pred p)
547#endif
548{
549 for (auto i = map.begin(), e = map.end(); i != e;) {
550 if (p(*i))
551 i = map.erase(i);
552 else
553 i = std::next(i);
554 }
555}
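// NOTE (added commentary): this map-specific erase_if exists because the
// remove/erase idiom used for sequence containers does not apply to
// associative containers; elements must be erased one at a time via the
// iterator returned from std::map::erase. A minimal usage sketch (values
// made up for illustration):
//
//   std::map<int, int> M = {{1, 10}, {2, 20}, {3, 30}};
//   erase_if(M, [](auto &KV) { return KV.second > 15; });
//   // M now contains only {1, 10}.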
556
557// Forward other erase_ifs to the LLVM implementations.
558template <typename Pred, typename T> void erase_if(T &&container, Pred p) {
559 llvm::erase_if(std::forward<T>(container), p);
560}
561
562}
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596auto AlignVectors::ByteSpan::extent() const -> int {
597 if (size() == 0)
598 return 0;
599 int Min = Blocks[0].Pos;
600 int Max = Blocks[0].Pos + Blocks[0].Seg.Size;
601 for (int i = 1, e = size(); i != e; ++i) {
602 Min = std::min(Min, Blocks[i].Pos);
603 Max = std::max(Max, Blocks[i].Pos + Blocks[i].Seg.Size);
604 }
605 return Max - Min;
606}
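// NOTE (added worked example): blocks at Pos 0 with Size 4 and at Pos 8 with
// Size 16 give Min = 0 and Max = 24, so extent() = 24. The extent counts the
// whole [Min, Max) range, including uncovered holes (here bytes [4, 8)),
// since that is the amount of memory the realigned accesses must span.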
607
608auto AlignVectors::ByteSpan::section(int Start, int Length) const -> ByteSpan {
609 ByteSpan Section;
610 for (const ByteSpan::Block &B : Blocks) {
611 int L = std::max(B.Pos, Start);
612 int R = std::min(B.Pos + B.Seg.Size, Start + Length);
613 if (L < R) {
614
615 int Off = L > B.Pos ? L - B.Pos : 0;
616 Section.Blocks.emplace_back(B.Seg.Val, B.Seg.Start + Off, R - L, L);
617 }
618 }
619 return Section;
620}
621
622auto AlignVectors::ByteSpan::shift(int Offset) -> ByteSpan & {
623 for (Block &B : Blocks)
624 B.Pos += Offset;
625 return *this;
626}
627
628auto AlignVectors::ByteSpan::values() const -> SmallVector<Value *, 8> {
629 SmallVector<Value *, 8> Values(Blocks.size());
630 for (int i = 0, e = Blocks.size(); i != e; ++i)
631 Values[i] = Blocks[i].Seg.Val;
632 return Values;
633}
634
635auto AlignVectors::getAddrInfo(Instruction &In) const
636 -> std::optional<AddrInfo> {
637 if (auto *L = isCandidate<LoadInst>(&In))
638 return AddrInfo(HVC, L, L->getPointerOperand(), L->getType(),
639 L->getAlign());
640 if (auto *S = isCandidate<StoreInst>(&In))
641 return AddrInfo(HVC, S, S->getPointerOperand(),
642 S->getValueOperand()->getType(), S->getAlign());
643 if (auto *II = isCandidate<IntrinsicInst>(&In)) {
644 Intrinsic::ID ID = II->getIntrinsicID();
645 switch (ID) {
646 case Intrinsic::masked_load:
647 return AddrInfo(HVC, II, II->getArgOperand(0), II->getType(),
648 II->getParamAlign(0).valueOrOne());
649 case Intrinsic::masked_store:
650 return AddrInfo(HVC, II, II->getArgOperand(1),
651 II->getArgOperand(0)->getType(),
652 II->getParamAlign(1).valueOrOne());
653 }
654 }
655 return std::nullopt;
656}
657
658auto AlignVectors::isHvx(const AddrInfo &AI) const -> bool {
659 return HVC.HST.isTypeForHVX(AI.ValTy);
660}
661
662auto AlignVectors::getPayload(Value *Val) const -> Value * {
663 if (auto *In = dyn_cast<Instruction>(Val)) {
664 Intrinsic::ID ID = 0;
665 if (auto *II = dyn_cast<IntrinsicInst>(In))
666 ID = II->getIntrinsicID();
667 if (ID == Intrinsic::masked_store)
668 return In->getOperand(0);
669 }
670 return Val;
671}
672
673auto AlignVectors::getMask(Value *Val) const -> Value * {
674 if (auto *II = dyn_cast<IntrinsicInst>(Val)) {
675 switch (II->getIntrinsicID()) {
676 case Intrinsic::masked_load:
677 return II->getArgOperand(1);
678 case Intrinsic::masked_store:
679 return II->getArgOperand(2);
680 }
681 }
682
683 Type *ValTy = getPayload(Val)->getType();
684 if (auto *VecTy = dyn_cast<VectorType>(ValTy))
685 return HVC.getFullValue(HVC.getBoolTy(HVC.length(VecTy)));
686 return HVC.getFullValue(HVC.getBoolTy());
687}
688
689auto AlignVectors::getPassThrough(Value *Val) const -> Value * {
690 if (auto *II = dyn_cast<IntrinsicInst>(Val)) {
691 if (II->getIntrinsicID() == Intrinsic::masked_load)
692 return II->getArgOperand(2);
693 }
694 return UndefValue::get(getPayload(Val)->getType());
695}
696
697auto AlignVectors::createAdjustedPointer(IRBuilderBase &Builder, Value *Ptr,
698 Type *ValTy, int Adjust,
699 const InstMap &CloneMap) const
700 -> Value * {
701 if (auto *I = dyn_cast<Instruction>(Ptr))
702 if (Instruction *New = CloneMap.lookup(I))
703 Ptr = New;
704 return Builder.CreatePtrAdd(Ptr, HVC.getConstInt(Adjust), "gep");
705}
706
707auto AlignVectors::createAlignedPointer(IRBuilderBase &Builder, Value *Ptr,
708 Type *ValTy, int Alignment,
709 const InstMap &CloneMap) const
710 -> Value * {
711 auto remap = [&](Value *V) -> Value * {
712 if (auto *I = dyn_cast<Instruction>(V)) {
713 for (auto [Old, New] : CloneMap)
714 I->replaceUsesOfWith(Old, New);
715 return I;
716 }
717 return V;
718 };
719 Value *AsInt = Builder.CreatePtrToInt(Ptr, HVC.getIntTy(), "pti");
720 Value *Mask = HVC.getConstInt(-Alignment);
721 Value *And = Builder.CreateAnd(remap(AsInt), Mask, "and");
722 return Builder.CreateIntToPtr(
723 And, PointerType::getUnqual(ValTy->getContext()), "itp");
724}
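// NOTE (added commentary on the masking arithmetic above): for a power-of-2
// Alignment, -Alignment is a mask with the low log2(Alignment) bits clear,
// so "AsInt & -Alignment" rounds the address down to the previous aligned
// boundary. E.g. with Alignment = 64, an address of 0x1007 becomes
// 0x1007 & ~63 = 0x1000.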
725
726auto AlignVectors::createLoad(IRBuilderBase &Builder, Type *ValTy, Value *Ptr,
727 Value *Predicate, int Alignment, Value *Mask,
728 Value *PassThru,
729 ArrayRef<Value *> MDSources) const -> Value * {
730 bool HvxHasPredLoad = HVC.HST.useHVXV62Ops();
731 // Predicate is nullptr if not creating a predicated load.
732 if (Predicate) {
733 assert(!Predicate->getType()->isVectorTy() &&
734 "Expecting scalar predicate");
735 if (HVC.isFalse(Predicate))
736 return UndefValue::get(ValTy);
737 if (!HVC.isTrue(Predicate) && HvxHasPredLoad) {
738 Value *Load = createPredicatedLoad(Builder, ValTy, Ptr, Predicate,
739 Alignment, MDSources);
740 return Builder.CreateSelect(Mask, Load, PassThru);
741 }
742
743 }
744 assert(!HVC.isUndef(Mask));
745 if (HVC.isZero(Mask))
746 return PassThru;
747 if (HVC.isTrue(Mask))
748 return createSimpleLoad(Builder, ValTy, Ptr, Alignment, MDSources);
749
750 Instruction *Load = Builder.CreateMaskedLoad(ValTy, Ptr, Align(Alignment),
751 Mask, PassThru, "mld");
752 propagateMetadata(Load, MDSources);
753 return Load;
754}
755
756auto AlignVectors::createSimpleLoad(IRBuilderBase &Builder, Type *ValTy,
757 Value *Ptr, int Alignment,
758 ArrayRef<Value *> MDSources) const
759 -> Value * {
760 Instruction *Load =
761 Builder.CreateAlignedLoad(ValTy, Ptr, Align(Alignment), "ald");
762 propagateMetadata(Load, MDSources);
763 return Load;
764}
765
766auto AlignVectors::createPredicatedLoad(IRBuilderBase &Builder, Type *ValTy,
767 Value *Ptr, Value *Predicate,
768 int Alignment,
769 ArrayRef<Value *> MDSources) const
770 -> Value * {
771 assert(HVC.HST.isTypeForHVX(ValTy) &&
772 "Predicated 'scalar' vector loads not yet supported");
773 assert(Predicate);
774 assert(!Predicate->getType()->isVectorTy() && "Expecting scalar predicate");
775 assert(HVC.getSizeOf(ValTy, HVC.Alloc) % Alignment == 0);
776 if (HVC.isFalse(Predicate))
777 return UndefValue::get(ValTy);
778 if (HVC.isTrue(Predicate))
779 return createSimpleLoad(Builder, ValTy, Ptr, Alignment, MDSources);
780
781 auto V6_vL32b_pred_ai = HVC.HST.getIntrinsicId(Hexagon::V6_vL32b_pred_ai);
782
783 return HVC.createHvxIntrinsic(Builder, V6_vL32b_pred_ai, ValTy,
784 {Predicate, Ptr, HVC.getConstInt(0)}, {},
785 MDSources);
786}
787
788auto AlignVectors::createStore(IRBuilderBase &Builder, Value *Val, Value *Ptr,
789 Value *Predicate, int Alignment, Value *Mask,
790 ArrayRef<Value *> MDSources) const -> Value * {
791 if (HVC.isZero(Mask) || HVC.isUndef(Val) || HVC.isUndef(Mask))
792 return UndefValue::get(Val->getType());
793 assert(!Predicate || (!Predicate->getType()->isVectorTy() &&
794 "Expecting scalar predicate"));
795 if (Predicate) {
796 if (HVC.isFalse(Predicate))
797 return UndefValue::get(Val->getType());
798 if (HVC.isTrue(Predicate))
799 Predicate = nullptr;
800 }
801
802
803 if (HVC.isTrue(Mask)) {
804 if (Predicate) {
805 return createPredicatedStore(Builder, Val, Ptr, Predicate, Alignment,
806 MDSources);
807 }
808
809 return createSimpleStore(Builder, Val, Ptr, Alignment, MDSources);
810 }
811
812
813 if (!Predicate) {
814 Instruction *Store =
815 Builder.CreateMaskedStore(Val, Ptr, Align(Alignment), Mask);
816 propagateMetadata(Store, MDSources);
817 return Store;
818 }
819
820
821
822 Value *PredLoad = createPredicatedLoad(Builder, Val->getType(), Ptr,
823 Predicate, Alignment, MDSources);
824 Value *Mux = Builder.CreateSelect(Mask, Val, PredLoad);
825 return createPredicatedStore(Builder, Mux, Ptr, Predicate, Alignment,
826 MDSources);
827}
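// NOTE (added commentary): for a store that is both masked and predicated
// there is no single instruction to emit, so the code above performs a
// predicated read-modify-write: load the current memory contents under the
// predicate, select between the new value and the loaded bytes using the
// mask, and store the result back under the same predicate. Masked-off
// lanes thus rewrite the bytes that were already in memory.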
828
829auto AlignVectors::createSimpleStore(IRBuilderBase &Builder, Value *Val,
830 Value *Ptr, int Alignment,
831 ArrayRef<Value *> MDSources) const
832 -> Value * {
833 Instruction *Store = Builder.CreateAlignedStore(Val, Ptr, Align(Alignment));
834 propagateMetadata(Store, MDSources);
835 return Store;
836}
837
838auto AlignVectors::createPredicatedStore(IRBuilderBase &Builder, Value *Val,
839 Value *Ptr, Value *Predicate,
840 int Alignment,
841 ArrayRef<Value *> MDSources) const
842 -> Value * {
843 assert(HVC.HST.isTypeForHVX(Val->getType()) &&
844 "Predicated 'scalar' vector stores not yet supported");
845 assert(Predicate);
846 if (HVC.isFalse(Predicate))
847 return UndefValue::get(Val->getType());
848 if (HVC.isTrue(Predicate))
849 return createSimpleStore(Builder, Val, Ptr, Alignment, MDSources);
850
851 assert(HVC.getSizeOf(Val, HVC.Alloc) % Alignment == 0);
852 auto V6_vS32b_pred_ai = HVC.HST.getIntrinsicId(Hexagon::V6_vS32b_pred_ai);
853
854 return HVC.createHvxIntrinsic(Builder, V6_vS32b_pred_ai, nullptr,
855 {Predicate, Ptr, HVC.getConstInt(0), Val}, {},
856 MDSources);
857}
858
859auto AlignVectors::getUpwardDeps(Instruction *In, Instruction *Base) const
860 -> DepList {
861 BasicBlock *Parent = Base->getParent();
862 assert(In->getParent() == Parent &&
863 "Base and In should be in the same block");
864 assert(Base->comesBefore(In) && "Base should come before In");
865
866 DepList Deps;
867 std::deque<Instruction *> WorkQ = {In};
868 while (!WorkQ.empty()) {
869 Instruction *D = WorkQ.front();
870 WorkQ.pop_front();
871 if (D != In)
872 Deps.insert(D);
873 for (Value *Op : D->operands()) {
874 if (auto *I = dyn_cast<Instruction>(Op)) {
875 if (I->getParent() == Parent && Base->comesBefore(I))
876 WorkQ.push_back(I);
877 }
878 }
879 }
880 return Deps;
881}
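// NOTE (added commentary): this is a breadth-first walk of the use-def
// (operand) graph, restricted to instructions that sit in the same block
// strictly between Base and In. Everything collected here must be moved or
// cloned together with In when In is hoisted up to Base.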
882
883auto AlignVectors::createAddressGroups() -> bool {
884
885
886 AddrList WorkStack;
887
888 auto findBaseAndOffset = [&](AddrInfo &AI) -> std::pair<Instruction *, int> {
889 for (AddrInfo &W : WorkStack) {
890 if (auto D = HVC.calculatePointerDifference(AI.Addr, W.Addr))
891 return std::make_pair(W.Inst, *D);
892 }
893 return std::make_pair(nullptr, 0);
894 };
895
896 auto traverseBlock = [&](DomTreeNode *DomN, auto Visit) -> void {
897 BasicBlock &Block = *DomN->getBlock();
898 for (Instruction &I : Block) {
899 auto AI = this->getAddrInfo(I);
900 if (!AI)
901 continue;
902 auto F = findBaseAndOffset(*AI);
903 Instruction *GroupInst;
904 if (Instruction *BI = F.first) {
905 AI->Offset = F.second;
906 GroupInst = BI;
907 } else {
908 WorkStack.push_back(*AI);
909 GroupInst = AI->Inst;
910 }
911 AddrGroups[GroupInst].push_back(*AI);
912 }
913
914 for (DomTreeNode *C : DomN->children())
915 Visit(C, Visit);
916
917 while (!WorkStack.empty() && WorkStack.back().Inst->getParent() == &Block)
918 WorkStack.pop_back();
919 };
920
921 traverseBlock(HVC.DT.getRootNode(), traverseBlock);
922 assert(WorkStack.empty());
923
924
925
926
927 erase_if(AddrGroups, [](auto &G) { return G.second.size() == 1; });
928
929 erase_if(AddrGroups, [&](auto &G) {
930 return llvm::none_of(
931 G.second, [&](auto &I) { return HVC.HST.isTypeForHVX(I.ValTy); });
932 });
933
934 return !AddrGroups.empty();
935}
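// NOTE (added example, illustrative values): for accesses to p, p+64 and
// p+128, calculatePointerDifference succeeds for each pair, so all three
// accesses land in one AddrGroups entry keyed by the first instruction,
// with Offsets 0, 64 and 128. Singleton groups, and groups containing no
// HVX-sized type at all, are erased above because there is nothing to
// realign jointly.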
936
937auto AlignVectors::createLoadGroups(const AddrList &Group) const -> MoveList {
938
939
940
941 unsigned SizeLimit = VAGroupSizeLimit;
942 if (SizeLimit == 0)
943 return {};
944
945 auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
946 assert(!Move.Main.empty() && "Move group should have non-empty Main");
947 if (Move.Main.size() >= SizeLimit)
948 return false;
949
950 if (Move.IsHvx != isHvx(Info))
951 return false;
952
953 Instruction *Base = Move.Main.front();
954 if (Base->getParent() != Info.Inst->getParent())
955 return false;
956
957 if (!HVC.isSafeToMoveBeforeInBB(*Info.Inst, Base->getIterator()))
958 return false;
959
960 auto isSafeToCopyAtBase = [&](const Instruction *I) {
961 return HVC.isSafeToMoveBeforeInBB(*I, Base->getIterator()) &&
962 HVC.isSafeToClone(*I);
963 };
964 DepList Deps = getUpwardDeps(Info.Inst, Base);
965 if (!llvm::all_of(Deps, isSafeToCopyAtBase))
966 return false;
967
968 Move.Main.push_back(Info.Inst);
969 llvm::append_range(Move.Deps, Deps);
970 return true;
971 };
972
973 MoveList LoadGroups;
974
975 for (const AddrInfo &Info : Group) {
976 if (!Info.Inst->mayReadFromMemory())
977 continue;
978 if (LoadGroups.empty() || !tryAddTo(Info, LoadGroups.back()))
979 LoadGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), true);
980 }
981
982
983 erase_if(LoadGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });
984
985
986 if (!HVC.HST.useHVXV62Ops())
987 erase_if(LoadGroups, [](const MoveGroup &G) { return G.IsHvx; });
988
989 return LoadGroups;
990}
991
992auto AlignVectors::createStoreGroups(const AddrList &Group) const -> MoveList {
993
994
995
996 unsigned SizeLimit = VAGroupSizeLimit;
997 if (SizeLimit == 0)
998 return {};
999
1000 auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
1001 assert(!Move.Main.empty() && "Move group should have non-empty Main");
1002 if (Move.Main.size() >= SizeLimit)
1003 return false;
1004
1005
1006 assert(Info.Inst->getType()->isVoidTy() &&
1007 "Not handling stores with return values");
1008
1009 if (Move.IsHvx != isHvx(Info))
1010 return false;
1011
1012
1013
1014 Instruction *Base = Move.Main.front();
1015 if (Base->getParent() != Info.Inst->getParent())
1016 return false;
1017 if (!HVC.isSafeToMoveBeforeInBB(*Info.Inst, Base->getIterator(), Move.Main))
1018 return false;
1019 Move.Main.push_back(Info.Inst);
1020 return true;
1021 };
1022
1023 MoveList StoreGroups;
1024
1025 for (auto I = Group.rbegin(), E = Group.rend(); I != E; ++I) {
1026 const AddrInfo &Info = *I;
1027 if (!Info.Inst->mayWriteToMemory())
1028 continue;
1029 if (StoreGroups.empty() || !tryAddTo(Info, StoreGroups.back()))
1030 StoreGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), false);
1031 }
1032
1033
1034 erase_if(StoreGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });
1035
1036
1037 if (!HVC.HST.useHVXV62Ops())
1038 erase_if(StoreGroups, [](const MoveGroup &G) { return G.IsHvx; });
1039
1040
1041
1042
1043 if (!VADoFullStores) {
1044 erase_if(StoreGroups, [this](const MoveGroup &G) {
1045 return G.IsHvx && llvm::all_of(G.Main, [this](Instruction *S) {
1046 auto MaybeInfo = this->getAddrInfo(*S);
1047 assert(MaybeInfo.has_value());
1048 return HVC.HST.isHVXVectorType(
1049 EVT::getEVT(MaybeInfo->ValTy, false));
1050 });
1051 });
1052 }
1053
1054 return StoreGroups;
1055}
1056
1057auto AlignVectors::moveTogether(MoveGroup &Move) const -> bool {
1058
1059 assert(!Move.Main.empty() && "Move group should have non-empty Main");
1060 Instruction *Where = Move.Main.front();
1061
1062 if (Move.IsLoad) {
1063
1064
1065 Move.Clones = cloneBefore(Where->getIterator(), Move.Deps);
1066
1067 ArrayRef<Instruction *> Main(Move.Main);
1068 for (Instruction *M : Main) {
1069 if (M != Where)
1070 M->moveAfter(Where);
1071 for (auto [Old, New] : Move.Clones)
1072 M->replaceUsesOfWith(Old, New);
1073 Where = M;
1074 }
1075
1076 for (int i = 0, e = Move.Deps.size(); i != e; ++i)
1077 Move.Deps[i] = Move.Clones[Move.Deps[i]];
1078 } else {
1079
1080
1081
1082 assert(Move.Deps.empty());
1083
1085 for (Instruction *M : Main.drop_front(1)) {
1087 Where = M;
1088 }
1089 }
1090
1091 return Move.Main.size() + Move.Deps.size() > 1;
1092}
1093
1094template <typename T>
1095auto AlignVectors::cloneBefore(BasicBlock::iterator To, T &&Insts) const
1096 -> InstMap {
1097 InstMap Map;
1098
1099 for (Instruction *I : Insts) {
1100 assert(HVC.isSafeToClone(*I));
1101 Instruction *C = I->clone();
1102 C->setName(Twine("c.") + I->getName() + ".");
1103 C->insertBefore(To);
1104
1105 for (auto [Old, New] : Map)
1106 C->replaceUsesOfWith(Old, New);
1107 Map.insert(std::make_pair(I, C));
1108 }
1109 return Map;
1110}
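// NOTE (added commentary): clones are created in the order of Insts, and
// each new clone is immediately remapped over the clones made so far. This
// preserves intra-list dependencies: if Insts = [A, B] and B uses A, the
// clone of B is rewritten to use the clone of A rather than the original A.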
1111
1112auto AlignVectors::realignLoadGroup(IRBuilderBase &Builder,
1113 const ByteSpan &VSpan, int ScLen,
1114 Value *AlignVal, Value *AlignAddr) const
1115 -> void {
1117
1118 Type *SecTy = HVC.getByteTy(ScLen);
1119 int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;
1120 bool DoAlign = !HVC.isZero(AlignVal);
1121 BasicBlock::iterator BasePos = Builder.GetInsertPoint();
1122 BasicBlock *BaseBlock = Builder.GetInsertBlock();
1123
1124 ByteSpan ASpan;
1125 auto *True = HVC.getFullValue(HVC.getBoolTy(ScLen));
1127
1128
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152 for (int Index = 0; Index != NumSectors; ++Index)
1153 ASpan.Blocks.emplace_back(nullptr, ScLen, Index * ScLen);
1154 for (int Index = 0; Index != NumSectors; ++Index) {
1155 ASpan.Blocks[Index].Seg.Val =
1156 reinterpret_cast<Value *>(&ASpan.Blocks[Index]);
1157 }
1158
1159
1160
1161
1162 DenseMap<void *, Instruction *> EarliestUser;
1163 auto isEarlier = [](Instruction *A, Instruction *B) {
1164 if (B == nullptr)
1165 return true;
1166 if (A == nullptr)
1167 return false;
1168 assert(A->getParent() == B->getParent());
1169 return A->comesBefore(B);
1170 };
1171 auto earliestUser = [&](const auto &Uses) {
1172 Instruction *User = nullptr;
1173 for (const Use &U : Uses) {
1174 auto *I = dyn_cast<Instruction>(U.getUser());
1175 assert(I != nullptr && "Load used in a non-instruction?");
1176
1177
1178
1179 if (I->getParent() == BaseBlock) {
1180 if (!isa<PHINode>(I))
1181 User = std::min(User, I, isEarlier);
1182 } else {
1183 User = (User == nullptr) ? I : User;
1184 }
1185 }
1186 return User;
1187 };
1188
1189 for (const ByteSpan::Block &B : VSpan) {
1190 ByteSpan ASection = ASpan.section(B.Pos, B.Seg.Size);
1191 for (const ByteSpan::Block &S : ASection) {
1192 auto &EU = EarliestUser[S.Seg.Val];
1193 EU = std::min(EU, earliestUser(B.Seg.Val->uses()), isEarlier);
1194 }
1195 }
1196
1197 LLVM_DEBUG({
1198 dbgs() << "ASpan:\n" << ASpan << '\n';
1199 dbgs() << "Earliest users of ASpan:\n";
1200 for (auto &[Val, User] : EarliestUser) {
1201 dbgs() << Val << "\n ->" << *User << '\n';
1202 }
1203 });
1204
1205 auto createLoad = [&](IRBuilderBase &Builder, const ByteSpan &VSpan,
1206 int Index, bool MakePred) {
1207 Value *Ptr =
1208 createAdjustedPointer(Builder, AlignAddr, SecTy, Index * ScLen);
1209 Value *Predicate =
1210 MakePred ? makeTestIfUnaligned(Builder, AlignVal, ScLen) : nullptr;
1211
1212
1213
1214 int Start = (Index - DoAlign) * ScLen;
1215 int Width = (1 + DoAlign) * ScLen;
1216 return this->createLoad(Builder, SecTy, Ptr, Predicate, ScLen, True, Undef,
1217 VSpan.section(Start, Width).values());
1218 };
1219
1220 auto moveBefore = [this](BasicBlock::iterator In, BasicBlock::iterator To) {
1221
1222 assert(In->getParent() == To->getParent());
1223 DepList Deps = getUpwardDeps(&*In, &*To);
1224 In->moveBefore(To);
1225
1226 InstMap Map = cloneBefore(In, Deps);
1227 for (auto [Old, New] : Map)
1228 In->replaceUsesOfWith(Old, New);
1229 };
1230
1231
1232 LLVM_DEBUG(dbgs() << "Creating loads for ASpan sectors\n");
1233 for (int Index = 0; Index != NumSectors + 1; ++Index) {
1234
1235
1236
1237
1238
1239
1240 Instruction *PrevAt =
1241 DoAlign && Index > 0 ? EarliestUser[&ASpan[Index - 1]] : nullptr;
1242 Instruction *ThisAt =
1243 Index < NumSectors ? EarliestUser[&ASpan[Index]] : nullptr;
1244 if (auto *Where = std::min(PrevAt, ThisAt, isEarlier)) {
1245 Builder.SetInsertPoint(Where);
1246 Loads[Index] =
1247 createLoad(Builder, VSpan, Index, DoAlign && Index == NumSectors);
1248
1249
1250
1251
1252
1253
1254 if (auto *Load = dyn_cast<Instruction>(Loads[Index])) {
1255 if (!HVC.isSafeToMoveBeforeInBB(*Load, BasePos))
1256 moveBefore(Load->getIterator(), BasePos);
1257 }
1258 LLVM_DEBUG(dbgs() << "Loads[" << Index << "]:" << *Loads[Index] << '\n');
1259 }
1260 }
1261
1262
1263 LLVM_DEBUG(dbgs() << "Creating values for ASpan sectors\n");
1264 for (int Index = 0; Index != NumSectors; ++Index) {
1265 ASpan[Index].Seg.Val = nullptr;
1266 if (auto *Where = EarliestUser[&ASpan[Index]]) {
1267 Builder.SetInsertPoint(Where);
1268 Value *Val = Loads[Index];
1269 assert(Val != nullptr);
1270 if (DoAlign) {
1271 Value *NextLoad = Loads[Index + 1];
1272 assert(NextLoad != nullptr);
1273 Val = HVC.vralignb(Builder, Val, NextLoad, AlignVal);
1274 }
1275 ASpan[Index].Seg.Val = Val;
1276 LLVM_DEBUG(dbgs() << "ASpan[" << Index << "]:" << *Val << '\n');
1277 }
1278 }
1279
1280 for (const ByteSpan::Block &B : VSpan) {
1281 ByteSpan ASection = ASpan.section(B.Pos, B.Seg.Size).shift(-B.Pos);
1282 Value *Accum = UndefValue::get(HVC.getByteTy(B.Seg.Size));
1283 Builder.SetInsertPoint(cast<Instruction>(B.Seg.Val));
1284
1285
1286
1287
1288 std::vector<ByteSpan::Block *> ABlocks;
1289 for (ByteSpan::Block &S : ASection) {
1290 if (S.Seg.Val != nullptr)
1291 ABlocks.push_back(&S);
1292 }
1293 llvm::sort(ABlocks,
1294 [&](const ByteSpan::Block *A, const ByteSpan::Block *B) {
1295 return isEarlier(cast<Instruction>(A->Seg.Val),
1296 cast<Instruction>(B->Seg.Val));
1297 });
1298 for (ByteSpan::Block *S : ABlocks) {
1299
1300
1301 Instruction *SegI = cast<Instruction>(S->Seg.Val);
1302 Builder.SetInsertPoint(&*std::next(SegI->getIterator()));
1303 Value *Pay = HVC.vbytes(Builder, getPayload(S->Seg.Val));
1304 Accum =
1305 HVC.insertb(Builder, Accum, Pay, S->Seg.Start, S->Seg.Size, S->Pos);
1306 }
1307
1308
1309
1310
1311
1312
1313 Type *ValTy = getPayload(B.Seg.Val)->getType();
1314 Value *Cast = Builder.CreateBitCast(Accum, ValTy, "cst");
1315 Value *Sel = Builder.CreateSelect(getMask(B.Seg.Val), Cast,
1316 getPassThrough(B.Seg.Val), "sel");
1317 B.Seg.Val->replaceAllUsesWith(Sel);
1318 }
1319}
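// NOTE (added sketch of the overall load transformation; illustrative, for
// ScLen = 128 and one unaligned 128-byte load starting 32 bytes past an
// aligned base):
//
//   %a0 = load of aligned sector [0, 128)
//   %a1 = load of aligned sector [128, 256)
//   %v  = vralignb(%a0, %a1, 32)   ; byte-wise funnel shift, as used above
//
// i.e. each unaligned vector load in VSpan is re-expressed in terms of the
// two aligned sector loads that cover it.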
1320
1321auto AlignVectors::realignStoreGroup(IRBuilderBase &Builder,
1322 const ByteSpan &VSpan, int ScLen,
1323 Value *AlignVal, Value *AlignAddr) const
1324 -> void {
1326
1327 Type *SecTy = HVC.getByteTy(ScLen);
1328 int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;
1329 bool DoAlign = !HVC.isZero(AlignVal);
1330
1331
1332 ByteSpan ASpanV, ASpanM;
1333
1334
1335
1336 auto MakeVec = [](IRBuilderBase &Builder, Value *Val) -> Value * {
1337 Type *Ty = Val->getType();
1338 if (Ty->isVectorTy())
1339 return Val;
1340 auto *VecTy = VectorType::get(Ty, 1, false);
1341 return Builder.CreateBitCast(Val, VecTy, "cst");
1342 };
1343
1344
1345
1346 for (int Index = (DoAlign ? -1 : 0); Index != NumSectors + DoAlign; ++Index) {
1347
1348
1349 ByteSpan VSection =
1350 VSpan.section(Index * ScLen, ScLen).shift(-Index * ScLen);
1351 Value *Undef = UndefValue::get(SecTy);
1352 Value *Zero = HVC.getNullValue(SecTy);
1353 Value *AccumV = Undef;
1354 Value *AccumM = Zero;
1355 for (ByteSpan::Block &S : VSection) {
1356 Value *Pay = getPayload(S.Seg.Val);
1357 Value *Mask = HVC.rescale(Builder, MakeVec(Builder, getMask(S.Seg.Val)),
1358 Pay->getType(), HVC.getByteTy());
1359 Value *PartM = HVC.insertb(Builder, Zero, HVC.vbytes(Builder, Mask),
1360 S.Seg.Start, S.Seg.Size, S.Pos);
1361 AccumM = Builder.CreateOr(AccumM, PartM);
1362
1363 Value *PartV = HVC.insertb(Builder, Undef, HVC.vbytes(Builder, Pay),
1364 S.Seg.Start, S.Seg.Size, S.Pos);
1365
1366 AccumV = Builder.CreateSelect(
1367 Builder.CreateICmp(CmpInst::ICMP_NE, PartM, Zero), PartV, AccumV);
1368 }
1369 ASpanV.Blocks.emplace_back(AccumV, ScLen, Index * ScLen);
1370 ASpanM.Blocks.emplace_back(AccumM, ScLen, Index * ScLen);
1371 }
1372
1374 dbgs() << "ASpanV before vlalign:\n" << ASpanV << '\n';
1375 dbgs() << "ASpanM before vlalign:\n" << ASpanM << '\n';
1376 });
1377
1378
1379 if (DoAlign) {
1380 for (int Index = 1; Index != NumSectors + 2; ++Index) {
1381 Value *PrevV = ASpanV[Index - 1].Seg.Val, *ThisV = ASpanV[Index].Seg.Val;
1382 Value *PrevM = ASpanM[Index - 1].Seg.Val, *ThisM = ASpanM[Index].Seg.Val;
1384 ASpanV[Index - 1].Seg.Val = HVC.vlalignb(Builder, PrevV, ThisV, AlignVal);
1385 ASpanM[Index - 1].Seg.Val = HVC.vlalignb(Builder, PrevM, ThisM, AlignVal);
1386 }
1387 }
1388
1390 dbgs() << "ASpanV after vlalign:\n" << ASpanV << '\n';
1391 dbgs() << "ASpanM after vlalign:\n" << ASpanM << '\n';
1392 });
1393
1394 auto createStore = [&](IRBuilderBase &Builder, const ByteSpan &ASpanV,
1395 const ByteSpan &ASpanM, int Index, bool MakePred) {
1396 Value *Val = ASpanV[Index].Seg.Val;
1397 Value *Mask = ASpanM[Index].Seg.Val; // bytes
1398 if (HVC.isUndef(Val) || HVC.isZero(Mask))
1399 return;
1400 Value *Ptr =
1401 createAdjustedPointer(Builder, AlignAddr, SecTy, Index * ScLen);
1402 Value *Predicate =
1403 MakePred ? makeTestIfUnaligned(Builder, AlignVal, ScLen) : nullptr;
1404
1405
1406
1407 int Start = (Index - DoAlign) * ScLen;
1408 int Width = (1 + DoAlign) * ScLen;
1409 this->createStore(Builder, Val, Ptr, Predicate, ScLen,
1410 HVC.vlsb(Builder, Mask),
1411 VSpan.section(Start, Width).values());
1412 };
1413
1414 for (int Index = 0; Index != NumSectors + DoAlign; ++Index) {
1415 createStore(Builder, ASpanV, ASpanM, Index, DoAlign && Index == NumSectors);
1416 }
1417}
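// NOTE (added commentary): stores are handled symmetrically to loads but in
// write-back style. The payloads and their byte masks are accumulated per
// sector (AccumV/AccumM above), shifted down to the aligned grid with
// vlalignb, and each sector is then stored with its accumulated mask, so
// bytes not covered by any original store are left untouched in memory.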
1418
1419auto AlignVectors::realignGroup(const MoveGroup &Move) const -> bool {
1420 LLVM_DEBUG(dbgs() << "Realigning group:\n" << Move << '\n');
1421
1422
1423 if (!Move.IsHvx)
1424 return false;
1425
1426
1427
1428 auto getMaxOf = [](auto Range, auto GetValue) {
1429 return *llvm::max_element(Range, [&GetValue](auto &A, auto &B) {
1430 return GetValue(A) < GetValue(B);
1431 });
1432 };
1433
1434 const AddrList &BaseInfos = AddrGroups.at(Move.Base);
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449 std::set<Instruction *> TestSet(Move.Main.begin(), Move.Main.end());
1450 AddrList MoveInfos;
1451 llvm::copy_if(
1452 BaseInfos, std::back_inserter(MoveInfos),
1453 [&TestSet](const AddrInfo &AI) { return TestSet.count(AI.Inst); });
1454
1455
1456 const AddrInfo &WithMaxAlign =
1457 getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.HaveAlign; });
1458 Align MaxGiven = WithMaxAlign.HaveAlign;
1459
1460
1461 const AddrInfo &WithMinOffset =
1462 getMaxOf(MoveInfos, [](const AddrInfo &AI) { return -AI.Offset; });
1463
1464 const AddrInfo &WithMaxNeeded =
1465 getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.NeedAlign; });
1466 Align MinNeeded = WithMaxNeeded.NeedAlign;
1467
1468
1469
1470
1471 Instruction *InsertAt = Move.Main.front();
1472 if (!Move.IsLoad) {
1473
1475 InsertAt = &*std::next(InsertAt->getIterator());
1476 }
1477
1478 IRBuilder<InstSimplifyFolder> Builder(InsertAt->getParent(),
1479 InsertAt->getIterator(), InstSimplifyFolder(HVC.DL));
1480 Value *AlignAddr = nullptr;
1481 Value *AlignVal = nullptr;
1482
1483 if (MinNeeded <= MaxGiven) {
1484 int Start = WithMinOffset.Offset;
1485 int OffAtMax = WithMaxAlign.Offset;
1486
1487
1488
1489
1490
1491
1492 int Adjust = -alignTo(OffAtMax - Start, MinNeeded.value());
1493 AlignAddr = createAdjustedPointer(Builder, WithMaxAlign.Addr,
1494 WithMaxAlign.ValTy, Adjust, Move.Clones);
1495 int Diff = Start - (OffAtMax + Adjust);
1496 AlignVal = HVC.getConstInt(Diff);
1497 assert(Diff >= 0);
1498 assert(static_cast<decltype(MinNeeded.value())>(Diff) < MinNeeded.value());
1499 } else {
1500
1501
1502
1503
1504
1505
1506
1507 AlignAddr =
1508 createAlignedPointer(Builder, WithMinOffset.Addr, WithMinOffset.ValTy,
1509 MinNeeded.value(), Move.Clones);
1510 AlignVal =
1511 Builder.CreatePtrToInt(WithMinOffset.Addr, HVC.getIntTy(), "pti");
1512 if (auto *I = dyn_cast<Instruction>(AlignVal)) {
1513 for (auto [Old, New] : Move.Clones)
1514 I->replaceUsesOfWith(Old, New);
1515 }
1516 }
1517
1518 ByteSpan VSpan;
1519 for (const AddrInfo &AI : MoveInfos) {
1520 VSpan.Blocks.emplace_back(AI.Inst, HVC.getSizeOf(AI.ValTy),
1521 AI.Offset - WithMinOffset.Offset);
1522 }
1523
1524
1525
1526
1527 int ScLen = Move.IsHvx ? HVC.HST.getVectorLength()
1528 : std::max<int>(MinNeeded.value(), 4);
1529 assert(!Move.IsHvx || ScLen == 64 || ScLen == 128);
1530 assert(Move.IsHvx || ScLen == 4 || ScLen == 8);
1531
1532 LLVM_DEBUG({
1533 dbgs() << "ScLen: " << ScLen << "\n";
1534 dbgs() << "AlignVal:" << *AlignVal << "\n";
1535 dbgs() << "AlignAddr:" << *AlignAddr << "\n";
1536 dbgs() << "VSpan:\n" << VSpan << '\n';
1537 });
1538
1539 if (Move.IsLoad)
1540 realignLoadGroup(Builder, VSpan, ScLen, AlignVal, AlignAddr);
1541 else
1542 realignStoreGroup(Builder, VSpan, ScLen, AlignVal, AlignAddr);
1543
1544 for (auto *Inst : Move.Main)
1545 Inst->eraseFromParent();
1546
1547 return true;
1548}
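// NOTE (added worked example of the "aligned start" arithmetic above;
// illustrative numbers): if the best-aligned access has Offset 0 (OffAtMax),
// the group's minimum Offset is -12 (Start), and MinNeeded is 64, then
// Adjust = -alignTo(0 - (-12), 64) = -64, so AlignAddr points 64 bytes below
// the best-aligned address, and Diff = -12 - (0 - 64) = 52, which satisfies
// 0 <= Diff < MinNeeded as the asserts require.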
1549
1550auto AlignVectors::makeTestIfUnaligned(IRBuilderBase &Builder, Value *AlignVal,
1551 int Alignment) const -> Value * {
1552 auto *AlignTy = AlignVal->getType();
1553 Value *And = Builder.CreateAnd(
1554 AlignVal, ConstantInt::get(AlignTy, Alignment - 1), "and");
1555 Value *Zero = ConstantInt::get(AlignTy, 0);
1556 return Builder.CreateICmp(CmpInst::ICMP_NE, And, Zero, "isz");
1557}
1558
1559auto AlignVectors::isSectorTy(Type *Ty) const -> bool {
1560 if (!HVC.isByteVecTy(Ty))
1561 return false;
1562 int Size = HVC.getSizeOf(Ty);
1563 if (HVC.HST.isTypeForHVX(Ty))
1564 return Size == static_cast<int>(HVC.HST.getVectorLength());
1565 return Size == 4 || Size == 8;
1566}
1567
1568auto AlignVectors::run() -> bool {
1569 LLVM_DEBUG(dbgs() << "Running HVC::AlignVectors on " << HVC.F.getName()
1570 << '\n');
1571 if (!createAddressGroups())
1572 return false;
1573
1574 LLVM_DEBUG({
1575 dbgs() << "Address groups(" << AddrGroups.size() << "):\n";
1576 for (auto &[In, AL] : AddrGroups) {
1577 for (const AddrInfo &AI : AL)
1578 dbgs() << "---\n" << AI << '\n';
1579 }
1580 });
1581
1582 bool Changed = false;
1583 MoveList LoadGroups, StoreGroups;
1584
1585 for (auto &G : AddrGroups) {
1586 llvm::append_range(LoadGroups, createLoadGroups(G.second));
1587 llvm::append_range(StoreGroups, createStoreGroups(G.second));
1588 }
1589
1590 LLVM_DEBUG({
1591 dbgs() << "\nLoad groups(" << LoadGroups.size() << "):\n";
1592 for (const MoveGroup &G : LoadGroups)
1593 dbgs() << G << "\n";
1594 dbgs() << "Store groups(" << StoreGroups.size() << "):\n";
1595 for (const MoveGroup &G : StoreGroups)
1596 dbgs() << G << "\n";
1597 });
1598
1599
1600 unsigned CountLimit = VAGroupCountLimit;
1601 if (CountLimit == 0)
1602 return false;
1603
1604 if (LoadGroups.size() > CountLimit) {
1605 LoadGroups.resize(CountLimit);
1606 StoreGroups.clear();
1607 } else {
1608 unsigned StoreLimit = CountLimit - LoadGroups.size();
1609 if (StoreGroups.size() > StoreLimit)
1610 StoreGroups.resize(StoreLimit);
1611 }
1612
1613 for (auto &M : LoadGroups)
1614 Changed |= moveTogether(M);
1615 for (auto &M : StoreGroups)
1616 Changed |= moveTogether(M);
1617
1618 LLVM_DEBUG(dbgs() << "After moveTogether:\n" << HVC.F);
1619
1620 for (auto &M : LoadGroups)
1621 Changed |= realignGroup(M);
1622 for (auto &M : StoreGroups)
1623 Changed |= realignGroup(M);
1624
1625 return Changed;
1626}
1627
1628
1629
1630
1631
1632auto HvxIdioms::getNumSignificantBits(Value *V, Instruction *In) const
1633 -> std::pair<unsigned, Signedness> {
1634 unsigned Bits = HVC.getNumSignificantBits(V, In);
1635
1636
1637
1638
1639
1640 KnownBits Known = HVC.getKnownBits(V, In);
1641 Signedness Sign = Signed;
1642 unsigned NumToTest = 0;
1643 if (isPowerOf2_32(Bits))
1644 NumToTest = Bits;
1645 else if (Bits > 1 && isPowerOf2_32(Bits - 1))
1646 NumToTest = Bits - 1;
1647
1648 if (NumToTest != 0 && Known.Zero.ashr(NumToTest).isAllOnes()) {
1649 Sign = Unsigned;
1650 Bits = NumToTest;
1651 }
1652
1653
1654
1657 Sign = Positive;
1658 }
1659 return {Bits, Sign};
1660}
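// NOTE (added example): for a value with 17 significant bits whose bits 16
// and up are known zero, NumToTest = 16 (since 17 - 1 is a power of 2), the
// known-zero test succeeds, and the result becomes {16, Unsigned}: the
// value may be treated as an unsigned 16-bit quantity, which maps onto
// narrower HVX multiplies than a 17-bit signed one would.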
1661
1662auto HvxIdioms::canonSgn(SValue X, SValue Y) const
1663 -> std::pair<SValue, SValue> {
1664
1665
1666
1667
1668 if (X.Sgn == Signed && Y.Sgn != Signed)
1669 std::swap(X, Y);
1670 return {X, Y};
1671}
1672
1673
1674
1675
1676auto HvxIdioms::matchFxpMul(Instruction &In) const -> std::optional<FxpOp> {
1677 using namespace PatternMatch;
1678 auto *Ty = In.getType();
1679
1680 if (!Ty->isVectorTy() || !Ty->getScalarType()->isIntegerTy())
1681 return std::nullopt;
1682
1683 unsigned Width = cast<IntegerType>(Ty->getScalarType())->getBitWidth();
1684
1685 FxpOp Op;
1686 Value *Exp = &In;
1687
1688
1689
1690 auto m_Shr = [](auto &&V, auto &&S) {
1691 return m_CombineOr(m_LShr(V, S), m_AShr(V, S));
1692 };
1693
1694 uint64_t Qn = 0;
1695 if (Value *T; match(Exp, m_Shr(m_Value(T), m_ConstantInt(Qn)))) {
1696 Op.Frac = Qn;
1697 Exp = T;
1698 } else {
1699 Op.Frac = 0;
1700 }
1701
1702 if (Op.Frac > Width)
1703 return std::nullopt;
1704
1705
1706 uint64_t CV;
1707 if (Value *T;
1708 Op.Frac > 0 && match(Exp, m_Add(m_Value(T), m_ConstantInt(CV)))) {
1709 if (!isPowerOf2_64(CV))
1710 return std::nullopt;
1711 if (CV != 0)
1712 Op.RoundAt = Log2_64(CV);
1713 Exp = T;
1714 }
1715
1716
1717 if (match(Exp, m_Mul(m_Value(Op.X.Val), m_Value(Op.Y.Val)))) {
1718 Op.Opcode = Instruction::Mul;
1719
1720 Op.X.Sgn = getNumSignificantBits(Op.X.Val, &In).second;
1721 Op.Y.Sgn = getNumSignificantBits(Op.Y.Val, &In).second;
1722 Op.ResTy = cast<VectorType>(Ty);
1723 return Op;
1724 }
1725
1726 return std::nullopt;
1727}
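// NOTE (added scalar illustration of the idiom being matched; this helper
// is not code from this file): a Q15 fixed-point multiply has the shape
//
//   int32_t mul_q15(int16_t x, int16_t y) {
//     return ((int32_t)x * y + (1 << 14)) >> 15; // Frac = 15, RoundAt = 14
//   }
//
// i.e. an integer multiply, an optional power-of-2 rounding addend, and a
// final right shift by the number of fraction bits.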
1728
1729auto HvxIdioms::processFxpMul(Instruction &In, const FxpOp &Op) const
1730 -> Value * {
1731 assert(Op.X.Val->getType() == Op.Y.Val->getType());
1732
1733 auto *VecTy = dyn_cast<VectorType>(Op.X.Val->getType());
1734 if (VecTy == nullptr)
1735 return nullptr;
1736 auto *ElemTy = cast<IntegerType>(VecTy->getElementType());
1737 unsigned ElemWidth = ElemTy->getBitWidth();
1738
1739
1740 if ((HVC.length(VecTy) * ElemWidth) % (8 * HVC.HST.getVectorLength()) != 0)
1741 return nullptr;
1742
1743
1744
1745
1746 if (ElemWidth <= 8)
1747 return nullptr;
1748
1749
1750 if (ElemWidth <= 32 && Op.Frac == 0)
1751 return nullptr;
1752
1753 auto [BitsX, SignX] = getNumSignificantBits(Op.X.Val, &In);
1754 auto [BitsY, SignY] = getNumSignificantBits(Op.Y.Val, &In);
1755
1756
1757
1758 Value *X = Op.X.Val, *Y = Op.Y.Val;
1759 IRBuilder<InstSimplifyFolder> Builder(In.getParent(), In.getIterator(),
1760 InstSimplifyFolder(HVC.DL));
1761
1762 auto roundUpWidth = [](unsigned Width) -> unsigned {
1763 if (Width <= 32 && !isPowerOf2_32(Width)) {
1764 // If the element width is not a power of 2, round it up to
1765 // the next one.
1766 return PowerOf2Ceil(Width);
1767 }
1768 if (Width > 32 && Width % 32 != 0) {
1769
1770 return alignTo(Width, 32u);
1771 }
1772 return Width;
1773 };
1774
1775 BitsX = roundUpWidth(BitsX);
1776 BitsY = roundUpWidth(BitsY);
1777
1778
1779
1780
1781 unsigned Width = std::max(BitsX, BitsY);
1782
1783 auto *ResizeTy = VectorType::get(HVC.getIntTy(Width), VecTy);
1784 if (Width < ElemWidth) {
1785 X = Builder.CreateTrunc(X, ResizeTy, "trn");
1786 Y = Builder.CreateTrunc(Y, ResizeTy, "trn");
1787 } else if (Width > ElemWidth) {
1788 X = SignX == Signed ? Builder.CreateSExt(X, ResizeTy, "sxt")
1789 : Builder.CreateZExt(X, ResizeTy, "zxt");
1790 Y = SignY == Signed ? Builder.CreateSExt(Y, ResizeTy, "sxt")
1791 : Builder.CreateZExt(Y, ResizeTy, "zxt");
1792 };
1793
1794 assert(X->getType() == Y->getType() && X->getType() == ResizeTy);
1795
1796 unsigned VecLen = HVC.length(ResizeTy);
1797 unsigned ChopLen = (8 * HVC.HST.getVectorLength()) / std::min(Width, 32u);
1798
1800 FxpOp ChopOp = Op;
1801 ChopOp.ResTy = VectorType::get(Op.ResTy->getElementType(), ChopLen, false);
1802
1803 for (unsigned V = 0; V != VecLen / ChopLen; ++V) {
1804 ChopOp.X.Val = HVC.subvector(Builder, X, V * ChopLen, ChopLen);
1805 ChopOp.Y.Val = HVC.subvector(Builder, Y, V * ChopLen, ChopLen);
1806 Results.push_back(processFxpMulChopped(Builder, In, ChopOp));
1807 if (Results.back() == nullptr)
1808 break;
1809 }
1810
1811 if (Results.empty() || Results.back() == nullptr)
1812 return nullptr;
1813
1814 Value *Cat = HVC.concat(Builder, Results);
1815 Value *Ext = SignX == Signed || SignY == Signed
1816 ? Builder.CreateSExt(Cat, VecTy, "sxt")
1817 : Builder.CreateZExt(Cat, VecTy, "zxt");
1818 return Ext;
1819}
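// NOTE (added commentary): the loop above processes the (possibly widened)
// operands in HVX-register-sized chunks. ChopLen is the number of
// min(Width, 32)-bit lanes that fill one vector register; e.g. with 32-bit
// lanes on a 128-byte HVX vector, ChopLen = (8 * 128) / 32 = 32 lanes per
// chunk.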
1820
1821inline bool HvxIdioms::matchScatter(Instruction &In) const {
1822 auto *II = dyn_cast<IntrinsicInst>(&In);
1823 if (!II)
1824 return false;
1825 return (II->getIntrinsicID() == Intrinsic::masked_scatter);
1826}
1827
1828inline bool HvxIdioms::matchGather(Instruction &In) const {
1829 auto *II = dyn_cast<IntrinsicInst>(&In);
1830 if (!II)
1831 return false;
1832 return (II->getIntrinsicID() == Intrinsic::masked_gather);
1833}
1834
1836
1837
1839 switch (Opc) {
1840 case Instruction::Add:
1841 case Instruction::Sub:
1842 case Instruction::Mul:
1843 case Instruction::And:
1844 case Instruction::Or:
1845 case Instruction::Xor:
1846 case Instruction::AShr:
1847 case Instruction::LShr:
1848 case Instruction::Shl:
1849 case Instruction::UDiv:
1850 return true;
1851 }
1852 return false;
1853}
1854
1855
1857 assert(Ptr && "Unable to extract pointer");
1859 return Ptr;
1863 if (II->getIntrinsicID() == Intrinsic::masked_store)
1864 return II->getOperand(1);
1865 }
1866 return nullptr;
1867}
1868
1870 HvxIdioms::DstQualifier &Qual) {
1872 if (!In)
1873 return Destination;
1875 Destination = In;
1876 Qual = HvxIdioms::LdSt;
1878 if (II->getIntrinsicID() == Intrinsic::masked_gather) {
1879 Destination = In;
1880 Qual = HvxIdioms::LLVM_Gather;
1881 } else if (II->getIntrinsicID() == Intrinsic::masked_scatter) {
1882 Destination = In;
1883 Qual = HvxIdioms::LLVM_Scatter;
1884 } else if (II->getIntrinsicID() == Intrinsic::masked_store) {
1885 Destination = In;
1886 Qual = HvxIdioms::LdSt;
1887 } else if (II->getIntrinsicID() ==
1888 Intrinsic::hexagon_V6_vgather_vscattermh) {
1889 Destination = In;
1890 Qual = HvxIdioms::HEX_Gather_Scatter;
1891 } else if (II->getIntrinsicID() == Intrinsic::hexagon_V6_vscattermh_128B) {
1892 Destination = In;
1893 Qual = HvxIdioms::HEX_Scatter;
1894 } else if (II->getIntrinsicID() == Intrinsic::hexagon_V6_vgathermh_128B) {
1895 Destination = In;
1896 Qual = HvxIdioms::HEX_Gather;
1897 }
1903 Destination = In;
1904 Qual = HvxIdioms::Call;
1908 Destination = In;
1909 Qual = HvxIdioms::Arithmetic;
1910 } else {
1911 LLVM_DEBUG(dbgs() << "Unhandled destination : " << *In << "\n");
1912 }
1913 return Destination;
1914}
1915
1916
1917
1918
1919
1920
1921
1924 if (!In)
1925 return Destination;
1926
1928
1929 for (auto &U : In->uses()) {
1932 if (Destination)
1933 Users.push_back(Destination);
1934 }
1935 }
1936
1939 return I;
1940 return Destination;
1941}
1942
1943
1945 assert(In && "Bad instruction");
1948 IIn->getIntrinsicID() == Intrinsic::masked_scatter)) &&
1949 "Not a gather Intrinsic");
1951 if (IIn->getIntrinsicID() == Intrinsic::masked_gather)
1953 else
1955 return GEPIndex;
1956}
1957
1958
1959
1960
1963 if (!GEPIndex) {
1965 return nullptr;
1966 }
1969 if (IndexLoad)
1970 return IndexLoad;
1971
1973 if (IndexZEx) {
1975 if (IndexLoad)
1976 return IndexLoad;
1978 if (II && II->getIntrinsicID() == Intrinsic::masked_gather)
1980 }
1982 if (BaseShuffle) {
1984 if (IndexLoad)
1985 return IndexLoad;
1987 if (IE) {
1988 auto *Src = IE->getOperand(1);
1990 if (IndexLoad)
1991 return IndexLoad;
1993 if (Alloca)
1994 return Alloca;
1996 return Src;
1997 }
1999 return Src;
2000 }
2001 }
2002 }
2003 LLVM_DEBUG(dbgs() << " Unable to locate Address from intrinsic\n");
2004 return nullptr;
2005}
2006
2008 if (!In)
2009 return nullptr;
2010
2013
2014 if (auto *II = dyn_cast<IntrinsicInst>(In)) {
2015 if (II->getIntrinsicID() == Intrinsic::masked_load)
2016 return II->getType();
2017 if (II->getIntrinsicID() == Intrinsic::masked_store)
2018 return II->getOperand(0)->getType();
2019 }
2020 return In->getType();
2021}
2022
2024 if (!In)
2025 return nullptr;
2027 return In;
2028 if (auto *II = dyn_cast<IntrinsicInst>(In)) {
2029 if (II->getIntrinsicID() == Intrinsic::masked_load)
2030 return In;
2031 if (II->getIntrinsicID() == Intrinsic::masked_gather)
2032 return In;
2033 }
2043 return cstDataVector;
2045 return GEPIndex->getOperand(0);
2046 return nullptr;
2047}
2048
2049
2050
2053 if (!GEPIndex) {
2055 return nullptr;
2056 }
2059 return IndexLoad;
2060
2061 LLVM_DEBUG(dbgs() << " Unable to locate Index from intrinsic\n");
2062 return nullptr;
2063}
2064
2065
2066
2067
2071 assert(I && "Unable to reinterpret cast");
2072 Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
2073 std::vector<int> shuffleMask;
2074 for (unsigned i = 0; i < 64; ++i)
2075 shuffleMask.push_back(i);
2076 ArrayRef<int> Mask(shuffleMask);
2077 Value *CastShuffle =
2078 Builder.CreateShuffleVector(I, I, Mask, "identity_shuffle");
2079 return Builder.CreateBitCast(CastShuffle, NT, "cst64_i16_to_32_i32");
2080}
2081
2082
2086 assert(I && "Unable to reinterpret cast");
2087 Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
2088 std::vector<int> shuffleMask;
2089 for (unsigned i = 0; i < 128; ++i)
2090 shuffleMask.push_back(i);
2091 ArrayRef<int> Mask(shuffleMask);
2092 Value *CastShuffle =
2093 Builder.CreateShuffleVector(I, I, Mask, "identity_shuffle");
2094 return Builder.CreateBitCast(CastShuffle, NT, "cst128_i8_to_32_i32");
2095}
2096
2097
2098static Value *get_i32_Mask(const HexagonVectorCombine &HVC,
2099 IRBuilderBase &Builder, LLVMContext &Ctx,
2100 unsigned int pattern) {
2101 std::vector<unsigned int> byteMask;
2102 for (unsigned i = 0; i < 32; ++i)
2103 byteMask.push_back(pattern);
2104
2105 return Builder.CreateIntrinsic(
2106 HVC.getBoolTy(128), HVC.HST.getIntrinsicId(Hexagon::V6_vandvrt),
2107 {llvm::ConstantDataVector::get(Ctx, byteMask), HVC.getConstInt(~0)},
2108 nullptr);
2109}
2110
2111Value *HvxIdioms::processVScatter(Instruction &In) const {
2112 auto *InpTy = dyn_cast<VectorType>(In.getOperand(0)->getType());
2113 assert(InpTy && "Cannot handle no vector type for llvm.scatter/gather");
2114 unsigned InpSize = HVC.getSizeOf(InpTy);
2115 auto *F = In.getFunction();
2116 LLVMContext &Ctx = F->getContext();
2117 auto *ElemTy = dyn_cast<IntegerType>(InpTy->getElementType());
2118 assert(ElemTy && "llvm.scatter needs integer type argument");
2119 unsigned ElemWidth = HVC.DL.getTypeAllocSize(ElemTy);
2120 LLVM_DEBUG({
2121 unsigned Elements = HVC.length(InpTy);
2122 dbgs() << "\n[Process scatter](" << In << ")\n" << *In.getParent() << "\n";
2123 dbgs() << " Input type(" << *InpTy << ") elements(" << Elements
2124 << ") VecLen(" << InpSize << ") type(" << *ElemTy << ") ElemWidth("
2125 << ElemWidth << ")\n";
2126 });
2127
2128 IRBuilder<InstSimplifyFolder> Builder(In.getParent(), In.getIterator(),
2129 InstSimplifyFolder(HVC.DL));
2130
2131 auto *ValueToScatter = In.getOperand(0);
2132 LLVM_DEBUG(dbgs() << " ValueToScatter : " << *ValueToScatter << "\n");
2133
2134 if (InpSize != HVC.HST.getVectorLength()) {
2135 LLVM_DEBUG(dbgs() << "Unhandled vector size(" << InpSize
2136 << ") for vscatter\n");
2137 return nullptr;
2138 }
2139
2140
2142 if (!IndexLoad)
2143 return nullptr;
2144 LLVM_DEBUG(dbgs() << " IndexLoad : " << *IndexLoad << "\n");
2145
2146
2148 if (!Ptr)
2149 return nullptr;
2151
2153 if (!Indexes)
2154 return nullptr;
2155 LLVM_DEBUG(dbgs() << " Indexes : " << *Indexes << "\n");
2157 "cst_ptr_to_i32");
2158 LLVM_DEBUG(dbgs() << " CastedDst : " << *CastedDst << "\n");
2159
2161 Value *CastIndex = nullptr;
2162 if (cstDataVector) {
2163
2164 AllocaInst *IndexesAlloca =
2165 Builder.CreateAlloca(HVC.getHvxTy(HVC.getIntTy(32), false));
2166 [[maybe_unused]] auto *StoreIndexes =
2167 Builder.CreateStore(cstDataVector, IndexesAlloca);
2168 LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n");
2170 IndexesAlloca, "reload_index");
2171 } else {
2172 if (ElemWidth == 2)
2174 else
2175 CastIndex = Indexes;
2176 }
2177 LLVM_DEBUG(dbgs() << " Cast index : " << *CastIndex << ")\n");
2178
2179 if (ElemWidth == 1) {
2180
2181
2182 Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
2183
2184
2185 Value *CastIndexes = Builder.CreateBitCast(CastIndex, NT, "cast_to_32i32");
2186 auto V6_vunpack = HVC.HST.getIntrinsicId(Hexagon::V6_vunpackub);
2188 HVC.getHvxTy(HVC.getIntTy(32), true), V6_vunpack, CastIndexes, nullptr);
2189 LLVM_DEBUG(dbgs() << " UnpackedIndexes : " << *UnpackedIndexes << ")\n");
2190
2191 auto V6_hi = HVC.HST.getIntrinsicId(Hexagon::V6_hi);
2192 auto V6_lo = HVC.HST.getIntrinsicId(Hexagon::V6_lo);
2193 [[maybe_unused]] Value *IndexHi =
2194 HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedIndexes);
2195 [[maybe_unused]] Value *IndexLo =
2196 HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedIndexes);
2197 LLVM_DEBUG(dbgs() << " UnpackedIndHi : " << *IndexHi << ")\n");
2198 LLVM_DEBUG(dbgs() << " UnpackedIndLo : " << *IndexLo << ")\n");
2199
2200 Value *CastSrc =
2202 LLVM_DEBUG(dbgs() << " CastSrc : " << *CastSrc << ")\n");
2204 HVC.getHvxTy(HVC.getIntTy(32), true), V6_vunpack, CastSrc, nullptr);
2205 LLVM_DEBUG(dbgs() << " UnpackedValToScat: " << *UnpackedValueToScatter
2206 << ")\n");
2207
2208 [[maybe_unused]] Value *UVSHi =
2209 HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedValueToScatter);
2210 [[maybe_unused]] Value *UVSLo =
2211 HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedValueToScatter);
2212 LLVM_DEBUG(dbgs() << " UVSHi : " << *UVSHi << ")\n");
2213 LLVM_DEBUG(dbgs() << " UVSLo : " << *UVSLo << ")\n");
2214
2215
2216 auto *QByteMask = get_i32_Mask(HVC, Builder, Ctx, 0x00ff00ff);
2217 LLVM_DEBUG(dbgs() << " QByteMask : " << *QByteMask << "\n");
2219 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermhq_128B,
2221 IndexHi, UVSHi},
2222 nullptr);
2223 LLVM_DEBUG(dbgs() << " ResHi : " << *ResHi << ")\n");
2225 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermhq_128B,
2227 IndexLo, UVSLo},
2228 nullptr);
2229 } else if (ElemWidth == 2) {
2230 Value *CastSrc =
2232 LLVM_DEBUG(dbgs() << " CastSrc : " << *CastSrc << ")\n");
2234 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermh_128B,
2236 CastSrc},
2237 nullptr);
2238 } else if (ElemWidth == 4) {
2240 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermw_128B,
2242 ValueToScatter},
2243 nullptr);
2244 } else {
2245 LLVM_DEBUG(dbgs() << "Unhandled element type for vscatter\n");
2246 return nullptr;
2247 }
2248}
2249
2250Value *HvxIdioms::processVGather(Instruction &In) const {
2251 [[maybe_unused]] auto *InpTy =
2253 assert(InpTy && "Cannot handle no vector type for llvm.gather");
2254 [[maybe_unused]] auto *ElemTy =
2256 assert(ElemTy && "llvm.gather needs vector of ptr argument");
2257 auto *F = In.getFunction();
2258 LLVMContext &Ctx = F->getContext();
2259 LLVM_DEBUG(dbgs() << "\n[Process gather](" << In << ")\n"
2260 << *In.getParent() << "\n");
2261 LLVM_DEBUG(dbgs() << " Input type(" << *InpTy << ") elements("
2262 << HVC.length(InpTy) << ") VecLen(" << HVC.getSizeOf(InpTy)
2263 << ") type(" << *ElemTy << ") Access alignment("
2264 << *In.getOperand(1) << ") AddressSpace("
2265 << ElemTy->getAddressSpace() << ")\n");
2266
2267
2269 "llvm.gather needs vector for mask");
2270 IRBuilder<InstSimplifyFolder> Builder(In.getParent(), In.getIterator(),
2271 InstSimplifyFolder(HVC.DL));
2272
2273
2274
2275
2276 HvxIdioms::DstQualifier Qual = HvxIdioms::Undefined;
2278 if (!Dst) {
2279 LLVM_DEBUG(dbgs() << " Unable to locate vgather destination\n");
2280 return nullptr;
2281 }
2282 LLVM_DEBUG(dbgs() << " Destination : " << *Dst << " Qual(" << Qual
2283 << ")\n");
2284
2285
2287 if (!Ptr) {
2288 LLVM_DEBUG(dbgs() << "Could not locate vgather destination ptr\n");
2289 return nullptr;
2290 }
2291
2292
2294 assert(DstType && "Cannot handle non vector dst type for llvm.gather");
2295
2296
2298 if (!IndexLoad)
2299 return nullptr;
2300 LLVM_DEBUG(dbgs() << " IndexLoad : " << *IndexLoad << "\n");
2301
2302
2304 if (!Indexes)
2305 return nullptr;
2306 LLVM_DEBUG(dbgs() << " Indexes : " << *Indexes << "\n");
2307
2309 Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
2310 if (Qual == HvxIdioms::LdSt || Qual == HvxIdioms::Arithmetic) {
2311
2312
2313
2314 unsigned OutputSize = HVC.getSizeOf(DstType);
2318 << " Address space ("
2320 << " Result type : " << *DstType
2321 << "\n Size in bytes : " << OutputSize
2322 << " element type(" << *DstElemTy
2323 << ")\n ElemWidth : " << ElemWidth << " bytes\n");
2324
2326 assert(IndexType && "Cannot handle non vector index type for llvm.gather");
2327 unsigned IndexWidth = HVC.DL.getTypeAllocSize(IndexType->getElementType());
2328 LLVM_DEBUG(dbgs() << " IndexWidth(" << IndexWidth << ")\n");
2329
2330
2332 IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
2333
2334
2335
2336
2337
2338
2339
2341 if (ElemWidth == 1) {
2342
2343
2344
2345
2346 Value *CastIndexes =
2347 Builder.CreateBitCast(Indexes, NT, "cast_to_32i32");
2348 auto V6_vunpack = HVC.HST.getIntrinsicId(Hexagon::V6_vunpackub);
2349 auto *UnpackedIndexes =
2350 Builder.CreateIntrinsic(HVC.getHvxTy(HVC.getIntTy(32), true),
2351 V6_vunpack, CastIndexes, nullptr);
2352 LLVM_DEBUG(dbgs() << " UnpackedIndexes : " << *UnpackedIndexes
2353 << ")\n");
2354
2355 auto V6_hi = HVC.HST.getIntrinsicId(Hexagon::V6_hi);
2356 auto V6_lo = HVC.HST.getIntrinsicId(Hexagon::V6_lo);
2357 [[maybe_unused]] Value *IndexHi =
2358 HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedIndexes);
2359 [[maybe_unused]] Value *IndexLo =
2360 HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedIndexes);
2361 LLVM_DEBUG(dbgs() << " UnpackedIndHi : " << *IndexHi << ")\n");
2362 LLVM_DEBUG(dbgs() << " UnpackedIndLo : " << *IndexLo << ")\n");
2363
2364 auto *QByteMask = get_i32_Mask(HVC, Builder, Ctx, 0x00ff00ff);
2365 LLVM_DEBUG(dbgs() << " QByteMask : " << *QByteMask << "\n");
2366
2367
2368 auto V6_vgather = HVC.HST.getIntrinsicId(Hexagon::V6_vgathermhq);
2369 [[maybe_unused]] auto GatherHi = Builder.CreateIntrinsic(
2370 Type::getVoidTy(Ctx), V6_vgather,
2371 {Ptr, QByteMask, CastedPtr,
2373 nullptr);
2374 LLVM_DEBUG(dbgs() << " GatherHi : " << *GatherHi << ")\n");
2375
2376 [[maybe_unused]] Value *LoadedResultHi = Builder.CreateLoad(
2377 HVC.getHvxTy(HVC.getIntTy(32), false), Ptr, "temp_result_hi");
2378 LLVM_DEBUG(dbgs() << " LoadedResultHi : " << *LoadedResultHi << "\n");
2379
2380
2381
2383 Type::getVoidTy(Ctx), V6_vgather,
2384 {Ptr, QByteMask, CastedPtr,
2386 nullptr);
2387 LLVM_DEBUG(dbgs() << " GatherLo : " << *Gather << ")\n");
2389 HVC.getHvxTy(HVC.getIntTy(32), false), Ptr, "temp_result_lo");
2390 LLVM_DEBUG(dbgs() << " LoadedResultLo : " << *LoadedResultLo << "\n");
2391
2392
2393
2394
2395 auto V6_vpackeb = HVC.HST.getIntrinsicId(Hexagon::V6_vpackeb);
2397 NT, V6_vpackeb, {LoadedResultHi, LoadedResultLo}, nullptr);
2398 LLVM_DEBUG(dbgs() << " ScaledRes : " << *Res << "\n");
2399 [[maybe_unused]] auto *StoreRes = Builder.CreateStore(Res, Ptr);
2400 LLVM_DEBUG(dbgs() << " StoreRes : " << *StoreRes << "\n");
2401 } else if (ElemWidth == 2) {
2402
2403 if (IndexWidth == 2) {
2404
2405 Value *CastIndex =
2407 LLVM_DEBUG(dbgs() << " Cast index: " << *CastIndex << ")\n");
2408
2409
2410 auto V6_vaslh = HVC.HST.getIntrinsicId(Hexagon::V6_vaslh);
2411 Value *AdjustedIndex = HVC.createHvxIntrinsic(
2412 Builder, V6_vaslh, NT, {CastIndex, HVC.getConstInt(1)});
2414 << " Shifted half index: " << *AdjustedIndex << ")\n");
2415
2416 auto V6_vgather = HVC.HST.getIntrinsicId(Hexagon::V6_vgathermh);
2417
2418
2420 Type::getVoidTy(Ctx), V6_vgather,
2422 AdjustedIndex},
2423 nullptr);
2424 for (auto &U : Dst->uses()) {
2426 dbgs() << " dst used by: " << *UI << "\n";
2427 }
2428 for (auto &U : In.uses()) {
2430 dbgs() << " In used by : " << *UI << "\n";
2431 }
2432
2433
2435 HVC.getHvxTy(HVC.getIntTy(16), false), Ptr, "temp_result");
2436 LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
2437 In.replaceAllUsesWith(LoadedResult);
2438 } else {
2439 LLVM_DEBUG(dbgs() << " Unhandled index type for vgather\n");
2440 return nullptr;
2441 }
2442 } else if (ElemWidth == 4) {
2443 if (IndexWidth == 4) {
2444
2445 auto V6_vaslh = HVC.HST.getIntrinsicId(Hexagon::V6_vaslh);
2446 Value *AdjustedIndex = HVC.createHvxIntrinsic(
2447 Builder, V6_vaslh, NT, {Indexes, HVC.getConstInt(2)});
2449 << " Shifted word index: " << *AdjustedIndex << ")\n");
2451 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermw_128B,
2453 AdjustedIndex},
2454 nullptr);
2455 } else {
2456 LLVM_DEBUG(dbgs() << " Unhandled index type for vgather\n");
2457 return nullptr;
2458 }
2459 } else {
2460 LLVM_DEBUG(dbgs() << " Unhandled element type for vgather\n");
2461 return nullptr;
2462 }
2463 } else if (HVC.HST.getVectorLength() == OutputSize * 2) {
2464
2465 LLVM_DEBUG(dbgs() << " Unhandled half of register size\n");
2466 return nullptr;
2467 } else if (HVC.HST.getVectorLength() * 2 == OutputSize) {
2468 LLVM_DEBUG(dbgs() << " Unhandled twice the register size\n");
2469 return nullptr;
2470 }
2471
2472
2473
2474 Dst->eraseFromParent();
2475 } else if (Qual == HvxIdioms::LLVM_Scatter) {
2476
2477 auto *DstInpTy = cast<VectorType>(Dst->getOperand(1)->getType());
2478 assert(DstInpTy && "Cannot handle no vector type for llvm.scatter");
2479 [[maybe_unused]] unsigned DstInpSize = HVC.getSizeOf(DstInpTy);
2480 [[maybe_unused]] unsigned DstElements = HVC.length(DstInpTy);
2481 [[maybe_unused]] auto *DstElemTy =
2483 assert(DstElemTy && "llvm.scatter needs vector of ptr argument");
2484 LLVM_DEBUG(dbgs() << " Gather feeds into scatter\n Values to scatter : "
2485 << *Dst->getOperand(0) << "\n");
2486 LLVM_DEBUG(dbgs() << " Dst type(" << *DstInpTy << ") elements("
2487 << DstElements << ") VecLen(" << DstInpSize << ") type("
2488 << *DstElemTy << ") Access alignment("
2489 << *Dst->getOperand(2) << ")\n");
2490
2492 if (!Src)
2493 return nullptr;
2495
2497 LLVM_DEBUG(dbgs() << " Source is not a pointer type...\n");
2498 return nullptr;
2499 }
2500
2502 Src, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
2503 LLVM_DEBUG(dbgs() << " CastedSrc: " << *CastedSrc << "\n");
2504
2506 if (!DstLoad) {
2507 LLVM_DEBUG(dbgs() << " Unable to locate DstLoad\n");
2508 return nullptr;
2509 }
2510 LLVM_DEBUG(dbgs() << " DstLoad : " << *DstLoad << "\n");
2511
2513 if (!Ptr)
2514 return nullptr;
2516 Value *CastIndex =
2518 LLVM_DEBUG(dbgs() << " Cast index: " << *CastIndex << ")\n");
2519
2520
2521 auto V6_vaslh = HVC.HST.getIntrinsicId(Hexagon::V6_vaslh);
2522 Value *AdjustedIndex = HVC.createHvxIntrinsic(
2523 Builder, V6_vaslh, NT, {CastIndex, HVC.getConstInt(1)});
2524 LLVM_DEBUG(dbgs() << " Shifted half index: " << *AdjustedIndex << ")\n");
2525
2527 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
2529 AdjustedIndex},
2530 nullptr);
2531 } else if (Qual == HvxIdioms::HEX_Gather_Scatter) {
2532
2533
2534
2535
2536
2539 if (cstDataVector) {
2540
2541
2542
2543 AllocaInst *IndexesAlloca = Builder.CreateAlloca(NT);
2544 [[maybe_unused]] auto *StoreIndexes =
2545 Builder.CreateStore(cstDataVector, IndexesAlloca);
2546 LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n");
2548 IndexesAlloca->getAllocatedType(), IndexesAlloca, "reload_index");
2549 AllocaInst *ResultAlloca = Builder.CreateAlloca(NT);
2550 LLVM_DEBUG(dbgs() << " ResultAlloca : " << *ResultAlloca << "\n");
2551
2553 IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
2554 LLVM_DEBUG(dbgs() << " CastedSrc : " << *CastedSrc << "\n");
2555
2557 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
2558 {ResultAlloca, CastedSrc,
2560 nullptr);
2562 HVC.getHvxTy(HVC.getIntTy(16), false), ResultAlloca, "temp_result");
2563 LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
2564 LLVM_DEBUG(dbgs() << " Gather : " << *Gather << "\n");
2565 In.replaceAllUsesWith(LoadedResult);
2566 }
2567 } else {
2568
2570 if (!Src)
2571 return nullptr;
2573
2575 Src, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
2576 LLVM_DEBUG(dbgs() << " CastedSrc: " << *CastedSrc << "\n");
2577
2579 if (!DstLoad)
2580 return nullptr;
2581 LLVM_DEBUG(dbgs() << " DstLoad : " << *DstLoad << "\n");
2583 if (!Ptr)
2584 return nullptr;
2586
2588 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgather_vscattermh,
2590 Indexes},
2591 nullptr);
2592 }
2593 return Gather;
2594 } else if (Qual == HvxIdioms::HEX_Scatter) {
2595
2596
2597
2598
2599 AllocaInst *ResultAlloca = Builder.CreateAlloca(NT);
2601 IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
2602 LLVM_DEBUG(dbgs() << " CastedSrc : " << *CastedSrc << "\n");
2603 Value *CastIndex =
2605 LLVM_DEBUG(dbgs() << " Cast index : " << *CastIndex << ")\n");
2606
2608 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
2610 CastIndex},
2611 nullptr);
2613 HVC.getHvxTy(HVC.getIntTy(16), false), ResultAlloca, "temp_result");
2614 LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
2615 In.replaceAllUsesWith(LoadedResult);
2616 } else if (Qual == HvxIdioms::HEX_Gather) {
2617
2618
2621 if (cstDataVector) {
2622
2623 AllocaInst *IndexesAlloca = Builder.CreateAlloca(NT);
2624
2625 [[maybe_unused]] auto *StoreIndexes =
2626 Builder.CreateStore(cstDataVector, IndexesAlloca);
2627 LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n");
2628 Value *IndexLoad = Builder.CreateLoad(
2629 IndexesAlloca->getAllocatedType(), IndexesAlloca, "reload_index");
2630 AllocaInst *ResultAlloca = Builder.CreateAlloca(NT);
2631 LLVM_DEBUG(dbgs() << " ResultAlloca : " << *ResultAlloca
2632 << "\n AddressSpace: "
2634
2635 Value *CastedSrc = Builder.CreatePtrToInt(
2636 IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
2637 LLVM_DEBUG(dbgs() << " CastedSrc : " << *CastedSrc << "\n");
2638
2639 Gather = Builder.CreateIntrinsic(
2640 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
2641 {ResultAlloca, CastedSrc,
2643 nullptr);
2644 Value *LoadedResult = Builder.CreateLoad(
2645 HVC.getHvxTy(HVC.getIntTy(16), false), ResultAlloca, "temp_result");
2646 LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
2647 LLVM_DEBUG(dbgs() << " Gather : " << *Gather << "\n");
2648 In.replaceAllUsesWith(LoadedResult);
2649 }
2650 }
2651 } else if (Qual == HvxIdioms::LLVM_Gather) {
2652
2653 errs() << " Underimplemented vgather to vgather sequence\n";
2654 return nullptr;
2655 } else
2657
2658 return Gather;
2659}
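// Illustrative note (not part of the upstream source): HVX gathers are
// memory-to-memory operations, which is why the rewrites above store the
// gathered vector through a destination pointer (an alloca or the original
// store target) and re-load it to produce the SSA value. Per 16-bit lane,
// V6_vgathermh behaves roughly like this scalar sketch ('base', 'idx' and
// 'dst' are hypothetical stand-ins for CastedSrc, the index vector, and the
// destination buffer):
//
//   for (unsigned i = 0; i != NumLanes; ++i)
//     dst[i] = *reinterpret_cast<const int16_t *>(base + idx[i]);
//
// The V6_vaslh shift by 1 seen above converts halfword indexes into the
// byte offsets the hardware expects.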
2660
2661auto HvxIdioms::processFxpMulChopped(IRBuilderBase &Builder, Instruction &In,
2662 const FxpOp &Op) const -> Value * {
2663 assert(Op.X.Val->getType() == Op.Y.Val->getType());
2664 auto *InpTy = cast<VectorType>(Op.X.Val->getType());
2665 unsigned Width = InpTy->getScalarSizeInBits();
2666 bool Rounding = Op.RoundAt.has_value();
2667
2668 if (!Op.RoundAt || *Op.RoundAt == Op.Frac - 1) {
2669 // The fixed-point intrinsics do signed multiplication.
2670 if (Width == Op.Frac + 1 && Op.X.Sgn != Unsigned && Op.Y.Sgn != Unsigned) {
2671 Value *QMul = nullptr;
2672 if (Width == 16) {
2673 QMul = createMulQ15(Builder, Op.X, Op.Y, Rounding);
2674 } else if (Width == 32) {
2675 QMul = createMulQ31(Builder, Op.X, Op.Y, Rounding);
2676 }
2677 if (QMul != nullptr)
2678 return QMul;
2679 }
2680 }
2681
2682 assert(Width >= 32 || isPowerOf2_32(Width));
2683 assert(Width < 32 || Width % 32 == 0);
2684
2685
2686 if (Width < 32) {
2687 if (Width < 16)
2688 return nullptr;
2689
2690
2691
2692 assert(Width == 16);
2693 assert(Op.Frac != 0 && "Unshifted mul should have been skipped");
2694 if (Op.Frac == 16) {
2695
2696 if (Value *MulH = createMulH16(Builder, Op.X, Op.Y))
2697 return MulH;
2698 }
2699
2700 Value *Prod32 = createMul16(Builder, Op.X, Op.Y);
2701 if (Rounding) {
2702 Value *RoundVal = HVC.getConstSplat(Prod32->getType(), 1 << *Op.RoundAt);
2703 Prod32 = Builder.CreateAdd(Prod32, RoundVal, "add");
2704 }
2705
2706 Value *ShiftAmt = HVC.getConstSplat(Prod32->getType(), Op.Frac);
2707 Value *Shifted = Op.X.Sgn == Signed || Op.Y.Sgn == Signed
2708 ? Builder.CreateAShr(Prod32, ShiftAmt, "asr")
2709 : Builder.CreateLShr(Prod32, ShiftAmt, "lsr");
2710 return Builder.CreateTrunc(Shifted, InpTy, "trn");
2711 }
2712
2713
2714
2715
2716
2717 auto WordX = HVC.splitVectorElements(Builder, Op.X.Val, 32);
2718 auto WordY = HVC.splitVectorElements(Builder, Op.Y.Val, 32);
2719 auto WordP = createMulLong(Builder, WordX, Op.X.Sgn, WordY, Op.Y.Sgn);
2720
2721 auto *HvxWordTy = cast(WordP.front()->getType());
2722
2723
2724 if (Op.RoundAt.has_value()) {
2725 Value *Zero = HVC.getNullValue(WordX[0]->getType());
2726 SmallVector<Value *> RoundV(WordP.size(), Zero);
2727 RoundV[*Op.RoundAt / 32] =
2728 HVC.getConstSplat(HvxWordTy, 1 << (*Op.RoundAt % 32));
2729 WordP = createAddLong(Builder, WordP, RoundV);
2730 }
2731
2732
2733
2734
2735 unsigned SkipWords = Op.Frac / 32;
2736 Constant *ShiftAmt = HVC.getConstSplat(HvxWordTy, Op.Frac % 32);
2737
2738 for (int Dst = 0, End = WordP.size() - SkipWords; Dst != End; ++Dst) {
2739 int Src = Dst + SkipWords;
2740 Value *Lo = WordP[Src];
2741 if (Src + 1 < End) {
2742 Value *Hi = WordP[Src + 1];
2743 WordP[Dst] = Builder.CreateIntrinsic(HvxWordTy, Intrinsic::fshr,
2744 {Hi, Lo, ShiftAmt},
2745 nullptr, "int");
2746 } else {
2747 // The shift of the most significant word.
2748 WordP[Dst] = Builder.CreateAShr(Lo, ShiftAmt, "asr");
2749 }
2750 }
2751 if (SkipWords != 0)
2752 WordP.resize(WordP.size() - SkipWords);
2753
2754 return HVC.joinVectorElements(Builder, WordP, Op.ResTy);
2755}
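// Worked example for the wide path above (illustrative, assuming a Q16.16
// multiply on 32-bit lanes): each operand occupies one 32-bit word, so
// createMulLong produces a two-word (64-bit) product per lane. With
// Op.RoundAt == 15, the splat 1 << 15 is added into word 0; SkipWords is
// 16/32 == 0 and the funnel shift fshr(Hi, Lo, 16) extracts bits 16..47 of
// the product, which joinVectorElements then narrows back to the 32-bit
// result type.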
2756
2757auto HvxIdioms::createMulQ15(IRBuilderBase &Builder, SValue X, SValue Y,
2758 bool Rounding) const -> Value * {
2759 assert(X.Val->getType() == Y.Val->getType());
2760 assert(X.Val->getType()->getScalarType() == HVC.getIntTy(16));
2761 assert(HVC.HST.isHVXVectorType(EVT::getEVT(X.Val->getType(), false)));
2762
2763 // There is no non-rounding intrinsic for i16.
2764 if (!Rounding || X.Sgn == Unsigned || Y.Sgn == Unsigned)
2765 return nullptr;
2766
2767 auto V6_vmpyhvsrs = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhvsrs);
2768 return HVC.createHvxIntrinsic(Builder, V6_vmpyhvsrs, X.Val->getType(),
2769 {X.Val, Y.Val});
2770}
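// Illustrative scalar model (not from the source): V6_vmpyhvsrs performs a
// Q15 fixed-point multiply with rounding, per 16-bit lane:
//
//   static int16_t mulQ15Rnd(int16_t a, int16_t b) {
//     return static_cast<int16_t>((int32_t(a) * b + (1 << 14)) >> 15);
//   }
//
// E.g. 0.5 * 0.5: (0x4000 * 0x4000 + 0x2000) >> 15 == 0x2000, i.e. 0.25.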
2771
2772auto HvxIdioms::createMulQ31(IRBuilderBase &Builder, SValue X, SValue Y,
2773 bool Rounding) const -> Value * {
2774 Type *InpTy = X.Val->getType();
2775 assert(InpTy == Y.Val->getType());
2776 assert(InpTy->getScalarType() == HVC.getIntTy(32));
2777 assert(HVC.HST.isHVXVectorType(EVT::getEVT(InpTy, false)));
2778
2779 if (X.Sgn == Unsigned || Y.Sgn == Unsigned)
2780 return nullptr;
2781
2782 auto V6_vmpyewuh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyewuh);
2783 auto V6_vmpyo_acc = Rounding
2784 ? HVC.HST.getIntrinsicId(Hexagon::V6_vmpyowh_rnd_sacc)
2785 : HVC.HST.getIntrinsicId(Hexagon::V6_vmpyowh_sacc);
2786 Value *V1 =
2787 HVC.createHvxIntrinsic(Builder, V6_vmpyewuh, InpTy, {X.Val, Y.Val});
2788 return HVC.createHvxIntrinsic(Builder, V6_vmpyo_acc, InpTy,
2789 {V1, X.Val, Y.Val});
2790}
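// Illustrative scalar model (not from the source): the two intrinsics above
// together compute a Q31 product, i.e. per 32-bit lane:
//
//   static int32_t mulQ31(int32_t a, int32_t b) {
//     return static_cast<int32_t>((int64_t(a) * b) >> 31);
//   }
//
// Roughly, V6_vmpyewuh forms the partial product with the unsigned low
// halfword, and V6_vmpyowh_(rnd_)sacc accumulates the signed high-halfword
// contribution (with optional rounding).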
2791
2792auto HvxIdioms::createAddCarry(IRBuilderBase &Builder, Value *X, Value *Y,
2793 Value *CarryIn) const
2794 -> std::pair<Value *, Value *> {
2795 assert(X->getType() == Y->getType());
2796 auto VecTy = cast<VectorType>(X->getType());
2797 if (VecTy == HvxI32Ty && HVC.HST.useHVXV62Ops()) {
2798 SmallVector<Value *> Args = {X, Y};
2799 Intrinsic::ID AddCarry;
2800 if (CarryIn == nullptr && HVC.HST.useHVXV66Ops()) {
2801 AddCarry = HVC.HST.getIntrinsicId(Hexagon::V6_vaddcarryo);
2802 } else {
2803 AddCarry = HVC.HST.getIntrinsicId(Hexagon::V6_vaddcarry);
2804 if (CarryIn == nullptr)
2805 CarryIn = HVC.getNullValue(HVC.getBoolTy(HVC.length(VecTy)));
2806 Args.push_back(CarryIn);
2807 }
2808 Value *Ret = HVC.createHvxIntrinsic(Builder, AddCarry,
2809 nullptr, Args);
2810 Value *Result = Builder.CreateExtractValue(Ret, {0}, "ext");
2811 Value *CarryOut = Builder.CreateExtractValue(Ret, {1}, "ext");
2812 return {Result, CarryOut};
2813 }
2814
2815 // In other cases, do a regular add, and unsigned compare-less-than.
2816 // The carry-out can originate in two places: adding the carry-in or adding
2817 // the two input values.
2818 Value *Result1 = X; // Result1 = X + CarryIn
2819 if (CarryIn != nullptr) {
2820 unsigned Width = VecTy->getScalarSizeInBits();
2821 uint32_t Mask = 1;
2822 if (Width < 32) {
2823 for (unsigned i = 0, e = 32 / Width; i != e; ++i)
2824 Mask = (Mask << Width) | 1;
2825 }
2826 auto V6_vandqrt = HVC.HST.getIntrinsicId(Hexagon::V6_vandqrt);
2827 Value *ValueIn =
2828 HVC.createHvxIntrinsic(Builder, V6_vandqrt, nullptr,
2829 {CarryIn, HVC.getConstInt(Mask)});
2830 Result1 = Builder.CreateAdd(X, ValueIn, "add");
2831 }
2832
2833 Value *CarryOut1 = Builder.CreateCmp(CmpInst::ICMP_ULT, Result1, X, "cmp");
2834 Value *Result2 = Builder.CreateAdd(Result1, Y, "add");
2835 Value *CarryOut2 = Builder.CreateCmp(CmpInst::ICMP_ULT, Result2, Y, "cmp");
2836 return {Result2, Builder.CreateOr(CarryOut1, CarryOut2, "orb")};
2837}
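// Scalar sketch of the generic path above (illustrative only): the carry-in
// is folded into X first, and each addition's carry-out is detected by an
// unsigned wrap-around compare, at most one of which can fire:
//
//   static std::pair<uint32_t, bool> addCarry(uint32_t x, uint32_t y, bool ci) {
//     uint32_t r1 = x + ci;          // add carry-in
//     bool c1 = r1 < x;              // wrapped => carry out
//     uint32_t r2 = r1 + y;          // add the other operand
//     bool c2 = r2 < y;
//     return {r2, c1 || c2};         // at most one of c1, c2 is set
//   }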
2838
2839 auto HvxIdioms::createMul16(IRBuilderBase &Builder, SValue X, SValue Y) const
2840 -> Value * {
2841 Intrinsic::ID V6_vmpyh = 0;
2842 std::tie(X, Y) = canonSgn(X, Y);
2843
2844 if (X.Sgn == Signed) {
2845 V6_vmpyh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhv);
2846 } else if (Y.Sgn == Signed) {
2846 } else if (Y.Sgn == Signed) {
2847
2848 V6_vmpyh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhus);
2849 } else {
2850 V6_vmpyh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyuhv);
2851 }
2852
2853 // i16*i16 -> i32 / interleaved
2854 Value *P =
2855 HVC.createHvxIntrinsic(Builder, V6_vmpyh, HvxP32Ty, {Y.Val, X.Val});
2856
2857 return HVC.vshuff(Builder, HVC.sublo(Builder, P), HVC.subhi(Builder, P));
2858}
2859
2860auto HvxIdioms::createMulH16(IRBuilderBase &Builder, SValue X, SValue Y) const
2862 Type *HvxI16Ty = HVC.getHvxTy(HVC.getIntTy(16), false);
2863
2864 if (HVC.HST.useHVXV69Ops()) {
2865 if (X.Sgn != Signed && Y.Sgn != Signed) {
2866 auto V6_vmpyuhvs = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyuhvs);
2867 return HVC.createHvxIntrinsic(Builder, V6_vmpyuhvs, HvxI16Ty,
2868 {X.Val, Y.Val});
2869 }
2870 }
2871
2872 Type *HvxP16Ty = HVC.getHvxTy(HVC.getIntTy(16), true);
2873 Value *Pair16 =
2874 Builder.CreateBitCast(createMul16(Builder, X, Y), HvxP16Ty, "cst");
2875 unsigned Len = HVC.length(HvxP16Ty) / 2;
2876
2877 SmallVector<int, 128> PickOdd(Len);
2878 for (int i = 0; i != static_cast<int>(Len); ++i)
2879 PickOdd[i] = 2 * i + 1;
2880
2881 return Builder.CreateShuffleVector(
2882 HVC.sublo(Builder, Pair16), HVC.subhi(Builder, Pair16), PickOdd, "shf");
2883}
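// The PickOdd mask above selects the odd 16-bit lanes of the interleaved
// 32-bit products, i.e. the high halves. Scalar analogue (illustrative,
// unsigned case):
//
//   static uint16_t mulH16u(uint16_t a, uint16_t b) {
//     return static_cast<uint16_t>((uint32_t(a) * b) >> 16);
//   }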
2884
2885auto HvxIdioms::createMul32(IRBuilderBase &Builder, SValue X, SValue Y) const
2886 -> std::pair<Value *, Value *> {
2887 assert(X.Val->getType() == Y.Val->getType());
2888 assert(X.Val->getType() == HvxI32Ty);
2889
2890 Intrinsic::ID V6_vmpy_parts;
2891 std::tie(X, Y) = canonSgn(X, Y);
2892
2893 if (X.Sgn == Signed) {
2894 V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyss_parts;
2895 } else if (Y.Sgn == Signed) {
2896 V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyus_parts;
2897 } else {
2898 V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyuu_parts;
2899 }
2900
2901 Value *Parts = HVC.createHvxIntrinsic(Builder, V6_vmpy_parts, nullptr,
2902 {X.Val, Y.Val}, {HvxI32Ty});
2903 Value *Hi = Builder.CreateExtractValue(Parts, {0}, "ext");
2904 Value *Lo = Builder.CreateExtractValue(Parts, {1}, "ext");
2905 return {Lo, Hi};
2906}
2907
2908auto HvxIdioms::createAddLong(IRBuilderBase &Builder, ArrayRef<Value *> WordX,
2909 ArrayRef<Value *> WordY) const
2910 -> SmallVector<Value *> {
2911 assert(WordX.size() == WordY.size());
2912 unsigned Idx = 0, Length = WordX.size();
2913 SmallVector<Value *> Sum(Length);
2914
2915 while (Idx != Length) {
2916 if (HVC.isZero(WordX[Idx]))
2917 Sum[Idx] = WordY[Idx];
2918 else if (HVC.isZero(WordY[Idx]))
2919 Sum[Idx] = WordX[Idx];
2920 else
2921 break;
2922 ++Idx;
2923 }
2924
2925 Value *Carry = nullptr;
2926 for (; Idx != Length; ++Idx) {
2927 std::tie(Sum[Idx], Carry) =
2928 createAddCarry(Builder, WordX[Idx], WordY[Idx], Carry);
2929 }
2930
2931
2932 return Sum;
2933}
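// Illustrative use (not from the source): adding two 64-bit quantities held
// as lo/hi 32-bit word vectors amounts to
//
//   std::tie(Sum[0], Carry) = createAddCarry(B, WordX[0], WordY[0], nullptr);
//   std::tie(Sum[1], Carry) = createAddCarry(B, WordX[1], WordY[1], Carry);
//
// with the loop above first skipping any leading words where one side is
// known to be zero.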
2934
2935auto HvxIdioms::createMulLong(IRBuilderBase &Builder, ArrayRef<Value *> WordX,
2936 Signedness SgnX, ArrayRef<Value *> WordY,
2937 Signedness SgnY) const -> SmallVector<Value *> {
2938 SmallVector<SmallVector<Value *>> Products(WordX.size() + WordY.size());
2939
2940
2941
2942 for (int i = 0, e = WordX.size(); i != e; ++i) {
2943 for (int j = 0, f = WordY.size(); j != f; ++j) {
2944
2945 Signedness SX = (i + 1 == e) ? SgnX : Unsigned;
2946 Signedness SY = (j + 1 == f) ? SgnY : Unsigned;
2947 auto [Lo, Hi] = createMul32(Builder, {WordX[i], SX}, {WordY[j], SY});
2948 Products[i + j + 0].push_back(Lo);
2949 Products[i + j + 1].push_back(Hi);
2950 }
2951 }
2952
2953 Value *Zero = HVC.getNullValue(WordX[0]->getType());
2954
2955 auto pop_back_or_zero = [Zero](auto &Vector) -> Value * {
2956 if (Vector.empty())
2957 return Zero;
2958 auto Last = Vector.back();
2959 Vector.pop_back();
2960 return Last;
2961 };
2962
2963 for (int i = 0, e = Products.size(); i != e; ++i) {
2964 while (Products[i].size() > 1) {
2965 Value *Carry = nullptr;
2966 for (int j = i; j != e; ++j) {
2967 auto &ProdJ = Products[j];
2968 auto [Sum, CarryOut] = createAddCarry(Builder, pop_back_or_zero(ProdJ),
2969 pop_back_or_zero(ProdJ), Carry);
2970 ProdJ.insert(ProdJ.begin(), Sum);
2971 Carry = CarryOut;
2972 }
2973 }
2974 }
2975
2976 SmallVector<Value *> WordP;
2977 for (auto &P : Products) {
2978 assert(P.size() == 1 && "Should have been added together");
2979 WordP.push_back(P.front());
2980 }
2981
2982 return WordP;
2983}
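// Schoolbook layout produced by the loops above (illustrative), for two-word
// operands X1:X0 and Y1:Y0, where createMul32 yields (Lo, Hi) word pairs:
//
//   Products[0] = { lo(X0*Y0) }
//   Products[1] = { hi(X0*Y0), lo(X0*Y1), lo(X1*Y0) }
//   Products[2] = { hi(X0*Y1), hi(X1*Y0), lo(X1*Y1) }
//   Products[3] = { hi(X1*Y1) }
//
// The second loop then folds each column into a single word, propagating
// carries into the higher columns. Only the most significant input words
// keep their original signedness; all lower words are treated as unsigned.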
2984
2985auto HvxIdioms::run() -> bool {
2986 bool Changed = false;
2987
2988 for (BasicBlock &B : HVC.F) {
2989 for (auto It = B.rbegin(); It != B.rend(); ++It) {
2990 if (auto Fxm = matchFxpMul(*It)) {
2991 Value *New = processFxpMul(*It, *Fxm);
2992
2994 if (!New)
2995 continue;
2996 bool StartOver = !isa<Instruction>(New);
2997 It->replaceAllUsesWith(New);
2998 RecursivelyDeleteTriviallyDeadInstructions(&*It, &HVC.TLI);
2999 It = StartOver ? B.rbegin()
3000 : cast<Instruction>(New)->getReverseIterator();
3001 Changed = true;
3002 } else if (matchGather(*It)) {
3003 Value *New = processVGather(*It);
3004 if (!New)
3005 continue;
3007
3008 It->eraseFromParent();
3012 } else if (matchScatter(*It)) {
3013 Value *New = processVScatter(*It);
3014 if (!New)
3015 continue;
3016 LLVM_DEBUG(dbgs() << " Scatter : " << *New << "\n");
3017
3018 It->eraseFromParent();
3022 }
3023 }
3024 }
3025
3026 return Changed;
3027}
3028
3029
3030
3031auto HexagonVectorCombine::run() -> bool {
3032 if (DumpModule)
3033 dbgs() << "Module before HexagonVectorCombine\n" << *F.getParent();
3034
3035 bool Changed = false;
3036 if (HST.useHVXOps()) {
3037 if (VAEnabled)
3038 Changed |= AlignVectors(*this).run();
3039 if (VIEnabled)
3040 Changed |= HvxIdioms(*this).run();
3041 }
3042
3043 if (DumpModule) {
3044 dbgs() << "Module " << (Changed ? "(modified)" : "(unchanged)")
3045 << " after HexagonVectorCombine\n"
3046 << *F.getParent();
3047 }
3048 return Changed;
3049}
3050
3051auto HexagonVectorCombine::getIntTy(unsigned Width) const -> IntegerType * {
3052 return IntegerType::get(F.getContext(), Width);
3053}
3054
3055auto HexagonVectorCombine::getByteTy(int ElemCount) const -> Type * {
3056 assert(ElemCount >= 0);
3057 IntegerType *ByteTy = Type::getInt8Ty(F.getContext());
3058 if (ElemCount == 0)
3059 return ByteTy;
3060 return VectorType::get(ByteTy, ElemCount, false);
3061}
3062
3063auto HexagonVectorCombine::getBoolTy(int ElemCount) const -> Type * {
3064 assert(ElemCount >= 0);
3065 IntegerType *BoolTy = Type::getInt1Ty(F.getContext());
3066 if (ElemCount == 0)
3067 return BoolTy;
3068 return VectorType::get(BoolTy, ElemCount, false);
3069}
3070
3071auto HexagonVectorCombine::getConstInt(int Val, unsigned Width) const
3072 -> ConstantInt * {
3073 return ConstantInt::getSigned(getIntTy(Width), Val);
3074}
3075
3076auto HexagonVectorCombine::isZero(const Value *Val) const -> bool {
3077 if (auto *C = dyn_cast<Constant>(Val))
3078 return C->isZeroValue();
3079 return false;
3080}
3081
3082auto HexagonVectorCombine::getIntValue(const Value *Val) const
3083 -> std::optional<APInt> {
3084 if (auto *CI = dyn_cast<ConstantInt>(Val))
3085 return CI->getValue();
3086 return std::nullopt;
3087}
3088
3089auto HexagonVectorCombine::isUndef(const Value *Val) const -> bool {
3090 return isa<UndefValue>(Val);
3091}
3092
3093auto HexagonVectorCombine::isTrue(const Value *Val) const -> bool {
3094 return Val == ConstantInt::getTrue(Val->getType());
3095}
3096
3097auto HexagonVectorCombine::isFalse(const Value *Val) const -> bool {
3098 return isZero(Val);
3099}
3100
3101auto HexagonVectorCombine::getHvxTy(Type *ElemTy, bool Pair) const
3102 -> VectorType * {
3103 EVT ETy = EVT::getEVT(ElemTy, false);
3104 assert(ETy.isSimple() && "Invalid HVX element type");
3105
3106 assert(HST.isHVXElementType(ETy.getSimpleVT(), false) &&
3107 "Invalid HVX element type");
3108 unsigned HwLen = HST.getVectorLength();
3109 unsigned NumElems = (8 * HwLen) / ETy.getSizeInBits();
3110 return VectorType::get(ElemTy, Pair ? 2 * NumElems : NumElems,
3111 false);
3112}
3113
3114auto HexagonVectorCombine::getSizeOf(const Value *Val, SizeKind Kind) const
3115 -> int {
3116 return getSizeOf(Val->getType(), Kind);
3117}
3118
3119auto HexagonVectorCombine::getSizeOf(const Type *Ty, SizeKind Kind) const
3120 -> int {
3121 auto *NcTy = const_cast<Type *>(Ty);
3122 switch (Kind) {
3123 case Store:
3124 return DL.getTypeStoreSize(NcTy).getFixedValue();
3125 case Alloc:
3126 return DL.getTypeAllocSize(NcTy).getFixedValue();
3127 }
3128 llvm_unreachable("Unhandled SizeKind enum");
3129}
3130
3131auto HexagonVectorCombine::getTypeAlignment(Type *Ty) const -> int {
3132
3133
3134 if (HST.isTypeForHVX(Ty))
3135 return HST.getVectorLength();
3136 return DL.getABITypeAlign(Ty).value();
3137}
3138
3139auto HexagonVectorCombine::length(Value *Val) const -> size_t {
3140 return length(Val->getType());
3141}
3142
3143auto HexagonVectorCombine::length(Type *Ty) const -> size_t {
3144 auto *VecTy = dyn_cast<VectorType>(Ty);
3145 assert(VecTy && "Must be a vector type");
3146 return VecTy->getElementCount().getFixedValue();
3147}
3148
3149auto HexagonVectorCombine::getNullValue(Type *Ty) const -> Constant * {
3150 assert(Ty->isIntOrIntVectorTy());
3151 auto Zero = ConstantInt::get(Ty->getScalarType(), 0);
3152 if (auto *VecTy = dyn_cast<VectorType>(Ty))
3153 return ConstantVector::getSplat(VecTy->getElementCount(), Zero);
3154 return Zero;
3155}
3156
3157auto HexagonVectorCombine::getFullValue(Type *Ty) const -> Constant * {
3158 assert(Ty->isIntOrIntVectorTy());
3159 auto Minus1 = ConstantInt::get(Ty->getScalarType(), -1);
3160 if (auto *VecTy = dyn_cast<VectorType>(Ty))
3161 return ConstantVector::getSplat(VecTy->getElementCount(), Minus1);
3162 return Minus1;
3163}
3164
3165auto HexagonVectorCombine::getConstSplat(Type *Ty, int Val) const
3166 -> Constant * {
3167 assert(Ty->isVectorTy());
3168 auto VecTy = cast<VectorType>(Ty);
3169 Type *ElemTy = VecTy->getElementType();
3170 // Add support for floats if needed.
3171 auto Splat = ConstantVector::getSplat(VecTy->getElementCount(),
3172 ConstantInt::get(ElemTy, Val));
3173 return Splat;
3174}
3175
3176auto HexagonVectorCombine::simplify(Value *V) const -> Value * {
3177 if (auto *In = dyn_cast<Instruction>(V)) {
3178 SimplifyQuery Q(DL, &TLI, &DT, &AC, In);
3179 return simplifyInstruction(In, Q);
3180 }
3181 return nullptr;
3182}
3183
3184
3185auto HexagonVectorCombine::insertb(IRBuilderBase &Builder, Value *Dst,
3187 int Where) const -> Value * {
3188 assert(isByteVecTy(Dst->getType()) && isByteVecTy(Src->getType()));
3189 int SrcLen = getSizeOf(Src);
3190 int DstLen = getSizeOf(Dst);
3191 assert(0 <= Start && Start + Length <= SrcLen);
3192 assert(0 <= Where && Where + Length <= DstLen);
3193
3194 int P2Len = PowerOf2Ceil(SrcLen | DstLen);
3195 auto *Poison = PoisonValue::get(getByteTy());
3196 Value *P2Src = vresize(Builder, Src, P2Len, Poison);
3197 Value *P2Dst = vresize(Builder, Dst, P2Len, Poison);
3198
3200 for (int i = 0; i != P2Len; ++i) {
3201
3202
3203 SMask[i] =
3204 (Where <= i && i < Where + Length) ? P2Len + Start + (i - Where) : i;
3205 }
3206
3207 Value *P2Insert = Builder.CreateShuffleVector(P2Dst, P2Src, SMask, "shf");
3208 return vresize(Builder, P2Insert, DstLen, Poison);
3209}
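// Shuffle-mask example for the insert above (illustrative): with
// DstLen == SrcLen == 8 (so P2Len == 8), Start == 2, Length == 3 and
// Where == 4, the mask comes out as
//
//   SMask = { 0, 1, 2, 3, 10, 11, 12, 7 }
//
// i.e. lanes 4..6 are taken from P2Src (indexes P2Len + Start + (i - Where))
// and every other lane is kept from P2Dst.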
3210
3211auto HexagonVectorCombine::vlalignb(IRBuilderBase &Builder, Value *Lo,
3213 assert(Lo->getType() == Hi->getType() && "Argument type mismatch");
3214 if (isZero(Amt))
3215 return Hi;
3216 int VecLen = getSizeOf(Hi);
3217 if (auto IntAmt = getIntValue(Amt))
3218 return getElementRange(Builder, Lo, Hi, VecLen - IntAmt->getSExtValue(),
3219 VecLen);
3220
3221 if (HST.isTypeForHVX(Hi->getType())) {
3222 assert(static_cast<unsigned>(VecLen) == HST.getVectorLength() &&
3223 "Expecting an exact HVX type");
3224 return createHvxIntrinsic(Builder, HST.getIntrinsicId(Hexagon::V6_vlalignb),
3225 Hi->getType(), {Hi, Lo, Amt});
3226 }
3227
3228 if (VecLen == 4) {
3233 Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext()), "trn");
3234 return Builder.CreateBitCast(Trunc, Hi->getType(), "cst");
3235 }
3236 if (VecLen == 8) {
3237 Value *Sub = Builder.CreateSub(getConstInt(VecLen), Amt, "sub");
3238 return vralignb(Builder, Lo, Hi, Sub);
3239 }
3240 llvm_unreachable("Unexpected vector length");
3241}
3242
3243auto HexagonVectorCombine::vralignb(IRBuilderBase &Builder, Value *Lo,
3245 assert(Lo->getType() == Hi->getType() && "Argument type mismatch");
3246 if (isZero(Amt))
3247 return Lo;
3248 int VecLen = getSizeOf(Lo);
3249 if (auto IntAmt = getIntValue(Amt))
3250 return getElementRange(Builder, Lo, Hi, IntAmt->getSExtValue(), VecLen);
3251
3252 if (HST.isTypeForHVX(Lo->getType())) {
3253 assert(static_cast<unsigned>(VecLen) == HST.getVectorLength() &&
3254 "Expecting an exact HVX type");
3255 return createHvxIntrinsic(Builder, HST.getIntrinsicId(Hexagon::V6_valignb),
3256 Lo->getType(), {Hi, Lo, Amt});
3257 }
3258
3259 if (VecLen == 4) {
3263 Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext()), "trn");
3264 return Builder.CreateBitCast(Trunc, Lo->getType(), "cst");
3265 }
3266 if (VecLen == 8) {
3267 Type *Int64Ty = Type::getInt64Ty(F.getContext());
3268 Value *Lo64 = Builder.CreateBitCast(Lo, Int64Ty, "cst");
3269 Value *Hi64 = Builder.CreateBitCast(Hi, Int64Ty, "cst");
3270 Value *Call = Builder.CreateIntrinsic(Intrinsic::hexagon_S2_valignrb,
3271 {Hi64, Lo64, Amt},
3272 nullptr, "cup");
3273 return Builder.CreateBitCast(Call, Lo->getType(), "cst");
3274 }
3275 llvm_unreachable("Unexpected vector length");
3276}
3277
3278
3279auto HexagonVectorCombine::concat(IRBuilderBase &Builder,
3281 assert(!Vecs.empty());
3283 std::vector<Value *> Work[2];
3284 int ThisW = 0, OtherW = 1;
3285
3286 Work[ThisW].assign(Vecs.begin(), Vecs.end());
3287 while (Work[ThisW].size() > 1) {
3288 auto *Ty = cast<VectorType>(Work[ThisW].front()->getType());
3289 SMask.resize(length(Ty) * 2);
3290 std::iota(SMask.begin(), SMask.end(), 0);
3291
3292 Work[OtherW].clear();
3293 if (Work[ThisW].size() % 2 != 0)
3294 Work[ThisW].push_back(UndefValue::get(Ty));
3295 for (int i = 0, e = Work[ThisW].size(); i < e; i += 2) {
3296 Value *Joined = Builder.CreateShuffleVector(
3297 Work[ThisW][i], Work[ThisW][i + 1], SMask, "shf");
3298 Work[OtherW].push_back(Joined);
3299 }
3300 std::swap(ThisW, OtherW);
3301 }
3302
3303
3304
3305
3306 SMask.resize(Vecs.size() * length(Vecs.front()->getType()));
3307 std::iota(SMask.begin(), SMask.end(), 0);
3308 Value *Total = Work[ThisW].front();
3309 return Builder.CreateShuffleVector(Total, SMask, "shf");
3310}
3311
3312auto HexagonVectorCombine::vresize(IRBuilderBase &Builder, Value *Val,
3313 int NewSize, Value *Pad) const -> Value * {
3314 auto *ValTy = cast<VectorType>(Val->getType());
3315
3316 assert(ValTy->getElementType() == Pad->getType());
3317
3318 int CurSize = length(ValTy);
3319 if (CurSize == NewSize)
3320 return Val;
3321
3322 if (CurSize > NewSize)
3323 return getElementRange(Builder, Val, Val, 0, NewSize);
3324
3325 SmallVector<int, 128> SMask(NewSize);
3326 std::iota(SMask.begin(), SMask.begin() + CurSize, 0);
3327 std::fill(SMask.begin() + CurSize, SMask.end(), CurSize);
3328 Value *PadVec = Builder.CreateVectorSplat(CurSize, Pad, "spt");
3329 return Builder.CreateShuffleVector(Val, PadVec, SMask, "shf");
3330}
3331
3332auto HexagonVectorCombine::rescale(IRBuilderBase &Builder, Value *Mask,
3333 Type *FromTy, Type *ToTy) const -> Value * {
3334 // Mask is a vector <N x i1>, where each element corresponds to an
3335 // element of FromTy. Remap it so that each element will correspond
3336 // to an element of ToTy.
3337 assert(isa<VectorType>(Mask->getType()));
3338
3339 Type *FromSTy = FromTy->getScalarType();
3340 Type *ToSTy = ToTy->getScalarType();
3341 if (FromSTy == ToSTy)
3342 return Mask;
3343
3344 int FromSize = getSizeOf(FromSTy);
3345 int ToSize = getSizeOf(ToSTy);
3346 assert(FromSize % ToSize == 0 || ToSize % FromSize == 0);
3347
3349 int FromCount = length(MaskTy);
3350 int ToCount = (FromCount * FromSize) / ToSize;
3351 assert((FromCount * FromSize) % ToSize == 0);
3352
3353 auto *FromITy = getIntTy(FromSize * 8);
3354 auto *ToITy = getIntTy(ToSize * 8);
3355
3356
3357 // Mask <N x i1> -> sext to <N x FromTy> -> bitcast to <M x ToTy> -> trunc.
3358 Value *Ext = Builder.CreateSExt(
3359 Mask, VectorType::get(FromITy, FromCount, false), "sxt");
3360 Value *Cast = Builder.CreateBitCast(
3361 Ext, VectorType::get(ToITy, ToCount, false), "cst");
3362 return Builder.CreateTrunc(
3363 Cast, VectorType::get(getBoolTy(), ToCount, false), "trn");
3364}
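// Numeric example for the remapping above (illustrative): rescaling a mask
// that controlled 64 x i16 lanes to control i32 lanes of the same vector
// width gives FromSize == 2, ToSize == 4, FromCount == 64, and therefore
// ToCount == 64 * 2 / 4 == 32: sign-extend to <64 x i16>, bitcast to
// <32 x i32>, then truncate back down to <32 x i1>.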
3365
3366
3367auto HexagonVectorCombine::vlsb(IRBuilderBase &Builder, Value *Val) const
3370 if (ScalarTy == getBoolTy())
3371 return Val;
3372
3373 Value *Bytes = vbytes(Builder, Val);
3374 if (auto *VecTy = dyn_cast<VectorType>(Bytes->getType()))
3375 return Builder.CreateTrunc(Bytes, getBoolTy(getSizeOf(VecTy)), "trn");
3376
3377
3378 return Builder.CreateTrunc(Bytes, getBoolTy(), "trn");
3379}
3380
3381
3382auto HexagonVectorCombine::vbytes(IRBuilderBase &Builder, Value *Val) const
3385 if (ScalarTy == getByteTy())
3386 return Val;
3387
3388 if (ScalarTy != getBoolTy())
3389 return Builder.CreateBitCast(Val, getByteTy(getSizeOf(Val)), "cst");
3390
3391 if (auto *VecTy = dyn_cast<VectorType>(Val->getType()))
3392 return Builder.CreateSExt(Val, VectorType::get(getByteTy(), VecTy), "sxt");
3393 return Builder.CreateSExt(Val, getByteTy(), "sxt");
3394}
3395
3396auto HexagonVectorCombine::subvector(IRBuilderBase &Builder, Value *Val,
3397 unsigned Start, unsigned Length) const
3400 return getElementRange(Builder, Val, Val, Start, Length);
3401}
3402
3403auto HexagonVectorCombine::sublo(IRBuilderBase &Builder, Value *Val) const
3405 size_t Len = length(Val);
3406 assert(Len % 2 == 0 && "Length should be even");
3407 return subvector(Builder, Val, 0, Len / 2);
3408}
3409
3410auto HexagonVectorCombine::subhi(IRBuilderBase &Builder, Value *Val) const
3412 size_t Len = length(Val);
3413 assert(Len % 2 == 0 && "Length should be even");
3414 return subvector(Builder, Val, Len / 2, Len / 2);
3415}
3416
3417auto HexagonVectorCombine::vdeal(IRBuilderBase &Builder, Value *Val0,
3419 assert(Val0->getType() == Val1->getType());
3420 int Len = length(Val0);
3421 SmallVector<int, 128> Mask(2 * Len);
3422
3423 for (int i = 0; i != Len; ++i) {
3424 Mask[i] = 2 * i;
3425 Mask[i + Len] = 2 * i + 1;
3426 }
3427 return Builder.CreateShuffleVector(Val0, Val1, Mask, "shf");
3428}
3429
3430auto HexagonVectorCombine::vshuff(IRBuilderBase &Builder, Value *Val0,
3432 assert(Val0->getType() == Val1->getType());
3433 int Len = length(Val0);
3434 SmallVector<int, 128> Mask(2 * Len);
3435
3436 for (int i = 0; i != Len; ++i) {
3437 Mask[2 * i + 0] = i;
3438 Mask[2 * i + 1] = i + Len;
3439 }
3440 return Builder.CreateShuffleVector(Val0, Val1, Mask, "shf");
3441}
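// Permutation example (illustrative), with Len == 4, Val0 == {a0 a1 a2 a3}
// and Val1 == {b0 b1 b2 b3}:
//
//   vdeal  -> { a0 a2 b0 b2 | a1 a3 b1 b3 }   // even lanes, then odd lanes
//   vshuff -> { a0 b0 a1 b1 | a2 b2 a3 b3 }   // interleave
//
// The two masks are inverse permutations of each other, which is what the
// recursive element splitting and joining further below rely on.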
3442
3443auto HexagonVectorCombine::createHvxIntrinsic(IRBuilderBase &Builder,
3449 auto getCast = [&](IRBuilderBase &Builder, Value *Val,
3451 Type *SrcTy = Val->getType();
3452 if (SrcTy == DestTy)
3453 return Val;
3454
3455
3456
3457 assert(HST.isTypeForHVX(SrcTy, true));
3458
3459 Type *BoolTy = Type::getInt1Ty(F.getContext());
3460 if (cast<VectorType>(SrcTy)->getElementType() != BoolTy)
3461 return Builder.CreateBitCast(Val, DestTy, "cst");
3462
3463
3464 unsigned HwLen = HST.getVectorLength();
3465 Intrinsic::ID TC = HwLen == 64 ? Intrinsic::hexagon_V6_pred_typecast
3466 : Intrinsic::hexagon_V6_pred_typecast_128B;
3467 return Builder.CreateIntrinsic(TC, {DestTy, Val->getType()}, {Val},
3468 nullptr, "cup");
3469 };
3470
3474
3475 SmallVector<Value *, 4> IntrArgs;
3476 for (int i = 0, e = Args.size(); i != e; ++i) {
3477 Value *A = Args[i];
3478 Type *T = IntrTy->getParamType(i);
3479 if (A->getType() != T) {
3480 IntrArgs.push_back(getCast(Builder, A, T));
3481 } else {
3482 IntrArgs.push_back(A);
3483 }
3484 }
3485 StringRef MaybeName = !IntrTy->getReturnType()->isVoidTy() ? "cup" : "";
3486 CallInst *Call = Builder.CreateCall(IntrFn, IntrArgs, MaybeName);
3487
3491
3492 Type *CallTy = Call->getType();
3493 if (RetTy == nullptr || CallTy == RetTy)
3494 return Call;
3495
3496 assert(HST.isTypeForHVX(CallTy, true));
3497 return getCast(Builder, Call, RetTy);
3498}
3499
3500auto HexagonVectorCombine::splitVectorElements(IRBuilderBase &Builder,
3502 unsigned ToWidth) const
3504
3505
3506
3507
3508
3509
3510
3511
3512
3513
3514
3515
3516 auto *VecTy = cast<VectorType>(Vec->getType());
3517 assert(VecTy->getElementType()->isIntegerTy());
3518 unsigned FromWidth = VecTy->getScalarSizeInBits();
3519 assert(isPowerOf2_32(ToWidth) && isPowerOf2_32(FromWidth));
3520 assert(ToWidth <= FromWidth && "Breaking up into wider elements?");
3521 unsigned NumResults = FromWidth / ToWidth;
3522
3523 SmallVector<Value *> Results(NumResults);
3524 Results[0] = Vec;
3525 unsigned Length = length(VecTy);
3526
3527
3528
3529 auto splitInHalf = [&](unsigned Begin, unsigned End, auto splitFunc) -> void {
3530
3531
3532
3533 if (Begin + 1 == End)
3534 return;
3535
3536 Value *Val = Results[Begin];
3537 unsigned Width = Val->getType()->getScalarSizeInBits();
3538
3539 auto *VTy = VectorType::get(getIntTy(Width / 2), 2 * Length, false);
3540 Value *VVal = Builder.CreateBitCast(Val, VTy, "cst");
3541
3542 Value *Res = vdeal(Builder, sublo(Builder, VVal), subhi(Builder, VVal));
3543
3544 unsigned Half = (Begin + End) / 2;
3545 Results[Begin] = sublo(Builder, Res);
3546 Results[Half] = subhi(Builder, Res);
3547
3548 splitFunc(Begin, Half, splitFunc);
3549 splitFunc(Half, End, splitFunc);
3550 };
3551
3552 splitInHalf(0, NumResults, splitInHalf);
3553 return Results;
3554}
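// Worked case for the recursion above (illustrative): FromWidth == 32 and
// ToWidth == 8 gives NumResults == 4. The first vdeal separates each 32-bit
// lane into its low and high 16-bit halves (Results[0] and Results[2]); the
// two recursive calls repeat this on 8-bit halves, leaving
// Results == { bits 0..7, 8..15, 16..23, 24..31 } of every original lane.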
3555
3556auto HexagonVectorCombine::joinVectorElements(IRBuilderBase &Builder,
3558 VectorType *ToType) const
3559 -> Value * {
3560 assert(ToType->getElementType()->isIntegerTy());
3561
3562
3563
3564
3565
3566
3567
3568
3570
3571 unsigned ToWidth = ToType->getScalarSizeInBits();
3572 unsigned Width = Inputs.front()->getType()->getScalarSizeInBits();
3573 assert(Width <= ToWidth);
3574 assert(isPowerOf2_32(Width) && isPowerOf2_32(ToWidth));
3575 unsigned Length = length(Inputs.front()->getType());
3576
3577 unsigned NeedInputs = ToWidth / Width;
3578 if (Inputs.size() != NeedInputs) {
3579 // Having too many inputs is ok: drop the high bits (usual wrap-around).
3580 // If there are too few, fill them with the sign bit.
3581 Value *Last = Inputs.back();
3582 Value *Sign = Builder.CreateAShr(
3583 Last, getConstSplat(Last->getType(), Width - 1), "asr");
3584 Inputs.resize(NeedInputs, Sign);
3585 }
3586
3587 while (Inputs.size() > 1) {
3588 Width *= 2;
3589 auto *VTy = VectorType::get(getIntTy(Width), Length, false);
3590 for (int i = 0, e = Inputs.size(); i < e; i += 2) {
3591 Value *Res = vshuff(Builder, Inputs[i], Inputs[i + 1]);
3592 Inputs[i / 2] = Builder.CreateBitCast(Res, VTy, "cst");
3593 }
3594 Inputs.resize(Inputs.size() / 2);
3595 }
3596
3597 assert(Inputs.front()->getType() == ToType);
3598 return Inputs.front();
3599}
3600
3601auto HexagonVectorCombine::calculatePointerDifference(Value *Ptr0,
3602 Value *Ptr1) const
3603 -> std::optional {
3604
3605 const SCEV *Scev0 = SE.getSCEV(Ptr0);
3606 const SCEV *Scev1 = SE.getSCEV(Ptr1);
3607 const SCEV *ScevDiff = SE.getMinusSCEV(Scev0, Scev1);
3608 if (auto *Const = dyn_cast<SCEVConstant>(ScevDiff)) {
3609 APInt V = Const->getAPInt();
3610 if (V.isSignedIntN(8 * sizeof(int)))
3611 return static_cast<int>(V.getSExtValue());
3612 }
3613
3614 struct Builder : IRBuilder<> {
3615 Builder(BasicBlock *B) : IRBuilder<>(B->getTerminator()) {}
3616 ~Builder() {
3617 for (Instruction *I : llvm::reverse(ToErase))
3618 I->eraseFromParent();
3619 }
3620 SmallVector<Instruction *, 8> ToErase;
3621 };
3622
3623#define CallBuilder(B, F) \
3624 [&](auto &B_) { \
3625 Value *V = B_.F; \
3626 if (auto *I = dyn_cast<Instruction>(V)) \
3627 B_.ToErase.push_back(I); \
3628 return V; \
3629 }(B)
3630
3631 auto Simplify = [this](Value *V) {
3632 if (Value *S = simplify(V))
3633 return S;
3634 return V;
3635 };
3636
3637 auto StripBitCast = [](Value *V) {
3638 while (auto *C = dyn_cast<BitCastInst>(V))
3639 V = C->getOperand(0);
3640 return V;
3641 };
3642
3643 Ptr0 = StripBitCast(Ptr0);
3644 Ptr1 = StripBitCast(Ptr1);
3645 if (!isa<GetElementPtrInst>(Ptr0) || !isa<GetElementPtrInst>(Ptr1))
3646 return std::nullopt;
3647
3648 auto *Gep0 = cast<GetElementPtrInst>(Ptr0);
3649 auto *Gep1 = cast<GetElementPtrInst>(Ptr1);
3650 if (Gep0->getPointerOperand() != Gep1->getPointerOperand())
3651 return std::nullopt;
3652 if (Gep0->getSourceElementType() != Gep1->getSourceElementType())
3653 return std::nullopt;
3654
3655 Builder B(Gep0->getParent());
3656 int Scale = getSizeOf(Gep0->getSourceElementType(), Alloc);
3657
3658
3659 if (Gep0->getNumOperands() != 2 || Gep1->getNumOperands() != 2)
3660 return std::nullopt;
3661
3662 Value *Idx0 = Gep0->getOperand(1);
3663 Value *Idx1 = Gep1->getOperand(1);
3664
3665
3666 if (auto *Diff = dyn_cast<ConstantInt>(
3667 Simplify(CallBuilder(B, CreateSub(Idx0, Idx1)))))
3668 return Diff->getSExtValue() * Scale;
3669
3670 KnownBits Known0 = getKnownBits(Idx0, Gep0);
3671 KnownBits Known1 = getKnownBits(Idx1, Gep1);
3672 APInt Unknown = ~(Known0.Zero | Known0.One) | ~(Known1.Zero | Known1.One);
3673 if (Unknown.isAllOnes())
3674 return std::nullopt;
3675
3676 Value *MaskU = ConstantInt::get(Idx0->getType(), Unknown);
3677 Value *AndU0 = Simplify(CallBuilder(B, CreateAnd(Idx0, MaskU)));
3678 Value *AndU1 = Simplify(CallBuilder(B, CreateAnd(Idx1, MaskU)));
3679 Value *SubU = Simplify(CallBuilder(B, CreateSub(AndU0, AndU1)));
3680 int Diff0 = 0;
3681 if (auto *C = dyn_cast<ConstantInt>(SubU)) {
3682 Diff0 = C->getSExtValue();
3683 } else {
3684 return std::nullopt;
3685 }
3686
3687 Value *MaskK = ConstantInt::get(Idx0->getType(), ~Unknown);
3688 Value *AndK0 = Simplify(CallBuilder(B, CreateAnd(Idx0, MaskK)));
3689 Value *AndK1 = Simplify(CallBuilder(B, CreateAnd(Idx1, MaskK)));
3690 Value *SubK = Simplify(CallBuilder(B, CreateSub(AndK0, AndK1)));
3691 int Diff1 = 0;
3692 if (auto *C = dyn_cast<ConstantInt>(SubK)) {
3693 Diff1 = C->getSExtValue();
3694 } else {
3695 return std::nullopt;
3696 }
3697
3698 return (Diff0 + Diff1) * Scale;
3699
3700#undef CallBuilder
3701}
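// Example of the known-bits trick above (illustrative): with
// Idx0 == (n << 2) + 4 and Idx1 == (n << 2), the low two bits of both
// indexes are known zero, so they land in the "known" masks and subtract
// to 0, while the unknown parts differ by an expression that instruction
// simplification folds to the constant 4; the result is a byte distance of
// 4 * Scale.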
3702
3703auto HexagonVectorCombine::getNumSignificantBits(const Value *V,
3704 const Instruction *CtxI) const
3705 -> unsigned {
3706 return ComputeMaxSignificantBits(V, DL, &AC, CtxI, &DT);
3707}
3708
3709auto HexagonVectorCombine::getKnownBits(const Value *V,
3710 const Instruction *CtxI) const
3711 -> KnownBits {
3712 return computeKnownBits(V, DL, &AC, CtxI, &DT);
3713}
3714
3715auto HexagonVectorCombine::isSafeToClone(const Instruction &In) const -> bool {
3716 if (In.mayHaveSideEffects() || In.isAtomic() || In.isVolatile() ||
3717 In.isFenceLike() || In.mayReadOrWriteMemory()) {
3718 return false;
3719 }
3720 if (isa<CallBase>(In) || isa<AllocaInst>(In))
3721 return false;
3722 return true;
3723}
3724
3725template
3726auto HexagonVectorCombine::isSafeToMoveBeforeInBB(const Instruction &In,
3728 const T &IgnoreInsts) const
3729 -> bool {
3730 auto getLocOrNone =
3731 [this](const Instruction &I) -> std::optional<MemoryLocation> {
3732 if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
3733 switch (II->getIntrinsicID()) {
3734 case Intrinsic::masked_load:
3735 return MemoryLocation::getForArgument(II, 0, TLI);
3736 case Intrinsic::masked_store:
3737 return MemoryLocation::getForArgument(II, 1, TLI);
3738 }
3739 }
3740 return MemoryLocation::getOrNone(&I);
3741 };
3742
3743
3746
3747 if (isa<PHINode>(In) || (To != Block.end() && isa<PHINode>(*To)))
3748 return false;
3749
3750 if (!mayHaveNonDefUseDependency(In))
3751 return true;
3752 bool MayWrite = In.mayWriteToMemory();
3753 auto MaybeLoc = getLocOrNone(In);
3754
3755 auto From = In.getIterator();
3756 if (From == To)
3757 return true;
3758 bool MoveUp = (To != Block.end() && To->comesBefore(&In));
3759 auto Range =
3760 MoveUp ? std::make_pair(To, From) : std::make_pair(std::next(From), To);
3761 for (auto It = Range.first; It != Range.second; ++It) {
3762 const Instruction &I = *It;
3763 if (llvm::is_contained(IgnoreInsts, &I))
3764 continue;
3765
3766 if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
3767 if (II->getIntrinsicID() == Intrinsic::assume)
3768 continue;
3769 }
3770
3771 if (mayHaveNonDefUseDependency(I))
3772 return false;
3773 if (auto *CB = dyn_cast<CallBase>(&I)) {
3774 if (!CB->hasFnAttr(Attribute::WillReturn))
3775 return false;
3776 if (!CB->hasFnAttr(Attribute::NoSync))
3777 return false;
3778 }
3779 if (I.mayReadOrWriteMemory()) {
3780 auto MaybeLocI = getLocOrNone(I);
3781 if (MayWrite || I.mayWriteToMemory()) {
3782 if (!MaybeLoc || !MaybeLocI)
3783 return false;
3784 if (!AA.isNoAlias(*MaybeLoc, *MaybeLocI))
3785 return false;
3786 }
3787 }
3788 }
3789 return true;
3790}
3791
3792auto HexagonVectorCombine::isByteVecTy(Type *Ty) const -> bool {
3793 if (auto *VecTy = dyn_cast<VectorType>(Ty))
3794 return VecTy->getElementType() == getByteTy();
3795 return false;
3796}
3797
3798auto HexagonVectorCombine::getElementRange(IRBuilderBase &Builder, Value *Lo,
3799 Value *Hi, int Start, int Length) const
3800 -> Value * {
3801 assert(0 <= Start && size_t(Start + Length) < length(Lo) + length(Hi));
3802 SmallVector<int, 128> SMask(Length);
3803 std::iota(SMask.begin(), SMask.end(), Start);
3804 return Builder.CreateShuffleVector(Lo, Hi, SMask, "shf");
3805}
3806
3807
3808
3809namespace {
3810class HexagonVectorCombineLegacy : public FunctionPass {
3811public:
3812 static char ID;
3813
3814 HexagonVectorCombineLegacy() : FunctionPass(ID) {}
3815
3816 StringRef getPassName() const override { return "Hexagon Vector Combine"; }
3817
3818 void getAnalysisUsage(AnalysisUsage &AU) const override {
3819 AU.setPreservesCFG();
3820 AU.addRequired<AAResultsWrapperPass>();
3821 AU.addRequired<AssumptionCacheTracker>();
3822 AU.addRequired<DominatorTreeWrapperPass>();
3823 AU.addRequired<ScalarEvolutionWrapperPass>();
3824 AU.addRequired<TargetLibraryInfoWrapperPass>();
3825 AU.addRequired<TargetPassConfig>();
3826 FunctionPass::getAnalysisUsage(AU);
3827 }
3828
3829 bool runOnFunction(Function &F) override {
3830 if (skipFunction(F))
3831 return false;
3832 AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
3833 AssumptionCache &AC =
3834 getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
3835 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
3836 ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
3837 TargetLibraryInfo &TLI =
3838 getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
3839 auto &TM = getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
3840 HexagonVectorCombine HVC(F, AA, AC, DT, SE, TLI, TM);
3841 return HVC.run();
3842 }
3843};
3844}
3845
3846char HexagonVectorCombineLegacy::ID = 0;
3847
3849 "Hexagon Vector Combine", false, false)
3858
3860 return new HexagonVectorCombineLegacy();
3861}