LLVM: lib/Target/Hexagon/HexagonVectorCombine.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
34#include "llvm/IR/IntrinsicsHexagon.h"
45
49
50#include
51#include
52#include
53#include
54#include
55#include
56#include
57
58#define DEBUG_TYPE "hexagon-vc"
59
60
61
62
63#define DEFAULT_HVX_VTCM_PAGE_SIZE 131072
64
65using namespace llvm;
66
67namespace {
72
77
78class HexagonVectorCombine {
79public:
83 : F(F_), DL(F.getDataLayout()), AA(AA_), AC(AC_), DT(DT_),
84 SE(SE_), TLI(TLI_),
86
87 bool run();
88
89
91
92
93 Type *getByteTy(int ElemCount = 0) const;
94
95
96 Type *getBoolTy(int ElemCount = 0) const;
97
99
100 std::optional getIntValue(const Value *Val) const;
101
103
105
106 bool isTrue(const Value *Val) const;
107
108 bool isFalse(const Value *Val) const;
109
110
111 VectorType *getHvxTy(Type *ElemTy, bool Pair = false) const;
112
113 enum SizeKind {
114 Store,
115 Alloc,
116 };
117 int getSizeOf(const Value *Val, SizeKind Kind = Store) const;
118 int getSizeOf(const Type *Ty, SizeKind Kind = Store) const;
119 int getTypeAlignment(Type *Ty) const;
120 size_t length(Value *Val) const;
121 size_t length(Type *Ty) const;
122
124
126 int Length, int Where) const;
128 Value *Amt) const;
130 Value *Amt) const;
133 Value *Pad) const;
135 Type *ToTy) const;
139 unsigned Length) const;
144
150 unsigned ToWidth) const;
153
154 std::optional calculatePointerDifference(Value *Ptr0, Value *Ptr1) const;
155
156 unsigned getNumSignificantBits(const Value *V,
157 const Instruction *CtxI = nullptr) const;
159 const Instruction *CtxI = nullptr) const;
160
161 bool isSafeToClone(const Instruction &In) const;
162
163 template <typename T = std::vector<Instruction *>>
164 bool isSafeToMoveBeforeInBB(const Instruction &In,
166 const T &IgnoreInsts = {}) const;
167
168
169 [[maybe_unused]] bool isByteVecTy(Type *Ty) const;
170
179
180private:
182 int Start, int Length) const;
183};
184
185class AlignVectors {
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200public:
201 AlignVectors(const HexagonVectorCombine &HVC_) : HVC(HVC_) {}
202
203 bool run();
204
205private:
206 using InstList = std::vector<Instruction *>;
208
209 struct AddrInfo {
210 AddrInfo(const AddrInfo &) = default;
213 : Inst(I), Addr(A), ValTy(T), HaveAlign(H),
214 NeedAlign(HVC.getTypeAlignment(ValTy)) {}
215 AddrInfo &operator=(const AddrInfo &) = default;
216
217
223 int Offset = 0;
224
225 };
226 using AddrList = std::vector;
227
228 struct InstrLess {
231 }
232 };
233 using DepList = std::set<Instruction *, InstrLess>;
234
235 struct MoveGroup {
236 MoveGroup(const AddrInfo &AI, Instruction *B, bool Hvx, bool Load)
237 : Base(B), Main{AI.Inst}, Clones{}, IsHvx(Hvx), IsLoad(Load) {}
238 MoveGroup() = default;
239 Instruction *Base;
240 InstList Main;
241 InstList Deps;
242 InstMap Clones;
243 bool IsHvx;
244 bool IsLoad;
245 };
246 using MoveList = std::vector;
247
248 struct ByteSpan {
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264 struct Segment {
265
266 Segment(Value *Val, int Begin, int Len)
267 : Val(Val), Start(Begin), Size(Len) {}
268 Segment(const Segment &Seg) = default;
269 Segment &operator=(const Segment &Seg) = default;
270 Value *Val;
271 int Start;
272 int Size;
273 };
274
276 Block(Value *Val, int Len, int Pos) : Seg(Val, 0, Len), Pos(Pos) {}
277 Block(Value *Val, int Off, int Len, int Pos)
278 : Seg(Val, Off, Len), Pos(Pos) {}
280 Block &operator=(const Block &Blk) = default;
281 Segment Seg;
282 int Pos;
283 };
284
285 int extent() const;
286 ByteSpan section(int Start, int Length) const;
287 ByteSpan &shift(int Offset);
289
290 int size() const { return Blocks.size(); }
291 Block &operator[](int i) { return Blocks[i]; }
292 const Block &operator[](int i) const { return Blocks[i]; }
293
294 std::vector Blocks;
295
297 iterator begin() { return Blocks.begin(); }
298 iterator end() { return Blocks.end(); }
300 const_iterator begin() const { return Blocks.begin(); }
302 };
303
304 std::optional getAddrInfo(Instruction &In) const;
305 bool isHvx(const AddrInfo &AI) const;
306
307 [[maybe_unused]] bool isSectorTy(Type *Ty) const;
308
311 Value *getPassThrough(Value *Val) const;
312
314 int Adjust,
315 const InstMap &CloneMap = InstMap()) const;
317 int Alignment,
318 const InstMap &CloneMap = InstMap()) const;
319
324 int Alignment,
326
331 int Alignment,
333
340
342 bool createAddressGroups();
343 MoveList createLoadGroups(const AddrList &Group) const;
344 MoveList createStoreGroups(const AddrList &Group) const;
345 bool moveTogether(MoveGroup &Move) const;
346 template
348
349 void realignLoadGroup(IRBuilderBase &Builder, const ByteSpan &VSpan,
350 int ScLen, Value *AlignVal, Value *AlignAddr) const;
351 void realignStoreGroup(IRBuilderBase &Builder, const ByteSpan &VSpan,
352 int ScLen, Value *AlignVal, Value *AlignAddr) const;
353 bool realignGroup(const MoveGroup &Move) const;
354
356 int Alignment) const;
357
362
363 std::map<Instruction *, AddrList> AddrGroups;
364 const HexagonVectorCombine &HVC;
365};
366
368 const AlignVectors::AddrInfo &AI) {
369 OS << "Inst: " << AI.Inst << " " << *AI.Inst << '\n';
370 OS << "Addr: " << *AI.Addr << '\n';
371 OS << "Type: " << *AI.ValTy << '\n';
372 OS << "HaveAlign: " << AI.HaveAlign.value() << '\n';
373 OS << "NeedAlign: " << AI.NeedAlign.value() << '\n';
374 OS << "Offset: " << AI.Offset;
375 return OS;
376}
377
379 const AlignVectors::MoveGroup &MG) {
380 OS << "IsLoad:" << (MG.IsLoad ? "yes" : "no");
381 OS << ", IsHvx:" << (MG.IsHvx ? "yes" : "no") << '\n';
382 OS << "Main\n";
384 OS << " " << *I << '\n';
385 OS << "Deps\n";
387 OS << " " << *I << '\n';
388 OS << "Clones\n";
389 for (auto [K, V] : MG.Clones) {
390 OS << " ";
391 K->printAsOperand(OS, false);
392 OS << "\t-> " << *V << '\n';
393 }
394 return OS;
395}
396
399 OS << " @" << B.Pos << " [" << B.Seg.Start << ',' << B.Seg.Size << "] ";
400 if (B.Seg.Val == reinterpret_cast<const Value *>(&B)) {
401 OS << "(self:" << B.Seg.Val << ')';
402 } else if (B.Seg.Val != nullptr) {
403 OS << *B.Seg.Val;
404 } else {
405 OS << "(null)";
406 }
407 return OS;
408}
409
411 const AlignVectors::ByteSpan &BS) {
412 OS << "ByteSpan[size=" << BS.size() << ", extent=" << BS.extent() << '\n';
413 for (const AlignVectors::ByteSpan::Block &B : BS)
414 OS << B << '\n';
415 OS << ']';
416 return OS;
417}
418
419class HvxIdioms {
420public:
421 enum DstQualifier {
422 Undefined = 0,
423 Arithmetic,
424 LdSt,
425 LLVM_Gather,
426 LLVM_Scatter,
427 HEX_Gather_Scatter,
428 HEX_Gather,
429 HEX_Scatter,
431 };
432
433 HvxIdioms(const HexagonVectorCombine &HVC_) : HVC(HVC_) {
434 auto *Int32Ty = HVC.getIntTy(32);
435 HvxI32Ty = HVC.getHvxTy(Int32Ty, false);
436 HvxP32Ty = HVC.getHvxTy(Int32Ty, true);
437 }
438
439 bool run();
440
441private:
443
444
445
446
447 struct SValue {
449 Signedness Sgn;
450 };
451
452 struct FxpOp {
453 unsigned Opcode;
454 unsigned Frac;
456
457 std::optional RoundAt;
459 };
460
462 -> std::pair<unsigned, Signedness>;
463 auto canonSgn(SValue X, SValue Y) const -> std::pair<SValue, SValue>;
464
465 auto matchFxpMul(Instruction &In) const -> std::optional;
466 auto processFxpMul(Instruction &In, const FxpOp &Op) const -> Value *;
467
469 const FxpOp &Op) const -> Value *;
470 auto createMulQ15(IRBuilderBase &Builder, SValue X, SValue Y,
471 bool Rounding) const -> Value *;
472 auto createMulQ31(IRBuilderBase &Builder, SValue X, SValue Y,
473 bool Rounding) const -> Value *;
474
476 Value *CarryIn = nullptr) const
477 -> std::pair<Value *, Value *>;
478 auto createMul16(IRBuilderBase &Builder, SValue X, SValue Y) const -> Value *;
479 auto createMulH16(IRBuilderBase &Builder, SValue X, SValue Y) const
481 auto createMul32(IRBuilderBase &Builder, SValue X, SValue Y) const
482 -> std::pair<Value *, Value *>;
488
489 bool matchScatter(Instruction &In) const;
493
496 const HexagonVectorCombine &HVC;
497
499};
500
502 const HvxIdioms::FxpOp &Op) {
503 static const char *SgnNames[] = {"Positive", "Signed", "Unsigned"};
505 if (Op.RoundAt.has_value()) {
506 if (Op.Frac != 0 && *Op.RoundAt == Op.Frac - 1) {
507 OS << ":rnd";
508 } else {
509 OS << " + 1<<" << *Op.RoundAt;
510 }
511 }
512 OS << "\n X:(" << SgnNames[Op.X.Sgn] << ") " << *Op.X.Val << "\n"
513 << " Y:(" << SgnNames[Op.Y.Sgn] << ") " << *Op.Y.Val;
514 return OS;
515}
516
517}
518
519namespace {
520
// Return MaybeT if it is a non-null, unordered (non-atomic/non-volatile-
// ordered) memory operation, otherwise nullptr. The template parameter list
// was eaten by the HTML extraction and is restored here.
template <typename T> T *getIfUnordered(T *MaybeT) {
  return MaybeT && MaybeT->isUnordered() ? MaybeT : nullptr;
}
526}
529}
532}
533
// erase_if for std::map: remove every element for which the predicate holds.
// std::map has no remove_if-style algorithm, so elements are erased one at a
// time, using the iterator returned by erase() to continue the walk.
// Pre-VS2019-16.6 MSVC cannot deduce the variadic form, hence the fallback.
#if !defined(_MSC_VER) || _MSC_VER >= 1926
template <typename Pred, typename... Ts>
void erase_if(std::map<Ts...> &map, Pred p)
#else
template <typename Pred, typename T, typename U>
void erase_if(std::map<T, U> &map, Pred p)
#endif
{
  auto it = map.begin();
  while (it != map.end()) {
    if (p(*it))
      it = map.erase(it); // erase() returns the next valid iterator.
    else
      ++it;
  }
}
552
553
554template <typename Pred, typename T> void erase_if(T &&container, Pred p) {
556}
557
558}
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592auto AlignVectors::ByteSpan::extent() const -> int {
593 if (size() == 0)
594 return 0;
595 int Min = Blocks[0].Pos;
596 int Max = Blocks[0].Pos + Blocks[0].Seg.Size;
597 for (int i = 1, e = size(); i != e; ++i) {
598 Min = std::min(Min, Blocks[i].Pos);
599 Max = std::max(Max, Blocks[i].Pos + Blocks[i].Seg.Size);
600 }
601 return Max - Min;
602}
603
604auto AlignVectors::ByteSpan::section(int Start, int Length) const -> ByteSpan {
606 for (const ByteSpan::Block &B : Blocks) {
607 int L = std::max(B.Pos, Start);
608 int R = std::min(B.Pos + B.Seg.Size, Start + Length);
609 if (L < R) {
610
611 int Off = L > B.Pos ? L - B.Pos : 0;
612 Section.Blocks.emplace_back(B.Seg.Val, B.Seg.Start + Off, R - L, L);
613 }
614 }
616}
617
618auto AlignVectors::ByteSpan::shift(int Offset) -> ByteSpan & {
621 return *this;
622}
623
625 SmallVector<Value *, 8> Values(Blocks.size());
626 for (int i = 0, e = Blocks.size(); i != e; ++i)
627 Values[i] = Blocks[i].Seg.Val;
628 return Values;
629}
630
631auto AlignVectors::getAddrInfo(Instruction &In) const
632 -> std::optional {
634 return AddrInfo(HVC, L, L->getPointerOperand(), L->getType(),
635 L->getAlign());
637 return AddrInfo(HVC, S, S->getPointerOperand(),
638 S->getValueOperand()->getType(), S->getAlign());
641 switch (ID) {
642 case Intrinsic::masked_load:
643 return AddrInfo(HVC, II, II->getArgOperand(0), II->getType(),
644 II->getParamAlign(0).valueOrOne());
645 case Intrinsic::masked_store:
646 return AddrInfo(HVC, II, II->getArgOperand(1),
647 II->getArgOperand(0)->getType(),
648 II->getParamAlign(1).valueOrOne());
649 }
650 }
651 return std::nullopt;
652}
653
654auto AlignVectors::isHvx(const AddrInfo &AI) const -> bool {
656}
657
658auto AlignVectors::getPayload(Value *Val) const -> Value * {
662 ID = II->getIntrinsicID();
664 return In->getOperand(0);
665 }
666 return Val;
667}
668
669auto AlignVectors::getMask(Value *Val) const -> Value * {
671 switch (II->getIntrinsicID()) {
672 case Intrinsic::masked_load:
673 return II->getArgOperand(1);
674 case Intrinsic::masked_store:
675 return II->getArgOperand(2);
676 }
677 }
678
679 Type *ValTy = getPayload(Val)->getType();
683}
684
685auto AlignVectors::getPassThrough(Value *Val) const -> Value * {
687 if (II->getIntrinsicID() == Intrinsic::masked_load)
688 return II->getArgOperand(2);
689 }
691}
692
693auto AlignVectors::createAdjustedPointer(IRBuilderBase &Builder, Value *Ptr,
694 Type *ValTy, int Adjust,
695 const InstMap &CloneMap) const
698 if (Instruction *New = CloneMap.lookup(I))
699 Ptr = New;
700 return Builder.CreatePtrAdd(Ptr, HVC.getConstInt(Adjust), "gep");
701}
702
703auto AlignVectors::createAlignedPointer(IRBuilderBase &Builder, Value *Ptr,
704 Type *ValTy, int Alignment,
705 const InstMap &CloneMap) const
709 for (auto [Old, New] : CloneMap)
710 I->replaceUsesOfWith(Old, New);
711 return I;
712 }
713 return V;
714 };
715 Value *AsInt = Builder.CreatePtrToInt(Ptr, HVC.getIntTy(), "pti");
716 Value *Mask = HVC.getConstInt(-Alignment);
717 Value *And = Builder.CreateAnd(remap(AsInt), Mask, "and");
718 return Builder.CreateIntToPtr(
719 And, PointerType::getUnqual(ValTy->getContext()), "itp");
720}
721
722auto AlignVectors::createLoad(IRBuilderBase &Builder, Type *ValTy, Value *Ptr,
723 Value *Predicate, int Alignment, Value *Mask,
726 bool HvxHasPredLoad = HVC.HST.useHVXV62Ops();
727
728 if (Predicate) {
730 "Expectning scalar predicate");
731 if (HVC.isFalse(Predicate))
733 if (!HVC.isTrue(Predicate) && HvxHasPredLoad) {
734 Value *Load = createPredicatedLoad(Builder, ValTy, Ptr, Predicate,
735 Alignment, MDSources);
736 return Builder.CreateSelect(Mask, Load, PassThru);
737 }
738
739 }
740 assert(!HVC.isUndef(Mask));
741 if (HVC.isZero(Mask))
742 return PassThru;
743 if (HVC.isTrue(Mask))
744 return createSimpleLoad(Builder, ValTy, Ptr, Alignment, MDSources);
745
747 Mask, PassThru, "mld");
750}
751
752auto AlignVectors::createSimpleLoad(IRBuilderBase &Builder, Type *ValTy,
753 Value *Ptr, int Alignment,
757 Builder.CreateAlignedLoad(ValTy, Ptr, Align(Alignment), "ald");
760}
761
762auto AlignVectors::createPredicatedLoad(IRBuilderBase &Builder, Type *ValTy,
764 int Alignment,
768 "Predicates 'scalar' vector loads not yet supported");
770 assert(->getType()->isVectorTy() && "Expectning scalar predicate");
771 assert(HVC.getSizeOf(ValTy, HVC.Alloc) % Alignment == 0);
772 if (HVC.isFalse(Predicate))
774 if (HVC.isTrue(Predicate))
775 return createSimpleLoad(Builder, ValTy, Ptr, Alignment, MDSources);
776
777 auto V6_vL32b_pred_ai = HVC.HST.getIntrinsicId(Hexagon::V6_vL32b_pred_ai);
778
779 return HVC.createHvxIntrinsic(Builder, V6_vL32b_pred_ai, ValTy,
780 {Predicate, Ptr, HVC.getConstInt(0)}, {},
781 MDSources);
782}
783
784auto AlignVectors::createStore(IRBuilderBase &Builder, Value *Val, Value *Ptr,
785 Value *Predicate, int Alignment, Value *Mask,
787 if (HVC.isZero(Mask) || HVC.isUndef(Val) || HVC.isUndef(Mask))
789 assert(!Predicate || (->getType()->isVectorTy() &&
790 "Expectning scalar predicate"));
791 if (Predicate) {
792 if (HVC.isFalse(Predicate))
794 if (HVC.isTrue(Predicate))
796 }
797
798
799 if (HVC.isTrue(Mask)) {
800 if (Predicate) {
801 return createPredicatedStore(Builder, Val, Ptr, Predicate, Alignment,
802 MDSources);
803 }
804
805 return createSimpleStore(Builder, Val, Ptr, Alignment, MDSources);
806 }
807
808
809 if (!Predicate) {
811 Builder.CreateMaskedStore(Val, Ptr, Align(Alignment), Mask);
814 }
815
816
817
818 Value *PredLoad = createPredicatedLoad(Builder, Val->getType(), Ptr,
819 Predicate, Alignment, MDSources);
820 Value *Mux = Builder.CreateSelect(Mask, Val, PredLoad);
821 return createPredicatedStore(Builder, Mux, Ptr, Predicate, Alignment,
822 MDSources);
823}
824
825auto AlignVectors::createSimpleStore(IRBuilderBase &Builder, Value *Val,
826 Value *Ptr, int Alignment,
832}
833
834auto AlignVectors::createPredicatedStore(IRBuilderBase &Builder, Value *Val,
836 int Alignment,
840 "Predicates 'scalar' vector stores not yet supported");
842 if (HVC.isFalse(Predicate))
844 if (HVC.isTrue(Predicate))
845 return createSimpleStore(Builder, Val, Ptr, Alignment, MDSources);
846
847 assert(HVC.getSizeOf(Val, HVC.Alloc) % Alignment == 0);
848 auto V6_vS32b_pred_ai = HVC.HST.getIntrinsicId(Hexagon::V6_vS32b_pred_ai);
849
850 return HVC.createHvxIntrinsic(Builder, V6_vS32b_pred_ai, nullptr,
851 {Predicate, Ptr, HVC.getConstInt(0), Val}, {},
852 MDSources);
853}
854
855auto AlignVectors::getUpwardDeps(Instruction *In, Instruction *Base) const
856 -> DepList {
858 assert(In->getParent() == Parent &&
859 "Base and In should be in the same block");
860 assert(Base->comesBefore(In) && "Base should come before In");
861
862 DepList Deps;
863 std::deque<Instruction *> WorkQ = {In};
864 while (!WorkQ.empty()) {
866 WorkQ.pop_front();
867 if (D != In)
868 Deps.insert(D);
869 for (Value *Op : D->operands()) {
871 if (I->getParent() == Parent && Base->comesBefore(I))
872 WorkQ.push_back(I);
873 }
874 }
875 }
876 return Deps;
877}
878
879auto AlignVectors::createAddressGroups() -> bool {
880
881
882 AddrList WorkStack;
883
884 auto findBaseAndOffset = [&](AddrInfo &AI) -> std::pair<Instruction *, int> {
885 for (AddrInfo &W : WorkStack) {
886 if (auto D = HVC.calculatePointerDifference(AI.Addr, W.Addr))
887 return std::make_pair(W.Inst, *D);
888 }
889 return std::make_pair(nullptr, 0);
890 };
891
892 auto traverseBlock = [&](DomTreeNode *DomN, auto Visit) -> void {
894 for (Instruction &I : Block) {
895 auto AI = this->getAddrInfo(I);
896 if (!AI)
897 continue;
898 auto F = findBaseAndOffset(*AI);
900 if (Instruction *BI = F.first) {
901 AI->Offset = F.second;
902 GroupInst = BI;
903 } else {
904 WorkStack.push_back(*AI);
905 GroupInst = AI->Inst;
906 }
907 AddrGroups[GroupInst].push_back(*AI);
908 }
909
911 Visit(C, Visit);
912
913 while (!WorkStack.empty() && WorkStack.back().Inst->getParent() == &Block)
914 WorkStack.pop_back();
915 };
916
917 traverseBlock(HVC.DT.getRootNode(), traverseBlock);
918 assert(WorkStack.empty());
919
920
921
922
923 erase_if(AddrGroups, [](auto &G) { return G.second.size() == 1; });
924
925 erase_if(AddrGroups, [&](auto &G) {
927 G.second, [&](auto &I) { return HVC.HST.isTypeForHVX(I.ValTy); });
928 });
929
930 return !AddrGroups.empty();
931}
932
933auto AlignVectors::createLoadGroups(const AddrList &Group) const -> MoveList {
934
935
936
937 unsigned SizeLimit = VAGroupSizeLimit;
939 return {};
940
941 auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
942 assert(!Move.Main.empty() && "Move group should have non-empty Main");
943 if (Move.Main.size() >= SizeLimit)
944 return false;
945
946 if (Move.IsHvx != isHvx(Info))
947 return false;
948
950 if (Base->getParent() != Info.Inst->getParent())
951 return false;
952
953 if (!HVC.isSafeToMoveBeforeInBB(*Info.Inst, Base->getIterator()))
954 return false;
955
956 auto isSafeToCopyAtBase = [&](const Instruction *I) {
957 return HVC.isSafeToMoveBeforeInBB(*I, Base->getIterator()) &&
958 HVC.isSafeToClone(*I);
959 };
960 DepList Deps = getUpwardDeps(Info.Inst, Base);
962 return false;
963
964 Move.Main.push_back(Info.Inst);
966 return true;
967 };
968
969 MoveList LoadGroups;
970
971 for (const AddrInfo &Info : Group) {
972 if (.Inst->mayReadFromMemory())
973 continue;
974 if (LoadGroups.empty() || !tryAddTo(Info, LoadGroups.back()))
975 LoadGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), true);
976 }
977
978
979 erase_if(LoadGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });
980
981
983 erase_if(LoadGroups, [](const MoveGroup &G) { return G.IsHvx; });
984
985 return LoadGroups;
986}
987
988auto AlignVectors::createStoreGroups(const AddrList &Group) const -> MoveList {
989
990
991
992 unsigned SizeLimit = VAGroupSizeLimit;
994 return {};
995
996 auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
997 assert(!Move.Main.empty() && "Move group should have non-empty Main");
998 if (Move.Main.size() >= SizeLimit)
999 return false;
1000
1001
1002 assert(Info.Inst->getType()->isVoidTy() &&
1003 "Not handling stores with return values");
1004
1005 if (Move.IsHvx != isHvx(Info))
1006 return false;
1007
1008
1009
1011 if (Base->getParent() != Info.Inst->getParent())
1012 return false;
1013 if (!HVC.isSafeToMoveBeforeInBB(*Info.Inst, Base->getIterator(), Move.Main))
1014 return false;
1015 Move.Main.push_back(Info.Inst);
1016 return true;
1017 };
1018
1019 MoveList StoreGroups;
1020
1021 for (auto I = Group.rbegin(), E = Group.rend(); I != E; ++I) {
1022 const AddrInfo &Info = *I;
1023 if (.Inst->mayWriteToMemory())
1024 continue;
1025 if (StoreGroups.empty() || !tryAddTo(Info, StoreGroups.back()))
1026 StoreGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), false);
1027 }
1028
1029
1030 erase_if(StoreGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });
1031
1032
1034 erase_if(StoreGroups, [](const MoveGroup &G) { return G.IsHvx; });
1035
1036
1037
1038
1039 if (!VADoFullStores) {
1040 erase_if(StoreGroups, [this](const MoveGroup &G) {
1041 return G.IsHvx && llvm::all_of(G.Main, [this](Instruction *S) {
1042 auto MaybeInfo = this->getAddrInfo(*S);
1043 assert(MaybeInfo.has_value());
1044 return HVC.HST.isHVXVectorType(
1045 EVT::getEVT(MaybeInfo->ValTy, false));
1046 });
1047 });
1048 }
1049
1050 return StoreGroups;
1051}
1052
1053auto AlignVectors::moveTogether(MoveGroup &Move) const -> bool {
1054
1055 assert(!Move.Main.empty() && "Move group should have non-empty Main");
1057
1058 if (Move.IsLoad) {
1059
1060
1061 Move.Clones = cloneBefore(Where->getIterator(), Move.Deps);
1062
1064 for (Instruction *M : Main) {
1065 if (M != Where)
1066 M->moveAfter(Where);
1067 for (auto [Old, New] : Move.Clones)
1068 M->replaceUsesOfWith(Old, New);
1069 Where = M;
1070 }
1071
1072 for (int i = 0, e = Move.Deps.size(); i != e; ++i)
1073 Move.Deps[i] = Move.Clones[Move.Deps[i]];
1074 } else {
1075
1076
1077
1078 assert(Move.Deps.empty());
1079
1081 for (Instruction *M : Main.drop_front(1)) {
1083 Where = M;
1084 }
1085 }
1086
1087 return Move.Main.size() + Move.Deps.size() > 1;
1088}
1089
1090template
1092 -> InstMap {
1093 InstMap Map;
1094
1095 for (Instruction *I : Insts) {
1096 assert(HVC.isSafeToClone(*I));
1098 C->setName(Twine("c.") + I->getName() + ".");
1099 C->insertBefore(To);
1100
1101 for (auto [Old, New] : Map)
1102 C->replaceUsesOfWith(Old, New);
1103 Map.insert(std::make_pair(I, C));
1104 }
1105 return Map;
1106}
1107
1108auto AlignVectors::realignLoadGroup(IRBuilderBase &Builder,
1109 const ByteSpan &VSpan, int ScLen,
1110 Value *AlignVal, Value *AlignAddr) const
1111 -> void {
1113
1114 Type *SecTy = HVC.getByteTy(ScLen);
1115 int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;
1116 bool DoAlign = !HVC.isZero(AlignVal);
1118 BasicBlock *BaseBlock = Builder.GetInsertBlock();
1119
1120 ByteSpan ASpan;
1123
1124
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148 for (int Index = 0; Index != NumSectors; ++Index)
1149 ASpan.Blocks.emplace_back(nullptr, ScLen, Index * ScLen);
1150 for (int Index = 0; Index != NumSectors; ++Index) {
1151 ASpan.Blocks[Index].Seg.Val =
1152 reinterpret_cast<Value *>(&ASpan.Blocks[Index]);
1153 }
1154
1155
1156
1157
1158 DenseMap<void *, Instruction *> EarliestUser;
1160 if (B == nullptr)
1161 return true;
1162 if (A == nullptr)
1163 return false;
1164 assert(A->getParent() == B->getParent());
1165 return A->comesBefore(B);
1166 };
1167 auto earliestUser = [&](const auto &Uses) {
1169 for (const Use &U : Uses) {
1171 assert(I != nullptr && "Load used in a non-instruction?");
1172
1173
1174
1175 if (I->getParent() == BaseBlock) {
1177 User = std::min(User, I, isEarlier);
1178 } else {
1180 }
1181 }
1182 return User;
1183 };
1184
1185 for (const ByteSpan::Block &B : VSpan) {
1186 ByteSpan ASection = ASpan.section(B.Pos, B.Seg.Size);
1187 for (const ByteSpan::Block &S : ASection) {
1188 auto &EU = EarliestUser[S.Seg.Val];
1189 EU = std::min(EU, earliestUser(B.Seg.Val->uses()), isEarlier);
1190 }
1191 }
1192
1194 dbgs() << "ASpan:\n" << ASpan << '\n';
1195 dbgs() << "Earliest users of ASpan:\n";
1196 for (auto &[Val, User] : EarliestUser) {
1197 dbgs() << Val << "\n ->" << *User << '\n';
1198 }
1199 });
1200
1201 auto createLoad = [&](IRBuilderBase &Builder, const ByteSpan &VSpan,
1202 int Index, bool MakePred) {
1204 createAdjustedPointer(Builder, AlignAddr, SecTy, Index * ScLen);
1206 MakePred ? makeTestIfUnaligned(Builder, AlignVal, ScLen) : nullptr;
1207
1208
1209
1210 int Start = (Index - DoAlign) * ScLen;
1211 int Width = (1 + DoAlign) * ScLen;
1212 return this->createLoad(Builder, SecTy, Ptr, Predicate, ScLen, True, Undef,
1213 VSpan.section(Start, Width).values());
1214 };
1215
1217
1218 assert(In->getParent() == To->getParent());
1219 DepList Deps = getUpwardDeps(&*In, &*To);
1220 In->moveBefore(To);
1221
1222 InstMap Map = cloneBefore(In, Deps);
1223 for (auto [Old, New] : Map)
1224 In->replaceUsesOfWith(Old, New);
1225 };
1226
1227
1228 LLVM_DEBUG(dbgs() << "Creating loads for ASpan sectors\n");
1229 for (int Index = 0; Index != NumSectors + 1; ++Index) {
1230
1231
1232
1233
1234
1235
1237 DoAlign && Index > 0 ? EarliestUser[&ASpan[Index - 1]] : nullptr;
1239 Index < NumSectors ? EarliestUser[&ASpan[Index]] : nullptr;
1240 if (auto *Where = std::min(PrevAt, ThisAt, isEarlier)) {
1243 createLoad(Builder, VSpan, Index, DoAlign && Index == NumSectors);
1244
1245
1246
1247
1248
1249
1251 if (!HVC.isSafeToMoveBeforeInBB(*Load, BasePos))
1252 moveBefore(Load->getIterator(), BasePos);
1253 }
1254 LLVM_DEBUG(dbgs() << "Loads[" << Index << "]:" << *Loads[Index] << '\n');
1255 }
1256 }
1257
1258
1259 LLVM_DEBUG(dbgs() << "Creating values for ASpan sectors\n");
1260 for (int Index = 0; Index != NumSectors; ++Index) {
1261 ASpan[Index].Seg.Val = nullptr;
1262 if (auto *Where = EarliestUser[&ASpan[Index]]) {
1265 assert(Val != nullptr);
1266 if (DoAlign) {
1268 assert(NextLoad != nullptr);
1269 Val = HVC.vralignb(Builder, Val, NextLoad, AlignVal);
1270 }
1271 ASpan[Index].Seg.Val = Val;
1272 LLVM_DEBUG(dbgs() << "ASpan[" << Index << "]:" << *Val << '\n');
1273 }
1274 }
1275
1276 for (const ByteSpan::Block &B : VSpan) {
1277 ByteSpan ASection = ASpan.section(B.Pos, B.Seg.Size).shift(-B.Pos);
1280
1281
1282
1283
1284 std::vector<ByteSpan::Block *> ABlocks;
1285 for (ByteSpan::Block &S : ASection) {
1286 if (S.Seg.Val != nullptr)
1287 ABlocks.push_back(&S);
1288 }
1290 [&](const ByteSpan::Block *A, const ByteSpan::Block *B) {
1293 });
1294 for (ByteSpan::Block *S : ABlocks) {
1295
1296
1299 Value *Pay = HVC.vbytes(Builder, getPayload(S->Seg.Val));
1300 Accum =
1301 HVC.insertb(Builder, Accum, Pay, S->Seg.Start, S->Seg.Size, S->Pos);
1302 }
1303
1304
1305
1306
1307
1308
1309 Type *ValTy = getPayload(B.Seg.Val)->getType();
1312 getPassThrough(B.Seg.Val), "sel");
1314 }
1315}
1316
1317auto AlignVectors::realignStoreGroup(IRBuilderBase &Builder,
1318 const ByteSpan &VSpan, int ScLen,
1319 Value *AlignVal, Value *AlignAddr) const
1320 -> void {
1322
1323 Type *SecTy = HVC.getByteTy(ScLen);
1324 int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;
1325 bool DoAlign = !HVC.isZero(AlignVal);
1326
1327
1328 ByteSpan ASpanV, ASpanM;
1329
1330
1331
1332 auto MakeVec = [](IRBuilderBase &Builder, Value *Val) -> Value * {
1335 return Val;
1336 auto *VecTy = VectorType::get(Ty, 1, false);
1337 return Builder.CreateBitCast(Val, VecTy, "cst");
1338 };
1339
1340
1341
1342 for (int Index = (DoAlign ? -1 : 0); Index != NumSectors + DoAlign; ++Index) {
1343
1344
1345 ByteSpan VSection =
1346 VSpan.section(Index * ScLen, ScLen).shift(-Index * ScLen);
1351 for (ByteSpan::Block &S : VSection) {
1352 Value *Pay = getPayload(S.Seg.Val);
1353 Value *Mask = HVC.rescale(Builder, MakeVec(Builder, getMask(S.Seg.Val)),
1354 Pay->getType(), HVC.getByteTy());
1355 Value *PartM = HVC.insertb(Builder, Zero, HVC.vbytes(Builder, Mask),
1356 S.Seg.Start, S.Seg.Size, S.Pos);
1357 AccumM = Builder.CreateOr(AccumM, PartM);
1358
1359 Value *PartV = HVC.insertb(Builder, Undef, HVC.vbytes(Builder, Pay),
1360 S.Seg.Start, S.Seg.Size, S.Pos);
1361
1364 }
1365 ASpanV.Blocks.emplace_back(AccumV, ScLen, Index * ScLen);
1366 ASpanM.Blocks.emplace_back(AccumM, ScLen, Index * ScLen);
1367 }
1368
1370 dbgs() << "ASpanV before vlalign:\n" << ASpanV << '\n';
1371 dbgs() << "ASpanM before vlalign:\n" << ASpanM << '\n';
1372 });
1373
1374
1375 if (DoAlign) {
1376 for (int Index = 1; Index != NumSectors + 2; ++Index) {
1377 Value *PrevV = ASpanV[Index - 1].Seg.Val, *ThisV = ASpanV[Index].Seg.Val;
1378 Value *PrevM = ASpanM[Index - 1].Seg.Val, *ThisM = ASpanM[Index].Seg.Val;
1380 ASpanV[Index - 1].Seg.Val = HVC.vlalignb(Builder, PrevV, ThisV, AlignVal);
1381 ASpanM[Index - 1].Seg.Val = HVC.vlalignb(Builder, PrevM, ThisM, AlignVal);
1382 }
1383 }
1384
1386 dbgs() << "ASpanV after vlalign:\n" << ASpanV << '\n';
1387 dbgs() << "ASpanM after vlalign:\n" << ASpanM << '\n';
1388 });
1389
1390 auto createStore = [&](IRBuilderBase &Builder, const ByteSpan &ASpanV,
1391 const ByteSpan &ASpanM, int Index, bool MakePred) {
1394 if (HVC.isUndef(Val) || HVC.isZero(Mask))
1395 return;
1397 createAdjustedPointer(Builder, AlignAddr, SecTy, Index * ScLen);
1399 MakePred ? makeTestIfUnaligned(Builder, AlignVal, ScLen) : nullptr;
1400
1401
1402
1403 int Start = (Index - DoAlign) * ScLen;
1404 int Width = (1 + DoAlign) * ScLen;
1405 this->createStore(Builder, Val, Ptr, Predicate, ScLen,
1406 HVC.vlsb(Builder, Mask),
1407 VSpan.section(Start, Width).values());
1408 };
1409
1410 for (int Index = 0; Index != NumSectors + DoAlign; ++Index) {
1411 createStore(Builder, ASpanV, ASpanM, Index, DoAlign && Index == NumSectors);
1412 }
1413}
1414
1415auto AlignVectors::realignGroup(const MoveGroup &Move) const -> bool {
1416 LLVM_DEBUG(dbgs() << "Realigning group:\n" << Move << '\n');
1417
1418
1419 if (!Move.IsHvx)
1420 return false;
1421
1422
1423
1424 auto getMaxOf = [](auto Range, auto GetValue) {
1426 return GetValue(A) < GetValue(B);
1427 });
1428 };
1429
1430 const AddrList &BaseInfos = AddrGroups.at(Move.Base);
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445 std::set<Instruction *> TestSet(Move.Main.begin(), Move.Main.end());
1446 AddrList MoveInfos;
1448 BaseInfos, std::back_inserter(MoveInfos),
1449 [&TestSet](const AddrInfo &AI) { return TestSet.count(AI.Inst); });
1450
1451
1452 const AddrInfo &WithMaxAlign =
1453 getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.HaveAlign; });
1454 Align MaxGiven = WithMaxAlign.HaveAlign;
1455
1456
1457 const AddrInfo &WithMinOffset =
1458 getMaxOf(MoveInfos, [](const AddrInfo &AI) { return -AI.Offset; });
1459
1460 const AddrInfo &WithMaxNeeded =
1461 getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.NeedAlign; });
1462 Align MinNeeded = WithMaxNeeded.NeedAlign;
1463
1464
1465
1466
1467 Instruction *InsertAt = Move.Main.front();
1468 if (!Move.IsLoad) {
1469
1471 InsertAt = &*std::next(InsertAt->getIterator());
1472 }
1473
1475 InstSimplifyFolder(HVC.DL));
1476 Value *AlignAddr = nullptr;
1477 Value *AlignVal = nullptr;
1478
1479 if (MinNeeded <= MaxGiven) {
1480 int Start = WithMinOffset.Offset;
1481 int OffAtMax = WithMaxAlign.Offset;
1482
1483
1484
1485
1486
1487
1488 int Adjust = -alignTo(OffAtMax - Start, MinNeeded.value());
1489 AlignAddr = createAdjustedPointer(Builder, WithMaxAlign.Addr,
1490 WithMaxAlign.ValTy, Adjust, Move.Clones);
1491 int Diff = Start - (OffAtMax + Adjust);
1492 AlignVal = HVC.getConstInt(Diff);
1494 assert(static_cast<decltype(MinNeeded.value())>(Diff) < MinNeeded.value());
1495 } else {
1496
1497
1498
1499
1500
1501
1502
1503 AlignAddr =
1504 createAlignedPointer(Builder, WithMinOffset.Addr, WithMinOffset.ValTy,
1505 MinNeeded.value(), Move.Clones);
1506 AlignVal =
1507 Builder.CreatePtrToInt(WithMinOffset.Addr, HVC.getIntTy(), "pti");
1509 for (auto [Old, New] : Move.Clones)
1510 I->replaceUsesOfWith(Old, New);
1511 }
1512 }
1513
1514 ByteSpan VSpan;
1515 for (const AddrInfo &AI : MoveInfos) {
1516 VSpan.Blocks.emplace_back(AI.Inst, HVC.getSizeOf(AI.ValTy),
1517 AI.Offset - WithMinOffset.Offset);
1518 }
1519
1520
1521
1522
1524 : std::max(MinNeeded.value(), 4);
1525 assert(!Move.IsHvx || ScLen == 64 || ScLen == 128);
1526 assert(Move.IsHvx || ScLen == 4 || ScLen == 8);
1527
1529 dbgs() << "ScLen: " << ScLen << "\n";
1530 dbgs() << "AlignVal:" << *AlignVal << "\n";
1531 dbgs() << "AlignAddr:" << *AlignAddr << "\n";
1532 dbgs() << "VSpan:\n" << VSpan << '\n';
1533 });
1534
1535 if (Move.IsLoad)
1536 realignLoadGroup(Builder, VSpan, ScLen, AlignVal, AlignAddr);
1537 else
1538 realignStoreGroup(Builder, VSpan, ScLen, AlignVal, AlignAddr);
1539
1540 for (auto *Inst : Move.Main)
1541 Inst->eraseFromParent();
1542
1543 return true;
1544}
1545
1546auto AlignVectors::makeTestIfUnaligned(IRBuilderBase &Builder, Value *AlignVal,
1547 int Alignment) const -> Value * {
1548 auto *AlignTy = AlignVal->getType();
1550 AlignVal, ConstantInt::get(AlignTy, Alignment - 1), "and");
1551 Value *Zero = ConstantInt::get(AlignTy, 0);
1553}
1554
1555auto AlignVectors::isSectorTy(Type *Ty) const -> bool {
1556 if (!HVC.isByteVecTy(Ty))
1557 return false;
1558 int Size = HVC.getSizeOf(Ty);
1561 return Size == 4 || Size == 8;
1562}
1563
1564auto AlignVectors::run() -> bool {
1566 << '\n');
1567 if (!createAddressGroups())
1568 return false;
1569
1571 dbgs() << "Address groups(" << AddrGroups.size() << "):\n";
1572 for (auto &[In, AL] : AddrGroups) {
1573 for (const AddrInfo &AI : AL)
1574 dbgs() << "---\n" << AI << '\n';
1575 }
1576 });
1577
1579 MoveList LoadGroups, StoreGroups;
1580
1581 for (auto &G : AddrGroups) {
1584 }
1585
1587 dbgs() << "\nLoad groups(" << LoadGroups.size() << "):\n";
1588 for (const MoveGroup &G : LoadGroups)
1590 dbgs() << "Store groups(" << StoreGroups.size() << "):\n";
1591 for (const MoveGroup &G : StoreGroups)
1593 });
1594
1595
1596 unsigned CountLimit = VAGroupCountLimit;
1597 if (CountLimit == 0)
1598 return false;
1599
1600 if (LoadGroups.size() > CountLimit) {
1601 LoadGroups.resize(CountLimit);
1602 StoreGroups.clear();
1603 } else {
1604 unsigned StoreLimit = CountLimit - LoadGroups.size();
1605 if (StoreGroups.size() > StoreLimit)
1606 StoreGroups.resize(StoreLimit);
1607 }
1608
1609 for (auto &M : LoadGroups)
1610 Changed |= moveTogether(M);
1611 for (auto &M : StoreGroups)
1612 Changed |= moveTogether(M);
1613
1614 LLVM_DEBUG(dbgs() << "After moveTogether:\n" << HVC.F);
1615
1616 for (auto &M : LoadGroups)
1617 Changed |= realignGroup(M);
1618 for (auto &M : StoreGroups)
1619 Changed |= realignGroup(M);
1620
1622}
1623
1624
1625
1626
1627
1628auto HvxIdioms::getNumSignificantBits(Value *V, Instruction *In) const
1629 -> std::pair<unsigned, Signedness> {
1630 unsigned Bits = HVC.getNumSignificantBits(V, In);
1631
1632
1633
1634
1635
1636 KnownBits Known = HVC.getKnownBits(V, In);
1637 Signedness Sign = Signed;
1638 unsigned NumToTest = 0;
1640 NumToTest = Bits;
1642 NumToTest = Bits - 1;
1643
1646 Bits = NumToTest;
1647 }
1648
1649
1650
1653 Sign = Positive;
1654 }
1655 return {Bits, Sign};
1656}
1657
1658auto HvxIdioms::canonSgn(SValue X, SValue Y) const
1659 -> std::pair<SValue, SValue> {
1660
1661
1662
1663
1667}
1668
1669
1670
1671
1672auto HvxIdioms::matchFxpMul(Instruction &In) const -> std::optional {
1673 using namespace PatternMatch;
1674 auto *Ty = In.getType();
1675
1677 return std::nullopt;
1678
1680
1681 FxpOp Op;
1683
1684
1685
1686 auto m_Shr = [](auto &&V, auto &&S) {
1688 };
1689
1690 uint64_t Qn = 0;
1692 Op.Frac = Qn;
1694 } else {
1695 Op.Frac = 0;
1696 }
1697
1698 if (Op.Frac > Width)
1699 return std::nullopt;
1700
1701
1702 uint64_t CV;
1706 return std::nullopt;
1707 if (CV != 0)
1710 }
1711
1712
1714 Op.Opcode = Instruction::Mul;
1715
1716 Op.X.Sgn = getNumSignificantBits(Op.X.Val, &In).second;
1717 Op.Y.Sgn = getNumSignificantBits(Op.Y.Val, &In).second;
1719 return Op;
1720 }
1721
1722 return std::nullopt;
1723}
1724
1725auto HvxIdioms::processFxpMul(Instruction &In, const FxpOp &Op) const
1727 assert(Op.X.Val->getType() == Op.Y.Val->getType());
1728
1730 if (VecTy == nullptr)
1731 return nullptr;
1733 unsigned ElemWidth = ElemTy->getBitWidth();
1734
1735
1736 if ((HVC.length(VecTy) * ElemWidth) % (8 * HVC.HST.getVectorLength()) != 0)
1737 return nullptr;
1738
1739
1740
1741
1742 if (ElemWidth <= 8)
1743 return nullptr;
1744
1745
1746 if (ElemWidth <= 32 && Op.Frac == 0)
1747 return nullptr;
1748
1749 auto [BitsX, SignX] = getNumSignificantBits(Op.X.Val, &In);
1750 auto [BitsY, SignY] = getNumSignificantBits(Op.Y.Val, &In);
1751
1752
1753
1755 IRBuilder Builder(In.getParent(), In.getIterator(),
1756 InstSimplifyFolder(HVC.DL));
1757
1758 auto roundUpWidth = [](unsigned Width) -> unsigned {
1760
1761
1763 }
1764 if (Width > 32 && Width % 32 != 0) {
1765
1766 return alignTo(Width, 32u);
1767 }
1768 return Width;
1769 };
1770
1771 BitsX = roundUpWidth(BitsX);
1772 BitsY = roundUpWidth(BitsY);
1773
1774
1775
1776
1777 unsigned Width = std::max(BitsX, BitsY);
1778
1779 auto *ResizeTy = VectorType::get(HVC.getIntTy(Width), VecTy);
1780 if (Width < ElemWidth) {
1783 } else if (Width > ElemWidth) {
1785 : Builder.CreateZExt(X, ResizeTy, "zxt");
1787 : Builder.CreateZExt(Y, ResizeTy, "zxt");
1788 };
1789
1790 assert(X->getType() == Y->getType() && X->getType() == ResizeTy);
1791
1792 unsigned VecLen = HVC.length(ResizeTy);
1793 unsigned ChopLen = (8 * HVC.HST.getVectorLength()) / std::min(Width, 32u);
1794
1796 FxpOp ChopOp = Op;
1797 ChopOp.ResTy = VectorType::get(Op.ResTy->getElementType(), ChopLen, false);
1798
1799 for (unsigned V = 0; V != VecLen / ChopLen; ++V) {
1800 ChopOp.X.Val = HVC.subvector(Builder, X, V * ChopLen, ChopLen);
1801 ChopOp.Y.Val = HVC.subvector(Builder, Y, V * ChopLen, ChopLen);
1802 Results.push_back(processFxpMulChopped(Builder, In, ChopOp));
1804 break;
1805 }
1806
1808 return nullptr;
1809
1812 ? Builder.CreateSExt(Cat, VecTy, "sxt")
1813 : Builder.CreateZExt(Cat, VecTy, "zxt");
1814 return Ext;
1815}
1816
1817inline bool HvxIdioms::matchScatter(Instruction &In) const {
1819 if ()
1820 return false;
1821 return (II->getIntrinsicID() == Intrinsic::masked_scatter);
1822}
1823
1824inline bool HvxIdioms::matchGather(Instruction &In) const {
1826 if ()
1827 return false;
1828 return (II->getIntrinsicID() == Intrinsic::masked_gather);
1829}
1830
1832
1833
1835 switch (Opc) {
1836 case Instruction::Add:
1837 case Instruction::Sub:
1838 case Instruction::Mul:
1839 case Instruction::And:
1840 case Instruction::Or:
1841 case Instruction::Xor:
1842 case Instruction::AShr:
1843 case Instruction::LShr:
1844 case Instruction::Shl:
1845 case Instruction::UDiv:
1846 return true;
1847 }
1848 return false;
1849}
1850
1851
1853 assert(Ptr && "Unable to extract pointer");
1855 return Ptr;
1859 if (II->getIntrinsicID() == Intrinsic::masked_store)
1860 return II->getOperand(1);
1861 }
1862 return nullptr;
1863}
1864
1866 HvxIdioms::DstQualifier &Qual) {
1868 if (!In)
1869 return Destination;
1871 Destination = In;
1872 Qual = HvxIdioms::LdSt;
1874 if (II->getIntrinsicID() == Intrinsic::masked_gather) {
1875 Destination = In;
1876 Qual = HvxIdioms::LLVM_Gather;
1877 } else if (II->getIntrinsicID() == Intrinsic::masked_scatter) {
1878 Destination = In;
1879 Qual = HvxIdioms::LLVM_Scatter;
1880 } else if (II->getIntrinsicID() == Intrinsic::masked_store) {
1881 Destination = In;
1882 Qual = HvxIdioms::LdSt;
1883 } else if (II->getIntrinsicID() ==
1884 Intrinsic::hexagon_V6_vgather_vscattermh) {
1885 Destination = In;
1886 Qual = HvxIdioms::HEX_Gather_Scatter;
1887 } else if (II->getIntrinsicID() == Intrinsic::hexagon_V6_vscattermh_128B) {
1888 Destination = In;
1889 Qual = HvxIdioms::HEX_Scatter;
1890 } else if (II->getIntrinsicID() == Intrinsic::hexagon_V6_vgathermh_128B) {
1891 Destination = In;
1892 Qual = HvxIdioms::HEX_Gather;
1893 }
1899 Destination = In;
1900 Qual = HvxIdioms::Call;
1904 Destination = In;
1905 Qual = HvxIdioms::Arithmetic;
1906 } else {
1907 LLVM_DEBUG(dbgs() << "Unhandled destination : " << *In << "\n");
1908 }
1909 return Destination;
1910}
1911
1912
1913
1914
1915
1916
1917
1920 if (!In)
1921 return Destination;
1922
1924
1925 for (auto &U : In->uses()) {
1928 if (Destination)
1929 Users.push_back(Destination);
1930 }
1931 }
1932
1935 return I;
1936 return Destination;
1937}
1938
1939
1941 assert(In && "Bad instruction");
1944 IIn->getIntrinsicID() == Intrinsic::masked_scatter)) &&
1945 "Not a gather Intrinsic");
1947 if (IIn->getIntrinsicID() == Intrinsic::masked_gather)
1949 else
1951 return GEPIndex;
1952}
1953
1954
1955
1956
1959 if (!GEPIndex) {
1961 return nullptr;
1962 }
1965 if (IndexLoad)
1966 return IndexLoad;
1967
1969 if (IndexZEx) {
1971 if (IndexLoad)
1972 return IndexLoad;
1974 if (II && II->getIntrinsicID() == Intrinsic::masked_gather)
1976 }
1978 if (BaseShuffle) {
1980 if (IndexLoad)
1981 return IndexLoad;
1983 if (IE) {
1984 auto *Src = IE->getOperand(1);
1986 if (IndexLoad)
1987 return IndexLoad;
1989 if (Alloca)
1990 return Alloca;
1992 return Src;
1993 }
1995 return Src;
1996 }
1997 }
1998 }
1999 LLVM_DEBUG(dbgs() << " Unable to locate Address from intrinsic\n");
2000 return nullptr;
2001}
2002
2004 if (!In)
2005 return nullptr;
2006
2009
2011 if (II->getIntrinsicID() == Intrinsic::masked_load)
2012 return II->getType();
2013 if (II->getIntrinsicID() == Intrinsic::masked_store)
2014 return II->getOperand(0)->getType();
2015 }
2016 return In->getType();
2017}
2018
2020 if (!In)
2021 return nullptr;
2023 return In;
2025 if (II->getIntrinsicID() == Intrinsic::masked_load)
2026 return In;
2027 if (II->getIntrinsicID() == Intrinsic::masked_gather)
2028 return In;
2029 }
2039 return cstDataVector;
2041 return GEPIndex->getOperand(0);
2042 return nullptr;
2043}
2044
2045
2046
2049 if (!GEPIndex) {
2051 return nullptr;
2052 }
2055 return IndexLoad;
2056
2057 LLVM_DEBUG(dbgs() << " Unable to locate Index from intrinsic\n");
2058 return nullptr;
2059}
2060
2061
2062
2063
2067 assert(I && "Unable to reinterprete cast");
2068 Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
2069 std::vector shuffleMask;
2070 for (unsigned i = 0; i < 64; ++i)
2071 shuffleMask.push_back(i);
2073 Value *CastShuffle =
2074 Builder.CreateShuffleVector(I, I, Mask, "identity_shuffle");
2075 return Builder.CreateBitCast(CastShuffle, NT, "cst64_i16_to_32_i32");
2076}
2077
2078
2082 assert(I && "Unable to reinterprete cast");
2083 Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
2084 std::vector shuffleMask;
2085 for (unsigned i = 0; i < 128; ++i)
2086 shuffleMask.push_back(i);
2088 Value *CastShuffle =
2089 Builder.CreateShuffleVector(I, I, Mask, "identity_shuffle");
2090 return Builder.CreateBitCast(CastShuffle, NT, "cst128_i8_to_32_i32");
2091}
2092
2093
2096 unsigned int pattern) {
2097 std::vector byteMask;
2098 for (unsigned i = 0; i < 32; ++i)
2099 byteMask.push_back(pattern);
2100
2101 return Builder.CreateIntrinsic(
2102 HVC.getBoolTy(128), HVC.HST.getIntrinsicId(Hexagon::V6_vandvrt),
2103 {llvm::ConstantDataVector::get(Ctx, byteMask), HVC.getConstInt(~0)},
2104 nullptr);
2105}
2106
2107Value *HvxIdioms::processVScatter(Instruction &In) const {
2109 assert(InpTy && "Cannot handle no vector type for llvm.scatter/gather");
2110 unsigned InpSize = HVC.getSizeOf(InpTy);
2111 auto *F = In.getFunction();
2112 LLVMContext &Ctx = F->getContext();
2114 assert(ElemTy && "llvm.scatter needs integer type argument");
2117 unsigned Elements = HVC.length(InpTy);
2118 dbgs() << "\n[Process scatter](" << In << ")\n" << *In.getParent() << "\n";
2119 dbgs() << " Input type(" << *InpTy << ") elements(" << Elements
2120 << ") VecLen(" << InpSize << ") type(" << *ElemTy << ") ElemWidth("
2121 << ElemWidth << ")\n";
2122 });
2123
2124 IRBuilder Builder(In.getParent(), In.getIterator(),
2125 InstSimplifyFolder(HVC.DL));
2126
2127 auto *ValueToScatter = In.getOperand(0);
2128 LLVM_DEBUG(dbgs() << " ValueToScatter : " << *ValueToScatter << "\n");
2129
2131 LLVM_DEBUG(dbgs() << "Unhandled vector size(" << InpSize
2132 << ") for vscatter\n");
2133 return nullptr;
2134 }
2135
2136
2138 if (!IndexLoad)
2139 return nullptr;
2140 LLVM_DEBUG(dbgs() << " IndexLoad : " << *IndexLoad << "\n");
2141
2142
2144 if (!Ptr)
2145 return nullptr;
2147
2149 if (!Indexes)
2150 return nullptr;
2151 LLVM_DEBUG(dbgs() << " Indexes : " << *Indexes << "\n");
2153 "cst_ptr_to_i32");
2154 LLVM_DEBUG(dbgs() << " CastedDst : " << *CastedDst << "\n");
2155
2157 Value *CastIndex = nullptr;
2158 if (cstDataVector) {
2159
2160 AllocaInst *IndexesAlloca =
2161 Builder.CreateAlloca(HVC.getHvxTy(HVC.getIntTy(32), false));
2162 [[maybe_unused]] auto *StoreIndexes =
2163 Builder.CreateStore(cstDataVector, IndexesAlloca);
2164 LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n");
2166 IndexesAlloca, "reload_index");
2167 } else {
2168 if (ElemWidth == 2)
2170 else
2171 CastIndex = Indexes;
2172 }
2173 LLVM_DEBUG(dbgs() << " Cast index : " << *CastIndex << ")\n");
2174
2175 if (ElemWidth == 1) {
2176
2177
2178 Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
2179
2180
2181 Value *CastIndexes = Builder.CreateBitCast(CastIndex, NT, "cast_to_32i32");
2182 auto V6_vunpack = HVC.HST.getIntrinsicId(Hexagon::V6_vunpackub);
2184 HVC.getHvxTy(HVC.getIntTy(32), true), V6_vunpack, CastIndexes, nullptr);
2185 LLVM_DEBUG(dbgs() << " UnpackedIndexes : " << *UnpackedIndexes << ")\n");
2186
2187 auto V6_hi = HVC.HST.getIntrinsicId(Hexagon::V6_hi);
2188 auto V6_lo = HVC.HST.getIntrinsicId(Hexagon::V6_lo);
2189 [[maybe_unused]] Value *IndexHi =
2190 HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedIndexes);
2191 [[maybe_unused]] Value *IndexLo =
2192 HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedIndexes);
2193 LLVM_DEBUG(dbgs() << " UnpackedIndHi : " << *IndexHi << ")\n");
2194 LLVM_DEBUG(dbgs() << " UnpackedIndLo : " << *IndexLo << ")\n");
2195
2196 Value *CastSrc =
2198 LLVM_DEBUG(dbgs() << " CastSrc : " << *CastSrc << ")\n");
2200 HVC.getHvxTy(HVC.getIntTy(32), true), V6_vunpack, CastSrc, nullptr);
2201 LLVM_DEBUG(dbgs() << " UnpackedValToScat: " << *UnpackedValueToScatter
2202 << ")\n");
2203
2204 [[maybe_unused]] Value *UVSHi =
2205 HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedValueToScatter);
2206 [[maybe_unused]] Value *UVSLo =
2207 HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedValueToScatter);
2208 LLVM_DEBUG(dbgs() << " UVSHi : " << *UVSHi << ")\n");
2209 LLVM_DEBUG(dbgs() << " UVSLo : " << *UVSLo << ")\n");
2210
2211
2212 auto *QByteMask = get_i32_Mask(HVC, Builder, Ctx, 0x00ff00ff);
2213 LLVM_DEBUG(dbgs() << " QByteMask : " << *QByteMask << "\n");
2215 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermhq_128B,
2217 IndexHi, UVSHi},
2218 nullptr);
2219 LLVM_DEBUG(dbgs() << " ResHi : " << *ResHi << ")\n");
2221 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermhq_128B,
2223 IndexLo, UVSLo},
2224 nullptr);
2225 } else if (ElemWidth == 2) {
2226 Value *CastSrc =
2228 LLVM_DEBUG(dbgs() << " CastSrc : " << *CastSrc << ")\n");
2230 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermh_128B,
2232 CastSrc},
2233 nullptr);
2234 } else if (ElemWidth == 4) {
2236 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vscattermw_128B,
2238 ValueToScatter},
2239 nullptr);
2240 } else {
2241 LLVM_DEBUG(dbgs() << "Unhandled element type for vscatter\n");
2242 return nullptr;
2243 }
2244}
2245
2246Value *HvxIdioms::processVGather(Instruction &In) const {
2247 [[maybe_unused]] auto *InpTy =
2249 assert(InpTy && "Cannot handle no vector type for llvm.gather");
2250 [[maybe_unused]] auto *ElemTy =
2252 assert(ElemTy && "llvm.gather needs vector of ptr argument");
2253 auto *F = In.getFunction();
2254 LLVMContext &Ctx = F->getContext();
2255 LLVM_DEBUG(dbgs() << "\n[Process gather](" << In << ")\n"
2256 << *In.getParent() << "\n");
2257 LLVM_DEBUG(dbgs() << " Input type(" << *InpTy << ") elements("
2258 << HVC.length(InpTy) << ") VecLen(" << HVC.getSizeOf(InpTy)
2259 << ") type(" << *ElemTy << ") Access alignment("
2260 << *In.getOperand(1) << ") AddressSpace("
2261 << ElemTy->getAddressSpace() << ")\n");
2262
2263
2265 "llvm.gather needs vector for mask");
2266 IRBuilder Builder(In.getParent(), In.getIterator(),
2267 InstSimplifyFolder(HVC.DL));
2268
2269
2270
2271
2272 HvxIdioms::DstQualifier Qual = HvxIdioms::Undefined;
2274 if (!Dst) {
2275 LLVM_DEBUG(dbgs() << " Unable to locate vgather destination\n");
2276 return nullptr;
2277 }
2278 LLVM_DEBUG(dbgs() << " Destination : " << *Dst << " Qual(" << Qual
2279 << ")\n");
2280
2281
2283 if (!Ptr) {
2284 LLVM_DEBUG(dbgs() << "Could not locate vgather destination ptr\n");
2285 return nullptr;
2286 }
2287
2288
2290 assert(DstType && "Cannot handle non vector dst type for llvm.gather");
2291
2292
2294 if (!IndexLoad)
2295 return nullptr;
2296 LLVM_DEBUG(dbgs() << " IndexLoad : " << *IndexLoad << "\n");
2297
2298
2300 if (!Indexes)
2301 return nullptr;
2302 LLVM_DEBUG(dbgs() << " Indexes : " << *Indexes << "\n");
2303
2305 Type *NT = HVC.getHvxTy(HVC.getIntTy(32), false);
2306 if (Qual == HvxIdioms::LdSt || Qual == HvxIdioms::Arithmetic) {
2307
2308
2309
2310 unsigned OutputSize = HVC.getSizeOf(DstType);
2314 << " Address space ("
2316 << " Result type : " << *DstType
2317 << "\n Size in bytes : " << OutputSize
2318 << " element type(" << *DstElemTy
2319 << ")\n ElemWidth : " << ElemWidth << " bytes\n");
2320
2322 assert(IndexType && "Cannot handle non vector index type for llvm.gather");
2323 unsigned IndexWidth = HVC.DL.getTypeAllocSize(IndexType->getElementType());
2324 LLVM_DEBUG(dbgs() << " IndexWidth(" << IndexWidth << ")\n");
2325
2326
2328 IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
2329
2330
2331
2332
2333
2334
2335
2337 if (ElemWidth == 1) {
2338
2339
2340
2341
2342 Value *CastIndexes =
2343 Builder.CreateBitCast(Indexes, NT, "cast_to_32i32");
2344 auto V6_vunpack = HVC.HST.getIntrinsicId(Hexagon::V6_vunpackub);
2345 auto *UnpackedIndexes =
2346 Builder.CreateIntrinsic(HVC.getHvxTy(HVC.getIntTy(32), true),
2347 V6_vunpack, CastIndexes, nullptr);
2348 LLVM_DEBUG(dbgs() << " UnpackedIndexes : " << *UnpackedIndexes
2349 << ")\n");
2350
2351 auto V6_hi = HVC.HST.getIntrinsicId(Hexagon::V6_hi);
2352 auto V6_lo = HVC.HST.getIntrinsicId(Hexagon::V6_lo);
2353 [[maybe_unused]] Value *IndexHi =
2354 HVC.createHvxIntrinsic(Builder, V6_hi, NT, UnpackedIndexes);
2355 [[maybe_unused]] Value *IndexLo =
2356 HVC.createHvxIntrinsic(Builder, V6_lo, NT, UnpackedIndexes);
2357 LLVM_DEBUG(dbgs() << " UnpackedIndHi : " << *IndexHi << ")\n");
2358 LLVM_DEBUG(dbgs() << " UnpackedIndLo : " << *IndexLo << ")\n");
2359
2360 auto *QByteMask = get_i32_Mask(HVC, Builder, Ctx, 0x00ff00ff);
2361 LLVM_DEBUG(dbgs() << " QByteMask : " << *QByteMask << "\n");
2362
2363
2364 auto V6_vgather = HVC.HST.getIntrinsicId(Hexagon::V6_vgathermhq);
2365 [[maybe_unused]] auto GatherHi = Builder.CreateIntrinsic(
2366 Type::getVoidTy(Ctx), V6_vgather,
2367 {Ptr, QByteMask, CastedPtr,
2369 nullptr);
2370 LLVM_DEBUG(dbgs() << " GatherHi : " << *GatherHi << ")\n");
2371
2372 [[maybe_unused]] Value *LoadedResultHi = Builder.CreateLoad(
2373 HVC.getHvxTy(HVC.getIntTy(32), false), Ptr, "temp_result_hi");
2374 LLVM_DEBUG(dbgs() << " LoadedResultHi : " << *LoadedResultHi << "\n");
2375
2376
2377
2379 Type::getVoidTy(Ctx), V6_vgather,
2380 {Ptr, QByteMask, CastedPtr,
2382 nullptr);
2383 LLVM_DEBUG(dbgs() << " GatherLo : " << *Gather << ")\n");
2385 HVC.getHvxTy(HVC.getIntTy(32), false), Ptr, "temp_result_lo");
2386 LLVM_DEBUG(dbgs() << " LoadedResultLo : " << *LoadedResultLo << "\n");
2387
2388
2389
2390
2391 auto V6_vpackeb = HVC.HST.getIntrinsicId(Hexagon::V6_vpackeb);
2393 NT, V6_vpackeb, {LoadedResultHi, LoadedResultLo}, nullptr);
2394 LLVM_DEBUG(dbgs() << " ScaledRes : " << *Res << "\n");
2395 [[maybe_unused]] auto *StoreRes = Builder.CreateStore(Res, Ptr);
2396 LLVM_DEBUG(dbgs() << " StoreRes : " << *StoreRes << "\n");
2397 } else if (ElemWidth == 2) {
2398
2399 if (IndexWidth == 2) {
2400
2401 Value *CastIndex =
2403 LLVM_DEBUG(dbgs() << " Cast index: " << *CastIndex << ")\n");
2404
2405
2406 auto V6_vaslh = HVC.HST.getIntrinsicId(Hexagon::V6_vaslh);
2407 Value *AdjustedIndex = HVC.createHvxIntrinsic(
2408 Builder, V6_vaslh, NT, {CastIndex, HVC.getConstInt(1)});
2410 << " Shifted half index: " << *AdjustedIndex << ")\n");
2411
2412 auto V6_vgather = HVC.HST.getIntrinsicId(Hexagon::V6_vgathermh);
2413
2414
2416 Type::getVoidTy(Ctx), V6_vgather,
2418 AdjustedIndex},
2419 nullptr);
2420 for (auto &U : Dst->uses()) {
2422 dbgs() << " dst used by: " << *UI << "\n";
2423 }
2424 for (auto &U : In.uses()) {
2426 dbgs() << " In used by : " << *UI << "\n";
2427 }
2428
2429
2431 HVC.getHvxTy(HVC.getIntTy(16), false), Ptr, "temp_result");
2432 LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
2433 In.replaceAllUsesWith(LoadedResult);
2434 } else {
2435 dbgs() << " Unhandled index type for vgather\n";
2436 return nullptr;
2437 }
2438 } else if (ElemWidth == 4) {
2439 if (IndexWidth == 4) {
2440
2441 auto V6_vaslh = HVC.HST.getIntrinsicId(Hexagon::V6_vaslh);
2442 Value *AdjustedIndex = HVC.createHvxIntrinsic(
2443 Builder, V6_vaslh, NT, {Indexes, HVC.getConstInt(2)});
2445 << " Shifted word index: " << *AdjustedIndex << ")\n");
2447 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermw_128B,
2449 AdjustedIndex},
2450 nullptr);
2451 } else {
2452 LLVM_DEBUG(dbgs() << " Unhandled index type for vgather\n");
2453 return nullptr;
2454 }
2455 } else {
2456 LLVM_DEBUG(dbgs() << " Unhandled element type for vgather\n");
2457 return nullptr;
2458 }
2459 } else if (HVC.HST.getVectorLength() == OutputSize * 2) {
2460
2461 LLVM_DEBUG(dbgs() << " Unhandled half of register size\n");
2462 return nullptr;
2463 } else if (HVC.HST.getVectorLength() * 2 == OutputSize) {
2464 LLVM_DEBUG(dbgs() << " Unhandle twice the register size\n");
2465 return nullptr;
2466 }
2467
2468
2469
2470 Dst->eraseFromParent();
2471 } else if (Qual == HvxIdioms::LLVM_Scatter) {
2472
2474 auto *DstInpTy = cast(Dst->getOperand(1)->getType());
2475 assert(DstInpTy && "Cannot handle no vector type for llvm.scatter");
2476 unsigned DstInpSize = HVC.getSizeOf(DstInpTy);
2477 unsigned DstElements = HVC.length(DstInpTy);
2478 auto *DstElemTy = cast(DstInpTy->getElementType());
2479 assert(DstElemTy && "llvm.scatter needs vector of ptr argument");
2480 dbgs() << " Gather feeds into scatter\n Values to scatter : "
2481 << *Dst->getOperand(0) << "\n";
2482 dbgs() << " Dst type(" << *DstInpTy << ") elements(" << DstElements
2483 << ") VecLen(" << DstInpSize << ") type(" << *DstElemTy
2484 << ") Access alignment(" << *Dst->getOperand(2) << ")\n";
2485 });
2486
2488 if (!Src)
2489 return nullptr;
2491
2493 LLVM_DEBUG(dbgs() << " Source is not a pointer type...\n");
2494 return nullptr;
2495 }
2496
2498 Src, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
2499 LLVM_DEBUG(dbgs() << " CastedSrc: " << *CastedSrc << "\n");
2500
2502 if (!DstLoad) {
2503 LLVM_DEBUG(dbgs() << " Unable to locate DstLoad\n");
2504 return nullptr;
2505 }
2506 LLVM_DEBUG(dbgs() << " DstLoad : " << *DstLoad << "\n");
2507
2509 if (!Ptr)
2510 return nullptr;
2512 Value *CastIndex =
2514 LLVM_DEBUG(dbgs() << " Cast index: " << *CastIndex << ")\n");
2515
2516
2517 auto V6_vaslh = HVC.HST.getIntrinsicId(Hexagon::V6_vaslh);
2518 Value *AdjustedIndex = HVC.createHvxIntrinsic(
2519 Builder, V6_vaslh, NT, {CastIndex, HVC.getConstInt(1)});
2520 LLVM_DEBUG(dbgs() << " Shifted half index: " << *AdjustedIndex << ")\n");
2521
2523 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
2525 AdjustedIndex},
2526 nullptr);
2527 } else if (Qual == HvxIdioms::HEX_Gather_Scatter) {
2528
2529
2530
2531
2532
2535 if (cstDataVector) {
2536
2537
2538
2539 AllocaInst *IndexesAlloca = Builder.CreateAlloca(NT);
2540 [[maybe_unused]] auto *StoreIndexes =
2541 Builder.CreateStore(cstDataVector, IndexesAlloca);
2542 LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n");
2544 IndexesAlloca->getAllocatedType(), IndexesAlloca, "reload_index");
2545 AllocaInst *ResultAlloca = Builder.CreateAlloca(NT);
2546 LLVM_DEBUG(dbgs() << " ResultAlloca : " << *ResultAlloca << "\n");
2547
2549 IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
2550 LLVM_DEBUG(dbgs() << " CastedSrc : " << *CastedSrc << "\n");
2551
2553 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
2554 {ResultAlloca, CastedSrc,
2556 nullptr);
2558 HVC.getHvxTy(HVC.getIntTy(16), false), ResultAlloca, "temp_result");
2559 LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
2560 LLVM_DEBUG(dbgs() << " Gather : " << *Gather << "\n");
2561 In.replaceAllUsesWith(LoadedResult);
2562 }
2563 } else {
2564
2566 if (!Src)
2567 return nullptr;
2569
2571 Src, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
2572 LLVM_DEBUG(dbgs() << " CastedSrc: " << *CastedSrc << "\n");
2573
2575 if (!DstLoad)
2576 return nullptr;
2577 LLVM_DEBUG(dbgs() << " DstLoad : " << *DstLoad << "\n");
2579 if (!Ptr)
2580 return nullptr;
2582
2584 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgather_vscattermh,
2586 Indexes},
2587 nullptr);
2588 }
2589 return Gather;
2590 } else if (Qual == HvxIdioms::HEX_Scatter) {
2591
2592
2593
2594
2595 AllocaInst *ResultAlloca = Builder.CreateAlloca(NT);
2597 IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
2598 LLVM_DEBUG(dbgs() << " CastedSrc : " << *CastedSrc << "\n");
2599 Value *CastIndex =
2601 LLVM_DEBUG(dbgs() << " Cast index : " << *CastIndex << ")\n");
2602
2604 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
2606 CastIndex},
2607 nullptr);
2609 HVC.getHvxTy(HVC.getIntTy(16), false), ResultAlloca, "temp_result");
2610 LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
2611 In.replaceAllUsesWith(LoadedResult);
2612 } else if (Qual == HvxIdioms::HEX_Gather) {
2613
2614
2617 if (cstDataVector) {
2618
2619 AllocaInst *IndexesAlloca = Builder.CreateAlloca(NT);
2620
2621 [[maybe_unused]] auto *StoreIndexes =
2622 Builder.CreateStore(cstDataVector, IndexesAlloca);
2623 LLVM_DEBUG(dbgs() << " StoreIndexes : " << *StoreIndexes << "\n");
2625 IndexesAlloca->getAllocatedType(), IndexesAlloca, "reload_index");
2626 AllocaInst *ResultAlloca = Builder.CreateAlloca(NT);
2627 LLVM_DEBUG(dbgs() << " ResultAlloca : " << *ResultAlloca
2628 << "\n AddressSpace: "
2630
2632 IndexLoad, Type::getInt32Ty(Ctx), "cst_ptr_to_i32");
2633 LLVM_DEBUG(dbgs() << " CastedSrc : " << *CastedSrc << "\n");
2634
2636 Type::getVoidTy(Ctx), Intrinsic::hexagon_V6_vgathermh_128B,
2637 {ResultAlloca, CastedSrc,
2639 nullptr);
2641 HVC.getHvxTy(HVC.getIntTy(16), false), ResultAlloca, "temp_result");
2642 LLVM_DEBUG(dbgs() << " LoadedResult : " << *LoadedResult << "\n");
2643 LLVM_DEBUG(dbgs() << " Gather : " << *Gather << "\n");
2644 In.replaceAllUsesWith(LoadedResult);
2645 }
2646 }
2647 } else if (Qual == HvxIdioms::LLVM_Gather) {
2648
2649 errs() << " Underimplemented vgather to vgather sequence\n";
2650 return nullptr;
2651 } else
2653
2654 return Gather;
2655}
2656
2657auto HvxIdioms::processFxpMulChopped(IRBuilderBase &Builder, Instruction &In,
2658 const FxpOp &Op) const -> Value * {
2659 assert(Op.X.Val->getType() == Op.Y.Val->getType());
2661 unsigned Width = InpTy->getScalarSizeInBits();
2662 bool Rounding = Op.RoundAt.has_value();
2663
2664 if (.RoundAt || *Op.RoundAt == Op.Frac - 1) {
2665
2667 Value *QMul = nullptr;
2668 if (Width == 16) {
2669 QMul = createMulQ15(Builder, Op.X, Op.Y, Rounding);
2670 } else if (Width == 32) {
2671 QMul = createMulQ31(Builder, Op.X, Op.Y, Rounding);
2672 }
2673 if (QMul != nullptr)
2674 return QMul;
2675 }
2676 }
2677
2678 assert(Width >= 32 || isPowerOf2_32(Width));
2679 assert(Width < 32 || Width % 32 == 0);
2680
2681
2682 if (Width < 32) {
2683 if (Width < 16)
2684 return nullptr;
2685
2686
2687
2688 assert(Width == 16);
2689 assert(Op.Frac != 0 && "Unshifted mul should have been skipped");
2690 if (Op.Frac == 16) {
2691
2692 if (Value *MulH = createMulH16(Builder, Op.X, Op.Y))
2693 return MulH;
2694 }
2695
2696 Value *Prod32 = createMul16(Builder, Op.X, Op.Y);
2697 if (Rounding) {
2698 Value *RoundVal = ConstantInt::get(Prod32->getType(), 1 << *Op.RoundAt);
2699 Prod32 = Builder.CreateAdd(Prod32, RoundVal, "add");
2700 }
2701
2702 Value *ShiftAmt = ConstantInt::get(Prod32->getType(), Op.Frac);
2704 ? Builder.CreateAShr(Prod32, ShiftAmt, "asr")
2705 : Builder.CreateLShr(Prod32, ShiftAmt, "lsr");
2706 return Builder.CreateTrunc(Shifted, InpTy, "trn");
2707 }
2708
2709
2710
2711
2712
2713 auto WordX = HVC.splitVectorElements(Builder, Op.X.Val, 32);
2714 auto WordY = HVC.splitVectorElements(Builder, Op.Y.Val, 32);
2715 auto WordP = createMulLong(Builder, WordX, Op.X.Sgn, WordY, Op.Y.Sgn);
2716
2717 auto *HvxWordTy = cast(WordP.front()->getType());
2718
2719
2720 if (Op.RoundAt.has_value()) {
2723 RoundV[*Op.RoundAt / 32] =
2724 ConstantInt::get(HvxWordTy, 1 << (*Op.RoundAt % 32));
2725 WordP = createAddLong(Builder, WordP, RoundV);
2726 }
2727
2728
2729
2730
2731 unsigned SkipWords = Op.Frac / 32;
2732 Constant *ShiftAmt = ConstantInt::get(HvxWordTy, Op.Frac % 32);
2733
2734 for (int Dst = 0, End = WordP.size() - SkipWords; Dst != End; ++Dst) {
2735 int Src = Dst + SkipWords;
2737 if (Src + 1 < End) {
2738 Value *Hi = WordP[Src + 1];
2739 WordP[Dst] = Builder.CreateIntrinsic(HvxWordTy, Intrinsic::fshr,
2741 nullptr, "int");
2742 } else {
2743
2744 WordP[Dst] = Builder.CreateAShr(Lo, ShiftAmt, "asr");
2745 }
2746 }
2747 if (SkipWords != 0)
2748 WordP.resize(WordP.size() - SkipWords);
2749
2750 return HVC.joinVectorElements(Builder, WordP, Op.ResTy);
2751}
2752
2753auto HvxIdioms::createMulQ15(IRBuilderBase &Builder, SValue X, SValue Y,
2754 bool Rounding) const -> Value * {
2755 assert(X.Val->getType() == Y.Val->getType());
2756 assert(X.Val->getType()->getScalarType() == HVC.getIntTy(16));
2758
2759
2761 return nullptr;
2762
2763 auto V6_vmpyhvsrs = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhvsrs);
2764 return HVC.createHvxIntrinsic(Builder, V6_vmpyhvsrs, X.Val->getType(),
2765 {X.Val, Y.Val});
2766}
2767
2768auto HvxIdioms::createMulQ31(IRBuilderBase &Builder, SValue X, SValue Y,
2769 bool Rounding) const -> Value * {
2770 Type *InpTy = X.Val->getType();
2771 assert(InpTy == Y.Val->getType());
2774
2776 return nullptr;
2777
2778 auto V6_vmpyewuh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyewuh);
2779 auto V6_vmpyo_acc = Rounding
2780 ? HVC.HST.getIntrinsicId(Hexagon::V6_vmpyowh_rnd_sacc)
2783 HVC.createHvxIntrinsic(Builder, V6_vmpyewuh, InpTy, {X.Val, Y.Val});
2784 return HVC.createHvxIntrinsic(Builder, V6_vmpyo_acc, InpTy,
2786}
2787
2788auto HvxIdioms::createAddCarry(IRBuilderBase &Builder, Value *X, Value *Y,
2789 Value *CarryIn) const
2790 -> std::pair<Value *, Value *> {
2791 assert(X->getType() == Y->getType());
2793 if (VecTy == HvxI32Ty && HVC.HST.useHVXV62Ops()) {
2796 if (CarryIn == nullptr && HVC.HST.useHVXV66Ops()) {
2797 AddCarry = HVC.HST.getIntrinsicId(Hexagon::V6_vaddcarryo);
2798 } else {
2799 AddCarry = HVC.HST.getIntrinsicId(Hexagon::V6_vaddcarry);
2800 if (CarryIn == nullptr)
2802 Args.push_back(CarryIn);
2803 }
2804 Value *Ret = HVC.createHvxIntrinsic(Builder, AddCarry,
2805 nullptr, Args);
2808 return {Result, CarryOut};
2809 }
2810
2811
2812
2813
2815 if (CarryIn != nullptr) {
2816 unsigned Width = VecTy->getScalarSizeInBits();
2817 uint32_t Mask = 1;
2818 if (Width < 32) {
2819 for (unsigned i = 0, e = 32 / Width; i != e; ++i)
2820 Mask = (Mask << Width) | 1;
2821 }
2822 auto V6_vandqrt = HVC.HST.getIntrinsicId(Hexagon::V6_vandqrt);
2823 Value *ValueIn =
2824 HVC.createHvxIntrinsic(Builder, V6_vandqrt, nullptr,
2825 {CarryIn, HVC.getConstInt(Mask)});
2826 Result1 = Builder.CreateAdd(X, ValueIn, "add");
2827 }
2828
2832 return {Result2, Builder.CreateOr(CarryOut1, CarryOut2, "orb")};
2833}
2834
2835auto HvxIdioms::createMul16(IRBuilderBase &Builder, SValue X, SValue Y) const
2838 std::tie(X, Y) = canonSgn(X, Y);
2839
2841 V6_vmpyh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhv);
2842 } else if (Y.Sgn == Signed) {
2843
2844 V6_vmpyh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyhus);
2845 } else {
2846 V6_vmpyh = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyuhv);
2847 }
2848
2849
2851 HVC.createHvxIntrinsic(Builder, V6_vmpyh, HvxP32Ty, {Y.Val, X.Val});
2852
2853 return HVC.vshuff(Builder, HVC.sublo(Builder, P), HVC.subhi(Builder, P));
2854}
2855
2856auto HvxIdioms::createMulH16(IRBuilderBase &Builder, SValue X, SValue Y) const
2858 Type *HvxI16Ty = HVC.getHvxTy(HVC.getIntTy(16), false);
2859
2862 auto V6_vmpyuhvs = HVC.HST.getIntrinsicId(Hexagon::V6_vmpyuhvs);
2863 return HVC.createHvxIntrinsic(Builder, V6_vmpyuhvs, HvxI16Ty,
2865 }
2866 }
2867
2868 Type *HvxP16Ty = HVC.getHvxTy(HVC.getIntTy(16), true);
2870 Builder.CreateBitCast(createMul16(Builder, X, Y), HvxP16Ty, "cst");
2871 unsigned Len = HVC.length(HvxP16Ty) / 2;
2872
2873 SmallVector<int, 128> PickOdd(Len);
2874 for (int i = 0; i != static_cast<int>(Len); ++i)
2875 PickOdd[i] = 2 * i + 1;
2876
2878 HVC.sublo(Builder, Pair16), HVC.subhi(Builder, Pair16), PickOdd, "shf");
2879}
2880
2881auto HvxIdioms::createMul32(IRBuilderBase &Builder, SValue X, SValue Y) const
2882 -> std::pair<Value *, Value *> {
2883 assert(X.Val->getType() == Y.Val->getType());
2884 assert(X.Val->getType() == HvxI32Ty);
2885
2887 std::tie(X, Y) = canonSgn(X, Y);
2888
2890 V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyss_parts;
2891 } else if (Y.Sgn == Signed) {
2892 V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyus_parts;
2893 } else {
2894 V6_vmpy_parts = Intrinsic::hexagon_V6_vmpyuu_parts;
2895 }
2896
2897 Value *Parts = HVC.createHvxIntrinsic(Builder, V6_vmpy_parts, nullptr,
2898 {X.Val, Y.Val}, {HvxI32Ty});
2902}
2903
2904auto HvxIdioms::createAddLong(IRBuilderBase &Builder, ArrayRef<Value *> WordX,
2907 assert(WordX.size() == WordY.size());
2908 unsigned Idx = 0, Length = WordX.size();
2910
2911 while (Idx != Length) {
2912 if (HVC.isZero(WordX[Idx]))
2913 Sum[Idx] = WordY[Idx];
2914 else if (HVC.isZero(WordY[Idx]))
2915 Sum[Idx] = WordX[Idx];
2916 else
2917 break;
2918 ++Idx;
2919 }
2920
2921 Value *Carry = nullptr;
2922 for (; Idx != Length; ++Idx) {
2923 std::tie(Sum[Idx], Carry) =
2924 createAddCarry(Builder, WordX[Idx], WordY[Idx], Carry);
2925 }
2926
2927
2928 return Sum;
2929}
2930
2931auto HvxIdioms::createMulLong(IRBuilderBase &Builder, ArrayRef<Value *> WordX,
2935
2936
2937
2938 for (int i = 0, e = WordX.size(); i != e; ++i) {
2939 for (int j = 0, f = WordY.size(); j != f; ++j) {
2940
2941 Signedness SX = (i + 1 == e) ? SgnX : Unsigned;
2942 Signedness SY = (j + 1 == f) ? SgnY : Unsigned;
2943 auto [Lo, Hi] = createMul32(Builder, {WordX[i], SX}, {WordY[j], SY});
2944 Products[i + j + 0].push_back(Lo);
2945 Products[i + j + 1].push_back(Hi);
2946 }
2947 }
2948
2950
2951 auto pop_back_or_zero = [Zero](auto &Vector) -> Value * {
2953 return Zero;
2956 return Last;
2957 };
2958
2959 for (int i = 0, e = Products.size(); i != e; ++i) {
2960 while (Products[i].size() > 1) {
2961 Value *Carry = nullptr;
2962 for (int j = i; j != e; ++j) {
2963 auto &ProdJ = Products[j];
2964 auto [Sum, CarryOut] = createAddCarry(Builder, pop_back_or_zero(ProdJ),
2965 pop_back_or_zero(ProdJ), Carry);
2966 ProdJ.insert(ProdJ.begin(), Sum);
2967 Carry = CarryOut;
2968 }
2969 }
2970 }
2971
2973 for (auto &P : Products) {
2974 assert(P.size() == 1 && "Should have been added together");
2976 }
2977
2978 return WordP;
2979}
2980
2981auto HvxIdioms::run() -> bool {
2983
2984 for (BasicBlock &B : HVC.F) {
2985 for (auto It = B.rbegin(); It != B.rend(); ++It) {
2986 if (auto Fxm = matchFxpMul(*It)) {
2987 Value *New = processFxpMul(*It, *Fxm);
2988
2990 if (!New)
2991 continue;
2993 It->replaceAllUsesWith(New);
2995 It = StartOver ? B.rbegin()
2998 } else if (matchGather(*It)) {
2999 Value *New = processVGather(*It);
3000 if (!New)
3001 continue;
3003
3004 It->eraseFromParent();
3008 } else if (matchScatter(*It)) {
3009 Value *New = processVScatter(*It);
3010 if (!New)
3011 continue;
3012 LLVM_DEBUG(dbgs() << " Scatter : " << *New << "\n");
3013
3014 It->eraseFromParent();
3018 }
3019 }
3020 }
3021
3023}
3024
3025
3026
3027auto HexagonVectorCombine::run() -> bool {
3028 if (DumpModule)
3029 dbgs() << "Module before HexagonVectorCombine\n" << *F.getParent();
3030
3032 if (HST.useHVXOps()) {
3033 if (VAEnabled)
3034 Changed |= AlignVectors(*this).run();
3035 if (VIEnabled)
3036 Changed |= HvxIdioms(*this).run();
3037 }
3038
3039 if (DumpModule) {
3040 dbgs() << "Module " << (Changed ? "(modified)" : "(unchanged)")
3041 << " after HexagonVectorCombine\n"
3042 << *F.getParent();
3043 }
3045}
3046
3047auto HexagonVectorCombine::getIntTy(unsigned Width) const -> IntegerType * {
3049}
3050
3051auto HexagonVectorCombine::getByteTy(int ElemCount) const -> Type * {
3052 assert(ElemCount >= 0);
3053 IntegerType *ByteTy = Type::getInt8Ty(F.getContext());
3054 if (ElemCount == 0)
3055 return ByteTy;
3056 return VectorType::get(ByteTy, ElemCount, false);
3057}
3058
3059auto HexagonVectorCombine::getBoolTy(int ElemCount) const -> Type * {
3060 assert(ElemCount >= 0);
3061 IntegerType *BoolTy = Type::getInt1Ty(F.getContext());
3062 if (ElemCount == 0)
3063 return BoolTy;
3064 return VectorType::get(BoolTy, ElemCount, false);
3065}
3066
3067auto HexagonVectorCombine::getConstInt(int Val, unsigned Width) const
3068 -> ConstantInt * {
3070}
3071
3072auto HexagonVectorCombine::isZero(const Value *Val) const -> bool {
3074 return C->isZeroValue();
3075 return false;
3076}
3077
3078auto HexagonVectorCombine::getIntValue(const Value *Val) const
3079 -> std::optional {
3081 return CI->getValue();
3082 return std::nullopt;
3083}
3084
3085auto HexagonVectorCombine::isUndef(const Value *Val) const -> bool {
3087}
3088
3089auto HexagonVectorCombine::isTrue(const Value *Val) const -> bool {
3091}
3092
3093auto HexagonVectorCombine::isFalse(const Value *Val) const -> bool {
3095}
3096
3097auto HexagonVectorCombine::getHvxTy(Type *ElemTy, bool Pair) const
3100 assert(ETy.isSimple() && "Invalid HVX element type");
3101
3102 assert(HST.isHVXElementType(ETy.getSimpleVT(), false) &&
3103 "Invalid HVX element type");
3104 unsigned HwLen = HST.getVectorLength();
3105 unsigned NumElems = (8 * HwLen) / ETy.getSizeInBits();
3106 return VectorType::get(ElemTy, Pair ? 2 * NumElems : NumElems,
3107 false);
3108}
3109
3110auto HexagonVectorCombine::getSizeOf(const Value *Val, SizeKind Kind) const
3111 -> int {
3112 return getSizeOf(Val->getType(), Kind);
3113}
3114
3115auto HexagonVectorCombine::getSizeOf(const Type *Ty, SizeKind Kind) const
3116 -> int {
3117 auto *NcTy = const_cast<Type *>(Ty);
3118 switch (Kind) {
3120 return DL.getTypeStoreSize(NcTy).getFixedValue();
3122 return DL.getTypeAllocSize(NcTy).getFixedValue();
3123 }
3125}
3126
3127auto HexagonVectorCombine::getTypeAlignment(Type *Ty) const -> int {
3128
3129
3130 if (HST.isTypeForHVX(Ty))
3131 return HST.getVectorLength();
3132 return DL.getABITypeAlign(Ty).value();
3133}
3134
3135auto HexagonVectorCombine::length(Value *Val) const -> size_t {
3136 return length(Val->getType());
3137}
3138
3139auto HexagonVectorCombine::length(Type *Ty) const -> size_t {
3141 assert(VecTy && "Must be a vector type");
3142 return VecTy->getElementCount().getFixedValue();
3143}
3144
3145auto HexagonVectorCombine::simplify(Value *V) const -> Value * {
3147 SimplifyQuery Q(DL, &TLI, &DT, &AC, In);
3149 }
3150 return nullptr;
3151}
3152
3153
3154auto HexagonVectorCombine::insertb(IRBuilderBase &Builder, Value *Dst,
3156 int Where) const -> Value * {
3157 assert(isByteVecTy(Dst->getType()) && isByteVecTy(Src->getType()));
3158 int SrcLen = getSizeOf(Src);
3159 int DstLen = getSizeOf(Dst);
3160 assert(0 <= Start && Start + Length <= SrcLen);
3161 assert(0 <= Where && Where + Length <= DstLen);
3162
3165 Value *P2Src = vresize(Builder, Src, P2Len, Poison);
3166 Value *P2Dst = vresize(Builder, Dst, P2Len, Poison);
3167
3169 for (int i = 0; i != P2Len; ++i) {
3170
3171
3172 SMask[i] =
3173 (Where <= i && i < Where + Length) ? P2Len + Start + (i - Where) : i;
3174 }
3175
3177 return vresize(Builder, P2Insert, DstLen, Poison);
3178}
3179
3180auto HexagonVectorCombine::vlalignb(IRBuilderBase &Builder, Value *Lo,
3182 assert(Lo->getType() == Hi->getType() && "Argument type mismatch");
3184 return Hi;
3185 int VecLen = getSizeOf(Hi);
3186 if (auto IntAmt = getIntValue(Amt))
3187 return getElementRange(Builder, Lo, Hi, VecLen - IntAmt->getSExtValue(),
3188 VecLen);
3189
3190 if (HST.isTypeForHVX(Hi->getType())) {
3191 assert(static_cast<unsigned>(VecLen) == HST.getVectorLength() &&
3192 "Expecting an exact HVX type");
3193 return createHvxIntrinsic(Builder, HST.getIntrinsicId(Hexagon::V6_vlalignb),
3194 Hi->getType(), {Hi, Lo, Amt});
3195 }
3196
3197 if (VecLen == 4) {
3202 Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext()), "trn");
3203 return Builder.CreateBitCast(Trunc, Hi->getType(), "cst");
3204 }
3205 if (VecLen == 8) {
3207 return vralignb(Builder, Lo, Hi, Sub);
3208 }
3210}
3211
3212auto HexagonVectorCombine::vralignb(IRBuilderBase &Builder, Value *Lo,
3214 assert(Lo->getType() == Hi->getType() && "Argument type mismatch");
3216 return Lo;
3217 int VecLen = getSizeOf(Lo);
3218 if (auto IntAmt = getIntValue(Amt))
3219 return getElementRange(Builder, Lo, Hi, IntAmt->getSExtValue(), VecLen);
3220
3221 if (HST.isTypeForHVX(Lo->getType())) {
3222 assert(static_cast<unsigned>(VecLen) == HST.getVectorLength() &&
3223 "Expecting an exact HVX type");
3224 return createHvxIntrinsic(Builder, HST.getIntrinsicId(Hexagon::V6_valignb),
3225 Lo->getType(), {Hi, Lo, Amt});
3226 }
3227
3228 if (VecLen == 4) {
3232 Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext()), "trn");
3233 return Builder.CreateBitCast(Trunc, Lo->getType(), "cst");
3234 }
3235 if (VecLen == 8) {
3236 Type *Int64Ty = Type::getInt64Ty(F.getContext());
3240 {Hi64, Lo64, Amt},
3241 nullptr, "cup");
3243 }
3245}
3246
3247
3248auto HexagonVectorCombine::concat(IRBuilderBase &Builder,
3250 assert(!Vecs.empty());
3252 std::vector<Value *> Work[2];
3253 int ThisW = 0, OtherW = 1;
3254
3255 Work[ThisW].assign(Vecs.begin(), Vecs.end());
3256 while (Work[ThisW].size() > 1) {
3258 SMask.resize(length(Ty) * 2);
3259 std::iota(SMask.begin(), SMask.end(), 0);
3260
3261 Work[OtherW].clear();
3262 if (Work[ThisW].size() % 2 != 0)
3264 for (int i = 0, e = Work[ThisW].size(); i < e; i += 2) {
3266 Work[ThisW][i], Work[ThisW][i + 1], SMask, "shf");
3267 Work[OtherW].push_back(Joined);
3268 }
3270 }
3271
3272
3273
3274
3275 SMask.resize(Vecs.size() * length(Vecs.front()->getType()));
3276 std::iota(SMask.begin(), SMask.end(), 0);
3279}
3280
3281auto HexagonVectorCombine::vresize(IRBuilderBase &Builder, Value *Val,
3282 int NewSize, Value *Pad) const -> Value * {
3285 assert(ValTy->getElementType() == Pad->getType());
3286
3287 int CurSize = length(ValTy);
3288 if (CurSize == NewSize)
3289 return Val;
3290
3291 if (CurSize > NewSize)
3292 return getElementRange(Builder, Val, Val, 0, NewSize);
3293
3294 SmallVector<int, 128> SMask(NewSize);
3295 std::iota(SMask.begin(), SMask.begin() + CurSize, 0);
3296 std::fill(SMask.begin() + CurSize, SMask.end(), CurSize);
3299}
3300
3301auto HexagonVectorCombine::rescale(IRBuilderBase &Builder, Value *Mask,
3303
3304
3305
3307
3308 Type *FromSTy = FromTy->getScalarType();
3309 Type *ToSTy = ToTy->getScalarType();
3310 if (FromSTy == ToSTy)
3311 return Mask;
3312
3313 int FromSize = getSizeOf(FromSTy);
3314 int ToSize = getSizeOf(ToSTy);
3315 assert(FromSize % ToSize == 0 || ToSize % FromSize == 0);
3316
3318 int FromCount = length(MaskTy);
3319 int ToCount = (FromCount * FromSize) / ToSize;
3320 assert((FromCount * FromSize) % ToSize == 0);
3321
3322 auto *FromITy = getIntTy(FromSize * 8);
3323 auto *ToITy = getIntTy(ToSize * 8);
3324
3325
3326
3328 Mask, VectorType::get(FromITy, FromCount, false), "sxt");
3330 Ext, VectorType::get(ToITy, ToCount, false), "cst");
3332 Cast, VectorType::get(getBoolTy(), ToCount, false), "trn");
3333}
3334
3335
3336auto HexagonVectorCombine::vlsb(IRBuilderBase &Builder, Value *Val) const
3339 if (ScalarTy == getBoolTy())
3340 return Val;
3341
3342 Value *Bytes = vbytes(Builder, Val);
3344 return Builder.CreateTrunc(Bytes, getBoolTy(getSizeOf(VecTy)), "trn");
3345
3346
3347 return Builder.CreateTrunc(Bytes, getBoolTy(), "trn");
3348}
3349
3350
3351auto HexagonVectorCombine::vbytes(IRBuilderBase &Builder, Value *Val) const
3354 if (ScalarTy == getByteTy())
3355 return Val;
3356
3357 if (ScalarTy != getBoolTy())
3358 return Builder.CreateBitCast(Val, getByteTy(getSizeOf(Val)), "cst");
3359
3361 return Builder.CreateSExt(Val, VectorType::get(getByteTy(), VecTy), "sxt");
3362 return Builder.CreateSExt(Val, getByteTy(), "sxt");
3363}
3364
3365auto HexagonVectorCombine::subvector(IRBuilderBase &Builder, Value *Val,
3366 unsigned Start, unsigned Length) const
3369 return getElementRange(Builder, Val, Val, Start, Length);
3370}
3371
3372auto HexagonVectorCombine::sublo(IRBuilderBase &Builder, Value *Val) const
3374 size_t Len = length(Val);
3375 assert(Len % 2 == 0 && "Length should be even");
3376 return subvector(Builder, Val, 0, Len / 2);
3377}
3378
3379auto HexagonVectorCombine::subhi(IRBuilderBase &Builder, Value *Val) const
3381 size_t Len = length(Val);
3382 assert(Len % 2 == 0 && "Length should be even");
3383 return subvector(Builder, Val, Len / 2, Len / 2);
3384}
3385
3386auto HexagonVectorCombine::vdeal(IRBuilderBase &Builder, Value *Val0,
3388 assert(Val0->getType() == Val1->getType());
3389 int Len = length(Val0);
3390 SmallVector<int, 128> Mask(2 * Len);
3391
3392 for (int i = 0; i != Len; ++i) {
3393 Mask[i] = 2 * i;
3394 Mask[i + Len] = 2 * i + 1;
3395 }
3397}
3398
3399auto HexagonVectorCombine::vshuff(IRBuilderBase &Builder, Value *Val0,
3401 assert(Val0->getType() == Val1->getType());
3402 int Len = length(Val0);
3403 SmallVector<int, 128> Mask(2 * Len);
3404
3405 for (int i = 0; i != Len; ++i) {
3406 Mask[2 * i + 0] = i;
3407 Mask[2 * i + 1] = i + Len;
3408 }
3410}
3411
3412auto HexagonVectorCombine::createHvxIntrinsic(IRBuilderBase &Builder,
3418 auto getCast = [&](IRBuilderBase &Builder, Value *Val,
3420 Type *SrcTy = Val->getType();
3421 if (SrcTy == DestTy)
3422 return Val;
3423
3424
3425
3426 assert(HST.isTypeForHVX(SrcTy, true));
3427
3428 Type *BoolTy = Type::getInt1Ty(F.getContext());
3430 return Builder.CreateBitCast(Val, DestTy, "cst");
3431
3432
3433 unsigned HwLen = HST.getVectorLength();
3434 Intrinsic::ID TC = HwLen == 64 ? Intrinsic::hexagon_V6_pred_typecast
3435 : Intrinsic::hexagon_V6_pred_typecast_128B;
3436 return Builder.CreateIntrinsic(TC, {DestTy, Val->getType()}, {Val},
3437 nullptr, "cup");
3438 };
3439
3443
3445 for (int i = 0, e = Args.size(); i != e; ++i) {
3447 Type *T = IntrTy->getParamType(i);
3449 IntrArgs.push_back(getCast(Builder, A, T));
3450 } else {
3452 }
3453 }
3454 StringRef MaybeName = !IntrTy->getReturnType()->isVoidTy() ? "cup" : "";
3455 CallInst *Call = Builder.CreateCall(IntrFn, IntrArgs, MaybeName);
3456
3460
3462 if (RetTy == nullptr || CallTy == RetTy)
3463 return Call;
3464
3465 assert(HST.isTypeForHVX(CallTy, true));
3466 return getCast(Builder, Call, RetTy);
3467}
3468
3469auto HexagonVectorCombine::splitVectorElements(IRBuilderBase &Builder,
3471 unsigned ToWidth) const
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
3486 assert(VecTy->getElementType()->isIntegerTy());
3487 unsigned FromWidth = VecTy->getScalarSizeInBits();
3489 assert(ToWidth <= FromWidth && "Breaking up into wider elements?");
3490 unsigned NumResults = FromWidth / ToWidth;
3491
3494 unsigned Length = length(VecTy);
3495
3496
3497
3498 auto splitInHalf = [&](unsigned Begin, unsigned End, auto splitFunc) -> void {
3499
3500
3501
3502 if (Begin + 1 == End)
3503 return;
3504
3507
3508 auto *VTy = VectorType::get(getIntTy(Width / 2), 2 * Length, false);
3510
3511 Value *Res = vdeal(Builder, sublo(Builder, VVal), subhi(Builder, VVal));
3512
3513 unsigned Half = (Begin + End) / 2;
3514 Results[Begin] = sublo(Builder, Res);
3515 Results[Half] = subhi(Builder, Res);
3516
3517 splitFunc(Begin, Half, splitFunc);
3518 splitFunc(Half, End, splitFunc);
3519 };
3520
3521 splitInHalf(0, NumResults, splitInHalf);
3523}
3524
3525auto HexagonVectorCombine::joinVectorElements(IRBuilderBase &Builder,
3527 VectorType *ToType) const
3529 assert(ToType->getElementType()->isIntegerTy());
3530
3531
3532
3533
3534
3535
3536
3537
3539
3540 unsigned ToWidth = ToType->getScalarSizeInBits();
3541 unsigned Width = Inputs.front()->getType()->getScalarSizeInBits();
3542 assert(Width <= ToWidth);
3544 unsigned Length = length(Inputs.front()->getType());
3545
3546 unsigned NeedInputs = ToWidth / Width;
3547 if (Inputs.size() != NeedInputs) {
3548
3549
3552 Last, ConstantInt::get(Last->getType(), Width - 1), "asr");
3553 Inputs.resize(NeedInputs, Sign);
3554 }
3555
3556 while (Inputs.size() > 1) {
3557 Width *= 2;
3558 auto *VTy = VectorType::get(getIntTy(Width), Length, false);
3559 for (int i = 0, e = Inputs.size(); i < e; i += 2) {
3560 Value *Res = vshuff(Builder, Inputs[i], Inputs[i + 1]);
3561 Inputs[i / 2] = Builder.CreateBitCast(Res, VTy, "cst");
3562 }
3563 Inputs.resize(Inputs.size() / 2);
3564 }
3565
3566 assert(Inputs.front()->getType() == ToType);
3567 return Inputs.front();
3568}
3569
3570auto HexagonVectorCombine::calculatePointerDifference(Value *Ptr0,
3571 Value *Ptr1) const
3572 -> std::optional {
3573
3574 const SCEV *Scev0 = SE.getSCEV(Ptr0);
3575 const SCEV *Scev1 = SE.getSCEV(Ptr1);
3576 const SCEV *ScevDiff = SE.getMinusSCEV(Scev0, Scev1);
3578 APInt V = Const->getAPInt();
3579 if (V.isSignedIntN(8 * sizeof(int)))
3580 return static_cast<int>(V.getSExtValue());
3581 }
3582
3585 ~Builder() {
3587 I->eraseFromParent();
3588 }
3589 SmallVector<Instruction *, 8> ToErase;
3590 };
3591
3592#define CallBuilder(B, F) \
3593 [&](auto &B_) { \
3594 Value *V = B_.F; \
3595 if (auto *I = dyn_cast(V)) \
3596 B_.ToErase.push_back(I); \
3597 return V; \
3598 }(B)
3599
3600 auto Simplify = [this](Value *V) {
3602 return S;
3603 return V;
3604 };
3605
3606 auto StripBitCast = [](Value *V) {
3609 return V;
3610 };
3611
3612 Ptr0 = StripBitCast(Ptr0);
3613 Ptr1 = StripBitCast(Ptr1);
3615 return std::nullopt;
3616
3619 if (Gep0->getPointerOperand() != Gep1->getPointerOperand())
3620 return std::nullopt;
3621 if (Gep0->getSourceElementType() != Gep1->getSourceElementType())
3622 return std::nullopt;
3623
3624 Builder B(Gep0->getParent());
3625 int Scale = getSizeOf(Gep0->getSourceElementType(), Alloc);
3626
3627
3628 if (Gep0->getNumOperands() != 2 || Gep1->getNumOperands() != 2)
3629 return std::nullopt;
3630
3631 Value *Idx0 = Gep0->getOperand(1);
3632 Value *Idx1 = Gep1->getOperand(1);
3633
3634
3636 Simplify(CallBuilder(B, CreateSub(Idx0, Idx1)))))
3637 return Diff->getSExtValue() * Scale;
3638
3639 KnownBits Known0 = getKnownBits(Idx0, Gep0);
3640 KnownBits Known1 = getKnownBits(Idx1, Gep1);
3642 if (Unknown.isAllOnes())
3643 return std::nullopt;
3644
3646 Value *AndU0 = Simplify(CallBuilder(B, CreateAnd(Idx0, MaskU)));
3647 Value *AndU1 = Simplify(CallBuilder(B, CreateAnd(Idx1, MaskU)));
3648 Value *SubU = Simplify(CallBuilder(B, CreateSub(AndU0, AndU1)));
3649 int Diff0 = 0;
3651 Diff0 = C->getSExtValue();
3652 } else {
3653 return std::nullopt;
3654 }
3655
3657 Value *AndK0 = Simplify(CallBuilder(B, CreateAnd(Idx0, MaskK)));
3658 Value *AndK1 = Simplify(CallBuilder(B, CreateAnd(Idx1, MaskK)));
3659 Value *SubK = Simplify(CallBuilder(B, CreateSub(AndK0, AndK1)));
3660 int Diff1 = 0;
3662 Diff1 = C->getSExtValue();
3663 } else {
3664 return std::nullopt;
3665 }
3666
3667 return (Diff0 + Diff1) * Scale;
3668
3669#undef CallBuilder
3670}
3671
3672auto HexagonVectorCombine::getNumSignificantBits(const Value *V,
3673 const Instruction *CtxI) const
3674 -> unsigned {
3676}
3677
3678auto HexagonVectorCombine::getKnownBits(const Value *V,
3679 const Instruction *CtxI) const
3680 -> KnownBits {
3682}
3683
3684auto HexagonVectorCombine::isSafeToClone(const Instruction &In) const -> bool {
3685 if (In.mayHaveSideEffects() || In.isAtomic() || In.isVolatile() ||
3686 In.isFenceLike() || In.mayReadOrWriteMemory()) {
3687 return false;
3688 }
3690 return false;
3691 return true;
3692}
3693
3694template
3695auto HexagonVectorCombine::isSafeToMoveBeforeInBB(const Instruction &In,
3697 const T &IgnoreInsts) const
3698 -> bool {
3699 auto getLocOrNone =
3700 [this](const Instruction &I) -> std::optional {
3702 switch (II->getIntrinsicID()) {
3703 case Intrinsic::masked_load:
3705 case Intrinsic::masked_store:
3707 }
3708 }
3710 };
3711
3712
3715
3717 return false;
3718
3720 return true;
3721 bool MayWrite = In.mayWriteToMemory();
3722 auto MaybeLoc = getLocOrNone(In);
3723
3724 auto From = In.getIterator();
3725 if (From == To)
3726 return true;
3727 bool MoveUp = (To != Block.end() && To->comesBefore(&In));
3729 MoveUp ? std::make_pair(To, From) : std::make_pair(std::next(From), To);
3730 for (auto It = Range.first; It != Range.second; ++It) {
3731 const Instruction &I = *It;
3732 if (llvm::is_contained(IgnoreInsts, &I))
3733 continue;
3734
3735 if (auto *II = dyn_cast(&I)) {
3736 if (II->getIntrinsicID() == Intrinsic::assume)
3737 continue;
3738 }
3739
3741 return false;
3743 if (!CB->hasFnAttr(Attribute::WillReturn))
3744 return false;
3745 if (!CB->hasFnAttr(Attribute::NoSync))
3746 return false;
3747 }
3748 if (I.mayReadOrWriteMemory()) {
3749 auto MaybeLocI = getLocOrNone(I);
3750 if (MayWrite || I.mayWriteToMemory()) {
3751 if (!MaybeLoc || !MaybeLocI)
3752 return false;
3753 if (!AA.isNoAlias(*MaybeLoc, *MaybeLocI))
3754 return false;
3755 }
3756 }
3757 }
3758 return true;
3759}
3760
3761auto HexagonVectorCombine::isByteVecTy(Type *Ty) const -> bool {
3763 return VecTy->getElementType() == getByteTy();
3764 return false;
3765}
3766
3767auto HexagonVectorCombine::getElementRange(IRBuilderBase &Builder, Value *Lo,
3770 assert(0 <= Start && size_t(Start + Length) < length(Lo) + length(Hi));
3771 SmallVector<int, 128> SMask(Length);
3772 std::iota(SMask.begin(), SMask.end(), Start);
3774}
3775
3776
3777
3778namespace {
3779class HexagonVectorCombineLegacy : public FunctionPass {
3780public:
3781 static char ID;
3782
3783 HexagonVectorCombineLegacy() : FunctionPass(ID) {}
3784
3785 StringRef getPassName() const override { return "Hexagon Vector Combine"; }
3786
3787 void getAnalysisUsage(AnalysisUsage &AU) const override {
3790 AU.addRequired();
3791 AU.addRequired();
3792 AU.addRequired();
3793 AU.addRequired();
3795 FunctionPass::getAnalysisUsage(AU);
3796 }
3797
3799 if (skipFunction(F))
3800 return false;
3801 AliasAnalysis &AA = getAnalysis().getAAResults();
3802 AssumptionCache &AC =
3803 getAnalysis().getAssumptionCache(F);
3804 DominatorTree &DT = getAnalysis().getDomTree();
3805 ScalarEvolution &SE = getAnalysis().getSE();
3806 TargetLibraryInfo &TLI =
3807 getAnalysis().getTLI(F);
3808 auto &TM = getAnalysis().getTM();
3809 HexagonVectorCombine HVC(F, AA, AC, DT, SE, TLI, TM);
3810 return HVC.run();
3811 }
3812};
3813}
3814
3815char HexagonVectorCombineLegacy::ID = 0;
3816
3818 "Hexagon Vector Combine", false, false)
3827
3829 return new HexagonVectorCombineLegacy();
3830}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Prepare AGPR Alloc
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
static IntegerType * getIntTy(IRBuilderBase &B, const TargetLibraryInfo *TLI)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
This file defines the DenseMap class.
static bool runOnFunction(Function &F, bool PostInlining)
static cl::opt< unsigned > SizeLimit("eif-limit", cl::init(6), cl::Hidden, cl::desc("Size limit in Hexagon early if-conversion"))
static Value * locateIndexesFromIntrinsic(Instruction *In)
Definition HexagonVectorCombine.cpp:2047
Instruction * locateDestination(Instruction *In, HvxIdioms::DstQualifier &Qual)
Definition HexagonVectorCombine.cpp:1918
Value * getReinterpretiveCast_i8_to_i32(const HexagonVectorCombine &HVC, IRBuilderBase &Builder, LLVMContext &Ctx, Value *I)
Definition HexagonVectorCombine.cpp:2079
static Value * locateIndexesFromGEP(Value *In)
Definition HexagonVectorCombine.cpp:2019
#define CallBuilder(B, F)
Value * getPointer(Value *Ptr)
Definition HexagonVectorCombine.cpp:1852
#define DEFAULT_HVX_VTCM_PAGE_SIZE
Definition HexagonVectorCombine.cpp:63
static Value * locateAddressFromIntrinsic(Instruction *In)
Definition HexagonVectorCombine.cpp:1957
static Instruction * selectDestination(Instruction *In, HvxIdioms::DstQualifier &Qual)
Definition HexagonVectorCombine.cpp:1865
Value * get_i32_Mask(const HexagonVectorCombine &HVC, IRBuilderBase &Builder, LLVMContext &Ctx, unsigned int pattern)
Definition HexagonVectorCombine.cpp:2094
bool isArithmetic(unsigned Opc)
Definition HexagonVectorCombine.cpp:1834
static Type * getIndexType(Value *In)
Definition HexagonVectorCombine.cpp:2003
GetElementPtrInst * locateGepFromIntrinsic(Instruction *In)
Definition HexagonVectorCombine.cpp:1940
Value * getReinterpretiveCast_i16_to_i32(const HexagonVectorCombine &HVC, IRBuilderBase &Builder, LLVMContext &Ctx, Value *I)
Definition HexagonVectorCombine.cpp:2064
iv Induction Variable Users
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
static bool isCandidate(const MachineInstr *MI, Register &DefedReg, Register FrameReg)
static bool isUndef(const MachineInstr &MI)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Remove Loads Into Fake Uses
static ConstantInt * getConstInt(MDNode *MD, unsigned NumOp)
This file defines the SmallVector class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
Target-Independent Code Generator Pass Configuration Options pass.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
unsigned getAddressSpace() const
Return the address space for the allocation.
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
InstListType::const_iterator const_iterator
InstListType::iterator iterator
Instruction iterators...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
AttributeList getAttributes() const
Return the attributes for this call.
@ ICMP_ULT
unsigned less than
static LLVM_ABI Constant * get(LLVMContext &Context, ArrayRef< uint8_t > Elts)
get() constructors - Return a constant with vector type with an element count and element type matchi...
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
LLVM_ABI TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
iterator_range< iterator > children()
DomTreeNodeBase< NodeT > * getRootNode()
getRootNode - This returns the entry node for the CFG of the function.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
FunctionPass class - This class is used to implement most global optimizations.
FunctionType * getFunctionType() const
Returns the FunctionType for me.
const BasicBlock & back() const
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Value * getPointerOperand()
bool isHVXVectorType(EVT VecTy, bool IncludeBool=false) const
bool useHVXV62Ops() const
bool useHVXV69Ops() const
unsigned getVectorLength() const
bool useHVXV66Ops() const
bool isTypeForHVX(Type *VecTy, bool IncludeBool=false) const
Intrinsic::ID getIntrinsicId(unsigned Opc) const
Common base class shared among various IRBuilders.
AllocaInst * CreateAlloca(Type *Ty, unsigned AddrSpace, Value *ArraySize=nullptr, const Twine &Name="")
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
const char * getOpcodeName() const
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
bool doesNotAccessMemory() const
Whether this function accesses no memory.
bool onlyAccessesInaccessibleMem() const
Whether this function only (at most) accesses inaccessible memory.
static LLVM_ABI std::optional< MemoryLocation > getOrNone(const Instruction *Inst)
static LLVM_ABI MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx, const TargetLibraryInfo *TLI)
Return a location representing a particular argument of a call.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
The main scalar evolution driver.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Target-Independent Code Generator Pass Configuration Options.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
const ParentTy * getParent() const
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
Rounding
Possible values of current rounding mode, which is specified in bits 23:22 of FPCR.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
@ Undef
Value of the register doesn't matter.
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
friend class Instruction
Iterator for Instructions in a `BasicBlock.
LLVM_ABI Instruction * getTerminator() const
LLVM_ABI Instruction & front() const
This is an optimization pass for GlobalISel generic memory operations.
FunctionPass * createHexagonVectorCombineLegacyPass()
Definition HexagonVectorCombine.cpp:3828
FunctionAddr VTableAddr Value
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
decltype(auto) dyn_cast(const From &Val)
dyn_cast - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
MemoryEffectsBase< IRMemLocation > MemoryEffects
Summary of how a function affects memory in the program.
LLVM_ABI Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
DomTreeNodeBase< BasicBlock > DomTreeNode
auto reverse(ContainerTy &&C)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
DWARFExpression::Operation Op
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast - Return the argument parameter cast to the specified type.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
LLVM_ABI unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Get the upper bound on bit size for this Value Op as a signed integer.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
LLVM_ABI bool mayHaveNonDefUseDependency(const Instruction &I)
Returns true if the result or effects of the given instructions I depend values not reachable through...
MaskT vshuff(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Size, bool TakeOdd)
MaskT vdeal(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Size, bool TakeOdd)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.