LLVM: lib/Transforms/Scalar/Scalarizer.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
43#include
44#include
45#include
46#include
47#include
48
49using namespace llvm;
50
51#define DEBUG_TYPE "scalarizer"
52
53namespace {
54
57 if (isa(Itr))
59 if (Itr != BB->end())
61 return Itr;
62}
63
64
66
67
68
69
70
71
72
73using ScatterMap = std::map<std::pair<Value *, Type *>, ValueVector>;
74
75
76
78
79struct VectorSplit {
80
82
83
84 unsigned NumPacked = 0;
85
86
87
88 unsigned NumFragments = 0;
89
90
91 Type *SplitTy = nullptr;
92
93
94
95 Type *RemainderTy = nullptr;
96
97 Type *getFragmentType(unsigned I) const {
98 return RemainderTy && I == NumFragments - 1 ? RemainderTy : SplitTy;
99 }
100};
101
102
103
104class Scatterer {
105public:
106 Scatterer() = default;
107
108
109
110
112 const VectorSplit &VS, ValueVector *cachePtr = nullptr);
113
114
115 Value *operator[](unsigned I);
116
117
118 unsigned size() const { return VS.NumFragments; }
119
120private:
124 VectorSplit VS;
125 bool IsPointer;
126 ValueVector *CachePtr;
127 ValueVector Tmp;
128};
129
130
131
132struct FCmpSplitter {
133 FCmpSplitter(FCmpInst &fci) : FCI(fci) {}
134
137 return Builder.CreateFCmp(FCI.getPredicate(), Op0, Op1, Name);
138 }
139
141};
142
143
144
145struct ICmpSplitter {
146 ICmpSplitter(ICmpInst &ici) : ICI(ici) {}
147
150 return Builder.CreateICmp(ICI.getPredicate(), Op0, Op1, Name);
151 }
152
154};
155
156
157
158struct UnarySplitter {
160
163 }
164
166};
167
168
169
170struct BinarySplitter {
172
175 return Builder.CreateBinOp(BO.getOpcode(), Op0, Op1, Name);
176 }
177
179};
180
181
182struct VectorLayout {
183 VectorLayout() = default;
184
185
186 Align getFragmentAlign(unsigned Frag) {
188 }
189
190
191 VectorSplit VS;
192
193
195
196
198};
199
200static bool isStructOfMatchingFixedVectors(Type *Ty) {
201 if (!isa(Ty))
202 return false;
204 if (StructSize < 1)
205 return false;
207 if (!VecTy)
208 return false;
210 for (unsigned I = 1; I < StructSize; I++) {
213 return false;
214 }
215 return true;
216}
217
218
219
221 const VectorSplit &VS, Twine Name) {
222 unsigned NumElements = VS.VecTy->getNumElements();
225
226 if (VS.NumPacked > 1) {
227
228
229 ExtendMask.resize(NumElements, -1);
230 for (unsigned I = 0; I < VS.NumPacked; ++I)
232
233 InsertMask.resize(NumElements);
234 for (unsigned I = 0; I < NumElements; ++I)
236 }
237
239 for (unsigned I = 0; I < VS.NumFragments; ++I) {
240 Value *Fragment = Fragments[I];
241
242 unsigned NumPacked = VS.NumPacked;
243 if (I == VS.NumFragments - 1 && VS.RemainderTy) {
244 if (auto *RemVecTy = dyn_cast(VS.RemainderTy))
245 NumPacked = RemVecTy->getNumElements();
246 else
247 NumPacked = 1;
248 }
249
250 if (NumPacked == 1) {
253 } else {
255 if (I == 0) {
256 Res = Fragment;
257 } else {
258 for (unsigned J = 0; J < NumPacked; ++J)
259 InsertMask[I * VS.NumPacked + J] = NumElements + J;
262 for (unsigned J = 0; J < NumPacked; ++J)
263 InsertMask[I * VS.NumPacked + J] = I * VS.NumPacked + J;
264 }
265 }
266 }
267
268 return Res;
269}
270
271class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
272public:
276 ScalarizeVariableInsertExtract(Options.ScalarizeVariableInsertExtract),
277 ScalarizeLoadStore(Options.ScalarizeLoadStore),
278 ScalarizeMinBits(Options.ScalarizeMinBits) {}
279
281
282
283
302
303private:
304 Scatterer scatter(Instruction *Point, Value *V, const VectorSplit &VS);
305 void gather(Instruction *Op, const ValueVector &CV, const VectorSplit &VS);
307 bool canTransferMetadata(unsigned Kind);
308 void transferMetadataAndIRFlags(Instruction *Op, const ValueVector &CV);
309 std::optional getVectorSplit(Type *Ty);
310 std::optional getVectorLayout(Type *Ty, Align Alignment,
312 bool finish();
313
314 template bool splitUnary(Instruction &, const T &);
315 template bool splitBinary(Instruction &, const T &);
316
317 bool splitCall(CallInst &CI);
318
319 ScatterMap Scattered;
320 GatherList Gathered;
321 bool Scalarized;
322
324
327
328 const bool ScalarizeVariableInsertExtract;
329 const bool ScalarizeLoadStore;
330 const unsigned ScalarizeMinBits;
331};
332
333class ScalarizerLegacyPass : public FunctionPass {
334public:
335 static char ID;
341};
342
343}
344
347
348void ScalarizerLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
352}
353
354char ScalarizerLegacyPass::ID = 0;
356 "Scalarize vector operations", false, false)
360
362 const VectorSplit &VS, ValueVector *cachePtr)
363 : BB(bb), BBI(bbi), V(v), VS(VS), CachePtr(cachePtr) {
364 IsPointer = V->getType()->isPointerTy();
365 if (!CachePtr) {
366 Tmp.resize(VS.NumFragments, nullptr);
367 } else {
368 assert((CachePtr->empty() || VS.NumFragments == CachePtr->size() ||
369 IsPointer) &&
370 "Inconsistent vector sizes");
371 if (VS.NumFragments > CachePtr->size())
372 CachePtr->resize(VS.NumFragments, nullptr);
373 }
374}
375
376
377Value *Scatterer::operator[](unsigned Frag) {
378 ValueVector &CV = CachePtr ? *CachePtr : Tmp;
379
380 if (CV[Frag])
381 return CV[Frag];
383 if (IsPointer) {
384 if (Frag == 0)
385 CV[Frag] = V;
386 else
387 CV[Frag] = Builder.CreateConstGEP1_32(VS.SplitTy, V, Frag,
388 V->getName() + ".i" + Twine(Frag));
389 return CV[Frag];
390 }
391
392 Type *FragmentTy = VS.getFragmentType(Frag);
393
394 if (auto *VecTy = dyn_cast(FragmentTy)) {
396 for (unsigned J = 0; J < VecTy->getNumElements(); ++J)
397 Mask.push_back(Frag * VS.NumPacked + J);
398 CV[Frag] =
399 Builder.CreateShuffleVector(V, PoisonValue::get(V->getType()), Mask,
400 V->getName() + ".i" + Twine(Frag));
401 } else {
402
403
404
405 while (true) {
407 if (!Insert)
408 break;
410 if ()
411 break;
412 unsigned J = Idx->getZExtValue();
413 V = Insert->getOperand(0);
414 if (Frag * VS.NumPacked == J) {
415 CV[Frag] = Insert->getOperand(1);
416 return CV[Frag];
417 }
418
419 if (VS.NumPacked == 1 && !CV[J]) {
420
421
422
423 CV[J] = Insert->getOperand(1);
424 }
425 }
426 CV[Frag] = Builder.CreateExtractElement(V, Frag * VS.NumPacked,
427 V->getName() + ".i" + Twine(Frag));
428 }
429
430 return CV[Frag];
431}
432
433bool ScalarizerLegacyPass::runOnFunction(Function &F) {
434 if (skipFunction(F))
435 return false;
436
437 DominatorTree *DT = &getAnalysis().getDomTree();
439 &getAnalysis().getTTI(F);
440 ScalarizerVisitor Impl(DT, TTI, Options);
441 return Impl.visit(F);
442}
443
445 return new ScalarizerLegacyPass(Options);
446}
447
448bool ScalarizerVisitor::visit(Function &F) {
449 assert(Gathered.empty() && Scattered.empty());
450
451 Scalarized = false;
452
453
454
460 ++II;
461 if (Done && I->getType()->isVoidTy())
462 I->eraseFromParent();
463 }
464 }
465 return finish();
466}
467
468
469
470Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V,
471 const VectorSplit &VS) {
472 if (Argument *VArg = dyn_cast(V)) {
473
474
477 return Scatterer(BB, BB->begin(), V, VS, &Scattered[{V, VS.SplitTy}]);
478 }
479 if (Instruction *VOp = dyn_cast(V)) {
480
481
482
483
484
485
489
490
492 return Scatterer(
494 &Scattered[{V, VS.SplitTy}]);
495 }
496
497
499}
500
501
502
503
504
505void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV,
506 const VectorSplit &VS) {
507 transferMetadataAndIRFlags(Op, CV);
508
509
510
511 ValueVector &SV = Scattered[{Op, VS.SplitTy}];
512 if (!SV.empty()) {
513 for (unsigned I = 0, E = SV.size(); I != E; ++I) {
515 if (V == nullptr || SV[I] == CV[I])
516 continue;
517
519 if (isa(CV[I]))
520 CV[I]->takeName(Old);
522 PotentiallyDeadInstrs.emplace_back(Old);
523 }
524 }
525 SV = CV;
526 Gathered.push_back(GatherList::value_type(Op, &SV));
527}
528
529
531 if (CV != Op) {
532 Op->replaceAllUsesWith(CV);
533 PotentiallyDeadInstrs.emplace_back(Op);
534 Scalarized = true;
535 }
536}
537
538
539
540bool ScalarizerVisitor::canTransferMetadata(unsigned Tag) {
541 return (Tag == LLVMContext::MD_tbaa
542 || Tag == LLVMContext::MD_fpmath
543 || Tag == LLVMContext::MD_tbaa_struct
544 || Tag == LLVMContext::MD_invariant_load
545 || Tag == LLVMContext::MD_alias_scope
546 || Tag == LLVMContext::MD_noalias
547 || Tag == LLVMContext::MD_mem_parallel_loop_access
548 || Tag == LLVMContext::MD_access_group);
549}
550
551
552
553void ScalarizerVisitor::transferMetadataAndIRFlags(Instruction *Op,
554 const ValueVector &CV) {
556 Op->getAllMetadataOtherThanDebugLoc(MDs);
557 for (Value *V : CV) {
558 if (Instruction *New = dyn_cast(V)) {
559 for (const auto &MD : MDs)
560 if (canTransferMetadata(MD.first))
561 New->setMetadata(MD.first, MD.second);
563 if (Op->getDebugLoc() && ->getDebugLoc())
564 New->setDebugLoc(Op->getDebugLoc());
565 }
566 }
567}
568
569
570std::optional ScalarizerVisitor::getVectorSplit(Type *Ty) {
571 VectorSplit Split;
572 Split.VecTy = dyn_cast(Ty);
573 if (.VecTy)
574 return {};
575
576 unsigned NumElems = Split.VecTy->getNumElements();
577 Type *ElemTy = Split.VecTy->getElementType();
578
579 if (NumElems == 1 || ElemTy->isPointerTy() ||
581 Split.NumPacked = 1;
582 Split.NumFragments = NumElems;
583 Split.SplitTy = ElemTy;
584 } else {
586 if (Split.NumPacked >= NumElems)
587 return {};
588
591
592 unsigned RemainderElems = NumElems % Split.NumPacked;
593 if (RemainderElems > 1)
595 else if (RemainderElems == 1)
596 Split.RemainderTy = ElemTy;
597 }
598
600}
601
602
603
604
605std::optional
606ScalarizerVisitor::getVectorLayout(Type *Ty, Align Alignment,
608 std::optional VS = getVectorSplit(Ty);
609 if (!VS)
610 return {};
611
613 Layout.VS = *VS;
614
615 if (.typeSizeEqualsStoreSize(VS->SplitTy) ||
616 (VS->RemainderTy && .typeSizeEqualsStoreSize(VS->RemainderTy)))
617 return {};
618 Layout.VecAlign = Alignment;
619 Layout.SplitSize = DL.getTypeStoreSize(VS->SplitTy);
620 return Layout;
621}
622
623
624
625template
626bool ScalarizerVisitor::splitUnary(Instruction &I, const Splitter &Split) {
627 std::optional VS = getVectorSplit(I.getType());
628 if (!VS)
629 return false;
630
631 std::optional OpVS;
632 if (I.getOperand(0)->getType() == I.getType()) {
633 OpVS = VS;
634 } else {
635 OpVS = getVectorSplit(I.getOperand(0)->getType());
636 if (!OpVS || VS->NumPacked != OpVS->NumPacked)
637 return false;
638 }
639
641 Scatterer Op = scatter(&I, I.getOperand(0), *OpVS);
642 assert(Op.size() == VS->NumFragments && "Mismatched unary operation");
643 ValueVector Res;
644 Res.resize(VS->NumFragments);
645 for (unsigned Frag = 0; Frag < VS->NumFragments; ++Frag)
646 Res[Frag] = Split(Builder, Op[Frag], I.getName() + ".i" + Twine(Frag));
647 gather(&I, Res, *VS);
648 return true;
649}
650
651
652
653template
654bool ScalarizerVisitor::splitBinary(Instruction &I, const Splitter &Split) {
655 std::optional VS = getVectorSplit(I.getType());
656 if (!VS)
657 return false;
658
659 std::optional OpVS;
660 if (I.getOperand(0)->getType() == I.getType()) {
661 OpVS = VS;
662 } else {
663 OpVS = getVectorSplit(I.getOperand(0)->getType());
664 if (!OpVS || VS->NumPacked != OpVS->NumPacked)
665 return false;
666 }
667
669 Scatterer VOp0 = scatter(&I, I.getOperand(0), *OpVS);
670 Scatterer VOp1 = scatter(&I, I.getOperand(1), *OpVS);
671 assert(VOp0.size() == VS->NumFragments && "Mismatched binary operation");
672 assert(VOp1.size() == VS->NumFragments && "Mismatched binary operation");
673 ValueVector Res;
674 Res.resize(VS->NumFragments);
675 for (unsigned Frag = 0; Frag < VS->NumFragments; ++Frag) {
676 Value *Op0 = VOp0[Frag];
677 Value *Op1 = VOp1[Frag];
678 Res[Frag] = Split(Builder, Op0, Op1, I.getName() + ".i" + Twine(Frag));
679 }
680 gather(&I, Res, *VS);
681 return true;
682}
683
684
685
686bool ScalarizerVisitor::splitCall(CallInst &CI) {
688 bool AreAllVectorsOfMatchingSize = isStructOfMatchingFixedVectors(CallType);
689 std::optional VS;
690 if (AreAllVectorsOfMatchingSize)
692 else
693 VS = getVectorSplit(CallType);
694 if (!VS)
695 return false;
696
698 if ()
699 return false;
700
702
704 return false;
705
706
707 unsigned NumArgs = CI.arg_size();
708
709 ValueVector ScalarOperands(NumArgs);
712
714
716 Tys.push_back(VS->SplitTy);
717
718 if (AreAllVectorsOfMatchingSize) {
720 std::optional CurrVS =
721 getVectorSplit(cast(CallType->getContainedType(I)));
722
723
724
725
726
727
728
729 if (!CurrVS || CurrVS->NumPacked != VS->NumPacked)
730 return false;
732 Tys.push_back(CurrVS->SplitTy);
733 }
734 }
735
736
737 for (unsigned I = 0; I != NumArgs; ++I) {
739 if ([[maybe_unused]] auto *OpVecTy =
740 dyn_cast(OpI->getType())) {
741 assert(OpVecTy->getNumElements() == VS->VecTy->getNumElements());
742 std::optional OpVS = getVectorSplit(OpI->getType());
743 if (!OpVS || OpVS->NumPacked != VS->NumPacked) {
744
745
746
747
748
749
750
751 return false;
752 }
753
754 Scattered[I] = scatter(&CI, OpI, *OpVS);
756 OverloadIdx[I] = Tys.size();
757 Tys.push_back(OpVS->SplitTy);
758 }
759 } else {
760 ScalarOperands[I] = OpI;
763 }
764 }
765
766 ValueVector Res(VS->NumFragments);
767 ValueVector ScalarCallOps(NumArgs);
768
772
773
774 for (unsigned I = 0; I < VS->NumFragments; ++I) {
775 bool IsRemainder = I == VS->NumFragments - 1 && VS->RemainderTy;
776 ScalarCallOps.clear();
777
778 if (IsRemainder)
780
781 for (unsigned J = 0; J != NumArgs; ++J) {
783 ScalarCallOps.push_back(ScalarOperands[J]);
784 } else {
785 ScalarCallOps.push_back(Scattered[J][I]);
786 if (IsRemainder && OverloadIdx[J] >= 0)
787 Tys[OverloadIdx[J]] = Scattered[J][I]->getType();
788 }
789 }
790
791 if (IsRemainder)
793
794 Res[I] = Builder.CreateCall(NewIntrin, ScalarCallOps,
796 }
797
798 gather(&CI, Res, *VS);
799 return true;
800}
801
802bool ScalarizerVisitor::visitSelectInst(SelectInst &SI) {
803 std::optional VS = getVectorSplit(SI.getType());
804 if (!VS)
805 return false;
806
807 std::optional CondVS;
808 if (isa(SI.getCondition()->getType())) {
809 CondVS = getVectorSplit(SI.getCondition()->getType());
810 if (!CondVS || CondVS->NumPacked != VS->NumPacked) {
811
812 return false;
813 }
814 }
815
817 Scatterer VOp1 = scatter(&SI, SI.getOperand(1), *VS);
818 Scatterer VOp2 = scatter(&SI, SI.getOperand(2), *VS);
819 assert(VOp1.size() == VS->NumFragments && "Mismatched select");
820 assert(VOp2.size() == VS->NumFragments && "Mismatched select");
821 ValueVector Res;
822 Res.resize(VS->NumFragments);
823
824 if (CondVS) {
825 Scatterer VOp0 = scatter(&SI, SI.getOperand(0), *CondVS);
826 assert(VOp0.size() == CondVS->NumFragments && "Mismatched select");
827 for (unsigned I = 0; I < VS->NumFragments; ++I) {
831 Res[I] = Builder.CreateSelect(Op0, Op1, Op2,
832 SI.getName() + ".i" + Twine(I));
833 }
834 } else {
835 Value *Op0 = SI.getOperand(0);
836 for (unsigned I = 0; I < VS->NumFragments; ++I) {
839 Res[I] = Builder.CreateSelect(Op0, Op1, Op2,
840 SI.getName() + ".i" + Twine(I));
841 }
842 }
843 gather(&SI, Res, *VS);
844 return true;
845}
846
847bool ScalarizerVisitor::visitICmpInst(ICmpInst &ICI) {
848 return splitBinary(ICI, ICmpSplitter(ICI));
849}
850
851bool ScalarizerVisitor::visitFCmpInst(FCmpInst &FCI) {
852 return splitBinary(FCI, FCmpSplitter(FCI));
853}
854
855bool ScalarizerVisitor::visitUnaryOperator(UnaryOperator &UO) {
856 return splitUnary(UO, UnarySplitter(UO));
857}
858
859bool ScalarizerVisitor::visitBinaryOperator(BinaryOperator &BO) {
860 return splitBinary(BO, BinarySplitter(BO));
861}
862
863bool ScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
864 std::optional VS = getVectorSplit(GEPI.getType());
865 if (!VS)
866 return false;
867
870
871
874
875 for (unsigned I = 0; I < 1 + NumIndices; ++I) {
876 if (auto *VecTy =
878 std::optional OpVS = getVectorSplit(VecTy);
879 if (!OpVS || OpVS->NumPacked != VS->NumPacked) {
880
881 return false;
882 }
883 ScatterOps[I] = scatter(&GEPI, GEPI.getOperand(I), *OpVS);
884 } else {
886 }
887 }
888
889 ValueVector Res;
890 Res.resize(VS->NumFragments);
891 for (unsigned I = 0; I < VS->NumFragments; ++I) {
893 SplitOps.resize(1 + NumIndices);
894 for (unsigned J = 0; J < 1 + NumIndices; ++J) {
895 if (ScalarOps[J])
896 SplitOps[J] = ScalarOps[J];
897 else
898 SplitOps[J] = ScatterOps[J][I];
899 }
901 ArrayRef(SplitOps).drop_front(),
904 if (GetElementPtrInst *NewGEPI = dyn_cast(Res[I]))
905 NewGEPI->setIsInBounds();
906 }
907 gather(&GEPI, Res, *VS);
908 return true;
909}
910
911bool ScalarizerVisitor::visitCastInst(CastInst &CI) {
912 std::optional DestVS = getVectorSplit(CI.getDestTy());
913 if (!DestVS)
914 return false;
915
916 std::optional SrcVS = getVectorSplit(CI.getSrcTy());
917 if (!SrcVS || SrcVS->NumPacked != DestVS->NumPacked)
918 return false;
919
921 Scatterer Op0 = scatter(&CI, CI.getOperand(0), *SrcVS);
922 assert(Op0.size() == SrcVS->NumFragments && "Mismatched cast");
923 ValueVector Res;
924 Res.resize(DestVS->NumFragments);
925 for (unsigned I = 0; I < DestVS->NumFragments; ++I)
926 Res[I] =
927 Builder.CreateCast(CI.getOpcode(), Op0[I], DestVS->getFragmentType(I),
929 gather(&CI, Res, *DestVS);
930 return true;
931}
932
933bool ScalarizerVisitor::visitBitCastInst(BitCastInst &BCI) {
934 std::optional DstVS = getVectorSplit(BCI.getDestTy());
935 std::optional SrcVS = getVectorSplit(BCI.getSrcTy());
936 if (!DstVS || !SrcVS || DstVS->RemainderTy || SrcVS->RemainderTy)
937 return false;
938
939 const bool isPointerTy = DstVS->VecTy->getElementType()->isPointerTy();
940
941
942 assert( || (DstVS->NumPacked == 1 && SrcVS->NumPacked == 1));
943
945 Scatterer Op0 = scatter(&BCI, BCI.getOperand(0), *SrcVS);
946 ValueVector Res;
947 Res.resize(DstVS->NumFragments);
948
949 unsigned DstSplitBits = DstVS->SplitTy->getPrimitiveSizeInBits();
950 unsigned SrcSplitBits = SrcVS->SplitTy->getPrimitiveSizeInBits();
951
952 if (isPointerTy || DstSplitBits == SrcSplitBits) {
953 assert(DstVS->NumFragments == SrcVS->NumFragments);
954 for (unsigned I = 0; I < DstVS->NumFragments; ++I) {
955 Res[I] = Builder.CreateBitCast(Op0[I], DstVS->getFragmentType(I),
957 }
958 } else if (SrcSplitBits % DstSplitBits == 0) {
959
960
961 VectorSplit MidVS;
962 MidVS.NumPacked = DstVS->NumPacked;
963 MidVS.NumFragments = SrcSplitBits / DstSplitBits;
965 MidVS.NumPacked * MidVS.NumFragments);
966 MidVS.SplitTy = DstVS->SplitTy;
967
968 unsigned ResI = 0;
969 for (unsigned I = 0; I < SrcVS->NumFragments; ++I) {
971
972
973
975 while ((VI = dyn_cast(V)) &&
976 VI->getOpcode() == Instruction::BitCast)
978
979 V = Builder.CreateBitCast(V, MidVS.VecTy, V->getName() + ".cast");
980
981 Scatterer Mid = scatter(&BCI, V, MidVS);
982 for (unsigned J = 0; J < MidVS.NumFragments; ++J)
983 Res[ResI++] = Mid[J];
984 }
985 } else if (DstSplitBits % SrcSplitBits == 0) {
986
987
988 VectorSplit MidVS;
989 MidVS.NumFragments = DstSplitBits / SrcSplitBits;
990 MidVS.NumPacked = SrcVS->NumPacked;
992 MidVS.NumPacked * MidVS.NumFragments);
993 MidVS.SplitTy = SrcVS->SplitTy;
994
995 unsigned SrcI = 0;
997 ConcatOps.resize(MidVS.NumFragments);
998 for (unsigned I = 0; I < DstVS->NumFragments; ++I) {
999 for (unsigned J = 0; J < MidVS.NumFragments; ++J)
1000 ConcatOps[J] = Op0[SrcI++];
1001 Value *V = concatenate(Builder, ConcatOps, MidVS,
1003 Res[I] = Builder.CreateBitCast(V, DstVS->getFragmentType(I),
1005 }
1006 } else {
1007 return false;
1008 }
1009
1010 gather(&BCI, Res, *DstVS);
1011 return true;
1012}
1013
1014bool ScalarizerVisitor::visitInsertElementInst(InsertElementInst &IEI) {
1015 std::optional VS = getVectorSplit(IEI.getType());
1016 if (!VS)
1017 return false;
1018
1020 Scatterer Op0 = scatter(&IEI, IEI.getOperand(0), *VS);
1023
1024 ValueVector Res;
1025 Res.resize(VS->NumFragments);
1026
1027 if (auto *CI = dyn_cast(InsIdx)) {
1028 unsigned Idx = CI->getZExtValue();
1029 unsigned Fragment = Idx / VS->NumPacked;
1030 for (unsigned I = 0; I < VS->NumFragments; ++I) {
1031 if (I == Fragment) {
1033 if (Fragment == VS->NumFragments - 1 && VS->RemainderTy &&
1034 ->RemainderTy->isVectorTy())
1036 if (IsPacked) {
1037 Res[I] =
1038 Builder.CreateInsertElement(Op0[I], NewElt, Idx % VS->NumPacked);
1039 } else {
1040 Res[I] = NewElt;
1041 }
1042 } else {
1044 }
1045 }
1046 } else {
1047
1048 if (!ScalarizeVariableInsertExtract || VS->NumPacked > 1)
1049 return false;
1050
1051 for (unsigned I = 0; I < VS->NumFragments; ++I) {
1052 Value *ShouldReplace =
1053 Builder.CreateICmpEQ(InsIdx, ConstantInt::get(InsIdx->getType(), I),
1056 Res[I] = Builder.CreateSelect(ShouldReplace, NewElt, OldElt,
1058 }
1059 }
1060
1061 gather(&IEI, Res, *VS);
1062 return true;
1063}
1064
1065bool ScalarizerVisitor::visitExtractValueInst(ExtractValueInst &EVI) {
1067 Type *OpTy = Op->getType();
1068 ValueVector Res;
1069 if (!isStructOfMatchingFixedVectors(OpTy))
1070 return false;
1071 if (CallInst *CI = dyn_cast(Op)) {
1072 Function *F = CI->getCalledFunction();
1073 if ()
1074 return false;
1077 return false;
1078
1079
1080 } else
1081 return false;
1083 std::optional VS = getVectorSplit(VecType);
1084 if (!VS)
1085 return false;
1087 Scatterer Op0 = scatter(&EVI, Op, *VS);
1089
1091 for (unsigned OpIdx = 0; OpIdx < Op0.size(); ++OpIdx) {
1092 Value *ResElem = Builder.CreateExtractValue(
1093 Op0[OpIdx], Index, EVI.getName() + ".elem" + Twine(Index));
1094 Res.push_back(ResElem);
1095 }
1096
1097 gather(&EVI, Res, *VS);
1098 return true;
1099}
1100
1101bool ScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
1102 std::optional VS = getVectorSplit(EEI.getOperand(0)->getType());
1103 if (!VS)
1104 return false;
1105
1107 Scatterer Op0 = scatter(&EEI, EEI.getOperand(0), *VS);
1109
1110 if (auto *CI = dyn_cast(ExtIdx)) {
1111 unsigned Idx = CI->getZExtValue();
1112 unsigned Fragment = Idx / VS->NumPacked;
1113 Value *Res = Op0[Fragment];
1115 if (Fragment == VS->NumFragments - 1 && VS->RemainderTy &&
1116 ->RemainderTy->isVectorTy())
1118 if (IsPacked)
1119 Res = Builder.CreateExtractElement(Res, Idx % VS->NumPacked);
1120 replaceUses(&EEI, Res);
1121 return true;
1122 }
1123
1124
1125 if (!ScalarizeVariableInsertExtract || VS->NumPacked > 1)
1126 return false;
1127
1129 for (unsigned I = 0; I < VS->NumFragments; ++I) {
1130 Value *ShouldExtract =
1131 Builder.CreateICmpEQ(ExtIdx, ConstantInt::get(ExtIdx->getType(), I),
1134 Res = Builder.CreateSelect(ShouldExtract, Elt, Res,
1136 }
1137 replaceUses(&EEI, Res);
1138 return true;
1139}
1140
1141bool ScalarizerVisitor::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
1142 std::optional VS = getVectorSplit(SVI.getType());
1143 std::optional VSOp =
1145 if (!VS || !VSOp || VS->NumPacked > 1 || VSOp->NumPacked > 1)
1146 return false;
1147
1148 Scatterer Op0 = scatter(&SVI, SVI.getOperand(0), *VSOp);
1149 Scatterer Op1 = scatter(&SVI, SVI.getOperand(1), *VSOp);
1150 ValueVector Res;
1151 Res.resize(VS->NumFragments);
1152
1153 for (unsigned I = 0; I < VS->NumFragments; ++I) {
1155 if (Selector < 0)
1157 else if (unsigned(Selector) < Op0.size())
1158 Res[I] = Op0[Selector];
1159 else
1160 Res[I] = Op1[Selector - Op0.size()];
1161 }
1162 gather(&SVI, Res, *VS);
1163 return true;
1164}
1165
1166bool ScalarizerVisitor::visitPHINode(PHINode &PHI) {
1167 std::optional VS = getVectorSplit(PHI.getType());
1168 if (!VS)
1169 return false;
1170
1172 ValueVector Res;
1173 Res.resize(VS->NumFragments);
1174
1175 unsigned NumOps = PHI.getNumOperands();
1176 for (unsigned I = 0; I < VS->NumFragments; ++I) {
1177 Res[I] = Builder.CreatePHI(VS->getFragmentType(I), NumOps,
1178 PHI.getName() + ".i" + Twine(I));
1179 }
1180
1181 for (unsigned I = 0; I < NumOps; ++I) {
1182 Scatterer Op = scatter(&PHI, PHI.getIncomingValue(I), *VS);
1183 BasicBlock *IncomingBlock = PHI.getIncomingBlock(I);
1184 for (unsigned J = 0; J < VS->NumFragments; ++J)
1185 cast(Res[J])->addIncoming(Op[J], IncomingBlock);
1186 }
1187 gather(&PHI, Res, *VS);
1188 return true;
1189}
1190
1191bool ScalarizerVisitor::visitLoadInst(LoadInst &LI) {
1192 if (!ScalarizeLoadStore)
1193 return false;
1195 return false;
1196
1197 std::optional Layout = getVectorLayout(
1199 if (!Layout)
1200 return false;
1201
1204 ValueVector Res;
1205 Res.resize(Layout->VS.NumFragments);
1206
1207 for (unsigned I = 0; I < Layout->VS.NumFragments; ++I) {
1208 Res[I] = Builder.CreateAlignedLoad(Layout->VS.getFragmentType(I), Ptr[I],
1209 Align(Layout->getFragmentAlign(I)),
1211 }
1212 gather(&LI, Res, Layout->VS);
1213 return true;
1214}
1215
1216bool ScalarizerVisitor::visitStoreInst(StoreInst &SI) {
1217 if (!ScalarizeLoadStore)
1218 return false;
1219 if (.isSimple())
1220 return false;
1221
1222 Value *FullValue = SI.getValueOperand();
1223 std::optional Layout = getVectorLayout(
1224 FullValue->getType(), SI.getAlign(), SI.getDataLayout());
1225 if (!Layout)
1226 return false;
1227
1229 Scatterer VPtr = scatter(&SI, SI.getPointerOperand(), Layout->VS);
1230 Scatterer VVal = scatter(&SI, FullValue, Layout->VS);
1231
1232 ValueVector Stores;
1233 Stores.resize(Layout->VS.NumFragments);
1234 for (unsigned I = 0; I < Layout->VS.NumFragments; ++I) {
1237 Stores[I] =
1238 Builder.CreateAlignedStore(Val, Ptr, Layout->getFragmentAlign(I));
1239 }
1240 transferMetadataAndIRFlags(&SI, Stores);
1241 return true;
1242}
1243
1244bool ScalarizerVisitor::visitCallInst(CallInst &CI) {
1245 return splitCall(CI);
1246}
1247
1248bool ScalarizerVisitor::visitFreezeInst(FreezeInst &FI) {
1251 });
1252}
1253
1254
1255
1256bool ScalarizerVisitor::finish() {
1257
1258
1259 if (Gathered.empty() && Scattered.empty() && !Scalarized)
1260 return false;
1261 for (const auto &GMI : Gathered) {
1263 ValueVector &CV = *GMI.second;
1264 if (->use_empty()) {
1265
1266
1268 if (auto *Ty = dyn_cast(Op->getType())) {
1271 if (isa(Op))
1273
1274 VectorSplit VS = *getVectorSplit(Ty);
1275 assert(VS.NumFragments == CV.size());
1276
1277 Res = concatenate(Builder, CV, VS, Op->getName());
1278
1280 } else if (auto *Ty = dyn_cast(Op->getType())) {
1283 if (isa(Op))
1285
1286
1287 unsigned NumOfStructElements = Ty->getNumElements();
1289 for (unsigned I = 0; I < NumOfStructElements; ++I) {
1290 for (auto *CVelem : CV) {
1292 CVelem, I, Op->getName() + ".elem" + Twine(I));
1293 ElemCV[I].push_back(Elem);
1294 }
1295 }
1297 for (unsigned I = 0; I < NumOfStructElements; ++I) {
1298 Type *ElemTy = Ty->getElementType(I);
1299 assert(isa(ElemTy) &&
1300 "Only Structs of all FixedVectorType supported");
1301 VectorSplit VS = *getVectorSplit(ElemTy);
1302 assert(VS.NumFragments == CV.size());
1303
1304 Value *ConcatenatedVector =
1305 concatenate(Builder, ElemCV[I], VS, Op->getName());
1307 Op->getName() + ".insert");
1308 }
1309 } else {
1310 assert(CV.size() == 1 && Op->getType() == CV[0]->getType());
1311 Res = CV[0];
1312 if (Op == Res)
1313 continue;
1314 }
1315 Op->replaceAllUsesWith(Res);
1316 }
1317 PotentiallyDeadInstrs.emplace_back(Op);
1318 }
1319 Gathered.clear();
1320 Scattered.clear();
1321 Scalarized = false;
1322
1324
1325 return true;
1326}
1327
1331 ScalarizerVisitor Impl(DT, TTI, Options);
1332 bool Changed = Impl.visit(F);
1336}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
Module.h This file contains the declarations for the Module class.
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Scalarize vector operations
This pass converts vector operations into scalar operations (or, optionally, operations on smaller ve...
This file defines the SmallVector class.
This pass exposes codegen information to IR-level passes.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
This class represents an incoming formal argument to a Function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
bool empty() const
empty - Check if the array is empty.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
InstListType::iterator iterator
Instruction iterators...
This class represents a no-op cast from one type to another.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Type * getSrcTy() const
Return the source type, as a convenience.
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Type * getDestTy() const
Return the destination type, as a convenience.
This is the shared class of boolean and integer constants.
This class represents an Operation in the Expression.
A parsed version of the target data layout string, and methods for querying it.
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
This instruction compares its operands according to the predicate given to the constructor.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
This class represents a freeze function that returns a random concrete value if an operand is either a ...
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
bool isInBounds() const
Determine whether the GEP has the inbounds flag.
Type * getSourceElementType() const
unsigned getNumIndices() const
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateFCmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Value * CreateFreeze(Value *V, const Twine &Name="")
Value * CreateUnOp(Instruction::UnaryOps Opc, Value *V, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
This instruction inserts a single (scalar) element into a VectorType value.
VectorType * getType() const
Overload to return most specific vector type.
Base class for instruction visitors.
RetTy visitFreezeInst(FreezeInst &I)
RetTy visitFCmpInst(FCmpInst &I)
RetTy visitExtractElementInst(ExtractElementInst &I)
RetTy visitShuffleVectorInst(ShuffleVectorInst &I)
RetTy visitBitCastInst(BitCastInst &I)
void visit(Iterator Start, Iterator End)
RetTy visitPHINode(PHINode &I)
RetTy visitExtractValueInst(ExtractValueInst &I)
RetTy visitUnaryOperator(UnaryOperator &I)
RetTy visitStoreInst(StoreInst &I)
RetTy visitInsertElementInst(InsertElementInst &I)
RetTy visitBinaryOperator(BinaryOperator &I)
RetTy visitICmpInst(ICmpInst &I)
RetTy visitCallInst(CallInst &I)
RetTy visitCastInst(CastInst &I)
RetTy visitSelectInst(SelectInst &I)
RetTy visitGetElementPtrInst(GetElementPtrInst &I)
void visitInstruction(Instruction &I)
RetTy visitLoadInst(LoadInst &I)
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
An instruction for reading from memory.
Value * getPointerOperand()
Align getAlign() const
Return the alignment of the access that is being performed.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do their job.
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserve()
Mark an analysis as preserved.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
This class represents the LLVM 'select' instruction.
This instruction constructs a fixed permutation of two input vectors.
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
VectorType * getType() const
Overload to return most specific vector type.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Analysis pass providing the TargetTransformInfo.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isPointerTy() const
True if this is an instance of PointerType.
unsigned getNumContainedTypes() const
Return the number of types in the derived type.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
StringRef getName() const
Return a constant reference to the value's name.
void takeName(Value *V)
Transfer the name from V to this value.
const ParentTy * getParent() const
self_iterator getIterator()
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
ID ArrayRef< Type * > Tys
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
This is an optimization pass for GlobalISel generic memory operations.
bool isTriviallyScalarizable(Intrinsic::ID ID, const TargetTransformInfo *TTI)
Identify if the intrinsic is trivially scalarizable.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
BasicBlock::iterator skipDebugIntrinsics(BasicBlock::iterator It)
Advance It while it points to a debug instruction and return the result.
bool isPointerTy(const Type *T)
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
bool isVectorIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, int RetIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic that returns a struct is overloaded at the struct element index RetIdx.
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
DWARFExpression::Operation Op
FunctionPass * createScalarizerPass(const ScalarizerPassOptions &Options=ScalarizerPassOptions())
Create a legacy pass manager instance of the Scalarizer pass.
bool RecursivelyDeleteTriviallyDeadInstructionsPermissive(SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow instructions that are not trivially dead.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdIdx, or on the return type if OpdIdx is -1.
This struct is a compact representation of a valid (non-zero power of two) alignment.