LLVM: lib/Analysis/IRSimilarityIdentifier.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
23
24using namespace llvm;
25using namespace IRSimilarity;
26
27namespace llvm {
31 cl::desc("disable similarity matching, and outlining, "
32 "across branches for debugging purposes."));
33
37 cl::desc("disable outlining indirect calls."));
38
41 cl::desc("only allow matching call instructions if the "
42 "name and type signature match."));
43
46 cl::desc("Don't match or outline intrinsics"));
47}
48
51 : Inst(&I), Legal(Legality), IDL(&IDList) {
53}
54
56
57
58
61 if (Predicate != C->getPredicate())
63 }
64
65
66
69
70
72 continue;
73 }
74
76 }
77
78
79
80 if (PHINode *PN = dyn_cast(Inst))
83}
84
86 : IDL(&IDList) {}
87
90 assert(isa(Inst) && "Instruction must be branch");
91
94
95 BBNumIt = BasicBlockToInteger.find(BI->getParent());
96 assert(BBNumIt != BasicBlockToInteger.end() &&
97 "Could not find location for BasicBlock!");
98
99 int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
100
104 assert(BBNumIt != BasicBlockToInteger.end() &&
105 "Could not find number for BasicBlock!");
106 int OtherBlockNumber = static_cast<int>(BBNumIt->second);
107
108 int Relative = OtherBlockNumber - CurrentBlockNumber;
110 }
111}
112
115 isa(Inst)) && "Instruction must be branch or PHINode");
116
119 std::next(OperVals.begin(), BI->isConditional() ? 1 : 0),
121 );
122
123 if (PHINode *PN = dyn_cast(Inst))
125 std::next(OperVals.begin(), PN->getNumIncomingValues()),
127 );
128
130}
131
134 assert(CI && "Instruction must be call");
135
138
139
140
143
144
148
149 else
151
152 return;
153 }
154
157}
158
161 assert(isa(Inst) && "Instruction must be phi node");
162
165
166 BBNumIt = BasicBlockToInteger.find(PN->getParent());
167 assert(BBNumIt != BasicBlockToInteger.end() &&
168 "Could not find location for BasicBlock!");
169
170 int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
171
172
173
176 BBNumIt = BasicBlockToInteger.find(Incoming);
177 assert(BBNumIt != BasicBlockToInteger.end() &&
178 "Could not find number for BasicBlock!");
179 int OtherBlockNumber = static_cast<int>(BBNumIt->second);
180
181 int Relative = OtherBlockNumber - CurrentBlockNumber;
183 }
184}
185
197 default:
199 }
200}
201
204 "Can only get a predicate from a compare instruction");
205
208
209 return cast(Inst)->getPredicate();
210}
211
214 "Can only get a name from a call instruction");
215
217
219}
220
223
224 if (.Legal ||
.Legal)
225 return false;
226
227
228
229 if (.Inst->isSameOperationAs(B.Inst)) {
230
231
232
233 if (isa(A.Inst) && isa(B.Inst)) {
234
235 if (A.getPredicate() != B.getPredicate())
236 return false;
237
238
239
240 auto ZippedTypes = zip(A.OperVals, B.OperVals);
241
243 ZippedTypes, [](std::tuple<llvm::Value *, llvm::Value *> R) {
244 return std::get<0>(R)->getType() == std::get<1>(R)->getType();
245 });
246 }
247
248 return false;
249 }
250
251
252
253
254 if (auto *GEP = dyn_cast(A.Inst)) {
255 auto *OtherGEP = cast(B.Inst);
256
257
258
259 if (GEP->isInBounds() != OtherGEP->isInBounds())
260 return false;
261
262 auto ZippedOperands = zip(GEP->indices(), OtherGEP->indices());
263
264
265
266
268 [](std::tuple<llvm::Use &, llvm::Use &> R) {
269 return std::get<0>(R) == std::get<1>(R);
270 });
271 }
272
273
274
275
276 if (isa(A.Inst) && isa(B.Inst)) {
277 if (A.getCalleeName() != B.getCalleeName())
278 return false;
279 }
280
281 if (isa(A.Inst) && isa(B.Inst) &&
282 A.RelativeBlockLocations.size() != B.RelativeBlockLocations.size())
283 return false;
284
285 return true;
286}
287
288
289
291 BasicBlock &BB, std::vector<IRInstructionData *> &InstrList,
292 std::vector &IntegerMapping) {
294
295 std::vector IntegerMappingForBB;
296 std::vector<IRInstructionData *> InstrListForBB;
297
302 break;
305 break;
308 break;
309 }
310 }
311
318}
319
320
321
324 std::vector<IRInstructionData *> &InstrListForBB) {
325
326
328
329
330
334
335
336
338 InstrListForBB.push_back(ID);
339
340 if (isa(*It))
342
343 if (isa(*It))
345
346 if (isa(*It))
348
349
350 bool WasInserted;
352 ResultIt;
353 std::tie(ResultIt, WasInserted) =
355 unsigned INumber = ResultIt->second;
356
357
358 if (WasInserted)
360
361 IntegerMappingForBB.push_back(INumber);
362
363
365 "Instruction mapping overflow!");
366
368 "Tried to assign DenseMap tombstone or empty key to instruction.");
370 "Tried to assign DenseMap tombstone or empty key to instruction.");
371
372 return INumber;
373}
374
379}
380
384}
385
389}
390
391
392
395 std::vector<IRInstructionData *> &InstrListForBB, bool End) {
396
398
399
402
404 if ()
406 else
408 InstrListForBB.push_back(ID);
409
410
414
416 "Instruction mapping overflow!");
417
419 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
420
422 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
423
424 return INumber;
425}
426
430 : StartIdx(StartIdx), Len(Len) {
431
432 assert(FirstInstIt != nullptr && "Instruction is nullptr!");
433 assert(LastInstIt != nullptr && "Instruction is nullptr!");
434 assert(StartIdx + Len > StartIdx &&
435 "Overflow for IRSimilarityCandidate range?");
436 assert(Len - 1 == static_cast<unsigned>(std::distance(
438 "Length of the first and last IRInstructionData do not match the "
439 "given length");
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456 unsigned LocalValNumber = 1;
458 for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) {
459
460
461 for (Value *Arg : ID->OperVals)
462 if (ValueToNumber.try_emplace(Arg, LocalValNumber).second) {
463 NumberToValue.try_emplace(LocalValNumber, Arg);
464 LocalValNumber++;
465 }
466
467
468
469 if (ValueToNumber.try_emplace(ID->Inst, LocalValNumber).second) {
470 NumberToValue.try_emplace(LocalValNumber, ID->Inst);
471 LocalValNumber++;
472 }
473 }
474
475
476
477
478 FirstInst = FirstInstIt;
479 LastInst = LastInstIt;
480
481
485 if (ValueToNumber.try_emplace(BB, LocalValNumber).second) {
486 NumberToValue.try_emplace(LocalValNumber, BB);
487 LocalValNumber++;
488 }
489 }
490}
491
494 if (A.getLength() != B.getLength())
495 return false;
496
497 auto InstrDataForBoth =
499
500 return all_of(InstrDataForBoth,
501 [](std::tuple<IRInstructionData &, IRInstructionData &> R) {
504 if (.Legal ||
.Legal)
505 return false;
507 });
508}
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
532
534
535 unsigned ArgVal;
536 bool WasInserted;
537
538
539
540
541
542 for (Value *V : SourceOperands) {
543 ArgVal = SourceValueToNumberMapping.find(V)->second;
544
545
546 std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert(
547 std::make_pair(ArgVal, TargetValueNumbers));
548
549
550
551
553 for (unsigned &Curr : ValueMappingIt->second)
554
555 if (TargetValueNumbers.contains(Curr))
557
558
559 if (NewSet.empty())
560 return false;
561
562
563 if (NewSet.size() != ValueMappingIt->second.size())
564 ValueMappingIt->second.swap(NewSet);
565
566
567
568 if (ValueMappingIt->second.size() != 1)
569 continue;
570
571 unsigned ValToRemove = *ValueMappingIt->second.begin();
572
573
574
575 for (Value *InnerV : SourceOperands) {
576 if (V == InnerV)
577 continue;
578
579 unsigned InnerVal = SourceValueToNumberMapping.find(InnerV)->second;
580 ValueMappingIt = CurrentSrcTgtNumberMapping.find(InnerVal);
581 if (ValueMappingIt == CurrentSrcTgtNumberMapping.end())
582 continue;
583
584 ValueMappingIt->second.erase(ValToRemove);
585 if (ValueMappingIt->second.empty())
586 return false;
587 }
588 }
589
590 return true;
591}
592
593
594
595
596
597
598
599
600
601
602
603
606 unsigned SourceArgVal, unsigned TargetArgVal) {
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622 bool WasInserted;
624
625 std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.insert(
627
628
629 if (WasInserted)
630 return true;
631
632
633
634
635
636
638 if (TargetSet.size() > 1 && TargetSet.contains(TargetArgVal)) {
639 TargetSet.clear();
640 TargetSet.insert(TargetArgVal);
641 return true;
642 }
643
644
645 return TargetSet.contains(TargetArgVal);
646}
647
650
651
654 unsigned OperandLength = A.OperVals.size();
655
656
657 for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) {
658 unsigned OperValA = A.IRSC.ValueToNumber.find(*VItA)->second;
659 unsigned OperValB = B.IRSC.ValueToNumber.find(*VItB)->second;
660
661
662
663
664
665
666
667
668
669
670
671
672
674 return false;
675
677 return false;
678 }
679 return true;
680}
681
686
689 unsigned OperandLength = A.OperVals.size();
690
691
692 for (unsigned Idx = 0; Idx < OperandLength;
693 Idx++, VItA++, VItB++) {
694 ValueNumbersA.insert(A.IRSC.ValueToNumber.find(*VItA)->second);
695 ValueNumbersB.insert(B.IRSC.ValueToNumber.find(*VItB)->second);
696 }
697
698
699
700
702 A.ValueNumberMapping, A.OperVals,
703 ValueNumbersB))
704 return false;
705
706
707
708
710 B.ValueNumberMapping, B.OperVals,
711 ValueNumbersA))
712 return false;
713
714 return true;
715}
716
718 const unsigned InstValA, const unsigned &InstValB,
722 bool WasInserted;
723 std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
725 if (!WasInserted && !ValueMappingIt->second.contains(InstValB))
726 return false;
727 else if (ValueMappingIt->second.size() != 1) {
728 for (unsigned OtherVal : ValueMappingIt->second) {
729 if (OtherVal == InstValB)
730 continue;
731 if (!ValueNumberMappingA.contains(OtherVal))
732 continue;
733 if (!ValueNumberMappingA[OtherVal].contains(InstValA))
734 continue;
735 ValueNumberMappingA[OtherVal].erase(InstValA);
736 }
737 ValueNumberMappingA.erase(ValueMappingIt);
738 std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
740 }
741
742 return true;
743}
744
747
748 BasicBlock *ABB = cast(A.OperVal);
749 BasicBlock *BBB = cast(B.OperVal);
750
751
754 A.IRSC.getBasicBlocks(BasicBlockA);
755 B.IRSC.getBasicBlocks(BasicBlockB);
756
757
758 bool AContained = BasicBlockA.contains(ABB);
759 bool BContained = BasicBlockB.contains(BBB);
760
761
762
763 if (AContained != BContained)
764 return false;
765
766
767
768 if (AContained)
769 return A.RelativeLocation == B.RelativeLocation;
770 return true;
771}
772
778}
779
784
789 if (A.getLength() != B.getLength())
790 return false;
791
792 if (A.ValueToNumber.size() != B.ValueToNumber.size())
793 return false;
794
797
798
799
800
801
802
803
804 unsigned SectionLength = A.getStartIdx() + A.getLength();
805 for (unsigned Loc = A.getStartIdx(); Loc < SectionLength;
806 ItA++, ItB++, Loc++) {
807
808 if ((*ItA, *ItB))
809 return false;
810
813
814 if (!ItA->Legal || !ItB->Legal)
815 return false;
816
817
820
821 unsigned InstValA = A.ValueToNumber.find(IA)->second;
822 unsigned InstValB = B.ValueToNumber.find(IB)->second;
823
824
826 ValueNumberMappingB))
827 return false;
828
830 ValueNumberMappingA))
831 return false;
832
833
834
835
836 if (IA->isCommutative() && !isa(IA) &&
837 !isa(IA)) {
839 {A, OperValsA, ValueNumberMappingA},
840 {B, OperValsB, ValueNumberMappingB}))
841 return false;
842 continue;
843 }
844
845
847 {A, OperValsA, ValueNumberMappingA},
848 {B, OperValsB, ValueNumberMappingB}))
849 return false;
850
851
852
853
854
855
856
857
858
859
860
861
862 if (!(isa(IA) && isa(IB)) &&
863 !(isa(IA) && isa(IB)))
864 continue;
865
870
871
872
873 if (RelBlockLocsA.size() != RelBlockLocsB.size() &&
875 return false;
876
878 "Block information vectors not the same size.");
880 "Block information vectors not the same size.");
881
883 zip(RelBlockLocsA, RelBlockLocsB, ABL, BBL);
884 if (any_of(ZippedRelativeLocations,
885 [&A, &B](std::tuple<int, int, Value *, Value *> R) {
887 {A, std::get<0>(R), std::get<2>(R)},
888 {B, std::get<1>(R), std::get<3>(R)});
889 }))
890 return false;
891 }
892 return true;
893}
894
899
900
901
902 return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx;
903 };
904
905 return DoesOverlap(A, B) || DoesOverlap(B, A);
906}
907
908void IRSimilarityIdentifier::populateMapper(
909 Module &M, std::vector<IRInstructionData *> &InstrList,
910 std::vector &IntegerMapping) {
911
912 std::vector<IRInstructionData *> InstrListForModule;
913 std::vector IntegerMappingForModule;
914
915
917
919
920 if (F.empty())
921 continue;
922
924
925
926
927
929 IntegerMappingForModule);
930 }
931
934 true);
935 if (InstrListForModule.size() > 0)
936 Mapper.IDL->push_back(*InstrListForModule.back());
937 }
938
939
940
942
944}
945
946void IRSimilarityIdentifier::populateMapper(
947 ArrayRef<std::unique_ptr> &Modules,
948 std::vector<IRInstructionData *> &InstrList,
949 std::vector &IntegerMapping) {
950
951
952 for (const std::unique_ptr &M : Modules)
953 populateMapper(*M, InstrList, IntegerMapping);
954}
955
956
957
958
959
960
961
962
963
964
966 const IRInstructionMapper& Mapper, std::vector<IRInstructionData *> &InstrList,
968 std::vector &CandsForRepSubstring) {
969
970 unsigned StringLen = RS.Length;
971 if (StringLen < 2)
972 return;
973
974
975 for (const unsigned &StartIdx : RS.StartIndices) {
976 unsigned EndIdx = StartIdx + StringLen - 1;
977
978
979 bool ContainsIllegal = false;
980 for (unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) {
981 unsigned Key = IntegerMapping[CurrIdx];
983 ContainsIllegal = true;
984 break;
985 }
986 }
987
988
989
990 if (ContainsIllegal)
991 continue;
992
993
994
995
996 std::vector<IRInstructionData *>::iterator StartIt = InstrList.begin();
997 std::advance(StartIt, StartIdx);
998 std::vector<IRInstructionData *>::iterator EndIt = InstrList.begin();
999 std::advance(EndIt, EndIdx);
1000
1001 CandsForRepSubstring.emplace_back(StartIdx, StringLen, *StartIt, *EndIt);
1002 }
1003}
1004
1009 assert(SourceCand.CanonNumToNumber.size() != 0 &&
1010 "Base canonical relationship is empty!");
1011 assert(SourceCand.NumberToCanonNum.size() != 0 &&
1012 "Base canonical relationship is empty!");
1013
1014 assert(CanonNumToNumber.size() == 0 && "Canonical Relationship is non-empty");
1015 assert(NumberToCanonNum.size() == 0 && "Canonical Relationship is non-empty");
1016
1018
1019
1020
1021 for (std::pair<unsigned, DenseSet> &GVNMapping : ToSourceMapping) {
1022 unsigned SourceGVN = GVNMapping.first;
1023
1024 assert(GVNMapping.second.size() != 0 && "Possible GVNs is 0!");
1025
1026 unsigned ResultGVN;
1027
1028
1029
1030
1031 if (GVNMapping.second.size() > 1) {
1032 bool Found = false;
1033 for (unsigned Val : GVNMapping.second) {
1034
1036 continue;
1037
1038
1040 FromSourceMapping.find(Val);
1041
1042 if (!It->second.contains(SourceGVN))
1043 continue;
1044
1045
1046 Found = true;
1047 ResultGVN = Val;
1048 break;
1049 }
1050
1051 assert(Found && "Could not find matching value for source GVN");
1052 (void)Found;
1053
1054 } else
1055 ResultGVN = *GVNMapping.second.begin();
1056
1057
1058 UsedGVNs.insert(ResultGVN);
1059
1060 unsigned CanonNum = *SourceCand.getCanonicalNum(ResultGVN);
1061 CanonNumToNumber.insert(std::make_pair(CanonNum, SourceGVN));
1062 NumberToCanonNum.insert(std::make_pair(SourceGVN, CanonNum));
1063 }
1064
1067
1068
1069
1070
1071
1072
1074 unsigned BBGVNForCurrCand = ValueToNumber.find(BB)->second;
1075
1076
1077
1078 if (NumberToCanonNum.contains(BBGVNForCurrCand))
1079 continue;
1080
1081
1082
1083
1086 : &*BB->instructionsWithoutDebug().begin();
1087
1088 unsigned FirstInstGVN = *getGVN(FirstOutlineInst);
1089 unsigned FirstInstCanonNum = *getCanonicalNum(FirstInstGVN);
1090 unsigned SourceGVN = *SourceCand.fromCanonicalNum(FirstInstCanonNum);
1091 Value *SourceV = *SourceCand.fromGVN(SourceGVN);
1093 unsigned SourceBBGVN = *SourceCand.getGVN(SourceBB);
1094 unsigned SourceCanonBBGVN = *SourceCand.getCanonicalNum(SourceBBGVN);
1095 CanonNumToNumber.insert(std::make_pair(SourceCanonBBGVN, BBGVNForCurrCand));
1096 NumberToCanonNum.insert(std::make_pair(BBGVNForCurrCand, SourceCanonBBGVN));
1097 }
1098}
1099
1103 assert(!SourceCand.CanonNumToNumber.empty() &&
1104 "Canonical Relationship is non-empty");
1105 assert(!SourceCand.NumberToCanonNum.empty() &&
1106 "Canonical Relationship is non-empty");
1107
1108 assert(!SourceCandLarge.CanonNumToNumber.empty() &&
1109 "Canonical Relationship is non-empty");
1110 assert(!SourceCandLarge.NumberToCanonNum.empty() &&
1111 "Canonical Relationship is non-empty");
1112
1113 assert(!TargetCandLarge.CanonNumToNumber.empty() &&
1114 "Canonical Relationship is non-empty");
1115 assert(!TargetCandLarge.NumberToCanonNum.empty() &&
1116 "Canonical Relationship is non-empty");
1117
1118 assert(CanonNumToNumber.empty() && "Canonical Relationship is non-empty");
1119 assert(NumberToCanonNum.empty() && "Canonical Relationship is non-empty");
1120
1121
1122
1123
1124
1125 for (std::pair<Value *, unsigned> &ValueNumPair : ValueToNumber) {
1126 Value *CurrVal = ValueNumPair.first;
1127 unsigned TargetCandGVN = ValueNumPair.second;
1128
1129
1130
1131 std::optional OLargeTargetGVN = TargetCandLarge.getGVN(CurrVal);
1132 assert(OLargeTargetGVN.has_value() && "GVN not found for Value");
1133
1134
1135 std::optional OTargetCandCanon =
1136 TargetCandLarge.getCanonicalNum(OLargeTargetGVN.value());
1137 assert(OTargetCandCanon.has_value() &&
1138 "Canononical Number not found for GVN");
1139
1140
1141 std::optional OLargeSourceGVN =
1143 assert(OLargeSourceGVN.has_value() &&
1144 "GVN Number not found for Canonical Number");
1145
1146
1147 std::optional<Value *> OLargeSourceV =
1148 SourceCandLarge.fromGVN(OLargeSourceGVN.value());
1149 assert(OLargeSourceV.has_value() && "Value not found for GVN");
1150
1151
1152 std::optional OSourceGVN =
1153 SourceCand.getGVN(OLargeSourceV.value());
1154 assert(OSourceGVN.has_value() && "GVN Number not found for Value");
1155
1156
1157 std::optional OSourceCanon =
1159 assert(OSourceCanon.has_value() && "Canon Number not found for GVN");
1160
1161
1162
1163 CanonNumToNumber.insert(
1164 std::make_pair(OSourceCanon.value(), TargetCandGVN));
1165 NumberToCanonNum.insert(
1166 std::make_pair(TargetCandGVN, OSourceCanon.value()));
1167 }
1168}
1169
1172 assert(CurrCand.CanonNumToNumber.size() == 0 &&
1173 "Canonical Relationship is non-empty");
1174 assert(CurrCand.NumberToCanonNum.size() == 0 &&
1175 "Canonical Relationship is non-empty");
1176
1177 unsigned CanonNum = 0;
1178
1179
1180 for (std::pair<unsigned, Value *> &NumToVal : CurrCand.NumberToValue) {
1181 CurrCand.NumberToCanonNum.insert(std::make_pair(NumToVal.first, CanonNum));
1182 CurrCand.CanonNumToNumber.insert(std::make_pair(CanonNum, NumToVal.first));
1183 CanonNum++;
1184 }
1185}
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200static std::optional<
1201 std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
1210
1211
1212
1213 auto IdxToCandidateIt = IndexToIncludedCand.find(CandA.getStartIdx());
1214 std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
1215 Result;
1216
1217 unsigned CandAStart = CandA.getStartIdx();
1218 unsigned CandAEnd = CandA.getEndIdx();
1219 unsigned CandBStart = CandB.getStartIdx();
1220 unsigned CandBEnd = CandB.getEndIdx();
1221 if (IdxToCandidateIt == IndexToIncludedCand.end())
1222 return Result;
1224 if (MatchedCand->getStartIdx() > CandAStart ||
1225 (MatchedCand->getEndIdx() < CandAEnd))
1226 continue;
1227 unsigned GroupNum = CandToGroup.find(MatchedCand)->second;
1228 IncludedGroupAndCandA.insert(std::make_pair(GroupNum, MatchedCand));
1229 IncludedGroupsA.insert(GroupNum);
1230 }
1231
1232
1233
1234 IdxToCandidateIt = IndexToIncludedCand.find(CandBStart);
1235 if (IdxToCandidateIt == IndexToIncludedCand.end())
1236 return Result;
1238 if (MatchedCand->getStartIdx() > CandBStart ||
1239 MatchedCand->getEndIdx() < CandBEnd)
1240 continue;
1241 unsigned GroupNum = CandToGroup.find(MatchedCand)->second;
1242 IncludedGroupAndCandB.insert(std::make_pair(GroupNum, MatchedCand));
1243 IncludedGroupsB.insert(GroupNum);
1244 }
1245
1246
1247
1248 set_intersect(IncludedGroupsA, IncludedGroupsB);
1249
1250
1251
1252 if (IncludedGroupsA.empty())
1253 return Result;
1254
1255
1256 auto ItA = IncludedGroupAndCandA.find(*IncludedGroupsA.begin());
1257 auto ItB = IncludedGroupAndCandB.find(*IncludedGroupsA.begin());
1258 Result = std::make_pair(ItA->second, ItB->second);
1259 return Result;
1260}
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1276 std::vector &CandsForRepSubstring,
1280 ) {
1281 std::vector::iterator CandIt, CandEndIt, InnerCandIt,
1282 InnerCandEndIt;
1283
1284
1285
1286
1287
1288
1290
1291
1292
1293
1294 bool SameStructure;
1295 bool Inserted;
1296 unsigned CurrentGroupNum = 0;
1297 unsigned OuterGroupNum;
1301
1302
1303
1306 for (CandIt = CandsForRepSubstring.begin(),
1307 CandEndIt = CandsForRepSubstring.end();
1308 CandIt != CandEndIt; CandIt++) {
1309
1310
1311 CandToGroupIt = CandToGroup.find(&*CandIt);
1312 if (CandToGroupIt == CandToGroup.end()) {
1313
1314 std::tie(CandToGroupIt, Inserted) =
1315 CandToGroup.insert(std::make_pair(&*CandIt, CurrentGroupNum++));
1316 }
1317
1318
1319 OuterGroupNum = CandToGroupIt->second;
1320
1321
1322
1323 CurrentGroupPair = StructuralGroups.find(OuterGroupNum);
1324 if (CurrentGroupPair == StructuralGroups.end()) {
1326 std::tie(CurrentGroupPair, Inserted) = StructuralGroups.insert(
1327 std::make_pair(OuterGroupNum, SimilarityGroup({*CandIt})));
1328 }
1329
1330
1331
1332
1333
1334 for (InnerCandIt = std::next(CandIt),
1335 InnerCandEndIt = CandsForRepSubstring.end();
1336 InnerCandIt != InnerCandEndIt; InnerCandIt++) {
1337
1338
1339 CandToGroupItInner = CandToGroup.find(&*InnerCandIt);
1340 if (CandToGroupItInner != CandToGroup.end())
1341 continue;
1342
1343
1344
1345 std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
1347 *CandIt, *InnerCandIt, IndexToIncludedCand, CandToOverallGroup);
1348
1349
1350
1351
1352 if (LargerPair.has_value()) {
1353 SameStructure = true;
1354 InnerCandIt->createCanonicalRelationFrom(
1355 *CandIt, *LargerPair.value().first, *LargerPair.value().second);
1356 CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
1357 CurrentGroupPair->second.push_back(*InnerCandIt);
1358 continue;
1359 }
1360
1361
1362
1363 ValueNumberMappingA.clear();
1364 ValueNumberMappingB.clear();
1366 *CandIt, *InnerCandIt, ValueNumberMappingA, ValueNumberMappingB);
1367 if (!SameStructure)
1368 continue;
1369
1370 InnerCandIt->createCanonicalRelationFrom(*CandIt, ValueNumberMappingA,
1371 ValueNumberMappingB);
1372 CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
1373 CurrentGroupPair->second.push_back(*InnerCandIt);
1374 }
1375 }
1376}
1377
1378void IRSimilarityIdentifier::findCandidates(
1379 std::vector<IRInstructionData *> &InstrList,
1380 std::vector &IntegerMapping) {
1382
1383 std::vector CandsForRepSubstring;
1384 std::vector NewCandidateGroups;
1385
1389
1390
1391
1392
1393
1394
1395 std::vectorSuffixTree::RepeatedSubstring RSes;
1397 RSes.push_back(RS);
1398
1401 return LHS.Length > RHS.Length;
1402 });
1405 CandsForRepSubstring);
1406
1407 if (CandsForRepSubstring.size() < 2)
1408 continue;
1409
1411 IndexToIncludedCand, CandToGroup);
1412 for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups) {
1413
1414
1415
1416 if (Group.second.size() > 1) {
1417 SimilarityCandidates->push_back(Group.second);
1418
1419
1420
1422 for (unsigned Idx = IRCand.getStartIdx(), Edx = IRCand.getEndIdx();
1424 IndexToIncludedCand[Idx].insert(&IRCand);
1425
1427 std::make_pair(&IRCand, SimilarityCandidates->size() - 1));
1428 }
1429 }
1430 }
1431
1432 CandsForRepSubstring.clear();
1433 StructuralGroups.clear();
1434 NewCandidateGroups.clear();
1435 }
1436}
1437
1439 ArrayRef<std::unique_ptr> Modules) {
1441
1442 std::vector<IRInstructionData *> InstrList;
1443 std::vector IntegerMapping;
1449
1450 populateMapper(Modules, InstrList, IntegerMapping);
1451 findCandidates(InstrList, IntegerMapping);
1452
1453 return *SimilarityCandidates;
1454}
1455
1463
1464 std::vector<IRInstructionData *> InstrList;
1465 std::vector IntegerMapping;
1466
1467 populateMapper(M, InstrList, IntegerMapping);
1468 findCandidates(InstrList, IntegerMapping);
1469
1470 return *SimilarityCandidates;
1471}
1472
1474 "ir-similarity-identifier", false, true)
1475
1480}
1481
1485 false));
1486 return false;
1487}
1488
1490 IRSI.reset();
1491 return false;
1492}
1493
1495 IRSI->findSimilarity(M);
1496 return false;
1497}
1498
1504 false);
1505 IRSI.findSimilarity(M);
1506 return IRSI;
1507}
1508
1512 std::optional &SimilarityCandidatesOpt =
1514
1515 for (std::vector &CandVec : *SimilarityCandidatesOpt) {
1516 OS << CandVec.size() << " candidates of length "
1517 << CandVec.begin()->getLength() << ". Found in: \n";
1519 OS << " Function: " << Cand.front()->Inst->getFunction()->getName().str()
1520 << ", Basic Block: ";
1521 if (Cand.front()->Inst->getParent()->getName().str() == "")
1522 OS << "(unnamed)";
1523 else
1524 OS << Cand.front()->Inst->getParent()->getName().str();
1525 OS << "\n Start Instruction: ";
1526 Cand.frontInstruction()->print(OS);
1527 OS << "\n End Instruction: ";
1528 Cand.backInstruction()->print(OS);
1529 OS << "\n";
1530 }
1531 }
1532
1534}
1535
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
bool checkNumberingAndReplace(DenseMap< unsigned, DenseSet< unsigned > > &CurrentSrcTgtNumberMapping, unsigned SourceArgVal, unsigned TargetArgVal)
Determine if operand number TargetArgVal is in the current mapping set for operand number SourceArgVa...
detail::zippy< detail::zip_shortest, SmallVector< int, 4 > &, SmallVector< int, 4 > &, ArrayRef< Value * > &, ArrayRef< Value * > & > ZippedRelativeLocationsT
static void findCandidateStructures(std::vector< IRSimilarityCandidate > &CandsForRepSubstring, DenseMap< unsigned, SimilarityGroup > &StructuralGroups, DenseMap< unsigned, DenseSet< IRSimilarityCandidate * > > &IndexToIncludedCand, DenseMap< IRSimilarityCandidate *, unsigned > &CandToOverallGroup)
From the list of IRSimilarityCandidates, perform a comparison between each IRSimilarityCandidate to d...
static void createCandidatesFromSuffixTree(const IRInstructionMapper &Mapper, std::vector< IRInstructionData * > &InstrList, std::vector< unsigned > &IntegerMapping, SuffixTree::RepeatedSubstring &RS, std::vector< IRSimilarityCandidate > &CandsForRepSubstring)
From a repeated subsequence, find all the different instances of the subsequence from the InstrList,...
static bool checkNumberingAndReplaceCommutative(const DenseMap< Value *, unsigned > &SourceValueToNumberMapping, DenseMap< unsigned, DenseSet< unsigned > > &CurrentSrcTgtNumberMapping, ArrayRef< Value * > &SourceOperands, DenseSet< unsigned > &TargetValueNumbers)
Determine if one or more of the assigned global value numbers for the operands in TargetValueNumbers ...
static std::optional< std::pair< IRSimilarityCandidate *, IRSimilarityCandidate * > > CheckLargerCands(IRSimilarityCandidate &CandA, IRSimilarityCandidate &CandB, DenseMap< unsigned, DenseSet< IRSimilarityCandidate * > > &IndexToIncludedCand, DenseMap< IRSimilarityCandidate *, unsigned > &CandToGroup)
Look for larger IRSimilarityCandidates From the previously matched IRSimilarityCandidates that fully ...
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
This file defines generic set operations that may be used on set's of different types,...
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
const Function * getParent() const
Return the enclosing method, or null if none.
InstListType::iterator iterator
Instruction iterators...
Conditional or Unconditional Branch instruction.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
This class is the base class for the comparison instructions.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ ICMP_SGE
signed greater or equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Predicate getPredicate() const
Return the predicate for this instruction.
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool erase(const KeyT &Val)
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Implements a dense probed hash-table based set.
Class to represent function types.
ArrayRef< Type * > params() const
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
An analysis pass that runs and returns the IRSimilarityIdentifier run on the Module.
Result run(Module &M, ModuleAnalysisManager &)
An analysis pass based on legacy pass manager that runs and returns IRSimilarityIdentifier run on the...
bool doInitialization(Module &M) override
doInitialization - Virtual method overridden by subclasses to do any necessary initialization before ...
bool doFinalization(Module &M) override
doFinalization - Virtual method overriden by subclasses to do any necessary clean up after all passes...
bool runOnModule(Module &M) override
runOnModule - Virtual method overriden by subclasses to process the module being operated on.
This is a class that wraps a range of IRInstructionData from one point to another in the vector of IR...
void createCanonicalRelationFrom(IRSimilarityCandidate &SourceCand, DenseMap< unsigned, DenseSet< unsigned > > &ToSourceMapping, DenseMap< unsigned, DenseSet< unsigned > > &FromSourceMapping)
Create a mapping for the value numbering of the calling IRSimilarityCandidate, to a different separat...
static bool isSimilar(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B)
std::optional< unsigned > getGVN(Value *V)
Finds the positive number associated with V if it has been mapped.
unsigned getEndIdx() const
static bool compareAssignmentMapping(const unsigned InstValA, const unsigned &InstValB, DenseMap< unsigned, DenseSet< unsigned > > &ValueNumberMappingA, DenseMap< unsigned, DenseSet< unsigned > > &ValueNumberMappingB)
Compare the GVN of the assignment value in corresponding instructions in IRSimilarityCandidates A and...
void getBasicBlocks(DenseSet< BasicBlock * > &BBSet) const
std::optional< Value * > fromGVN(unsigned Num)
Finds the Value associate with Num if it exists.
IRInstructionDataList::iterator iterator
Instruction * frontInstruction()
static bool checkRelativeLocations(RelativeLocMapping A, RelativeLocMapping B)
Compare the relative locations in A and B and check that the distances match if both locations are co...
static bool compareStructure(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B)
static void createCanonicalMappingFor(IRSimilarityCandidate &CurrCand)
Create a mapping from the value numbering to a different separate set of numbers.
static bool overlap(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B)
Compare the start and end indices of the two IRSimilarityCandidates for whether they overlap.
unsigned getStartIdx() const
BasicBlock * getStartBB()
IRSimilarityCandidate(unsigned StartIdx, unsigned Len, IRInstructionData *FirstInstIt, IRInstructionData *LastInstIt)
std::optional< unsigned > getCanonicalNum(unsigned N)
Find the canonical number from the global value number N stored in the candidate.
std::optional< unsigned > fromCanonicalNum(unsigned N)
Find the global value number from the canonical number N stored in the candidate.
static bool compareNonCommutativeOperandMapping(OperandMapping A, OperandMapping B)
Compare the operands in A and B and check that the current mapping of global value numbers from A to ...
static bool compareCommutativeOperandMapping(OperandMapping A, OperandMapping B)
Compare the operands in A and B and check that the current mapping of global value numbers from A to ...
This class puts all the pieces of the IRInstructionData, IRInstructionMapper, IRSimilarityCandidate t...
void resetSimilarityCandidates()
SimilarityGroupList & findSimilarity(ArrayRef< std::unique_ptr< Module > > Modules)
std::optional< SimilarityGroupList > & getSimilarity()
void visit(Iterator Start, Iterator End)
A wrapper class for inspecting calls to intrinsic functions.
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
A Module instance is used to store all the information related to an LLVM module.
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
std::string str() const
str - Get the contents as an std::string.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
iterator find(const_arg_type_t< ValueT > V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
const ParentTy * getParent() const
typename ilist_select_iterator_type< OptionsT::has_iterator_bits, OptionsT, false, false >::type iterator
void push_back(reference Node)
Insert a node at the back; never copies.
@ C
The default llvm calling convention, compatible with C.
std::vector< SimilarityGroup > SimilarityGroupList
std::vector< IRSimilarityCandidate > SimilarityGroup
bool isClose(const IRInstructionData &A, const IRInstructionData &B)
Compare one IRInstructionData class to another IRInstructionData class for whether they are performin...
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
bool isOverloaded(ID id)
Returns true if the intrinsic can be overloaded.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
void stable_sort(R &&Range)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
cl::opt< bool > DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false), cl::ReallyHidden, cl::desc("disable outlining indirect calls."))
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
cl::opt< bool > DisableBranches("no-ir-sim-branch-matching", cl::init(false), cl::ReallyHidden, cl::desc("disable similarity matching, and outlining, " "across branches for debugging purposes."))
cl::opt< bool > DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden, cl::desc("Don't match or outline intrinsics"))
void initializeIRSimilarityIdentifierWrapperPassPass(PassRegistry &)
cl::opt< bool > MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden, cl::desc("only allow matching call instructions if the " "name and type signature match."))
A special type used by analysis passes to provide an address that identifies that particular analysis...
An information struct used to provide DenseMap with the various necessary components for a given valu...
This provides the utilities for hashing an Instruction to an unsigned integer.
StringRef getCalleeName() const
Get the callee name that the call instruction is using for hashing the instruction.
void initializeInstruction()
Fills data stuctures for IRInstructionData when it is constructed from a.
SmallVector< int, 4 > RelativeBlockLocations
This structure holds the distances of how far "ahead of" or "behind" the target blocks of a branch,...
std::optional< std::string > CalleeName
This is only relevant if we are wrapping a CallInst.
IRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDL)
Gather the information that is difficult to gather for an Instruction, or is changed.
ArrayRef< Value * > getBlockOperVals()
Get the BasicBlock based operands for PHINodes and BranchInsts.
Instruction * Inst
The source Instruction that is being wrapped.
static CmpInst::Predicate predicateForConsistency(CmpInst *CI)
A function that swaps the predicates to their less than form if they are in a greater than form.
void setPHIPredecessors(DenseMap< BasicBlock *, unsigned > &BasicBlockToInteger)
For an IRInstructionData containing a PHINode, finds the relative distances from the incoming basic b...
SmallVector< Value *, 4 > OperVals
The values of the operands in the Instruction.
void setBranchSuccessors(DenseMap< BasicBlock *, unsigned > &BasicBlockToInteger)
For an IRInstructionData containing a branch, finds the relative distances from the source basic bloc...
void setCalleeName(bool MatchByName=true)
For an IRInstructionData containing a CallInst, set the function name appropriately.
CmpInst::Predicate getPredicate() const
Get the predicate that the compare instruction is using for hashing the instruction.
std::optional< CmpInst::Predicate > RevisedPredicate
This is only relevant if we are wrapping a CmpInst where we needed to change the predicate of a compa...
Helper struct for converting the Instructions in a Module into a vector of unsigned integers.
DenseMap< IRInstructionData *, unsigned, IRInstructionDataTraits > InstructionIntegerMap
Correspondence from IRInstructionData to unsigned integers.
SpecificBumpPtrAllocator< IRInstructionDataList > * IDLAllocator
This allocator pointer is in charge of creating the IRInstructionDataList so it is not deallocated un...
SpecificBumpPtrAllocator< IRInstructionData > * InstDataAllocator
This allocator pointer is in charge of holding on to the IRInstructionData so it is not deallocated u...
bool EnableMatchCallsByName
Marks whether we should use exact function names, as well as types to find similarity between calls.
unsigned LegalInstrNumber
The next available integer to assign to a legal Instruction to.
unsigned IllegalInstrNumber
The starting illegal instruction number to map to.
InstructionClassification InstClassifier
Maps an Instruction to a member of InstrType.
void initializeForBBs(Function &F, unsigned &BBNumber)
Assigns values to all the basic blocks in function F starting from integer BBNumber.
bool HaveLegalRange
Marks whether we have found a set of instructions that is long enough to be considered for similarity...
IRInstructionData * allocateIRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDL)
Get an allocated IRInstructionData struct using the InstDataAllocator.
bool CanCombineWithPrevInstr
Marks whether we found a illegal instruction in the previous step.
IRInstructionDataList * IDL
DenseMap< BasicBlock *, unsigned > BasicBlockToInteger
A mapping for a basic block in a module to its assigned number/location in the module.
IRInstructionDataList * allocateIRInstructionDataList()
Get an allocated IRInstructionDataList object using the IDLAllocator.
unsigned mapToLegalUnsigned(BasicBlock::iterator &It, std::vector< unsigned > &IntegerMappingForBB, std::vector< IRInstructionData * > &InstrListForBB)
Maps an Instruction to a legal integer.
void convertToUnsignedVec(BasicBlock &BB, std::vector< IRInstructionData * > &InstrList, std::vector< unsigned > &IntegerMapping)
Maps the Instructions in a BasicBlock BB to legal or illegal integers determined by InstrType.
bool AddedIllegalLastTime
Set if we added an illegal number in the previous step.
unsigned mapToIllegalUnsigned(BasicBlock::iterator &It, std::vector< unsigned > &IntegerMappingForBB, std::vector< IRInstructionData * > &InstrListForBB, bool End=false)
Maps an Instruction to an illegal integer.
A helper struct to hold the candidate, for a branch instruction, the relative location of a label,...
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
A repeated substring in the tree.
SmallVector< unsigned > StartIndices
The start indices of each occurrence.
unsigned Length
The length of the string.