LLVM: lib/Analysis/IRSimilarityIdentifier.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

23

24using namespace llvm;

25using namespace IRSimilarity;

26

27namespace llvm {

31 cl::desc("disable similarity matching, and outlining, "

32 "across branches for debugging purposes."));

33

37 cl::desc("disable outlining indirect calls."));

38

41 cl::desc("only allow matching call instructions if the "

42 "name and type signature match."));

43

46 cl::desc("Don't match or outline intrinsics"));

47}

48

51 : Inst(&I), Legal(Legality), IDL(&IDList) {

53}

54

56

57

58

61 if (Predicate != C->getPredicate())

63 }

64

65

66

69

70

72 continue;

73 }

74

76 }

77

78

79

80 if (PHINode *PN = dyn_cast(Inst))

83}

84

86 : IDL(&IDList) {}

87

90 assert(isa(Inst) && "Instruction must be branch");

91

94

95 BBNumIt = BasicBlockToInteger.find(BI->getParent());

96 assert(BBNumIt != BasicBlockToInteger.end() &&

97 "Could not find location for BasicBlock!");

98

99 int CurrentBlockNumber = static_cast<int>(BBNumIt->second);

100

104 assert(BBNumIt != BasicBlockToInteger.end() &&

105 "Could not find number for BasicBlock!");

106 int OtherBlockNumber = static_cast<int>(BBNumIt->second);

107

108 int Relative = OtherBlockNumber - CurrentBlockNumber;

110 }

111}

112

115 isa(Inst)) && "Instruction must be branch or PHINode");

116

119 std::next(OperVals.begin(), BI->isConditional() ? 1 : 0),

121 );

122

123 if (PHINode *PN = dyn_cast(Inst))

125 std::next(OperVals.begin(), PN->getNumIncomingValues()),

127 );

128

130}

131

134 assert(CI && "Instruction must be call");

135

138

139

140

143

144

148

149 else

151

152 return;

153 }

154

157}

158

161 assert(isa(Inst) && "Instruction must be phi node");

162

165

166 BBNumIt = BasicBlockToInteger.find(PN->getParent());

167 assert(BBNumIt != BasicBlockToInteger.end() &&

168 "Could not find location for BasicBlock!");

169

170 int CurrentBlockNumber = static_cast<int>(BBNumIt->second);

171

172

173

176 BBNumIt = BasicBlockToInteger.find(Incoming);

177 assert(BBNumIt != BasicBlockToInteger.end() &&

178 "Could not find number for BasicBlock!");

179 int OtherBlockNumber = static_cast<int>(BBNumIt->second);

180

181 int Relative = OtherBlockNumber - CurrentBlockNumber;

183 }

184}

185

197 default:

199 }

200}

201

204 "Can only get a predicate from a compare instruction");

205

208

209 return cast(Inst)->getPredicate();

210}

211

214 "Can only get a name from a call instruction");

215

217

219}

220

223

224 if (A.Legal || B.Legal)

225 return false;

226

227

228

229 if (A.Inst->isSameOperationAs(B.Inst)) {

230

231

232

233 if (isa(A.Inst) && isa(B.Inst)) {

234

235 if (A.getPredicate() != B.getPredicate())

236 return false;

237

238

239

240 auto ZippedTypes = zip(A.OperVals, B.OperVals);

241

243 ZippedTypes, [](std::tuple<llvm::Value *, llvm::Value *> R) {

244 return std::get<0>(R)->getType() == std::get<1>(R)->getType();

245 });

246 }

247

248 return false;

249 }

250

251

252

253

254 if (auto *GEP = dyn_cast(A.Inst)) {

255 auto *OtherGEP = cast(B.Inst);

256

257

258

259 if (GEP->isInBounds() != OtherGEP->isInBounds())

260 return false;

261

262 auto ZippedOperands = zip(GEP->indices(), OtherGEP->indices());

263

264

265

266

268 [](std::tuple<llvm::Use &, llvm::Use &> R) {

269 return std::get<0>(R) == std::get<1>(R);

270 });

271 }

272

273

274

275

276 if (isa(A.Inst) && isa(B.Inst)) {

277 if (A.getCalleeName() != B.getCalleeName())

278 return false;

279 }

280

281 if (isa(A.Inst) && isa(B.Inst) &&

282 A.RelativeBlockLocations.size() != B.RelativeBlockLocations.size())

283 return false;

284

285 return true;

286}

287

288

289

291 BasicBlock &BB, std::vector<IRInstructionData *> &InstrList,

292 std::vector &IntegerMapping) {

294

295 std::vector IntegerMappingForBB;

296 std::vector<IRInstructionData *> InstrListForBB;

297

302 break;

305 break;

308 break;

309 }

310 }

311

318}

319

320

321

324 std::vector<IRInstructionData *> &InstrListForBB) {

325

326

328

329

330

334

335

336

338 InstrListForBB.push_back(ID);

339

340 if (isa(*It))

342

343 if (isa(*It))

345

346 if (isa(*It))

348

349

350 bool WasInserted;

352 ResultIt;

353 std::tie(ResultIt, WasInserted) =

355 unsigned INumber = ResultIt->second;

356

357

358 if (WasInserted)

360

361 IntegerMappingForBB.push_back(INumber);

362

363

365 "Instruction mapping overflow!");

366

368 "Tried to assign DenseMap tombstone or empty key to instruction.");

370 "Tried to assign DenseMap tombstone or empty key to instruction.");

371

372 return INumber;

373}

374

379}

380

384}

385

389}

390

391

392

395 std::vector<IRInstructionData *> &InstrListForBB, bool End) {

396

398

399

402

404 if (End)

406 else

408 InstrListForBB.push_back(ID);

409

410

414

416 "Instruction mapping overflow!");

417

419 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");

420

422 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");

423

424 return INumber;

425}

426

430 : StartIdx(StartIdx), Len(Len) {

431

432 assert(FirstInstIt != nullptr && "Instruction is nullptr!");

433 assert(LastInstIt != nullptr && "Instruction is nullptr!");

434 assert(StartIdx + Len > StartIdx &&

435 "Overflow for IRSimilarityCandidate range?");

436 assert(Len - 1 == static_cast<unsigned>(std::distance(

438 "Length of the first and last IRInstructionData do not match the "

439 "given length");

440

441

442

443

444

445

446

447

448

449

450

451

452

453

454

455

456 unsigned LocalValNumber = 1;

458 for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) {

459

460

461 for (Value *Arg : ID->OperVals)

462 if (ValueToNumber.try_emplace(Arg, LocalValNumber).second) {

463 NumberToValue.try_emplace(LocalValNumber, Arg);

464 LocalValNumber++;

465 }

466

467

468

469 if (ValueToNumber.try_emplace(ID->Inst, LocalValNumber).second) {

470 NumberToValue.try_emplace(LocalValNumber, ID->Inst);

471 LocalValNumber++;

472 }

473 }

474

475

476

477

478 FirstInst = FirstInstIt;

479 LastInst = LastInstIt;

480

481

485 if (ValueToNumber.try_emplace(BB, LocalValNumber).second) {

486 NumberToValue.try_emplace(LocalValNumber, BB);

487 LocalValNumber++;

488 }

489 }

490}

491

494 if (A.getLength() != B.getLength())

495 return false;

496

497 auto InstrDataForBoth =

499

500 return all_of(InstrDataForBoth,

501 [](std::tuple<IRInstructionData &, IRInstructionData &> R) {

504 if (A.Legal || B.Legal)

505 return false;

507 });

508}

509

510

511

512

513

514

515

516

517

518

519

520

521

522

523

524

525

526

532

534

535 unsigned ArgVal;

536 bool WasInserted;

537

538

539

540

541

542 for (Value *V : SourceOperands) {

543 ArgVal = SourceValueToNumberMapping.find(V)->second;

544

545

546 std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert(

547 std::make_pair(ArgVal, TargetValueNumbers));

548

549

550

551

553 for (unsigned &Curr : ValueMappingIt->second)

554

555 if (TargetValueNumbers.contains(Curr))

557

558

559 if (NewSet.empty())

560 return false;

561

562

563 if (NewSet.size() != ValueMappingIt->second.size())

564 ValueMappingIt->second.swap(NewSet);

565

566

567

568 if (ValueMappingIt->second.size() != 1)

569 continue;

570

571 unsigned ValToRemove = *ValueMappingIt->second.begin();

572

573

574

575 for (Value *InnerV : SourceOperands) {

576 if (V == InnerV)

577 continue;

578

579 unsigned InnerVal = SourceValueToNumberMapping.find(InnerV)->second;

580 ValueMappingIt = CurrentSrcTgtNumberMapping.find(InnerVal);

581 if (ValueMappingIt == CurrentSrcTgtNumberMapping.end())

582 continue;

583

584 ValueMappingIt->second.erase(ValToRemove);

585 if (ValueMappingIt->second.empty())

586 return false;

587 }

588 }

589

590 return true;

591}

592

593

594

595

596

597

598

599

600

601

602

603

606 unsigned SourceArgVal, unsigned TargetArgVal) {

607

608

609

610

611

612

613

614

615

616

617

618

619

620

621

622 bool WasInserted;

624

625 std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.insert(

627

628

629 if (WasInserted)

630 return true;

631

632

633

634

635

636

638 if (TargetSet.size() > 1 && TargetSet.contains(TargetArgVal)) {

639 TargetSet.clear();

640 TargetSet.insert(TargetArgVal);

641 return true;

642 }

643

644

645 return TargetSet.contains(TargetArgVal);

646}

647

650

651

654 unsigned OperandLength = A.OperVals.size();

655

656

657 for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) {

658 unsigned OperValA = A.IRSC.ValueToNumber.find(*VItA)->second;

659 unsigned OperValB = B.IRSC.ValueToNumber.find(*VItB)->second;

660

661

662

663

664

665

666

667

668

669

670

671

672

674 return false;

675

677 return false;

678 }

679 return true;

680}

681

686

689 unsigned OperandLength = A.OperVals.size();

690

691

692 for (unsigned Idx = 0; Idx < OperandLength;

693 Idx++, VItA++, VItB++) {

694 ValueNumbersA.insert(A.IRSC.ValueToNumber.find(*VItA)->second);

695 ValueNumbersB.insert(B.IRSC.ValueToNumber.find(*VItB)->second);

696 }

697

698

699

700

702 A.ValueNumberMapping, A.OperVals,

703 ValueNumbersB))

704 return false;

705

706

707

708

710 B.ValueNumberMapping, B.OperVals,

711 ValueNumbersA))

712 return false;

713

714 return true;

715}

716

718 const unsigned InstValA, const unsigned &InstValB,

722 bool WasInserted;

723 std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(

725 if (!WasInserted && !ValueMappingIt->second.contains(InstValB))

726 return false;

727 else if (ValueMappingIt->second.size() != 1) {

728 for (unsigned OtherVal : ValueMappingIt->second) {

729 if (OtherVal == InstValB)

730 continue;

731 if (!ValueNumberMappingA.contains(OtherVal))

732 continue;

733 if (!ValueNumberMappingA[OtherVal].contains(InstValA))

734 continue;

735 ValueNumberMappingA[OtherVal].erase(InstValA);

736 }

737 ValueNumberMappingA.erase(ValueMappingIt);

738 std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(

740 }

741

742 return true;

743}

744

747

748 BasicBlock *ABB = cast(A.OperVal);

749 BasicBlock *BBB = cast(B.OperVal);

750

751

754 A.IRSC.getBasicBlocks(BasicBlockA);

755 B.IRSC.getBasicBlocks(BasicBlockB);

756

757

758 bool AContained = BasicBlockA.contains(ABB);

759 bool BContained = BasicBlockB.contains(BBB);

760

761

762

763 if (AContained != BContained)

764 return false;

765

766

767

768 if (AContained)

769 return A.RelativeLocation == B.RelativeLocation;

770 return true;

771}

772

778}

779

784

789 if (A.getLength() != B.getLength())

790 return false;

791

792 if (A.ValueToNumber.size() != B.ValueToNumber.size())

793 return false;

794

797

798

799

800

801

802

803

804 unsigned SectionLength = A.getStartIdx() + A.getLength();

805 for (unsigned Loc = A.getStartIdx(); Loc < SectionLength;

806 ItA++, ItB++, Loc++) {

807

808 if (isClose(*ItA, *ItB))

809 return false;

810

813

814 if (!ItA->Legal || !ItB->Legal)

815 return false;

816

817

820

821 unsigned InstValA = A.ValueToNumber.find(IA)->second;

822 unsigned InstValB = B.ValueToNumber.find(IB)->second;

823

824

826 ValueNumberMappingB))

827 return false;

828

830 ValueNumberMappingA))

831 return false;

832

833

834

835

836 if (IA->isCommutative() && !isa(IA) &&

837 !isa(IA)) {

839 {A, OperValsA, ValueNumberMappingA},

840 {B, OperValsB, ValueNumberMappingB}))

841 return false;

842 continue;

843 }

844

845

847 {A, OperValsA, ValueNumberMappingA},

848 {B, OperValsB, ValueNumberMappingB}))

849 return false;

850

851

852

853

854

855

856

857

858

859

860

861

862 if (!(isa(IA) && isa(IB)) &&

863 !(isa(IA) && isa(IB)))

864 continue;

865

870

871

872

873 if (RelBlockLocsA.size() != RelBlockLocsB.size() &&

875 return false;

876

878 "Block information vectors not the same size.");

880 "Block information vectors not the same size.");

881

883 zip(RelBlockLocsA, RelBlockLocsB, ABL, BBL);

884 if (any_of(ZippedRelativeLocations,

885 [&A, &B](std::tuple<int, int, Value *, Value *> R) {

887 {A, std::get<0>(R), std::get<2>(R)},

888 {B, std::get<1>(R), std::get<3>(R)});

889 }))

890 return false;

891 }

892 return true;

893}

894

899

900

901

902 return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx;

903 };

904

905 return DoesOverlap(A, B) || DoesOverlap(B, A);

906}

907

908void IRSimilarityIdentifier::populateMapper(

909 Module &M, std::vector<IRInstructionData *> &InstrList,

910 std::vector &IntegerMapping) {

911

912 std::vector<IRInstructionData *> InstrListForModule;

913 std::vector IntegerMappingForModule;

914

915

917

919

920 if (F.empty())

921 continue;

922

924

925

926

927

929 IntegerMappingForModule);

930 }

931

934 true);

935 if (InstrListForModule.size() > 0)

936 Mapper.IDL->push_back(*InstrListForModule.back());

937 }

938

939

940

942

944}

945

946void IRSimilarityIdentifier::populateMapper(

947 ArrayRef<std::unique_ptr> &Modules,

948 std::vector<IRInstructionData *> &InstrList,

949 std::vector &IntegerMapping) {

950

951

952 for (const std::unique_ptr &M : Modules)

953 populateMapper(*M, InstrList, IntegerMapping);

954}

955

956

957

958

959

960

961

962

963

964

966 const IRInstructionMapper& Mapper, std::vector<IRInstructionData *> &InstrList,

968 std::vector &CandsForRepSubstring) {

969

970 unsigned StringLen = RS.Length;

971 if (StringLen < 2)

972 return;

973

974

975 for (const unsigned &StartIdx : RS.StartIndices) {

976 unsigned EndIdx = StartIdx + StringLen - 1;

977

978

979 bool ContainsIllegal = false;

980 for (unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) {

981 unsigned Key = IntegerMapping[CurrIdx];

983 ContainsIllegal = true;

984 break;

985 }

986 }

987

988

989

990 if (ContainsIllegal)

991 continue;

992

993

994

995

996 std::vector<IRInstructionData *>::iterator StartIt = InstrList.begin();

997 std::advance(StartIt, StartIdx);

998 std::vector<IRInstructionData *>::iterator EndIt = InstrList.begin();

999 std::advance(EndIt, EndIdx);

1000

1001 CandsForRepSubstring.emplace_back(StartIdx, StringLen, *StartIt, *EndIt);

1002 }

1003}

1004

1009 assert(SourceCand.CanonNumToNumber.size() != 0 &&

1010 "Base canonical relationship is empty!");

1011 assert(SourceCand.NumberToCanonNum.size() != 0 &&

1012 "Base canonical relationship is empty!");

1013

1014 assert(CanonNumToNumber.size() == 0 && "Canonical Relationship is non-empty");

1015 assert(NumberToCanonNum.size() == 0 && "Canonical Relationship is non-empty");

1016

1018

1019

1020

1021 for (std::pair<unsigned, DenseSet> &GVNMapping : ToSourceMapping) {

1022 unsigned SourceGVN = GVNMapping.first;

1023

1024 assert(GVNMapping.second.size() != 0 && "Possible GVNs is 0!");

1025

1026 unsigned ResultGVN;

1027

1028

1029

1030

1031 if (GVNMapping.second.size() > 1) {

1032 bool Found = false;

1033 for (unsigned Val : GVNMapping.second) {

1034

1036 continue;

1037

1038

1040 FromSourceMapping.find(Val);

1041

1042 if (!It->second.contains(SourceGVN))

1043 continue;

1044

1045

1046 Found = true;

1047 ResultGVN = Val;

1048 break;

1049 }

1050

1051 assert(Found && "Could not find matching value for source GVN");

1052 (void)Found;

1053

1054 } else

1055 ResultGVN = *GVNMapping.second.begin();

1056

1057

1058 UsedGVNs.insert(ResultGVN);

1059

1060 unsigned CanonNum = *SourceCand.getCanonicalNum(ResultGVN);

1061 CanonNumToNumber.insert(std::make_pair(CanonNum, SourceGVN));

1062 NumberToCanonNum.insert(std::make_pair(SourceGVN, CanonNum));

1063 }

1064

1067

1068

1069

1070

1071

1072

1074 unsigned BBGVNForCurrCand = ValueToNumber.find(BB)->second;

1075

1076

1077

1078 if (NumberToCanonNum.contains(BBGVNForCurrCand))

1079 continue;

1080

1081

1082

1083

1086 : &*BB->instructionsWithoutDebug().begin();

1087

1088 unsigned FirstInstGVN = *getGVN(FirstOutlineInst);

1089 unsigned FirstInstCanonNum = *getCanonicalNum(FirstInstGVN);

1090 unsigned SourceGVN = *SourceCand.fromCanonicalNum(FirstInstCanonNum);

1091 Value *SourceV = *SourceCand.fromGVN(SourceGVN);

1093 unsigned SourceBBGVN = *SourceCand.getGVN(SourceBB);

1094 unsigned SourceCanonBBGVN = *SourceCand.getCanonicalNum(SourceBBGVN);

1095 CanonNumToNumber.insert(std::make_pair(SourceCanonBBGVN, BBGVNForCurrCand));

1096 NumberToCanonNum.insert(std::make_pair(BBGVNForCurrCand, SourceCanonBBGVN));

1097 }

1098}

1099

1103 assert(!SourceCand.CanonNumToNumber.empty() &&

1104 "Canonical Relationship is non-empty");

1105 assert(!SourceCand.NumberToCanonNum.empty() &&

1106 "Canonical Relationship is non-empty");

1107

1108 assert(!SourceCandLarge.CanonNumToNumber.empty() &&

1109 "Canonical Relationship is non-empty");

1110 assert(!SourceCandLarge.NumberToCanonNum.empty() &&

1111 "Canonical Relationship is non-empty");

1112

1113 assert(!TargetCandLarge.CanonNumToNumber.empty() &&

1114 "Canonical Relationship is non-empty");

1115 assert(!TargetCandLarge.NumberToCanonNum.empty() &&

1116 "Canonical Relationship is non-empty");

1117

1118 assert(CanonNumToNumber.empty() && "Canonical Relationship is non-empty");

1119 assert(NumberToCanonNum.empty() && "Canonical Relationship is non-empty");

1120

1121

1122

1123

1124

1125 for (std::pair<Value *, unsigned> &ValueNumPair : ValueToNumber) {

1126 Value *CurrVal = ValueNumPair.first;

1127 unsigned TargetCandGVN = ValueNumPair.second;

1128

1129

1130

1131 std::optional OLargeTargetGVN = TargetCandLarge.getGVN(CurrVal);

1132 assert(OLargeTargetGVN.has_value() && "GVN not found for Value");

1133

1134

1135 std::optional OTargetCandCanon =

1136 TargetCandLarge.getCanonicalNum(OLargeTargetGVN.value());

1137 assert(OTargetCandCanon.has_value() &&

1138 "Canononical Number not found for GVN");

1139

1140

1141 std::optional OLargeSourceGVN =

1143 assert(OLargeSourceGVN.has_value() &&

1144 "GVN Number not found for Canonical Number");

1145

1146

1147 std::optional<Value *> OLargeSourceV =

1148 SourceCandLarge.fromGVN(OLargeSourceGVN.value());

1149 assert(OLargeSourceV.has_value() && "Value not found for GVN");

1150

1151

1152 std::optional OSourceGVN =

1153 SourceCand.getGVN(OLargeSourceV.value());

1154 assert(OSourceGVN.has_value() && "GVN Number not found for Value");

1155

1156

1157 std::optional OSourceCanon =

1159 assert(OSourceCanon.has_value() && "Canon Number not found for GVN");

1160

1161

1162

1163 CanonNumToNumber.insert(

1164 std::make_pair(OSourceCanon.value(), TargetCandGVN));

1165 NumberToCanonNum.insert(

1166 std::make_pair(TargetCandGVN, OSourceCanon.value()));

1167 }

1168}

1169

1172 assert(CurrCand.CanonNumToNumber.size() == 0 &&

1173 "Canonical Relationship is non-empty");

1174 assert(CurrCand.NumberToCanonNum.size() == 0 &&

1175 "Canonical Relationship is non-empty");

1176

1177 unsigned CanonNum = 0;

1178

1179

1180 for (std::pair<unsigned, Value *> &NumToVal : CurrCand.NumberToValue) {

1181 CurrCand.NumberToCanonNum.insert(std::make_pair(NumToVal.first, CanonNum));

1182 CurrCand.CanonNumToNumber.insert(std::make_pair(CanonNum, NumToVal.first));

1183 CanonNum++;

1184 }

1185}

1186

1187

1188

1189

1190

1191

1192

1193

1194

1195

1196

1197

1198

1199

1200static std::optional<

1201 std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>

1210

1211

1212

1213 auto IdxToCandidateIt = IndexToIncludedCand.find(CandA.getStartIdx());

1214 std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>

1215 Result;

1216

1217 unsigned CandAStart = CandA.getStartIdx();

1218 unsigned CandAEnd = CandA.getEndIdx();

1219 unsigned CandBStart = CandB.getStartIdx();

1220 unsigned CandBEnd = CandB.getEndIdx();

1221 if (IdxToCandidateIt == IndexToIncludedCand.end())

1222 return Result;

1224 if (MatchedCand->getStartIdx() > CandAStart ||

1225 (MatchedCand->getEndIdx() < CandAEnd))

1226 continue;

1227 unsigned GroupNum = CandToGroup.find(MatchedCand)->second;

1228 IncludedGroupAndCandA.insert(std::make_pair(GroupNum, MatchedCand));

1229 IncludedGroupsA.insert(GroupNum);

1230 }

1231

1232

1233

1234 IdxToCandidateIt = IndexToIncludedCand.find(CandBStart);

1235 if (IdxToCandidateIt == IndexToIncludedCand.end())

1236 return Result;

1238 if (MatchedCand->getStartIdx() > CandBStart ||

1239 MatchedCand->getEndIdx() < CandBEnd)

1240 continue;

1241 unsigned GroupNum = CandToGroup.find(MatchedCand)->second;

1242 IncludedGroupAndCandB.insert(std::make_pair(GroupNum, MatchedCand));

1243 IncludedGroupsB.insert(GroupNum);

1244 }

1245

1246

1247

1248 set_intersect(IncludedGroupsA, IncludedGroupsB);

1249

1250

1251

1252 if (IncludedGroupsA.empty())

1253 return Result;

1254

1255

1256 auto ItA = IncludedGroupAndCandA.find(*IncludedGroupsA.begin());

1257 auto ItB = IncludedGroupAndCandB.find(*IncludedGroupsA.begin());

1258 Result = std::make_pair(ItA->second, ItB->second);

1259 return Result;

1260}

1261

1262

1263

1264

1265

1266

1267

1268

1269

1270

1271

1272

1273

1274

1276 std::vector &CandsForRepSubstring,

1280 ) {

1281 std::vector::iterator CandIt, CandEndIt, InnerCandIt,

1282 InnerCandEndIt;

1283

1284

1285

1286

1287

1288

1290

1291

1292

1293

1294 bool SameStructure;

1295 bool Inserted;

1296 unsigned CurrentGroupNum = 0;

1297 unsigned OuterGroupNum;

1301

1302

1303

1306 for (CandIt = CandsForRepSubstring.begin(),

1307 CandEndIt = CandsForRepSubstring.end();

1308 CandIt != CandEndIt; CandIt++) {

1309

1310

1311 CandToGroupIt = CandToGroup.find(&*CandIt);

1312 if (CandToGroupIt == CandToGroup.end()) {

1313

1314 std::tie(CandToGroupIt, Inserted) =

1315 CandToGroup.insert(std::make_pair(&*CandIt, CurrentGroupNum++));

1316 }

1317

1318

1319 OuterGroupNum = CandToGroupIt->second;

1320

1321

1322

1323 CurrentGroupPair = StructuralGroups.find(OuterGroupNum);

1324 if (CurrentGroupPair == StructuralGroups.end()) {

1326 std::tie(CurrentGroupPair, Inserted) = StructuralGroups.insert(

1327 std::make_pair(OuterGroupNum, SimilarityGroup({*CandIt})));

1328 }

1329

1330

1331

1332

1333

1334 for (InnerCandIt = std::next(CandIt),

1335 InnerCandEndIt = CandsForRepSubstring.end();

1336 InnerCandIt != InnerCandEndIt; InnerCandIt++) {

1337

1338

1339 CandToGroupItInner = CandToGroup.find(&*InnerCandIt);

1340 if (CandToGroupItInner != CandToGroup.end())

1341 continue;

1342

1343

1344

1345 std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>

1347 *CandIt, *InnerCandIt, IndexToIncludedCand, CandToOverallGroup);

1348

1349

1350

1351

1352 if (LargerPair.has_value()) {

1353 SameStructure = true;

1354 InnerCandIt->createCanonicalRelationFrom(

1355 *CandIt, *LargerPair.value().first, *LargerPair.value().second);

1356 CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));

1357 CurrentGroupPair->second.push_back(*InnerCandIt);

1358 continue;

1359 }

1360

1361

1362

1363 ValueNumberMappingA.clear();

1364 ValueNumberMappingB.clear();

1366 *CandIt, *InnerCandIt, ValueNumberMappingA, ValueNumberMappingB);

1367 if (!SameStructure)

1368 continue;

1369

1370 InnerCandIt->createCanonicalRelationFrom(*CandIt, ValueNumberMappingA,

1371 ValueNumberMappingB);

1372 CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));

1373 CurrentGroupPair->second.push_back(*InnerCandIt);

1374 }

1375 }

1376}

1377

1378void IRSimilarityIdentifier::findCandidates(

1379 std::vector<IRInstructionData *> &InstrList,

1380 std::vector &IntegerMapping) {

1382

1383 std::vector CandsForRepSubstring;

1384 std::vector NewCandidateGroups;

1385

1389

1390

1391

1392

1393

1394

1395 std::vectorSuffixTree::RepeatedSubstring RSes;

1397 RSes.push_back(RS);

1398

1401 return LHS.Length > RHS.Length;

1402 });

1405 CandsForRepSubstring);

1406

1407 if (CandsForRepSubstring.size() < 2)

1408 continue;

1409

1411 IndexToIncludedCand, CandToGroup);

1412 for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups) {

1413

1414

1415

1416 if (Group.second.size() > 1) {

1417 SimilarityCandidates->push_back(Group.second);

1418

1419

1420

1422 for (unsigned Idx = IRCand.getStartIdx(), Edx = IRCand.getEndIdx();

1424 IndexToIncludedCand[Idx].insert(&IRCand);

1425

1427 std::make_pair(&IRCand, SimilarityCandidates->size() - 1));

1428 }

1429 }

1430 }

1431

1432 CandsForRepSubstring.clear();

1433 StructuralGroups.clear();

1434 NewCandidateGroups.clear();

1435 }

1436}

1437

1439 ArrayRef<std::unique_ptr> Modules) {

1441

1442 std::vector<IRInstructionData *> InstrList;

1443 std::vector IntegerMapping;

1449

1450 populateMapper(Modules, InstrList, IntegerMapping);

1451 findCandidates(InstrList, IntegerMapping);

1452

1453 return *SimilarityCandidates;

1454}

1455

1463

1464 std::vector<IRInstructionData *> InstrList;

1465 std::vector IntegerMapping;

1466

1467 populateMapper(M, InstrList, IntegerMapping);

1468 findCandidates(InstrList, IntegerMapping);

1469

1470 return *SimilarityCandidates;

1471}

1472

1474 "ir-similarity-identifier", false, true)

1475

1480}

1481

1485 false));

1486 return false;

1487}

1488

1490 IRSI.reset();

1491 return false;

1492}

1493

1495 IRSI->findSimilarity(M);

1496 return false;

1497}

1498

1504 false);

1505 IRSI.findSimilarity(M);

1506 return IRSI;

1507}

1508

1512 std::optional &SimilarityCandidatesOpt =

1514

1515 for (std::vector &CandVec : *SimilarityCandidatesOpt) {

1516 OS << CandVec.size() << " candidates of length "

1517 << CandVec.begin()->getLength() << ". Found in: \n";

1519 OS << " Function: " << Cand.front()->Inst->getFunction()->getName().str()

1520 << ", Basic Block: ";

1521 if (Cand.front()->Inst->getParent()->getName().str() == "")

1522 OS << "(unnamed)";

1523 else

1524 OS << Cand.front()->Inst->getParent()->getName().str();

1525 OS << "\n Start Instruction: ";

1526 Cand.frontInstruction()->print(OS);

1527 OS << "\n End Instruction: ";

1528 Cand.backInstruction()->print(OS);

1529 OS << "\n";

1530 }

1531 }

1532

1534}

1535

static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx

This file defines the DenseMap class.

static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")

bool checkNumberingAndReplace(DenseMap< unsigned, DenseSet< unsigned > > &CurrentSrcTgtNumberMapping, unsigned SourceArgVal, unsigned TargetArgVal)

Determine if operand number TargetArgVal is in the current mapping set for operand number SourceArgVa...

detail::zippy< detail::zip_shortest, SmallVector< int, 4 > &, SmallVector< int, 4 > &, ArrayRef< Value * > &, ArrayRef< Value * > & > ZippedRelativeLocationsT

static void findCandidateStructures(std::vector< IRSimilarityCandidate > &CandsForRepSubstring, DenseMap< unsigned, SimilarityGroup > &StructuralGroups, DenseMap< unsigned, DenseSet< IRSimilarityCandidate * > > &IndexToIncludedCand, DenseMap< IRSimilarityCandidate *, unsigned > &CandToOverallGroup)

From the list of IRSimilarityCandidates, perform a comparison between each IRSimilarityCandidate to d...

static void createCandidatesFromSuffixTree(const IRInstructionMapper &Mapper, std::vector< IRInstructionData * > &InstrList, std::vector< unsigned > &IntegerMapping, SuffixTree::RepeatedSubstring &RS, std::vector< IRSimilarityCandidate > &CandsForRepSubstring)

From a repeated subsequence, find all the different instances of the subsequence from the InstrList,...

static bool checkNumberingAndReplaceCommutative(const DenseMap< Value *, unsigned > &SourceValueToNumberMapping, DenseMap< unsigned, DenseSet< unsigned > > &CurrentSrcTgtNumberMapping, ArrayRef< Value * > &SourceOperands, DenseSet< unsigned > &TargetValueNumbers)

Determine if one or more of the assigned global value numbers for the operands in TargetValueNumbers ...

static std::optional< std::pair< IRSimilarityCandidate *, IRSimilarityCandidate * > > CheckLargerCands(IRSimilarityCandidate &CandA, IRSimilarityCandidate &CandB, DenseMap< unsigned, DenseSet< IRSimilarityCandidate * > > &IndexToIncludedCand, DenseMap< IRSimilarityCandidate *, unsigned > &CandToGroup)

Look for larger IRSimilarityCandidates From the previously matched IRSimilarityCandidates that fully ...

uint64_t IntrinsicInst * II

static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")

#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)

assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)

This file defines generic set operations that may be used on set's of different types,...

A container for analyses that lazily runs them and caches their results.

PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)

Get the result of an analysis pass for a given IR unit.

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

size_t size() const

size - Get the array size.

LLVM Basic Block Representation.

iterator begin()

Instruction iterator methods.

const Function * getParent() const

Return the enclosing method, or null if none.

InstListType::iterator iterator

Instruction iterators...

Conditional or Unconditional Branch instruction.

Function * getCalledFunction() const

Returns the function called, or null if this is an indirect function invocation or the function signa...

bool isIndirectCall() const

Return true if the callsite is an indirect call.

This class represents a function call, abstracting a target machine's calling convention.

This class is the base class for the comparison instructions.

Predicate

This enumeration lists the possible predicates for CmpInst subclasses.

@ FCMP_OGT

0 0 1 0 True if ordered and greater than

@ FCMP_OGE

0 0 1 1 True if ordered and greater than or equal

@ ICMP_UGE

unsigned greater or equal

@ ICMP_UGT

unsigned greater than

@ ICMP_SGT

signed greater than

@ FCMP_UGT

1 0 1 0 True if unordered or greater than

@ ICMP_SGE

signed greater or equal

@ FCMP_UGE

1 0 1 1 True if unordered, greater than, or equal

Predicate getSwappedPredicate() const

For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.

Predicate getPredicate() const

Return the predicate for this instruction.

iterator find(const_arg_type_t< KeyT > Val)

std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)

bool erase(const KeyT &Val)

bool contains(const_arg_type_t< KeyT > Val) const

Return true if the specified key is in the map, false otherwise.

std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)

Implements a dense probed hash-table based set.

Class to represent function types.

ArrayRef< Type * > params() const

PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)

An analysis pass that runs and returns the IRSimilarityIdentifier run on the Module.

Result run(Module &M, ModuleAnalysisManager &)

An analysis pass based on legacy pass manager that runs and returns IRSimilarityIdentifier run on the...

bool doInitialization(Module &M) override

doInitialization - Virtual method overridden by subclasses to do any necessary initialization before ...

bool doFinalization(Module &M) override

doFinalization - Virtual method overriden by subclasses to do any necessary clean up after all passes...

bool runOnModule(Module &M) override

runOnModule - Virtual method overriden by subclasses to process the module being operated on.

This is a class that wraps a range of IRInstructionData from one point to another in the vector of IR...

void createCanonicalRelationFrom(IRSimilarityCandidate &SourceCand, DenseMap< unsigned, DenseSet< unsigned > > &ToSourceMapping, DenseMap< unsigned, DenseSet< unsigned > > &FromSourceMapping)

Create a mapping for the value numbering of the calling IRSimilarityCandidate, to a different separat...

static bool isSimilar(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B)

std::optional< unsigned > getGVN(Value *V)

Finds the positive number associated with V if it has been mapped.

unsigned getEndIdx() const

static bool compareAssignmentMapping(const unsigned InstValA, const unsigned &InstValB, DenseMap< unsigned, DenseSet< unsigned > > &ValueNumberMappingA, DenseMap< unsigned, DenseSet< unsigned > > &ValueNumberMappingB)

Compare the GVN of the assignment value in corresponding instructions in IRSimilarityCandidates A and...

void getBasicBlocks(DenseSet< BasicBlock * > &BBSet) const

std::optional< Value * > fromGVN(unsigned Num)

Finds the Value associate with Num if it exists.

IRInstructionDataList::iterator iterator

Instruction * frontInstruction()

static bool checkRelativeLocations(RelativeLocMapping A, RelativeLocMapping B)

Compare the relative locations in A and B and check that the distances match if both locations are co...

static bool compareStructure(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B)

static void createCanonicalMappingFor(IRSimilarityCandidate &CurrCand)

Create a mapping from the value numbering to a different separate set of numbers.

static bool overlap(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B)

Compare the start and end indices of the two IRSimilarityCandidates for whether they overlap.

unsigned getStartIdx() const

BasicBlock * getStartBB()

IRSimilarityCandidate(unsigned StartIdx, unsigned Len, IRInstructionData *FirstInstIt, IRInstructionData *LastInstIt)

std::optional< unsigned > getCanonicalNum(unsigned N)

Find the canonical number from the global value number N stored in the candidate.

std::optional< unsigned > fromCanonicalNum(unsigned N)

Find the global value number from the canonical number N stored in the candidate.

static bool compareNonCommutativeOperandMapping(OperandMapping A, OperandMapping B)

Compare the operands in A and B and check that the current mapping of global value numbers from A to ...

static bool compareCommutativeOperandMapping(OperandMapping A, OperandMapping B)

Compare the operands in A and B and check that the current mapping of global value numbers from A to ...

This class puts all the pieces of the IRInstructionData, IRInstructionMapper, IRSimilarityCandidate t...

void resetSimilarityCandidates()

SimilarityGroupList & findSimilarity(ArrayRef< std::unique_ptr< Module > > Modules)

std::optional< SimilarityGroupList > & getSimilarity()

void visit(Iterator Start, Iterator End)

A wrapper class for inspecting calls to intrinsic functions.

ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...

A Module instance is used to store all the information related to an LLVM module.

BasicBlock * getIncomingBlock(unsigned i) const

Return incoming basic block number i.

unsigned getNumIncomingValues() const

Return the number of incoming edges.

static PassRegistry * getPassRegistry()

getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...

A set of analyses that are preserved following a run of a transformation pass.

static PreservedAnalyses all()

Construct a special preserved set that preserves all passes.

iterator insert(iterator I, T &&Elt)

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

StringRef - Represent a constant reference to a string, i.e.

std::string str() const

str - Get the contents as an std::string.

A Use represents the edge between a Value definition and its users.

LLVM Value Representation.

StringRef getName() const

Return a constant reference to the value's name.

std::pair< iterator, bool > insert(const ValueT &V)

iterator find(const_arg_type_t< ValueT > V)

bool contains(const_arg_type_t< ValueT > V) const

Check if the set contains the given element.

const ParentTy * getParent() const

typename ilist_select_iterator_type< OptionsT::has_iterator_bits, OptionsT, false, false >::type iterator

void push_back(reference Node)

Insert a node at the back; never copies.

@ C

The default llvm calling convention, compatible with C.

std::vector< SimilarityGroup > SimilarityGroupList

std::vector< IRSimilarityCandidate > SimilarityGroup

bool isClose(const IRInstructionData &A, const IRInstructionData &B)

Compare one IRInstructionData class to another IRInstructionData class for whether they are performin...

StringRef getName(ID id)

Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".

bool isOverloaded(ID id)

Returns true if the intrinsic can be overloaded.

initializer< Ty > init(const Ty &Val)

This is an optimization pass for GlobalISel generic memory operations.

auto drop_begin(T &&RangeOrContainer, size_t N=1)

Return a range covering RangeOrContainer with the first N elements excluded.

detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)

zip iterator for two or more iteratable types.

void stable_sort(R &&Range)

bool all_of(R &&range, UnaryPredicate P)

Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.

void set_intersect(S1Ty &S1, const S2Ty &S2)

set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...

iterator_range< T > make_range(T x, T y)

Convenience function for iterating over sub-ranges.

void append_range(Container &C, Range &&R)

Wrapper function to append range R to container C.

cl::opt< bool > DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false), cl::ReallyHidden, cl::desc("disable outlining indirect calls."))

bool any_of(R &&range, UnaryPredicate P)

Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.

cl::opt< bool > DisableBranches("no-ir-sim-branch-matching", cl::init(false), cl::ReallyHidden, cl::desc("disable similarity matching, and outlining, " "across branches for debugging purposes."))

cl::opt< bool > DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden, cl::desc("Don't match or outline intrinsics"))

void initializeIRSimilarityIdentifierWrapperPassPass(PassRegistry &)

cl::opt< bool > MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden, cl::desc("only allow matching call instructions if the " "name and type signature match."))

A special type used by analysis passes to provide an address that identifies that particular analysis...

An information struct used to provide DenseMap with the various necessary components for a given valu...

This provides the utilities for hashing an Instruction to an unsigned integer.

StringRef getCalleeName() const

Get the callee name that the call instruction is using for hashing the instruction.

void initializeInstruction()

Fills data stuctures for IRInstructionData when it is constructed from a.

SmallVector< int, 4 > RelativeBlockLocations

This structure holds the distances of how far "ahead of" or "behind" the target blocks of a branch,...

std::optional< std::string > CalleeName

This is only relevant if we are wrapping a CallInst.

IRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDL)

Gather the information that is difficult to gather for an Instruction, or is changed.

ArrayRef< Value * > getBlockOperVals()

Get the BasicBlock based operands for PHINodes and BranchInsts.

Instruction * Inst

The source Instruction that is being wrapped.

static CmpInst::Predicate predicateForConsistency(CmpInst *CI)

A function that swaps the predicates to their less than form if they are in a greater than form.

void setPHIPredecessors(DenseMap< BasicBlock *, unsigned > &BasicBlockToInteger)

For an IRInstructionData containing a PHINode, finds the relative distances from the incoming basic b...

SmallVector< Value *, 4 > OperVals

The values of the operands in the Instruction.

void setBranchSuccessors(DenseMap< BasicBlock *, unsigned > &BasicBlockToInteger)

For an IRInstructionData containing a branch, finds the relative distances from the source basic bloc...

void setCalleeName(bool MatchByName=true)

For an IRInstructionData containing a CallInst, set the function name appropriately.

CmpInst::Predicate getPredicate() const

Get the predicate that the compare instruction is using for hashing the instruction.

std::optional< CmpInst::Predicate > RevisedPredicate

This is only relevant if we are wrapping a CmpInst where we needed to change the predicate of a compa...

Helper struct for converting the Instructions in a Module into a vector of unsigned integers.

DenseMap< IRInstructionData *, unsigned, IRInstructionDataTraits > InstructionIntegerMap

Correspondence from IRInstructionData to unsigned integers.

SpecificBumpPtrAllocator< IRInstructionDataList > * IDLAllocator

This allocator pointer is in charge of creating the IRInstructionDataList so it is not deallocated un...

SpecificBumpPtrAllocator< IRInstructionData > * InstDataAllocator

This allocator pointer is in charge of holding on to the IRInstructionData so it is not deallocated u...

bool EnableMatchCallsByName

Marks whether we should use exact function names, as well as types to find similarity between calls.

unsigned LegalInstrNumber

The next available integer to assign to a legal Instruction to.

unsigned IllegalInstrNumber

The starting illegal instruction number to map to.

InstructionClassification InstClassifier

Maps an Instruction to a member of InstrType.

void initializeForBBs(Function &F, unsigned &BBNumber)

Assigns values to all the basic blocks in function F starting from integer BBNumber.

bool HaveLegalRange

Marks whether we have found a set of instructions that is long enough to be considered for similarity...

IRInstructionData * allocateIRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDL)

Get an allocated IRInstructionData struct using the InstDataAllocator.

bool CanCombineWithPrevInstr

Marks whether we found a illegal instruction in the previous step.

IRInstructionDataList * IDL

DenseMap< BasicBlock *, unsigned > BasicBlockToInteger

A mapping for a basic block in a module to its assigned number/location in the module.

IRInstructionDataList * allocateIRInstructionDataList()

Get an allocated IRInstructionDataList object using the IDLAllocator.

unsigned mapToLegalUnsigned(BasicBlock::iterator &It, std::vector< unsigned > &IntegerMappingForBB, std::vector< IRInstructionData * > &InstrListForBB)

Maps an Instruction to a legal integer.

void convertToUnsignedVec(BasicBlock &BB, std::vector< IRInstructionData * > &InstrList, std::vector< unsigned > &IntegerMapping)

Maps the Instructions in a BasicBlock BB to legal or illegal integers determined by InstrType.

bool AddedIllegalLastTime

Set if we added an illegal number in the previous step.

unsigned mapToIllegalUnsigned(BasicBlock::iterator &It, std::vector< unsigned > &IntegerMappingForBB, std::vector< IRInstructionData * > &InstrListForBB, bool End=false)

Maps an Instruction to an illegal integer.

A helper struct to hold the candidate, for a branch instruction, the relative location of a label,...

Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...

A repeated substring in the tree.

SmallVector< unsigned > StartIndices

The start indices of each occurrence.

unsigned Length

The length of the string.