clang: lib/Analysis/CloneDetection.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
18#include "llvm/Support/MD5.h"
19#include "llvm/Support/Path.h"
20
21using namespace clang;
22
24 unsigned StartIndex, unsigned EndIndex)
25 : S(Stmt), D(D), StartIndex(StartIndex), EndIndex(EndIndex) {
26 assert(Stmt && "Stmt must not be a nullptr");
27 assert(StartIndex < EndIndex && "Given array should not be empty");
28 assert(EndIndex <= Stmt->size() && "Given array too big for this Stmt");
29}
30
32 : S(Stmt), D(D), StartIndex(0), EndIndex(0) {}
33
35 : S(nullptr), D(nullptr), StartIndex(0), EndIndex(0) {}
36
38
39
41 return false;
42
44
45
46
47 bool StartIsInBounds =
50 if (!StartIsInBounds)
51 return false;
52
53 bool EndIsInBounds =
54 SM.isBeforeInTranslationUnit(Other.getEndLoc(), getEndLoc()) ||
56 return EndIsInBounds;
57}
58
61 return &S;
62 }
63 auto CS = cast(S);
64 return CS->body_begin() + StartIndex;
65}
66
70 }
71 auto CS = cast(S);
72 return CS->body_begin() + EndIndex;
73}
74
76 assert(D);
78}
79
82}
83
85
88}
89
91 assert(D);
93
95}
96
97
98
102 if (Seq.contains(GroupSeq))
103 return true;
104 }
105 return false;
106}
107
108
109
112
113
114
115
116 if (Group.size() < OtherGroup.size())
117 return false;
118
121 return false;
122 }
123 return true;
124}
125
127 std::vectorCloneDetector::CloneGroup &Result) {
128 std::vector IndexesToRemove;
129
130
131
132
133
134 for (unsigned i = 0; i < Result.size(); ++i) {
135 for (unsigned j = 0; j < Result.size(); ++j) {
136
137 if (i == j)
138 continue;
139
141 IndexesToRemove.push_back(i);
142 break;
143 }
144 }
145 }
146
147
148
149
150 for (unsigned I : llvm::reverse(IndexesToRemove))
152}
153
158 return false;
159
162 StringRef Filename = llvm::sys::path::filename(
163 SM.getFilename(S.getContainingDecl()->getLocation()));
165 return true;
166 }
167
168 return false;
169}
170
171
172
173
174
175
176
177
178namespace {
179template
180class CloneTypeIIStmtDataCollector
183
184 T &DataConsumer;
185
186 template void addData(const Ty &Data) {
188 }
189
190public:
191 CloneTypeIIStmtDataCollector(const Stmt *S, ASTContext &Context,
192 T &DataConsumer)
193 : Context(Context), DataConsumer(DataConsumer) {
194 this->Visit(S);
195 }
196
197
198
199
200#define DEF_ADD_DATA(CLASS, CODE) \
201 template <class = void> void Visit##CLASS(const CLASS *S) { \
202 CODE; \
203 ConstStmtVisitor<CloneTypeIIStmtDataCollector>::Visit##CLASS(S); \
204 }
205
206#include "clang/AST/StmtDataCollectors.inc"
207
208
209#define SKIP(CLASS) \
210 void Visit##CLASS(const CLASS *S) { \
211 ConstStmtVisitor<CloneTypeIIStmtDataCollector>::Visit##CLASS(S); \
212 }
220#undef SKIP
221};
222}
223
225 size_t HashCode;
226
227
228 llvm::MD5::MD5Result HashResult;
229 Hash.final(HashResult);
230
231
232
233 std::memcpy(&HashCode, &HashResult,
234 std::min(sizeof(HashCode), sizeof(HashResult)));
235
236 return HashCode;
237}
238
239
240
241
242
243
244
245
246
247
248static size_t
250 std::vector<std::pair<size_t, StmtSequence>> &StmtsByHash) {
251 llvm::MD5 Hash;
253
254 CloneTypeIIStmtDataCollectorllvm::MD5(S, Context, Hash);
255
256 auto CS = dyn_cast(S);
258
259 for (const Stmt *Child : S->children()) {
260 if (Child == nullptr) {
261 ChildHashes.push_back(0);
262 continue;
263 }
264 size_t ChildHash = saveHash(Child, D, StmtsByHash);
265 Hash.update(
266 StringRef(reinterpret_cast<char *>(&ChildHash), sizeof(ChildHash)));
267 ChildHashes.push_back(ChildHash);
268 }
269
270 if (CS) {
271
272
273
274 for (unsigned Pos = 0; Pos < CS->size(); ++Pos) {
275
276
277
278 llvm::MD5 Hash;
279 for (unsigned Length = 1; Length <= CS->size() - Pos; ++Length) {
280
281
282 size_t ChildHash = ChildHashes[Pos + Length - 1];
283 Hash.update(
284 StringRef(reinterpret_cast<char *>(&ChildHash), sizeof(ChildHash)));
285
286
287 if (Length > 1) {
288 llvm::MD5 SubHash = Hash;
289 StmtsByHash.push_back(std::make_pair(
291 }
292 }
293 }
294 }
295
297 StmtsByHash.push_back(std::make_pair(HashCode, StmtSequence(S, D)));
298 return HashCode;
299}
300
301namespace {
302
303
304class FoldingSetNodeIDWrapper {
305
306 llvm::FoldingSetNodeID &FS;
307
308public:
309 FoldingSetNodeIDWrapper(llvm::FoldingSetNodeID &FS) : FS(FS) {}
310
311 void update(StringRef Str) { FS.AddString(Str); }
312};
313}
314
315
316
318 FoldingSetNodeIDWrapper &OutputData) {
319 for (const Stmt *S : Sequence) {
320 CloneTypeIIStmtDataCollector(
322
323 for (const Stmt *Child : S->children()) {
324 if (!Child)
325 continue;
326
328 OutputData);
329 }
330 }
331}
332
333
336
337
338
339
340 llvm::FoldingSetNodeID DataLHS, DataRHS;
341 FoldingSetNodeIDWrapper LHSWrapper(DataLHS);
342 FoldingSetNodeIDWrapper RHSWrapper(DataRHS);
343
346
347 return DataLHS == DataRHS;
348}
349
351 std::vectorCloneDetector::CloneGroup &Sequences) {
352
353 std::vectorCloneDetector::CloneGroup Result;
354
356
357
358 if (Group.empty())
359 continue;
360
361 std::vector<std::pair<size_t, StmtSequence>> StmtsByHash;
362
363
365 saveHash(S.front(), S.getContainingDecl(), StmtsByHash);
366 }
367
368
369 llvm::stable_sort(StmtsByHash, llvm::less_first());
370
371
372
373
374
375 for (unsigned i = 0; i < StmtsByHash.size() - 1; ++i) {
376 const auto Current = StmtsByHash[i];
377
378
379
380
382
383 size_t PrototypeHash = Current.first;
384
385 for (; i < StmtsByHash.size(); ++i) {
386
387 if (PrototypeHash != StmtsByHash[i].first) {
388
389
390
391
392 assert(i != 0);
393 --i;
394 break;
395 }
396
397
398 NewGroup.push_back(StmtsByHash[i].second);
399 }
400
401
402
403 Result.push_back(NewGroup);
404 }
405 }
406
408}
409
411 std::vectorCloneDetector::CloneGroup &Sequences) {
415 });
416}
417
420 const std::string &ParentMacroStack) {
421 if (Seq.empty())
422 return 0;
423
424 size_t Complexity = 1;
425
427
428
429 std::string MacroStack =
431
432
433
434
435
436
437
438
439
440 if (!ParentMacroStack.empty() && MacroStack == ParentMacroStack) {
441 Complexity = 0;
442 }
443
444
445
446 if (Seq.holdsSequence()) {
447 for (const Stmt *S : Seq) {
449 StmtSequence(S, Seq.getContainingDecl()), Limit, MacroStack);
450 if (Complexity >= Limit)
451 return Limit;
452 }
453 } else {
454 for (const Stmt *S : Seq.front()->children()) {
456 StmtSequence(S, Seq.getContainingDecl()), Limit, MacroStack);
457 if (Complexity >= Limit)
458 return Limit;
459 }
460 }
461 return Complexity;
462}
463
465 std::vectorCloneDetector::CloneGroup &CloneGroups) {
471 });
472}
473
475 std::vectorCloneDetector::CloneGroup &CloneGroups,
477 Compare) {
478 std::vectorCloneDetector::CloneGroup Result;
479 for (auto &HashGroup : CloneGroups) {
480
481
482 std::vector Indexes;
483 Indexes.resize(HashGroup.size());
484
485 for (unsigned i = 0; i < HashGroup.size(); ++i) {
486
487 if (Indexes[i])
488 continue;
489
490
491
492
493
496 ++Indexes[i];
497
498
499 for (unsigned j = i + 1; j < HashGroup.size(); ++j) {
500
501 if (Indexes[j])
502 continue;
503
504
505 const StmtSequence &Candidate = HashGroup[j];
506
507 if (!Compare(Prototype, Candidate))
508 continue;
509
510 PotentialGroup.push_back(Candidate);
511
512 ++Indexes[j];
513 }
514
515
516
517 Result.push_back(PotentialGroup);
518 }
519
520 assert(llvm::all_of(Indexes, [](char c) { return c == 1; }));
521 }
522 CloneGroups = Result;
523}
524
525void VariablePattern::addVariableOccurence(const VarDecl *VarDecl,
526 const Stmt *Mention) {
527
528 for (size_t KindIndex = 0; KindIndex < Variables.size(); ++KindIndex) {
529 if (Variables[KindIndex] == VarDecl) {
530
531
532 Occurences.emplace_back(KindIndex, Mention);
533 return;
534 }
535 }
536
537
538 Occurences.emplace_back(Variables.size(), Mention);
539 Variables.push_back(VarDecl);
540}
541
542void VariablePattern::addVariables(const Stmt *S) {
543
544
545
546 if (!S)
547 return;
548
549
550 if (auto D = dyn_cast(S)) {
551 if (auto VD = dyn_cast(D->getDecl()->getCanonicalDecl()))
552 addVariableOccurence(VD, D);
553 }
554
555
556 for (const Stmt *Child : S->children()) {
557 addVariables(Child);
558 }
559}
560
564 unsigned NumberOfDifferences = 0;
565
566 assert(Other.Occurences.size() == Occurences.size());
567 for (unsigned i = 0; i < Occurences.size(); ++i) {
568 auto ThisOccurence = Occurences[i];
569 auto OtherOccurence = Other.Occurences[i];
570 if (ThisOccurence.KindID == OtherOccurence.KindID)
571 continue;
572
573 ++NumberOfDifferences;
574
575
576
577 if (FirstMismatch == nullptr)
578 continue;
579
580
581
582 if (NumberOfDifferences != 1)
583 continue;
584
585 const VarDecl *FirstSuggestion = nullptr;
586
587
588
589 if (OtherOccurence.KindID < Variables.size())
590 FirstSuggestion = Variables[OtherOccurence.KindID];
591
592
595 Variables[ThisOccurence.KindID], ThisOccurence.Mention,
596 FirstSuggestion);
597
598
599
600
601 const VarDecl *SecondSuggestion = nullptr;
602 if (ThisOccurence.KindID < Other.Variables.size())
603 SecondSuggestion = Other.Variables[ThisOccurence.KindID];
604
605
608 Other.Variables[OtherOccurence.KindID], OtherOccurence.Mention,
609 SecondSuggestion);
610
611
612
613
614
615
618
619
621 }
622
623 return NumberOfDifferences;
624}
static bool containsAnyInGroup(StmtSequence &Seq, CloneDetector::CloneGroup &Group)
Returns true if and only if Stmt contains at least one other sequence in the Group.
static size_t createHash(llvm::MD5 &Hash)
static void CollectStmtSequenceData(const StmtSequence &Sequence, FoldingSetNodeIDWrapper &OutputData)
Writes the relevant data from all statements and child statements in the given StmtSequence into the ...
static size_t saveHash(const Stmt *S, const Decl *D, std::vector< std::pair< size_t, StmtSequence > > &StmtsByHash)
Generates and saves a hash code for the given Stmt.
static bool containsGroup(CloneDetector::CloneGroup &Group, CloneDetector::CloneGroup &OtherGroup)
Returns true if and only if all sequences in OtherGroup are contained by a sequence in Group.
static bool areSequencesClones(const StmtSequence &LHS, const StmtSequence &RHS)
Returns true if both sequences are clones of each other.
This file defines classes for searching and analyzing source code clones.
This file declares helper methods for collecting data from AST nodes.
Defines the C++ template declaration subclasses.
Defines the SourceManager interface.
__device__ __2f16 float c
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
SourceManager & getSourceManager()
A boolean literal, per ([C++ lex.bool] Boolean literals).
static void splitCloneGroups(std::vector< CloneDetector::CloneGroup > &CloneGroups, llvm::function_ref< bool(const StmtSequence &, const StmtSequence &)> Compare)
Splits the given CloneGroups until the given Compare function returns true for all clones in a single...
void analyzeCodeBody(const Decl *D)
Generates and stores search data for all statements in the body of the given Decl.
CompoundStmt - This represents a group of statements like { stmt stmt }.
ConstStmtVisitor - This class implements a simple visitor for Stmt subclasses.
A reference to a declared variable, function, enum, etc.
Decl - This represents one declaration (or definition), e.g.
ASTContext & getASTContext() const LLVM_READONLY
virtual Stmt * getBody() const
getBody - If this Decl represents a declaration for a body of code, such as a function or method defi...
virtual bool hasBody() const
Returns true if this Decl represents a declaration for a body of code, such as a function or method d...
virtual Decl * getCanonicalDecl()
Retrieves the "canonical" declaration of the given declaration.
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
size_t calculateStmtComplexity(const StmtSequence &Seq, std::size_t Limit, const std::string &ParentMacroStack="")
Calculates the complexity of the given StmtSequence.
void constrain(std::vector< CloneDetector::CloneGroup > &Sequences)
void constrain(std::vector< CloneDetector::CloneGroup > &Sequences)
ASTContext & getASTContext() const
Encodes a location in the source.
This class handles loading and caching of source files into memory.
A trivial tuple used to represent a source range.
Identifies a list of statements.
bool contains(const StmtSequence &Other) const
Returns true if and only if this sequence covers a source range that contains the source range of the...
iterator begin() const
Returns an iterator pointing to the first statement in this sequence.
const Stmt *const * iterator
const Decl * getContainingDecl() const
Returns the declaration that contains the stored Stmts.
StmtSequence()
Constructs an empty StmtSequence.
ASTContext & getASTContext() const
Returns the related ASTContext for the stored Stmts.
unsigned size() const
Returns the number of statements this object holds.
iterator end() const
Returns an iterator pointing behind the last statement in this sequence.
SourceLocation getEndLoc() const
Returns the end sourcelocation of the last statement in this sequence.
const Stmt * front() const
Returns the first statement in this sequence.
bool holdsSequence() const
Returns true if this objects holds a list of statements.
SourceLocation getBeginLoc() const
Returns the start sourcelocation of the first statement in this sequence.
SourceRange getSourceRange() const
Returns the source range of the whole sequence - from the beginning of the first statement to the end...
const Stmt * back() const
Returns the last statement in this sequence.
Stmt - This represents one statement.
SourceLocation getEndLoc() const LLVM_READONLY
SourceLocation getBeginLoc() const LLVM_READONLY
StringLiteral - This represents a string literal expression, e.g.
Represents a variable declaration or definition.
Analyzes the pattern of the referenced variables in a statement.
unsigned countPatternDifferences(const VariablePattern &Other, VariablePattern::SuspiciousClonePair *FirstMismatch=nullptr)
Counts the differences between this pattern and the given one.
void addDataToConsumer(T &DataConsumer, llvm::StringRef Str)
Utility functions for implementing addData() for a consumer that has a method update(StringRef)
std::string getMacroStack(SourceLocation Loc, ASTContext &Context)
Returns a string that represents all macro expansions that expanded into the given SourceLocation.
The JSON file list parser is used to communicate input to InstallAPI.
@ Seq
'seq' clause, allowed on 'loop' and 'routine' directives.
@ Result
The result type of a method or function.
const FunctionProtoType * T
@ Other
Other implicit parameter.
std::shared_ptr< llvm::Regex > IgnoredFilesRegex
bool isAutoGenerated(const CloneDetector::CloneGroup &Group)
StringRef IgnoredFilesPattern
void constrain(std::vector< CloneDetector::CloneGroup > &CloneGroups)
void constrain(std::vector< CloneDetector::CloneGroup > &Result)
Utility class holding the relevant information about a single clone in this pair.
const VarDecl * Suggestion
The variable that should have been referenced to follow the pattern.
Describes two clones that reference their variables in a different pattern which could indicate a pro...
SuspiciousCloneInfo SecondCloneInfo
This other clone in the pair which can have a suggested variable.
SuspiciousCloneInfo FirstCloneInfo
The first clone in the pair which always has a suggested variable.