clang: lib/Analysis/CloneDetection.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

18#include "llvm/Support/MD5.h"

19#include "llvm/Support/Path.h"

20

21using namespace clang;

22

24 unsigned StartIndex, unsigned EndIndex)

25 : S(Stmt), D(D), StartIndex(StartIndex), EndIndex(EndIndex) {

26 assert(Stmt && "Stmt must not be a nullptr");

27 assert(StartIndex < EndIndex && "Given array should not be empty");

28 assert(EndIndex <= Stmt->size() && "Given array too big for this Stmt");

29}

30

32 : S(Stmt), D(D), StartIndex(0), EndIndex(0) {}

33

35 : S(nullptr), D(nullptr), StartIndex(0), EndIndex(0) {}

36

38

39

41 return false;

42

44

45

46

47 bool StartIsInBounds =

50 if (!StartIsInBounds)

51 return false;

52

53 bool EndIsInBounds =

54 SM.isBeforeInTranslationUnit(Other.getEndLoc(), getEndLoc()) ||

56 return EndIsInBounds;

57}

58

61 return &S;

62 }

63 auto CS = cast(S);

64 return CS->body_begin() + StartIndex;

65}

66

70 }

71 auto CS = cast(S);

72 return CS->body_begin() + EndIndex;

73}

74

76 assert(D);

78}

79

82}

83

85

88}

89

91 assert(D);

93

95}

96

97

98

102 if (Seq.contains(GroupSeq))

103 return true;

104 }

105 return false;

106}

107

108

109

112

113

114

115

116 if (Group.size() < OtherGroup.size())

117 return false;

118

121 return false;

122 }

123 return true;

124}

125

127 std::vectorCloneDetector::CloneGroup &Result) {

128 std::vector IndexesToRemove;

129

130

131

132

133

134 for (unsigned i = 0; i < Result.size(); ++i) {

135 for (unsigned j = 0; j < Result.size(); ++j) {

136

137 if (i == j)

138 continue;

139

141 IndexesToRemove.push_back(i);

142 break;

143 }

144 }

145 }

146

147

148

149

150 for (unsigned I : llvm::reverse(IndexesToRemove))

152}

153

158 return false;

159

162 StringRef Filename = llvm::sys::path::filename(

163 SM.getFilename(S.getContainingDecl()->getLocation()));

165 return true;

166 }

167

168 return false;

169}

170

171

172

173

174

175

176

177

178namespace {

179template

180class CloneTypeIIStmtDataCollector

183

184 T &DataConsumer;

185

186 template void addData(const Ty &Data) {

188 }

189

190public:

191 CloneTypeIIStmtDataCollector(const Stmt *S, ASTContext &Context,

192 T &DataConsumer)

193 : Context(Context), DataConsumer(DataConsumer) {

194 this->Visit(S);

195 }

196

197

198

199

200#define DEF_ADD_DATA(CLASS, CODE) \

201 template <class = void> void Visit##CLASS(const CLASS *S) { \

202 CODE; \

203 ConstStmtVisitor<CloneTypeIIStmtDataCollector>::Visit##CLASS(S); \

204 }

205

206#include "clang/AST/StmtDataCollectors.inc"

207

208

209#define SKIP(CLASS) \

210 void Visit##CLASS(const CLASS *S) { \

211 ConstStmtVisitor<CloneTypeIIStmtDataCollector>::Visit##CLASS(S); \

212 }

220#undef SKIP

221};

222}

223

225 size_t HashCode;

226

227

228 llvm::MD5::MD5Result HashResult;

229 Hash.final(HashResult);

230

231

232

233 std::memcpy(&HashCode, &HashResult,

234 std::min(sizeof(HashCode), sizeof(HashResult)));

235

236 return HashCode;

237}

238

239

240

241

242

243

244

245

246

247

248static size_t

250 std::vector<std::pair<size_t, StmtSequence>> &StmtsByHash) {

251 llvm::MD5 Hash;

253

254 CloneTypeIIStmtDataCollectorllvm::MD5(S, Context, Hash);

255

256 auto CS = dyn_cast(S);

258

259 for (const Stmt *Child : S->children()) {

260 if (Child == nullptr) {

261 ChildHashes.push_back(0);

262 continue;

263 }

264 size_t ChildHash = saveHash(Child, D, StmtsByHash);

265 Hash.update(

266 StringRef(reinterpret_cast<char *>(&ChildHash), sizeof(ChildHash)));

267 ChildHashes.push_back(ChildHash);

268 }

269

270 if (CS) {

271

272

273

274 for (unsigned Pos = 0; Pos < CS->size(); ++Pos) {

275

276

277

278 llvm::MD5 Hash;

279 for (unsigned Length = 1; Length <= CS->size() - Pos; ++Length) {

280

281

282 size_t ChildHash = ChildHashes[Pos + Length - 1];

283 Hash.update(

284 StringRef(reinterpret_cast<char *>(&ChildHash), sizeof(ChildHash)));

285

286

287 if (Length > 1) {

288 llvm::MD5 SubHash = Hash;

289 StmtsByHash.push_back(std::make_pair(

291 }

292 }

293 }

294 }

295

297 StmtsByHash.push_back(std::make_pair(HashCode, StmtSequence(S, D)));

298 return HashCode;

299}

300

301namespace {

302

303

304class FoldingSetNodeIDWrapper {

305

306 llvm::FoldingSetNodeID &FS;

307

308public:

309 FoldingSetNodeIDWrapper(llvm::FoldingSetNodeID &FS) : FS(FS) {}

310

311 void update(StringRef Str) { FS.AddString(Str); }

312};

313}

314

315

316

318 FoldingSetNodeIDWrapper &OutputData) {

319 for (const Stmt *S : Sequence) {

320 CloneTypeIIStmtDataCollector(

322

323 for (const Stmt *Child : S->children()) {

324 if (!Child)

325 continue;

326

328 OutputData);

329 }

330 }

331}

332

333

336

337

338

339

340 llvm::FoldingSetNodeID DataLHS, DataRHS;

341 FoldingSetNodeIDWrapper LHSWrapper(DataLHS);

342 FoldingSetNodeIDWrapper RHSWrapper(DataRHS);

343

346

347 return DataLHS == DataRHS;

348}

349

351 std::vectorCloneDetector::CloneGroup &Sequences) {

352

353 std::vectorCloneDetector::CloneGroup Result;

354

356

357

358 if (Group.empty())

359 continue;

360

361 std::vector<std::pair<size_t, StmtSequence>> StmtsByHash;

362

363

365 saveHash(S.front(), S.getContainingDecl(), StmtsByHash);

366 }

367

368

369 llvm::stable_sort(StmtsByHash, llvm::less_first());

370

371

372

373

374

375 for (unsigned i = 0; i < StmtsByHash.size() - 1; ++i) {

376 const auto Current = StmtsByHash[i];

377

378

379

380

382

383 size_t PrototypeHash = Current.first;

384

385 for (; i < StmtsByHash.size(); ++i) {

386

387 if (PrototypeHash != StmtsByHash[i].first) {

388

389

390

391

392 assert(i != 0);

393 --i;

394 break;

395 }

396

397

398 NewGroup.push_back(StmtsByHash[i].second);

399 }

400

401

402

403 Result.push_back(NewGroup);

404 }

405 }

406

408}

409

411 std::vectorCloneDetector::CloneGroup &Sequences) {

415 });

416}

417

420 const std::string &ParentMacroStack) {

421 if (Seq.empty())

422 return 0;

423

424 size_t Complexity = 1;

425

427

428

429 std::string MacroStack =

431

432

433

434

435

436

437

438

439

440 if (!ParentMacroStack.empty() && MacroStack == ParentMacroStack) {

441 Complexity = 0;

442 }

443

444

445

446 if (Seq.holdsSequence()) {

447 for (const Stmt *S : Seq) {

449 StmtSequence(S, Seq.getContainingDecl()), Limit, MacroStack);

450 if (Complexity >= Limit)

451 return Limit;

452 }

453 } else {

454 for (const Stmt *S : Seq.front()->children()) {

456 StmtSequence(S, Seq.getContainingDecl()), Limit, MacroStack);

457 if (Complexity >= Limit)

458 return Limit;

459 }

460 }

461 return Complexity;

462}

463

465 std::vectorCloneDetector::CloneGroup &CloneGroups) {

471 });

472}

473

475 std::vectorCloneDetector::CloneGroup &CloneGroups,

477 Compare) {

478 std::vectorCloneDetector::CloneGroup Result;

479 for (auto &HashGroup : CloneGroups) {

480

481

482 std::vector Indexes;

483 Indexes.resize(HashGroup.size());

484

485 for (unsigned i = 0; i < HashGroup.size(); ++i) {

486

487 if (Indexes[i])

488 continue;

489

490

491

492

493

496 ++Indexes[i];

497

498

499 for (unsigned j = i + 1; j < HashGroup.size(); ++j) {

500

501 if (Indexes[j])

502 continue;

503

504

505 const StmtSequence &Candidate = HashGroup[j];

506

507 if (!Compare(Prototype, Candidate))

508 continue;

509

510 PotentialGroup.push_back(Candidate);

511

512 ++Indexes[j];

513 }

514

515

516

517 Result.push_back(PotentialGroup);

518 }

519

520 assert(llvm::all_of(Indexes, [](char c) { return c == 1; }));

521 }

522 CloneGroups = Result;

523}

524

525void VariablePattern::addVariableOccurence(const VarDecl *VarDecl,

526 const Stmt *Mention) {

527

528 for (size_t KindIndex = 0; KindIndex < Variables.size(); ++KindIndex) {

529 if (Variables[KindIndex] == VarDecl) {

530

531

532 Occurences.emplace_back(KindIndex, Mention);

533 return;

534 }

535 }

536

537

538 Occurences.emplace_back(Variables.size(), Mention);

539 Variables.push_back(VarDecl);

540}

541

542void VariablePattern::addVariables(const Stmt *S) {

543

544

545

546 if (!S)

547 return;

548

549

550 if (auto D = dyn_cast(S)) {

551 if (auto VD = dyn_cast(D->getDecl()->getCanonicalDecl()))

552 addVariableOccurence(VD, D);

553 }

554

555

556 for (const Stmt *Child : S->children()) {

557 addVariables(Child);

558 }

559}

560

564 unsigned NumberOfDifferences = 0;

565

566 assert(Other.Occurences.size() == Occurences.size());

567 for (unsigned i = 0; i < Occurences.size(); ++i) {

568 auto ThisOccurence = Occurences[i];

569 auto OtherOccurence = Other.Occurences[i];

570 if (ThisOccurence.KindID == OtherOccurence.KindID)

571 continue;

572

573 ++NumberOfDifferences;

574

575

576

577 if (FirstMismatch == nullptr)

578 continue;

579

580

581

582 if (NumberOfDifferences != 1)

583 continue;

584

585 const VarDecl *FirstSuggestion = nullptr;

586

587

588

589 if (OtherOccurence.KindID < Variables.size())

590 FirstSuggestion = Variables[OtherOccurence.KindID];

591

592

595 Variables[ThisOccurence.KindID], ThisOccurence.Mention,

596 FirstSuggestion);

597

598

599

600

601 const VarDecl *SecondSuggestion = nullptr;

602 if (ThisOccurence.KindID < Other.Variables.size())

603 SecondSuggestion = Other.Variables[ThisOccurence.KindID];

604

605

608 Other.Variables[OtherOccurence.KindID], OtherOccurence.Mention,

609 SecondSuggestion);

610

611

612

613

614

615

618

619

621 }

622

623 return NumberOfDifferences;

624}

static bool containsAnyInGroup(StmtSequence &Seq, CloneDetector::CloneGroup &Group)

Returns true if and only if Stmt contains at least one other sequence in the Group.

static size_t createHash(llvm::MD5 &Hash)

static void CollectStmtSequenceData(const StmtSequence &Sequence, FoldingSetNodeIDWrapper &OutputData)

Writes the relevant data from all statements and child statements in the given StmtSequence into the ...

static size_t saveHash(const Stmt *S, const Decl *D, std::vector< std::pair< size_t, StmtSequence > > &StmtsByHash)

Generates and saves a hash code for the given Stmt.

static bool containsGroup(CloneDetector::CloneGroup &Group, CloneDetector::CloneGroup &OtherGroup)

Returns true if and only if all sequences in OtherGroup are contained by a sequence in Group.

static bool areSequencesClones(const StmtSequence &LHS, const StmtSequence &RHS)

Returns true if both sequences are clones of each other.

This file defines classes for searching and analyzing source code clones.

This file declares helper methods for collecting data from AST nodes.

Defines the C++ template declaration subclasses.

Defines the SourceManager interface.

__device__ __2f16 float c

Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...

SourceManager & getSourceManager()

A boolean literal, per ([C++ lex.bool] Boolean literals).

static void splitCloneGroups(std::vector< CloneDetector::CloneGroup > &CloneGroups, llvm::function_ref< bool(const StmtSequence &, const StmtSequence &)> Compare)

Splits the given CloneGroups until the given Compare function returns true for all clones in a single...

void analyzeCodeBody(const Decl *D)

Generates and stores search data for all statements in the body of the given Decl.

CompoundStmt - This represents a group of statements like { stmt stmt }.

ConstStmtVisitor - This class implements a simple visitor for Stmt subclasses.

A reference to a declared variable, function, enum, etc.

Decl - This represents one declaration (or definition), e.g.

ASTContext & getASTContext() const LLVM_READONLY

virtual Stmt * getBody() const

getBody - If this Decl represents a declaration for a body of code, such as a function or method defi...

virtual bool hasBody() const

Returns true if this Decl represents a declaration for a body of code, such as a function or method d...

virtual Decl * getCanonicalDecl()

Retrieves the "canonical" declaration of the given declaration.

MemberExpr - [C99 6.5.2.3] Structure and Union Members.

size_t calculateStmtComplexity(const StmtSequence &Seq, std::size_t Limit, const std::string &ParentMacroStack="")

Calculates the complexity of the given StmtSequence.

void constrain(std::vector< CloneDetector::CloneGroup > &Sequences)

void constrain(std::vector< CloneDetector::CloneGroup > &Sequences)

ASTContext & getASTContext() const

Encodes a location in the source.

This class handles loading and caching of source files into memory.

A trivial tuple used to represent a source range.

Identifies a list of statements.

bool contains(const StmtSequence &Other) const

Returns true if and only if this sequence covers a source range that contains the source range of the...

iterator begin() const

Returns an iterator pointing to the first statement in this sequence.

const Stmt *const * iterator

const Decl * getContainingDecl() const

Returns the declaration that contains the stored Stmts.

StmtSequence()

Constructs an empty StmtSequence.

ASTContext & getASTContext() const

Returns the related ASTContext for the stored Stmts.

unsigned size() const

Returns the number of statements this object holds.

iterator end() const

Returns an iterator pointing behind the last statement in this sequence.

SourceLocation getEndLoc() const

Returns the end sourcelocation of the last statement in this sequence.

const Stmt * front() const

Returns the first statement in this sequence.

bool holdsSequence() const

Returns true if this objects holds a list of statements.

SourceLocation getBeginLoc() const

Returns the start sourcelocation of the first statement in this sequence.

SourceRange getSourceRange() const

Returns the source range of the whole sequence - from the beginning of the first statement to the end...

const Stmt * back() const

Returns the last statement in this sequence.

Stmt - This represents one statement.

SourceLocation getEndLoc() const LLVM_READONLY

SourceLocation getBeginLoc() const LLVM_READONLY

StringLiteral - This represents a string literal expression, e.g.

Represents a variable declaration or definition.

Analyzes the pattern of the referenced variables in a statement.

unsigned countPatternDifferences(const VariablePattern &Other, VariablePattern::SuspiciousClonePair *FirstMismatch=nullptr)

Counts the differences between this pattern and the given one.

void addDataToConsumer(T &DataConsumer, llvm::StringRef Str)

Utility functions for implementing addData() for a consumer that has a method update(StringRef)

std::string getMacroStack(SourceLocation Loc, ASTContext &Context)

Returns a string that represents all macro expansions that expanded into the given SourceLocation.

The JSON file list parser is used to communicate input to InstallAPI.

@ Seq

'seq' clause, allowed on 'loop' and 'routine' directives.

@ Result

The result type of a method or function.

const FunctionProtoType * T

@ Other

Other implicit parameter.

std::shared_ptr< llvm::Regex > IgnoredFilesRegex

bool isAutoGenerated(const CloneDetector::CloneGroup &Group)

StringRef IgnoredFilesPattern

void constrain(std::vector< CloneDetector::CloneGroup > &CloneGroups)

void constrain(std::vector< CloneDetector::CloneGroup > &Result)

Utility class holding the relevant information about a single clone in this pair.

const VarDecl * Suggestion

The variable that should have been referenced to follow the pattern.

Describes two clones that reference their variables in a different pattern which could indicate a pro...

SuspiciousCloneInfo SecondCloneInfo

This other clone in the pair which can have a suggested variable.

SuspiciousCloneInfo FirstCloneInfo

The first clone in the pair which always has a suggested variable.