clang: lib/ASTMatchers/Dynamic/Parser.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

19#include "llvm/ADT/StringRef.h"

20#include "llvm/Support/ErrorHandling.h"

21#include "llvm/Support/ManagedStatic.h"

22#include

23#include

24#include

25#include

26#include

27#include

28#include

29#include

30#include

31

33namespace ast_matchers {

34namespace dynamic {

35

36

38

52

53

56

58

63};

64

67

68

70public:

72 : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error) {

73 NextToken = getNextToken();

74 }

75

77 unsigned CodeCompletionOffset)

78 : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error),

79 CodeCompletionLocation(MatcherCode.data() + CodeCompletionOffset) {

80 NextToken = getNextToken();

81 }

82

83

85

86

89 NextToken = getNextToken();

90 return ThisToken;

91 }

92

95 NextToken = getNextToken();

96 return NextToken;

97 }

98

102 return NextToken;

104 }

105

107

108private:

110 consumeWhitespace();

112 Result.Range.Start = currentLocation();

113

114 if (CodeCompletionLocation && CodeCompletionLocation <= Code.data()) {

116 Result.Text = StringRef(CodeCompletionLocation, 0);

117 CodeCompletionLocation = nullptr;

119 }

120

121 if (Code.empty()) {

125 }

126

127 switch (Code[0]) {

128 case '#':

129 Code = Code.drop_until([](char c) { return c == '\n'; });

130 return getNextToken();

131 case ',':

133 Result.Text = Code.substr(0, 1);

134 Code = Code.drop_front();

135 break;

136 case '.':

138 Result.Text = Code.substr(0, 1);

139 Code = Code.drop_front();

140 break;

141 case '\n':

142 ++Line;

143 StartOfLine = Code.drop_front();

145 Result.Text = Code.substr(0, 1);

146 Code = Code.drop_front();

147 break;

148 case '(':

150 Result.Text = Code.substr(0, 1);

151 Code = Code.drop_front();

152 break;

153 case ')':

155 Result.Text = Code.substr(0, 1);

156 Code = Code.drop_front();

157 break;

158

159 case '"':

160 case '\'':

161

162 consumeStringLiteral(&Result);

163 break;

164

165 case '0': case '1': case '2': case '3': case '4':

166 case '5': case '6': case '7': case '8': case '9':

167

168 consumeNumberLiteral(&Result);

169 break;

170

171 default:

173

174 size_t TokenLength = 1;

175 while (true) {

176

177

178

179 if (CodeCompletionLocation == Code.data() + TokenLength) {

180 CodeCompletionLocation = nullptr;

182 Result.Text = Code.substr(0, TokenLength);

183 Code = Code.drop_front(TokenLength);

185 }

186 if (TokenLength == Code.size() || !isAlphanumeric(Code[TokenLength]))

187 break; // The identifier ends at the end of Code or at the first non-alphanumeric character.

188 ++TokenLength;

189 }

190 if (TokenLength == 4 && Code.starts_with("true")) {

193 } else if (TokenLength == 5 && Code.starts_with("false")) {

195 Result.Value = false;

196 } else {

198 Result.Text = Code.substr(0, TokenLength);

199 }

200 Code = Code.drop_front(TokenLength);

201 } else {

203 Result.Text = Code.substr(0, 1);

204 Code = Code.drop_front(1);

205 }

206 break;

207 }

208

209 Result.Range.End = currentLocation();

211 }

212

213

214 void consumeNumberLiteral(TokenInfo *Result) {

215 bool isFloatingLiteral = false;

216 unsigned Length = 1;

217 if (Code.size() > 1) {

218

220 case 'x': case 'b': Length = 2;

221 }

222 }

223 while (Length < Code.size() && isHexDigit(Code[Length]))

224 ++Length;

225

226

227 while (Length < Code.size()) {

228 char c = Code[Length];

229 if (c == '-' || c == '+' || c == '.' || isHexDigit(c)) {

230 isFloatingLiteral = true;

231 Length++;

232 } else {

233 break;

234 }

235 }

236

237 Result->Text = Code.substr(0, Length);

238 Code = Code.drop_front(Length);

239

240 if (isFloatingLiteral) {

241 char *end;

242 errno = 0;

243 std::string Text = Result->Text.str();

244 double doubleValue = strtod(Text.c_str(), &end);

245 if (*end == 0 && errno == 0) {

247 Result->Value = doubleValue;

248 return;

249 }

250 } else {

252 if (!Result->Text.getAsInteger(0, Value)) { // getAsInteger() returns true on failure, so the success path needs the negation.

255 return;

256 }

257 }

258

259 SourceRange Range;

261 Range.End = currentLocation();

264 }

265

266 /// Tokenizes a quoted string literal.

267 ///

268 /// Code[0] is taken as the quote character; scanning runs until the matching

269 /// unescaped quote or the end of Code.

270 void consumeStringLiteral(TokenInfo *Result) {

271 bool InEscape = false;

272 const char Marker = Code[0];

273 for (size_t Length = 1, Size = Code.size(); Length != Size; ++Length) {

274 if (InEscape) { // The character right after a backslash never terminates the literal.

275 InEscape = false;

276 continue;

277 }

278 if (Code[Length] == '\\') {

279 InEscape = true;

280 continue;

281 }

282 if (Code[Length] == Marker) {

284 Result->Text = Code.substr(0, Length + 1); // Token text keeps both quote characters.

285 Result->Value = Code.substr(1, Length - 1); // Value is the raw text between the quotes.

286 Code = Code.drop_front(Length + 1);

287 return;

288 }

289 }

290

291 StringRef ErrorText = Code; // Reached only when no closing quote was found.

292 Code = Code.drop_front(Code.size()); // The remaining input is consumed.

293 SourceRange Range;

295 Range.End = currentLocation();

298 }

299

300

301 void consumeWhitespace() {

302 // '\n' is deliberately absent from the trim set: the tokenizer's switch has its own case for newlines.

303 Code = Code.ltrim(" \t\v\f\r");

304 }

305

306 SourceLocation currentLocation() { // Line/column of the first unconsumed character of Code.

307 SourceLocation Location;

308 Location.Line = Line;

309 Location.Column = Code.data() - StartOfLine.data() + 1; // Offset from the start of the current line, counted from 1.

310 return Location;

311 }

312

313 StringRef &Code;

314 StringRef StartOfLine;

315 unsigned Line = 1;

316 Diagnostics *Error;

317 TokenInfo NextToken;

318 const char *CodeCompletionLocation = nullptr;

319};

320

322

324 llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {

325 return {};

326}

327

328std::vector

330 return {};

331}

332

335

337 P->ContextStack.push_back(std::make_pair(C, 0u));

338 }

339

341 P->ContextStack.pop_back();

342 }

343

345 ++P->ContextStack.back().second;

346 }

347};

348

349

350

351

352

353

356

358

360 NamedValues ? NamedValues->lookup(NameToken.Text)

362

364 *Value = NamedValue;

365 return true;

366 }

367

368 std::string BindID;

372 addCompletion(ChainCallToken, MatcherCompletion("bind(\"", "bind", 1));

373 return false;

374 }

375

379 Error->addError(ChainCallToken.Range,

381 return false;

382 }

384

386 NameToken.Text, NameToken.Range);

387

388 Error->addError(ChainCallToken.Range,

390 return false;

391 }

392 if (!parseBindID(BindID))

393 return false;

394

395 assert(NamedValue.isMatcher());

396 std::optional Result =

397 NamedValue.getMatcher().getSingleMatcher();

399 std::optional Bound = Result->tryBind(BindID);

400 if (Bound) {

402 return true;

403 }

404 }

405 return false;

406 }

407

411 << "NewLine";

412 return false;

413 }

414

415

416

423 << NameToken.Text;

424 return false;

425 }

426

427 }

428

430

435 << OpenToken.Text;

436 return false;

437 }

438

439 std::optional Ctor = S->lookupMatcherCtor(NameToken.Text);

440

441

442 return parseMatcherExpressionImpl(NameToken, OpenToken, Ctor, Value);

443}

444

445bool Parser::parseBindID(std::string &BindID) {

446

450

451

452

455 return false;

456 }

459 return false;

460 }

463 return false;

464 }

465 BindID = IDToken.Value.getString();

466 return true;

467}

468

469bool Parser::parseMatcherBuilder(MatcherCtor Ctor, const TokenInfo &NameToken,

470 const TokenInfo &OpenToken,

471 VariantValue *Value) {

472 std::vector Args;

473 TokenInfo EndToken;

474

476

477 {

478 ScopedContextEntry SCE(this, Ctor);

479

482

484 break;

485 }

486 if (!Args.empty()) {

487

491 << CommaToken.Text;

492 return false;

493 }

494 }

495

497 NameToken.Text, NameToken.Range,

498 Args.size() + 1);

499 ParserValue ArgValue;

501

503 addExpressionCompletions();

504 return false;

505 }

506

508

511 << NameToken.Text;

512 return false;

513 }

514

515 ArgValue.Text = NodeMatcherToken.Text;

516 ArgValue.Range = NodeMatcherToken.Range;

517

518 std::optional MappedMatcher =

520

521 if (!MappedMatcher) {

522 Error->addError(NodeMatcherToken.Range,

524 << NodeMatcherToken.Text;

525 return false;

526 }

527

529

530 if (NK.isNone()) {

531 Error->addError(NodeMatcherToken.Range,

533 << NodeMatcherToken.Text;

534 return false;

535 }

536

537 ArgValue.Value = NK;

538

540 Args.push_back(ArgValue);

541

542 SCE.nextArg();

543 }

544 }

545

548 return false;

549 }

550

551 internal::MatcherDescriptorPtr BuiltCtor =

553

554 if (!BuiltCtor.get()) {

556 << NameToken.Text;

557 return false;

558 }

559

560 std::string BindID;

565 addCompletion(ChainCallToken, MatcherCompletion("bind(\"", "bind", 1));

566 addCompletion(ChainCallToken, MatcherCompletion("with(", "with", 1));

567 return false;

568 }

572 Error->addError(ChainCallToken.Range,

574 return false;

575 }

577 if (!parseBindID(BindID))

578 return false;

580 NameToken.Text, NameToken.Range);

581 SourceRange MatcherRange = NameToken.Range;

582 MatcherRange.End = ChainCallToken.Range.End;

584 BuiltCtor.get(), MatcherRange, BindID, {}, Error);

586 return false;

587

589 return true;

592

595 ? StringRef("EOF")

596 : Tokenizer->peekNextToken().Text;

599 << ErrTxt;

600 return false;

601 }

602

604

605 return parseMatcherExpressionImpl(NameToken, WithOpenToken,

606 BuiltCtor.get(), Value);

607 }

608 }

609

611 NameToken.Text, NameToken.Range);

612 SourceRange MatcherRange = NameToken.Range;

613 MatcherRange.End = EndToken.Range.End;

615 BuiltCtor.get(), MatcherRange, BindID, {}, Error);

617 return false;

618

620 return true;

621}

622

623

624

625

626

627bool Parser::parseMatcherExpressionImpl(const TokenInfo &NameToken,

628 const TokenInfo &OpenToken,

629 std::optional Ctor,

630 VariantValue *Value) {

631 if (!Ctor) {

633 << NameToken.Text;

634

635 }

636

638 return parseMatcherBuilder(*Ctor, NameToken, OpenToken, Value);

639

640 std::vector Args;

641 TokenInfo EndToken;

642

644

645 {

646 ScopedContextEntry SCE(this, Ctor.value_or(nullptr));

647

650

652 break;

653 }

654 if (!Args.empty()) {

655

659 << CommaToken.Text;

660 return false;

661 }

662 }

663

665 NameToken.Text, NameToken.Range,

666 Args.size() + 1);

667 ParserValue ArgValue;

671 if (!parseExpressionImpl(&ArgValue.Value)) {

672 return false;

673 }

674

676 Args.push_back(ArgValue);

677 SCE.nextArg();

678 }

679 }

680

683 return false;

684 }

685

686 std::string BindID;

691 addCompletion(ChainCallToken, MatcherCompletion("bind(\"", "bind", 1));

692 return false;

693 }

694

696 Error->addError(ChainCallToken.Range,

698 return false;

699 }

701

703 NameToken.Text, NameToken.Range);

704

705 Error->addError(ChainCallToken.Range,

707 return false;

708 }

710 Error->addError(ChainCallToken.Range,

712 return false;

713 }

714 if (!parseBindID(BindID))

715 return false;

716 }

717

718 if (!Ctor)

719 return false;

720

721

723 NameToken.Text, NameToken.Range);

724 SourceRange MatcherRange = NameToken.Range;

725 MatcherRange.End = EndToken.Range.End;

727 *Ctor, MatcherRange, BindID, Args, Error);

728 if (Result.isNull()) return false;

729

731 return true;

732}

733

734// Adds Completion to Completions if its TypedText starts with CompToken.Text and its

735// Specificity is positive; the stored text omits that already-matched prefix.

736void Parser::addCompletion(const TokenInfo &CompToken,

737 const MatcherCompletion& Completion) {

738 if (StringRef(Completion.TypedText).starts_with(CompToken.Text) &&

739 Completion.Specificity > 0) {

740 Completions.emplace_back(Completion.TypedText.substr(CompToken.Text.size()),

741 Completion.MatcherDecl, Completion.Specificity);

742 }

743}

744

745std::vector Parser::getNamedValueCompletions(

746 ArrayRef AcceptedTypes) {

747 if (!NamedValues) return std::vector();

748 std::vector Result;

749 for (const auto &Entry : *NamedValues) {

750 unsigned Specificity;

751 if (Entry.getValue().isConvertibleTo(AcceptedTypes, &Specificity)) {

752 std::string Decl =

753 (Entry.getValue().getTypeAsString() + " " + Entry.getKey()).str();

754 Result.emplace_back(Entry.getKey(), Decl, Specificity);

755 }

756 }

758}

759

760void Parser::addExpressionCompletions() {

763

764

765

766 for (ContextStackTy::iterator I = ContextStack.begin(),

767 E = ContextStack.end();

768 I != E; ++I) {

769 if (!I->first)

770 return;

771 }

772

775 addCompletion(CompToken, Completion);

776 }

777

778 for (const auto &Completion : getNamedValueCompletions(AcceptedTypes)) {

779 addCompletion(CompToken, Completion);

780 }

781}

782

783

784bool Parser::parseExpressionImpl(VariantValue *Value) {

788 return true;

789

791 return parseIdentifierPrefixImpl(Value);

792

794 addExpressionCompletions();

795 return false;

796

800 return false;

801

803

804 return false;

814 return false;

815 }

816

817 llvm_unreachable("Unknown token kind.");

818}

819

821

822Parser::Parser(CodeTokenizer *Tokenizer, Sema *S,

825 NamedValues(NamedValues), Error(Error) {}

826

827Parser::RegistrySema::~RegistrySema() = default;

828

829std::optional

830Parser::RegistrySema::lookupMatcherCtor(StringRef MatcherName) {

831 return Registry::lookupMatcherCtor(MatcherName); // Forwards the lookup to Registry unchanged.

832}

833

837 if (BindID.empty()) {

838 return Registry::constructMatcher(Ctor, NameRange, Args, Error);

839 } else {

840 return Registry::constructBoundMatcher(Ctor, NameRange, BindID, Args,

841 Error);

842 }

843}

844

845std::vector Parser::RegistrySema::getAcceptedCompletionTypes(

846 ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {

847 return Registry::getAcceptedCompletionTypes(Context); // Forwards Context to Registry unchanged.

848}

849

850std::vector Parser::RegistrySema::getMatcherCompletions(

852 return Registry::getMatcherCompletions(AcceptedTypes);

853}

854

855bool Parser::RegistrySema::isBuilderMatcher(MatcherCtor Ctor) const {

856 return Registry::isBuilderMatcher(Ctor); // Forwards the query to Registry unchanged.

857}

858

860 return Registry::nodeMatcherType(Ctor);

861}

862

867 return Registry::buildMatcherCtor(Ctor, NameRange, Args, Error);

868}

869

870bool Parser::parseExpression(StringRef &Code, Sema *S,

874 if (Parser(&Tokenizer, S, NamedValues, Error).parseExpressionImpl(Value))

875 return false;

876 auto NT = Tokenizer.peekNextToken();

877 if (NT.Kind != TokenInfo::TK_Eof && NT.Kind != TokenInfo::TK_NewLine) {

878 Error->addError(Tokenizer.peekNextToken().Range,

879 Error->ET_ParserTrailingCode);

880 return false;

881 }

882 return true;

883}

884

885std::vector

886Parser::completeExpression(StringRef &Code, unsigned CompletionOffset, Sema *S,

889 CodeTokenizer Tokenizer(Code, &Error, CompletionOffset);

890 Parser P(&Tokenizer, S, NamedValues, &Error);

892 P.parseExpressionImpl(&Dummy);

893

894

895 llvm::sort(P.Completions,

897 if (A.Specificity != B.Specificity)

898 return A.Specificity > B.Specificity;

899 return A.TypedText < B.TypedText;

900 });

901

902 return P.Completions;

903}

904

905std::optional

906Parser::parseMatcherExpression(StringRef &Code, Sema *S,

910 if (!parseExpression(Code, S, NamedValues, &Value, Error))

911 return std::nullopt;

912 if (!Value.isMatcher()) { // Reject non-matcher values before getMatcher() is called below.

913 Error->addError(SourceRange(), Error->ET_ParserNotAMatcher);

914 return std::nullopt;

915 }

916 std::optional Result = Value.getMatcher().getSingleMatcher();

918 Error->addError(SourceRange(), Error->ET_ParserOverloadedType)

919 << Value.getTypeAsString();

920 }

922}

923

924}

925}

926}

Simple matcher expression parser.

Diagnostics class to manage error messages.

Registry of all known matchers.

__device__ __2f16 float c

Sema - This implements semantic analysis and AST building for C.

Helper class to manage error messages.

ArgStream addError(SourceRange Range, ErrorType Error)

Add an error to the diagnostics.

@ ET_ParserMalformedBindExpr

@ ET_RegistryValueNotFound

@ ET_ParserFailedToBuildMatcher

@ ET_RegistryMatcherNotFound

@ ET_RegistryNonNodeMatcher

@ ET_ParserMalformedChainedExpr

@ ET_RegistryMatcherNoWithSupport

Simple tokenizer for the parser.

TokenInfo consumeNextTokenIgnoreNewlines()

CodeTokenizer(StringRef &MatcherCode, Diagnostics *Error, unsigned CodeCompletionOffset)

TokenInfo::TokenKind nextTokenKind() const

CodeTokenizer(StringRef &MatcherCode, Diagnostics *Error)

TokenInfo consumeNextToken()

Consumes and returns the next token.

const TokenInfo & peekNextToken() const

Returns but doesn't consume the next token.

Interface to connect the parser with the registry and more.

virtual VariantMatcher actOnMatcherExpression(MatcherCtor Ctor, SourceRange NameRange, StringRef BindID, ArrayRef< ParserValue > Args, Diagnostics *Error)=0

Process a matcher expression.

virtual internal::MatcherDescriptorPtr buildMatcherCtor(MatcherCtor, SourceRange NameRange, ArrayRef< ParserValue > Args, Diagnostics *Error) const =0

virtual std::optional< MatcherCtor > lookupMatcherCtor(StringRef MatcherName)=0

Look up a matcher by name.

virtual std::vector< ArgKind > getAcceptedCompletionTypes(llvm::ArrayRef< std::pair< MatcherCtor, unsigned > > Context)

Compute the list of completion types for Context.

virtual bool isBuilderMatcher(MatcherCtor) const =0

virtual std::vector< MatcherCompletion > getMatcherCompletions(llvm::ArrayRef< ArgKind > AcceptedTypes)

Compute the list of completions that match any of AcceptedTypes.

virtual ASTNodeKind nodeMatcherType(MatcherCtor) const =0

Matcher expression parser.

llvm::StringMap< VariantValue > NamedValueMap

A variant matcher object.

static VariantMatcher SingleMatcher(const DynTypedMatcher &Matcher)

Clones the provided matcher.

A smart (owning) pointer for MatcherDescriptor.

Matcher descriptor interface.

@ Decl

The l-value was an access to a declared entity or something equivalently strong, like the address of ...

static llvm::ManagedStatic< Parser::RegistrySema > DefaultRegistrySema

const internal::MatcherDescriptor * MatcherCtor

The JSON file list parser is used to communicate input to InstallAPI.

LLVM_READONLY char toLowercase(char c)

Converts the given ASCII character to its lowercase equivalent.

LLVM_READONLY bool isAlphanumeric(unsigned char c)

Return true if this character is an ASCII letter or digit: [a-zA-Z0-9].

@ Result

The result type of a method or function.

LLVM_READONLY bool isHexDigit(unsigned char c)

Return true if this character is an ASCII hex digit: [0-9a-fA-F].

ScopedContextEntry(Parser *P, MatcherCtor C)

Simple structure to hold information for one token from the parser.

TokenKind

Different possible tokens.

static const char *const ID_Bind

Some known identifiers.

static const char *const ID_With