clang: include/clang/Lex/Lexer.h Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13#ifndef LLVM_CLANG_LEX_LEXER_H

14#define LLVM_CLANG_LEX_LEXER_H

15

22#include "llvm/ADT/SmallVector.h"

23#include "llvm/ADT/StringRef.h"

24#include

25#include

26#include

27#include

28

29namespace llvm {

30

31class MemoryBufferRef;

32

33}

34

36

37class DiagnosticBuilder;

38class Preprocessor;

39class SourceManager;

40class LangOptions;

41

42

43

45

47

48

49

51

52

53

56

57

58

59

61

63

64

65

66

67

69

72};

73

74

75

76

77

80

81 void anchor() override;

82

83

84

85

86

87 const char *BufferStart;

88

89

90 const char *BufferEnd;

91

92

94

95

96

97

98

99

100

101

102

104

105

106 bool LineComment;

107

108

109 bool Is_PragmaLexer;

110

111

112

113

114

115

116

117

118

119

120

121

122 unsigned char ExtendedTokenMode;

123

124

125

126

127

128

129

130

131 const char *BufferPtr;

132

133

134

135 bool IsAtStartOfLine;

136

137 bool IsAtPhysicalStartOfLine;

138

139 bool HasLeadingSpace;

140

141 bool HasLeadingEmptyMacro;

142

143

144 bool IsFirstTimeLexingFile;

145

146

147

148 const char *NewLinePtr;

149

150

152

153

155

156

157

158 unsigned NextDepDirectiveTokenIndex = 0;

159

160 void InitLexer(const char *BufStart, const char *BufPtr, const char *BufEnd);

161

162public:

163

164

165

166

168 bool IsFirstIncludeOfFile = true);

169

170

171

172

174 const char *BufStart, const char *BufPtr, const char *BufEnd,

175 bool IsFirstIncludeOfFile = true);

176

177

178

179

180 Lexer(FileID FID, const llvm::MemoryBufferRef &FromFile,

182 bool IsFirstIncludeOfFile = true);

183

186

187

188

189

194

195

196

197

198

200

201

202

204

205private:

206

207 bool LexDependencyDirectiveToken(Token &Result);

208

209

210

211 bool LexDependencyDirectiveTokenWhileSkipping(Token &Result);

212

213

214

/// Returns true when DepDirectives is non-empty, false otherwise.
/// NOTE(review): the declaration of DepDirectives is not visible in this
/// listing; presumably it holds pre-scanned dependency directives - confirm.
215 bool isDependencyDirectivesLexer() const { return !DepDirectives.empty(); }

216

217

218

219

220 const char *convertDependencyDirectiveToken(

221 const dependency_directives_scan::Token &DDTok, Token &Result);

222

223public:

224

226

227private:

228

229

231

232public:

233

234

235

237 assert(LexingRawMode && "Not already in raw mode!");

239

240

241 return BufferPtr == BufferEnd;

242 }

243

244

245

246

247

249 return ExtendedTokenMode > 1;

250 }

251

252

253

255 assert((!Val || LexingRawMode || LangOpts.TraditionalCPP) &&

256 "Can only retain whitespace in raw mode or -traditional-cpp");

257 ExtendedTokenMode = Val ? 2 : 0;

258 }

259

260

261

263 return ExtendedTokenMode > 0;

264 }

265

266

267

268

271 "Can't play with comment retention state when retaining whitespace");

272 ExtendedTokenMode = Mode ? 1 : 0;

273 }

274

275

276

277

278

279

280

282

283

285 return StringRef(BufferStart, BufferEnd - BufferStart);

286 }

287

288

289

291

292

293

294

296

297

298

300

301

302

305 }

306

307

309

310

312 assert(BufferPtr >= BufferStart && "Invalid buffer state");

313 return BufferPtr - BufferStart;

314 }

315

316

317 void seek(unsigned Offset, bool IsAtStartOfLine);

318

319

320

321

322 static std::string Stringify(StringRef Str, bool Charify = false);

323

324

325

327

328

329

330

331

332

333

334

335

336

337

338 static unsigned getSpelling(const Token &Tok, const char *&Buffer,

341 bool *Invalid = nullptr);

342

343

344

345

346

347

351 bool *Invalid = nullptr);

352

353

354

355

356

357

358

359

360

365 bool *invalid = nullptr);

366

367

368

369

370

374

375

376

380 bool IgnoreWhiteSpace = false);

381

382

383

384

388

389

390

392 unsigned CharNo,

395

396

397

398

400 unsigned Characters,

405 }

406

407

408

409

410

411

412

413

414

415

416

417

418

419

420

421

425

426

427

428

429

436 Range.getBegin(), End);

437 }

441 return Range.isTokenRange()

444 }

445

446

447

448

449

450

455

456

457

458

459

460

465

466

467

468

469

470

471

472

473

474

475

476

477

478

479

480

481

482

483

484

485

486

487

488

489

490

491

495

496

500 bool *Invalid = nullptr);

501

502

503

504

505

506

507

508

512

513

514

515

516

517

518

519

520

521

522

523

524

525

526

527

528

529

532

533

534

535

536

537

538

539

540

541

542

543

544

545

546

547

550 unsigned MaxLines = 0);

551

552

553

554

558 bool IncludeComments = false);

559

560

564 bool IncludeComments);

565

566

567

568

569

570

575 bool SkipTrailingWhitespaceAndNewLine);

576

577

580

581

582

583 static bool isNewLineEscaped(const char *BufferStart, const char *Str);

584

585

589 };

590

591

592

595

596

597 if (isObviouslySimpleCharacter(Ptr[0])) {

598 return {*Ptr, 1u};

599 }

600

601 return getCharAndSizeSlowNoWarn(Ptr, LangOpts);

602 }

603

604

605

608

609

611

612private:

613

614

615

616

617

618

619 bool LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine);

620

621 bool CheckUnicodeWhitespace(Token &Result, uint32_t C, const char *CurPtr);

622

623 bool LexUnicodeIdentifierStart(Token &Result, uint32_t C, const char *CurPtr);

624

625

626

627

628

629

630 void FormTokenWithChars(Token &Result, const char *TokEnd,

632 unsigned TokLen = TokEnd-BufferPtr;

633 Result.setLength(TokLen);

636 BufferPtr = TokEnd;

637 }

638

639

640

641

642 unsigned isNextPPTokenLParen();

643

644

645

646

647

648

649

650

651

652

653

654

655

656

657

658

659

660

661

662

663

664

665

/// Reports whether C can be taken at face value by the fast paths that call
/// this helper: the result is true for every character except '?' and '\\'.
/// NOTE(review): those two characters presumably matter because they can
/// begin a trigraph or an escaped newline - confirm against the slow path.
666 static bool isObviouslySimpleCharacter(char C) {

667 return C != '?' && C != '\\';

668 }

669

670

671

672

673

674 inline char getAndAdvanceChar(const char *&Ptr, Token &Tok) {

675

676

677 if (isObviouslySimpleCharacter(Ptr[0])) return *Ptr++;

678

679 auto [C, Size] = getCharAndSizeSlow(Ptr, &Tok);

681 return C;

682 }

683

684

685

686

687

/// Returns the pointer just past the character that starts at Ptr.
///
/// \param Ptr  Start of the character to consume.
/// \param Size Size already measured for that character by the caller.
/// \param Tok  Token handed to getCharAndSizeSlow when Size is not 1.
/// \returns Ptr + Size when Size is 1; otherwise Ptr advanced by the Size
///          that getCharAndSizeSlow(Ptr, &Tok) reports.
688 const char *ConsumeChar(const char *Ptr, unsigned Size, Token &Tok) {

689

// Fast path: a one-byte character needs no further decoding.
690 if (Size == 1)

691 return Ptr+Size;

692

693

694

// Slow path: measure the character again, this time passing the token.
// NOTE(review): the repeat call presumably lets the slow path record
// information on Tok - confirm against getCharAndSizeSlow.
695 return Ptr + getCharAndSizeSlow(Ptr, &Tok).Size;

696 }

697

698

699

700

701

702 inline char getCharAndSize(const char *Ptr, unsigned &Size) {

703

704

705 if (isObviouslySimpleCharacter(Ptr[0])) {

707 return *Ptr;

708 }

709

710 auto CharAndSize = getCharAndSizeSlow(Ptr);

711 Size = CharAndSize.Size;

712 return CharAndSize.Char;

713 }

714

715

716

717 SizedChar getCharAndSizeSlow(const char *Ptr, Token *Tok = nullptr);

718

719

720

721

722 static unsigned getEscapedNewLineSize(const char *P);

723

724

725

726

727 static const char *SkipEscapedNewLines(const char *P);

728

729

730

731 static SizedChar getCharAndSizeSlowNoWarn(const char *Ptr,

732 const LangOptions &LangOpts);

733

734

735

736

737 void SetByteOffset(unsigned Offset, bool StartOfLine);

738

739 void PropagateLineStartLeadingSpaceInfo(Token &Result);

740

741 const char *LexUDSuffix(Token &Result, const char *CurPtr,

742 bool IsStringLiteral);

743

744

745

746

747

748 bool LexIdentifierContinue(Token &Result, const char *CurPtr);

749

750 bool LexNumericConstant (Token &Result, const char *CurPtr);

751 bool LexStringLiteral (Token &Result, const char *CurPtr,

753 bool LexRawStringLiteral (Token &Result, const char *CurPtr,

755 bool LexAngledStringLiteral(Token &Result, const char *CurPtr);

756 bool LexCharConstant (Token &Result, const char *CurPtr,

758 bool LexEndOfFile (Token &Result, const char *CurPtr);

759 bool SkipWhitespace (Token &Result, const char *CurPtr,

760 bool &TokAtPhysicalStartOfLine);

761 bool SkipLineComment (Token &Result, const char *CurPtr,

762 bool &TokAtPhysicalStartOfLine);

763 bool SkipBlockComment (Token &Result, const char *CurPtr,

764 bool &TokAtPhysicalStartOfLine);

765 bool SaveLineComment (Token &Result, const char *CurPtr);

766

767 bool IsStartOfConflictMarker(const char *CurPtr);

768 bool HandleEndOfConflictMarker(const char *CurPtr);

769

770 bool lexEditorPlaceholder(Token &Result, const char *CurPtr);

771

772 bool isCodeCompletionPoint(const char *CurPtr) const;

/// Moves BufferPtr to BufferEnd, leaving no buffer content ahead of the
/// current lexing position.
773 void cutOffLexing() { BufferPtr = BufferEnd; }

774

775 bool isHexaLiteral(const char *Start, const LangOptions &LangOpts);

776

777 void codeCompleteIncludedFile(const char *PathStart,

778 const char *CompletionPoint, bool IsAngled);

779

780 std::optional<uint32_t>

781 tryReadNumericUCN(const char *&StartPtr, const char *SlashLoc, Token *Result);

782 std::optional<uint32_t> tryReadNamedUCN(const char *&StartPtr,

783 const char *SlashLoc, Token *Result);

784

785

786

787

788

789

790

791

792

793

794

795

796

797 uint32_t tryReadUCN(const char *&StartPtr, const char *SlashLoc, Token *Result);

798

799

800

801

802

803

804

805

806

807

808

809

810 bool tryConsumeIdentifierUCN(const char *&CurPtr, unsigned Size,

812

813

814

815

816

817

818

819 bool tryConsumeIdentifierUTF8Char(const char *&CurPtr, Token &Result);

820};

821

822}

823

824#endif

enum clang::sema::@1727::IndirectLocalPathEntry::EntryKind Kind

This is the interface for scanning header and source files to get the minimum necessary preprocessor ...

Defines the clang::LangOptions interface.

Defines the PreprocessorLexer interface.

Defines the clang::SourceLocation class and associated facilities.

Defines the clang::TokenKind enum and support functions.

__device__ __2f16 float c

Represents a character-granular source range.

static CharSourceRange getCharRange(SourceRange R)

A little helper class used to produce diagnostics.

An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...

Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...

Lexer - This provides a simple interface that turns a text buffer into a stream of tokens.

static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)

Returns a string for the source that the range encompasses.

void SetKeepWhitespaceMode(bool Val)

SetKeepWhitespaceMode - This method lets clients enable or disable whitespace retention mode.

static SourceLocation findLocationAfterToken(SourceLocation loc, tok::TokenKind TKind, const SourceManager &SM, const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine)

Checks that the given token is the first token that occurs after the given location (this excludes co...

static CharSourceRange getAsCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)

bool LexFromRawLexer(Token &Result)

LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object...

bool inKeepCommentMode() const

inKeepCommentMode - Return true if the lexer should return comments as tokens.

void SetCommentRetentionState(bool Mode)

SetCommentRetentionState - Change the comment retention mode of the lexer to the specified mode.

static std::optional< Token > findPreviousToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts, bool IncludeComments)

Finds the token that comes before the given location.

void seek(unsigned Offset, bool IsAtStartOfLine)

Set the lexer's buffer pointer to Offset.

static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)

Retrieve the name of the immediate macro expansion.

void ReadToEndOfLine(SmallVectorImpl< char > *Result=nullptr)

ReadToEndOfLine - Read the rest of the current preprocessor line as an uninterpreted string.

static CharSourceRange getAsCharRange(SourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)

Given a token range, produce a corresponding CharSourceRange that is not a token range.

static bool isAtStartOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroBegin=nullptr)

Returns true if the given MacroID location points at the first token of the macro expansion.

static SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, unsigned Characters, const SourceManager &SM, const LangOptions &LangOpts)

AdvanceToTokenCharacter - If the current SourceLocation specifies a location at the start of a token,...

DiagnosticBuilder Diag(const char *Loc, unsigned DiagID) const

Diag - Forwarding function for diagnostics.

StringRef getBuffer() const

Gets source code buffer.

const char * getBufferLocation() const

Return the current location in the buffer.

bool Lex(Token &Result)

Lex - Return the next token in the file.

bool isPragmaLexer() const

isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.

static unsigned getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo, const SourceManager &SM, const LangOptions &LangOpts)

Get the physical length (including trigraphs and escaped newlines) of the first Characters characters...

static bool isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroEnd=nullptr)

Returns true if the given MacroID location points at the last token of the macro expansion.

SourceLocation getSourceLocation() override

getSourceLocation - Return a source location for the next character in the current file.

static CharSourceRange makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)

Accepts a range and returns a character range with file locations.

unsigned getCurrentBufferOffset()

Returns the current lexing offset.

static bool isNewLineEscaped(const char *BufferStart, const char *Str)

Checks whether the newline pointed to by Str is preceded by an escape sequence.

SourceLocation getFileLoc() const

getFileLoc - Return the File Location for the file we are lexing out of.

static StringRef getIndentationForLine(SourceLocation Loc, const SourceManager &SM)

Returns the leading whitespace for the line that corresponds to the given location Loc.

static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)

getSpelling - This method is used to get the spelling of a token into a preallocated buffer,...

Lexer & operator=(const Lexer &)=delete

bool isKeepWhitespaceMode() const

isKeepWhitespaceMode - Return true if the lexer should return tokens for every character in the file,...

static bool isAsciiIdentifierContinueChar(char c, const LangOptions &LangOpts)

Returns true if the given character could appear in an identifier.

static std::optional< Token > findNextToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts, bool IncludeComments=false)

Finds the token that comes right after the given location.

static unsigned MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)

MeasureTokenLength - Relex the token at the specified location and return its length in bytes in the ...

static SourceLocation GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)

Given a location anywhere in a source buffer, find the location that corresponds to the beginning of...

void resetExtendedTokenMode()

Sets the extended token mode back to its initial value, according to the language options and preproc...

static StringRef getImmediateMacroNameForDiagnostics(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)

Retrieve the name of the immediate macro expansion.

static Lexer * Create_PragmaLexer(SourceLocation SpellingLoc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLen, Preprocessor &PP)

Create_PragmaLexer: Lexer constructor - Create a new lexer object for _Pragma expansion.

static PreambleBounds ComputePreamble(StringRef Buffer, const LangOptions &LangOpts, unsigned MaxLines=0)

Compute the preamble of the given file.

Lexer(const Lexer &)=delete

static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)

Relex the token at the specified location.

static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)

Computes the source location just past the end of the token at this source location.

static std::string Stringify(StringRef Str, bool Charify=false)

Stringify - Convert the specified string into a C string by i) escaping '\' and " characters and ii) ...

static SizedChar getCharAndSizeNoWarn(const char *Ptr, const LangOptions &LangOpts)

getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.

bool isFirstTimeLexingFile() const

Check if this is the first time we're lexing the input file.

bool LexingRawMode

True if in raw mode.

const FileID FID

The SourceManager FileID corresponding to the file being lexed.

Engages in a tight little dance with the lexer to efficiently preprocess tokens.

Encodes a location in the source.

SourceLocation getLocWithOffset(IntTy Offset) const

Return a source location with the specified offset from this SourceLocation.

This class handles loading and caching of source files into memory.

A trivial tuple used to represent a source range.

Token - This structure provides full information about a lexed token.

TokenKind

Provides a simple uniform namespace for tokens from all C languages.

The JSON file list parser is used to communicate input to InstallAPI.

ConflictMarkerKind

ConflictMarkerKind - Kinds of conflict marker which the lexer might be recovering from.

@ CMK_Perforce

A Perforce-style conflict marker, initiated by 4 ">"s, separated by 4 "="s, and terminated by 4 "<"s.

@ CMK_None

Not within a conflict marker.

@ CMK_Normal

A normal or diff3 conflict marker, initiated by at least 7 "<"s, separated by at least 7 "="s or "|"s...

@ Result

The result type of a method or function.

Diagnostic wrappers for TextAPI types for error reporting.

Represents a char and the number of bytes parsed to produce it.

Describes the bounds (start, size) of the preamble and a flag required by PreprocessorOptions::Precom...

unsigned Size

Size of the preamble in bytes.

bool PreambleEndsAtStartOfLine

Whether the preamble ends at the start of a new line.

PreambleBounds(unsigned Size, bool PreambleEndsAtStartOfLine)