clang: include/clang/Lex/Lexer.h Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13#ifndef LLVM_CLANG_LEX_LEXER_H

14#define LLVM_CLANG_LEX_LEXER_H

15

22#include "llvm/ADT/SmallVector.h"

23#include "llvm/ADT/StringRef.h"

24#include

25#include

26#include

27#include

28

29namespace llvm {

30

31class MemoryBufferRef;

32

33}

34

36

41

42

43

45

47

48

49

51

52

53

55};

56

57

58

59

61

63

64

65

66

67

69

72};

73

74

75

76

77

80

81 void anchor() override;

82

83

84

85

86

87 const char *BufferStart;

88

89

90 const char *BufferEnd;

91

92

94

95

96

97

98

99

100

101

102

104

105

106 bool LineComment;

107

108

109 bool Is_PragmaLexer;

110

111

112

113

114

115

116

117

118

119

120

121

122 unsigned char ExtendedTokenMode;

123

124

125

126

127

128

129

130

131 const char *BufferPtr;

132

133

134

135 bool IsAtStartOfLine;

136

137 bool IsAtPhysicalStartOfLine;

138

139 bool HasLeadingSpace;

140

141 bool HasLeadingEmptyMacro;

142

143

144 bool IsFirstTimeLexingFile;

145

146

147

148 const char *NewLinePtr;

149

150

152

153

155

156

157

158 unsigned NextDepDirectiveTokenIndex = 0;

159

160 void InitLexer(const char *BufStart, const char *BufPtr, const char *BufEnd);

161

162public:

163

164

165

166

168 bool IsFirstIncludeOfFile = true);

169

170

171

172

174 const char *BufStart, const char *BufPtr, const char *BufEnd,

175 bool IsFirstIncludeOfFile = true);

176

177

178

179

180 Lexer(FileID FID, const llvm::MemoryBufferRef &FromFile,

182 bool IsFirstIncludeOfFile = true);

183

186

187

188

189

194

195

196

197

198

200

201

202

204

205private:

206

207 bool LexDependencyDirectiveToken(Token &Result);

208

209

210

211 bool LexDependencyDirectiveTokenWhileSkipping(Token &Result);

212

213

214

/// Returns true when DepDirectives is non-empty.
215 bool isDependencyDirectivesLexer() const { return !DepDirectives.empty(); }

216

217

218

219

220 const char *convertDependencyDirectiveToken(

221 const dependency_directives_scan::Token &DDTok, Token &Result);

222

223public:

224

226

227private:

228

229

231

232public:

233

234

235

237 assert(LexingRawMode && "Not already in raw mode!");

239

240

241 return BufferPtr == BufferEnd;

242 }

243

244

245

246

247

249 return ExtendedTokenMode > 1;

250 }

251

252

253

255 assert((!Val || LexingRawMode || LangOpts.TraditionalCPP) &&

256 "Can only retain whitespace in raw mode or -traditional-cpp");

257 ExtendedTokenMode = Val ? 2 : 0;

258 }

259

260

261

263 return ExtendedTokenMode > 0;

264 }

265

266

267

268

271 "Can't play with comment retention state when retaining whitespace");

272 ExtendedTokenMode = Mode ? 1 : 0;

273 }

274

275

276

277

278

279

280

282

283

285 return StringRef(BufferStart, BufferEnd - BufferStart);

286 }

287

288

289

291

292

293

294

296

297

298

300

301

302

306

307

309

310

312 assert(BufferPtr >= BufferStart && "Invalid buffer state");

313 return BufferPtr - BufferStart;

314 }

315

316

317 void seek(unsigned Offset, bool IsAtStartOfLine);

318

319

320

321

322 static std::string Stringify(StringRef Str, bool Charify = false);

323

324

325

327

328

329

330

331

332

333

334

335

336

337

341 bool *Invalid = nullptr);

342

343

344

345

346

347

351 bool *Invalid = nullptr);

352

353

354

355

356

357

358

359

360

365 bool *invalid = nullptr);

366

367

368

369

370

374

375

376

380 bool IgnoreWhiteSpace = false);

381

382

383

384

388

389

390

392 unsigned CharNo,

395

396

397

398

400 unsigned Characters,

405 }

406

407

408

409

410

411

412

413

414

415

416

417

418

419

420

421

425

426

427

428

429

436 Range.getBegin(), End);

437 }

441 return Range.isTokenRange()

443 : Range;

444 }

445

446

447

448

449

450

455

456

457

458

459

460

465

466

467

468

469

470

471

472

473

474

475

476

477

478

479

480

481

482

483

484

485

486

487

488

489

490

491

495

496

500 bool *Invalid = nullptr);

501

502

503

504

505

506

507

508

512

513

514

515

516

517

518

519

520

521

522

523

524

525

526

527

528

529

532

533

534

535

536

537

538

539

540

541

542

543

544

545

546

547

550 unsigned MaxLines = 0);

551

552

553

554

558 bool IncludeComments = false);

559

560

564 bool IncludeComments);

565

566

567

568

569

570

575 bool SkipTrailingWhitespaceAndNewLine);

576

577

580

581

582

583 static bool isNewLineEscaped(const char *BufferStart, const char *Str);

584

585

586

587

589

590

592 bool Named,

595

596

601

602

603

606

607

608 if (isObviouslySimpleCharacter(Ptr[0])) {

609 return {*Ptr, 1u};

610 }

611

612 return getCharAndSizeSlowNoWarn(Ptr, LangOpts);

613 }

614

615

616

619

620

622

623private:

624

625

626

627

628

629

630 bool LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine);

631

632 bool CheckUnicodeWhitespace(Token &Result, uint32_t C, const char *CurPtr);

633

634 bool LexUnicodeIdentifierStart(Token &Result, uint32_t C, const char *CurPtr);

635

636

637

638

639

640

641 void FormTokenWithChars(Token &Result, const char *TokEnd,

643 unsigned TokLen = TokEnd-BufferPtr;

644 Result.setLength(TokLen);

646 Result.setKind(Kind);

647 BufferPtr = TokEnd;

648 }

649

650

651

652

/// Declared to return an optional Token; the template argument is spelled out
/// because a bare `std::optional` is not a valid return type.
// NOTE(review): presumably returns std::nullopt when no next token is available — confirm against the definition.
653 std::optional<Token> peekNextPPToken();

654

655

656

657

658

659

660

661

662

663

664

665

666

667

668

669

670

671

672

673

674

675

676

/// Returns true when C is neither '?' nor '\\'.
/// The inline character readers use this as their fast-path test: a
/// character that passes is returned as-is with a size of one byte, and
/// anything else is handed to the getCharAndSizeSlow family.
677 static bool isObviouslySimpleCharacter(char C) {

678 return C != '?' && C != '\\';

679 }

680

681

682

683

684

/// Reads the character at Ptr, moves Ptr past it, and returns it.
///
/// \param Ptr  In/out cursor; on return it points just past the bytes that
///             made up the returned character.
/// \param Tok  Forwarded to getCharAndSizeSlow on the slow path.
/// \returns    The character that was read.
685 inline char getAndAdvanceChar(const char *&Ptr, Token &Tok) {

686

687

// Fast path: an obviously simple character occupies exactly one byte.
688 if (isObviouslySimpleCharacter(Ptr[0])) return *Ptr++;

689

690 auto [C, Size] = getCharAndSizeSlow(Ptr, &Tok);

// The slow path must advance the cursor too, by the size it reported;
// otherwise the caller would re-read the same bytes.
691 Ptr += Size;

692 return C;

693 }

694

695

696

697

698

/// Returns Ptr advanced past one character whose size was already measured.
///
/// \param Ptr   Start of the character.
/// \param Size  Size previously reported for the character at Ptr.
/// \param Tok   Forwarded to getCharAndSizeSlow when Size is not 1.
/// \returns     Pointer just past the character.
699 const char *ConsumeChar(const char *Ptr, unsigned Size, Token &Tok) {

700

// A one-byte character needs no re-scan; just step over it.
701 if (Size == 1)

702 return Ptr+Size;

703

704

705

// Otherwise re-run the slow path with Tok and advance by the size it reports.
// NOTE(review): presumably this lets the slow path update Tok — confirm against getCharAndSizeSlow.
706 return Ptr + getCharAndSizeSlow(Ptr, &Tok).Size;

707 }

708

709

710

711

712

/// Reads the character at Ptr without advancing, reporting how many bytes it
/// occupies.
///
/// \param Ptr   Start of the character.
/// \param Size  Out-parameter; always written before returning.
/// \returns     The character that was read.
713 inline char getCharAndSize(const char *Ptr, unsigned &Size) {

714

715

716 if (isObviouslySimpleCharacter(Ptr[0])) {

// Fast path: the character is a single byte. Size must be set here as well,
// since callers such as ConsumeChar compare it against 1.
717 Size = 1;

718 return *Ptr;

719 }

720

// Slow path: no Token is passed, so getCharAndSizeSlow uses its default
// nullptr Tok argument.
721 auto CharAndSize = getCharAndSizeSlow(Ptr);

722 Size = CharAndSize.Size;

723 return CharAndSize.Char;

724 }

725

726

727

728 SizedChar getCharAndSizeSlow(const char *Ptr, Token *Tok = nullptr);

729

730

731

732

733 static const char *SkipEscapedNewLines(const char *P);

734

735

736

737 static SizedChar getCharAndSizeSlowNoWarn(const char *Ptr,

738 const LangOptions &LangOpts);

739

740

741

742

743 void SetByteOffset(unsigned Offset, bool StartOfLine);

744

745 void PropagateLineStartLeadingSpaceInfo(Token &Result);

746

747 const char *LexUDSuffix(Token &Result, const char *CurPtr,

748 bool IsStringLiteral);

749

750

751

752

753

754 bool LexIdentifierContinue(Token &Result, const char *CurPtr);

755

756 bool LexNumericConstant (Token &Result, const char *CurPtr);

757 bool LexStringLiteral (Token &Result, const char *CurPtr,

759 bool LexRawStringLiteral (Token &Result, const char *CurPtr,

761 bool LexAngledStringLiteral(Token &Result, const char *CurPtr);

762 bool LexCharConstant (Token &Result, const char *CurPtr,

764 bool LexEndOfFile (Token &Result, const char *CurPtr);

765 bool SkipWhitespace (Token &Result, const char *CurPtr,

766 bool &TokAtPhysicalStartOfLine);

767 bool SkipLineComment (Token &Result, const char *CurPtr,

768 bool &TokAtPhysicalStartOfLine);

769 bool SkipBlockComment (Token &Result, const char *CurPtr,

770 bool &TokAtPhysicalStartOfLine);

771 bool SaveLineComment (Token &Result, const char *CurPtr);

772

773 bool IsStartOfConflictMarker(const char *CurPtr);

774 bool HandleEndOfConflictMarker(const char *CurPtr);

775

776 bool lexEditorPlaceholder(Token &Result, const char *CurPtr);

777

778 bool isCodeCompletionPoint(const char *CurPtr) const;

/// Moves BufferPtr to BufferEnd, so no input remains between the cursor and
/// the end of the buffer.
779 void cutOffLexing() { BufferPtr = BufferEnd; }

780

781 bool isHexaLiteral(const char *Start, const LangOptions &LangOpts);

782

783 void codeCompleteIncludedFile(const char *PathStart,

784 const char *CompletionPoint, bool IsAngled);

785

786 std::optional<uint32_t>

787 tryReadNumericUCN(const char *&StartPtr, const char *SlashLoc, Token *Result);

788 std::optional<uint32_t> tryReadNamedUCN(const char *&StartPtr,

789 const char *SlashLoc, Token *Result);

790

791

792

793

794

795

796

797

798

799

800

801

802

803 uint32_t tryReadUCN(const char *&StartPtr, const char *SlashLoc, Token *Result);

804

805

806

807

808

809

810

811

812

813

814

815

816 bool tryConsumeIdentifierUCN(const char *&CurPtr, unsigned Size,

818

819

820

821

822

823

824

825 bool tryConsumeIdentifierUTF8Char(const char *&CurPtr, Token &Result);

826};

827

828}

829

830#endif

This is the interface for scanning header and source files to get the minimum necessary preprocessor ...

Defines the clang::LangOptions interface.

Defines the PreprocessorLexer interface.

Defines the clang::SourceLocation class and associated facilities.

Defines the clang::TokenKind enum and support functions.

__device__ __2f16 float c

Represents a character-granular source range.

static CharSourceRange getCharRange(SourceRange R)

A little helper class used to produce diagnostics.

Concrete class used by the front-end to report problems and issues.

An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...

Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...

static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)

Returns a string for the source that the range encompasses.

friend class Preprocessor

Definition Lexer.h:79

void SetKeepWhitespaceMode(bool Val)

SetKeepWhitespaceMode - This method lets clients enable or disable whitespace retention mode.

Definition Lexer.h:254

static SourceLocation findLocationAfterToken(SourceLocation loc, tok::TokenKind TKind, const SourceManager &SM, const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine)

Checks that the given token is the first token that occurs after the given location (this excludes co...

static CharSourceRange getAsCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)

Definition Lexer.h:438

bool LexFromRawLexer(Token &Result)

LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object...

Definition Lexer.h:236

static unsigned getEscapedNewLineSize(const char *P)

getEscapedNewLineSize - Return the size of the specified escaped newline, or 0 if it is not an escape...

bool inKeepCommentMode() const

inKeepCommentMode - Return true if the lexer should return comments as tokens.

Definition Lexer.h:262

void SetCommentRetentionState(bool Mode)

SetCommentRetentionState - Change the comment retention mode of the lexer to the specified mode.

Definition Lexer.h:269

static std::optional< Token > findPreviousToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts, bool IncludeComments)

Finds the token that comes before the given location.

void seek(unsigned Offset, bool IsAtStartOfLine)

Set the lexer's buffer pointer to Offset.

static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)

Retrieve the name of the immediate macro expansion.

void ReadToEndOfLine(SmallVectorImpl< char > *Result=nullptr)

ReadToEndOfLine - Read the rest of the current preprocessor line as an uninterpreted string.

static CharSourceRange getAsCharRange(SourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)

Given a token range, produce a corresponding CharSourceRange that is not a token range.

Definition Lexer.h:430

static bool isAtStartOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroBegin=nullptr)

Returns true if the given MacroID location points at the first token of the macro expansion.

static SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, unsigned Characters, const SourceManager &SM, const LangOptions &LangOpts)

AdvanceToTokenCharacter - If the current SourceLocation specifies a location at the start of a token,...

Definition Lexer.h:399

DiagnosticBuilder Diag(const char *Loc, unsigned DiagID) const

Diag - Forwarding function for diagnostics.

StringRef getBuffer() const

Gets source code buffer.

Definition Lexer.h:284

const char * getBufferLocation() const

Return the current location in the buffer.

Definition Lexer.h:308

bool Lex(Token &Result)

Lex - Return the next token in the file.

bool isPragmaLexer() const

isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.

Definition Lexer.h:225

static void DiagnoseDelimitedOrNamedEscapeSequence(SourceLocation Loc, bool Named, const LangOptions &Opts, DiagnosticsEngine &Diags)

Diagnose use of a delimited or named escape sequence.

static unsigned getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo, const SourceManager &SM, const LangOptions &LangOpts)

Get the physical length (including trigraphs and escaped newlines) of the first CharNo characters...

Lexer(FileID FID, const llvm::MemoryBufferRef &InputFile, Preprocessor &PP, bool IsFirstIncludeOfFile=true)

Lexer constructor - Create a new lexer object for the specified buffer with the specified preprocesso...

static bool isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroEnd=nullptr)

Returns true if the given MacroID location points at the last token of the macro expansion.

SourceLocation getSourceLocation() override

getSourceLocation - Return a source location for the next character in the current file.

Definition Lexer.h:303

static CharSourceRange makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)

Accepts a range and returns a character range with file locations.

unsigned getCurrentBufferOffset()

Returns the current lexing offset.

Definition Lexer.h:311

static bool isNewLineEscaped(const char *BufferStart, const char *Str)

Checks whether the newline pointed to by Str is preceded by an escape sequence.

SourceLocation getFileLoc() const

getFileLoc - Return the File Location for the file we are lexing out of.

Definition Lexer.h:199

static StringRef getIndentationForLine(SourceLocation Loc, const SourceManager &SM)

Returns the leading whitespace for the line that corresponds to the given location Loc.

static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)

getSpelling - This method is used to get the spelling of a token into a preallocated buffer,...

Lexer & operator=(const Lexer &)=delete

bool isKeepWhitespaceMode() const

isKeepWhitespaceMode - Return true if the lexer should return tokens for every character in the file,...

Definition Lexer.h:248

static bool isAsciiIdentifierContinueChar(char c, const LangOptions &LangOpts)

Returns true if the given character could appear in an identifier.

static std::optional< Token > findNextToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts, bool IncludeComments=false)

Finds the token that comes right after the given location.

static unsigned MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)

MeasureTokenLength - Relex the token at the specified location and return its length in bytes in the ...

static SourceLocation GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)

Given a location anywhere in a source buffer, find the location that corresponds to the beginning of...

void resetExtendedTokenMode()

Sets the extended token mode back to its initial value, according to the language options and preproc...

static StringRef getImmediateMacroNameForDiagnostics(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)

Retrieve the name of the immediate macro expansion.

static Lexer * Create_PragmaLexer(SourceLocation SpellingLoc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLen, Preprocessor &PP)

Create_PragmaLexer: Lexer constructor - Create a new lexer object for _Pragma expansion.

static PreambleBounds ComputePreamble(StringRef Buffer, const LangOptions &LangOpts, unsigned MaxLines=0)

Compute the preamble of the given file.

Lexer(const Lexer &)=delete

static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)

Relex the token at the specified location.

static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)

Computes the source location just past the end of the token at this source location.

static std::string Stringify(StringRef Str, bool Charify=false)

Stringify - Convert the specified string into a C string by i) escaping '\' and " characters and ii) ...

static SizedChar getCharAndSizeNoWarn(const char *Ptr, const LangOptions &LangOpts)

getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.

Definition Lexer.h:604

bool isFirstTimeLexingFile() const

Check if this is the first time we're lexing the input file.

Definition Lexer.h:621

bool LexingRawMode

True if in raw mode.

const FileID FID

The SourceManager FileID corresponding to the file being lexed.

Engages in a tight little dance with the lexer to efficiently preprocess tokens.

Encodes a location in the source.

SourceLocation getLocWithOffset(IntTy Offset) const

Return a source location with the specified offset from this SourceLocation.

This class handles loading and caching of source files into memory.

A trivial tuple used to represent a source range.

Token - This structure provides full information about a lexed token.

TokenKind

Provides a simple uniform namespace for tokens from all C languages.

The JSON file list parser is used to communicate input to InstallAPI.

ConflictMarkerKind

ConflictMarkerKind - Kinds of conflict marker which the lexer might be recovering from.

Definition Lexer.h:44

@ CMK_Perforce

A Perforce-style conflict marker, initiated by 4 ">"s, separated by 4 "="s, and terminated by 4 "<"s.

Definition Lexer.h:54

@ CMK_None

Not within a conflict marker.

Definition Lexer.h:46

@ CMK_Normal

A normal or diff3 conflict marker, initiated by at least 7 "<"s, separated by at least 7 "="s or "|"s...

Definition Lexer.h:50

@ Result

The result type of a method or function.

Diagnostic wrappers for TextAPI types for error reporting.

Represents a char and the number of bytes parsed to produce it.

Definition Lexer.h:597

char Char

Definition Lexer.h:598

unsigned Size

Definition Lexer.h:599

Describes the bounds (start, size) of the preamble and a flag required by PreprocessorOptions::Precom...

Definition Lexer.h:60

unsigned Size

Size of the preamble in bytes.

Definition Lexer.h:62

bool PreambleEndsAtStartOfLine

Whether the preamble ends at the start of a new line.

Definition Lexer.h:68

PreambleBounds(unsigned Size, bool PreambleEndsAtStartOfLine)

Definition Lexer.h:70