Lexer.h Source File (original) (raw)

13#ifndef LLVM_CLANG_LEX_LEXER_H

14#define LLVM_CLANG_LEX_LEXER_H

22#include "llvm/ADT/SmallVector.h"

23#include "llvm/ADT/StringRef.h"

24#include

25#include

26#include

27#include

29namespace llvm {

31class MemoryBufferRef;

33}

37class DiagnosticBuilder;

38class Preprocessor;

39class SourceManager;

40class LangOptions;

72};

81 void anchor() override;

87 const char *BufferStart;

90 const char *BufferEnd;

100

101

102

104

105

106 bool LineComment;

107

108

109 bool Is_PragmaLexer;

110

111

112

113

114

115

116

117

118

119

120

121

122 unsigned char ExtendedTokenMode;

123

124

125

126

127

128

129

130

131 const char *BufferPtr;

132

133

134

135 bool IsAtStartOfLine;

136

137 bool IsAtPhysicalStartOfLine;

138

139 bool HasLeadingSpace;

140

141 bool HasLeadingEmptyMacro;

142

143

144 bool IsFirstTimeLexingFile;

145

146

147

148 const char *NewLinePtr;

149

150

152

153

155

156

157

158 unsigned NextDepDirectiveTokenIndex = 0;

159

160 void InitLexer(const char *BufStart, const char *BufPtr, const char *BufEnd);

161

162public:

163

164

165

166

168 bool IsFirstIncludeOfFile = true);

169

170

171

172

174 const char *BufStart, const char *BufPtr, const char *BufEnd,

175 bool IsFirstIncludeOfFile = true);

176

177

178

179

180 Lexer(FileID FID, const llvm::MemoryBufferRef &FromFile,

182 bool IsFirstIncludeOfFile = true);

183

186

187

188

189

194

195

196

197

198

200

201

202

204

205private:

206

207 bool LexDependencyDirectiveToken(Token &Result);

208

209

210

211 bool LexDependencyDirectiveTokenWhileSkipping(Token &Result);

212

213

214

// Returns true when DepDirectives (declared outside the visible lines) is
// non-empty, false otherwise. Does not modify the lexer.
// NOTE(review): presumably a non-empty DepDirectives means this lexer is
// consuming pre-scanned dependency directives instead of raw text - confirm.
215 bool isDependencyDirectivesLexer() const { return !DepDirectives.empty(); }

216

217

218

219

220 const char *convertDependencyDirectiveToken(

221 const dependency_directives_scan::Token &DDTok, Token &Result);

222

223public:

224

226

227private:

228

229

231

232public:

233

234

235

237 assert(LexingRawMode && "Not already in raw mode!");

239

240

241 return BufferPtr == BufferEnd;

242 }

243

244

245

246

247

249 return ExtendedTokenMode > 1;

250 }

251

252

253

255 assert((!Val || LexingRawMode || LangOpts.TraditionalCPP) &&

256 "Can only retain whitespace in raw mode or -traditional-cpp");

257 ExtendedTokenMode = Val ? 2 : 0;

258 }

259

260

261

263 return ExtendedTokenMode > 0;

264 }

265

266

267

268

271 "Can't play with comment retention state when retaining whitespace");

272 ExtendedTokenMode = Mode ? 1 : 0;

273 }

274

275

276

277

278

279

280

282

283

285 return StringRef(BufferStart, BufferEnd - BufferStart);

286 }

287

288

289

291

292

293

294

296

297

298

300

301

302

305 }

306

307

309

310

312 assert(BufferPtr >= BufferStart && "Invalid buffer state");

313 return BufferPtr - BufferStart;

314 }

315

316

// Set the lexer's buffer pointer to Offset.
// NOTE(review): IsAtStartOfLine presumably becomes the new value of the
// IsAtStartOfLine member flag - confirm against the out-of-line definition.
317 void seek(unsigned Offset, bool IsAtStartOfLine);

318

319

320

321

// Stringify - Convert Str into a C string; this includes escaping backslash
// and double-quote characters. Charify defaults to false.
// NOTE(review): the effect of Charify and any further conversion steps are
// not visible in this listing - confirm against the definition.
322 static std::string Stringify(StringRef Str, bool Charify = false);

323

324

325

327

328

329

330

331

332

333

334

335

336

337

338 static unsigned getSpelling(const Token &Tok, const char *&Buffer,

341 bool *Invalid = nullptr);

342

343

344

345

346

347

351 bool *Invalid = nullptr);

352

353

354

355

356

357

358

359

360

365 bool *invalid = nullptr);

366

367

368

369

370

374

375

376

380 bool IgnoreWhiteSpace = false);

381

382

383

384

388

389

390

392 unsigned CharNo,

395

396

397

398

400 unsigned Characters,

405 }

406

407

408

409

410

411

412

413

414

415

416

417

418

419

420

421

425

426

427

428

429

436 Range.getBegin(), End);

437 }

441 return Range.isTokenRange()

444 }

445

446

447

448

449

450

455

456

457

458

459

460

465

466

467

468

469

470

471

472

473

474

475

476

477

478

479

480

481

482

483

484

485

486

487

488

489

490

491

495

496

500 bool *Invalid = nullptr);

501

502

503

504

505

506

507

508

512

513

514

515

516

517

518

519

520

521

522

523

524

525

526

527

528

529

532

533

534

535

536

537

538

539

540

541

542

543

544

545

546

547

550 unsigned MaxLines = 0);

551

552

553

554

558 bool IncludeComments = false);

559

560

561

562

563

564

569 bool SkipTrailingWhitespaceAndNewLine);

570

571

574

575

576

// Checks whether the new line pointed to by Str is preceded by an escape
// sequence.
// NOTE(review): BufferStart presumably bounds how far back the check may
// look - confirm against the definition.
577 static bool isNewLineEscaped(const char *BufferStart, const char *Str);

578

579

583 };

584

585

586

589

590

591 if (isObviouslySimpleCharacter(Ptr[0])) {

592 return {*Ptr, 1u};

593 }

594

595 return getCharAndSizeSlowNoWarn(Ptr, LangOpts);

596 }

597

598

599

602

603

605

606private:

607

608

609

610

611

612

613 bool LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine);

614

615 bool CheckUnicodeWhitespace(Token &Result, uint32_t C, const char *CurPtr);

616

617 bool LexUnicodeIdentifierStart(Token &Result, uint32_t C, const char *CurPtr);

618

619

620

621

622

623

624 void FormTokenWithChars(Token &Result, const char *TokEnd,

626 unsigned TokLen = TokEnd-BufferPtr;

627 Result.setLength(TokLen);

630 BufferPtr = TokEnd;

631 }

632

633

634

635

636 unsigned isNextPPTokenLParen();

637

638

639

640

641

642

643

644

645

646

647

648

649

650

651

652

653

654

655

656

657

658

659

// isObviouslySimpleCharacter - Returns true when C is neither a question
// mark nor a backslash. The character readers in this class use it as their
// fast-path test: when it holds they return the byte directly instead of
// calling getCharAndSizeSlow / getCharAndSizeSlowNoWarn.
// NOTE(review): presumably these two characters are singled out because they
// can begin trigraphs and escaped newlines (or UCNs) - confirm.
660 static bool isObviouslySimpleCharacter(char C) {

661 return C != '?' && C != '\\';

662 }

663

664

665

666

667

// getAndAdvanceChar - Return the character at Ptr and move Ptr past it.
//
// Ptr is taken by reference and is updated on both paths. Tok is handed to
// getCharAndSizeSlow on the slow path only.
// NOTE(review): presumably Tok is passed so the slow path can record flags
// or emit diagnostics against the token being formed - confirm.
668 inline char getAndAdvanceChar(const char *&Ptr, Token &Tok) {

669

670

// Fast path: a simple character is exactly one byte, so read it and step
// over it in a single expression.
671 if (isObviouslySimpleCharacter(Ptr[0])) return *Ptr++;

672

673 auto [C, Size] = getCharAndSizeSlow(Ptr, &Tok);

// Skip every byte the slow path consumed. Without this step the caller's
// pointer would stay on the same character, unlike the fast path above.
674 Ptr += Size;

675 return C;

676 }

677

678

679

680

681

// ConsumeChar - Return a pointer just past the character that starts at Ptr.
//
// Size is the byte count the caller already measured for that character.
// When Size is 1 the result is simply Ptr + 1. Otherwise the character is
// read again through getCharAndSizeSlow, this time with &Tok, and the
// pointer advances by the size that call reports (not by the Size argument).
// NOTE(review): presumably the second read exists so the slow path can
// diagnose or flag Tok - confirm.
682 const char *ConsumeChar(const char *Ptr, unsigned Size, Token &Tok) {

683

684 if (Size == 1)

685 return Ptr+Size;

686

687

688

689 return Ptr + getCharAndSizeSlow(Ptr, &Tok).Size;

690 }

691

692

693

694

695

// getCharAndSize - Return the character at Ptr without advancing, and store
// in Size the number of bytes that character occupies in the buffer.
//
// Size is an out-parameter and is written on every path.
696 inline char getCharAndSize(const char *Ptr, unsigned &Size) {

697

698

699 if (isObviouslySimpleCharacter(Ptr[0])) {

// A simple character is a single byte. Report that explicitly so the caller
// never reads an unset Size; the NoWarn variant reports 1 on this path too.
700 Size = 1;

701 return *Ptr;

702 }

703

// Slow path: no Token is passed here, so only the character and its size
// come back from getCharAndSizeSlow.
704 auto CharAndSize = getCharAndSizeSlow(Ptr);

705 Size = CharAndSize.Size;

706 return CharAndSize.Char;

707 }

708

709

710

711 SizedChar getCharAndSizeSlow(const char *Ptr, Token *Tok = nullptr);

712

713

714

715

716 static unsigned getEscapedNewLineSize(const char *P);

717

718

719

720

721 static const char *SkipEscapedNewLines(const char *P);

722

723

724

725 static SizedChar getCharAndSizeSlowNoWarn(const char *Ptr,

726 const LangOptions &LangOpts);

727

728

729

730

731 void SetByteOffset(unsigned Offset, bool StartOfLine);

732

733 void PropagateLineStartLeadingSpaceInfo(Token &Result);

734

735 const char *LexUDSuffix(Token &Result, const char *CurPtr,

736 bool IsStringLiteral);

737

738

739

740

741

742 bool LexIdentifierContinue(Token &Result, const char *CurPtr);

743

744 bool LexNumericConstant (Token &Result, const char *CurPtr);

745 bool LexStringLiteral (Token &Result, const char *CurPtr,

747 bool LexRawStringLiteral (Token &Result, const char *CurPtr,

749 bool LexAngledStringLiteral(Token &Result, const char *CurPtr);

750 bool LexCharConstant (Token &Result, const char *CurPtr,

752 bool LexEndOfFile (Token &Result, const char *CurPtr);

753 bool SkipWhitespace (Token &Result, const char *CurPtr,

754 bool &TokAtPhysicalStartOfLine);

755 bool SkipLineComment (Token &Result, const char *CurPtr,

756 bool &TokAtPhysicalStartOfLine);

757 bool SkipBlockComment (Token &Result, const char *CurPtr,

758 bool &TokAtPhysicalStartOfLine);

759 bool SaveLineComment (Token &Result, const char *CurPtr);

760

761 bool IsStartOfConflictMarker(const char *CurPtr);

762 bool HandleEndOfConflictMarker(const char *CurPtr);

763

764 bool lexEditorPlaceholder(Token &Result, const char *CurPtr);

765

766 bool isCodeCompletionPoint(const char *CurPtr) const;

// Moves BufferPtr to BufferEnd, so no buffer content remains ahead of the
// current lexing position.
767 void cutOffLexing() { BufferPtr = BufferEnd; }

768

769 bool isHexaLiteral(const char *Start, const LangOptions &LangOpts);

770

771 void codeCompleteIncludedFile(const char *PathStart,

772 const char *CompletionPoint, bool IsAngled);

773

774 std::optional<uint32_t>

775 tryReadNumericUCN(const char *&StartPtr, const char *SlashLoc, Token *Result);

776 std::optional<uint32_t> tryReadNamedUCN(const char *&StartPtr,

777 const char *SlashLoc, Token *Result);

778

779

780

781

782

783

784

785

786

787

788

789

790

791 uint32_t tryReadUCN(const char *&StartPtr, const char *SlashLoc, Token *Result);

792

793

794

795

796

797

798

799

800

801

802

803

804 bool tryConsumeIdentifierUCN(const char *&CurPtr, unsigned Size,

806

807

808

809

810

811

812

813 bool tryConsumeIdentifierUTF8Char(const char *&CurPtr, Token &Result);

814};

815

816}

817

818#endif

enum clang::sema::@1726::IndirectLocalPathEntry::EntryKind Kind

This is the interface for scanning header and source files to get the minimum necessary preprocessor ...

Defines the clang::LangOptions interface.

Defines the PreprocessorLexer interface.

Defines the clang::SourceLocation class and associated facilities.

Defines the clang::TokenKind enum and support functions.

__device__ __2f16 float c

Represents a character-granular source range.

static CharSourceRange getCharRange(SourceRange R)

A little helper class used to produce diagnostics.

An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...

Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...

Lexer - This provides a simple interface that turns a text buffer into a stream of tokens.

static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)

Returns a string for the source that the range encompasses.

void SetKeepWhitespaceMode(bool Val)

SetKeepWhitespaceMode - This method lets clients enable or disable whitespace retention mode.

static SourceLocation findLocationAfterToken(SourceLocation loc, tok::TokenKind TKind, const SourceManager &SM, const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine)

Checks that the given token is the first token that occurs after the given location (this excludes co...

static CharSourceRange getAsCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)

bool LexFromRawLexer(Token &Result)

LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object...

bool inKeepCommentMode() const

inKeepCommentMode - Return true if the lexer should return comments as tokens.

void SetCommentRetentionState(bool Mode)

SetCommentRetentionState - Change the comment retention mode of the lexer to the specified mode.

void seek(unsigned Offset, bool IsAtStartOfLine)

Set the lexer's buffer pointer to Offset.

static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)

Retrieve the name of the immediate macro expansion.

void ReadToEndOfLine(SmallVectorImpl< char > *Result=nullptr)

ReadToEndOfLine - Read the rest of the current preprocessor line as an uninterpreted string.

static CharSourceRange getAsCharRange(SourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)

Given a token range, produce a corresponding CharSourceRange that is not a token range.

static bool isAtStartOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroBegin=nullptr)

Returns true if the given MacroID location points at the first token of the macro expansion.

static SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, unsigned Characters, const SourceManager &SM, const LangOptions &LangOpts)

AdvanceToTokenCharacter - If the current SourceLocation specifies a location at the start of a token,...

DiagnosticBuilder Diag(const char *Loc, unsigned DiagID) const

Diag - Forwarding function for diagnostics.

StringRef getBuffer() const

Gets source code buffer.

const char * getBufferLocation() const

Return the current location in the buffer.

bool Lex(Token &Result)

Lex - Return the next token in the file.

bool isPragmaLexer() const

isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.

static unsigned getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo, const SourceManager &SM, const LangOptions &LangOpts)

Get the physical length (including trigraphs and escaped newlines) of the first CharNo characters...

static bool isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroEnd=nullptr)

Returns true if the given MacroID location points at the last token of the macro expansion.

SourceLocation getSourceLocation() override

getSourceLocation - Return a source location for the next character in the current file.

static CharSourceRange makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)

Accepts a range and returns a character range with file locations.

unsigned getCurrentBufferOffset()

Returns the current lexing offset.

static bool isNewLineEscaped(const char *BufferStart, const char *Str)

Checks whether the new line pointed to by Str is preceded by an escape sequence.

SourceLocation getFileLoc() const

getFileLoc - Return the File Location for the file we are lexing out of.

static StringRef getIndentationForLine(SourceLocation Loc, const SourceManager &SM)

Returns the leading whitespace for the line that corresponds to the given location Loc.

static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)

getSpelling - This method is used to get the spelling of a token into a preallocated buffer,...

Lexer & operator=(const Lexer &)=delete

bool isKeepWhitespaceMode() const

isKeepWhitespaceMode - Return true if the lexer should return tokens for every character in the file,...

static bool isAsciiIdentifierContinueChar(char c, const LangOptions &LangOpts)

Returns true if the given character could appear in an identifier.

static std::optional< Token > findNextToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts, bool IncludeComments=false)

Finds the token that comes right after the given location.

static unsigned MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)

MeasureTokenLength - Relex the token at the specified location and return its length in bytes in the ...

static SourceLocation GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)

Given a location anywhere in a source buffer, find the location that corresponds to the beginning of...

void resetExtendedTokenMode()

Sets the extended token mode back to its initial value, according to the language options and preproc...

static StringRef getImmediateMacroNameForDiagnostics(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)

Retrieve the name of the immediate macro expansion.

static Lexer * Create_PragmaLexer(SourceLocation SpellingLoc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLen, Preprocessor &PP)

Create_PragmaLexer: Lexer constructor - Create a new lexer object for _Pragma expansion.

static PreambleBounds ComputePreamble(StringRef Buffer, const LangOptions &LangOpts, unsigned MaxLines=0)

Compute the preamble of the given file.

Lexer(const Lexer &)=delete

static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)

Relex the token at the specified location.

static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)

Computes the source location just past the end of the token at this source location.

static std::string Stringify(StringRef Str, bool Charify=false)

Stringify - Convert the specified string into a C string by i) escaping '\' and " characters and ii) ...

static SizedChar getCharAndSizeNoWarn(const char *Ptr, const LangOptions &LangOpts)

getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.

bool isFirstTimeLexingFile() const

Check if this is the first time we're lexing the input file.

bool LexingRawMode

True if in raw mode.

const FileID FID

The SourceManager FileID corresponding to the file being lexed.

Engages in a tight little dance with the lexer to efficiently preprocess tokens.

Encodes a location in the source.

SourceLocation getLocWithOffset(IntTy Offset) const

Return a source location with the specified offset from this SourceLocation.

This class handles loading and caching of source files into memory.

A trivial tuple used to represent a source range.

Token - This structure provides full information about a lexed token.

TokenKind

Provides a simple uniform namespace for tokens from all C languages.

The JSON file list parser is used to communicate input to InstallAPI.

ConflictMarkerKind

ConflictMarkerKind - Kinds of conflict marker which the lexer might be recovering from.

@ CMK_Perforce

A Perforce-style conflict marker, initiated by 4 ">"s, separated by 4 "="s, and terminated by 4 "<"s.

@ CMK_None

Not within a conflict marker.

@ CMK_Normal

A normal or diff3 conflict marker, initiated by at least 7 "<"s, separated by at least 7 "="s or "|"s...

@ Result

The result type of a method or function.

Diagnostic wrappers for TextAPI types for error reporting.

Represents a char and the number of bytes parsed to produce it.

Describes the bounds (start, size) of the preamble and a flag required by PreprocessorOptions::Precom...

unsigned Size

Size of the preamble in bytes.

bool PreambleEndsAtStartOfLine

Whether the preamble ends at the start of a new line.

PreambleBounds(unsigned Size, bool PreambleEndsAtStartOfLine)