clang: include/clang/Lex/Lexer.h Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13#ifndef LLVM_CLANG_LEX_LEXER_H
14#define LLVM_CLANG_LEX_LEXER_H
15
22#include "llvm/ADT/SmallVector.h"
23#include "llvm/ADT/StringRef.h"
24#include
25#include
26#include
27#include
28
namespace llvm {

// Forward declaration only: the visible uses in this header (for example the
// Lexer constructor taking `const llvm::MemoryBufferRef &`) name the type by
// reference, which does not require the full definition.
class MemoryBufferRef;

} // namespace llvm
34
36
41
42
43
45
47
48
49
51
52
53
55};
56
57
58
59
61
63
64
65
66
67
69
72};
73
74
75
76
77
80
81 void anchor() override;
82
83
84
85
86
87 const char *BufferStart;
88
89
90 const char *BufferEnd;
91
92
94
95
96
97
98
99
100
101
102
104
105
106 bool LineComment;
107
108
109 bool Is_PragmaLexer;
110
111
112
113
114
115
116
117
118
119
120
121
122 unsigned char ExtendedTokenMode;
123
124
125
126
127
128
129
130
131 const char *BufferPtr;
132
133
134
135 bool IsAtStartOfLine;
136
137 bool IsAtPhysicalStartOfLine;
138
139 bool HasLeadingSpace;
140
141 bool HasLeadingEmptyMacro;
142
143
144 bool IsFirstTimeLexingFile;
145
146
147
148 const char *NewLinePtr;
149
150
152
153
155
156
157
158 unsigned NextDepDirectiveTokenIndex = 0;
159
160 void InitLexer(const char *BufStart, const char *BufPtr, const char *BufEnd);
161
162public:
163
164
165
166
168 bool IsFirstIncludeOfFile = true);
169
170
171
172
174 const char *BufStart, const char *BufPtr, const char *BufEnd,
175 bool IsFirstIncludeOfFile = true);
176
177
178
179
180 Lexer(FileID FID, const llvm::MemoryBufferRef &FromFile,
182 bool IsFirstIncludeOfFile = true);
183
186
187
188
189
194
195
196
197
198
200
201
202
204
205private:
206
207 bool LexDependencyDirectiveToken(Token &Result);
208
209
210
211 bool LexDependencyDirectiveTokenWhileSkipping(Token &Result);
212
213
214
215 bool isDependencyDirectivesLexer() const { return !DepDirectives.empty(); }
216
217
218
219
220 const char *convertDependencyDirectiveToken(
221 const dependency_directives_scan::Token &DDTok, Token &Result);
222
223public:
224
226
227private:
228
229
231
232public:
233
234
235
237 assert(LexingRawMode && "Not already in raw mode!");
239
240
241 return BufferPtr == BufferEnd;
242 }
243
244
245
246
247
249 return ExtendedTokenMode > 1;
250 }
251
252
253
255 assert((!Val || LexingRawMode || LangOpts.TraditionalCPP) &&
256 "Can only retain whitespace in raw mode or -traditional-cpp");
257 ExtendedTokenMode = Val ? 2 : 0;
258 }
259
260
261
263 return ExtendedTokenMode > 0;
264 }
265
266
267
268
271 "Can't play with comment retention state when retaining whitespace");
272 ExtendedTokenMode = Mode ? 1 : 0;
273 }
274
275
276
277
278
279
280
282
283
285 return StringRef(BufferStart, BufferEnd - BufferStart);
286 }
287
288
289
291
292
293
294
296
297
298
300
301
302
306
307
309
310
312 assert(BufferPtr >= BufferStart && "Invalid buffer state");
313 return BufferPtr - BufferStart;
314 }
315
316
317 void seek(unsigned Offset, bool IsAtStartOfLine);
318
319
320
321
322 static std::string Stringify(StringRef Str, bool Charify = false);
323
324
325
327
328
329
330
331
332
333
334
335
336
337
341 bool *Invalid = nullptr);
342
343
344
345
346
347
351 bool *Invalid = nullptr);
352
353
354
355
356
357
358
359
360
365 bool *invalid = nullptr);
366
367
368
369
370
374
375
376
380 bool IgnoreWhiteSpace = false);
381
382
383
384
388
389
390
392 unsigned CharNo,
395
396
397
398
400 unsigned Characters,
405 }
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
425
426
427
428
429
436 Range.getBegin(), End);
437 }
441 return Range.isTokenRange()
443 : Range;
444 }
445
446
447
448
449
450
455
456
457
458
459
460
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
495
496
500 bool *Invalid = nullptr);
501
502
503
504
505
506
507
508
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
550 unsigned MaxLines = 0);
551
552
553
554
558 bool IncludeComments = false);
559
560
564 bool IncludeComments);
565
566
567
568
569
570
575 bool SkipTrailingWhitespaceAndNewLine);
576
577
580
581
582
583 static bool isNewLineEscaped(const char *BufferStart, const char *Str);
584
585
586
587
589
590
592 bool Named,
595
596
601
602
603
606
607
608 if (isObviouslySimpleCharacter(Ptr[0])) {
609 return {*Ptr, 1u};
610 }
611
612 return getCharAndSizeSlowNoWarn(Ptr, LangOpts);
613 }
614
615
616
619
620
622
623private:
624
625
626
627
628
629
630 bool LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine);
631
632 bool CheckUnicodeWhitespace(Token &Result, uint32_t C, const char *CurPtr);
633
634 bool LexUnicodeIdentifierStart(Token &Result, uint32_t C, const char *CurPtr);
635
636
637
638
639
640
641 void FormTokenWithChars(Token &Result, const char *TokEnd,
643 unsigned TokLen = TokEnd-BufferPtr;
644 Result.setLength(TokLen);
646 Result.setKind(Kind);
647 BufferPtr = TokEnd;
648 }
649
650
651
652
653 std::optional peekNextPPToken();
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
/// Fast-path predicate used by the character-reading helpers.
///
/// \param C the byte at the current read position.
/// \returns true unless \p C is '?' (which can begin a trigraph) or '\\'
/// (which can begin an escaped newline or a universal character name). A
/// true result means the byte can be taken as-is without the slow path.
static bool isObviouslySimpleCharacter(char C) {
  return C != '?' && C != '\\';
}
680
681
682
683
684
685 inline char getAndAdvanceChar(const char *&Ptr, Token &Tok) {
686
687
688 if (isObviouslySimpleCharacter(Ptr[0])) return *Ptr++;
689
690 auto [C, Size] = getCharAndSizeSlow(Ptr, &Tok);
692 return C;
693 }
694
695
696
697
698
699 const char *ConsumeChar(const char *Ptr, unsigned Size, Token &Tok) {
700
701 if (Size == 1)
702 return Ptr+Size;
703
704
705
706 return Ptr + getCharAndSizeSlow(Ptr, &Tok).Size;
707 }
708
709
710
711
712
713 inline char getCharAndSize(const char *Ptr, unsigned &Size) {
714
715
716 if (isObviouslySimpleCharacter(Ptr[0])) {
718 return *Ptr;
719 }
720
721 auto CharAndSize = getCharAndSizeSlow(Ptr);
722 Size = CharAndSize.Size;
723 return CharAndSize.Char;
724 }
725
726
727
728 SizedChar getCharAndSizeSlow(const char *Ptr, Token *Tok = nullptr);
729
730
731
732
733 static const char *SkipEscapedNewLines(const char *P);
734
735
736
737 static SizedChar getCharAndSizeSlowNoWarn(const char *Ptr,
738 const LangOptions &LangOpts);
739
740
741
742
743 void SetByteOffset(unsigned Offset, bool StartOfLine);
744
745 void PropagateLineStartLeadingSpaceInfo(Token &Result);
746
747 const char *LexUDSuffix(Token &Result, const char *CurPtr,
748 bool IsStringLiteral);
749
750
751
752
753
754 bool LexIdentifierContinue(Token &Result, const char *CurPtr);
755
756 bool LexNumericConstant (Token &Result, const char *CurPtr);
757 bool LexStringLiteral (Token &Result, const char *CurPtr,
759 bool LexRawStringLiteral (Token &Result, const char *CurPtr,
761 bool LexAngledStringLiteral(Token &Result, const char *CurPtr);
762 bool LexCharConstant (Token &Result, const char *CurPtr,
764 bool LexEndOfFile (Token &Result, const char *CurPtr);
765 bool SkipWhitespace (Token &Result, const char *CurPtr,
766 bool &TokAtPhysicalStartOfLine);
767 bool SkipLineComment (Token &Result, const char *CurPtr,
768 bool &TokAtPhysicalStartOfLine);
769 bool SkipBlockComment (Token &Result, const char *CurPtr,
770 bool &TokAtPhysicalStartOfLine);
771 bool SaveLineComment (Token &Result, const char *CurPtr);
772
773 bool IsStartOfConflictMarker(const char *CurPtr);
774 bool HandleEndOfConflictMarker(const char *CurPtr);
775
776 bool lexEditorPlaceholder(Token &Result, const char *CurPtr);
777
778 bool isCodeCompletionPoint(const char *CurPtr) const;
779 void cutOffLexing() { BufferPtr = BufferEnd; }
780
781 bool isHexaLiteral(const char *Start, const LangOptions &LangOpts);
782
783 void codeCompleteIncludedFile(const char *PathStart,
784 const char *CompletionPoint, bool IsAngled);
785
786 std::optional<uint32_t>
787 tryReadNumericUCN(const char *&StartPtr, const char *SlashLoc, Token *Result);
788 std::optional<uint32_t> tryReadNamedUCN(const char *&StartPtr,
789 const char *SlashLoc, Token *Result);
790
791
792
793
794
795
796
797
798
799
800
801
802
803 uint32_t tryReadUCN(const char *&StartPtr, const char *SlashLoc, Token *Result);
804
805
806
807
808
809
810
811
812
813
814
815
816 bool tryConsumeIdentifierUCN(const char *&CurPtr, unsigned Size,
818
819
820
821
822
823
824
825 bool tryConsumeIdentifierUTF8Char(const char *&CurPtr, Token &Result);
826};
827
828}
829
830#endif
This is the interface for scanning header and source files to get the minimum necessary preprocessor directives for evaluating includes.
Defines the clang::LangOptions interface.
Defines the PreprocessorLexer interface.
Defines the clang::SourceLocation class and associated facilities.
Defines the clang::TokenKind enum and support functions.
__device__ __2f16 float c
Represents a character-granular source range.
static CharSourceRange getCharRange(SourceRange R)
A little helper class used to produce diagnostics.
Concrete class used by the front-end to report problems and issues.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)
Returns a string for the source that the range encompasses.
friend class Preprocessor
Definition Lexer.h:79
void SetKeepWhitespaceMode(bool Val)
SetKeepWhitespaceMode - This method lets clients enable or disable whitespace retention mode.
Definition Lexer.h:254
static SourceLocation findLocationAfterToken(SourceLocation loc, tok::TokenKind TKind, const SourceManager &SM, const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine)
Checks that the given token is the first token that occurs after the given location (this excludes co...
static CharSourceRange getAsCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Definition Lexer.h:438
bool LexFromRawLexer(Token &Result)
LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object...
Definition Lexer.h:236
static unsigned getEscapedNewLineSize(const char *P)
getEscapedNewLineSize - Return the size of the specified escaped newline, or 0 if it is not an escape...
bool inKeepCommentMode() const
inKeepCommentMode - Return true if the lexer should return comments as tokens.
Definition Lexer.h:262
void SetCommentRetentionState(bool Mode)
SetCommentRetentionState - Change the comment retention mode of the lexer to the specified mode.
Definition Lexer.h:269
static std::optional< Token > findPreviousToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts, bool IncludeComments)
Finds the token that comes before the given location.
void seek(unsigned Offset, bool IsAtStartOfLine)
Set the lexer's buffer pointer to Offset.
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
void ReadToEndOfLine(SmallVectorImpl< char > *Result=nullptr)
ReadToEndOfLine - Read the rest of the current preprocessor line as an uninterpreted string.
static CharSourceRange getAsCharRange(SourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Given a token range, produce a corresponding CharSourceRange that is not a token range.
Definition Lexer.h:430
static bool isAtStartOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroBegin=nullptr)
Returns true if the given MacroID location points at the first token of the macro expansion.
static SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, unsigned Characters, const SourceManager &SM, const LangOptions &LangOpts)
AdvanceToTokenCharacter - If the current SourceLocation specifies a location at the start of a token,...
Definition Lexer.h:399
DiagnosticBuilder Diag(const char *Loc, unsigned DiagID) const
Diag - Forwarding function for diagnostics.
StringRef getBuffer() const
Gets source code buffer.
Definition Lexer.h:284
const char * getBufferLocation() const
Return the current location in the buffer.
Definition Lexer.h:308
bool Lex(Token &Result)
Lex - Return the next token in the file.
bool isPragmaLexer() const
isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
Definition Lexer.h:225
static void DiagnoseDelimitedOrNamedEscapeSequence(SourceLocation Loc, bool Named, const LangOptions &Opts, DiagnosticsEngine &Diags)
Diagnose use of a delimited or named escape sequence.
static unsigned getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo, const SourceManager &SM, const LangOptions &LangOpts)
Get the physical length (including trigraphs and escaped newlines) of the first CharNo characters of the token starting at TokStart.
Lexer(FileID FID, const llvm::MemoryBufferRef &InputFile, Preprocessor &PP, bool IsFirstIncludeOfFile=true)
Lexer constructor - Create a new lexer object for the specified buffer with the specified preprocesso...
static bool isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroEnd=nullptr)
Returns true if the given MacroID location points at the last token of the macro expansion.
SourceLocation getSourceLocation() override
getSourceLocation - Return a source location for the next character in the current file.
Definition Lexer.h:303
static CharSourceRange makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Accepts a range and returns a character range with file locations.
unsigned getCurrentBufferOffset()
Returns the current lexing offset.
Definition Lexer.h:311
static bool isNewLineEscaped(const char *BufferStart, const char *Str)
Checks whether the newline pointed to by Str is preceded by an escape sequence.
SourceLocation getFileLoc() const
getFileLoc - Return the File Location for the file we are lexing out of.
Definition Lexer.h:199
static StringRef getIndentationForLine(SourceLocation Loc, const SourceManager &SM)
Returns the leading whitespace for line that corresponds to the given location Loc.
static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)
getSpelling - This method is used to get the spelling of a token into a preallocated buffer,...
Lexer & operator=(const Lexer &)=delete
bool isKeepWhitespaceMode() const
isKeepWhitespaceMode - Return true if the lexer should return tokens for every character in the file,...
Definition Lexer.h:248
static bool isAsciiIdentifierContinueChar(char c, const LangOptions &LangOpts)
Returns true if the given character could appear in an identifier.
static std::optional< Token > findNextToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts, bool IncludeComments=false)
Finds the token that comes right after the given location.
static unsigned MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
MeasureTokenLength - Relex the token at the specified location and return its length in bytes in the ...
static SourceLocation GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Given a location anywhere in a source buffer, find the location that corresponds to the beginning of the token in which the original source location lands.
void resetExtendedTokenMode()
Sets the extended token mode back to its initial value, according to the language options and preproc...
static StringRef getImmediateMacroNameForDiagnostics(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
static Lexer * Create_PragmaLexer(SourceLocation SpellingLoc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLen, Preprocessor &PP)
Create_PragmaLexer: Lexer constructor - Create a new lexer object for _Pragma expansion.
static PreambleBounds ComputePreamble(StringRef Buffer, const LangOptions &LangOpts, unsigned MaxLines=0)
Compute the preamble of the given file.
Lexer(const Lexer &)=delete
static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)
Computes the source location just past the end of the token at this source location.
static std::string Stringify(StringRef Str, bool Charify=false)
Stringify - Convert the specified string into a C string by i) escaping '\' and " characters and ii) ...
static SizedChar getCharAndSizeNoWarn(const char *Ptr, const LangOptions &LangOpts)
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.
Definition Lexer.h:604
bool isFirstTimeLexingFile() const
Check if this is the first time we're lexing the input file.
Definition Lexer.h:621
bool LexingRawMode
True if in raw mode.
const FileID FID
The SourceManager FileID corresponding to the file being lexed.
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
Encodes a location in the source.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
This class handles loading and caching of source files into memory.
A trivial tuple used to represent a source range.
Token - This structure provides full information about a lexed token.
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
The JSON file list parser is used to communicate input to InstallAPI.
ConflictMarkerKind
ConflictMarkerKind - Kinds of conflict marker which the lexer might be recovering from.
Definition Lexer.h:44
@ CMK_Perforce
A Perforce-style conflict marker, initiated by 4 ">"s, separated by 4 "="s, and terminated by 4 "<"s.
Definition Lexer.h:54
@ CMK_None
Not within a conflict marker.
Definition Lexer.h:46
@ CMK_Normal
A normal or diff3 conflict marker, initiated by at least 7 "<"s, separated by at least 7 "="s or "|"s...
Definition Lexer.h:50
@ Result
The result type of a method or function.
Diagnostic wrappers for TextAPI types for error reporting.
Represents a char and the number of bytes parsed to produce it.
Definition Lexer.h:597
char Char
Definition Lexer.h:598
unsigned Size
Definition Lexer.h:599
Describes the bounds (start, size) of the preamble and a flag required by PreprocessorOptions::Precom...
Definition Lexer.h:60
unsigned Size
Size of the preamble in bytes.
Definition Lexer.h:62
bool PreambleEndsAtStartOfLine
Whether the preamble ends at the start of a new line.
Definition Lexer.h:68
PreambleBounds(unsigned Size, bool PreambleEndsAtStartOfLine)
Definition Lexer.h:70