clang: include/clang/Lex/Lexer.h Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13#ifndef LLVM_CLANG_LEX_LEXER_H
14#define LLVM_CLANG_LEX_LEXER_H
15
22#include "llvm/ADT/SmallVector.h"
23#include "llvm/ADT/StringRef.h"
24#include
25#include
26#include
27#include
28
29namespace llvm {
30
31class MemoryBufferRef;
32
33}
34
36
37class DiagnosticBuilder;
38class Preprocessor;
39class SourceManager;
40class LangOptions;
41
42
43
45
47
48
49
51
52
53
56
57
58
59
61
63
64
65
66
67
69
72};
73
74
75
76
77
80
81 void anchor() override;
82
83
84
85
86
87 const char *BufferStart;
88
89
90 const char *BufferEnd;
91
92
94
95
96
97
98
99
100
101
102
104
105
106 bool LineComment;
107
108
109 bool Is_PragmaLexer;
110
111
112
113
114
115
116
117
118
119
120
121
122 unsigned char ExtendedTokenMode;
123
124
125
126
127
128
129
130
131 const char *BufferPtr;
132
133
134
135 bool IsAtStartOfLine;
136
137 bool IsAtPhysicalStartOfLine;
138
139 bool HasLeadingSpace;
140
141 bool HasLeadingEmptyMacro;
142
143
144 bool IsFirstTimeLexingFile;
145
146
147
148 const char *NewLinePtr;
149
150
152
153
155
156
157
158 unsigned NextDepDirectiveTokenIndex = 0;
159
160 void InitLexer(const char *BufStart, const char *BufPtr, const char *BufEnd);
161
162public:
163
164
165
166
168 bool IsFirstIncludeOfFile = true);
169
170
171
172
174 const char *BufStart, const char *BufPtr, const char *BufEnd,
175 bool IsFirstIncludeOfFile = true);
176
177
178
179
180 Lexer(FileID FID, const llvm::MemoryBufferRef &FromFile,
182 bool IsFirstIncludeOfFile = true);
183
186
187
188
189
194
195
196
197
198
200
201
202
204
205private:
206
207 bool LexDependencyDirectiveToken(Token &Result);
208
209
210
211 bool LexDependencyDirectiveTokenWhileSkipping(Token &Result);
212
213
214
215 bool isDependencyDirectivesLexer() const { return !DepDirectives.empty(); }
216
217
218
219
220 const char *convertDependencyDirectiveToken(
221 const dependency_directives_scan::Token &DDTok, Token &Result);
222
223public:
224
226
227private:
228
229
231
232public:
233
234
235
237 assert(LexingRawMode && "Not already in raw mode!");
239
240
241 return BufferPtr == BufferEnd;
242 }
243
244
245
246
247
249 return ExtendedTokenMode > 1;
250 }
251
252
253
255 assert((!Val || LexingRawMode || LangOpts.TraditionalCPP) &&
256 "Can only retain whitespace in raw mode or -traditional-cpp");
257 ExtendedTokenMode = Val ? 2 : 0;
258 }
259
260
261
263 return ExtendedTokenMode > 0;
264 }
265
266
267
268
271 "Can't play with comment retention state when retaining whitespace");
272 ExtendedTokenMode = Mode ? 1 : 0;
273 }
274
275
276
277
278
279
280
282
283
285 return StringRef(BufferStart, BufferEnd - BufferStart);
286 }
287
288
289
291
292
293
294
296
297
298
300
301
302
305 }
306
307
309
310
312 assert(BufferPtr >= BufferStart && "Invalid buffer state");
313 return BufferPtr - BufferStart;
314 }
315
316
317 void seek(unsigned Offset, bool IsAtStartOfLine);
318
319
320
321
322 static std::string Stringify(StringRef Str, bool Charify = false);
323
324
325
327
328
329
330
331
332
333
334
335
336
337
338 static unsigned getSpelling(const Token &Tok, const char *&Buffer,
341 bool *Invalid = nullptr);
342
343
344
345
346
347
351 bool *Invalid = nullptr);
352
353
354
355
356
357
358
359
360
365 bool *invalid = nullptr);
366
367
368
369
370
374
375
376
380 bool IgnoreWhiteSpace = false);
381
382
383
384
388
389
390
392 unsigned CharNo,
395
396
397
398
400 unsigned Characters,
405 }
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
425
426
427
428
429
436 Range.getBegin(), End);
437 }
441 return Range.isTokenRange()
444 }
445
446
447
448
449
450
455
456
457
458
459
460
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
495
496
500 bool *Invalid = nullptr);
501
502
503
504
505
506
507
508
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
550 unsigned MaxLines = 0);
551
552
553
554
558 bool IncludeComments = false);
559
560
564 bool IncludeComments);
565
566
567
568
569
570
575 bool SkipTrailingWhitespaceAndNewLine);
576
577
580
581
582
583 static bool isNewLineEscaped(const char *BufferStart, const char *Str);
584
585
589 };
590
591
592
595
596
597 if (isObviouslySimpleCharacter(Ptr[0])) {
598 return {*Ptr, 1u};
599 }
600
601 return getCharAndSizeSlowNoWarn(Ptr, LangOpts);
602 }
603
604
605
608
609
611
612private:
613
614
615
616
617
618
619 bool LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine);
620
621 bool CheckUnicodeWhitespace(Token &Result, uint32_t C, const char *CurPtr);
622
623 bool LexUnicodeIdentifierStart(Token &Result, uint32_t C, const char *CurPtr);
624
625
626
627
628
629
630 void FormTokenWithChars(Token &Result, const char *TokEnd,
632 unsigned TokLen = TokEnd-BufferPtr;
633 Result.setLength(TokLen);
636 BufferPtr = TokEnd;
637 }
638
639
640
641
642 unsigned isNextPPTokenLParen();
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
/// Fast-path filter used before decoding a character from the buffer.
///
/// \returns true for every character except '?' and '\\'. Callers in this
/// file fall back to their slow path for those two characters.
/// NOTE(review): presumably they are singled out because they can begin
/// trigraphs and backslash sequences -- confirm against getCharAndSizeSlow.
static bool isObviouslySimpleCharacter(char C) {
  return C != '?' && C != '\\';
}
669
670
671
672
673
674 inline char getAndAdvanceChar(const char *&Ptr, Token &Tok) {
675
676
677 if (isObviouslySimpleCharacter(Ptr[0])) return *Ptr++;
678
679 auto [C, Size] = getCharAndSizeSlow(Ptr, &Tok);
681 return C;
682 }
683
684
685
686
687
688 const char *ConsumeChar(const char *Ptr, unsigned Size, Token &Tok) {
689
690 if (Size == 1)
691 return Ptr+Size;
692
693
694
695 return Ptr + getCharAndSizeSlow(Ptr, &Tok).Size;
696 }
697
698
699
700
701
702 inline char getCharAndSize(const char *Ptr, unsigned &Size) {
703
704
705 if (isObviouslySimpleCharacter(Ptr[0])) {
707 return *Ptr;
708 }
709
710 auto CharAndSize = getCharAndSizeSlow(Ptr);
711 Size = CharAndSize.Size;
712 return CharAndSize.Char;
713 }
714
715
716
717 SizedChar getCharAndSizeSlow(const char *Ptr, Token *Tok = nullptr);
718
719
720
721
722 static unsigned getEscapedNewLineSize(const char *P);
723
724
725
726
727 static const char *SkipEscapedNewLines(const char *P);
728
729
730
731 static SizedChar getCharAndSizeSlowNoWarn(const char *Ptr,
732 const LangOptions &LangOpts);
733
734
735
736
737 void SetByteOffset(unsigned Offset, bool StartOfLine);
738
739 void PropagateLineStartLeadingSpaceInfo(Token &Result);
740
741 const char *LexUDSuffix(Token &Result, const char *CurPtr,
742 bool IsStringLiteral);
743
744
745
746
747
748 bool LexIdentifierContinue(Token &Result, const char *CurPtr);
749
750 bool LexNumericConstant (Token &Result, const char *CurPtr);
751 bool LexStringLiteral (Token &Result, const char *CurPtr,
753 bool LexRawStringLiteral (Token &Result, const char *CurPtr,
755 bool LexAngledStringLiteral(Token &Result, const char *CurPtr);
756 bool LexCharConstant (Token &Result, const char *CurPtr,
758 bool LexEndOfFile (Token &Result, const char *CurPtr);
759 bool SkipWhitespace (Token &Result, const char *CurPtr,
760 bool &TokAtPhysicalStartOfLine);
761 bool SkipLineComment (Token &Result, const char *CurPtr,
762 bool &TokAtPhysicalStartOfLine);
763 bool SkipBlockComment (Token &Result, const char *CurPtr,
764 bool &TokAtPhysicalStartOfLine);
765 bool SaveLineComment (Token &Result, const char *CurPtr);
766
767 bool IsStartOfConflictMarker(const char *CurPtr);
768 bool HandleEndOfConflictMarker(const char *CurPtr);
769
770 bool lexEditorPlaceholder(Token &Result, const char *CurPtr);
771
772 bool isCodeCompletionPoint(const char *CurPtr) const;
773 void cutOffLexing() { BufferPtr = BufferEnd; }
774
775 bool isHexaLiteral(const char *Start, const LangOptions &LangOpts);
776
777 void codeCompleteIncludedFile(const char *PathStart,
778 const char *CompletionPoint, bool IsAngled);
779
780 std::optional<uint32_t>
781 tryReadNumericUCN(const char *&StartPtr, const char *SlashLoc, Token *Result);
782 std::optional<uint32_t> tryReadNamedUCN(const char *&StartPtr,
783 const char *SlashLoc, Token *Result);
784
785
786
787
788
789
790
791
792
793
794
795
796
797 uint32_t tryReadUCN(const char *&StartPtr, const char *SlashLoc, Token *Result);
798
799
800
801
802
803
804
805
806
807
808
809
810 bool tryConsumeIdentifierUCN(const char *&CurPtr, unsigned Size,
812
813
814
815
816
817
818
819 bool tryConsumeIdentifierUTF8Char(const char *&CurPtr, Token &Result);
820};
821
822}
823
824#endif
enum clang::sema::@1727::IndirectLocalPathEntry::EntryKind Kind
This is the interface for scanning header and source files to get the minimum necessary preprocessor ...
Defines the clang::LangOptions interface.
Defines the PreprocessorLexer interface.
Defines the clang::SourceLocation class and associated facilities.
Defines the clang::TokenKind enum and support functions.
__device__ __2f16 float c
Represents a character-granular source range.
static CharSourceRange getCharRange(SourceRange R)
A little helper class used to produce diagnostics.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens.
static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)
Returns a string for the source that the range encompasses.
void SetKeepWhitespaceMode(bool Val)
SetKeepWhitespaceMode - This method lets clients enable or disable whitespace retention mode.
static SourceLocation findLocationAfterToken(SourceLocation loc, tok::TokenKind TKind, const SourceManager &SM, const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine)
Checks that the given token is the first token that occurs after the given location (this excludes co...
static CharSourceRange getAsCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
bool LexFromRawLexer(Token &Result)
LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object...
bool inKeepCommentMode() const
inKeepCommentMode - Return true if the lexer should return comments as tokens.
void SetCommentRetentionState(bool Mode)
SetCommentRetentionState - Change the comment retention mode of the lexer to the specified mode.
static std::optional< Token > findPreviousToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts, bool IncludeComments)
Finds the token that comes before the given location.
void seek(unsigned Offset, bool IsAtStartOfLine)
Set the lexer's buffer pointer to Offset.
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
void ReadToEndOfLine(SmallVectorImpl< char > *Result=nullptr)
ReadToEndOfLine - Read the rest of the current preprocessor line as an uninterpreted string.
static CharSourceRange getAsCharRange(SourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Given a token range, produce a corresponding CharSourceRange that is not a token range.
static bool isAtStartOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroBegin=nullptr)
Returns true if the given MacroID location points at the first token of the macro expansion.
static SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, unsigned Characters, const SourceManager &SM, const LangOptions &LangOpts)
AdvanceToTokenCharacter - If the current SourceLocation specifies a location at the start of a token,...
DiagnosticBuilder Diag(const char *Loc, unsigned DiagID) const
Diag - Forwarding function for diagnostics.
StringRef getBuffer() const
Gets source code buffer.
const char * getBufferLocation() const
Return the current location in the buffer.
bool Lex(Token &Result)
Lex - Return the next token in the file.
bool isPragmaLexer() const
isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
static unsigned getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo, const SourceManager &SM, const LangOptions &LangOpts)
Get the physical length (including trigraphs and escaped newlines) of the first CharNo characters...
static bool isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroEnd=nullptr)
Returns true if the given MacroID location points at the last token of the macro expansion.
SourceLocation getSourceLocation() override
getSourceLocation - Return a source location for the next character in the current file.
static CharSourceRange makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Accepts a range and returns a character range with file locations.
unsigned getCurrentBufferOffset()
Returns the current lexing offset.
static bool isNewLineEscaped(const char *BufferStart, const char *Str)
Checks whether the newline pointed to by Str is preceded by an escape sequence.
SourceLocation getFileLoc() const
getFileLoc - Return the File Location for the file we are lexing out of.
static StringRef getIndentationForLine(SourceLocation Loc, const SourceManager &SM)
Returns the leading whitespace for the line that corresponds to the given location Loc.
static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)
getSpelling - This method is used to get the spelling of a token into a preallocated buffer,...
Lexer & operator=(const Lexer &)=delete
bool isKeepWhitespaceMode() const
isKeepWhitespaceMode - Return true if the lexer should return tokens for every character in the file,...
static bool isAsciiIdentifierContinueChar(char c, const LangOptions &LangOpts)
Returns true if the given character could appear in an identifier.
static std::optional< Token > findNextToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts, bool IncludeComments=false)
Finds the token that comes right after the given location.
static unsigned MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
MeasureTokenLength - Relex the token at the specified location and return its length in bytes in the ...
static SourceLocation GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Given a location anywhere in a source buffer, find the location that corresponds to the beginning of...
void resetExtendedTokenMode()
Sets the extended token mode back to its initial value, according to the language options and preproc...
static StringRef getImmediateMacroNameForDiagnostics(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
static Lexer * Create_PragmaLexer(SourceLocation SpellingLoc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLen, Preprocessor &PP)
Create_PragmaLexer: Lexer constructor - Create a new lexer object for _Pragma expansion.
static PreambleBounds ComputePreamble(StringRef Buffer, const LangOptions &LangOpts, unsigned MaxLines=0)
Compute the preamble of the given file.
Lexer(const Lexer &)=delete
static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)
Computes the source location just past the end of the token at this source location.
static std::string Stringify(StringRef Str, bool Charify=false)
Stringify - Convert the specified string into a C string by i) escaping '\' and " characters and ii) ...
static SizedChar getCharAndSizeNoWarn(const char *Ptr, const LangOptions &LangOpts)
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.
bool isFirstTimeLexingFile() const
Check if this is the first time we're lexing the input file.
bool LexingRawMode
True if in raw mode.
const FileID FID
The SourceManager FileID corresponding to the file being lexed.
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
Encodes a location in the source.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
This class handles loading and caching of source files into memory.
A trivial tuple used to represent a source range.
Token - This structure provides full information about a lexed token.
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
The JSON file list parser is used to communicate input to InstallAPI.
ConflictMarkerKind
ConflictMarkerKind - Kinds of conflict marker which the lexer might be recovering from.
@ CMK_Perforce
A Perforce-style conflict marker, initiated by 4 ">"s, separated by 4 "="s, and terminated by 4 "<"s.
@ CMK_None
Not within a conflict marker.
@ CMK_Normal
A normal or diff3 conflict marker, initiated by at least 7 "<"s, separated by at least 7 "="s or "|"s...
@ Result
The result type of a method or function.
Diagnostic wrappers for TextAPI types for error reporting.
Represents a char and the number of bytes parsed to produce it.
Describes the bounds (start, size) of the preamble and a flag required by PreprocessorOptions::Precom...
unsigned Size
Size of the preamble in bytes.
bool PreambleEndsAtStartOfLine
Whether the preamble ends at the start of a new line.
PreambleBounds(unsigned Size, bool PreambleEndsAtStartOfLine)