clang: include/clang/Lex/Lexer.h Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13#ifndef LLVM_CLANG_LEX_LEXER_H
14#define LLVM_CLANG_LEX_LEXER_H
15
22#include "llvm/ADT/SmallVector.h"
23#include "llvm/ADT/StringRef.h"
24#include <cassert>
25#include <cstdint>
26#include <optional>
27#include <string>
28
29namespace llvm {
30
31class MemoryBufferRef;
32
33}
34
36
37class DiagnosticBuilder;
38class Preprocessor;
39class SourceManager;
40class LangOptions;
41
42
43
45
47
48
49
51
52
53
56
57
58
59
61
63
64
65
66
67
69
72};
73
74
75
76
77
80
81 void anchor() override;
82
83
84
85
86
87 const char *BufferStart;
88
89
90 const char *BufferEnd;
91
92
94
95
96
97
98
99
100
101
102
104
105
106 bool LineComment;
107
108
109 bool Is_PragmaLexer;
110
111
112
113
114
115
116
117
118
119
120
121
122 unsigned char ExtendedTokenMode;
123
124
125
126
127
128
129
130
131 const char *BufferPtr;
132
133
134
135 bool IsAtStartOfLine;
136
137 bool IsAtPhysicalStartOfLine;
138
139 bool HasLeadingSpace;
140
141 bool HasLeadingEmptyMacro;
142
143
144 bool IsFirstTimeLexingFile;
145
146
147
148 const char *NewLinePtr;
149
150
152
153
155
156
157
158 unsigned NextDepDirectiveTokenIndex = 0;
159
160 void InitLexer(const char *BufStart, const char *BufPtr, const char *BufEnd);
161
162public:
163
164
165
166
168 bool IsFirstIncludeOfFile = true);
169
170
171
172
174 const char *BufStart, const char *BufPtr, const char *BufEnd,
175 bool IsFirstIncludeOfFile = true);
176
177
178
179
180 Lexer(FileID FID, const llvm::MemoryBufferRef &FromFile,
182 bool IsFirstIncludeOfFile = true);
183
186
187
188
189
194
195
196
197
198
200
201
202
204
205private:
206
207 bool LexDependencyDirectiveToken(Token &Result);
208
209
210
211 bool LexDependencyDirectiveTokenWhileSkipping(Token &Result);
212
213
214 /// Reports whether this lexer has dependency directives: true exactly when DepDirectives is non-empty.
215 bool isDependencyDirectivesLexer() const { return !DepDirectives.empty(); }
216
217
218
219
220 const char *convertDependencyDirectiveToken(
221 const dependency_directives_scan::Token &DDTok, Token &Result);
222
223public:
224
226
227private:
228
229
231
232public:
233
234
235
237 assert(LexingRawMode && "Not already in raw mode!");
239
240
241 return BufferPtr == BufferEnd;
242 }
243
244
245
246
247
249 return ExtendedTokenMode > 1;
250 }
251
252
253
255 assert((!Val || LexingRawMode || LangOpts.TraditionalCPP) &&
256 "Can only retain whitespace in raw mode or -traditional-cpp");
257 ExtendedTokenMode = Val ? 2 : 0;
258 }
259
260
261
263 return ExtendedTokenMode > 0;
264 }
265
266
267
268
271 "Can't play with comment retention state when retaining whitespace");
272 ExtendedTokenMode = Mode ? 1 : 0;
273 }
274
275
276
277
278
279
280
282
283
285 return StringRef(BufferStart, BufferEnd - BufferStart);
286 }
287
288
289
291
292
293
294
296
297
298
300
301
302
305 }
306
307
309
310
312 assert(BufferPtr >= BufferStart && "Invalid buffer state");
313 return BufferPtr - BufferStart;
314 }
315
316
317 void seek(unsigned Offset, bool IsAtStartOfLine);
318
319
320
321
322 static std::string Stringify(StringRef Str, bool Charify = false);
323
324
325
327
328
329
330
331
332
333
334
335
336
337
338 static unsigned getSpelling(const Token &Tok, const char *&Buffer,
341 bool *Invalid = nullptr);
342
343
344
345
346
347
351 bool *Invalid = nullptr);
352
353
354
355
356
357
358
359
360
365 bool *invalid = nullptr);
366
367
368
369
370
374
375
376
380 bool IgnoreWhiteSpace = false);
381
382
383
384
388
389
390
392 unsigned CharNo,
395
396
397
398
400 unsigned Characters,
405 }
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
425
426
427
428
429
436 Range.getBegin(), End);
437 }
441 return Range.isTokenRange()
444 }
445
446
447
448
449
450
455
456
457
458
459
460
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
495
496
500 bool *Invalid = nullptr);
501
502
503
504
505
506
507
508
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
550 unsigned MaxLines = 0);
551
552
553
554
558 bool IncludeComments = false);
559
560
561
562
563
564
569 bool SkipTrailingWhitespaceAndNewLine);
570
571
574
575
576
577 static bool isNewLineEscaped(const char *BufferStart, const char *Str);
578
579
583 };
584
585
586
589
590
591 if (isObviouslySimpleCharacter(Ptr[0])) {
592 return {*Ptr, 1u};
593 }
594
595 return getCharAndSizeSlowNoWarn(Ptr, LangOpts);
596 }
597
598
599
602
603
605
606private:
607
608
609
610
611
612
613 bool LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine);
614
615 bool CheckUnicodeWhitespace(Token &Result, uint32_t C, const char *CurPtr);
616
617 bool LexUnicodeIdentifierStart(Token &Result, uint32_t C, const char *CurPtr);
618
619
620
621
622
623
624 void FormTokenWithChars(Token &Result, const char *TokEnd,
626 unsigned TokLen = TokEnd-BufferPtr;
627 Result.setLength(TokLen);
630 BufferPtr = TokEnd;
631 }
632
633
634
635
636 unsigned isNextPPTokenLParen();
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657 /// Tell whether C can be taken at face value by the fast paths in this
658 /// class, i.e. it is neither '?' nor a backslash. Callers fall back to
659 /// their slow path when this returns false.
660 static bool isObviouslySimpleCharacter(char C) {
661 return !(C == '?' || C == '\\');
662 }
663
664
665 /// Read the character at Ptr, move Ptr past it, and return it. Characters
666 /// accepted by isObviouslySimpleCharacter take one byte; everything else is
667 /// decoded by getCharAndSizeSlow, which is given Tok.
668 inline char getAndAdvanceChar(const char *&Ptr, Token &Tok) {
669 // Fast path: a character that is neither '?' nor a backslash is returned
670 // as-is and Ptr moves forward one byte.
671 if (isObviouslySimpleCharacter(Ptr[0])) return *Ptr++;
672
673 auto [C, Size] = getCharAndSizeSlow(Ptr, &Tok);
674 Ptr += Size; // Advance past every byte the slow path consumed.
675 return C;
676 }
677
678
679 /// Step past the character at Ptr whose previously measured length is Size
680 /// and return the new position. Characters longer than one byte are measured
681 /// again through getCharAndSizeSlow, which is handed Tok.
682 const char *ConsumeChar(const char *Ptr, unsigned Size, Token &Tok) {
683 // Anything that is not exactly one byte is re-read by the slow path with
684 // the token attached; the size it reports is the distance to skip.
685 if (Size != 1)
686 return Ptr + getCharAndSizeSlow(Ptr, &Tok).Size;
687
688 // One-byte character: the next position is simply the following byte.
689 return Ptr + 1;
690 }
691
692
693 /// Return the character at Ptr without moving Ptr, and store the number of
694 /// bytes it occupies in Size. Characters rejected by
695 /// isObviouslySimpleCharacter are decoded by getCharAndSizeSlow.
696 inline char getCharAndSize(const char *Ptr, unsigned &Size) {
697 // Fast path: a character that is neither '?' nor a backslash stands for
698 // itself and is one byte long.
699 if (isObviouslySimpleCharacter(Ptr[0])) {
700 Size = 1; // Size is an out-parameter and must be written on this path too.
701 return *Ptr;
702 }
703
704 auto CharAndSize = getCharAndSizeSlow(Ptr);
705 Size = CharAndSize.Size;
706 return CharAndSize.Char;
707 }
708
709
710
711 SizedChar getCharAndSizeSlow(const char *Ptr, Token *Tok = nullptr);
712
713
714
715
716 static unsigned getEscapedNewLineSize(const char *P);
717
718
719
720
721 static const char *SkipEscapedNewLines(const char *P);
722
723
724
725 static SizedChar getCharAndSizeSlowNoWarn(const char *Ptr,
726 const LangOptions &LangOpts);
727
728
729
730
731 void SetByteOffset(unsigned Offset, bool StartOfLine);
732
733 void PropagateLineStartLeadingSpaceInfo(Token &Result);
734
735 const char *LexUDSuffix(Token &Result, const char *CurPtr,
736 bool IsStringLiteral);
737
738
739
740
741
742 bool LexIdentifierContinue(Token &Result, const char *CurPtr);
743
744 bool LexNumericConstant (Token &Result, const char *CurPtr);
745 bool LexStringLiteral (Token &Result, const char *CurPtr,
747 bool LexRawStringLiteral (Token &Result, const char *CurPtr,
749 bool LexAngledStringLiteral(Token &Result, const char *CurPtr);
750 bool LexCharConstant (Token &Result, const char *CurPtr,
752 bool LexEndOfFile (Token &Result, const char *CurPtr);
753 bool SkipWhitespace (Token &Result, const char *CurPtr,
754 bool &TokAtPhysicalStartOfLine);
755 bool SkipLineComment (Token &Result, const char *CurPtr,
756 bool &TokAtPhysicalStartOfLine);
757 bool SkipBlockComment (Token &Result, const char *CurPtr,
758 bool &TokAtPhysicalStartOfLine);
759 bool SaveLineComment (Token &Result, const char *CurPtr);
760
761 bool IsStartOfConflictMarker(const char *CurPtr);
762 bool HandleEndOfConflictMarker(const char *CurPtr);
763
764 bool lexEditorPlaceholder(Token &Result, const char *CurPtr);
765
766 bool isCodeCompletionPoint(const char *CurPtr) const;
767 void cutOffLexing() { BufferPtr = BufferEnd; } // Moves BufferPtr to BufferEnd.
768
769 bool isHexaLiteral(const char *Start, const LangOptions &LangOpts);
770
771 void codeCompleteIncludedFile(const char *PathStart,
772 const char *CompletionPoint, bool IsAngled);
773
774 std::optional<uint32_t>
775 tryReadNumericUCN(const char *&StartPtr, const char *SlashLoc, Token *Result);
776 std::optional<uint32_t> tryReadNamedUCN(const char *&StartPtr,
777 const char *SlashLoc, Token *Result);
778
779
780
781
782
783
784
785
786
787
788
789
790
791 uint32_t tryReadUCN(const char *&StartPtr, const char *SlashLoc, Token *Result);
792
793
794
795
796
797
798
799
800
801
802
803
804 bool tryConsumeIdentifierUCN(const char *&CurPtr, unsigned Size,
806
807
808
809
810
811
812
813 bool tryConsumeIdentifierUTF8Char(const char *&CurPtr, Token &Result);
814};
815
816}
817
818#endif
enum clang::sema::@1726::IndirectLocalPathEntry::EntryKind Kind
This is the interface for scanning header and source files to get the minimum necessary preprocessor ...
Defines the clang::LangOptions interface.
Defines the PreprocessorLexer interface.
Defines the clang::SourceLocation class and associated facilities.
Defines the clang::TokenKind enum and support functions.
__device__ __2f16 float c
Represents a character-granular source range.
static CharSourceRange getCharRange(SourceRange R)
A little helper class used to produce diagnostics.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens.
static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)
Returns a string for the source that the range encompasses.
void SetKeepWhitespaceMode(bool Val)
SetKeepWhitespaceMode - This method lets clients enable or disable whitespace retention mode.
static SourceLocation findLocationAfterToken(SourceLocation loc, tok::TokenKind TKind, const SourceManager &SM, const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine)
Checks that the given token is the first token that occurs after the given location (this excludes co...
static CharSourceRange getAsCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
bool LexFromRawLexer(Token &Result)
LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object...
bool inKeepCommentMode() const
inKeepCommentMode - Return true if the lexer should return comments as tokens.
void SetCommentRetentionState(bool Mode)
SetCommentRetentionState - Change the comment retention mode of the lexer to the specified mode.
void seek(unsigned Offset, bool IsAtStartOfLine)
Set the lexer's buffer pointer to Offset.
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
void ReadToEndOfLine(SmallVectorImpl< char > *Result=nullptr)
ReadToEndOfLine - Read the rest of the current preprocessor line as an uninterpreted string.
static CharSourceRange getAsCharRange(SourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Given a token range, produce a corresponding CharSourceRange that is not a token range.
static bool isAtStartOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroBegin=nullptr)
Returns true if the given MacroID location points at the first token of the macro expansion.
static SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, unsigned Characters, const SourceManager &SM, const LangOptions &LangOpts)
AdvanceToTokenCharacter - If the current SourceLocation specifies a location at the start of a token,...
DiagnosticBuilder Diag(const char *Loc, unsigned DiagID) const
Diag - Forwarding function for diagnostics.
StringRef getBuffer() const
Gets source code buffer.
const char * getBufferLocation() const
Return the current location in the buffer.
bool Lex(Token &Result)
Lex - Return the next token in the file.
bool isPragmaLexer() const
isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
static unsigned getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo, const SourceManager &SM, const LangOptions &LangOpts)
Get the physical length (including trigraphs and escaped newlines) of the first Characters characters...
static bool isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroEnd=nullptr)
Returns true if the given MacroID location points at the last token of the macro expansion.
SourceLocation getSourceLocation() override
getSourceLocation - Return a source location for the next character in the current file.
static CharSourceRange makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Accepts a range and returns a character range with file locations.
unsigned getCurrentBufferOffset()
Returns the current lexing offset.
static bool isNewLineEscaped(const char *BufferStart, const char *Str)
Checks whether the new line pointed to by Str is preceded by an escape sequence.
SourceLocation getFileLoc() const
getFileLoc - Return the File Location for the file we are lexing out of.
static StringRef getIndentationForLine(SourceLocation Loc, const SourceManager &SM)
Returns the leading whitespace for the line that corresponds to the given location Loc.
static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)
getSpelling - This method is used to get the spelling of a token into a preallocated buffer,...
Lexer & operator=(const Lexer &)=delete
bool isKeepWhitespaceMode() const
isKeepWhitespaceMode - Return true if the lexer should return tokens for every character in the file,...
static bool isAsciiIdentifierContinueChar(char c, const LangOptions &LangOpts)
Returns true if the given character could appear in an identifier.
static std::optional< Token > findNextToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts, bool IncludeComments=false)
Finds the token that comes right after the given location.
static unsigned MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
MeasureTokenLength - Relex the token at the specified location and return its length in bytes in the ...
static SourceLocation GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Given a location anywhere in a source buffer, find the location that corresponds to the beginning of...
void resetExtendedTokenMode()
Sets the extended token mode back to its initial value, according to the language options and preproc...
static StringRef getImmediateMacroNameForDiagnostics(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
static Lexer * Create_PragmaLexer(SourceLocation SpellingLoc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLen, Preprocessor &PP)
Create_PragmaLexer: Lexer constructor - Create a new lexer object for _Pragma expansion.
static PreambleBounds ComputePreamble(StringRef Buffer, const LangOptions &LangOpts, unsigned MaxLines=0)
Compute the preamble of the given file.
Lexer(const Lexer &)=delete
static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)
Computes the source location just past the end of the token at this source location.
static std::string Stringify(StringRef Str, bool Charify=false)
Stringify - Convert the specified string into a C string by i) escaping '\' and " characters and ii) ...
static SizedChar getCharAndSizeNoWarn(const char *Ptr, const LangOptions &LangOpts)
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.
bool isFirstTimeLexingFile() const
Check if this is the first time we're lexing the input file.
bool LexingRawMode
True if in raw mode.
const FileID FID
The SourceManager FileID corresponding to the file being lexed.
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
Encodes a location in the source.
SourceLocation getLocWithOffset(IntTy Offset) const
Return a source location with the specified offset from this SourceLocation.
This class handles loading and caching of source files into memory.
A trivial tuple used to represent a source range.
Token - This structure provides full information about a lexed token.
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
The JSON file list parser is used to communicate input to InstallAPI.
ConflictMarkerKind
ConflictMarkerKind - Kinds of conflict marker which the lexer might be recovering from.
@ CMK_Perforce
A Perforce-style conflict marker, initiated by 4 ">"s, separated by 4 "="s, and terminated by 4 "<"s.
@ CMK_None
Not within a conflict marker.
@ CMK_Normal
A normal or diff3 conflict marker, initiated by at least 7 "<"s, separated by at least 7 "="s or "|"s...
@ Result
The result type of a method or function.
Diagnostic wrappers for TextAPI types for error reporting.
Represents a char and the number of bytes parsed to produce it.
Describes the bounds (start, size) of the preamble and a flag required by PreprocessorOptions::Precom...
unsigned Size
Size of the preamble in bytes.
bool PreambleEndsAtStartOfLine
Whether the preamble ends at the start of a new line.
PreambleBounds(unsigned Size, bool PreambleEndsAtStartOfLine)