LLVM: lib/TableGen/TGLexer.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
18#include "llvm/Config/config.h"
23#include
24#include
25#include
26#include
27
28using namespace llvm;
29
30namespace {
31
32
33struct PreprocessorDir {
35 StringRef Word;
36};
37}
38
39
40
41
42
43
49
55
56
57
60
61
62 const char *Next = Str.begin();
65
67
68
69 const char *End = Str.end();
73}
74
76 CurBuffer = SrcMgr.getMainFileID();
77 CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
78 CurPtr = CurBuf.begin();
79 TokStart = nullptr;
80
81
82 PrepIncludeStack.emplace_back();
83
84
85
86 for (StringRef MacroName : Macros) {
88 if (End != MacroName.end())
90 "` specified on command line");
91
92 DefinedMacros.insert(MacroName);
93 }
94}
95
97
101
102
103
107}
108
111}
112
113bool TGLexer::processEOF() {
114 SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
115 if (ParentIncludeLoc != SMLoc()) {
116
117
118
119
120 if (!prepExitInclude(false))
121 return false;
122
123 CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);
124 CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
125 CurPtr = ParentIncludeLoc.getPointer();
126
127
128
129 TokStart = CurPtr;
130 return true;
131 }
132
133
134
135
136 prepExitInclude(true);
137 return false;
138}
139
// Read the character at CurPtr, advance CurPtr past it, and return it as an
// int. Newlines are normalized to '\n' and a NUL sitting exactly at the end
// of the buffer is reported as EOF.
// NOTE(review): this listing is missing a line inside the NUL case (the start
// of the call that takes the "NUL character is invalid..." message).
140int TGLexer::getNextChar() {
141 char CurChar = *CurPtr++;
142 switch (CurChar) {
143 default:
// Cast through unsigned char so ordinary characters come back non-negative.
144 return (unsigned char)CurChar;
145
146 case 0: {
147
148
// CurPtr was already advanced, so CurPtr - 1 is where the NUL was read.
// If that is the end of the buffer, step back so CurPtr stays at the end
// and report EOF.
149 if (CurPtr - 1 == CurBuf.end()) {
150 --CurPtr;
151 return EOF;
152 }
// Otherwise the NUL is inside the buffer; it is returned as a space.
154 "NUL character is invalid in source; treated as space");
155 return ' ';
156 }
157
158 case '\n':
159 case '\r':
160
161
162
// If the next character is the other newline character (a "\n\r" or "\r\n"
// pair), consume it too so the pair yields a single '\n'.
163 if ((*CurPtr == '\n' || (*CurPtr == '\r')) && *CurPtr != CurChar)
164 ++CurPtr;
165 return '\n';
166 }
167}
168
169int TGLexer::peekNextChar(int Index) const { return *(CurPtr + Index); }
170
171tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {
172 while (true) {
173 TokStart = CurPtr;
174
175 int CurChar = getNextChar();
176
177 switch (CurChar) {
178 default:
179
181 return LexIdentifier();
182
183
184 return ReturnError(TokStart, "unexpected character");
185 case EOF:
186
187 if (processEOF()) {
188
189
190 FileOrLineStart = false;
191 break;
192 }
193
194
196
197 case ':':
199 case ';':
201 case ',':
203 case '<':
205 case '>':
207 case ']':
209 case '{':
211 case '}':
213 case '(':
215 case ')':
217 case '=':
219 case '?':
221 case '#':
222 if (FileOrLineStart) {
225 return lexPreprocessor(Kind);
226 }
227
229
230
231
232 case '.':
233 if (peekNextChar(0) == '.') {
234 ++CurPtr;
235 if (peekNextChar(0) == '.') {
236 ++CurPtr;
238 }
239 return ReturnError(TokStart, "invalid '..' punctuation");
240 }
242
243 case '\r':
245
246 case ' ':
247 case '\t':
248
249 break;
250 case '\n':
251
252 FileOrLineStart = true;
253 break;
254 case '/':
255
256
257 if (*CurPtr == '/')
258 SkipBCPLComment();
259 else if (*CurPtr == '*') {
260 if (SkipCComment())
262 } else
263 return ReturnError(TokStart, "unexpected character");
264 break;
265 case '-':
266 case '+':
267 case '0':
268 case '1':
269 case '2':
270 case '3':
271 case '4':
272 case '5':
273 case '6':
274 case '7':
275 case '8':
276 case '9': {
277 int NextChar = 0;
279
280
281
282 int i = 0;
283 do {
284 NextChar = peekNextChar(i++);
285 } while (isDigit(NextChar));
286
287 if (NextChar == 'x' || NextChar == 'b') {
288
289
290 int NextNextChar = peekNextChar(i);
291 switch (NextNextChar) {
292 default:
293 break;
294 case '0':
295 case '1':
296 if (NextChar == 'b')
297 return LexNumber();
298 [[fallthrough]];
299 case '2':
300 case '3':
301 case '4':
302 case '5':
303 case '6':
304 case '7':
305 case '8':
306 case '9':
307 case 'a':
308 case 'b':
309 case 'c':
310 case 'd':
311 case 'e':
312 case 'f':
313 case 'A':
314 case 'B':
315 case 'C':
316 case 'D':
317 case 'E':
318 case 'F':
319 if (NextChar == 'x')
320 return LexNumber();
321 break;
322 }
323 }
324 }
325
327 return LexIdentifier();
328
329 return LexNumber();
330 }
331 case '"':
332 return LexString();
333 case '$':
334 return LexVarName();
335 case '[':
336 return LexBracket();
337 case '!':
338 return LexExclaim();
339 }
340 }
341}
342
343
345 const char *StrStart = CurPtr;
346
347 CurStrVal = "";
348
349 while (*CurPtr != '"') {
350
351 if (*CurPtr == 0 && CurPtr == CurBuf.end())
352 return ReturnError(StrStart, "end of file in string literal");
353
354 if (*CurPtr == '\n' || *CurPtr == '\r')
355 return ReturnError(StrStart, "end of line in string literal");
356
357 if (*CurPtr != '\\') {
358 CurStrVal += *CurPtr++;
359 continue;
360 }
361
362 ++CurPtr;
363
364 switch (*CurPtr) {
365 case '\\':
366 case '\'':
367 case '"':
368
369 CurStrVal += *CurPtr++;
370 break;
371 case 't':
372 CurStrVal += '\t';
373 ++CurPtr;
374 break;
375 case 'n':
376 CurStrVal += '\n';
377 ++CurPtr;
378 break;
379
380 case '\n':
381 case '\r':
382 return ReturnError(CurPtr, "escaped newlines not supported in tblgen");
383
384
385 case '\0':
386 if (CurPtr == CurBuf.end())
387 return ReturnError(StrStart, "end of file in string literal");
388 [[fallthrough]];
389 default:
390 return ReturnError(CurPtr, "invalid escape in string literal");
391 }
392 }
393
394 ++CurPtr;
396}
397
400 return ReturnError(TokStart, "invalid variable name");
401
402
403 const char *VarNameStart = CurPtr++;
404
406 ++CurPtr;
407
408 CurStrVal.assign(VarNameStart, CurPtr);
410}
411
413
414 const char *IdentStart = TokStart;
415
416
418 ++CurPtr;
419
420
421 StringRef Str(IdentStart, CurPtr - IdentStart);
422
451
452
453 switch (Kind) {
455 if (LexInclude())
457 return Lex();
459 CurStrVal.assign(Str.begin(), Str.end());
460 break;
461 default:
462 break;
463 }
464
466}
467
468
469
// Switch lexing to an included file. Every visible failure path returns true;
// the success path returns false.
// NOTE(review): several lines are missing from this listing (the code before
// the first two `return true` statements and the start of the call whose last
// argument is IncludedFile), so only the visible part is described here.
470bool TGLexer::LexInclude() {
471
474 return true;
477 return true;
478 }
479
480
// Copy the file name out of CurStrVal.
481 std::string Filename = CurStrVal;
482 std::string IncludedFile;
483
485 IncludedFile);
// A zero CurBuffer is reported as a missing include file.
486 if (!CurBuffer) {
487 PrintError(getLoc(), "could not find include file '" + Filename + "'");
488 return true;
489 }
490
// Record the included file in Dependencies.
491 Dependencies.insert(IncludedFile);
492
// Start lexing at the beginning of the newly selected buffer.
493 CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
494 CurPtr = CurBuf.begin();
495
// Push a fresh, empty preprocessor-control frame for the included file.
496 PrepIncludeStack.emplace_back();
497 return false;
498}
499
500
501
502void TGLexer::SkipBCPLComment() {
503 ++CurPtr;
504 auto EOLPos = CurBuf.find_first_of("\r\n", CurPtr - CurBuf.data());
505 CurPtr = (EOLPos == StringRef::npos) ? CurBuf.end() : CurBuf.data() + EOLPos;
506}
507
508
509
510bool TGLexer::SkipCComment() {
511 ++CurPtr;
512 unsigned CommentDepth = 1;
513
514 while (true) {
515 int CurChar = getNextChar();
516 switch (CurChar) {
517 case EOF:
518 PrintError(TokStart, "unterminated comment");
519 return true;
520 case '*':
521
522 if (CurPtr[0] != '/')
523 break;
524
525 ++CurPtr;
526 if (--CommentDepth == 0)
527 return false;
528 break;
529 case '/':
530
531 if (CurPtr[0] != '*')
532 break;
533 ++CurPtr;
534 ++CommentDepth;
535 break;
536 }
537 }
538}
539
540
541
542
543
545 unsigned Base = 0;
546 const char *NumStart;
547
548
549 if (CurPtr[-1] == '0') {
550 NumStart = CurPtr + 1;
551 if (CurPtr[0] == 'x') {
553 do
554 ++CurPtr;
556 } else if (CurPtr[0] == 'b') {
558 do
559 ++CurPtr;
560 while (CurPtr[0] == '0' || CurPtr[0] == '1');
561 }
562 }
563
564
565 bool IsMinus = false;
566
567
568 if (Base == 0) {
569
570 if ((CurPtr[0])) {
571 if (CurPtr[-1] == '-')
573 else if (CurPtr[-1] == '+')
575 }
576
578 NumStart = TokStart;
579 IsMinus = CurPtr[-1] == '-';
580
581 while (isDigit(CurPtr[0]))
582 ++CurPtr;
583 }
584
585
586 if (CurPtr == NumStart)
587 return ReturnError(TokStart, "invalid number");
588
589 errno = 0;
590 if (IsMinus)
591 CurIntVal = strtoll(NumStart, nullptr, Base);
592 else
593 CurIntVal = strtoull(NumStart, nullptr, Base);
594
595 if (errno == EINVAL)
596 return ReturnError(TokStart, "invalid number");
597 if (errno == ERANGE)
598 return ReturnError(TokStart, "number out of range");
599
601}
602
603
604
606 if (CurPtr[0] != '{')
608 ++CurPtr;
609 const char *CodeStart = CurPtr;
610 while (true) {
611 int Char = getNextChar();
612 if (Char == EOF)
613 break;
614
615 if (Char != '}')
616 continue;
617
618 Char = getNextChar();
619 if (Char == EOF)
620 break;
621 if (Char == ']') {
622 CurStrVal.assign(CodeStart, CurPtr - 2);
624 }
625 }
626
627 return ReturnError(CodeStart - 2, "unterminated code block");
628}
629
630
633 return ReturnError(CurPtr - 1, "invalid \"!operator\"");
634
635 const char *Start = CurPtr++;
637 ++CurPtr;
638
639
641 StringSwitchtgtok::TokKind(StringRef(Start, CurPtr - Start))
685 .Cases({"setdagop", "setop"},
687 .Cases({"getdagop", "getop"},
701
703 : ReturnError(Start - 1, "unknown operator");
704}
705
706bool TGLexer::prepExitInclude(bool IncludeStackMustBeEmpty) {
707
708
709 if (!PrepIncludeStack.back().empty()) {
710 prepReportPreprocessorStackError();
711
712 return false;
713 }
714
715
716 PrepIncludeStack.pop_back();
717
718 if (IncludeStackMustBeEmpty) {
719 assert(PrepIncludeStack.empty() &&
720 "preprocessor include stack is not empty");
721 } else {
722 assert(!PrepIncludeStack.empty() && "preprocessor include stack is empty");
723 }
724
725 return true;
726}
727
730 if (StringRef(CurPtr, Word.size()) != Word)
731 continue;
732 int NextChar = peekNextChar(Word.size());
733
734
735
736
737
738
739 if (NextChar == ' ' || NextChar == '\t' || NextChar == EOF ||
740 NextChar == '\n' ||
741
742
743
744 NextChar == '\r')
746
747
748
749
750
751
752
753
754
755
756
757
758
759 if (NextChar == '/') {
760 NextChar = peekNextChar(Word.size() + 1);
761
762 if (NextChar == '*' || NextChar == '/')
764
765
766 }
767 }
768
770}
771
772void TGLexer::prepEatPreprocessorDirective(tgtok::TokKind Kind) {
773 TokStart = CurPtr;
774
776 if (PKind == Kind) {
777
778 CurPtr += PWord.size();
779 return;
780 }
781 }
782
784 "unsupported preprocessing token in prepEatPreprocessorDirective()");
785}
786
788 bool ReturnNextLiveToken) {
789
790 prepEatPreprocessorDirective(Kind);
791
793 StringRef MacroName = prepLexMacroName();
794 StringRef IfTokName = Kind == tgtok::Ifdef ? "#ifdef" : "#ifndef";
795 if (MacroName.empty())
796 return ReturnError(TokStart, "expected macro name after " + IfTokName);
797
798 bool MacroIsDefined = DefinedMacros.count(MacroName) != 0;
799
800
802 MacroIsDefined = !MacroIsDefined;
803
804
805
806
807 PrepIncludeStack.back().push_back(
809
810 if (!prepSkipDirectiveEnd())
811 return ReturnError(CurPtr, "only comments are supported after " +
812 IfTokName + " NAME");
813
814
815
816 if (!ReturnNextLiveToken)
818
819
820
821 if (MacroIsDefined)
822 return LexToken();
823
824
825
826
827
828 if (prepSkipRegion(ReturnNextLiveToken))
829 return LexToken();
830
833
834
835 if (PrepIncludeStack.back().empty())
836 return ReturnError(TokStart, "#else without #ifdef or #ifndef");
837
838 PreprocessorControlDesc IfdefEntry = PrepIncludeStack.back().back();
839
841 PrintError(TokStart, "double #else");
842 return ReturnError(IfdefEntry.SrcPos, "previous #else is here");
843 }
844
845
846
847 PrepIncludeStack.back().back() = {Kind, !IfdefEntry.IsDefined,
849
850 if (!prepSkipDirectiveEnd())
851 return ReturnError(CurPtr, "only comments are supported after #else");
852
853
854
855 if (ReturnNextLiveToken) {
856 if (prepSkipRegion(ReturnNextLiveToken))
857 return LexToken();
858
860 }
861
862
865
866
867 if (PrepIncludeStack.back().empty())
868 return ReturnError(TokStart, "#endif without #ifdef");
869
870 [[maybe_unused]] auto &IfdefOrElseEntry = PrepIncludeStack.back().back();
871
874 "invalid preprocessor control on the stack");
875
876 if (!prepSkipDirectiveEnd())
877 return ReturnError(CurPtr, "only comments are supported after #endif");
878
879 PrepIncludeStack.back().pop_back();
880
881
882
883 if (ReturnNextLiveToken) {
884 return LexToken();
885 }
886
887
890 StringRef MacroName = prepLexMacroName();
891 if (MacroName.empty())
892 return ReturnError(TokStart, "expected macro name after #define");
893
894 if (!DefinedMacros.insert(MacroName).second)
896 "duplicate definition of macro: " + Twine(MacroName));
897
898 if (!prepSkipDirectiveEnd())
899 return ReturnError(CurPtr,
900 "only comments are supported after #define NAME");
901
902 assert(ReturnNextLiveToken &&
903 "#define must be ignored during the lines skipping");
904
905 return LexToken();
906 }
907
908 llvm_unreachable("preprocessing directive is not supported");
909}
910
// Skip source lines until a preprocessing directive makes
// prepIsProcessingEnabled() true again. Returns true in that case; returns
// false on the visible error paths and when the buffer ends first.
// NOTE(review): this listing is missing several lines (the declaration of
// Kind, the conditions guarding the second `continue` and the `return false`
// after lexPreprocessor(), and the start of the last assert), so those parts
// are not described here.
911bool TGLexer::prepSkipRegion(bool MustNeverBeFalse) {
912 assert(MustNeverBeFalse && "invalid recursion.");
913
914 do {
915
// Advance to the next '\n'.
// NOTE(review): this scan has no check against CurBuf.end(); confirm that a
// '\n' is always present ahead of CurPtr when this is reached.
916 while (*CurPtr != '\n')
917 ++CurPtr;
918
919
// Skip whitespace, newlines and C-style comments at the start of the line;
// prepSkipLineBegin() returns false when a comment is unterminated.
920 if (!prepSkipLineBegin())
921 return false;
922
923
924
925
926
// Only lines whose first remaining character is '#' are examined further.
927 if (*CurPtr == '#')
928 ++CurPtr;
929 else
930 continue;
931
933
934
935
936
938 continue;
939
// Process the directive, passing false as the second argument.
940 tgtok::TokKind ProcessedKind = lexPreprocessor(Kind, false);
941
942
943
944
946 return false;
947
948 assert(Kind == ProcessedKind && "prepIsDirective() and lexPreprocessor() "
949 "returned different token kinds");
950
951
952
953
954
// Stop skipping once every control in the current frame is enabled.
955 if (prepIsProcessingEnabled()) {
957 "tokens processing was enabled by an unexpected preprocessing "
958 "directive");
959
960 return true;
961 }
962 } while (CurPtr != CurBuf.end());
963
964
965
// The buffer ended while still skipping: report the unbalanced control stack.
966 prepReportPreprocessorStackError();
967 return false;
968}
969
970StringRef TGLexer::prepLexMacroName() {
971
972 while (*CurPtr == ' ' || *CurPtr == '\t')
973 ++CurPtr;
974
975 TokStart = CurPtr;
976 CurPtr = lexMacroName(StringRef(CurPtr, CurBuf.end() - CurPtr));
977 return StringRef(TokStart, CurPtr - TokStart);
978}
979
980bool TGLexer::prepSkipLineBegin() {
981 while (CurPtr != CurBuf.end()) {
982 switch (*CurPtr) {
983 case ' ':
984 case '\t':
985 case '\n':
986 case '\r':
987 break;
988
989 case '/': {
990 int NextChar = peekNextChar(1);
991 if (NextChar == '*') {
992
993
994
995
996
997
998
999
1000
1001
1002 TokStart = CurPtr;
1003
1004
1005 ++CurPtr;
1006 if (SkipCComment())
1007 return false;
1008 } else {
1009
1010 return true;
1011 }
1012
1013
1014 continue;
1015 }
1016
1017 default:
1018 return true;
1019 }
1020
1021 ++CurPtr;
1022 }
1023
1024
1025
1026 return true;
1027}
1028
1029bool TGLexer::prepSkipDirectiveEnd() {
1030 while (CurPtr != CurBuf.end()) {
1031 switch (*CurPtr) {
1032 case ' ':
1033 case '\t':
1034 break;
1035
1036 case '\n':
1037 case '\r':
1038 return true;
1039
1040 case '/': {
1041 int NextChar = peekNextChar(1);
1042 if (NextChar == '/') {
1043
1044
1045
1046 ++CurPtr;
1047 SkipBCPLComment();
1048 } else if (NextChar == '*') {
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064 TokStart = CurPtr;
1065 ++CurPtr;
1066 if (SkipCComment())
1067 return false;
1068 } else {
1069 TokStart = CurPtr;
1070 PrintError(CurPtr, "unexpected character");
1071 return false;
1072 }
1073
1074
1075 continue;
1076 }
1077
1078 default:
1079
1080 TokStart = CurPtr;
1081 return false;
1082 }
1083
1084 ++CurPtr;
1085 }
1086
1087 return true;
1088}
1089
1090bool TGLexer::prepIsProcessingEnabled() {
1091 return all_of(PrepIncludeStack.back(),
1092 [](const PreprocessorControlDesc &I) { return I.IsDefined; });
1093}
1094
1095void TGLexer::prepReportPreprocessorStackError() {
1096 auto &PrepControl = PrepIncludeStack.back().back();
1097 PrintError(CurBuf.end(), "reached EOF without matching #endif");
1098 PrintError(PrepControl.SrcPos, "the latest preprocessor control is here");
1099
1100 TokStart = CurPtr;
1101}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements the StringSwitch template, which mimics a switch() statement whose cases are string literals.
constexpr PreprocessorDir PreprocessorDirs[]
Definition TGLexer.cpp:50
static bool isValidIDChar(char C, bool First)
Returns true if C is a valid character in an identifier.
Definition TGLexer.cpp:44
static const char * lexMacroName(StringRef Str)
Definition TGLexer.cpp:58
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Represents a location in source code.
static SMLoc getFromPointer(const char *Ptr)
constexpr const char * getPointer() const
Represents a range in source code.
This owns the files read by a parser, handles include stacks, and handles diagnostic wrangling.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
static constexpr size_t npos
constexpr bool empty() const
empty - Check if the string is empty.
SMRange getLocRange() const
Definition TGLexer.cpp:98
SMLoc getLoc() const
Definition TGLexer.cpp:96
TGLexer(SourceMgr &SrcMgr, ArrayRef< std::string > Macros)
Definition TGLexer.cpp:75
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
support::ulittle32_t Word
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
void PrintFatalError(const Twine &Msg)
void PrintError(const Twine &Msg)
void PrintWarning(const Twine &Msg)
bool isAlpha(char C)
Checks if character C is a valid letter as classified by "C" locale.
bool isDigit(char C)
Checks if character C is one of the 10 decimal digits.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
FunctionAddr VTableAddr Next
bool isHexDigit(char C)
Checks if character C is a hexadecimal numeric character.