LLVM: lib/MC/MCParser/AsmLexer.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
24#include
25#include
26#include
27#include
28#include
29#include
30#include
31
32using namespace llvm;
33
37}
38
40
42 bool EndStatementAtEOF) {
43 CurBuf = Buf;
44
45 if (ptr)
46 CurPtr = ptr;
47 else
48 CurPtr = CurBuf.begin();
49
51 this->EndStatementAtEOF = EndStatementAtEOF;
52}
53
54
55
56AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
58
60}
61
62int AsmLexer::getNextChar() {
63 if (CurPtr == CurBuf.end())
64 return EOF;
65 return (unsigned char)*CurPtr++;
66}
67
68int AsmLexer::peekNextChar() {
69 if (CurPtr == CurBuf.end())
70 return EOF;
71 return (unsigned char)*CurPtr;
72}
73
74
75
76
/// Lex the tail of a decimal floating-point literal.
///
/// The call sites visible in this file invoke it with CurPtr either just past
/// a '.' or sitting on an 'e'/'E'.
/// NOTE(review): this extract is missing listing lines 79, 92 and 96-97
/// (presumably the two digit-skipping loop headers and the final return), so
/// the statements below do not read as a complete function.
77AsmToken AsmLexer::LexFloatLiteral() {
78
// NOTE(review): the loop header that governs this increment (listing line 79)
// is absent here.
80 ++CurPtr;
81
// A sign that appears here, before any exponent marker, is reported as an
// error located at the sign character.
82 if (*CurPtr == '-' || *CurPtr == '+')
83 return ReturnError(CurPtr, "invalid sign in float literal");
84
85
// Optional exponent part: the marker, then an optional sign.
86 if ((*CurPtr == 'e' || *CurPtr == 'E')) {
87 ++CurPtr;
88
89 if (*CurPtr == '-' || *CurPtr == '+')
90 ++CurPtr;
91
// NOTE(review): the loop header for the exponent digits (listing line 92) is
// absent here.
93 ++CurPtr;
94 }
95
98}
99
100
101
102
103
104
105
/// Lex the remainder of a hexadecimal floating-point constant.
///
/// On entry CurPtr must point at '.', 'p' or 'P' (this is asserted).
///
/// \param NoIntDigits true when no digits were seen before the current
///        position; the call site visible in this file passes
///        `NumStart == CurPtr`.
/// \returns an error token (via ReturnError, located at TokStart) when the
///          significand has no digit at all, when the 'p'/'P' exponent marker
///          is missing, or when the exponent has no digits.
/// NOTE(review): listing lines 116, 137 and 144 (presumably the two
/// digit-skipping loop headers and the success return) are missing from this
/// extract.
106AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) {
107 assert((*CurPtr == 'p' || *CurPtr == 'P' || *CurPtr == '.') &&
108 "unexpected parse state in floating hex");
// Stays true unless at least one character is consumed after the '.'.
109 bool NoFracDigits = true;
110
111
// Optional fractional part.
112 if (*CurPtr == '.') {
113 ++CurPtr;
114
// Remember where the fraction starts so an empty fraction can be detected.
115 const char *FracStart = CurPtr;
117 ++CurPtr;
118
119 NoFracDigits = CurPtr == FracStart;
120 }
121
// Digits must appear on at least one side of the '.'.
122 if (NoIntDigits && NoFracDigits)
123 return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
124 "expected at least one significand digit");
125
126
// The exponent marker is mandatory.
127 if (*CurPtr != 'p' && *CurPtr != 'P')
128 return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
129 "expected exponent part 'p'");
130 ++CurPtr;
131
// Optional exponent sign.
132 if (*CurPtr == '+' || *CurPtr == '-')
133 ++CurPtr;
134
135
// Remember where the exponent digits start so an empty exponent can be
// detected.
136 const char *ExpStart = CurPtr;
138 ++CurPtr;
139
140 if (CurPtr == ExpStart)
141 return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
142 "expected at least one exponent digit");
143
145}
146
147
149 return isAlnum(C) || C == '_' || C == '$' || C == '.' || C == '?' ||
150 (AllowAt && C == '@') || (AllowHash && C == '#');
151}
152
153AsmToken AsmLexer::LexIdentifier() {
154
155 if (CurPtr[-1] == '.' && isDigit(*CurPtr)) {
156
158 ++CurPtr;
159
162 *CurPtr == 'e' || *CurPtr == 'E')
163 return LexFloatLiteral();
164 }
165
167 ++CurPtr;
168
169
172
174}
175
176
177
178
/// Lex input that begins with '/': dispatches on the character that follows.
///
/// "//" is handed to LexLineComment(); "/*" starts a block comment that is
/// scanned up to its closing "*/". Reaching the end of the buffer first yields
/// the error token "unterminated comment" located at TokStart.
/// NOTE(review): listing lines 180, 182, 194, 207-209 and 213-214 are missing
/// from this extract, so the leading condition, the default-case return and
/// the block-comment return path cannot be read from the text below.
179AsmToken AsmLexer::LexSlash() {
181 IsAtStartOfStatement = false;
183 }
184
185 switch (*CurPtr) {
186 case '*':
187 IsAtStartOfStatement = false;
188 break;
189 case '/':
// Step over the second '/' before handing off to the line-comment lexer.
190 ++CurPtr;
191 return LexLineComment();
192 default:
193 IsAtStartOfStatement = false;
195 }
196
197
// Block comment: step over the '*' and remember where the comment text begins.
198 ++CurPtr;
199 const char *CommentTextStart = CurPtr;
200 while (CurPtr != CurBuf.end()) {
// Note the post-increment: inside the case body CurPtr already points at the
// character after the one being examined.
201 switch (*CurPtr++) {
202 case '*':
203
// A '*' that is not followed by '/' is ordinary comment text.
204 if (*CurPtr != '/')
205 break;
206
210 StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart));
211 }
212 ++CurPtr;
215 }
216 }
217 return ReturnError(TokStart, "unterminated comment");
218}
219
220
221
/// Lex a line comment: consume characters up to and including the line
/// terminator ('\n', '\r', or a "\r\n" pair), or up to the end of input.
///
/// Sets IsAtStartOfLine to true before returning.
/// NOTE(review): listing lines 236-238, 245-246 and 249-250 are missing from
/// this extract; they presumably hold the comment-consumer call and the return
/// statements. As a result the `if (IsAtStartOfStatement)` below appears to
/// guard the following assignment, which is very likely not what the real code
/// does — confirm against the full source.
222AsmToken AsmLexer::LexLineComment() {
223
224
225
226
227 const char *CommentTextStart = CurPtr;
// Pull characters until a line terminator or EOF has been consumed.
228 int CurChar = getNextChar();
229 while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF)
230 CurChar = getNextChar();
// Cursor position after the loop, recorded before a possible '\n' is skipped.
231 const char *NewlinePtr = CurPtr;
// Treat "\r\n" as a single line terminator.
232 if (CurChar == '\r' && CurPtr != CurBuf.end() && *CurPtr == '\n')
233 ++CurPtr;
234
235
239 StringRef(CommentTextStart, NewlinePtr - 1 - CommentTextStart));
240 }
241
242 IsAtStartOfLine = true;
243
244 if (IsAtStartOfStatement)
247 IsAtStartOfStatement = true;
248
251}
252
254
255 if (CurPtr[0] == 'U' || CurPtr[0] == 'u')
256 ++CurPtr;
257 if (CurPtr[0] == 'L' || CurPtr[0] == 'l')
258 ++CurPtr;
259 if (CurPtr[0] == 'L' || CurPtr[0] == 'l')
260 ++CurPtr;
261}
262
263
264
265static unsigned doHexLookAhead(const char *&CurPtr, unsigned DefaultRadix,
266 bool LexHex) {
267 const char *FirstNonDec = nullptr;
268 const char *LookAhead = CurPtr;
269 while (true) {
270 if (isDigit(*LookAhead)) {
271 ++LookAhead;
272 } else {
273 if (!FirstNonDec)
274 FirstNonDec = LookAhead;
275
276
277 if (LexHex && isHexDigit(*LookAhead))
278 ++LookAhead;
279 else
280 break;
281 }
282 }
283 bool isHex = LexHex && (*LookAhead == 'h' || *LookAhead == 'H');
284 CurPtr = isHex || !FirstNonDec ? LookAhead : FirstNonDec;
285 if (isHex)
286 return 16;
287 return DefaultRadix;
288}
289
290static const char *findLastDigit(const char *CurPtr, unsigned DefaultRadix) {
291 while (hexDigitValue(*CurPtr) < DefaultRadix) {
292 ++CurPtr;
293 }
294 return CurPtr;
295}
296
298 if (Value.isIntN(64))
301}
302
/// Human-readable name for a numeric radix, used when building diagnostics.
/// Radixes 2, 8, 10 and 16 get their conventional names; any other value is
/// rendered as "base-<N>".
static std::string radixName(unsigned Radix) {
  if (Radix == 2)
    return "binary";
  if (Radix == 8)
    return "octal";
  if (Radix == 10)
    return "decimal";
  if (Radix == 16)
    return "hexadecimal";

  std::string Generic = "base-";
  Generic += std::to_string(Radix);
  return Generic;
}
317
318
319
320
321
322
323
324
325AsmToken AsmLexer::LexDigit() {
326
327
328
329
331 const char *FirstNonBinary =
332 (CurPtr[-1] != '0' && CurPtr[-1] != '1') ? CurPtr - 1 : nullptr;
333 const char *FirstNonDecimal =
334 (CurPtr[-1] < '0' || CurPtr[-1] > '9') ? CurPtr - 1 : nullptr;
335 const char *OldCurPtr = CurPtr;
337 switch (*CurPtr) {
338 default:
339 if (!FirstNonDecimal) {
340 FirstNonDecimal = CurPtr;
341 }
342 [[fallthrough]];
343 case '9':
344 case '8':
345 case '7':
346 case '6':
347 case '5':
348 case '4':
349 case '3':
350 case '2':
351 if (!FirstNonBinary) {
352 FirstNonBinary = CurPtr;
353 }
354 break;
355 case '1':
356 case '0':
357 break;
358 }
359 ++CurPtr;
360 }
361 if (*CurPtr == '.') {
362
363
364 ++CurPtr;
365 return LexFloatLiteral();
366 }
367
368 if (LexMasmHexFloats && (*CurPtr == 'r' || *CurPtr == 'R')) {
369 ++CurPtr;
371 }
372
373 unsigned Radix = 0;
374 if (*CurPtr == 'h' || *CurPtr == 'H') {
375
376 ++CurPtr;
377 Radix = 16;
378 } else if (*CurPtr == 't' || *CurPtr == 'T') {
379
380 ++CurPtr;
381 Radix = 10;
382 } else if (*CurPtr == 'o' || *CurPtr == 'O' || *CurPtr == 'q' ||
383 *CurPtr == 'Q') {
384
385 ++CurPtr;
386 Radix = 8;
387 } else if (*CurPtr == 'y' || *CurPtr == 'Y') {
388
389 ++CurPtr;
390 Radix = 2;
391 } else if (FirstNonDecimal && FirstNonDecimal + 1 == CurPtr &&
393 (*FirstNonDecimal == 'd' || *FirstNonDecimal == 'D')) {
394 Radix = 10;
395 } else if (FirstNonBinary && FirstNonBinary + 1 == CurPtr &&
397 (*FirstNonBinary == 'b' || *FirstNonBinary == 'B')) {
398 Radix = 2;
399 }
400
401 if (Radix) {
404
405 if (Result.drop_back().getAsInteger(Radix, Value))
406 return ReturnError(TokStart, "invalid " + radixName(Radix) + " number");
407
408
410
412 }
413
414
415 CurPtr = OldCurPtr;
416 }
417
418
419
423
428 }
429
431 }
432
433
435 const char *NumStart = CurPtr;
437 ++CurPtr;
438
440 if (StringRef(NumStart, CurPtr - NumStart).getAsInteger(16, Result))
441 return ReturnError(TokStart, "invalid hexadecimal number");
442
444 }
445
446
448 const char *NumStart = CurPtr;
449 while (*CurPtr == '0' || *CurPtr == '1')
450 ++CurPtr;
451
453 if (StringRef(NumStart, CurPtr - NumStart).getAsInteger(2, Result))
454 return ReturnError(TokStart, "invalid binary number");
455
457 }
458
459
460
461
462
463 if (LexHLASMIntegers || CurPtr[-1] != '0' || CurPtr[0] == '.') {
465
467 bool IsHex = Radix == 16;
468
469 if (!IsHex && (*CurPtr == '.' || *CurPtr == 'e' || *CurPtr == 'E')) {
470 if (*CurPtr == '.')
471 ++CurPtr;
472 return LexFloatLiteral();
473 }
474 }
475
477
480 return ReturnError(TokStart, "invalid " + radixName(Radix) + " number");
481
483
484
486
488 }
489
490 if ( && ((*CurPtr == 'b') || (*CurPtr == 'B'))) {
491 ++CurPtr;
492
493 if ((CurPtr[0])) {
494 --CurPtr;
497 }
498 const char *NumStart = CurPtr;
499 while (CurPtr[0] == '0' || CurPtr[0] == '1')
500 ++CurPtr;
501
502
503 if (CurPtr == NumStart)
504 return ReturnError(TokStart, "invalid binary number");
505
507
509 if (Result.substr(2).getAsInteger(2, Value))
510 return ReturnError(TokStart, "invalid binary number");
511
512
513
515
517 }
518
519 if ((*CurPtr == 'x') || (*CurPtr == 'X')) {
520 ++CurPtr;
521 const char *NumStart = CurPtr;
523 ++CurPtr;
524
525
526
527 if (CurPtr[0] == '.' || CurPtr[0] == 'p' || CurPtr[0] == 'P')
528 return LexHexFloatLiteral(NumStart == CurPtr);
529
530
531 if (CurPtr == NumStart)
532 return ReturnError(CurPtr-2, "invalid hexadecimal number");
533
536 return ReturnError(TokStart, "invalid hexadecimal number");
537
538
539 if (LexMasmIntegers && (*CurPtr == 'h' || *CurPtr == 'H'))
540 ++CurPtr;
541
542
543
545
547 }
548
549
554 return ReturnError(TokStart, "invalid " + radixName(Radix) + " number");
555
556
557 if (Radix == 16)
558 ++CurPtr;
559
560
561
563
565}
566
567
/// Lex input that begins with a single quote.
///
/// Two paths are visible. The first scans to the closing quote, treating a
/// doubled quote ('') as part of the contents, and reports "unterminated
/// string constant" on EOF. The second expects exactly one (optionally
/// backslash-escaped) character followed by a closing quote and maps the
/// escapes \t \n \b \f \r and \' to their character values.
/// NOTE(review): listing lines 571, 574, 589, 605-606, 608 and 620-622 are
/// missing from this extract, so the conditions that select each path and all
/// success returns cannot be read from the text below.
568AsmToken AsmLexer::LexSingleQuote() {
569 int CurChar = getNextChar();
570
572 return ReturnError(TokStart, "invalid usage of character literals");
573
575 while (CurChar != EOF) {
576 if (CurChar != '\'') {
577 CurChar = getNextChar();
578 } else if (peekNextChar() == '\'') {
579
580
// Doubled quote: consume the second quote and continue with the character
// after it.
581 (void)getNextChar();
582 CurChar = getNextChar();
583 } else {
584 break;
585 }
586 }
587 if (CurChar == EOF)
588 return ReturnError(TokStart, "unterminated string constant");
590 }
591
// A backslash introduces an escape; the escaped character is read next.
592 if (CurChar == '\\')
593 CurChar = getNextChar();
594
595 if (CurChar == EOF)
596 return ReturnError(TokStart, "unterminated single quote");
597
// The character after the literal's single character must be the closing
// quote.
598 CurChar = getNextChar();
599
600 if (CurChar != '\'')
601 return ReturnError(TokStart, "single quote way too long");
602
603
604
607
// Res[2] is switched on as the escape character; Res itself is declared on a
// line missing from this extract.
609 char theChar = Res[2];
610 switch (theChar) {
611 default: Value = theChar; break;
612 case '\'': Value = '\''; break;
613 case 't': Value = '\t'; break;
614 case 'n': Value = '\n'; break;
615 case 'b': Value = '\b'; break;
616 case 'f': Value = '\f'; break;
617 case 'r': Value = '\r'; break;
618 }
619 } else
621
623}
624
625
/// Lex input that begins with a double quote.
///
/// Two scanning loops are visible. The first treats a doubled quote ("") as
/// part of the contents and stops at a lone closing quote. The second treats
/// a backslash as escaping the character that follows it. Both report
/// "unterminated string constant" at TokStart when EOF is reached first.
/// NOTE(review): listing lines 628, 631, 646 and 661 are missing from this
/// extract, so the conditions that select each path and the success returns
/// cannot be read from the text below.
626AsmToken AsmLexer::LexQuote() {
627 int CurChar = getNextChar();
629 return ReturnError(TokStart, "invalid usage of string literals");
630
632 while (CurChar != EOF) {
633 if (CurChar != '"') {
634 CurChar = getNextChar();
635 } else if (peekNextChar() == '"') {
636
637
// Doubled quote: consume the second quote and continue with the character
// after it.
638 (void)getNextChar();
639 CurChar = getNextChar();
640 } else {
641 break;
642 }
643 }
644 if (CurChar == EOF)
645 return ReturnError(TokStart, "unterminated string constant");
647 }
648
649 while (CurChar != '"') {
650 if (CurChar == '\\') {
651
// Step onto the escaped character so that an escaped quote does not end the
// loop.
652 CurChar = getNextChar();
653 }
654
655 if (CurChar == EOF)
656 return ReturnError(TokStart, "unterminated string constant");
657
658 CurChar = getNextChar();
659 }
660
662}
663
666
667 while (!isAtStartOfComment(CurPtr) &&
668 !isAtStatementSeparator(CurPtr) &&
669 *CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) {
670 ++CurPtr;
671 }
673}
674
/// Advance CurPtr to the next '\n' or '\r', or to the end of the buffer,
/// without consuming the terminator.
/// NOTE(review): listing lines 676 and 681 are missing from this extract;
/// they presumably record the start position and return the skipped text.
675StringRef AsmLexer::LexUntilEndOfLine() {
677
// NOTE(review): *CurPtr is read before the end-of-buffer comparison, which is
// only safe if the byte at CurBuf.end() is readable (e.g. a NUL terminator) —
// confirm.
678 while (*CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) {
679 ++CurPtr;
680 }
682}
683
685 bool ShouldSkipSpace) {
689 SaveAndRestore SavedAtStartOfStatement(IsAtStartOfStatement);
692 std::string SavedErr = getErr();
694
695 size_t ReadCount;
696 for (ReadCount = 0; ReadCount < Buf.size(); ++ReadCount) {
698
699 Buf[ReadCount] = Token;
700
702 break;
703 }
704
705 SetError(SavedErrLoc, SavedErr);
706 return ReadCount;
707}
708
/// Return true when the text at \p Ptr begins with the comment string.
///
/// When MAI.isHLASM() is true, a comment is only recognised while
/// IsAtStartOfStatement is set.
/// NOTE(review): the declaration of CommentString (listing line 713) is
/// missing from this extract; it presumably comes from
/// MAI.getCommentString() — confirm.
709bool AsmLexer::isAtStartOfComment(const char *Ptr) {
710 if (MAI.isHLASM() && !IsAtStartOfStatement)
711 return false;
712
714
// Single-character comment string: compare that one character.
715 if (CommentString.size() == 1)
716 return CommentString[0] == Ptr[0];
717
718
// A comment string whose second character is '#' is matched on its first
// character only.
719 if (CommentString[1] == '#')
720 return CommentString[0] == Ptr[0];
721
// Otherwise the whole comment string must match at Ptr.
722 return strncmp(Ptr, CommentString.data(), CommentString.size()) == 0;
723}
724
725bool AsmLexer::isAtStatementSeparator(const char *Ptr) {
728}
729
732
733 int CurChar = getNextChar();
734
735 if (!IsPeeking && CurChar == '#' && IsAtStartOfStatement) {
736
737
741
744 CurPtr = TokStart;
745 StringRef s = LexUntilEndOfLine();
746 UnLex(TokenBuf[1]);
747 UnLex(TokenBuf[0]);
749 }
750
752 return LexLineComment();
753 }
754
755 if (isAtStartOfComment(TokStart))
756 return LexLineComment();
757
758 if (isAtStatementSeparator(TokStart)) {
760 IsAtStartOfLine = true;
761 IsAtStartOfStatement = true;
764 }
765
766
767
768 if (CurChar == EOF && !IsAtStartOfStatement && EndStatementAtEOF) {
769 IsAtStartOfLine = true;
770 IsAtStartOfStatement = true;
772 }
773 IsAtStartOfLine = false;
774 bool OldIsAtStartOfStatement = IsAtStartOfStatement;
775 IsAtStartOfStatement = false;
776 switch (CurChar) {
777 default:
778
779
780
781
782 if (isalpha(CurChar) || CurChar == '_' || CurChar == '.')
783 return LexIdentifier();
784
785
786 return ReturnError(TokStart, "invalid character in input");
787 case EOF:
788 if (EndStatementAtEOF) {
789 IsAtStartOfLine = true;
790 IsAtStartOfStatement = true;
791 }
793 case 0:
794 case ' ':
795 case '\t':
796 IsAtStartOfStatement = OldIsAtStartOfStatement;
797 while (*CurPtr == ' ' || *CurPtr == '\t')
798 CurPtr++;
800 return LexToken();
801 else
803 case '\r': {
804 IsAtStartOfLine = true;
805 IsAtStartOfStatement = true;
806
807 if (CurPtr != CurBuf.end() && *CurPtr == '\n')
808 ++CurPtr;
811 }
812 case '\n':
813 IsAtStartOfLine = true;
814 IsAtStartOfStatement = true;
827 case '$': {
829 return LexDigit();
831 return LexIdentifier();
833 }
834 case '@':
836 return LexIdentifier();
838 case '#':
840 return LexIdentifier();
842 case '?':
844 return LexIdentifier();
847 case '=':
848 if (*CurPtr == '=') {
849 ++CurPtr;
851 }
853 case '-':
854 if (*CurPtr == '>') {
855 ++CurPtr;
857 }
859 case '|':
860 if (*CurPtr == '|') {
861 ++CurPtr;
863 }
866 case '&':
867 if (*CurPtr == '&') {
868 ++CurPtr;
870 }
872 case '!':
873 if (*CurPtr == '=') {
874 ++CurPtr;
876 }
878 case '%':
880 return LexDigit();
881 }
882
885 unsigned OperatorLength;
886
887 std::tie(Operator, OperatorLength) =
915
917 CurPtr += OperatorLength - 1;
919 }
920 }
922 case '/':
923 IsAtStartOfStatement = OldIsAtStartOfStatement;
924 return LexSlash();
925 case '\'': return LexSingleQuote();
926 case '"': return LexQuote();
927 case '0': case '1': case '2': case '3': case '4':
928 case '5': case '6': case '7': case '8': case '9':
929 return LexDigit();
930 case '<':
931 switch (*CurPtr) {
932 case '<':
933 ++CurPtr;
935 case '=':
936 ++CurPtr;
938 case '>':
939 ++CurPtr;
941 default:
943 }
944 case '>':
945 switch (*CurPtr) {
946 case '>':
947 ++CurPtr;
949 case '=':
950 ++CurPtr;
952 default:
954 }
955
956
957
958
959
960 }
961}
This file implements a class to represent arbitrary precision integral constant values and operations...
static std::string radixName(unsigned Radix)
static void SkipIgnoredIntegerSuffix(const char *&CurPtr)
static unsigned doHexLookAhead(const char *&CurPtr, unsigned DefaultRadix, bool LexHex)
static AsmToken intToken(StringRef Ref, APInt &Value)
static const char * findLastDigit(const char *CurPtr, unsigned DefaultRadix)
static bool isIdentifierChar(char C)
Return true if the given character satisfies the following regular expression: [-a-zA-Z$....
static bool isDigit(const char C)
static bool isHexDigit(const char C)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file provides utility classes that use RAII to save and restore values.
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
Class for arbitrary precision integers.
size_t size() const
size - Get the array size.
size_t peekTokens(MutableArrayRef< AsmToken > Buf, bool ShouldSkipSpace=true) override
Look ahead an arbitrary number of tokens.
AsmLexer(const MCAsmInfo &MAI)
StringRef LexUntilEndOfStatement() override
void setBuffer(StringRef Buf, const char *ptr=nullptr, bool EndStatementAtEOF=true)
AsmToken LexToken() override
LexToken - Read the next token and return its code.
Target independent representation for an assembler token.
bool is(TokenKind K) const
This class is intended to be used as a base class for asm properties and features specific to the tar...
bool doesAllowDollarAtStartOfIdentifier() const
bool hasMipsExpressions() const
bool shouldUseMotorolaIntegers() const
StringRef getCommentString() const
const char * getSeparatorString() const
bool doesAllowAtAtStartOfIdentifier() const
bool shouldAllowAdditionalComments() const
bool doesAllowQuestionAtStartOfIdentifier() const
void UnLex(AsmToken const &Token)
AsmCommentConsumer * CommentConsumer
SMLoc getErrLoc()
Get the current error location.
bool is(AsmToken::TokenKind K) const
Check if the current token has kind K.
void SetError(SMLoc errLoc, const std::string &err)
bool AllowHashInIdentifier
const std::string & getErr()
Get the current error string.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
This is a utility class that provides an abstraction for the common functionality between Instruction...
Represents a location in source code.
static SMLoc getFromPointer(const char *Ptr)
StringRef - Represent a constant reference to a string, i.e.
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr size_t size() const
size - Get the string size.
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
A switch()-like statement whose cases are string literals.
LLVM Value Representation.
@ C
The default llvm calling convention, compatible with C.
This is an optimization pass for GlobalISel generic memory operations.
@ Ref
The access may reference the value stored in memory.
@ Default
The result values are uniform if and only if all operands are uniform.
A utility class that uses RAII to save and restore the value of a variable.