clang: lib/Lex/LiteralSupport.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
23#include "llvm/ADT/APInt.h"
24#include "llvm/ADT/SmallVector.h"
25#include "llvm/ADT/StringExtras.h"
26#include "llvm/ADT/StringSwitch.h"
27#include "llvm/Support/ConvertUTF.h"
28#include "llvm/Support/Error.h"
29#include "llvm/Support/ErrorHandling.h"
30#include "llvm/Support/Unicode.h"
31#include
32#include
33#include
34#include
35#include
36#include
37
38using namespace clang;
39
// Body of a helper that maps a character/string literal token kind to a width
// queried from Target:
//   ordinary and UTF-8 literals -> Target.getCharWidth()
//   wide literals               -> Target.getWCharWidth()
//   UTF-16 literals             -> Target.getChar16Width()
//   UTF-32 literals             -> Target.getChar32Width()
// Any other token kind is treated as unreachable.
// NOTE(review): the signature line is absent from this listing; the units of
// the returned width are not visible here.
41 switch (kind) {
42 default: llvm_unreachable("Unknown token type!");
43 case tok::char_constant:
44 case tok::string_literal:
45 case tok::utf8_char_constant:
46 case tok::utf8_string_literal:
47 return Target.getCharWidth();
48 case tok::wide_char_constant:
49 case tok::wide_string_literal:
50 return Target.getWCharWidth();
51 case tok::utf16_char_constant:
52 case tok::utf16_string_literal:
53 return Target.getChar16Width();
54 case tok::utf32_char_constant:
55 case tok::utf32_string_literal:
56 return Target.getChar32Width();
57 }
58}
59
// Body of a helper that maps a literal token kind to a small integer:
//   ordinary literals          -> 0
//   UTF-8 literals             -> 2
//   wide/UTF-16/UTF-32 literals -> 1
// Presumably this is the length in characters of the encoding prefix (none,
// u8, or a single-letter prefix) -- TODO confirm against the missing
// signature line. Any other token kind is treated as unreachable.
61 switch (kind) {
62 default:
63 llvm_unreachable("Unknown token type!");
64 case tok::char_constant:
65 case tok::string_literal:
66 return 0;
67 case tok::utf8_char_constant:
68 case tok::utf8_string_literal:
69 return 2;
70 case tok::wide_char_constant:
71 case tok::wide_string_literal:
72 case tok::utf16_char_constant:
73 case tok::utf16_string_literal:
74 case tok::utf32_char_constant:
75 case tok::utf32_string_literal:
76 return 1;
77 }
78}
79
// Tail of a parameter list taking three positions inside a token buffer
// (token start, range start, range end).
// NOTE(review): the start of the signature and the whole body (listing lines
// 80-81 and 85-91) are absent from this listing, so the body looks empty here.
82 const char *TokBegin,
83 const char *TokRangeBegin,
84 const char *TokRangeEnd) {
92}
93
94
95
96
97
98
// Fragment of a diagnostic helper: takes positions inside a token buffer plus a
// diagnostic ID and builds a character range with MakeCharSourceRange.
// NOTE(review): the start of the signature and listing lines 103-106 are absent,
// so the statement that consumes the MakeCharSourceRange result is not visible.
101 const char *TokBegin, const char *TokRangeBegin,
102 const char *TokRangeEnd, unsigned DiagID) {
107 MakeCharSourceRange(Features, TokLoc, TokBegin, TokRangeBegin, TokRangeEnd);
108}
109
// Body of a predicate over a single escape character: returns true when Escape
// is one of the simple escapes listed below (quote characters, question mark,
// backslash, a, b, f, n, r, t, v) and false for everything else.
// NOTE(review): the signature line is absent from this listing.
111 switch (Escape) {
112 case '\'':
113 case '"':
114 case '?':
115 case '\\':
116 case 'a':
117 case 'b':
118 case 'f':
119 case 'n':
120 case 'r':
121 case 't':
122 case 'v':
123 return true;
124 }
125 return false;
126}
127
128
129
// Decodes a single escape sequence beginning at ThisTokBuf and returns its
// numeric value. ThisTokBuf is taken by reference and is advanced past every
// character consumed. HadError is set for the hard-error cases below, and
// diagnostics are only emitted when Diags is non-null.
// NOTE(review): the function name, the leading and trailing parameters
// (listing lines 130, 133-136) and a few statements (339, 364) are absent from
// this listing; the affected spots are marked below.
131 const char *&ThisTokBuf,
132 const char *ThisTokEnd, bool &HadError,
137 const char *EscapeBegin = ThisTokBuf;
138 bool Delimited = false;
139 bool EndDelimiterFound = false;
140
141
// Step over the first character of the escape (presumably the backslash).
142 ++ThisTokBuf;
143
144
145
// The next character selects the kind of escape; it is also the default result.
146 unsigned ResultChar = *ThisTokBuf++;
147 char Escape = ResultChar;
148 switch (ResultChar) {
149
// These escapes stand for themselves.
150 case '\\': case '\'': case '"': case '?': break;
151
152
// Single-letter escapes map to fixed control-code values.
153 case 'a':
154
155 ResultChar = 7;
156 break;
157 case 'b':
158 ResultChar = 8;
159 break;
// e and E are reported as non-standard but accepted; both yield 27.
160 case 'e':
161 if (Diags)
162 Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
163 diag::ext_nonstandard_escape) << "e";
164 ResultChar = 27;
165 break;
166 case 'E':
167 if (Diags)
168 Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
169 diag::ext_nonstandard_escape) << "E";
170 ResultChar = 27;
171 break;
172 case 'f':
173 ResultChar = 12;
174 break;
175 case 'n':
176 ResultChar = 10;
177 break;
178 case 'r':
179 ResultChar = 13;
180 break;
181 case 't':
182 ResultChar = 9;
183 break;
184 case 'v':
185 ResultChar = 11;
186 break;
// Hex escape, either bare digits or a brace-delimited digit run.
187 case 'x': {
188 ResultChar = 0;
189 if (ThisTokBuf != ThisTokEnd && *ThisTokBuf == '{') {
190 Delimited = true;
191 ThisTokBuf++;
192 if (*ThisTokBuf == '}') {
193 HadError = true;
194 if (Diags)
195 Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
196 diag::err_delimited_escape_empty);
197 }
// NOTE(review): the second operand below looks truncated. As written it is true
// for every non-NUL character, so any non-delimited hex escape would report
// err_hex_escape_no_digits. Presumably a negated hex-digit test (isHexDigit is
// used elsewhere in this file) was intended -- confirm against upstream.
198 } else if (ThisTokBuf == ThisTokEnd || (*ThisTokBuf)) {
199 if (Diags)
200 Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
201 diag::err_hex_escape_no_digits) << "x";
202 return ResultChar;
203 }
204
205
// Accumulate hex digits, four bits at a time.
206 bool Overflow = false;
207 for (; ThisTokBuf != ThisTokEnd; ++ThisTokBuf) {
208 if (Delimited && *ThisTokBuf == '}') {
209 ThisTokBuf++;
210 EndDelimiterFound = true;
211 break;
212 }
213 int CharVal = llvm::hexDigitValue(*ThisTokBuf);
214 if (CharVal == -1) {
215
// A non-hex character ends a bare escape; inside braces it is an error and
// scanning continues.
216 if (!Delimited)
217 break;
218 HadError = true;
219 if (Diags)
220 Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
221 diag::err_delimited_escape_invalid)
222 << StringRef(ThisTokBuf, 1);
223 continue;
224 }
225
// If the top nibble is already occupied, the shift below drops bits.
226 if (ResultChar & 0xF0000000)
227 Overflow = true;
228 ResultChar <<= 4;
229 ResultChar |= CharVal;
230 }
231
// Values wider than CharWidth bits are flagged and masked down to CharWidth.
232 if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
233 Overflow = true;
234 ResultChar &= ~0U >> (32-CharWidth);
235 }
236
237
// Overflow is only reported when no earlier error was recorded.
238 if (!HadError && Overflow) {
239 HadError = true;
240 if (Diags)
241 Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
242 diag::err_escape_too_large)
243 << 0;
244 }
245 break;
246 }
// Classic octal escape: one to three octal digits.
247 case '0': case '1': case '2': case '3':
248 case '4': case '5': case '6': case '7': {
249
// Back up so the loop below re-reads the first digit.
250 --ThisTokBuf;
251 ResultChar = 0;
252
253
254
255 unsigned NumDigits = 0;
256 do {
257 ResultChar <<= 3;
258 ResultChar |= *ThisTokBuf++ - '0';
259 ++NumDigits;
260 } while (ThisTokBuf != ThisTokEnd && NumDigits < 3 &&
261 ThisTokBuf[0] >= '0' && ThisTokBuf[0] <= '7');
262
263
// Too-large values are diagnosed and masked; HadError is not set on this path.
264 if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
265 if (Diags)
266 Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
267 diag::err_escape_too_large) << 1;
268 ResultChar &= ~0U >> (32-CharWidth);
269 }
270 break;
271 }
// Brace-delimited octal escape; the opening brace is mandatory.
272 case 'o': {
273 bool Overflow = false;
274 if (ThisTokBuf == ThisTokEnd || *ThisTokBuf != '{') {
275 HadError = true;
276 if (Diags)
277 Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
278 diag::err_delimited_escape_missing_brace)
279 << "o";
280
281 break;
282 }
283 ResultChar = 0;
284 Delimited = true;
285 ++ThisTokBuf;
286 if (*ThisTokBuf == '}') {
287 HadError = true;
288 if (Diags)
289 Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
290 diag::err_delimited_escape_empty);
291 }
292
293 while (ThisTokBuf != ThisTokEnd) {
294 if (*ThisTokBuf == '}') {
295 EndDelimiterFound = true;
296 ThisTokBuf++;
297 break;
298 }
299 if (*ThisTokBuf < '0' || *ThisTokBuf > '7') {
300 HadError = true;
301 if (Diags)
302 Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
303 diag::err_delimited_escape_invalid)
304 << StringRef(ThisTokBuf, 1);
305 ThisTokBuf++;
306 continue;
307 }
308
// If any of the top three bits is set, the shift below drops bits.
309 if (ResultChar & 0xE0000000)
310 Overflow = true;
311
312 ResultChar <<= 3;
313 ResultChar |= *ThisTokBuf++ - '0';
314 }
315
316 if (!HadError &&
317 (Overflow || (CharWidth != 32 && (ResultChar >> CharWidth) != 0))) {
318 HadError = true;
319 if (Diags)
320 Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
321 diag::err_escape_too_large)
322 << 1;
323 ResultChar &= ~0U >> (32 - CharWidth);
324 }
325 break;
326 }
327
// These punctuation escapes are reported as non-standard and stand for
// themselves.
328 case '(': case '{': case '[': case '%':
329
330 if (Diags)
331 Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
332 diag::ext_nonstandard_escape)
333 << std::string(1, ResultChar);
334 break;
// Anything else is an unknown escape; it is only diagnosed, never rejected.
335 default:
336 if (!Diags)
337 break;
338
// NOTE(review): listing line 339 is absent, so the else below has no visible
// matching if. The two branches print the character itself versus its hex
// value, so the missing test presumably checks printability -- confirm.
340 Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
341 diag::ext_unknown_escape)
342 << std::string(1, ResultChar);
343 else
344 Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
345 diag::ext_unknown_escape)
346 << "x" + llvm::utohexstr(ResultChar);
347 break;
348 }
349
// Shared handling for brace-delimited forms: report a missing closing brace,
// otherwise emit the extension/compatibility diagnostic when no error occurred.
350 if (Delimited && Diags) {
351 if (!EndDelimiterFound)
352 Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
353 diag::err_expected)
354 << tok::r_brace;
355 else if (!HadError) {
356 Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
357 Features.CPlusPlus23 ? diag::warn_cxx23_delimited_escape_sequence
358 : diag::ext_delimited_escape_sequence)
359 << 0 << (Features.CPlusPlus ? 1 : 0);
360 }
361 }
362
// NOTE(review): listing line 364 (the rest of this condition) is absent. The
// local Escape is not used anywhere else in the visible code, so it is
// presumably tested there -- confirm against upstream.
363 if (EvalMethod == StringLiteralEvalMethod::Unevaluated &&
365 Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
366 diag::err_unevaluated_string_invalid_escape_sequence)
367 << StringRef(EscapeBegin, ThisTokBuf - EscapeBegin);
368 HadError = true;
369 }
370
371 return ResultChar;
372}
373
// Body of a helper that converts Codepoint to UTF-8 in a 4-byte scratch buffer
// and, only when llvm::ConvertCodePointToUTF8 reports success, appends the
// bytes written (ResultBuf up to the advanced ResultPtr) to Str.
// NOTE(review): the signature line is absent from this listing.
376 char ResultBuf[4];
377 char *ResultPtr = ResultBuf;
378 if (llvm::ConvertCodePointToUTF8(Codepoint, ResultPtr))
379 Str.append(ResultBuf, ResultPtr);
380}
381
// Body of a helper that walks Input and copies it into Buf, decoding
// backslash-u, backslash-U and backslash-N escapes into a CodePoint value.
// The input is assumed to be already validated: malformed escapes are guarded
// only by asserts.
// NOTE(review): the signature line and listing lines 403, 418 and 439 are
// absent. As shown, CodePoint is computed but never written to Buf, so those
// lines presumably append it -- confirm against upstream.
383 for (StringRef::iterator I = Input.begin(), E = Input.end(); I != E; ++I) {
// Ordinary characters are copied through unchanged.
384 if (*I != '\\') {
385 Buf.push_back(*I);
386 continue;
387 }
388
// Skip the backslash and read the escape kind letter.
389 ++I;
390 char Kind = *I;
391 ++I;
392
393 assert(Kind == 'u' || Kind == 'U' || Kind == 'N');
394 uint32_t CodePoint = 0;
395
// Brace-delimited form of backslash-u: hex digits up to the closing brace.
396 if (Kind == 'u' && *I == '{') {
397 for (++I; *I != '}'; ++I) {
398 unsigned Value = llvm::hexDigitValue(*I);
399 assert(Value != -1U);
400 CodePoint <<= 4;
401 CodePoint += Value;
402 }
404 continue;
405 }
406
// Named form: look the name between the braces up with loose matching.
407 if (Kind == 'N') {
408 assert(*I == '{');
409 ++I;
410 auto Delim = std::find(I, Input.end(), '}');
411 assert(Delim != Input.end());
412 StringRef Name(I, std::distance(I, Delim));
// NOTE(review): the declaration below has lost its template angle brackets in
// this listing (compare the std::optional declaration at listing line 554).
413 std::optionalllvm::sys::unicode::LooseMatchingResult Res =
414 llvm::sys::unicode::nameToCodepointLooseMatching(Name);
415 assert(Res && "could not find a codepoint that was previously found");
416 CodePoint = Res->CodePoint;
417 assert(CodePoint != 0xFFFFFFFF);
// Leave I on the closing brace; the loop increment steps past it.
419 I = Delim;
420 continue;
421 }
422
// Fixed-width forms: 4 hex digits for backslash-u, 8 for backslash-U.
423 unsigned NumHexDigits;
424 if (Kind == 'u')
425 NumHexDigits = 4;
426 else
427 NumHexDigits = 8;
428
429 assert(I + NumHexDigits <= E);
430
431 for (; NumHexDigits != 0; ++I, --NumHexDigits) {
432 unsigned Value = llvm::hexDigitValue(*I);
433 assert(Value != -1U);
434
435 CodePoint <<= 4;
436 CodePoint += Value;
437 }
438
// Step back one so the loop increment lands on the character after the escape.
440 --I;
441 }
442}
443
// Body of a predicate: true only when LO.MicrosoftExt is set and K is one of
// the five function-name keyword tokens listed below.
// NOTE(review): the signature line is absent from this listing.
446 return LO.MicrosoftExt &&
447 (K == tok::kw___FUNCTION__ || K == tok::kw_L__FUNCTION__ ||
448 K == tok::kw___FUNCSIG__ || K == tok::kw_L__FUNCSIG__ ||
449 K == tok::kw___FUNCDNAME__);
450}
451
455}
456
// Parses the hexadecimal form of a universal character name (backslash-u or
// backslash-U) starting at ThisTokBuf, accumulating the value into UcnVal and
// storing the expected digit count (4 or 8) in UcnLen. Delimited is set when
// the brace-delimited backslash-u form is used, which is only recognised when
// in_char_string_literal is true. Returns true when the escape was parsed
// without error; diagnostics are only emitted when Diags is non-null.
// UcnVal is not reset here, so the caller is expected to pass it in zeroed.
// NOTE(review): the function name and some parameters (listing lines 457,
// 461-462) are absent from this listing.
458 const char *&ThisTokBuf,
459 const char *ThisTokEnd, uint32_t &UcnVal,
460 unsigned short &UcnLen, bool &Delimited,
463 bool in_char_string_literal = false) {
464 const char *UcnBegin = ThisTokBuf;
465 bool HasError = false;
466 bool EndDelimiterFound = false;
467
468
// Skip the two-character introducer (backslash plus u or U).
469 ThisTokBuf += 2;
470 Delimited = false;
471 if (UcnBegin[1] == 'u' && in_char_string_literal &&
472 ThisTokBuf != ThisTokEnd && *ThisTokBuf == '{') {
473 Delimited = true;
474 ThisTokBuf++;
// NOTE(review): the second operand below looks truncated. As written it is true
// for every non-NUL character, so every non-delimited escape would be rejected
// with err_hex_escape_no_digits. Presumably a negated hex-digit test was
// intended -- confirm against upstream.
475 } else if (ThisTokBuf == ThisTokEnd || (*ThisTokBuf)) {
476 if (Diags)
477 Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
478 diag::err_hex_escape_no_digits)
479 << StringRef(&ThisTokBuf[-1], 1);
480 return false;
481 }
// NOTE(review): in the delimited case ThisTokBuf[-1] is the opening brace, so
// UcnLen becomes 8 there; the loop below ignores UcnLen when Delimited is set.
482 UcnLen = (ThisTokBuf[-1] == 'u' ? 4 : 8);
483
484 bool Overflow = false;
485 unsigned short Count = 0;
// Bare form reads exactly UcnLen digits; delimited form reads up to the brace.
486 for (; ThisTokBuf != ThisTokEnd && (Delimited || Count != UcnLen);
487 ++ThisTokBuf) {
488 if (Delimited && *ThisTokBuf == '}') {
489 ++ThisTokBuf;
490 EndDelimiterFound = true;
491 break;
492 }
493 int CharVal = llvm::hexDigitValue(*ThisTokBuf);
494 if (CharVal == -1) {
495 HasError = true;
496 if (!Delimited)
497 break;
498 if (Diags) {
499 Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
500 diag::err_delimited_escape_invalid)
501 << StringRef(ThisTokBuf, 1);
502 }
503 Count++;
504 continue;
505 }
// Once the top nibble is occupied, further digits cannot fit: flag overflow
// and skip the digit without counting it.
506 if (UcnVal & 0xF0000000) {
507 Overflow = true;
508 continue;
509 }
510 UcnVal <<= 4;
511 UcnVal |= CharVal;
512 Count++;
513 }
514
515 if (Overflow) {
516 if (Diags)
517 Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
518 diag::err_escape_too_large)
519 << 0;
520 return false;
521 }
522
523 if (Delimited && !EndDelimiterFound) {
524 if (Diags) {
525 Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
526 diag::err_expected)
527 << tok::r_brace;
528 }
529 return false;
530 }
531
532
// Reject an empty delimited escape or a bare escape with too few digits.
533 if (Count == 0 || (!Delimited && Count != UcnLen)) {
534 if (Diags)
535 Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
536 Delimited ? diag::err_delimited_escape_empty
537 : diag::err_ucn_escape_incomplete);
538 return false;
539 }
540 return !HasError;
541}
542
// Reports err_invalid_ucn_name for Name and then tries to attach helpful notes:
// first a loose-matching lookup (single note, then return), otherwise up to
// five nearest-name candidates.
// NOTE(review): the start of the signature and listing lines 559-560, 567, 583
// and 591-592 are absent; the note arguments and the declarations of Matches
// and of the conversion call are therefore not fully visible.
545 const char *TokBegin, const char *TokRangeBegin, const char *TokRangeEnd,
546 llvm::StringRef Name) {
547
548 Diag(Diags, Features, Loc, TokBegin, TokRangeBegin, TokRangeEnd,
549 diag::err_invalid_ucn_name)
550 << Name;
551
552 namespace u = llvm::sys::unicode;
553
// A loose match produces exactly one note and ends the diagnosis.
554 std::optional<u::LooseMatchingResult> Res =
555 u::nameToCodepointLooseMatching(Name);
556 if (Res) {
557 Diag(Diags, Features, Loc, TokBegin, TokRangeBegin, TokRangeEnd,
558 diag::note_invalid_ucn_name_loose_matching)
561 TokRangeEnd),
562 Res->Name);
563 return;
564 }
565
566 unsigned Distance = 0;
568 u::nearestMatchesForCodepointName(Name, 5);
569 assert(!Matches.empty() && "No unicode characters found");
570
// Emit candidates in order, stopping as soon as the distance jumps by more
// than 3 relative to the previous candidate.
571 for (const auto &Match : Matches) {
572 if (Distance == 0)
573 Distance = Match.Distance;
574 if (std::max(Distance, Match.Distance) -
575 std::min(Distance, Match.Distance) >
576 3)
577 break;
578 Distance = Match.Distance;
579
580 std::string Str;
581 llvm::UTF32 V = Match.Value;
582 bool Converted =
// Converted is only consumed by the assert; the void cast keeps it referenced
// when asserts are compiled out.
584 (void)Converted;
585 assert(Converted && "Found a match wich is not a unicode character");
586
587 Diag(Diags, Features, Loc, TokBegin, TokRangeBegin, TokRangeEnd,
588 diag::note_invalid_ucn_name_candidate)
589 << Match.Name << llvm::utohexstr(Match.Value)
590 << Str
593 TokRangeEnd),
594 Match.Name);
595 }
596}
597
// Parses a named universal character escape (backslash-N followed by a name in
// braces) starting at ThisTokBuf. On success stores the code point in UcnVal,
// sets UcnLen to 8 for values above 0xFFFF and 4 otherwise, and returns true.
// Returns false on a missing brace, an empty or unterminated name, or a name
// that strict lookup does not know. Diagnostics need a non-null Diags.
// NOTE(review): the function name, some parameters (listing lines 598,
// 601-603) and listing lines 617, 621 and 636 are absent from this listing.
599 const char *&ThisTokBuf,
600 const char *ThisTokEnd, uint32_t &UcnVal,
604 const char *UcnBegin = ThisTokBuf;
605 assert(UcnBegin[0] == '\\' && UcnBegin[1] == 'N');
606 ThisTokBuf += 2;
607 if (ThisTokBuf == ThisTokEnd || *ThisTokBuf != '{') {
608 if (Diags) {
609 Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
610 diag::err_delimited_escape_missing_brace)
611 << StringRef(&ThisTokBuf[-1], 1);
612 }
613 return false;
614 }
615 ThisTokBuf++;
// NOTE(review): the lambda body (listing line 617) is absent, so the exact
// stop condition for the closing-delimiter search is not visible here.
616 const char *ClosingBrace = std::find_if(ThisTokBuf, ThisTokEnd, [](char C) {
618 });
619 bool Incomplete = ClosingBrace == ThisTokEnd;
620 bool Empty = ClosingBrace == ThisTokBuf;
// NOTE(review): the guard opening this error block (listing line 621) is
// absent; presumably it tests Incomplete and Empty -- confirm.
622 if (Diags) {
623 Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
624 Incomplete ? diag::err_ucn_escape_incomplete
625 : diag::err_delimited_escape_empty)
626 << StringRef(&UcnBegin[1], 1);
627 }
// Resume after the closing brace when there is one, else at the end of token.
628 ThisTokBuf = ClosingBrace == ThisTokEnd ? ClosingBrace : ClosingBrace + 1;
629 return false;
630 }
631 StringRef Name(ThisTokBuf, ClosingBrace - ThisTokBuf);
632 ThisTokBuf = ClosingBrace + 1;
633 std::optional<char32_t> Res = llvm::sys::unicode::nameToCodepointStrict(Name);
634 if (!Res) {
635 if (Diags)
637 &UcnBegin[3], ClosingBrace, Name);
638 return false;
639 }
640 UcnVal = *Res;
641 UcnLen = UcnVal > 0xFFFF ? 8 : 4;
642 return true;
643}
644
645
646
// Parses and validates one universal character name starting at ThisTokBuf.
// Dispatches on the character after the backslash (N selects the named form,
// anything else the numeric form), then rejects surrogates and values above
// 0x10FFFF, applies the restrictions on code points below 0xA0, and emits the
// language-mode compatibility diagnostics. Returns true when the escape is
// usable; diagnostics are only emitted when Diags is non-null.
// NOTE(review): listing lines 649-651 (remaining parameters), 660 and 664 (the
// callee names of the two dispatch calls) are absent from this listing. As
// shown, HasError is never assigned in the N branch.
647static bool ProcessUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
648 const char *ThisTokEnd, uint32_t &UcnVal,
652 bool in_char_string_literal = false) {
653
654 bool HasError;
655 const char *UcnBegin = ThisTokBuf;
656 bool IsDelimitedEscapeSequence = false;
657 bool IsNamedEscapeSequence = false;
658 if (ThisTokBuf[1] == 'N') {
659 IsNamedEscapeSequence = true;
661 UcnVal, UcnLen, Loc, Diags, Features);
662 } else {
663 HasError =
665 UcnLen, IsDelimitedEscapeSequence, Loc, Diags,
666 Features, in_char_string_literal);
667 }
668 if (HasError)
669 return false;
670
671
// Surrogate code points and values beyond the Unicode range are never valid.
672 if ((0xD800 <= UcnVal && UcnVal <= 0xDFFF) ||
673 UcnVal > 0x10FFFF) {
674 if (Diags)
675 Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
676 diag::err_ucn_escape_invalid);
677 return false;
678 }
679
680
681
// Code points below 0xA0, other than 0x24, 0x40 and 0x60, are restricted: an
// error unless C++11 or C23 is enabled and the escape is inside a
// character/string literal, in which case only a compatibility warning is
// issued.
682 if (UcnVal < 0xa0 &&
683
684 (UcnVal != 0x24 && UcnVal != 0x40 && UcnVal != 0x60)) {
685 bool IsError =
686 (!(Features.CPlusPlus11 || Features.C23) || !in_char_string_literal);
687 if (Diags) {
688 char BasicSCSChar = UcnVal;
// The range 0x20 to 0x7E gets the basic-character diagnostics, which print the
// character; everything else gets the control-character diagnostics.
689 if (UcnVal >= 0x20 && UcnVal < 0x7f)
690 Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
691 IsError ? diag::err_ucn_escape_basic_scs
692 : Features.CPlusPlus
693 ? diag::warn_cxx98_compat_literal_ucn_escape_basic_scs
694 : diag::warn_c23_compat_literal_ucn_escape_basic_scs)
695 << StringRef(&BasicSCSChar, 1);
696 else
697 Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
698 IsError ? diag::err_ucn_control_character
699 : Features.CPlusPlus
700 ? diag::warn_cxx98_compat_literal_ucn_control_character
701 : diag::warn_c23_compat_literal_ucn_control_character);
702 }
703 if (IsError)
704 return false;
705 }
706
// Warn when neither C++ nor C99 is enabled.
707 if (!Features.CPlusPlus && !Features.C99 && Diags)
708 Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
709 diag::warn_ucn_not_valid_in_c89_literal);
710
// Delimited and named forms get an extension or compatibility diagnostic.
711 if ((IsDelimitedEscapeSequence || IsNamedEscapeSequence) && Diags)
712 Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
713 Features.CPlusPlus23 ? diag::warn_cxx23_delimited_escape_sequence
714 : diag::ext_delimited_escape_sequence)
715 << (IsNamedEscapeSequence ? 1 : 0) << (Features.CPlusPlus ? 1 : 0);
716
717 return true;
718}
719
720
721
722static int MeasureUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
723 const char *ThisTokEnd, unsigned CharByteWidth,
724 const LangOptions &Features, bool &HadError) {
725
726 if (CharByteWidth == 4)
727 return 4;
728
729 uint32_t UcnVal = 0;
730 unsigned short UcnLen = 0;
732
733 if ((ThisTokBegin, ThisTokBuf, ThisTokEnd, UcnVal,
734 UcnLen, Loc, nullptr, Features, true)) {
735 HadError = true;
736 return 0;
737 }
738
739
740 if (CharByteWidth == 2)
741 return UcnVal <= 0xFFFF ? 2 : 4;
742
743
744 if (UcnVal < 0x80)
745 return 1;
746 if (UcnVal < 0x800)
747 return 2;
748 if (UcnVal < 0x10000)
749 return 3;
750 return 4;
751}
752
753
754
755
756
757static void EncodeUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
758 const char *ThisTokEnd,
759 char *&ResultBuf, bool &HadError,
763 typedef uint32_t UTF32;
764 UTF32 UcnVal = 0;
765 unsigned short UcnLen = 0;
766 if ((ThisTokBegin, ThisTokBuf, ThisTokEnd, UcnVal, UcnLen,
767 Loc, Diags, Features, true)) {
768 HadError = true;
769 return;
770 }
771
772 assert((CharByteWidth == 1 || CharByteWidth == 2 || CharByteWidth == 4) &&
773 "only character widths of 1, 2, or 4 bytes supported");
774
775 (void)UcnLen;
776 assert((UcnLen== 4 || UcnLen== 8) && "only ucn length of 4 or 8 supported");
777
778 if (CharByteWidth == 4) {
779
780
781 llvm::UTF32 *ResultPtr = reinterpret_castllvm::UTF32\*\(ResultBuf);
782 *ResultPtr = UcnVal;
783 ResultBuf += 4;
784 return;
785 }
786
787 if (CharByteWidth == 2) {
788
789
790 llvm::UTF16 *ResultPtr = reinterpret_castllvm::UTF16\*\(ResultBuf);
791
792 if (UcnVal <= (UTF32)0xFFFF) {
793 *ResultPtr = UcnVal;
794 ResultBuf += 2;
795 return;
796 }
797
798
799 UcnVal -= 0x10000;
800 *ResultPtr = 0xD800 + (UcnVal >> 10);
801 *(ResultPtr+1) = 0xDC00 + (UcnVal & 0x3FF);
802 ResultBuf += 4;
803 return;
804 }
805
806 assert(CharByteWidth == 1 && "UTF-8 encoding is only for 1 byte characters");
807
808
809
810
811
812 typedef uint8_t UTF8;
813
814 unsigned short bytesToWrite = 0;
815 if (UcnVal < (UTF32)0x80)
816 bytesToWrite = 1;
817 else if (UcnVal < (UTF32)0x800)
818 bytesToWrite = 2;
819 else if (UcnVal < (UTF32)0x10000)
820 bytesToWrite = 3;
821 else
822 bytesToWrite = 4;
823
824 const unsigned byteMask = 0xBF;
825 const unsigned byteMark = 0x80;
826
827
828
829 static const UTF8 firstByteMark[5] = {
830 0x00, 0x00, 0xC0, 0xE0, 0xF0
831 };
832
833 ResultBuf += bytesToWrite;
834 switch (bytesToWrite) {
835 case 4:
836 *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
837 [[fallthrough]];
838 case 3:
839 *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
840 [[fallthrough]];
841 case 2:
842 *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
843 [[fallthrough]];
844 case 1:
845 *--ResultBuf = (UTF8) (UcnVal | firstByteMark[bytesToWrite]);
846 }
847
848 ResultBuf += bytesToWrite;
849}
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
// Constructor fragment for the numeric literal parser. Visible steps:
//  1. bind the token spelling range and reset the saw_* flags;
//  2. dispatch on the first character (leading zero versus decimal);
//  3. record where the suffix starts and pre-scan it for fixed-point letters;
//  4. walk the suffix characters one by one, leaving the loop on the first
//     character that is not a valid continuation;
//  5. treat leftover characters either as a user-defined suffix or as an
//     invalid-suffix error.
// NOTE(review): this listing has lost the constructor signature and a large
// number of statements (nearly every line that assigned a member flag, plus
// several conditions), so comments below are limited to what is visible.
908 : SM(SM), LangOpts(LangOpts), Diags(Diags),
909 ThisTokBegin(TokSpelling.begin()), ThisTokEnd(TokSpelling.end()) {
910
911 s = DigitsBegin = ThisTokBegin;
912 saw_exponent = false;
913 saw_period = false;
914 saw_ud_suffix = false;
915 saw_fixed_point_suffix = false;
930
931
932
933
934
935
936
937
// NOTE(review): the first half of this condition (listing line 938) is absent.
// The visible half reads the character at ThisTokEnd, one past the spelling.
939 !(LangOpts.HLSL && *ThisTokEnd == '.')) {
940 Diags.Report(TokLoc, diag::err_lexing_numeric);
942 return;
943 }
944
945 if (*s == '0') {
946 ParseNumberStartingWithZero(TokLoc);
948 return;
949 } else {
950 radix = 10;
952 if (s == ThisTokEnd) {
953
954 } else {
955 ParseDecimalOrOctalCommon(TokLoc);
957 return;
958 }
959 }
960
// Everything from here to ThisTokEnd is treated as suffix.
961 SuffixBegin = s;
962 checkSeparator(TokLoc, s, CSK_AfterDigits);
963
964
// With fixed-point enabled, any r, R, k or K in the suffix marks the literal
// as carrying a fixed-point suffix.
965 if (LangOpts.FixedPoint) {
966 for (const char *c = s; c != ThisTokEnd; ++c) {
967 if (*c == 'r' || *c == 'k' || *c == 'R' || *c == 'K') {
968 saw_fixed_point_suffix = true;
969 break;
970 }
971 }
972 }
973
974
975
// HasSize records that a size-like suffix was already consumed, so a second
// one is rejected.
978 bool HasSize = false;
979 bool DoubleUnderscore = false;
980
981
982
// Within the switch, continue accepts the character and moves to the next one;
// break falls out to the break after the switch, which ends suffix scanning.
983 for (; s != ThisTokEnd; ++s) {
984 switch (*s) {
985 case 'R':
986 case 'r':
987 if (!LangOpts.FixedPoint)
988 break;
990 if (!(saw_period || saw_exponent)) break;
992 continue;
993 case 'K':
994 case 'k':
995 if (!LangOpts.FixedPoint)
996 break;
998 if (!(saw_period || saw_exponent)) break;
1000 continue;
1001 case 'h':
1002 case 'H':
1003
1004 if (!(LangOpts.Half || LangOpts.FixedPoint))
1005 break;
1006 if (isIntegerLiteral()) break;
1007 if (HasSize)
1008 break;
1009 HasSize = true;
1011 continue;
1012 case 'f':
1013 case 'F':
1014 if (!isFPConstant) break;
1015 if (HasSize)
1016 break;
1017 HasSize = true;
1018
1019
1020
1021
1022
1023
1024
// An f followed by the digits 1 and 6 is consumed as a unit when the target or
// language mode permits it.
1025 if ((Target.hasFloat16Type() || LangOpts.CUDA ||
1026 (LangOpts.OpenMPIsTargetDevice && Target.getTriple().isNVPTX())) &&
1027 s + 2 < ThisTokEnd && s[1] == '1' && s[2] == '6') {
1028 s += 2;
1030 continue;
1031 }
1032
1034 continue;
1035 case 'q':
1036 case 'Q':
1037 if (!isFPConstant) break;
1038 if (HasSize)
1039 break;
1040 HasSize = true;
1042 continue;
1043 case 'u':
1044 case 'U':
1045 if (isFPConstant) break;
1046 if (isUnsigned) break;
1048 continue;
1049 case 'l':
1050 case 'L':
1051 if (HasSize)
1052 break;
1053 HasSize = true;
1054
1055
// NOTE(review): the if that opens this brace pair (listing line 1056) is
// absent, so the else below has no visible matching if.
1057 assert(s + 1 < ThisTokEnd && "didn't maximally munch?");
1058 if (isFPConstant) break;
1060 ++s;
1061 } else {
1063 }
1064 continue;
1065 case 'z':
1066 case 'Z':
1067 if (isFPConstant)
1068 break;
1069 if (HasSize)
1070 break;
1071 HasSize = true;
1073 continue;
1074 case 'i':
1075 case 'I':
// With Microsoft extensions, i followed by 8, 16, 32 or 64 on an integer
// literal is a sized-integer suffix; otherwise fall through to the j/J case.
1076 if (LangOpts.MicrosoftExt && !isFPConstant) {
1077
1078
1079 uint8_t Bits = 0;
1080 size_t ToSkip = 0;
1081 switch (s[1]) {
1082 case '8':
1083 Bits = 8;
1084 ToSkip = 2;
1085 break;
1086 case '1':
1087 if (s[2] == '6') {
1088 Bits = 16;
1089 ToSkip = 3;
1090 }
1091 break;
1092 case '3':
1093 if (s[2] == '2') {
1094 Bits = 32;
1095 ToSkip = 3;
1096 }
1097 break;
1098 case '6':
1099 if (s[2] == '4') {
1100 Bits = 64;
1101 ToSkip = 3;
1102 }
1103 break;
1104 default:
1105 break;
1106 }
1107 if (Bits) {
1108 if (HasSize)
1109 break;
1110 HasSize = true;
1112 s += ToSkip;
1113 assert(s <= ThisTokEnd && "didn't maximally munch?");
1114 break;
1115 }
1116 }
1117 [[fallthrough]];
1118 case 'j':
1119 case 'J':
1120 if (isImaginary) break;
1122 continue;
1123 case '_':
1124 if (isFPConstant)
1125 break;
1126 if (HasSize)
1127 break;
1128
1129
// In C++, two underscores may introduce an optional u or U followed by wb/WB.
1130 assert(!DoubleUnderscore && "unhandled double underscore case");
1131 if (LangOpts.CPlusPlus && s + 2 < ThisTokEnd &&
1132 s[1] == '_') {
1133
1134 DoubleUnderscore = true;
1135 s += 2;
1136 if (s + 1 < ThisTokEnd &&
1137 (*s == 'u' || *s == 'U')) {
1139 ++s;
1140 }
1141 if (s + 1 < ThisTokEnd &&
1142 ((*s == 'w' && *(++s) == 'b') || (*s == 'W' && *(++s) == 'B'))) {
1144 HasSize = true;
1145 continue;
1146 }
1147 }
1148 break;
1149 case 'w':
1150 case 'W':
1151 if (isFPConstant)
1152 break;
1153 if (HasSize)
1154 break;
1155
1156
1157
1158
1159
// A bare wb/WB is accepted outside C++, or in C++ after a double underscore.
1160 if ((!LangOpts.CPlusPlus || DoubleUnderscore) && s + 1 < ThisTokEnd &&
1161 ((s[0] == 'w' && s[1] == 'b') || (s[0] == 'W' && s[1] == 'B'))) {
1163 HasSize = true;
1164 ++s;
1165 continue;
1166 }
1167 }
1168
// Reached when a case above used break: stop scanning the suffix.
1169 break;
1170 }
1171
1172
// NOTE(review): the conditions guarding the user-defined-suffix handling
// (listing lines 1173, 1176-1177, 1180-1189) are absent from this listing.
1174
1175 expandUCNs(UDSuffixBuf, StringRef(SuffixBegin, ThisTokEnd - SuffixBegin));
1178
1179
1190 saw_fixed_point_suffix = false;
1193 }
1194
1195 saw_ud_suffix = true;
1196 return;
1197 }
1198
// Leftover characters that are not a user-defined suffix are an error.
1199 if (s != ThisTokEnd) {
1200
1202 TokLoc, SuffixBegin - ThisTokBegin, SM, LangOpts),
1203 diag::err_invalid_suffix_constant)
1204 << StringRef(SuffixBegin, ThisTokEnd - SuffixBegin)
1205 << (isFixedPointConstant ? 2 : isFPConstant);
1207 }
1208 }
1209
// NOTE(review): the left operand of this condition is missing in this listing.
1210 if ( && saw_fixed_point_suffix) {
1212 }
1213}
1214
1215
1216
1217
// Shared tail of decimal and octal parsing, entered with radix 8 or 10. Handles
// an optional fractional part introduced by a period and an optional exponent
// introduced by e or E; either one switches radix to 10.
// NOTE(review): listing lines 1223-1226, 1229, 1239, 1253-1254 and 1257 are
// absent, including the invalid-digit check that opens the first block and
// the statement that skips the fractional digits.
1218void NumericLiteralParser::ParseDecimalOrOctalCommon(SourceLocation TokLoc){
1219 assert((radix == 8 || radix == 10) && "Unexpected radix");
1220
1221
1222
1227 diag::err_invalid_digit)
1228 << StringRef(s, 1) << (radix == 8 ? 1 : 0);
1230 return;
1231 }
1232
// Fractional part: a digit separator may not touch the period on either side.
1233 if (*s == '.') {
1234 checkSeparator(TokLoc, s, CSK_AfterDigits);
1235 s++;
1236 radix = 10;
1237 saw_period = true;
1238 checkSeparator(TokLoc, s, CSK_BeforeDigits);
1240 }
// Exponent part: optional sign followed by at least one digit.
1241 if (*s == 'e' || *s == 'E') {
1242 checkSeparator(TokLoc, s, CSK_AfterDigits);
1243 const char *Exponent = s;
1244 s++;
1245 radix = 10;
1246 saw_exponent = true;
1247 if (s != ThisTokEnd && (*s == '+' || *s == '-')) s++;
1248 const char *first_non_digit = SkipDigits(s);
1249 if (containsDigits(s, first_non_digit)) {
1250 checkSeparator(TokLoc, s, CSK_BeforeDigits);
1251 s = first_non_digit;
1252 } else {
// No digits after the exponent marker: reported at the marker position.
1255 TokLoc, Exponent - ThisTokBegin, SM, LangOpts),
1256 diag::err_exponent_has_no_digits);
1258 }
1259 return;
1260 }
1261 }
1262}
1263
1264
1265
1266
// Decides whether Suffix is acceptable as a user-defined literal suffix:
//  - never before C++11, and never for an empty suffix;
//  - a suffix starting with exactly one underscore is always accepted;
//  - from C++14 on, the fixed list below is accepted as well, with d and y
//    only when C++20 is enabled.
// NOTE(review): the first line of the signature is absent from this listing.
1268 StringRef Suffix) {
1269 if (!LangOpts.CPlusPlus11 || Suffix.empty())
1270 return false;
1271
1272
1273
1274
1275 if (Suffix.starts_with("_") && !Suffix.starts_with("__"))
1276 return true;
1277
1278
1279 if (!LangOpts.CPlusPlus14)
1280 return false;
1281
1282
1283
1284
// NOTE(review): the StringSwitch below has lost its template argument in this
// listing; presumably it is instantiated for bool -- confirm.
1285 return llvm::StringSwitch(Suffix)
1286 .Cases("h", "min", "s", true)
1287 .Cases("ms", "us", "ns", true)
1288 .Cases("il", "i", "if", true)
1289 .Cases("d", "y", LangOpts.CPlusPlus20)
1290 .Default(false);
1291}
1292
// Diagnoses a digit separator that is not between two digits.
// With CSK_AfterDigits the character just before Pos is inspected (nothing to
// check at the very start of the token); otherwise the character at Pos is
// inspected (nothing to check at the end of the token).
// NOTE(review): listing lines 1304 and 1308 are absent, so the start of the
// report call and whatever follows it are not visible here.
1293void NumericLiteralParser::checkSeparator(SourceLocation TokLoc,
1294 const char *Pos,
1295 CheckSeparatorKind IsAfterDigits) {
1296 if (IsAfterDigits == CSK_AfterDigits) {
1297 if (Pos == ThisTokBegin)
1298 return;
1299 --Pos;
1300 } else if (Pos == ThisTokEnd)
1301 return;
1302
1303 if (isDigitSeparator(*Pos)) {
1305 LangOpts),
1306 diag::err_digit_separator_not_between_digits)
1307 << IsAfterDigits;
1309 }
1310}
1311
1312
1313
1314
1315
1316
// Parses a literal whose first character is 0. Three shapes are recognised:
//  - 0x or 0X followed by a hex digit or a period: hexadecimal, possibly a hex
//    float with a p/P exponent;
//  - 0b or 0B followed by 0 or 1: binary;
//  - anything else: octal, which may turn out to be a decimal floating literal
//    and is then handed to ParseDecimalOrOctalCommon.
// NOTE(review): many statements are absent from this listing (for example
// listing lines 1329, 1337, 1345, 1349, 1363-1364, 1367, 1407, 1410-1412, 1427
// and 1439), including the calls that skip the hex, binary and octal digits.
1317void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
1318 assert(s[0] == '0' && "Invalid method call");
1319 s++;
1320
1321 int c1 = s[0];
1322
1323
// Hexadecimal integer or hexadecimal floating literal.
1324 if ((c1 == 'x' || c1 == 'X') && (isHexDigit(s[1]) || s[1] == '.')) {
1325 s++;
1326 assert(s < ThisTokEnd && "didn't maximally munch?");
1327 radix = 16;
1328 DigitsBegin = s;
1330 bool HasSignificandDigits = containsDigits(DigitsBegin, s);
1331 if (s == ThisTokEnd) {
1332
1333 } else if (*s == '.') {
1334 s++;
1335 saw_period = true;
1336 const char *floatDigitsBegin = s;
1338 if (containsDigits(floatDigitsBegin, s))
1339 HasSignificandDigits = true;
1340 if (HasSignificandDigits)
1341 checkSeparator(TokLoc, floatDigitsBegin, CSK_BeforeDigits);
1342 }
1343
// A hex literal needs at least one significand digit on either side of the
// period.
1344 if (!HasSignificandDigits) {
1346 LangOpts),
1347 diag::err_hex_constant_requires)
1348 << LangOpts.CPlusPlus << 1;
1350 return;
1351 }
1352
1353
1354
// Binary exponent of a hex float: optional sign, then at least one digit.
1355 if (*s == 'p' || *s == 'P') {
1356 checkSeparator(TokLoc, s, CSK_AfterDigits);
1357 const char *Exponent = s;
1358 s++;
1359 saw_exponent = true;
1360 if (s != ThisTokEnd && (*s == '+' || *s == '-')) s++;
1361 const char *first_non_digit = SkipDigits(s);
1362 if (!containsDigits(s, first_non_digit)) {
1365 TokLoc, Exponent - ThisTokBegin, SM, LangOpts),
1366 diag::err_exponent_has_no_digits);
1368 }
1369 return;
1370 }
1371 checkSeparator(TokLoc, s, CSK_BeforeDigits);
1372 s = first_non_digit;
1373
// Hex floats are an extension when HexFloats is off, and get a compatibility
// warning when C++17 is enabled.
1374 if (!LangOpts.HexFloats)
1375 Diags.Report(TokLoc, LangOpts.CPlusPlus
1376 ? diag::ext_hex_literal_invalid
1377 : diag::ext_hex_constant_invalid);
1378 else if (LangOpts.CPlusPlus17)
1379 Diags.Report(TokLoc, diag::warn_cxx17_hex_literal);
// A period without a p/P exponent is an error for hex literals.
1380 } else if (saw_period) {
1382 LangOpts),
1383 diag::err_hex_constant_requires)
1384 << LangOpts.CPlusPlus << 0;
1386 }
1387 return;
1388 }
1389
1390
// Binary literal.
1391 if ((c1 == 'b' || c1 == 'B') && (s[1] == '0' || s[1] == '1')) {
1392
// Pick the compatibility or extension diagnostic for the language mode.
1393 unsigned DiagId;
1394 if (LangOpts.CPlusPlus14)
1395 DiagId = diag::warn_cxx11_compat_binary_literal;
1396 else if (LangOpts.C23)
1397 DiagId = diag::warn_c23_compat_binary_literal;
1398 else if (LangOpts.CPlusPlus)
1399 DiagId = diag::ext_binary_literal_cxx14;
1400 else
1401 DiagId = diag::ext_binary_literal;
1402 Diags.Report(TokLoc, DiagId);
1403 ++s;
1404 assert(s < ThisTokEnd && "didn't maximally munch?");
1405 radix = 2;
1406 DigitsBegin = s;
1408 if (s == ThisTokEnd) {
1409
1413 LangOpts),
1414 diag::err_invalid_digit)
1415 << StringRef(s, 1) << 2;
1417 }
1418
1419 return;
1420 }
1421
1422
1423
1424
// Otherwise assume octal.
1425 radix = 8;
1426 const char *PossibleNewDigitStart = s;
1428
1429
1430
// Move DigitsBegin only if s advanced past PossibleNewDigitStart.
1431 if (s != PossibleNewDigitStart)
1432 DigitsBegin = PossibleNewDigitStart;
1433
1434 if (s == ThisTokEnd)
1435 return;
1436
1437
1438
// If the decimal digits are followed by a period or an exponent marker, the
// literal is reinterpreted with radix 10.
1440 const char *EndDecimal = SkipDigits(s);
1441 if (EndDecimal[0] == '.' || EndDecimal[0] == 'e' || EndDecimal[0] == 'E') {
1442 s = EndDecimal;
1443 radix = 10;
1444 }
1445 }
1446
1447 ParseDecimalOrOctalCommon(TokLoc);
1448}
1449
// Body of a predicate that tells whether a literal with NumDigits digits in the
// given Radix is guaranteed to fit in 64 bits, judged by digit count alone:
//   radix 2  -> at most 64 digits (1 bit each)
//   radix 8  -> at most 21 digits (3 bits each)
//   radix 10 -> at most 19 digits
//   radix 16 -> at most 16 digits (4 bits each)
// Any other radix is treated as unreachable.
// NOTE(review): the signature line is absent from this listing.
1451 switch (Radix) {
1452 case 2:
1453 return NumDigits <= 64;
1454 case 8:
1455 return NumDigits <= 64 / 3;
1456 case 10:
1457 return NumDigits <= 19;
1458 case 16:
1459 return NumDigits <= 64 / 4;
1460 default:
1461 llvm_unreachable("impossible Radix");
1462 }
1463}
1464
1465
1466
1467
1469
1470
1471
1472
1473
1474
// Converts the digits between DigitsBegin and SuffixBegin to an integer stored
// in Val and returns true when the value did not fit.
// Two paths are visible: a fast path that accumulates in a uint64_t and then
// checks whether Val kept the value, and a general path that does the
// arithmetic in Val and detects overflow after every multiply and add.
// NOTE(review): the signature and the condition that selects the fast path
// (listing lines 1468 and 1476) are absent from this listing.
1469
1470
1471
1472
1473
1474
1475 const unsigned NumDigits = SuffixBegin - DigitsBegin;
1477 uint64_t N = 0;
1478 for (const char *Ptr = DigitsBegin; Ptr != SuffixBegin; ++Ptr)
1479 if (!isDigitSeparator(*Ptr))
1480 N = N * radix + llvm::hexDigitValue(*Ptr);
1481
1482
1483
// Val may be narrower than 64 bits, so compare what it kept against N.
1484 Val = N;
1485 return Val.getZExtValue() != N;
1486 }
1487
1488 Val = 0;
1489 const char *Ptr = DigitsBegin;
1490
1491 llvm::APInt RadixVal(Val.getBitWidth(), radix);
1492 llvm::APInt CharVal(Val.getBitWidth(), 0);
1493 llvm::APInt OldVal = Val;
1494
1495 bool OverflowOccurred = false;
1496 while (Ptr < SuffixBegin) {
// Digit separators carry no value.
1497 if (isDigitSeparator(*Ptr)) {
1498 ++Ptr;
1499 continue;
1500 }
1501
1502 unsigned C = llvm::hexDigitValue(*Ptr++);
1503
1504
1505 assert(C < radix && "NumericLiteralParser ctor should have rejected this");
1506
1507 CharVal = C;
1508
1509
1510
1511 OldVal = Val;
1512
1513
// The multiply overflowed if dividing the product back does not give the old
// value.
1514 Val *= RadixVal;
1515 OverflowOccurred |= Val.udiv(RadixVal) != OldVal;
1516
1517
1518
// The add overflowed if the sum wrapped below the addend.
1519 Val += CharVal;
1520 OverflowOccurred |= Val.ult(CharVal);
1521 }
1522 return OverflowOccurred;
1523}
1524
// Converts the literal text (token start up to the suffix) to a floating-point
// value using rounding mode RM and returns the resulting status. Digit
// separators are removed into a temporary buffer before conversion.
// NOTE(review): the line naming the function and its first parameter (listing
// line 1526) and the declaration of Buffer (1532) are absent from this listing.
1525llvm::APFloat::opStatus
1527 llvm::RoundingMode RM) {
1528 using llvm::APFloat;
1529
// The text to convert never extends past the end of the token.
1530 unsigned n = std::min(SuffixBegin - ThisTokBegin, ThisTokEnd - ThisTokBegin);
1531
1533 StringRef Str(ThisTokBegin, n);
// Only copy when a separator is actually present.
1534 if (Str.contains('\'')) {
1535 Buffer.reserve(n);
1536 std::remove_copy_if(Str.begin(), Str.end(), std::back_inserter(Buffer),
1537 &isDigitSeparator);
1538 Str = Buffer;
1539 }
1540
1541 auto StatusOrErr = Result.convertFromString(Str, RM);
1542 assert(StatusOrErr && "Invalid floating point representation");
// If the conversion reported an error, opInvalidOp is returned instead.
1543 return !errorToBool(StatusOrErr.takeError()) ? *StatusOrErr
1544 : APFloat::opInvalidOp;
1545}
1546
// Body of a predicate: tells whether c is an exponent marker. When isHex is
// set the marker is p or P, otherwise e or E.
// NOTE(review): the signature line is absent from this listing.
1548 if (isHex)
1549 return c == 'p' || c == 'P';
1550 return c == 'e' || c == 'E';
1551}
1552
1554 assert(radix == 16 || radix == 10);
1555
1556
1557 unsigned NumDigits = SuffixBegin - DigitsBegin;
1558 if (saw_period) --NumDigits;
1559
1560
1561 bool ExpOverflowOccurred = false;
1562 bool NegativeExponent = false;
1563 const char *ExponentBegin;
1564 uint64_t Exponent = 0;
1565 int64_t BaseShift = 0;
1566 if (saw_exponent) {
1567 const char *Ptr = DigitsBegin;
1568
1570 ++Ptr;
1571 ExponentBegin = Ptr;
1572 ++Ptr;
1573 NegativeExponent = *Ptr == '-';
1574 if (NegativeExponent) ++Ptr;
1575
1576 unsigned NumExpDigits = SuffixBegin - Ptr;
1578 llvm::StringRef ExpStr(Ptr, NumExpDigits);
1579 llvm::APInt ExpInt(64, ExpStr, 10);
1580 Exponent = ExpInt.getZExtValue();
1581 } else {
1582 ExpOverflowOccurred = true;
1583 }
1584
1585 if (NegativeExponent) BaseShift -= Exponent;
1586 else BaseShift += Exponent;
1587 }
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606 uint64_t NumBitsNeeded;
1607 if (radix == 10)
1608 NumBitsNeeded = 4 * (NumDigits + Exponent) + Scale;
1609 else
1610 NumBitsNeeded = 4 * NumDigits + Exponent + Scale;
1611
1612 if (NumBitsNeeded > std::numeric_limits::max())
1613 ExpOverflowOccurred = true;
1614 llvm::APInt Val(static_cast<unsigned>(NumBitsNeeded), 0, false);
1615
1616 bool FoundDecimal = false;
1617
1618 int64_t FractBaseShift = 0;
1619 const char *End = saw_exponent ? ExponentBegin : SuffixBegin;
1620 for (const char *Ptr = DigitsBegin; Ptr < End; ++Ptr) {
1621 if (*Ptr == '.') {
1622 FoundDecimal = true;
1623 continue;
1624 }
1625
1626
1627 unsigned C = llvm::hexDigitValue(*Ptr);
1628 assert(C < radix && "NumericLiteralParser ctor should have rejected this");
1629
1630 Val *= radix;
1631 Val += C;
1632
1633 if (FoundDecimal)
1634
1635
1636 --FractBaseShift;
1637 }
1638
1639
1640 if (radix == 16) FractBaseShift *= 4;
1641 BaseShift += FractBaseShift;
1642
1643 Val <<= Scale;
1644
1645 uint64_t Base = (radix == 16) ? 2 : 10;
1646 if (BaseShift > 0) {
1647 for (int64_t i = 0; i < BaseShift; ++i) {
1648 Val *= Base;
1649 }
1650 } else if (BaseShift < 0) {
1651 for (int64_t i = BaseShift; i < 0 && !Val.isZero(); ++i)
1652 Val = Val.udiv(Base);
1653 }
1654
1655 bool IntOverflowOccurred = false;
1656 auto MaxVal = llvm::APInt::getMaxValue(StoreVal.getBitWidth());
1657 if (Val.getBitWidth() > StoreVal.getBitWidth()) {
1658 IntOverflowOccurred |= Val.ugt(MaxVal.zext(Val.getBitWidth()));
1659 StoreVal = Val.trunc(StoreVal.getBitWidth());
1660 } else if (Val.getBitWidth() < StoreVal.getBitWidth()) {
1661 IntOverflowOccurred |= Val.zext(MaxVal.getBitWidth()).ugt(MaxVal);
1662 StoreVal = Val.zext(StoreVal.getBitWidth());
1663 } else {
1664 StoreVal = Val;
1665 }
1666
1667 return IntOverflowOccurred || ExpOverflowOccurred;
1668}
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1712
1713 HadError = false;
1714
1715 Kind = kind;
1716
1717 const char *TokBegin = begin;
1718
1719
1720 if (Kind != tok::char_constant)
1721 ++begin;
1722 if (Kind == tok::utf8_char_constant)
1723 ++begin;
1724
1725
1726 if (begin[0] != '\'') {
1727 PP.Diag(Loc, diag::err_lexing_char);
1728 HadError = true;
1729 return;
1730 }
1731
1732 ++begin;
1733
1734
1735 if (end[-1] != '\'') {
1736 const char *UDSuffixEnd = end;
1737 do {
1738 --end;
1739 } while (end[-1] != '\'');
1740
1741 expandUCNs(UDSuffixBuf, StringRef(end, UDSuffixEnd - end));
1742 UDSuffixOffset = end - TokBegin;
1743 }
1744
1745
1746 assert(end != begin && "Invalid token lexed");
1747 --end;
1748
1749
1750
1751
1753 "Assumes char is 8 bits");
1756 "Assumes sizeof(int) on target is <= 64 and a multiple of char");
1758 "Assumes sizeof(wchar) on target is <= 64");
1759
1761 codepoint_buffer.resize(end - begin);
1762 uint32_t *buffer_begin = &codepoint_buffer.front();
1763 uint32_t *buffer_end = buffer_begin + codepoint_buffer.size();
1764
1765
1766
1767
1768 uint32_t largest_character_for_kind;
1769 if (tok::wide_char_constant == Kind) {
1770 largest_character_for_kind =
1772 } else if (tok::utf8_char_constant == Kind) {
1773 largest_character_for_kind = 0x7F;
1774 } else if (tok::utf16_char_constant == Kind) {
1775 largest_character_for_kind = 0xFFFF;
1776 } else if (tok::utf32_char_constant == Kind) {
1777 largest_character_for_kind = 0x10FFFF;
1778 } else {
1779 largest_character_for_kind = 0x7Fu;
1780 }
1781
1782 while (begin != end) {
1783
1784 if (begin[0] != '\\') {
1785 char const *start = begin;
1786 do {
1787 ++begin;
1788 } while (begin != end && *begin != '\\');
1789
1790 char const *tmp_in_start = start;
1791 uint32_t *tmp_out_start = buffer_begin;
1792 llvm::ConversionResult res =
1793 llvm::ConvertUTF8toUTF32(reinterpret_cast<llvm::UTF8 const **>(&start),
1794 reinterpret_cast<llvm::UTF8 const *>(begin),
1795 &buffer_begin, buffer_end, llvm::strictConversion);
1796 if (res != llvm::conversionOK) {
1797
1798
1799
1800 bool NoErrorOnBadEncoding = isOrdinary();
1801 unsigned Msg = diag::err_bad_character_encoding;
1802 if (NoErrorOnBadEncoding)
1803 Msg = diag::warn_bad_character_encoding;
1805 if (NoErrorOnBadEncoding) {
1806 start = tmp_in_start;
1807 buffer_begin = tmp_out_start;
1808 for (; start != begin; ++start, ++buffer_begin)
1809 *buffer_begin = static_cast<uint8_t>(*start);
1810 } else {
1811 HadError = true;
1812 }
1813 } else {
1814 for (; tmp_out_start < buffer_begin; ++tmp_out_start) {
1815 if (*tmp_out_start > largest_character_for_kind) {
1816 HadError = true;
1817 PP.Diag(Loc, diag::err_character_too_large);
1818 }
1819 }
1820 }
1821
1822 continue;
1823 }
1824
1825 if (begin[1] == 'u' || begin[1] == 'U' || begin[1] == 'N') {
1826 unsigned short UcnLen = 0;
1827 if ((TokBegin, begin, end, *buffer_begin, UcnLen,
1830 HadError = true;
1831 } else if (*buffer_begin > largest_character_for_kind) {
1832 HadError = true;
1833 PP.Diag(Loc, diag::err_character_too_large);
1834 }
1835
1836 ++buffer_begin;
1837 continue;
1838 }
1840 uint64_t result =
1845 *buffer_begin++ = result;
1846 }
1847
1848 unsigned NumCharsSoFar = buffer_begin - &codepoint_buffer.front();
1849
1850 if (NumCharsSoFar > 1) {
1851 if (isOrdinary() && NumCharsSoFar == 4)
1852 PP.Diag(Loc, diag::warn_four_char_character_literal);
1854 PP.Diag(Loc, diag::warn_multichar_character_literal);
1855 else {
1856 PP.Diag(Loc, diag::err_multichar_character_literal) << (isWide() ? 0 : 1);
1857 HadError = true;
1858 }
1859 IsMultiChar = true;
1860 } else {
1861 IsMultiChar = false;
1862 }
1863
1865
1866
1867
1868 bool multi_char_too_long = false;
1870 LitVal = 0;
1871 for (size_t i = 0; i < NumCharsSoFar; ++i) {
1872
1873 multi_char_too_long |= (LitVal.countl_zero() < 8);
1874 LitVal <<= 8;
1875 LitVal = LitVal + (codepoint_buffer[i] & 0xFF);
1876 }
1877 } else if (NumCharsSoFar > 0) {
1878
1879 LitVal = buffer_begin[-1];
1880 }
1881
1882 if (!HadError && multi_char_too_long) {
1883 PP.Diag(Loc, diag::warn_char_constant_too_large);
1884 }
1885
1886
1887 Value = LitVal.getZExtValue();
1888
1889
1890
1891
1892
1893 if (isOrdinary() && NumCharsSoFar == 1 && (Value & 128) &&
1896}
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1955 : SM(PP.getSourceManager()), Features(PP.getLangOpts()),
1956 Target(PP.getTargetInfo()), Diags(&PP.getDiagnostics()),
1957 MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
1958 ResultPtr(ResultBuf.data()), EvalMethod(EvalMethod), hadError(false),
1960 init(StringToks);
1961}
1962
1963void StringLiteralParser::init(ArrayRef StringToks){
1964
1965
1966 if (StringToks.empty() || StringToks[0].getLength() < 2)
1968
1969
1970
1971
1972
1973 assert(!StringToks.empty() && "expected at least one token");
1974 MaxTokenLength = StringToks[0].getLength();
1975 assert(StringToks[0].getLength() >= 2 && "literal token is invalid!");
1976 SizeBound = StringToks[0].getLength() - 2;
1978
1979
1980 Kind = tok::string_literal;
1981
1982
1983 for (const Token &Tok : StringToks) {
1984 if (Tok.getLength() < 2)
1985 return DiagnoseLexingError(Tok.getLocation());
1986
1987
1988
1989 assert(Tok.getLength() >= 2 && "literal token is invalid!");
1990 SizeBound += Tok.getLength() - 2;
1991
1992
1993 if (Tok.getLength() > MaxTokenLength)
1994 MaxTokenLength = Tok.getLength();
1995
1996
1997
1998 if (isUnevaluated() && Tok.getKind() != tok::string_literal) {
1999 if (Diags) {
2002 Features);
2005 StringRef Prefix(SM.getCharacterData(Tok.getLocation()),
2007 Diags->Report(Tok.getLocation(),
2008 Features.CPlusPlus26
2009 ? diag::err_unevaluated_string_prefix
2010 : diag::warn_unevaluated_string_prefix)
2012 }
2013 if (Features.CPlusPlus26)
2015 } else if (Tok.isNot(Kind) && Tok.isNot(tok::string_literal)) {
2017 Kind = Tok.getKind();
2018 } else {
2019 if (Diags)
2020 Diags->Report(Tok.getLocation(), diag::err_unsupported_string_concat);
2022 }
2023 }
2024 }
2025
2026
2027 ++SizeBound;
2028
2029
2030
2031
2033 assert((CharByteWidth & 7) == 0 && "Assumes character size is byte multiple");
2034 CharByteWidth /= 8;
2035
2036
2037
2038 SizeBound *= CharByteWidth;
2039
2040
2041 ResultBuf.resize(SizeBound);
2042
2043
2045 TokenBuf.resize(MaxTokenLength);
2046
2047
2048
2049 ResultPtr = &ResultBuf[0];
2050
2052
2054
2055 for (unsigned i = 0, e = StringToks.size(); i != e; ++i) {
2056 const char *ThisTokBuf = &TokenBuf[0];
2057
2058
2059
2060 bool StringInvalid = false;
2061 unsigned ThisTokLen =
2063 &StringInvalid);
2064 if (StringInvalid)
2065 return DiagnoseLexingError(StringToks[i].getLocation());
2066
2067 const char *ThisTokBegin = ThisTokBuf;
2068 const char *ThisTokEnd = ThisTokBuf+ThisTokLen;
2069
2070
2071 if (ThisTokEnd[-1] != '"') {
2072 const char *UDSuffixEnd = ThisTokEnd;
2073 do {
2074 --ThisTokEnd;
2075 } while (ThisTokEnd[-1] != '"');
2076
2077 StringRef UDSuffix(ThisTokEnd, UDSuffixEnd - ThisTokEnd);
2078
2079 if (UDSuffixBuf.empty()) {
2080 if (StringToks[i].hasUCN())
2082 else
2083 UDSuffixBuf.assign(UDSuffix);
2084 UDSuffixToken = i;
2085 UDSuffixOffset = ThisTokEnd - ThisTokBuf;
2086 UDSuffixTokLoc = StringToks[i].getLocation();
2087 } else {
2089 if (StringToks[i].hasUCN()) {
2090 expandUCNs(ExpandedUDSuffix, UDSuffix);
2091 UDSuffix = ExpandedUDSuffix;
2092 }
2093
2094
2095
2096
2097
2098 bool UnevaluatedStringHasUDL = isUnevaluated() && !UDSuffix.empty();
2099 if (UDSuffixBuf != UDSuffix || UnevaluatedStringHasUDL) {
2100 if (Diags) {
2101 SourceLocation TokLoc = StringToks[i].getLocation();
2102 if (UnevaluatedStringHasUDL) {
2103 Diags->Report(TokLoc, diag::err_unevaluated_string_udl)
2105 } else {
2106 Diags->Report(TokLoc, diag::err_string_concat_mixed_suffix)
2107 << UDSuffixBuf << UDSuffix
2108 << SourceRange(UDSuffixTokLoc, UDSuffixTokLoc);
2109 }
2110 }
2112 }
2113 }
2114 }
2115
2116
2117 --ThisTokEnd;
2118
2119
2120
2121
2122 if (ThisTokBuf[0] == 'L' || ThisTokBuf[0] == 'u' || ThisTokBuf[0] == 'U') {
2123 ++ThisTokBuf;
2124
2125 if (ThisTokBuf[0] == '8')
2126 ++ThisTokBuf;
2127 }
2128
2129
2130 if (ThisTokBuf[0] == 'R') {
2131 if (ThisTokBuf[1] != '"') {
2132
2133
2134 return DiagnoseLexingError(StringToks[i].getLocation());
2135 }
2136 ThisTokBuf += 2;
2137
2138
2139
2140 constexpr unsigned MaxRawStrDelimLen = 16;
2141
2142 const char *Prefix = ThisTokBuf;
2143 while (static_cast<unsigned>(ThisTokBuf - Prefix) < MaxRawStrDelimLen &&
2144 ThisTokBuf[0] != '(')
2145 ++ThisTokBuf;
2146 if (ThisTokBuf[0] != '(')
2147 return DiagnoseLexingError(StringToks[i].getLocation());
2148 ++ThisTokBuf;
2149
2150
2151 ThisTokEnd -= ThisTokBuf - Prefix;
2152 if (ThisTokEnd < ThisTokBuf)
2153 return DiagnoseLexingError(StringToks[i].getLocation());
2154
2155
2156
2157 StringRef RemainingTokenSpan(ThisTokBuf, ThisTokEnd - ThisTokBuf);
2158 while (!RemainingTokenSpan.empty()) {
2159
2160 size_t CRLFPos = RemainingTokenSpan.find("\r\n");
2161 StringRef BeforeCRLF = RemainingTokenSpan.substr(0, CRLFPos);
2162 StringRef AfterCRLF = RemainingTokenSpan.substr(CRLFPos);
2163
2164
2165 if (CopyStringFragment(StringToks[i], ThisTokBegin, BeforeCRLF))
2167
2168
2169
2170 RemainingTokenSpan = AfterCRLF.substr(1);
2171 }
2172 } else {
2173 if (ThisTokBuf[0] != '"') {
2174
2175
2176 return DiagnoseLexingError(StringToks[i].getLocation());
2177 }
2178 ++ThisTokBuf;
2179
2180
2181 if (() && Features.PascalStrings &&
2182 ThisTokBuf + 1 != ThisTokEnd && ThisTokBuf[0] == '\\' &&
2183 ThisTokBuf[1] == 'p') {
2184
2185
2186
2187 if (i == 0) {
2188 ++ThisTokBuf;
2191 ThisTokBuf += 2;
2192 }
2193
2194 while (ThisTokBuf != ThisTokEnd) {
2195
2196 if (ThisTokBuf[0] != '\\') {
2197 const char *InStart = ThisTokBuf;
2198 do {
2199 ++ThisTokBuf;
2200 } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
2201
2202
2203 if (CopyStringFragment(StringToks[i], ThisTokBegin,
2204 StringRef(InStart, ThisTokBuf - InStart)))
2206 continue;
2207 }
2208
2209 if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U' ||
2210 ThisTokBuf[1] == 'N') {
2214 CharByteWidth, Diags, Features);
2215 continue;
2216 }
2217
2218 unsigned ResultChar =
2221 CharByteWidth * 8, Diags, Features, EvalMethod);
2222
2223 if (CharByteWidth == 4) {
2224
2225
2226 llvm::UTF32 *ResultWidePtr = reinterpret_castllvm::UTF32\*\(ResultPtr);
2227 *ResultWidePtr = ResultChar;
2228 ResultPtr += 4;
2229 } else if (CharByteWidth == 2) {
2230
2231
2232 llvm::UTF16 *ResultWidePtr = reinterpret_castllvm::UTF16\*\(ResultPtr);
2233 *ResultWidePtr = ResultChar & 0xFFFF;
2234 ResultPtr += 2;
2235 } else {
2236 assert(CharByteWidth == 1 && "Unexpected char width");
2237 *ResultPtr++ = ResultChar & 0xFF;
2238 }
2239 }
2240 }
2241 }
2242
2244 "Pascal string in unevaluated context");
2246 if (CharByteWidth == 4) {
2247
2248
2249 llvm::UTF32 *ResultWidePtr = reinterpret_castllvm::UTF32\*\(ResultBuf.data());
2251 } else if (CharByteWidth == 2) {
2252
2253
2254 llvm::UTF16 *ResultWidePtr = reinterpret_castllvm::UTF16\*\(ResultBuf.data());
2256 } else {
2257 assert(CharByteWidth == 1 && "Unexpected char width");
2259 }
2260
2261
2263 if (Diags)
2264 Diags->Report(StringToks.front().getLocation(),
2265 diag::err_pascal_string_too_long)
2266 << SourceRange(StringToks.front().getLocation(),
2267 StringToks.back().getLocation());
2269 return;
2270 }
2271 } else if (Diags) {
2272
2273 unsigned MaxChars = Features.CPlusPlus? 65536 : Features.C99 ? 4095 : 509;
2274
2276 Diags->Report(StringToks.front().getLocation(),
2277 diag::ext_string_too_long)
2279 << (Features.CPlusPlus ? 2 : Features.C99 ? 1 : 0)
2280 << SourceRange(StringToks.front().getLocation(),
2281 StringToks.back().getLocation());
2282 }
2283}
2284
2285static const char *resyncUTF8(const char *Err, const char *End) {
2286 if (Err == End)
2287 return End;
2288 End = Err + std::min(llvm::getNumBytesForUTF8(*Err), End-Err);
2289 while (++Err != End && (*Err & 0xC0) == 0x80)
2290 ;
2291 return Err;
2292}
2293
2294
2295
2296
2297bool StringLiteralParser::CopyStringFragment(const Token &Tok,
2298 const char *TokBegin,
2299 StringRef Fragment) {
2300 const llvm::UTF8 *ErrorPtrTmp;
2301 if (ConvertUTF8toWide(CharByteWidth, Fragment, ResultPtr, ErrorPtrTmp))
2302 return false;
2303
2304
2305
2306
2307 bool NoErrorOnBadEncoding = isOrdinary();
2308 if (NoErrorOnBadEncoding) {
2309 memcpy(ResultPtr, Fragment.data(), Fragment.size());
2310 ResultPtr += Fragment.size();
2311 }
2312
2313 if (Diags) {
2314 const char *ErrorPtr = reinterpret_cast<const char *>(ErrorPtrTmp);
2315
2318 Diag(Diags, Features, SourceLoc, TokBegin,
2319 ErrorPtr, resyncUTF8(ErrorPtr, Fragment.end()),
2320 NoErrorOnBadEncoding ? diag::warn_bad_string_encoding
2321 : diag::err_bad_string_encoding);
2322
2323 const char *NextStart = resyncUTF8(ErrorPtr, Fragment.end());
2324 StringRef NextFragment(NextStart, Fragment.end()-NextStart);
2325
2326
2328 Dummy.reserve(Fragment.size() * CharByteWidth);
2329 char *Ptr = Dummy.data();
2330
2331 while (!ConvertUTF8toWide(CharByteWidth, NextFragment, Ptr, ErrorPtrTmp)) {
2332 const char *ErrorPtr = reinterpret_cast<const char *>(ErrorPtrTmp);
2333 NextStart = resyncUTF8(ErrorPtr, Fragment.end());
2335 ErrorPtr, NextStart);
2336 NextFragment = StringRef(NextStart, Fragment.end()-NextStart);
2337 }
2338 }
2339 return !NoErrorOnBadEncoding;
2340}
2341
2342void StringLiteralParser::DiagnoseLexingError(SourceLocation Loc) {
2344 if (Diags)
2345 Diags->Report(Loc, diag::err_lexing_string);
2346}
2347
2348
2349
2350
2352 unsigned ByteNo) const {
2353
2355 SpellingBuffer.resize(Tok.getLength());
2356
2357 bool StringInvalid = false;
2358 const char *SpellingPtr = &SpellingBuffer[0];
2360 &StringInvalid);
2361 if (StringInvalid)
2362 return 0;
2363
2364 const char *SpellingStart = SpellingPtr;
2365 const char *SpellingEnd = SpellingPtr+TokLen;
2366
2367
2368 if (SpellingPtr[0] == 'u' && SpellingPtr[1] == '8')
2369 SpellingPtr += 2;
2370
2371 assert(SpellingPtr[0] != 'L' && SpellingPtr[0] != 'u' &&
2372 SpellingPtr[0] != 'U' && "Doesn't handle wide or utf strings yet");
2373
2374
2375 if (SpellingPtr[0] == 'R') {
2376 assert(SpellingPtr[1] == '"' && "Should be a raw string literal!");
2377
2378 SpellingPtr += 2;
2379 while (*SpellingPtr != '(') {
2380 ++SpellingPtr;
2381 assert(SpellingPtr < SpellingEnd && "Missing ( for raw string literal");
2382 }
2383
2384 ++SpellingPtr;
2385 return SpellingPtr - SpellingStart + ByteNo;
2386 }
2387
2388
2389 assert(SpellingPtr[0] == '"' && "Should be a string literal!");
2390 ++SpellingPtr;
2391
2392
2393 while (ByteNo) {
2394 assert(SpellingPtr < SpellingEnd && "Didn't find byte offset!");
2395
2396
2397 if (*SpellingPtr != '\\') {
2398 ++SpellingPtr;
2399 --ByteNo;
2400 continue;
2401 }
2402
2403
2404 bool HadError = false;
2405 if (SpellingPtr[1] == 'u' || SpellingPtr[1] == 'U' ||
2406 SpellingPtr[1] == 'N') {
2407 const char *EscapePtr = SpellingPtr;
2408 unsigned Len = MeasureUCNEscape(SpellingStart, SpellingPtr, SpellingEnd,
2409 1, Features, HadError);
2410 if (Len > ByteNo) {
2411
2412 SpellingPtr = EscapePtr;
2413 break;
2414 }
2415 ByteNo -= Len;
2416 } else {
2417 ProcessCharEscape(SpellingStart, SpellingPtr, SpellingEnd, HadError,
2420 --ByteNo;
2421 }
2422 assert(!HadError && "This method isn't valid on erroneous strings");
2423 }
2424
2425 return SpellingPtr-SpellingStart;
2426}
2427
2428
2429
2430
2432 StringRef Suffix) {
2434 Suffix == "sv";
2435}
enum clang::sema::@1725::IndirectLocalPathEntry::EntryKind Kind
Defines the clang::LangOptions interface.
static void EncodeUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, const char *ThisTokEnd, char *&ResultBuf, bool &HadError, FullSourceLoc Loc, unsigned CharByteWidth, DiagnosticsEngine *Diags, const LangOptions &Features)
EncodeUCNEscape - Read the Universal Character Name, check constraints and convert the UTF32 to UTF8 ...
static bool ProcessUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, const char *ThisTokEnd, uint32_t &UcnVal, unsigned short &UcnLen, FullSourceLoc Loc, DiagnosticsEngine *Diags, const LangOptions &Features, bool in_char_string_literal=false)
ProcessUCNEscape - Read the Universal Character Name, check constraints and return the UTF32.
static CharSourceRange MakeCharSourceRange(const LangOptions &Features, FullSourceLoc TokLoc, const char *TokBegin, const char *TokRangeBegin, const char *TokRangeEnd)
static const char * resyncUTF8(const char *Err, const char *End)
static int MeasureUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, const char *ThisTokEnd, unsigned CharByteWidth, const LangOptions &Features, bool &HadError)
MeasureUCNEscape - Determine the number of bytes within the resulting string which this UCN will occu...
static void appendCodePoint(unsigned Codepoint, llvm::SmallVectorImpl< char > &Str)
static unsigned getEncodingPrefixLen(tok::TokenKind kind)
static void DiagnoseInvalidUnicodeCharacterName(DiagnosticsEngine *Diags, const LangOptions &Features, FullSourceLoc Loc, const char *TokBegin, const char *TokRangeBegin, const char *TokRangeEnd, llvm::StringRef Name)
static DiagnosticBuilder Diag(DiagnosticsEngine *Diags, const LangOptions &Features, FullSourceLoc TokLoc, const char *TokBegin, const char *TokRangeBegin, const char *TokRangeEnd, unsigned DiagID)
Produce a diagnostic highlighting some portion of a literal.
static bool IsEscapeValidInUnevaluatedStringLiteral(char Escape)
static bool ProcessNumericUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, const char *ThisTokEnd, uint32_t &UcnVal, unsigned short &UcnLen, bool &Delimited, FullSourceLoc Loc, DiagnosticsEngine *Diags, const LangOptions &Features, bool in_char_string_literal=false)
static bool ProcessNamedUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, const char *ThisTokEnd, uint32_t &UcnVal, unsigned short &UcnLen, FullSourceLoc Loc, DiagnosticsEngine *Diags, const LangOptions &Features)
static bool IsExponentPart(char c, bool isHex)
static bool alwaysFitsInto64Bits(unsigned Radix, unsigned NumDigits)
static unsigned ProcessCharEscape(const char *ThisTokBegin, const char *&ThisTokBuf, const char *ThisTokEnd, bool &HadError, FullSourceLoc Loc, unsigned CharWidth, DiagnosticsEngine *Diags, const LangOptions &Features, StringLiteralEvalMethod EvalMethod)
ProcessCharEscape - Parse a standard C escape sequence, which can occur in either a character or a st...
static unsigned getCharWidth(tok::TokenKind kind, const TargetInfo &Target)
llvm::MachO::Target Target
Defines the clang::Preprocessor interface.
Defines the clang::SourceLocation class and associated facilities.
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
__device__ __2f16 float __ockl_bool s
__device__ __2f16 float c
CharLiteralParser(const char *begin, const char *end, SourceLocation Loc, Preprocessor &PP, tok::TokenKind kind)
Represents a character-granular source range.
static CharSourceRange getCharRange(SourceRange R)
A little helper class used to produce diagnostics.
Concrete class used by the front-end to report problems and issues.
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
static FixItHint CreateReplacement(CharSourceRange RemoveRange, StringRef Code)
Create a code modification hint that replaces the given source range with the given code string.
static FixItHint CreateRemoval(CharSourceRange RemoveRange)
Create a code modification hint that removes the given source range.
A SourceLocation and its associated SourceManager.
const SourceManager & getManager() const
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
static SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, unsigned Characters, const SourceManager &SM, const LangOptions &LangOpts)
AdvanceToTokenCharacter - If the current SourceLocation specifies a location at the start of a token,...
static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)
getSpelling - This method is used to get the spelling of a token into a preallocated buffer,...
NumericLiteralParser(StringRef TokSpelling, SourceLocation TokLoc, const SourceManager &SM, const LangOptions &LangOpts, const TargetInfo &Target, DiagnosticsEngine &Diags)
integer-constant: [C99 6.4.4.1] decimal-constant integer-suffix octal-constant integer-suffix hexadec...
bool isFixedPointLiteral() const
bool isFloatingLiteral() const
bool isIntegerLiteral() const
llvm::APFloat::opStatus GetFloatValue(llvm::APFloat &Result, llvm::RoundingMode RM)
Convert this numeric literal to a floating value, using the specified APFloat fltSemantics (specifyin...
static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix)
Determine whether a suffix is a valid ud-suffix.
bool GetIntegerValue(llvm::APInt &Val)
GetIntegerValue - Convert this numeric literal value to an APInt that matches Val's input width.
bool GetFixedPointValue(llvm::APInt &StoreVal, unsigned Scale)
GetFixedPointValue - Convert this numeric literal value into a scaled integer that represents this va...
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
SourceManager & getSourceManager() const
const TargetInfo & getTargetInfo() const
const LangOptions & getLangOpts() const
DiagnosticsEngine & getDiagnostics() const
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const
Forwarding function for diagnostics.
Encodes a location in the source.
This class handles loading and caching of source files into memory.
A trivial tuple used to represent a source range.
unsigned getOffsetOfStringByte(const Token &TheTok, unsigned ByteNo) const
getOffsetOfStringByte - This function returns the offset of the specified byte of the string data rep...
bool isUnevaluated() const
StringLiteralParser(ArrayRef< Token > StringToks, Preprocessor &PP, StringLiteralEvalMethod StringMethod=StringLiteralEvalMethod::Evaluated)
unsigned GetStringLength() const
static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix)
Determine whether a suffix is a valid ud-suffix.
unsigned GetNumStringChars() const
Exposes information about the current target.
unsigned getIntWidth() const
getIntWidth/Align - Return the size of 'signed int' and 'unsigned int' for this target,...
unsigned getWCharWidth() const
getWCharWidth/Align - Return the size of 'wchar_t' for this target, in bits.
unsigned getCharWidth() const
Token - This structure provides full information about a lexed token.
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file.
unsigned getLength() const
tok::TokenKind getKind() const
Defines the clang::TargetInfo interface.
bool isStringLiteral(TokenKind K)
Return true if this is a C or C++ string-literal (or C++11 user-defined-string-literal) token.
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
The JSON file list parser is used to communicate input to InstallAPI.
LLVM_READONLY bool isVerticalWhitespace(unsigned char c)
Returns true if this character is vertical ASCII whitespace: '\n', '\r'.
LLVM_READONLY bool isPrintable(unsigned char c)
Return true if this character is an ASCII printable character; that is, a character that should take ...
void expandUCNs(SmallVectorImpl< char > &Buf, StringRef Input)
Copy characters from Input to Buf, expanding any UCNs.
bool tokenIsLikeStringLiteral(const Token &Tok, const LangOptions &LO)
Return true if the token is a string literal, or a function local predefined macro,...
@ Result
The result type of a method or function.
LLVM_READONLY bool isDigit(unsigned char c)
Return true if this character is an ASCII digit: [0-9].
bool isFunctionLocalStringLiteralMacro(tok::TokenKind K, const LangOptions &LO)
Return true if the token corresponds to a function local predefined macro, which expands to a string ...
LLVM_READONLY bool isPreprocessingNumberBody(unsigned char c)
Return true if this is the body character of a C preprocessing number, which is [a-zA-Z0-9_.].
LLVM_READONLY bool isHexDigit(unsigned char c)
Return true if this character is an ASCII hex digit: [0-9a-fA-F].
@ Incomplete
Template argument deduction did not deduce a value for every template parameter.