clang: lib/Lex/LiteralSupport.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

23#include "llvm/ADT/APInt.h"

24#include "llvm/ADT/SmallVector.h"

25#include "llvm/ADT/StringExtras.h"

26#include "llvm/ADT/StringSwitch.h"

27#include "llvm/Support/ConvertUTF.h"

28#include "llvm/Support/Error.h"

29#include "llvm/Support/ErrorHandling.h"

30#include "llvm/Support/Unicode.h"

31#include

32#include

33#include

34#include

35#include

36#include

37

38using namespace clang;

39

41 switch (kind) {

42 default: llvm_unreachable("Unknown token type!");

43 case tok::char_constant:

44 case tok::string_literal:

45 case tok::utf8_char_constant:

46 case tok::utf8_string_literal:

47 return Target.getCharWidth();

48 case tok::wide_char_constant:

49 case tok::wide_string_literal:

50 return Target.getWCharWidth();

51 case tok::utf16_char_constant:

52 case tok::utf16_string_literal:

53 return Target.getChar16Width();

54 case tok::utf32_char_constant:

55 case tok::utf32_string_literal:

56 return Target.getChar32Width();

57 }

58}

59

61 switch (kind) {

62 default:

63 llvm_unreachable("Unknown token type!");

64 case tok::char_constant:

65 case tok::string_literal:

66 return 0;

67 case tok::utf8_char_constant:

68 case tok::utf8_string_literal:

69 return 2;

70 case tok::wide_char_constant:

71 case tok::wide_string_literal:

72 case tok::utf16_char_constant:

73 case tok::utf16_string_literal:

74 case tok::utf32_char_constant:

75 case tok::utf32_string_literal:

76 return 1;

77 }

78}

79

82 const char *TokBegin,

83 const char *TokRangeBegin,

84 const char *TokRangeEnd) {

92}

93

94

95

96

97

98

101 const char *TokBegin, const char *TokRangeBegin,

102 const char *TokRangeEnd, unsigned DiagID) {

107 MakeCharSourceRange(Features, TokLoc, TokBegin, TokRangeBegin, TokRangeEnd);

108}

109

111 switch (Escape) {

112 case '\'':

113 case '"':

114 case '?':

115 case '\\':

116 case 'a':

117 case 'b':

118 case 'f':

119 case 'n':

120 case 'r':

121 case 't':

122 case 'v':

123 return true;

124 }

125 return false;

126}

127

128

129

131 const char *&ThisTokBuf,

132 const char *ThisTokEnd, bool &HadError,

137 const char *EscapeBegin = ThisTokBuf;

138 bool Delimited = false;

139 bool EndDelimiterFound = false;

140

141

142 ++ThisTokBuf;

143

144

145

146 unsigned ResultChar = *ThisTokBuf++;

147 char Escape = ResultChar;

148 switch (ResultChar) {

149

150 case '\\': case '\'': case '"': case '?': break;

151

152

153 case 'a':

154

155 ResultChar = 7;

156 break;

157 case 'b':

158 ResultChar = 8;

159 break;

160 case 'e':

161 if (Diags)

162 Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,

163 diag::ext_nonstandard_escape) << "e";

164 ResultChar = 27;

165 break;

166 case 'E':

167 if (Diags)

168 Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,

169 diag::ext_nonstandard_escape) << "E";

170 ResultChar = 27;

171 break;

172 case 'f':

173 ResultChar = 12;

174 break;

175 case 'n':

176 ResultChar = 10;

177 break;

178 case 'r':

179 ResultChar = 13;

180 break;

181 case 't':

182 ResultChar = 9;

183 break;

184 case 'v':

185 ResultChar = 11;

186 break;

187 case 'x': {

188 ResultChar = 0;

189 if (ThisTokBuf != ThisTokEnd && *ThisTokBuf == '{') {

190 Delimited = true;

191 ThisTokBuf++;

192 if (*ThisTokBuf == '}') {

193 HadError = true;

194 if (Diags)

195 Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,

196 diag::err_delimited_escape_empty);

197 }

198 } else if (ThisTokBuf == ThisTokEnd || isHexDigit(*ThisTokBuf)) {

199 if (Diags)

200 Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,

201 diag::err_hex_escape_no_digits) << "x";

202 return ResultChar;

203 }

204

205

206 bool Overflow = false;

207 for (; ThisTokBuf != ThisTokEnd; ++ThisTokBuf) {

208 if (Delimited && *ThisTokBuf == '}') {

209 ThisTokBuf++;

210 EndDelimiterFound = true;

211 break;

212 }

213 int CharVal = llvm::hexDigitValue(*ThisTokBuf);

214 if (CharVal == -1) {

215

216 if (!Delimited)

217 break;

218 HadError = true;

219 if (Diags)

220 Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,

221 diag::err_delimited_escape_invalid)

222 << StringRef(ThisTokBuf, 1);

223 continue;

224 }

225

226 if (ResultChar & 0xF0000000)

227 Overflow = true;

228 ResultChar <<= 4;

229 ResultChar |= CharVal;

230 }

231

232 if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {

233 Overflow = true;

234 ResultChar &= ~0U >> (32-CharWidth);

235 }

236

237

238 if (!HadError && Overflow) {

239 HadError = true;

240 if (Diags)

241 Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,

242 diag::err_escape_too_large)

243 << 0;

244 }

245 break;

246 }

247 case '0': case '1': case '2': case '3':

248 case '4': case '5': case '6': case '7': {

249

250 --ThisTokBuf;

251 ResultChar = 0;

252

253

254

255 unsigned NumDigits = 0;

256 do {

257 ResultChar <<= 3;

258 ResultChar |= *ThisTokBuf++ - '0';

259 ++NumDigits;

260 } while (ThisTokBuf != ThisTokEnd && NumDigits < 3 &&

261 ThisTokBuf[0] >= '0' && ThisTokBuf[0] <= '7');

262

263

264 if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {

265 if (Diags)

266 Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,

267 diag::err_escape_too_large) << 1;

268 ResultChar &= ~0U >> (32-CharWidth);

269 }

270 break;

271 }

272 case 'o': {

273 bool Overflow = false;

274 if (ThisTokBuf == ThisTokEnd || *ThisTokBuf != '{') {

275 HadError = true;

276 if (Diags)

277 Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,

278 diag::err_delimited_escape_missing_brace)

279 << "o";

280

281 break;

282 }

283 ResultChar = 0;

284 Delimited = true;

285 ++ThisTokBuf;

286 if (*ThisTokBuf == '}') {

287 HadError = true;

288 if (Diags)

289 Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,

290 diag::err_delimited_escape_empty);

291 }

292

293 while (ThisTokBuf != ThisTokEnd) {

294 if (*ThisTokBuf == '}') {

295 EndDelimiterFound = true;

296 ThisTokBuf++;

297 break;

298 }

299 if (*ThisTokBuf < '0' || *ThisTokBuf > '7') {

300 HadError = true;

301 if (Diags)

302 Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,

303 diag::err_delimited_escape_invalid)

304 << StringRef(ThisTokBuf, 1);

305 ThisTokBuf++;

306 continue;

307 }

308

309 if (ResultChar & 0xE0000000)

310 Overflow = true;

311

312 ResultChar <<= 3;

313 ResultChar |= *ThisTokBuf++ - '0';

314 }

315

316 if (!HadError &&

317 (Overflow || (CharWidth != 32 && (ResultChar >> CharWidth) != 0))) {

318 HadError = true;

319 if (Diags)

320 Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,

321 diag::err_escape_too_large)

322 << 1;

323 ResultChar &= ~0U >> (32 - CharWidth);

324 }

325 break;

326 }

327

328 case '(': case '{': case '[': case '%':

329

330 if (Diags)

331 Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,

332 diag::ext_nonstandard_escape)

333 << std::string(1, ResultChar);

334 break;

335 default:

336 if (!Diags)

337 break;

338

340 Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,

341 diag::ext_unknown_escape)

342 << std::string(1, ResultChar);

343 else

344 Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,

345 diag::ext_unknown_escape)

346 << "x" + llvm::utohexstr(ResultChar);

347 break;

348 }

349

350 if (Delimited && Diags) {

351 if (!EndDelimiterFound)

352 Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,

353 diag::err_expected)

354 << tok::r_brace;

355 else if (!HadError) {

356 Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,

357 Features.CPlusPlus23 ? diag::warn_cxx23_delimited_escape_sequence

358 : diag::ext_delimited_escape_sequence)

359 << 0 << (Features.CPlusPlus ? 1 : 0);

360 }

361 }

362

363 if (EvalMethod == StringLiteralEvalMethod::Unevaluated &&

365 Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,

366 diag::err_unevaluated_string_invalid_escape_sequence)

367 << StringRef(EscapeBegin, ThisTokBuf - EscapeBegin);

368 HadError = true;

369 }

370

371 return ResultChar;

372}

373

376 char ResultBuf[4];

377 char *ResultPtr = ResultBuf;

378 if (llvm::ConvertCodePointToUTF8(Codepoint, ResultPtr))

379 Str.append(ResultBuf, ResultPtr);

380}

381

383 for (StringRef::iterator I = Input.begin(), E = Input.end(); I != E; ++I) {

384 if (*I != '\\') {

385 Buf.push_back(*I);

386 continue;

387 }

388

389 ++I;

390 char Kind = *I;

391 ++I;

392

393 assert(Kind == 'u' || Kind == 'U' || Kind == 'N');

394 uint32_t CodePoint = 0;

395

396 if (Kind == 'u' && *I == '{') {

397 for (++I; *I != '}'; ++I) {

398 unsigned Value = llvm::hexDigitValue(*I);

399 assert(Value != -1U);

400 CodePoint <<= 4;

401 CodePoint += Value;

402 }

404 continue;

405 }

406

407 if (Kind == 'N') {

408 assert(*I == '{');

409 ++I;

410 auto Delim = std::find(I, Input.end(), '}');

411 assert(Delim != Input.end());

412 StringRef Name(I, std::distance(I, Delim));

413 std::optionalllvm::sys::unicode::LooseMatchingResult Res =

414 llvm::sys::unicode::nameToCodepointLooseMatching(Name);

415 assert(Res && "could not find a codepoint that was previously found");

416 CodePoint = Res->CodePoint;

417 assert(CodePoint != 0xFFFFFFFF);

419 I = Delim;

420 continue;

421 }

422

423 unsigned NumHexDigits;

424 if (Kind == 'u')

425 NumHexDigits = 4;

426 else

427 NumHexDigits = 8;

428

429 assert(I + NumHexDigits <= E);

430

431 for (; NumHexDigits != 0; ++I, --NumHexDigits) {

432 unsigned Value = llvm::hexDigitValue(*I);

433 assert(Value != -1U);

434

435 CodePoint <<= 4;

436 CodePoint += Value;

437 }

438

440 --I;

441 }

442}

443

446 return LO.MicrosoftExt &&

447 (K == tok::kw___FUNCTION__ || K == tok::kw_L__FUNCTION__ ||

448 K == tok::kw___FUNCSIG__ || K == tok::kw_L__FUNCSIG__ ||

449 K == tok::kw___FUNCDNAME__);

450}

451

455}

456

458 const char *&ThisTokBuf,

459 const char *ThisTokEnd, uint32_t &UcnVal,

460 unsigned short &UcnLen, bool &Delimited,

463 bool in_char_string_literal = false) {

464 const char *UcnBegin = ThisTokBuf;

465 bool HasError = false;

466 bool EndDelimiterFound = false;

467

468

469 ThisTokBuf += 2;

470 Delimited = false;

471 if (UcnBegin[1] == 'u' && in_char_string_literal &&

472 ThisTokBuf != ThisTokEnd && *ThisTokBuf == '{') {

473 Delimited = true;

474 ThisTokBuf++;

475 } else if (ThisTokBuf == ThisTokEnd || isHexDigit(*ThisTokBuf)) {

476 if (Diags)

477 Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,

478 diag::err_hex_escape_no_digits)

479 << StringRef(&ThisTokBuf[-1], 1);

480 return false;

481 }

482 UcnLen = (ThisTokBuf[-1] == 'u' ? 4 : 8);

483

484 bool Overflow = false;

485 unsigned short Count = 0;

486 for (; ThisTokBuf != ThisTokEnd && (Delimited || Count != UcnLen);

487 ++ThisTokBuf) {

488 if (Delimited && *ThisTokBuf == '}') {

489 ++ThisTokBuf;

490 EndDelimiterFound = true;

491 break;

492 }

493 int CharVal = llvm::hexDigitValue(*ThisTokBuf);

494 if (CharVal == -1) {

495 HasError = true;

496 if (!Delimited)

497 break;

498 if (Diags) {

499 Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,

500 diag::err_delimited_escape_invalid)

501 << StringRef(ThisTokBuf, 1);

502 }

503 Count++;

504 continue;

505 }

506 if (UcnVal & 0xF0000000) {

507 Overflow = true;

508 continue;

509 }

510 UcnVal <<= 4;

511 UcnVal |= CharVal;

512 Count++;

513 }

514

515 if (Overflow) {

516 if (Diags)

517 Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,

518 diag::err_escape_too_large)

519 << 0;

520 return false;

521 }

522

523 if (Delimited && !EndDelimiterFound) {

524 if (Diags) {

525 Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,

526 diag::err_expected)

527 << tok::r_brace;

528 }

529 return false;

530 }

531

532

533 if (Count == 0 || (!Delimited && Count != UcnLen)) {

534 if (Diags)

535 Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,

536 Delimited ? diag::err_delimited_escape_empty

537 : diag::err_ucn_escape_incomplete);

538 return false;

539 }

540 return !HasError;

541}

542

545 const char *TokBegin, const char *TokRangeBegin, const char *TokRangeEnd,

546 llvm::StringRef Name) {

547

548 Diag(Diags, Features, Loc, TokBegin, TokRangeBegin, TokRangeEnd,

549 diag::err_invalid_ucn_name)

550 << Name;

551

552 namespace u = llvm::sys::unicode;

553

554 std::optional<u::LooseMatchingResult> Res =

555 u::nameToCodepointLooseMatching(Name);

556 if (Res) {

557 Diag(Diags, Features, Loc, TokBegin, TokRangeBegin, TokRangeEnd,

558 diag::note_invalid_ucn_name_loose_matching)

561 TokRangeEnd),

562 Res->Name);

563 return;

564 }

565

566 unsigned Distance = 0;

568 u::nearestMatchesForCodepointName(Name, 5);

569 assert(!Matches.empty() && "No unicode characters found");

570

571 for (const auto &Match : Matches) {

572 if (Distance == 0)

573 Distance = Match.Distance;

574 if (std::max(Distance, Match.Distance) -

575 std::min(Distance, Match.Distance) >

576 3)

577 break;

578 Distance = Match.Distance;

579

580 std::string Str;

581 llvm::UTF32 V = Match.Value;

582 bool Converted =

584 (void)Converted;

585 assert(Converted && "Found a match wich is not a unicode character");

586

587 Diag(Diags, Features, Loc, TokBegin, TokRangeBegin, TokRangeEnd,

588 diag::note_invalid_ucn_name_candidate)

589 << Match.Name << llvm::utohexstr(Match.Value)

590 << Str

593 TokRangeEnd),

594 Match.Name);

595 }

596}

597

599 const char *&ThisTokBuf,

600 const char *ThisTokEnd, uint32_t &UcnVal,

604 const char *UcnBegin = ThisTokBuf;

605 assert(UcnBegin[0] == '\\' && UcnBegin[1] == 'N');

606 ThisTokBuf += 2;

607 if (ThisTokBuf == ThisTokEnd || *ThisTokBuf != '{') {

608 if (Diags) {

609 Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,

610 diag::err_delimited_escape_missing_brace)

611 << StringRef(&ThisTokBuf[-1], 1);

612 }

613 return false;

614 }

615 ThisTokBuf++;

616 const char *ClosingBrace = std::find_if(ThisTokBuf, ThisTokEnd, [](char C) {

618 });

619 bool Incomplete = ClosingBrace == ThisTokEnd;

620 bool Empty = ClosingBrace == ThisTokBuf;

622 if (Diags) {

623 Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,

624 Incomplete ? diag::err_ucn_escape_incomplete

625 : diag::err_delimited_escape_empty)

626 << StringRef(&UcnBegin[1], 1);

627 }

628 ThisTokBuf = ClosingBrace == ThisTokEnd ? ClosingBrace : ClosingBrace + 1;

629 return false;

630 }

631 StringRef Name(ThisTokBuf, ClosingBrace - ThisTokBuf);

632 ThisTokBuf = ClosingBrace + 1;

633 std::optional<char32_t> Res = llvm::sys::unicode::nameToCodepointStrict(Name);

634 if (!Res) {

635 if (Diags)

637 &UcnBegin[3], ClosingBrace, Name);

638 return false;

639 }

640 UcnVal = *Res;

641 UcnLen = UcnVal > 0xFFFF ? 8 : 4;

642 return true;

643}

644

645

646

647static bool ProcessUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,

648 const char *ThisTokEnd, uint32_t &UcnVal,

652 bool in_char_string_literal = false) {

653

654 bool HasError;

655 const char *UcnBegin = ThisTokBuf;

656 bool IsDelimitedEscapeSequence = false;

657 bool IsNamedEscapeSequence = false;

658 if (ThisTokBuf[1] == 'N') {

659 IsNamedEscapeSequence = true;

661 UcnVal, UcnLen, Loc, Diags, Features);

662 } else {

663 HasError =

665 UcnLen, IsDelimitedEscapeSequence, Loc, Diags,

666 Features, in_char_string_literal);

667 }

668 if (HasError)

669 return false;

670

671

672 if ((0xD800 <= UcnVal && UcnVal <= 0xDFFF) ||

673 UcnVal > 0x10FFFF) {

674 if (Diags)

675 Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,

676 diag::err_ucn_escape_invalid);

677 return false;

678 }

679

680

681

682 if (UcnVal < 0xa0 &&

683

684 (UcnVal != 0x24 && UcnVal != 0x40 && UcnVal != 0x60)) {

685 bool IsError =

686 (!(Features.CPlusPlus11 || Features.C23) || !in_char_string_literal);

687 if (Diags) {

688 char BasicSCSChar = UcnVal;

689 if (UcnVal >= 0x20 && UcnVal < 0x7f)

690 Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,

691 IsError ? diag::err_ucn_escape_basic_scs

692 : Features.CPlusPlus

693 ? diag::warn_cxx98_compat_literal_ucn_escape_basic_scs

694 : diag::warn_c23_compat_literal_ucn_escape_basic_scs)

695 << StringRef(&BasicSCSChar, 1);

696 else

697 Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,

698 IsError ? diag::err_ucn_control_character

699 : Features.CPlusPlus

700 ? diag::warn_cxx98_compat_literal_ucn_control_character

701 : diag::warn_c23_compat_literal_ucn_control_character);

702 }

703 if (IsError)

704 return false;

705 }

706

707 if (!Features.CPlusPlus && !Features.C99 && Diags)

708 Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,

709 diag::warn_ucn_not_valid_in_c89_literal);

710

711 if ((IsDelimitedEscapeSequence || IsNamedEscapeSequence) && Diags)

712 Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,

713 Features.CPlusPlus23 ? diag::warn_cxx23_delimited_escape_sequence

714 : diag::ext_delimited_escape_sequence)

715 << (IsNamedEscapeSequence ? 1 : 0) << (Features.CPlusPlus ? 1 : 0);

716

717 return true;

718}

719

720

721

722static int MeasureUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,

723 const char *ThisTokEnd, unsigned CharByteWidth,

724 const LangOptions &Features, bool &HadError) {

725

726 if (CharByteWidth == 4)

727 return 4;

728

729 uint32_t UcnVal = 0;

730 unsigned short UcnLen = 0;

732

733 if (ProcessUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, UcnVal,

734 UcnLen, Loc, nullptr, Features, true)) {

735 HadError = true;

736 return 0;

737 }

738

739

740 if (CharByteWidth == 2)

741 return UcnVal <= 0xFFFF ? 2 : 4;

742

743

744 if (UcnVal < 0x80)

745 return 1;

746 if (UcnVal < 0x800)

747 return 2;

748 if (UcnVal < 0x10000)

749 return 3;

750 return 4;

751}

752

753

754

755

756

757static void EncodeUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,

758 const char *ThisTokEnd,

759 char *&ResultBuf, bool &HadError,

763 typedef uint32_t UTF32;

764 UTF32 UcnVal = 0;

765 unsigned short UcnLen = 0;

766 if (ProcessUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, UcnVal, UcnLen,

767 Loc, Diags, Features, true)) {

768 HadError = true;

769 return;

770 }

771

772 assert((CharByteWidth == 1 || CharByteWidth == 2 || CharByteWidth == 4) &&

773 "only character widths of 1, 2, or 4 bytes supported");

774

775 (void)UcnLen;

776 assert((UcnLen== 4 || UcnLen== 8) && "only ucn length of 4 or 8 supported");

777

778 if (CharByteWidth == 4) {

779

780

781 llvm::UTF32 *ResultPtr = reinterpret_castllvm::UTF32\*\(ResultBuf);

782 *ResultPtr = UcnVal;

783 ResultBuf += 4;

784 return;

785 }

786

787 if (CharByteWidth == 2) {

788

789

790 llvm::UTF16 *ResultPtr = reinterpret_castllvm::UTF16\*\(ResultBuf);

791

792 if (UcnVal <= (UTF32)0xFFFF) {

793 *ResultPtr = UcnVal;

794 ResultBuf += 2;

795 return;

796 }

797

798

799 UcnVal -= 0x10000;

800 *ResultPtr = 0xD800 + (UcnVal >> 10);

801 *(ResultPtr+1) = 0xDC00 + (UcnVal & 0x3FF);

802 ResultBuf += 4;

803 return;

804 }

805

806 assert(CharByteWidth == 1 && "UTF-8 encoding is only for 1 byte characters");

807

808

809

810

811

812 typedef uint8_t UTF8;

813

814 unsigned short bytesToWrite = 0;

815 if (UcnVal < (UTF32)0x80)

816 bytesToWrite = 1;

817 else if (UcnVal < (UTF32)0x800)

818 bytesToWrite = 2;

819 else if (UcnVal < (UTF32)0x10000)

820 bytesToWrite = 3;

821 else

822 bytesToWrite = 4;

823

824 const unsigned byteMask = 0xBF;

825 const unsigned byteMark = 0x80;

826

827

828

829 static const UTF8 firstByteMark[5] = {

830 0x00, 0x00, 0xC0, 0xE0, 0xF0

831 };

832

833 ResultBuf += bytesToWrite;

834 switch (bytesToWrite) {

835 case 4:

836 *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;

837 [[fallthrough]];

838 case 3:

839 *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;

840 [[fallthrough]];

841 case 2:

842 *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;

843 [[fallthrough]];

844 case 1:

845 *--ResultBuf = (UTF8) (UcnVal | firstByteMark[bytesToWrite]);

846 }

847

848 ResultBuf += bytesToWrite;

849}

850

851

852

853

854

855

856

857

858

859

860

861

862

863

864

865

866

867

868

869

870

871

872

873

874

875

876

877

878

879

880

881

882

883

884

885

886

887

888

889

890

891

892

893

894

895

896

897

898

899

900

901

908 : SM(SM), LangOpts(LangOpts), Diags(Diags),

909 ThisTokBegin(TokSpelling.begin()), ThisTokEnd(TokSpelling.end()) {

910

911 s = DigitsBegin = ThisTokBegin;

912 saw_exponent = false;

913 saw_period = false;

914 saw_ud_suffix = false;

915 saw_fixed_point_suffix = false;

930

931

932

933

934

935

936

937

939 !(LangOpts.HLSL && *ThisTokEnd == '.')) {

940 Diags.Report(TokLoc, diag::err_lexing_numeric);

942 return;

943 }

944

945 if (*s == '0') {

946 ParseNumberStartingWithZero(TokLoc);

948 return;

949 } else {

950 radix = 10;

951 s = SkipDigits(s);

952 if (s == ThisTokEnd) {

953

954 } else {

955 ParseDecimalOrOctalCommon(TokLoc);

957 return;

958 }

959 }

960

961 SuffixBegin = s;

962 checkSeparator(TokLoc, s, CSK_AfterDigits);

963

964

965 if (LangOpts.FixedPoint) {

966 for (const char *c = s; c != ThisTokEnd; ++c) {

967 if (*c == 'r' || *c == 'k' || *c == 'R' || *c == 'K') {

968 saw_fixed_point_suffix = true;

969 break;

970 }

971 }

972 }

973

974

975

978 bool HasSize = false;

979 bool DoubleUnderscore = false;

980

981

982

983 for (; s != ThisTokEnd; ++s) {

984 switch (*s) {

985 case 'R':

986 case 'r':

987 if (!LangOpts.FixedPoint)

988 break;

990 if (!(saw_period || saw_exponent)) break;

992 continue;

993 case 'K':

994 case 'k':

995 if (!LangOpts.FixedPoint)

996 break;

998 if (!(saw_period || saw_exponent)) break;

1000 continue;

1001 case 'h':

1002 case 'H':

1003

1004 if (!(LangOpts.Half || LangOpts.FixedPoint))

1005 break;

1006 if (isIntegerLiteral()) break;

1007 if (HasSize)

1008 break;

1009 HasSize = true;

1011 continue;

1012 case 'f':

1013 case 'F':

1014 if (!isFPConstant) break;

1015 if (HasSize)

1016 break;

1017 HasSize = true;

1018

1019

1020

1021

1022

1023

1024

1025 if ((Target.hasFloat16Type() || LangOpts.CUDA ||

1026 (LangOpts.OpenMPIsTargetDevice && Target.getTriple().isNVPTX())) &&

1027 s + 2 < ThisTokEnd && s[1] == '1' && s[2] == '6') {

1028 s += 2;

1030 continue;

1031 }

1032

1034 continue;

1035 case 'q':

1036 case 'Q':

1037 if (!isFPConstant) break;

1038 if (HasSize)

1039 break;

1040 HasSize = true;

1042 continue;

1043 case 'u':

1044 case 'U':

1045 if (isFPConstant) break;

1046 if (isUnsigned) break;

1048 continue;

1049 case 'l':

1050 case 'L':

1051 if (HasSize)

1052 break;

1053 HasSize = true;

1054

1055

1056 if (s[1] == s[0]) {

1057 assert(s + 1 < ThisTokEnd && "didn't maximally munch?");

1058 if (isFPConstant) break;

1060 ++s;

1061 } else {

1063 }

1064 continue;

1065 case 'z':

1066 case 'Z':

1067 if (isFPConstant)

1068 break;

1069 if (HasSize)

1070 break;

1071 HasSize = true;

1073 continue;

1074 case 'i':

1075 case 'I':

1076 if (LangOpts.MicrosoftExt && !isFPConstant) {

1077

1078

1079 uint8_t Bits = 0;

1080 size_t ToSkip = 0;

1081 switch (s[1]) {

1082 case '8':

1083 Bits = 8;

1084 ToSkip = 2;

1085 break;

1086 case '1':

1087 if (s[2] == '6') {

1088 Bits = 16;

1089 ToSkip = 3;

1090 }

1091 break;

1092 case '3':

1093 if (s[2] == '2') {

1094 Bits = 32;

1095 ToSkip = 3;

1096 }

1097 break;

1098 case '6':

1099 if (s[2] == '4') {

1100 Bits = 64;

1101 ToSkip = 3;

1102 }

1103 break;

1104 default:

1105 break;

1106 }

1107 if (Bits) {

1108 if (HasSize)

1109 break;

1110 HasSize = true;

1112 s += ToSkip;

1113 assert(s <= ThisTokEnd && "didn't maximally munch?");

1114 break;

1115 }

1116 }

1117 [[fallthrough]];

1118 case 'j':

1119 case 'J':

1120 if (isImaginary) break;

1122 continue;

1123 case '_':

1124 if (isFPConstant)

1125 break;

1126 if (HasSize)

1127 break;

1128

1129

1130 assert(!DoubleUnderscore && "unhandled double underscore case");

1131 if (LangOpts.CPlusPlus && s + 2 < ThisTokEnd &&

1132 s[1] == '_') {

1133

1134 DoubleUnderscore = true;

1135 s += 2;

1136 if (s + 1 < ThisTokEnd &&

1137 (*s == 'u' || *s == 'U')) {

1139 ++s;

1140 }

1141 if (s + 1 < ThisTokEnd &&

1142 ((*s == 'w' && *(++s) == 'b') || (*s == 'W' && *(++s) == 'B'))) {

1144 HasSize = true;

1145 continue;

1146 }

1147 }

1148 break;

1149 case 'w':

1150 case 'W':

1151 if (isFPConstant)

1152 break;

1153 if (HasSize)

1154 break;

1155

1156

1157

1158

1159

1160 if ((!LangOpts.CPlusPlus || DoubleUnderscore) && s + 1 < ThisTokEnd &&

1161 ((s[0] == 'w' && s[1] == 'b') || (s[0] == 'W' && s[1] == 'B'))) {

1163 HasSize = true;

1164 ++s;

1165 continue;

1166 }

1167 }

1168

1169 break;

1170 }

1171

1172

1174

1175 expandUCNs(UDSuffixBuf, StringRef(SuffixBegin, ThisTokEnd - SuffixBegin));

1178

1179

1190 saw_fixed_point_suffix = false;

1193 }

1194

1195 saw_ud_suffix = true;

1196 return;

1197 }

1198

1199 if (s != ThisTokEnd) {

1200

1202 TokLoc, SuffixBegin - ThisTokBegin, SM, LangOpts),

1203 diag::err_invalid_suffix_constant)

1204 << StringRef(SuffixBegin, ThisTokEnd - SuffixBegin)

1205 << (isFixedPointConstant ? 2 : isFPConstant);

1207 }

1208 }

1209

1210 if (hadError && saw_fixed_point_suffix) {

1212 }

1213}

1214

1215

1216

1217

1218void NumericLiteralParser::ParseDecimalOrOctalCommon(SourceLocation TokLoc){

1219 assert((radix == 8 || radix == 10) && "Unexpected radix");

1220

1221

1222

1227 diag::err_invalid_digit)

1228 << StringRef(s, 1) << (radix == 8 ? 1 : 0);

1230 return;

1231 }

1232

1233 if (*s == '.') {

1234 checkSeparator(TokLoc, s, CSK_AfterDigits);

1235 s++;

1236 radix = 10;

1237 saw_period = true;

1238 checkSeparator(TokLoc, s, CSK_BeforeDigits);

1239 s = SkipDigits(s);

1240 }

1241 if (*s == 'e' || *s == 'E') {

1242 checkSeparator(TokLoc, s, CSK_AfterDigits);

1243 const char *Exponent = s;

1244 s++;

1245 radix = 10;

1246 saw_exponent = true;

1247 if (s != ThisTokEnd && (*s == '+' || *s == '-')) s++;

1248 const char *first_non_digit = SkipDigits(s);

1249 if (containsDigits(s, first_non_digit)) {

1250 checkSeparator(TokLoc, s, CSK_BeforeDigits);

1251 s = first_non_digit;

1252 } else {

1255 TokLoc, Exponent - ThisTokBegin, SM, LangOpts),

1256 diag::err_exponent_has_no_digits);

1258 }

1259 return;

1260 }

1261 }

1262}

1263

1264

1265

1266

1268 StringRef Suffix) {

1269 if (!LangOpts.CPlusPlus11 || Suffix.empty())

1270 return false;

1271

1272

1273

1274

1275 if (Suffix.starts_with("_") && !Suffix.starts_with("__"))

1276 return true;

1277

1278

1279 if (!LangOpts.CPlusPlus14)

1280 return false;

1281

1282

1283

1284

1285 return llvm::StringSwitch(Suffix)

1286 .Cases("h", "min", "s", true)

1287 .Cases("ms", "us", "ns", true)

1288 .Cases("il", "i", "if", true)

1289 .Cases("d", "y", LangOpts.CPlusPlus20)

1290 .Default(false);

1291}

1292

1293void NumericLiteralParser::checkSeparator(SourceLocation TokLoc,

1294 const char *Pos,

1295 CheckSeparatorKind IsAfterDigits) {

1296 if (IsAfterDigits == CSK_AfterDigits) {

1297 if (Pos == ThisTokBegin)

1298 return;

1299 --Pos;

1300 } else if (Pos == ThisTokEnd)

1301 return;

1302

1303 if (isDigitSeparator(*Pos)) {

1305 LangOpts),

1306 diag::err_digit_separator_not_between_digits)

1307 << IsAfterDigits;

1309 }

1310}

1311

1312

1313

1314

1315

1316

1317void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {

1318 assert(s[0] == '0' && "Invalid method call");

1319 s++;

1320

1321 int c1 = s[0];

1322

1323

1324 if ((c1 == 'x' || c1 == 'X') && (isHexDigit(s[1]) || s[1] == '.')) {

1325 s++;

1326 assert(s < ThisTokEnd && "didn't maximally munch?");

1327 radix = 16;

1328 DigitsBegin = s;

1329 s = SkipHexDigits(s);

1330 bool HasSignificandDigits = containsDigits(DigitsBegin, s);

1331 if (s == ThisTokEnd) {

1332

1333 } else if (*s == '.') {

1334 s++;

1335 saw_period = true;

1336 const char *floatDigitsBegin = s;

1337 s = SkipHexDigits(s);

1338 if (containsDigits(floatDigitsBegin, s))

1339 HasSignificandDigits = true;

1340 if (HasSignificandDigits)

1341 checkSeparator(TokLoc, floatDigitsBegin, CSK_BeforeDigits);

1342 }

1343

1344 if (!HasSignificandDigits) {

1346 LangOpts),

1347 diag::err_hex_constant_requires)

1348 << LangOpts.CPlusPlus << 1;

1350 return;

1351 }

1352

1353

1354

1355 if (*s == 'p' || *s == 'P') {

1356 checkSeparator(TokLoc, s, CSK_AfterDigits);

1357 const char *Exponent = s;

1358 s++;

1359 saw_exponent = true;

1360 if (s != ThisTokEnd && (*s == '+' || *s == '-')) s++;

1361 const char *first_non_digit = SkipDigits(s);

1362 if (!containsDigits(s, first_non_digit)) {

1365 TokLoc, Exponent - ThisTokBegin, SM, LangOpts),

1366 diag::err_exponent_has_no_digits);

1368 }

1369 return;

1370 }

1371 checkSeparator(TokLoc, s, CSK_BeforeDigits);

1372 s = first_non_digit;

1373

1374 if (!LangOpts.HexFloats)

1375 Diags.Report(TokLoc, LangOpts.CPlusPlus

1376 ? diag::ext_hex_literal_invalid

1377 : diag::ext_hex_constant_invalid);

1378 else if (LangOpts.CPlusPlus17)

1379 Diags.Report(TokLoc, diag::warn_cxx17_hex_literal);

1380 } else if (saw_period) {

1382 LangOpts),

1383 diag::err_hex_constant_requires)

1384 << LangOpts.CPlusPlus << 0;

1386 }

1387 return;

1388 }

1389

1390

1391 if ((c1 == 'b' || c1 == 'B') && (s[1] == '0' || s[1] == '1')) {

1392

1393 unsigned DiagId;

1394 if (LangOpts.CPlusPlus14)

1395 DiagId = diag::warn_cxx11_compat_binary_literal;

1396 else if (LangOpts.C23)

1397 DiagId = diag::warn_c23_compat_binary_literal;

1398 else if (LangOpts.CPlusPlus)

1399 DiagId = diag::ext_binary_literal_cxx14;

1400 else

1401 DiagId = diag::ext_binary_literal;

1402 Diags.Report(TokLoc, DiagId);

1403 ++s;

1404 assert(s < ThisTokEnd && "didn't maximally munch?");

1405 radix = 2;

1406 DigitsBegin = s;

1407 s = SkipBinaryDigits(s);

1408 if (s == ThisTokEnd) {

1409

1413 LangOpts),

1414 diag::err_invalid_digit)

1415 << StringRef(s, 1) << 2;

1417 }

1418

1419 return;

1420 }

1421

1422

1423

1424

1425 radix = 8;

1426 const char *PossibleNewDigitStart = s;

1427 s = SkipOctalDigits(s);

1428

1429

1430

1431 if (s != PossibleNewDigitStart)

1432 DigitsBegin = PossibleNewDigitStart;

1433

1434 if (s == ThisTokEnd)

1435 return;

1436

1437

1438

1440 const char *EndDecimal = SkipDigits(s);

1441 if (EndDecimal[0] == '.' || EndDecimal[0] == 'e' || EndDecimal[0] == 'E') {

1442 s = EndDecimal;

1443 radix = 10;

1444 }

1445 }

1446

1447 ParseDecimalOrOctalCommon(TokLoc);

1448}

1449

1451 switch (Radix) {

1452 case 2:

1453 return NumDigits <= 64;

1454 case 8:

1455 return NumDigits <= 64 / 3;

1456 case 10:

1457 return NumDigits <= 19;

1458 case 16:

1459 return NumDigits <= 64 / 4;

1460 default:

1461 llvm_unreachable("impossible Radix");

1462 }

1463}

1464

1465

1466

1467

1469

1470

1471

1472

1473

1474

1475 const unsigned NumDigits = SuffixBegin - DigitsBegin;

1477 uint64_t N = 0;

1478 for (const char *Ptr = DigitsBegin; Ptr != SuffixBegin; ++Ptr)

1479 if (!isDigitSeparator(*Ptr))

1480 N = N * radix + llvm::hexDigitValue(*Ptr);

1481

1482

1483

1484 Val = N;

1485 return Val.getZExtValue() != N;

1486 }

1487

1488 Val = 0;

1489 const char *Ptr = DigitsBegin;

1490

1491 llvm::APInt RadixVal(Val.getBitWidth(), radix);

1492 llvm::APInt CharVal(Val.getBitWidth(), 0);

1493 llvm::APInt OldVal = Val;

1494

1495 bool OverflowOccurred = false;

1496 while (Ptr < SuffixBegin) {

1497 if (isDigitSeparator(*Ptr)) {

1498 ++Ptr;

1499 continue;

1500 }

1501

1502 unsigned C = llvm::hexDigitValue(*Ptr++);

1503

1504

1505 assert(C < radix && "NumericLiteralParser ctor should have rejected this");

1506

1507 CharVal = C;

1508

1509

1510

1511 OldVal = Val;

1512

1513

1514 Val *= RadixVal;

1515 OverflowOccurred |= Val.udiv(RadixVal) != OldVal;

1516

1517

1518

1519 Val += CharVal;

1520 OverflowOccurred |= Val.ult(CharVal);

1521 }

1522 return OverflowOccurred;

1523}

1524

1525llvm::APFloat::opStatus

1527 llvm::RoundingMode RM) {

1528 using llvm::APFloat;

1529

1530 unsigned n = std::min(SuffixBegin - ThisTokBegin, ThisTokEnd - ThisTokBegin);

1531

1533 StringRef Str(ThisTokBegin, n);

1534 if (Str.contains('\'')) {

1535 Buffer.reserve(n);

1536 std::remove_copy_if(Str.begin(), Str.end(), std::back_inserter(Buffer),

1537 &isDigitSeparator);

1538 Str = Buffer;

1539 }

1540

1541 auto StatusOrErr = Result.convertFromString(Str, RM);

1542 assert(StatusOrErr && "Invalid floating point representation");

1543 return !errorToBool(StatusOrErr.takeError()) ? *StatusOrErr

1544 : APFloat::opInvalidOp;

1545}

1546

1548 if (isHex)

1549 return c == 'p' || c == 'P';

1550 return c == 'e' || c == 'E';

1551}

1552

1554 assert(radix == 16 || radix == 10);

1555

1556

1557 unsigned NumDigits = SuffixBegin - DigitsBegin;

1558 if (saw_period) --NumDigits;

1559

1560

1561 bool ExpOverflowOccurred = false;

1562 bool NegativeExponent = false;

1563 const char *ExponentBegin;

1564 uint64_t Exponent = 0;

1565 int64_t BaseShift = 0;

1566 if (saw_exponent) {

1567 const char *Ptr = DigitsBegin;

1568

1570 ++Ptr;

1571 ExponentBegin = Ptr;

1572 ++Ptr;

1573 NegativeExponent = *Ptr == '-';

1574 if (NegativeExponent) ++Ptr;

1575

1576 unsigned NumExpDigits = SuffixBegin - Ptr;

1578 llvm::StringRef ExpStr(Ptr, NumExpDigits);

1579 llvm::APInt ExpInt(64, ExpStr, 10);

1580 Exponent = ExpInt.getZExtValue();

1581 } else {

1582 ExpOverflowOccurred = true;

1583 }

1584

1585 if (NegativeExponent) BaseShift -= Exponent;

1586 else BaseShift += Exponent;

1587 }

1588

1589

1590

1591

1592

1593

1594

1595

1596

1597

1598

1599

1600

1601

1602

1603

1604

1605

1606 uint64_t NumBitsNeeded;

1607 if (radix == 10)

1608 NumBitsNeeded = 4 * (NumDigits + Exponent) + Scale;

1609 else

1610 NumBitsNeeded = 4 * NumDigits + Exponent + Scale;

1611

1612 if (NumBitsNeeded > std::numeric_limits<unsigned>::max())

1613 ExpOverflowOccurred = true;

1614 llvm::APInt Val(static_cast<unsigned>(NumBitsNeeded), 0, false);

1615

1616 bool FoundDecimal = false;

1617

1618 int64_t FractBaseShift = 0;

1619 const char *End = saw_exponent ? ExponentBegin : SuffixBegin;

1620 for (const char *Ptr = DigitsBegin; Ptr < End; ++Ptr) {

1621 if (*Ptr == '.') {

1622 FoundDecimal = true;

1623 continue;

1624 }

1625

1626

1627 unsigned C = llvm::hexDigitValue(*Ptr);

1628 assert(C < radix && "NumericLiteralParser ctor should have rejected this");

1629

1630 Val *= radix;

1631 Val += C;

1632

1633 if (FoundDecimal)

1634

1635

1636 --FractBaseShift;

1637 }

1638

1639

1640 if (radix == 16) FractBaseShift *= 4;

1641 BaseShift += FractBaseShift;

1642

1643 Val <<= Scale;

1644

1645 uint64_t Base = (radix == 16) ? 2 : 10;

1646 if (BaseShift > 0) {

1647 for (int64_t i = 0; i < BaseShift; ++i) {

1648 Val *= Base;

1649 }

1650 } else if (BaseShift < 0) {

1651 for (int64_t i = BaseShift; i < 0 && !Val.isZero(); ++i)

1652 Val = Val.udiv(Base);

1653 }

1654

1655 bool IntOverflowOccurred = false;

1656 auto MaxVal = llvm::APInt::getMaxValue(StoreVal.getBitWidth());

1657 if (Val.getBitWidth() > StoreVal.getBitWidth()) {

1658 IntOverflowOccurred |= Val.ugt(MaxVal.zext(Val.getBitWidth()));

1659 StoreVal = Val.trunc(StoreVal.getBitWidth());

1660 } else if (Val.getBitWidth() < StoreVal.getBitWidth()) {

1661 IntOverflowOccurred |= Val.zext(MaxVal.getBitWidth()).ugt(MaxVal);

1662 StoreVal = Val.zext(StoreVal.getBitWidth());

1663 } else {

1664 StoreVal = Val;

1665 }

1666

1667 return IntOverflowOccurred || ExpOverflowOccurred;

1668}

1669

1670

1671

1672

1673

1674

1675

1676

1677

1678

1679

1680

1681

1682

1683

1684

1685

1686

1687

1688

1689

1690

1691

1692

1693

1694

1695

1696

1697

1698

1699

1700

1701

1702

1703

1704

1705

1706

1707

1708

1712

1713 HadError = false;

1714

1715 Kind = kind;

1716

1717 const char *TokBegin = begin;

1718

1719

1720 if (Kind != tok::char_constant)

1721 ++begin;

1722 if (Kind == tok::utf8_char_constant)

1723 ++begin;

1724

1725

1726 if (begin[0] != '\'') {

1727 PP.Diag(Loc, diag::err_lexing_char);

1728 HadError = true;

1729 return;

1730 }

1731

1732 ++begin;

1733

1734

1735 if (end[-1] != '\'') {

1736 const char *UDSuffixEnd = end;

1737 do {

1738 --end;

1739 } while (end[-1] != '\'');

1740

1741 expandUCNs(UDSuffixBuf, StringRef(end, UDSuffixEnd - end));

1742 UDSuffixOffset = end - TokBegin;

1743 }

1744

1745

1746 assert(end != begin && "Invalid token lexed");

1747 --end;

1748

1749

1750

1751

1753 "Assumes char is 8 bits");

1756 "Assumes sizeof(int) on target is <= 64 and a multiple of char");

1758 "Assumes sizeof(wchar) on target is <= 64");

1759

1761 codepoint_buffer.resize(end - begin);

1762 uint32_t *buffer_begin = &codepoint_buffer.front();

1763 uint32_t *buffer_end = buffer_begin + codepoint_buffer.size();

1764

1765

1766

1767

1768 uint32_t largest_character_for_kind;

1769 if (tok::wide_char_constant == Kind) {

1770 largest_character_for_kind =

1772 } else if (tok::utf8_char_constant == Kind) {

1773 largest_character_for_kind = 0x7F;

1774 } else if (tok::utf16_char_constant == Kind) {

1775 largest_character_for_kind = 0xFFFF;

1776 } else if (tok::utf32_char_constant == Kind) {

1777 largest_character_for_kind = 0x10FFFF;

1778 } else {

1779 largest_character_for_kind = 0x7Fu;

1780 }

1781

1782 while (begin != end) {

1783

1784 if (begin[0] != '\\') {

1785 char const *start = begin;

1786 do {

1787 ++begin;

1788 } while (begin != end && *begin != '\\');

1789

1790 char const *tmp_in_start = start;

1791 uint32_t *tmp_out_start = buffer_begin;

1792 llvm::ConversionResult res =

1793 llvm::ConvertUTF8toUTF32(reinterpret_cast<llvm::UTF8 const **>(&start),

1794 reinterpret_cast<llvm::UTF8 const *>(begin),

1795 &buffer_begin, buffer_end, llvm::strictConversion);

1796 if (res != llvm::conversionOK) {

1797

1798

1799

1800 bool NoErrorOnBadEncoding = isOrdinary();

1801 unsigned Msg = diag::err_bad_character_encoding;

1802 if (NoErrorOnBadEncoding)

1803 Msg = diag::warn_bad_character_encoding;

1805 if (NoErrorOnBadEncoding) {

1806 start = tmp_in_start;

1807 buffer_begin = tmp_out_start;

1808 for (; start != begin; ++start, ++buffer_begin)

1809 *buffer_begin = static_cast<uint8_t>(*start);

1810 } else {

1811 HadError = true;

1812 }

1813 } else {

1814 for (; tmp_out_start < buffer_begin; ++tmp_out_start) {

1815 if (*tmp_out_start > largest_character_for_kind) {

1816 HadError = true;

1817 PP.Diag(Loc, diag::err_character_too_large);

1818 }

1819 }

1820 }

1821

1822 continue;

1823 }

1824

1825 if (begin[1] == 'u' || begin[1] == 'U' || begin[1] == 'N') {

1826 unsigned short UcnLen = 0;

1827 if (ProcessUCNEscape(TokBegin, begin, end, *buffer_begin, UcnLen,

1830 HadError = true;

1831 } else if (*buffer_begin > largest_character_for_kind) {

1832 HadError = true;

1833 PP.Diag(Loc, diag::err_character_too_large);

1834 }

1835

1836 ++buffer_begin;

1837 continue;

1838 }

1840 uint64_t result =

1845 *buffer_begin++ = result;

1846 }

1847

1848 unsigned NumCharsSoFar = buffer_begin - &codepoint_buffer.front();

1849

1850 if (NumCharsSoFar > 1) {

1851 if (isOrdinary() && NumCharsSoFar == 4)

1852 PP.Diag(Loc, diag::warn_four_char_character_literal);

1854 PP.Diag(Loc, diag::warn_multichar_character_literal);

1855 else {

1856 PP.Diag(Loc, diag::err_multichar_character_literal) << (isWide() ? 0 : 1);

1857 HadError = true;

1858 }

1859 IsMultiChar = true;

1860 } else {

1861 IsMultiChar = false;

1862 }

1863

1865

1866

1867

1868 bool multi_char_too_long = false;

1870 LitVal = 0;

1871 for (size_t i = 0; i < NumCharsSoFar; ++i) {

1872

1873 multi_char_too_long |= (LitVal.countl_zero() < 8);

1874 LitVal <<= 8;

1875 LitVal = LitVal + (codepoint_buffer[i] & 0xFF);

1876 }

1877 } else if (NumCharsSoFar > 0) {

1878

1879 LitVal = buffer_begin[-1];

1880 }

1881

1882 if (!HadError && multi_char_too_long) {

1883 PP.Diag(Loc, diag::warn_char_constant_too_large);

1884 }

1885

1886

1887 Value = LitVal.getZExtValue();

1888

1889

1890

1891

1892

1893 if (isOrdinary() && NumCharsSoFar == 1 && (Value & 128) &&

1896}

1897

1898

1899

1900

1901

1902

1903

1904

1905

1906

1907

1908

1909

1910

1911

1912

1913

1914

1915

1916

1917

1918

1919

1920

1921

1922

1923

1924

1925

1926

1927

1928

1929

1930

1931

1932

1933

1934

1935

1936

1937

1938

1939

1940

1941

1942

1943

1944

1945

1946

1947

1948

1949

1950

1951

1955 : SM(PP.getSourceManager()), Features(PP.getLangOpts()),

1956 Target(PP.getTargetInfo()), Diags(&PP.getDiagnostics()),

1957 MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),

1958 ResultPtr(ResultBuf.data()), EvalMethod(EvalMethod), hadError(false),

1960 init(StringToks);

1961}

1962

1963void StringLiteralParser::init(ArrayRef<Token> StringToks){

1964

1965

1966 if (StringToks.empty() || StringToks[0].getLength() < 2)

1968

1969

1970

1971

1972

1973 assert(!StringToks.empty() && "expected at least one token");

1974 MaxTokenLength = StringToks[0].getLength();

1975 assert(StringToks[0].getLength() >= 2 && "literal token is invalid!");

1976 SizeBound = StringToks[0].getLength() - 2;

1978

1979

1980 Kind = tok::string_literal;

1981

1982

1983 for (const Token &Tok : StringToks) {

1984 if (Tok.getLength() < 2)

1985 return DiagnoseLexingError(Tok.getLocation());

1986

1987

1988

1989 assert(Tok.getLength() >= 2 && "literal token is invalid!");

1990 SizeBound += Tok.getLength() - 2;

1991

1992

1993 if (Tok.getLength() > MaxTokenLength)

1994 MaxTokenLength = Tok.getLength();

1995

1996

1997

1998 if (isUnevaluated() && Tok.getKind() != tok::string_literal) {

1999 if (Diags) {

2002 Features);

2005 StringRef Prefix(SM.getCharacterData(Tok.getLocation()),

2007 Diags->Report(Tok.getLocation(),

2008 Features.CPlusPlus26

2009 ? diag::err_unevaluated_string_prefix

2010 : diag::warn_unevaluated_string_prefix)

2012 }

2013 if (Features.CPlusPlus26)

2015 } else if (Tok.isNot(Kind) && Tok.isNot(tok::string_literal)) {

2017 Kind = Tok.getKind();

2018 } else {

2019 if (Diags)

2020 Diags->Report(Tok.getLocation(), diag::err_unsupported_string_concat);

2022 }

2023 }

2024 }

2025

2026

2027 ++SizeBound;

2028

2029

2030

2031

2033 assert((CharByteWidth & 7) == 0 && "Assumes character size is byte multiple");

2034 CharByteWidth /= 8;

2035

2036

2037

2038 SizeBound *= CharByteWidth;

2039

2040

2041 ResultBuf.resize(SizeBound);

2042

2043

2045 TokenBuf.resize(MaxTokenLength);

2046

2047

2048

2049 ResultPtr = &ResultBuf[0];

2050

2052

2054

2055 for (unsigned i = 0, e = StringToks.size(); i != e; ++i) {

2056 const char *ThisTokBuf = &TokenBuf[0];

2057

2058

2059

2060 bool StringInvalid = false;

2061 unsigned ThisTokLen =

2063 &StringInvalid);

2064 if (StringInvalid)

2065 return DiagnoseLexingError(StringToks[i].getLocation());

2066

2067 const char *ThisTokBegin = ThisTokBuf;

2068 const char *ThisTokEnd = ThisTokBuf+ThisTokLen;

2069

2070

2071 if (ThisTokEnd[-1] != '"') {

2072 const char *UDSuffixEnd = ThisTokEnd;

2073 do {

2074 --ThisTokEnd;

2075 } while (ThisTokEnd[-1] != '"');

2076

2077 StringRef UDSuffix(ThisTokEnd, UDSuffixEnd - ThisTokEnd);

2078

2079 if (UDSuffixBuf.empty()) {

2080 if (StringToks[i].hasUCN())

2082 else

2083 UDSuffixBuf.assign(UDSuffix);

2084 UDSuffixToken = i;

2085 UDSuffixOffset = ThisTokEnd - ThisTokBuf;

2086 UDSuffixTokLoc = StringToks[i].getLocation();

2087 } else {

2089 if (StringToks[i].hasUCN()) {

2090 expandUCNs(ExpandedUDSuffix, UDSuffix);

2091 UDSuffix = ExpandedUDSuffix;

2092 }

2093

2094

2095

2096

2097

2098 bool UnevaluatedStringHasUDL = isUnevaluated() && !UDSuffix.empty();

2099 if (UDSuffixBuf != UDSuffix || UnevaluatedStringHasUDL) {

2100 if (Diags) {

2101 SourceLocation TokLoc = StringToks[i].getLocation();

2102 if (UnevaluatedStringHasUDL) {

2103 Diags->Report(TokLoc, diag::err_unevaluated_string_udl)

2105 } else {

2106 Diags->Report(TokLoc, diag::err_string_concat_mixed_suffix)

2107 << UDSuffixBuf << UDSuffix

2108 << SourceRange(UDSuffixTokLoc, UDSuffixTokLoc);

2109 }

2110 }

2112 }

2113 }

2114 }

2115

2116

2117 --ThisTokEnd;

2118

2119

2120

2121

2122 if (ThisTokBuf[0] == 'L' || ThisTokBuf[0] == 'u' || ThisTokBuf[0] == 'U') {

2123 ++ThisTokBuf;

2124

2125 if (ThisTokBuf[0] == '8')

2126 ++ThisTokBuf;

2127 }

2128

2129

2130 if (ThisTokBuf[0] == 'R') {

2131 if (ThisTokBuf[1] != '"') {

2132

2133

2134 return DiagnoseLexingError(StringToks[i].getLocation());

2135 }

2136 ThisTokBuf += 2;

2137

2138

2139

2140 constexpr unsigned MaxRawStrDelimLen = 16;

2141

2142 const char *Prefix = ThisTokBuf;

2143 while (static_cast<unsigned>(ThisTokBuf - Prefix) < MaxRawStrDelimLen &&

2144 ThisTokBuf[0] != '(')

2145 ++ThisTokBuf;

2146 if (ThisTokBuf[0] != '(')

2147 return DiagnoseLexingError(StringToks[i].getLocation());

2148 ++ThisTokBuf;

2149

2150

2151 ThisTokEnd -= ThisTokBuf - Prefix;

2152 if (ThisTokEnd < ThisTokBuf)

2153 return DiagnoseLexingError(StringToks[i].getLocation());

2154

2155

2156

2157 StringRef RemainingTokenSpan(ThisTokBuf, ThisTokEnd - ThisTokBuf);

2158 while (!RemainingTokenSpan.empty()) {

2159

2160 size_t CRLFPos = RemainingTokenSpan.find("\r\n");

2161 StringRef BeforeCRLF = RemainingTokenSpan.substr(0, CRLFPos);

2162 StringRef AfterCRLF = RemainingTokenSpan.substr(CRLFPos);

2163

2164

2165 if (CopyStringFragment(StringToks[i], ThisTokBegin, BeforeCRLF))

2167

2168

2169

2170 RemainingTokenSpan = AfterCRLF.substr(1);

2171 }

2172 } else {

2173 if (ThisTokBuf[0] != '"') {

2174

2175

2176 return DiagnoseLexingError(StringToks[i].getLocation());

2177 }

2178 ++ThisTokBuf;

2179

2180

2181 if (isUnevaluated() && Features.PascalStrings &&

2182 ThisTokBuf + 1 != ThisTokEnd && ThisTokBuf[0] == '\\' &&

2183 ThisTokBuf[1] == 'p') {

2184

2185

2186

2187 if (i == 0) {

2188 ++ThisTokBuf;

2191 ThisTokBuf += 2;

2192 }

2193

2194 while (ThisTokBuf != ThisTokEnd) {

2195

2196 if (ThisTokBuf[0] != '\\') {

2197 const char *InStart = ThisTokBuf;

2198 do {

2199 ++ThisTokBuf;

2200 } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');

2201

2202

2203 if (CopyStringFragment(StringToks[i], ThisTokBegin,

2204 StringRef(InStart, ThisTokBuf - InStart)))

2206 continue;

2207 }

2208

2209 if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U' ||

2210 ThisTokBuf[1] == 'N') {

2214 CharByteWidth, Diags, Features);

2215 continue;

2216 }

2217

2218 unsigned ResultChar =

2221 CharByteWidth * 8, Diags, Features, EvalMethod);

2222

2223 if (CharByteWidth == 4) {

2224

2225

2226 llvm::UTF32 *ResultWidePtr = reinterpret_cast<llvm::UTF32*>(ResultPtr);

2227 *ResultWidePtr = ResultChar;

2228 ResultPtr += 4;

2229 } else if (CharByteWidth == 2) {

2230

2231

2232 llvm::UTF16 *ResultWidePtr = reinterpret_cast<llvm::UTF16*>(ResultPtr);

2233 *ResultWidePtr = ResultChar & 0xFFFF;

2234 ResultPtr += 2;

2235 } else {

2236 assert(CharByteWidth == 1 && "Unexpected char width");

2237 *ResultPtr++ = ResultChar & 0xFF;

2238 }

2239 }

2240 }

2241 }

2242

2244 "Pascal string in unevaluated context");

2246 if (CharByteWidth == 4) {

2247

2248

2249 llvm::UTF32 *ResultWidePtr = reinterpret_cast<llvm::UTF32*>(ResultBuf.data());

2251 } else if (CharByteWidth == 2) {

2252

2253

2254 llvm::UTF16 *ResultWidePtr = reinterpret_cast<llvm::UTF16*>(ResultBuf.data());

2256 } else {

2257 assert(CharByteWidth == 1 && "Unexpected char width");

2259 }

2260

2261

2263 if (Diags)

2264 Diags->Report(StringToks.front().getLocation(),

2265 diag::err_pascal_string_too_long)

2266 << SourceRange(StringToks.front().getLocation(),

2267 StringToks.back().getLocation());

2269 return;

2270 }

2271 } else if (Diags) {

2272

2273 unsigned MaxChars = Features.CPlusPlus? 65536 : Features.C99 ? 4095 : 509;

2274

2276 Diags->Report(StringToks.front().getLocation(),

2277 diag::ext_string_too_long)

2279 << (Features.CPlusPlus ? 2 : Features.C99 ? 1 : 0)

2280 << SourceRange(StringToks.front().getLocation(),

2281 StringToks.back().getLocation());

2282 }

2283}

2284

2285static const char *resyncUTF8(const char *Err, const char *End) {

2286 if (Err == End)

2287 return End;

2288 End = Err + std::min(llvm::getNumBytesForUTF8(*Err), End-Err);

2289 while (++Err != End && (*Err & 0xC0) == 0x80)

2290 ;

2291 return Err;

2292}

2293

2294

2295

2296

2297bool StringLiteralParser::CopyStringFragment(const Token &Tok,

2298 const char *TokBegin,

2299 StringRef Fragment) {

2300 const llvm::UTF8 *ErrorPtrTmp;

2301 if (ConvertUTF8toWide(CharByteWidth, Fragment, ResultPtr, ErrorPtrTmp))

2302 return false;

2303

2304

2305

2306

2307 bool NoErrorOnBadEncoding = isOrdinary();

2308 if (NoErrorOnBadEncoding) {

2309 memcpy(ResultPtr, Fragment.data(), Fragment.size());

2310 ResultPtr += Fragment.size();

2311 }

2312

2313 if (Diags) {

2314 const char *ErrorPtr = reinterpret_cast<const char *>(ErrorPtrTmp);

2315

2318 Diag(Diags, Features, SourceLoc, TokBegin,

2319 ErrorPtr, resyncUTF8(ErrorPtr, Fragment.end()),

2320 NoErrorOnBadEncoding ? diag::warn_bad_string_encoding

2321 : diag::err_bad_string_encoding);

2322

2323 const char *NextStart = resyncUTF8(ErrorPtr, Fragment.end());

2324 StringRef NextFragment(NextStart, Fragment.end()-NextStart);

2325

2326

2328 Dummy.reserve(Fragment.size() * CharByteWidth);

2329 char *Ptr = Dummy.data();

2330

2331 while (!ConvertUTF8toWide(CharByteWidth, NextFragment, Ptr, ErrorPtrTmp)) {

2332 const char *ErrorPtr = reinterpret_cast<const char *>(ErrorPtrTmp);

2333 NextStart = resyncUTF8(ErrorPtr, Fragment.end());

2335 ErrorPtr, NextStart);

2336 NextFragment = StringRef(NextStart, Fragment.end()-NextStart);

2337 }

2338 }

2339 return !NoErrorOnBadEncoding;

2340}

2341

2342void StringLiteralParser::DiagnoseLexingError(SourceLocation Loc) {

2344 if (Diags)

2345 Diags->Report(Loc, diag::err_lexing_string);

2346}

2347

2348

2349

2350

2352 unsigned ByteNo) const {

2353

2355 SpellingBuffer.resize(Tok.getLength());

2356

2357 bool StringInvalid = false;

2358 const char *SpellingPtr = &SpellingBuffer[0];

2360 &StringInvalid);

2361 if (StringInvalid)

2362 return 0;

2363

2364 const char *SpellingStart = SpellingPtr;

2365 const char *SpellingEnd = SpellingPtr+TokLen;

2366

2367

2368 if (SpellingPtr[0] == 'u' && SpellingPtr[1] == '8')

2369 SpellingPtr += 2;

2370

2371 assert(SpellingPtr[0] != 'L' && SpellingPtr[0] != 'u' &&

2372 SpellingPtr[0] != 'U' && "Doesn't handle wide or utf strings yet");

2373

2374

2375 if (SpellingPtr[0] == 'R') {

2376 assert(SpellingPtr[1] == '"' && "Should be a raw string literal!");

2377

2378 SpellingPtr += 2;

2379 while (*SpellingPtr != '(') {

2380 ++SpellingPtr;

2381 assert(SpellingPtr < SpellingEnd && "Missing ( for raw string literal");

2382 }

2383

2384 ++SpellingPtr;

2385 return SpellingPtr - SpellingStart + ByteNo;

2386 }

2387

2388

2389 assert(SpellingPtr[0] == '"' && "Should be a string literal!");

2390 ++SpellingPtr;

2391

2392

2393 while (ByteNo) {

2394 assert(SpellingPtr < SpellingEnd && "Didn't find byte offset!");

2395

2396

2397 if (*SpellingPtr != '\\') {

2398 ++SpellingPtr;

2399 --ByteNo;

2400 continue;

2401 }

2402

2403

2404 bool HadError = false;

2405 if (SpellingPtr[1] == 'u' || SpellingPtr[1] == 'U' ||

2406 SpellingPtr[1] == 'N') {

2407 const char *EscapePtr = SpellingPtr;

2408 unsigned Len = MeasureUCNEscape(SpellingStart, SpellingPtr, SpellingEnd,

2409 1, Features, HadError);

2410 if (Len > ByteNo) {

2411

2412 SpellingPtr = EscapePtr;

2413 break;

2414 }

2415 ByteNo -= Len;

2416 } else {

2417 ProcessCharEscape(SpellingStart, SpellingPtr, SpellingEnd, HadError,

2420 --ByteNo;

2421 }

2422 assert(!HadError && "This method isn't valid on erroneous strings");

2423 }

2424

2425 return SpellingPtr-SpellingStart;

2426}

2427

2428

2429

2430

2432 StringRef Suffix) {

2434 Suffix == "sv";

2435}

enum clang::sema::@1725::IndirectLocalPathEntry::EntryKind Kind

Defines the clang::LangOptions interface.

static void EncodeUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, const char *ThisTokEnd, char *&ResultBuf, bool &HadError, FullSourceLoc Loc, unsigned CharByteWidth, DiagnosticsEngine *Diags, const LangOptions &Features)

EncodeUCNEscape - Read the Universal Character Name, check constraints and convert the UTF32 to UTF8 ...

static bool ProcessUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, const char *ThisTokEnd, uint32_t &UcnVal, unsigned short &UcnLen, FullSourceLoc Loc, DiagnosticsEngine *Diags, const LangOptions &Features, bool in_char_string_literal=false)

ProcessUCNEscape - Read the Universal Character Name, check constraints and return the UTF32.

static CharSourceRange MakeCharSourceRange(const LangOptions &Features, FullSourceLoc TokLoc, const char *TokBegin, const char *TokRangeBegin, const char *TokRangeEnd)

static const char * resyncUTF8(const char *Err, const char *End)

static int MeasureUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, const char *ThisTokEnd, unsigned CharByteWidth, const LangOptions &Features, bool &HadError)

MeasureUCNEscape - Determine the number of bytes within the resulting string which this UCN will occu...

static void appendCodePoint(unsigned Codepoint, llvm::SmallVectorImpl< char > &Str)

static unsigned getEncodingPrefixLen(tok::TokenKind kind)

static void DiagnoseInvalidUnicodeCharacterName(DiagnosticsEngine *Diags, const LangOptions &Features, FullSourceLoc Loc, const char *TokBegin, const char *TokRangeBegin, const char *TokRangeEnd, llvm::StringRef Name)

static DiagnosticBuilder Diag(DiagnosticsEngine *Diags, const LangOptions &Features, FullSourceLoc TokLoc, const char *TokBegin, const char *TokRangeBegin, const char *TokRangeEnd, unsigned DiagID)

Produce a diagnostic highlighting some portion of a literal.

static bool IsEscapeValidInUnevaluatedStringLiteral(char Escape)

static bool ProcessNumericUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, const char *ThisTokEnd, uint32_t &UcnVal, unsigned short &UcnLen, bool &Delimited, FullSourceLoc Loc, DiagnosticsEngine *Diags, const LangOptions &Features, bool in_char_string_literal=false)

static bool ProcessNamedUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, const char *ThisTokEnd, uint32_t &UcnVal, unsigned short &UcnLen, FullSourceLoc Loc, DiagnosticsEngine *Diags, const LangOptions &Features)

static bool IsExponentPart(char c, bool isHex)

static bool alwaysFitsInto64Bits(unsigned Radix, unsigned NumDigits)

static unsigned ProcessCharEscape(const char *ThisTokBegin, const char *&ThisTokBuf, const char *ThisTokEnd, bool &HadError, FullSourceLoc Loc, unsigned CharWidth, DiagnosticsEngine *Diags, const LangOptions &Features, StringLiteralEvalMethod EvalMethod)

ProcessCharEscape - Parse a standard C escape sequence, which can occur in either a character or a st...

static unsigned getCharWidth(tok::TokenKind kind, const TargetInfo &Target)

llvm::MachO::Target Target

Defines the clang::Preprocessor interface.

Defines the clang::SourceLocation class and associated facilities.

__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)

__device__ __2f16 float __ockl_bool s

__device__ __2f16 float c

CharLiteralParser(const char *begin, const char *end, SourceLocation Loc, Preprocessor &PP, tok::TokenKind kind)

Represents a character-granular source range.

static CharSourceRange getCharRange(SourceRange R)

A little helper class used to produce diagnostics.

Concrete class used by the front-end to report problems and issues.

DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)

Issue the message to the client.

static FixItHint CreateReplacement(CharSourceRange RemoveRange, StringRef Code)

Create a code modification hint that replaces the given source range with the given code string.

static FixItHint CreateRemoval(CharSourceRange RemoveRange)

Create a code modification hint that removes the given source range.

A SourceLocation and its associated SourceManager.

const SourceManager & getManager() const

Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...

static SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, unsigned Characters, const SourceManager &SM, const LangOptions &LangOpts)

AdvanceToTokenCharacter - If the current SourceLocation specifies a location at the start of a token,...

static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)

getSpelling - This method is used to get the spelling of a token into a preallocated buffer,...

NumericLiteralParser(StringRef TokSpelling, SourceLocation TokLoc, const SourceManager &SM, const LangOptions &LangOpts, const TargetInfo &Target, DiagnosticsEngine &Diags)

integer-constant: [C99 6.4.4.1] decimal-constant integer-suffix octal-constant integer-suffix hexadec...

bool isFixedPointLiteral() const

bool isFloatingLiteral() const

bool isIntegerLiteral() const

llvm::APFloat::opStatus GetFloatValue(llvm::APFloat &Result, llvm::RoundingMode RM)

Convert this numeric literal to a floating value, using the specified APFloat fltSemantics (specifyin...

static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix)

Determine whether a suffix is a valid ud-suffix.

bool GetIntegerValue(llvm::APInt &Val)

GetIntegerValue - Convert this numeric literal value to an APInt that matches Val's input width.

bool GetFixedPointValue(llvm::APInt &StoreVal, unsigned Scale)

GetFixedPointValue - Convert this numeric literal value into a scaled integer that represents this va...

Engages in a tight little dance with the lexer to efficiently preprocess tokens.

SourceManager & getSourceManager() const

const TargetInfo & getTargetInfo() const

const LangOptions & getLangOpts() const

DiagnosticsEngine & getDiagnostics() const

DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const

Forwarding function for diagnostics.

Encodes a location in the source.

This class handles loading and caching of source files into memory.

A trivial tuple used to represent a source range.

unsigned getOffsetOfStringByte(const Token &TheTok, unsigned ByteNo) const

getOffsetOfStringByte - This function returns the offset of the specified byte of the string data rep...

bool isUnevaluated() const

StringLiteralParser(ArrayRef< Token > StringToks, Preprocessor &PP, StringLiteralEvalMethod StringMethod=StringLiteralEvalMethod::Evaluated)

unsigned GetStringLength() const

static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix)

Determine whether a suffix is a valid ud-suffix.

unsigned GetNumStringChars() const

Exposes information about the current target.

unsigned getIntWidth() const

getIntWidth/Align - Return the size of 'signed int' and 'unsigned int' for this target,...

unsigned getWCharWidth() const

getWCharWidth/Align - Return the size of 'wchar_t' for this target, in bits.

unsigned getCharWidth() const

Token - This structure provides full information about a lexed token.

SourceLocation getLocation() const

Return a source location identifier for the specified offset in the current file.

unsigned getLength() const

tok::TokenKind getKind() const

Defines the clang::TargetInfo interface.

bool isStringLiteral(TokenKind K)

Return true if this is a C or C++ string-literal (or C++11 user-defined-string-literal) token.

TokenKind

Provides a simple uniform namespace for tokens from all C languages.

The JSON file list parser is used to communicate input to InstallAPI.

LLVM_READONLY bool isVerticalWhitespace(unsigned char c)

Returns true if this character is vertical ASCII whitespace: '\n', '\r'.

LLVM_READONLY bool isPrintable(unsigned char c)

Return true if this character is an ASCII printable character; that is, a character that should take ...

void expandUCNs(SmallVectorImpl< char > &Buf, StringRef Input)

Copy characters from Input to Buf, expanding any UCNs.

bool tokenIsLikeStringLiteral(const Token &Tok, const LangOptions &LO)

Return true if the token is a string literal, or a function local predefined macro,...

@ Result

The result type of a method or function.

LLVM_READONLY bool isDigit(unsigned char c)

Return true if this character is an ASCII digit: [0-9].

bool isFunctionLocalStringLiteralMacro(tok::TokenKind K, const LangOptions &LO)

Return true if the token corresponds to a function local predefined macro, which expands to a string ...

LLVM_READONLY bool isPreprocessingNumberBody(unsigned char c)

Return true if this is the body character of a C preprocessing number, which is [a-zA-Z0-9_.].

LLVM_READONLY bool isHexDigit(unsigned char c)

Return true if this character is an ASCII hex digit: [0-9a-fA-F].

@ Incomplete

Template argument deduction did not deduce a value for every template parameter.