clang: lib/Format/FormatTokenLexer.cpp Source File

1 //===--- FormatTokenLexer.cpp - Lex FormatTokens -------------*- C++ -*-===//

2 //

3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

4 // See https://llvm.org/LICENSE.txt for license information.

5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

6 //

7 //===----------------------------------------------------------------------===//

8 ///

9 /// \file

10 /// This file contains FormatTokenLexer, which tokenizes a source file into

11 /// a token stream suitable for ClangFormat.

12 ///

13 //===----------------------------------------------------------------------===//

14

21#include "llvm/Support/Regex.h"

22

23 namespace clang {

24 namespace format {

25

26 FormatTokenLexer::FormatTokenLexer(

27 const SourceManager &SourceMgr, FileID ID, unsigned Column,

28 const FormatStyle &Style, encoding::Encoding Encoding,

29 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,

30 IdentifierTable &IdentTable)

31 : FormatTok(nullptr), IsFirstToken(true), StateStack({LexerState::NORMAL}),

32 Column(Column), TrailingWhitespace(0),

33 LangOpts(getFormattingLangOpts(Style)), SourceMgr(SourceMgr), ID(ID),

34 Style(Style), IdentTable(IdentTable), Keywords(IdentTable),

35 Encoding(Encoding), Allocator(Allocator), FirstInLineIndex(0),

36 FormattingDisabled(false), FormatOffRegex(Style.OneLineFormatOffRegex),

37 MacroBlockBeginRegex(Style.MacroBlockBegin),

38 MacroBlockEndRegex(Style.MacroBlockEnd) {

39 Lex.reset(new Lexer(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts));

40 Lex->SetKeepWhitespaceMode(true);

41

42 for (const std::string &ForEachMacro : Style.ForEachMacros) {

43 auto Identifier = &IdentTable.get(ForEachMacro);

44 Macros.insert({Identifier, TT_ForEachMacro});

45 }

46 for (const std::string &IfMacro : Style.IfMacros) {

47 auto Identifier = &IdentTable.get(IfMacro);

48 Macros.insert({Identifier, TT_IfMacro});

49 }

50 for (const std::string &AttributeMacro : Style.AttributeMacros) {

51 auto Identifier = &IdentTable.get(AttributeMacro);

52 Macros.insert({Identifier, TT_AttributeMacro});

53 }

54 for (const std::string &StatementMacro : Style.StatementMacros) {

55 auto Identifier = &IdentTable.get(StatementMacro);

56 Macros.insert({Identifier, TT_StatementMacro});

57 }

58 for (const std::string &TypenameMacro : Style.TypenameMacros) {

59 auto Identifier = &IdentTable.get(TypenameMacro);

60 Macros.insert({Identifier, TT_TypenameMacro});

61 }

62 for (const std::string &NamespaceMacro : Style.NamespaceMacros) {

63 auto Identifier = &IdentTable.get(NamespaceMacro);

64 Macros.insert({Identifier, TT_NamespaceMacro});

65 }

66 for (const std::string &WhitespaceSensitiveMacro :

67 Style.WhitespaceSensitiveMacros) {

68 auto Identifier = &IdentTable.get(WhitespaceSensitiveMacro);

69 Macros.insert({Identifier, TT_UntouchableMacroFunc});

70 }

71 for (const std::string &StatementAttributeLikeMacro :

72 Style.StatementAttributeLikeMacros) {

73 auto Identifier = &IdentTable.get(StatementAttributeLikeMacro);

74 Macros.insert({Identifier, TT_StatementAttributeLikeMacro});

75 }

76

77 for (const auto &Macro : Style.MacrosSkippedByRemoveParentheses)

78 MacrosSkippedByRemoveParentheses.insert(&IdentTable.get(Macro));

79 for (const auto &TemplateName : Style.TemplateNames)

80 TemplateNames.insert(&IdentTable.get(TemplateName));

81 for (const auto &TypeName : Style.TypeNames)

82 TypeNames.insert(&IdentTable.get(TypeName));

83 for (const auto &VariableTemplate : Style.VariableTemplates)

84 VariableTemplates.insert(&IdentTable.get(VariableTemplate));

85}

86

87 ArrayRef<FormatToken *> FormatTokenLexer::lex() {

88 assert(Tokens.empty());

89 assert(FirstInLineIndex == 0);

90 enum { FO_None, FO_CurrentLine, FO_NextLine } FormatOff = FO_None;

91 do {

92 Tokens.push_back(getNextToken());

93 auto &Tok = *Tokens.back();

94 const auto NewlinesBefore = Tok.NewlinesBefore;

95 switch (FormatOff) {

96 case FO_NextLine:

97 if (NewlinesBefore > 1) {

98 FormatOff = FO_None;

99 } else {

100 Tok.Finalized = true;

101 FormatOff = FO_CurrentLine;

102 }

103 break;

104 case FO_CurrentLine:

105 if (NewlinesBefore == 0) {

106 Tok.Finalized = true;

107 break;

108 }

109 FormatOff = FO_None;

110 [[fallthrough]];

111 default:

112 if (!FormattingDisabled && FormatOffRegex.match(Tok.TokenText)) {

113 if (Tok.is(tok::comment) &&

114 (NewlinesBefore > 0 || Tok.IsFirst)) {

115 Tok.Finalized = true;

116 FormatOff = FO_NextLine;

117 } else {

118 for (auto *Token : reverse(Tokens)) {

119 Token->Finalized = true;

120 if (Token->NewlinesBefore > 0)

121 break;

122 }

123 FormatOff = FO_CurrentLine;

124 }

125 }

126 }

127 if (Style.isJavaScript()) {

128 tryParseJSRegexLiteral();

129 handleTemplateStrings();

130 } else if (Style.isTextProto()) {

131 tryParsePythonComment();

132 }

133 tryMergePreviousTokens();

134 if (Style.isCSharp()) {

135

136

137 handleCSharpVerbatimAndInterpolatedStrings();

138 } else if (Style.isTableGen()) {

139 handleTableGenMultilineString();

140 handleTableGenNumericLikeIdentifier();

141 }

142 if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)

143 FirstInLineIndex = Tokens.size() - 1;

144 } while (Tokens.back()->isNot(tok::eof));

145 if (Style.InsertNewlineAtEOF) {

146 auto &TokEOF = *Tokens.back();

147 if (TokEOF.NewlinesBefore == 0) {

148 TokEOF.NewlinesBefore = 1;

149 TokEOF.OriginalColumn = 0;

150 }

151 }

152 return Tokens;

153}

154

155void FormatTokenLexer::tryMergePreviousTokens() {

156 if (tryMerge_TMacro())

157 return;

158 if (tryMergeConflictMarkers())

159 return;

160 if (tryMergeLessLess())

161 return;

162 if (tryMergeGreaterGreater())

163 return;

164 if (tryMergeForEach())

165 return;

166 if (Style.isCpp() && tryTransformTryUsageForC())

167 return;

168

169 if ((Style.Language == FormatStyle::LK_Cpp ||

170 Style.Language == FormatStyle::LK_ObjC) &&

171 tryMergeUserDefinedLiteral()) {

172 return;

173 }

174

175 if (Style.isJavaScript() || Style.isCSharp()) {

176 static const tok::TokenKind NullishCoalescingOperator[] = {tok::question,

177 tok::question};

178 static const tok::TokenKind NullPropagatingOperator[] = {tok::question,

179 tok::period};

180 static const tok::TokenKind FatArrow[] = {tok::equal, tok::greater};

181

182 if (tryMergeTokens(FatArrow, TT_FatArrow))

183 return;

184 if (tryMergeTokens(NullishCoalescingOperator, TT_NullCoalescingOperator)) {

185

186 Tokens.back()->Tok.setKind(tok::pipepipe);

187 return;

188 }

189 if (tryMergeTokens(NullPropagatingOperator, TT_NullPropagatingOperator)) {

190

191 Tokens.back()->Tok.setKind(tok::period);

192 return;

193 }

194 if (tryMergeNullishCoalescingEqual())

195 return;

196

197 if (Style.isCSharp()) {

198 static const tok::TokenKind CSharpNullConditionalLSquare[] = {

199 tok::question, tok::l_square};

200

201 if (tryMergeCSharpKeywordVariables())

202 return;

203 if (tryMergeCSharpStringLiteral())

204 return;

205 if (tryTransformCSharpForEach())

206 return;

207 if (tryMergeTokens(CSharpNullConditionalLSquare,

208 TT_CSharpNullConditionalLSquare)) {

209

210 Tokens.back()->Tok.setKind(tok::l_square);

211 return;

212 }

213 }

214 }

215

216 if (tryMergeNSStringLiteral())

217 return;

218

219 if (Style.isJavaScript()) {

220 static const tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal};

221 static const tok::TokenKind JSNotIdentity[] = {tok::exclaimequal,

222 tok::equal};

223 static const tok::TokenKind JSShiftEqual[] = {tok::greater, tok::greater,

224 tok::greaterequal};

225 static const tok::TokenKind JSExponentiation[] = {tok::star, tok::star};

226 static const tok::TokenKind JSExponentiationEqual[] = {tok::star,

227 tok::starequal};

228 static const tok::TokenKind JSPipePipeEqual[] = {tok::pipepipe, tok::equal};

229 static const tok::TokenKind JSAndAndEqual[] = {tok::ampamp, tok::equal};

230

231

232 if (tryMergeTokens(JSIdentity, TT_BinaryOperator))

233 return;

234 if (tryMergeTokens(JSNotIdentity, TT_BinaryOperator))

235 return;

236 if (tryMergeTokens(JSShiftEqual, TT_BinaryOperator))

237 return;

238 if (tryMergeTokens(JSExponentiation, TT_JsExponentiation))

239 return;

240 if (tryMergeTokens(JSExponentiationEqual, TT_JsExponentiationEqual)) {

241 Tokens.back()->Tok.setKind(tok::starequal);

242 return;

243 }

244 if (tryMergeTokens(JSAndAndEqual, TT_JsAndAndEqual) ||

245 tryMergeTokens(JSPipePipeEqual, TT_JsPipePipeEqual)) {

246

247 Tokens.back()->Tok.setKind(tok::equal);

248 return;

249 }

250 if (tryMergeJSPrivateIdentifier())

251 return;

252 } else if (Style.isJava()) {

253 static const tok::TokenKind JavaRightLogicalShiftAssign[] = {

254 tok::greater, tok::greater, tok::greaterequal};

255 if (tryMergeTokens(JavaRightLogicalShiftAssign, TT_BinaryOperator))

256 return;

257 } else if (Style.isVerilog()) {

258

259 if (Tokens.size() >= 3 && Tokens.end()[-3]->is(TT_VerilogNumberBase) &&

260 Tokens.end()[-2]->is(tok::numeric_constant) &&

261 Tokens.back()->isOneOf(tok::numeric_constant, tok::identifier,

262 tok::question) &&

263 tryMergeTokens(2, TT_Unknown)) {

264 return;

265 }

266

267 if (tryMergeTokensAny({{tok::minus, tok::colon}, {tok::plus, tok::colon}},

268 TT_BitFieldColon)) {

269 return;

270 }

271

272

273

274

275 if (Tokens.back()->TokenText.size() == 1 &&

276 tryMergeTokensAny({{tok::caret, tok::tilde}, {tok::tilde, tok::caret}},

277 TT_BinaryOperator)) {

278 Tokens.back()->Tok.setKind(tok::caret);

279 return;

280 }

281

282 if (tryMergeTokens({tok::less, tok::less}, TT_BinaryOperator)) {

283 Tokens.back()->Tok.setKind(tok::lessless);

284 return;

285 }

286 if (tryMergeTokens({tok::greater, tok::greater}, TT_BinaryOperator)) {

287 Tokens.back()->Tok.setKind(tok::greatergreater);

288 return;

289 }

290 if (tryMergeTokensAny({{tok::lessless, tok::equal},

291 {tok::lessless, tok::lessequal},

292 {tok::greatergreater, tok::equal},

293 {tok::greatergreater, tok::greaterequal},

294 {tok::colon, tok::equal},

295 {tok::colon, tok::slash}},

296 TT_BinaryOperator)) {

297 Tokens.back()->ForcedPrecedence = prec::Assignment;

298 return;

299 }

300

301 if (tryMergeTokensAny({{tok::star, tok::star},

302 {tok::lessless, tok::less},

303 {tok::greatergreater, tok::greater},

304 {tok::exclaimequal, tok::equal},

305 {tok::exclaimequal, tok::question},

306 {tok::equalequal, tok::equal},

307 {tok::equalequal, tok::question}},

308 TT_BinaryOperator)) {

309 return;

310 }

311

312

313 if (tryMergeTokensAny({{tok::plusequal, tok::greater},

314 {tok::plus, tok::star, tok::greater},

315 {tok::minusequal, tok::greater},

316 {tok::minus, tok::star, tok::greater},

317 {tok::less, tok::arrow},

318 {tok::equal, tok::greater},

319 {tok::star, tok::greater},

320 {tok::pipeequal, tok::greater},

321 {tok::pipe, tok::arrow}},

322 TT_BinaryOperator) ||

323 Tokens.back()->is(tok::arrow)) {

324 Tokens.back()->ForcedPrecedence = prec::Comma;

325 return;

326 }

327 if (Tokens.size() >= 3 &&

328 Tokens[Tokens.size() - 3]->is(Keywords.kw_verilogHash) &&

329 Tokens[Tokens.size() - 2]->isOneOf(tok::minus, tok::equal) &&

330 Tokens[Tokens.size() - 1]->is(Keywords.kw_verilogHash) &&

331 tryMergeTokens(3, TT_BinaryOperator)) {

332 Tokens.back()->setFinalizedType(TT_BinaryOperator);

333 Tokens.back()->ForcedPrecedence = prec::Comma;

334 return;

335 }

336 } else if (Style.isTableGen()) {

337

338 if (tryMergeTokens({tok::l_square, tok::l_brace},

339 TT_TableGenMultiLineString)) {

340

341 Tokens.back()->setFinalizedType(TT_TableGenMultiLineString);

342 Tokens.back()->Tok.setKind(tok::string_literal);

343 return;

344 }

345

346

347 if (tryMergeTokens({tok::exclaim, tok::identifier},

348 TT_TableGenBangOperator)) {

349 Tokens.back()->Tok.setKind(tok::identifier);

350 Tokens.back()->Tok.setIdentifierInfo(nullptr);

351 if (Tokens.back()->TokenText == "!cond")

352 Tokens.back()->setFinalizedType(TT_TableGenCondOperator);

353 else

354 Tokens.back()->setFinalizedType(TT_TableGenBangOperator);

355 return;

356 }

357 if (tryMergeTokens({tok::exclaim, tok::kw_if}, TT_TableGenBangOperator)) {

358

359

360 Tokens.back()->Tok.setKind(tok::identifier);

361 Tokens.back()->Tok.setIdentifierInfo(nullptr);

362 Tokens.back()->setFinalizedType(TT_TableGenBangOperator);

363 return;

364 }

365

366 if (tryMergeTokens({tok::plus, tok::numeric_constant}, TT_Unknown)) {

367 Tokens.back()->Tok.setKind(tok::numeric_constant);

368 return;

369 }

370 if (tryMergeTokens({tok::minus, tok::numeric_constant}, TT_Unknown)) {

371 Tokens.back()->Tok.setKind(tok::numeric_constant);

372 return;

373 }

374 }

375}

376

377bool FormatTokenLexer::tryMergeNSStringLiteral() {

378 if (Tokens.size() < 2)

379 return false;

380 auto &At = *(Tokens.end() - 2);

381 auto &String = *(Tokens.end() - 1);

382 if (At->isNot(tok::at) || String->isNot(tok::string_literal))

383 return false;

384 At->Tok.setKind(tok::string_literal);

385 At->TokenText = StringRef(At->TokenText.begin(),

386 String->TokenText.end() - At->TokenText.begin());

387 At->ColumnWidth += String->ColumnWidth;

388 At->setType(TT_ObjCStringLiteral);

389 Tokens.erase(Tokens.end() - 1);

390 return true;

391}

392

393bool FormatTokenLexer::tryMergeJSPrivateIdentifier() {

394

395

396 if (Tokens.size() < 2)

397 return false;

398 auto &Hash = *(Tokens.end() - 2);

399 auto &Identifier = *(Tokens.end() - 1);

400 if (Hash->isNot(tok::hash) || Identifier->isNot(tok::identifier))

401 return false;

402 Hash->Tok.setKind(tok::identifier);

403 Hash->TokenText =

404 StringRef(Hash->TokenText.begin(),

405 Identifier->TokenText.end() - Hash->TokenText.begin());

406 Hash->ColumnWidth += Identifier->ColumnWidth;

407 Hash->setType(TT_JsPrivateIdentifier);

408 Tokens.erase(Tokens.end() - 1);

409 return true;

410}

411

412

413

414

415

416

417bool FormatTokenLexer::tryMergeCSharpStringLiteral() {

418 if (Tokens.size() < 2)

419 return false;

420

421

422 const auto String = *(Tokens.end() - 1);

423 if (String->isNot(tok::string_literal))

424 return false;

425

426 auto Prefix = *(Tokens.end() - 2);

427 if (Prefix->isNot(tok::at) && Prefix->TokenText != "$")

428 return false;

429

430 if (Tokens.size() > 2) {

431 const auto Tok = *(Tokens.end() - 3);

432 if ((Tok->TokenText == "$" && Prefix->is(tok::at)) ||

433 (Tok->is(tok::at) && Prefix->TokenText == "$")) {

434

435 Tok->ColumnWidth += Prefix->ColumnWidth;

436 Tokens.erase(Tokens.end() - 2);

437 Prefix = Tok;

438 }

439 }

440

441

442 Prefix->Tok.setKind(tok::string_literal);

443 Prefix->TokenText =

444 StringRef(Prefix->TokenText.begin(),

445 String->TokenText.end() - Prefix->TokenText.begin());

446 Prefix->ColumnWidth += String->ColumnWidth;

447 Prefix->setType(TT_CSharpStringLiteral);

448 Tokens.erase(Tokens.end() - 1);

449 return true;

450}

451

452

453

454const llvm::StringSet<> FormatTokenLexer::CSharpAttributeTargets = {

455 "assembly", "module", "field", "event", "method",

456 "param", "property", "return", "type",

457};

458

459bool FormatTokenLexer::tryMergeNullishCoalescingEqual() {

460 if (Tokens.size() < 2)

461 return false;

462 auto &NullishCoalescing = *(Tokens.end() - 2);

463 auto &Equal = *(Tokens.end() - 1);

464 if (NullishCoalescing->isNot(TT_NullCoalescingOperator) ||

465 Equal->isNot(tok::equal)) {

466 return false;

467 }

468 NullishCoalescing->Tok.setKind(tok::equal);

469 NullishCoalescing->TokenText =

470 StringRef(NullishCoalescing->TokenText.begin(),

471 Equal->TokenText.end() - NullishCoalescing->TokenText.begin());

472 NullishCoalescing->ColumnWidth += Equal->ColumnWidth;

473 NullishCoalescing->setType(TT_NullCoalescingEqual);

474 Tokens.erase(Tokens.end() - 1);

475 return true;

476}

477

478bool FormatTokenLexer::tryMergeCSharpKeywordVariables() {

479 if (Tokens.size() < 2)

480 return false;

481 const auto At = *(Tokens.end() - 2);

482 if (At->isNot(tok::at))

483 return false;

484 const auto Keyword = *(Tokens.end() - 1);

485 if (Keyword->TokenText == "$")

486 return false;

487 if (!Keywords.isCSharpKeyword(*Keyword))

488 return false;

489

490 At->Tok.setKind(tok::identifier);

491 At->TokenText = StringRef(At->TokenText.begin(),

492 Keyword->TokenText.end() - At->TokenText.begin());

493 At->ColumnWidth += Keyword->ColumnWidth;

494 At->setType(Keyword->getType());

495 Tokens.erase(Tokens.end() - 1);

496 return true;

497}

498

499

500bool FormatTokenLexer::tryTransformCSharpForEach() {

501 if (Tokens.empty())

502 return false;

503 auto &Identifier = *(Tokens.end() - 1);

504 if (Identifier->isNot(tok::identifier))

505 return false;

506 if (Identifier->TokenText != "foreach")

507 return false;

508

509 Identifier->setType(TT_ForEachMacro);

510 Identifier->Tok.setKind(tok::kw_for);

511 return true;

512}

513

514bool FormatTokenLexer::tryMergeForEach() {

515 if (Tokens.size() < 2)

516 return false;

517 auto &For = *(Tokens.end() - 2);

518 auto &Each = *(Tokens.end() - 1);

519 if (For->isNot(tok::kw_for))

520 return false;

521 if (Each->isNot(tok::identifier))

522 return false;

523 if (Each->TokenText != "each")

524 return false;

525

526 For->setType(TT_ForEachMacro);

527 For->Tok.setKind(tok::kw_for);

528

529 For->TokenText = StringRef(For->TokenText.begin(),

530 Each->TokenText.end() - For->TokenText.begin());

531 For->ColumnWidth += Each->ColumnWidth;

532 Tokens.erase(Tokens.end() - 1);

533 return true;

534}

535

536bool FormatTokenLexer::tryTransformTryUsageForC() {

537 if (Tokens.size() < 2)

538 return false;

539 auto &Try = *(Tokens.end() - 2);

540 if (Try->isNot(tok::kw_try))

541 return false;

542 auto &Next = *(Tokens.end() - 1);

543 if (Next->isOneOf(tok::l_brace, tok::colon, tok::hash, tok::comment))

544 return false;

545

546 if (Tokens.size() > 2) {

547 auto &At = *(Tokens.end() - 3);

548 if (At->is(tok::at))

549 return false;

550 }

551

552 Try->Tok.setKind(tok::identifier);

553 return true;

554}

555

556bool FormatTokenLexer::tryMergeLessLess() {

557

558 if (Tokens.size() < 3)

559 return false;

560

561 auto First = Tokens.end() - 3;

562 if (First[0]->isNot(tok::less) || First[1]->isNot(tok::less))

563 return false;

564

565

566 if (First[1]->hasWhitespaceBefore())

567 return false;

568

569 auto X = Tokens.size() > 3 ? First[-1] : nullptr;

570 if (X && X->is(tok::less))

571 return false;

572

573 auto Y = First[2];

574 if ((!X || X->isNot(tok::kw_operator)) && Y->is(tok::less))

575 return false;

576

577 First[0]->Tok.setKind(tok::lessless);

578 First[0]->TokenText = "<<";

579 First[0]->ColumnWidth += 1;

580 Tokens.erase(Tokens.end() - 2);

581 return true;

582}

583

584bool FormatTokenLexer::tryMergeGreaterGreater() {

585

586 if (Tokens.size() < 2)

587 return false;

588

589 auto First = Tokens.end() - 2;

590 if (First[0]->isNot(tok::greater) || First[1]->isNot(tok::greater))

591 return false;

592

593

594 if (First[1]->hasWhitespaceBefore())

595 return false;

596

597 auto Tok = Tokens.size() > 2 ? First[-1] : nullptr;

598 if (Tok && Tok->isNot(tok::kw_operator))

599 return false;

600

601 First[0]->Tok.setKind(tok::greatergreater);

602 First[0]->TokenText = ">>";

603 First[0]->ColumnWidth += 1;

604 Tokens.erase(Tokens.end() - 1);

605 return true;

606}

607

608bool FormatTokenLexer::tryMergeUserDefinedLiteral() {

609 if (Tokens.size() < 2)

610 return false;

611

612 auto *First = Tokens.end() - 2;

613 auto &Suffix = First[1];

614 if (Suffix->hasWhitespaceBefore() || Suffix->TokenText != "$")

615 return false;

616

617 auto &Literal = First[0];

618 if (!Literal->Tok.isLiteral())

619 return false;

620

621 auto &Text = Literal->TokenText;

622 if (!Text.ends_with("_"))

623 return false;

624

625 Text = StringRef(Text.data(), Text.size() + 1);

626 Literal->ColumnWidth += 1;

627 Tokens.erase(&Suffix);

628 return true;

629}

630

631 bool FormatTokenLexer::tryMergeTokens(ArrayRef<tok::TokenKind> Kinds,

632 TokenType NewType) {

633 if (Tokens.size() < Kinds.size())

634 return false;

635

636 const auto *First = Tokens.end() - Kinds.size();

637 for (unsigned i = 0; i < Kinds.size(); ++i)

638 if (First[i]->isNot(Kinds[i]))

639 return false;

640

641 return tryMergeTokens(Kinds.size(), NewType);

642}

643

644bool FormatTokenLexer::tryMergeTokens(size_t Count, TokenType NewType) {

645 if (Tokens.size() < Count)

646 return false;

647

648 const auto *First = Tokens.end() - Count;

649 unsigned AddLength = 0;

650 for (size_t i = 1; i < Count; ++i) {

651

652

653 if (First[i]->hasWhitespaceBefore())

654 return false;

655 AddLength += First[i]->TokenText.size();

656 }

657

658 Tokens.resize(Tokens.size() - Count + 1);

659 First[0]->TokenText = StringRef(First[0]->TokenText.data(),

660 First[0]->TokenText.size() + AddLength);

661 First[0]->ColumnWidth += AddLength;

662 First[0]->setType(NewType);

663 return true;

664}

665

666 bool FormatTokenLexer::tryMergeTokensAny(

667 ArrayRef<ArrayRef<tok::TokenKind>> Kinds, TokenType NewType) {

668 return llvm::any_of(Kinds, [this, NewType](ArrayRef<tok::TokenKind> Kinds) {

669 return tryMergeTokens(Kinds, NewType);

670 });

671}

672

673

674bool FormatTokenLexer::precedesOperand(FormatToken *Tok) {

675

676

677

678 return Tok->isOneOf(tok::period, tok::l_paren, tok::comma, tok::l_brace,

679 tok::r_brace, tok::l_square, tok::semi, tok::exclaim,

680 tok::colon, tok::question, tok::tilde) ||

681 Tok->isOneOf(tok::kw_return, tok::kw_do, tok::kw_case, tok::kw_throw,

682 tok::kw_else, tok::kw_void, tok::kw_typeof,

683 Keywords.kw_instanceof, Keywords.kw_in) ||

684 Tok->isPlacementOperator() || Tok->isBinaryOperator();

685}

686

687bool FormatTokenLexer::canPrecedeRegexLiteral(FormatToken *Prev) {

688 if (!Prev)

689 return true;

690

691

692

693

694

695

696

697 if (Prev->isOneOf(tok::plusplus, tok::minusminus, tok::exclaim))

698 return Tokens.size() < 3 || precedesOperand(Tokens[Tokens.size() - 3]);

699

700

701

702 if (!precedesOperand(Prev))

703 return false;

704

705 return true;

706}

707

708void FormatTokenLexer::tryParseJavaTextBlock() {

709 if (FormatTok->TokenText != "\"\"")

710 return;

711

712 const auto *S = Lex->getBufferLocation();

713 const auto *End = Lex->getBuffer().end();

714

715 if (S == End || *S != '\"')

716 return;

717

718 ++S;

719

720

721 for (int Count = 0; Count < 3 && S < End; ++S) {

722 switch (*S) {

723 case '\\':

724 Count = -1;

725 break;

726 case '\"':

727 ++Count;

728 break;

729 default:

730 Count = 0;

731 }

732 }

733

734

735 resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(S)));

736}

737

738

739

740

741

742 void FormatTokenLexer::tryParseJSRegexLiteral() {

743 FormatToken *RegexToken = Tokens.back();

744 if (RegexToken->isNoneOf(tok::slash, tok::slashequal))

745 return;

746

747 FormatToken *Prev = nullptr;

748 for (FormatToken *FT : llvm::drop_begin(llvm::reverse(Tokens))) {

749

750

751 if (FT->isNot(tok::comment)) {

752 Prev = FT;

753 break;

754 }

755 }

756

757 if (!canPrecedeRegexLiteral(Prev))

758 return;

759

760

761 const char *Offset = Lex->getBufferLocation();

762 const char *RegexBegin = Offset - RegexToken->TokenText.size();

763 StringRef Buffer = Lex->getBuffer();

764 bool InCharacterClass = false;

765 bool HaveClosingSlash = false;

766 for (; !HaveClosingSlash && Offset != Buffer.end(); ++Offset) {

767

768

769

770 switch (*Offset) {

771 case '\\':

772

773 ++Offset;

774 break;

775 case '[':

776 InCharacterClass = true;

777 break;

778 case ']':

779 InCharacterClass = false;

780 break;

781 case '/':

782 if (!InCharacterClass)

783 HaveClosingSlash = true;

784 break;

785 }

786 }

787

788 RegexToken->setType(TT_RegexLiteral);

789

790 RegexToken->Tok.setKind(tok::string_literal);

791 RegexToken->TokenText = StringRef(RegexBegin, Offset - RegexBegin);

792 RegexToken->ColumnWidth = RegexToken->TokenText.size();

793

794 resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset)));

795}

796

797static auto lexCSharpString(const char *Begin, const char *End, bool Verbatim,

798 bool Interpolated) {

799 auto Repeated = [&Begin, End]() {

800 return Begin + 1 < End && Begin[1] == Begin[0];

801 };

802

803

804

805

806

807

808

809

810

811

812

813

814

815 for (int UnmatchedOpeningBraceCount = 0; Begin < End; ++Begin) {

816 switch (*Begin) {

817 case '\\':

818 if (!Verbatim)

819 ++Begin;

820 break;

821 case '{':

822 if (Interpolated) {

823

824 if (Repeated())

825 ++Begin;

826 else

827 ++UnmatchedOpeningBraceCount;

828 }

829 break;

830 case '}':

831 if (Interpolated) {

832

833 if (Repeated())

834 ++Begin;

835 else if (UnmatchedOpeningBraceCount > 0)

836 --UnmatchedOpeningBraceCount;

837 else

838 return End;

839 }

840 break;

841 case '"':

842 if (UnmatchedOpeningBraceCount > 0)

843 break;

844

845 if (Verbatim && Repeated()) {

846 ++Begin;

847 break;

848 }

849 return Begin;

850 }

851 }

852

853 return End;

854}

855

856void FormatTokenLexer::handleCSharpVerbatimAndInterpolatedStrings() {

857 FormatToken *CSharpStringLiteral = Tokens.back();

858

859 if (CSharpStringLiteral->isNot(TT_CSharpStringLiteral))

860 return;

861

862 auto &TokenText = CSharpStringLiteral->TokenText;

863

864 bool Verbatim = false;

865 bool Interpolated = false;

866 if (TokenText.starts_with(R"($@")") || TokenText.starts_with(R"(@$")")) {

867 Verbatim = true;

868 Interpolated = true;

869 } else if (TokenText.starts_with(R"(@")")) {

870 Verbatim = true;

871 } else if (TokenText.starts_with(R"($")")) {

872 Interpolated = true;

873 }

874

875

876 if (!Verbatim && !Interpolated)

877 return;

878

879 const char *StrBegin = Lex->getBufferLocation() - TokenText.size();

880 const char *Offset = StrBegin;

881 Offset += Verbatim && Interpolated ? 3 : 2;

882

883 const auto End = Lex->getBuffer().end();

884 Offset = lexCSharpString(Offset, End, Verbatim, Interpolated);

885

886

887

888 if (Offset >= End)

889 return;

890

891 StringRef LiteralText(StrBegin, Offset - StrBegin + 1);

892 CSharpStringLiteral->TokenText = LiteralText;

893

894

895 size_t FirstBreak = LiteralText.find('\n');

896 StringRef FirstLineText = FirstBreak == StringRef::npos

897 ? LiteralText

898 : LiteralText.substr(0, FirstBreak);

899 CSharpStringLiteral->ColumnWidth = encoding::columnWidthWithTabs(

900 FirstLineText, CSharpStringLiteral->OriginalColumn, Style.TabWidth,

901 Encoding);

902 size_t LastBreak = LiteralText.rfind('\n');

903 if (LastBreak != StringRef::npos) {

904 CSharpStringLiteral->IsMultiline = true;

905 unsigned StartColumn = 0;

906 CSharpStringLiteral->LastLineColumnWidth =

907 encoding::columnWidthWithTabs(LiteralText.substr(LastBreak + 1),

908 StartColumn, Style.TabWidth, Encoding);

909 }

910

911 assert(Offset < End);

912 resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset + 1)));

913}

914

915void FormatTokenLexer::handleTableGenMultilineString() {

916 FormatToken *MultiLineString = Tokens.back();

917 if (MultiLineString->isNot(TT_TableGenMultiLineString))

918 return;

919

920 auto OpenOffset = Lex->getCurrentBufferOffset() - 2;

921

922 auto CloseOffset = Lex->getBuffer().find("}]", OpenOffset);

923 if (CloseOffset == StringRef::npos)

924 return;

925 auto Text = Lex->getBuffer().substr(OpenOffset, CloseOffset - OpenOffset + 2);

926 MultiLineString->TokenText = Text;

927 resetLexer(SourceMgr.getFileOffset(

928 Lex->getSourceLocation(Lex->getBufferLocation() - 2 + Text.size())));

929 auto FirstLineText = Text;

930 auto FirstBreak = Text.find('\n');

931

932 if (FirstBreak != StringRef::npos) {

933 MultiLineString->IsMultiline = true;

934 FirstLineText = Text.substr(0, FirstBreak + 1);

935

936 auto LastBreak = Text.rfind('\n');

937 MultiLineString->LastLineColumnWidth = encoding::columnWidthWithTabs(

938 Text.substr(LastBreak + 1), MultiLineString->OriginalColumn,

939 Style.TabWidth, Encoding);

940 }

941

942 MultiLineString->ColumnWidth = encoding::columnWidthWithTabs(

943 FirstLineText, MultiLineString->OriginalColumn, Style.TabWidth, Encoding);

944}

945

946 void FormatTokenLexer::handleTableGenNumericLikeIdentifier() {

947 FormatToken *Tok = Tokens.back();

948

949

950 if (Tok->isNot(tok::numeric_constant))

951 return;

952 StringRef Text = Tok->TokenText;

953

954

955

956

957

958

959

960

961 if (Text.empty() || Text[0] == '+' || Text[0] == '-')

962 return;

963 const auto NonDigitPos = Text.find_if([](char C) { return !isdigit(C); });

964

965 if (NonDigitPos == StringRef::npos)

966 return;

967 char FirstNonDigit = Text[NonDigitPos];

968 if (NonDigitPos < Text.size() - 1) {

969 char TheNext = Text[NonDigitPos + 1];

970

971 if (FirstNonDigit == 'b' && (TheNext == '0' || TheNext == '1'))

972 return;

973

974 if (FirstNonDigit == 'x' && isxdigit(TheNext))

975 return;

976 }

977 if (isalpha(FirstNonDigit) || FirstNonDigit == '_') {

978

979 Tok->Tok.setKind(tok::identifier);

980 Tok->Tok.setIdentifierInfo(nullptr);

981 }

982}

983

984void FormatTokenLexer::handleTemplateStrings() {

985 FormatToken *BacktickToken = Tokens.back();

986

987 if (BacktickToken->is(tok::l_brace)) {

988 StateStack.push(LexerState::NORMAL);

989 return;

990 }

991 if (BacktickToken->is(tok::r_brace)) {

992 if (StateStack.size() == 1)

993 return;

994 StateStack.pop();

995 if (StateStack.top() != LexerState::TEMPLATE_STRING)

996 return;

997

998 } else if (BacktickToken->is(tok::unknown) &&

999 BacktickToken->TokenText == "`") {

1000 StateStack.push(LexerState::TEMPLATE_STRING);

1001 } else {

1002 return;

1003 }

1004

1005

1006 const char *Offset = Lex->getBufferLocation();

1007 const char *TmplBegin = Offset - BacktickToken->TokenText.size();

1008 for (; Offset != Lex->getBuffer().end(); ++Offset) {

1009 if (Offset[0] == '`') {

1010 StateStack.pop();

1011 ++Offset;

1012 break;

1013 }

1014 if (Offset[0] == '\\') {

1015 ++Offset;

1016 } else if (Offset + 1 < Lex->getBuffer().end() && Offset[0] == '$' &&

1017 Offset[1] == '{') {

1018

1019 StateStack.push(LexerState::TEMPLATE_STRING);

1020 Offset += 2;

1021 break;

1022 }

1023 }

1024

1025 StringRef LiteralText(TmplBegin, Offset - TmplBegin);

1026 BacktickToken->setType(TT_TemplateString);

1027 BacktickToken->Tok.setKind(tok::string_literal);

1028 BacktickToken->TokenText = LiteralText;

1029

1030

1031 size_t FirstBreak = LiteralText.find('\n');

1032 StringRef FirstLineText = FirstBreak == StringRef::npos

1033 ? LiteralText

1034 : LiteralText.substr(0, FirstBreak);

1035 BacktickToken->ColumnWidth = encoding::columnWidthWithTabs(

1036 FirstLineText, BacktickToken->OriginalColumn, Style.TabWidth, Encoding);

1037 size_t LastBreak = LiteralText.rfind('\n');

1038 if (LastBreak != StringRef::npos) {

1039 BacktickToken->IsMultiline = true;

1040 unsigned StartColumn = 0;

1041 BacktickToken->LastLineColumnWidth =

1042 encoding::columnWidthWithTabs(LiteralText.substr(LastBreak + 1),

1043 StartColumn, Style.TabWidth, Encoding);

1044 }

1045

1046 SourceLocation loc = Lex->getSourceLocation(Offset);

1047 resetLexer(SourceMgr.getFileOffset(loc));

1048}

1049

1050 void FormatTokenLexer::tryParsePythonComment() {

1051 FormatToken *HashToken = Tokens.back();

1052 if (HashToken->isNoneOf(tok::hash, tok::hashhash))

1053 return;

1054

1055 const char *CommentBegin =

1056 Lex->getBufferLocation() - HashToken->TokenText.size();

1057 size_t From = CommentBegin - Lex->getBuffer().begin();

1058 size_t To = Lex->getBuffer().find_first_of('\n', From);

1059 if (To == StringRef::npos)

1060 To = Lex->getBuffer().size();

1061 size_t Len = To - From;

1062 HashToken->setType(TT_LineComment);

1063 HashToken->Tok.setKind(tok::comment);

1064 HashToken->TokenText = Lex->getBuffer().substr(From, Len);

1065 SourceLocation Loc = To < Lex->getBuffer().size()

1066 ? Lex->getSourceLocation(CommentBegin + Len)

1067 : SourceMgr.getLocForEndOfFile(ID);

1068 resetLexer(SourceMgr.getFileOffset(Loc));

1069}

1070

1071bool FormatTokenLexer::tryMerge_TMacro() {

1072 if (Tokens.size() < 4)

1073 return false;

1074 FormatToken *Last = Tokens.back();

1075 if (Last->isNot(tok::r_paren))

1076 return false;

1077

1078 FormatToken *String = Tokens[Tokens.size() - 2];

1079 if (String->isNot(tok::string_literal) || String->IsMultiline)

1080 return false;

1081

1082 if (Tokens[Tokens.size() - 3]->isNot(tok::l_paren))

1083 return false;

1084

1085 FormatToken *Macro = Tokens[Tokens.size() - 4];

1086 if (Macro->TokenText != "_T")

1087 return false;

1088

1089 const char *Start = Macro->TokenText.data();

1090 const char *End = Last->TokenText.data() + Last->TokenText.size();

1091 String->TokenText = StringRef(Start, End - Start);

1092 String->IsFirst = Macro->IsFirst;

1093 String->LastNewlineOffset = Macro->LastNewlineOffset;

1094 String->WhitespaceRange = Macro->WhitespaceRange;

1095 String->OriginalColumn = Macro->OriginalColumn;

1096 String->ColumnWidth = encoding::columnWidthWithTabs(

1097 String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding);

1098 String->NewlinesBefore = Macro->NewlinesBefore;

1099 String->HasUnescapedNewline = Macro->HasUnescapedNewline;

1100

1101 Tokens.pop_back();

1102 Tokens.pop_back();

1103 Tokens.pop_back();

1104 Tokens.back() = String;

1105 if (FirstInLineIndex >= Tokens.size())

1106 FirstInLineIndex = Tokens.size() - 1;

1107 return true;

1108}

1109

1110bool FormatTokenLexer::tryMergeConflictMarkers() {

1111 if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(tok::eof))

1112 return false;

1113

1114

1115

1116

1117

1118

1119

1120

1121

1122

1123 FileID ID;

1124

1125 unsigned FirstInLineOffset;

1126 std::tie(ID, FirstInLineOffset) = SourceMgr.getDecomposedLoc(

1127 Tokens[FirstInLineIndex]->getStartOfNonWhitespace());

1128 StringRef Buffer = SourceMgr.getBufferOrFake(ID).getBuffer();

1129

1130 auto LineOffset = Buffer.rfind('\n', FirstInLineOffset);

1131 if (LineOffset == StringRef::npos)

1132 LineOffset = 0;

1133 else

1134 ++LineOffset;

1135

1136 auto FirstSpace = Buffer.find_first_of(" \n", LineOffset);

1137 StringRef LineStart;

1138 if (FirstSpace == StringRef::npos)

1139 LineStart = Buffer.substr(LineOffset);

1140 else

1141 LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset);

1142

1143 TokenType Type = TT_Unknown;

1144 if (LineStart == "<<<<<<<" || LineStart == ">>>>") {

1145 Type = TT_ConflictStart;

1146 } else if (LineStart == "|||||||" || LineStart == "=======" ||

1147 LineStart == "====") {

1148 Type = TT_ConflictAlternative;

1149 } else if (LineStart == ">>>>>>>" || LineStart == "<<<<") {

1150 Type = TT_ConflictEnd;

1151 }

1152

1153 if (Type != TT_Unknown) {

1154 FormatToken *Next = Tokens.back();

1155

1156 Tokens.resize(FirstInLineIndex + 1);

1157

1158

1159

1160 Tokens.back()->setType(Type);

1161 Tokens.back()->Tok.setKind(tok::kw___unknown_anytype);

1162

1163 Tokens.push_back(Next);

1164 return true;

1165 }

1166

1167 return false;

1168}

1169

1170FormatToken *FormatTokenLexer::getStashedToken() {

1171

1172 Token Tok = FormatTok->Tok;

1173 StringRef TokenText = FormatTok->TokenText;

1174

1175 unsigned OriginalColumn = FormatTok->OriginalColumn;

1176 FormatTok = new (Allocator.Allocate()) FormatToken;

1177 FormatTok->Tok = Tok;

1178 SourceLocation TokLocation =

1179 FormatTok->Tok.getLocation().getLocWithOffset(Tok.getLength() - 1);

1180 FormatTok->Tok.setLocation(TokLocation);

1181 FormatTok->WhitespaceRange = SourceRange(TokLocation, TokLocation);

1182 FormatTok->TokenText = TokenText;

1183 FormatTok->ColumnWidth = 1;

1184 FormatTok->OriginalColumn = OriginalColumn + 1;

1185

1186 return FormatTok;

1187}

1188

1189

1190

1191

1192

1193

1194

1195void FormatTokenLexer::truncateToken(size_t NewLen) {

1196 assert(NewLen <= FormatTok->TokenText.size());

1197 resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(

1198 Lex->getBufferLocation() - FormatTok->TokenText.size() + NewLen)));

1199 FormatTok->TokenText = FormatTok->TokenText.substr(0, NewLen);

1201 FormatTok->TokenText, FormatTok->OriginalColumn, Style.TabWidth,

1202 Encoding);

1203 FormatTok->Tok.setLength(NewLen);

1204}

1205

1206 // Count the length of leading whitespace in a token.

1207 static size_t countLeadingWhitespace(StringRef Text) {

1208

1209

1210

1211

1212

1213 const unsigned char *const Begin = Text.bytes_begin();

1214 const unsigned char *const End = Text.bytes_end();

1215 const unsigned char *Cur = Begin;

1216 while (Cur < End) {

1217 if (isWhitespace(Cur[0])) {

1218 ++Cur;

1219 } else if (Cur[0] == '\\') {

1220

1221

1222

1223

1224

1225 const auto *Lookahead = Cur + 1;

1226 while (Lookahead < End && isHorizontalWhitespace(*Lookahead))

1227 ++Lookahead;

1228

1229 if (Lookahead == End || !isVerticalWhitespace(*Lookahead))

1230 break;

1231

1232 Cur = Lookahead + 1;

1233 } else {

1234 break;

1235 }

1236 }

1237 return Cur - Begin;

1238}

1239

1240 FormatToken *FormatTokenLexer::getNextToken() {

1241 if (StateStack.top() == LexerState::TOKEN_STASHED) {

1242 StateStack.pop();

1243 return getStashedToken();

1244 }

1245

1246 FormatTok = new (Allocator.Allocate()) FormatToken;

1247 readRawToken(*FormatTok);

1248 SourceLocation WhitespaceStart =

1249 FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);

1250 FormatTok->IsFirst = IsFirstToken;

1251 IsFirstToken = false;

1252

1253

1254

1255

1256

1257 unsigned WhitespaceLength = TrailingWhitespace;

1258 while (FormatTok->isNot(tok::eof)) {

1259 auto LeadingWhitespace = countLeadingWhitespace(FormatTok->TokenText);

1260 if (LeadingWhitespace == 0)

1261 break;

1262 if (LeadingWhitespace < FormatTok->TokenText.size())

1263 truncateToken(LeadingWhitespace);

1264 StringRef Text = FormatTok->TokenText;

1265 bool InEscape = false;

1266 for (int i = 0, e = Text.size(); i != e; ++i) {

1267 switch (Text[i]) {

1268 case '\r':

1269

1270

1271

1272 if (i + 1 < e && Text[i + 1] == '\n')

1273 break;

1274 [[fallthrough]];

1275 case '\n':

1276 ++FormatTok->NewlinesBefore;

1277 if (!InEscape)

1278 FormatTok->HasUnescapedNewline = true;

1279 else

1280 InEscape = false;

1281 FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;

1282 Column = 0;

1283 break;

1284 case '\f':

1285 if (Style.KeepFormFeed && !FormatTok->HasFormFeedBefore &&

1286 // The form feed is immediately preceded and followed by a newline.

1287 i > 0 && Text[i - 1] == '\n' &&

1288 ((i + 1 < e && Text[i + 1] == '\n') ||

1289 (i + 2 < e && Text[i + 1] == '\r' && Text[i + 2] == '\n'))) {

1290 FormatTok->HasFormFeedBefore = true;

1291 }

1292 [[fallthrough]];

1293 case '\v':

1294 Column = 0;

1295 break;

1296 case ' ':

1297 ++Column;

1298 break;

1299 case '\t':

1300 Column +=

1301 Style.TabWidth - (Style.TabWidth ? Column % Style.TabWidth : 0);

1302 break;

1303 case '\\':

1304

1305

1306

1307

1308 assert([&]() -> bool {

1309 size_t j = i + 1;

1310 while (j < Text.size() && isHorizontalWhitespace(Text[j]))

1311 ++j;

1312 return j < Text.size() && (Text[j] == '\n' || Text[j] == '\r');

1313 }());

1314 InEscape = true;

1315 break;

1316 default:

1317

1318 assert(false);

1319 break;

1320 }

1321 }

1322 WhitespaceLength += Text.size();

1323 readRawToken(*FormatTok);

1324 }

1325

1326 if (FormatTok->is(tok::unknown))

1327 FormatTok->setType(TT_ImplicitStringLiteral);

1328

1329 const bool IsCpp = Style.isCpp();

1330

1331

1332

1333

1334

1335

1336

1337

1338 if (const auto Text = FormatTok->TokenText;

1339 Text.starts_with("//") &&

1340 (IsCpp || Style.isJavaScript() || Style.isJava())) {

1341 assert(FormatTok->is(tok::comment));

1342 for (auto Pos = Text.find('\\'); Pos++ != StringRef::npos;

1343 Pos = Text.find('\\', Pos)) {

1344 if (Pos < Text.size() && Text[Pos] == '\n' &&

1345 (!IsCpp || Text.substr(Pos + 1).ltrim().starts_with("//"))) {

1346 truncateToken(Pos);

1347 break;

1348 }

1349 }

1350 }

1351

1352 if (Style.isVerilog()) {

1353 static const llvm::Regex NumberBase("^s?[bdho]", llvm::Regex::IgnoreCase);

1354 SmallVector<StringRef, 1> Matches;

1355

1356

1357

1358

1359

1360 if (FormatTok->is(tok::numeric_constant)) {

1361

1362 auto Quote = FormatTok->TokenText.find('\'');

1363 if (Quote != StringRef::npos)

1364 truncateToken(Quote);

1365 } else if (FormatTok->isOneOf(tok::hash, tok::hashhash)) {

1366 FormatTok->Tok.setKind(tok::raw_identifier);

1367 } else if (FormatTok->is(tok::raw_identifier)) {

1368 if (FormatTok->TokenText == "`") {

1369 FormatTok->Tok.setIdentifierInfo(nullptr);

1370 FormatTok->Tok.setKind(tok::hash);

1371 } else if (FormatTok->TokenText == "``") {

1372 FormatTok->Tok.setIdentifierInfo(nullptr);

1373 FormatTok->Tok.setKind(tok::hashhash);

1374 } else if (!Tokens.empty() && Tokens.back()->is(Keywords.kw_apostrophe) &&

1375 NumberBase.match(FormatTok->TokenText, &Matches)) {

1376

1377

1378

1379

1380 truncateToken(Matches[0].size());

1381 FormatTok->setFinalizedType(TT_VerilogNumberBase);

1382 }

1383 }

1384 }

1385

1386 FormatTok->WhitespaceRange = SourceRange(

1387 WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));

1388

1389 FormatTok->OriginalColumn = Column;

1390

1391 TrailingWhitespace = 0;

1392 if (FormatTok->is(tok::comment)) {

1393

1394 StringRef UntrimmedText = FormatTok->TokenText;

1395 FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f");

1396 TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();

1397 } else if (FormatTok->is(tok::raw_identifier)) {

1398 IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);

1399 FormatTok->Tok.setIdentifierInfo(&Info);

1400 FormatTok->Tok.setKind(Info.getTokenID());

1401 if (Style.isJava() &&

1402 FormatTok->isOneOf(tok::kw_struct, tok::kw_union, tok::kw_delete,

1403 tok::kw_operator)) {

1404 FormatTok->Tok.setKind(tok::identifier);

1405 } else if (Style.isJavaScript() &&

1406 FormatTok->isOneOf(tok::kw_struct, tok::kw_union,

1407 tok::kw_operator)) {

1408 FormatTok->Tok.setKind(tok::identifier);

1409 } else if (Style.isTableGen() && !Keywords.isTableGenKeyword(*FormatTok)) {

1410 FormatTok->Tok.setKind(tok::identifier);

1411 } else if (Style.isVerilog() && Keywords.isVerilogIdentifier(*FormatTok)) {

1412 FormatTok->Tok.setKind(tok::identifier);

1413 }

1414 } else if (const bool Greater = FormatTok->is(tok::greatergreater);

1415 Greater || FormatTok->is(tok::lessless)) {

1416 FormatTok->Tok.setKind(Greater ? tok::greater : tok::less);

1417 FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);

1418 ++Column;

1419 StateStack.push(LexerState::TOKEN_STASHED);

1420 } else if (Style.isJava() && FormatTok->is(tok::string_literal)) {

1421 tryParseJavaTextBlock();

1422 }

1423

1424 if (Style.isVerilog() && !Tokens.empty() &&

1425 Tokens.back()->is(TT_VerilogNumberBase) &&

1426 FormatTok->Tok.isOneOf(tok::identifier, tok::question)) {

1427

1428 FormatTok->Tok.setKind(tok::numeric_constant);

1429 }

1430

1431

1432

1433 StringRef Text = FormatTok->TokenText;

1434 size_t FirstNewlinePos = Text.find('\n');

1435 if (FirstNewlinePos == StringRef::npos) {

1436

1437

1438 FormatTok->ColumnWidth =

1439 encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding);

1440 Column += FormatTok->ColumnWidth;

1441 } else {

1442 FormatTok->IsMultiline = true;

1443

1444

1445 FormatTok->ColumnWidth = encoding::columnWidthWithTabs(

1446 Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding);

1447

1448

1449

1450 FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(

1451 Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth, Encoding);

1452 Column = FormatTok->LastLineColumnWidth;

1453 }

1454

1455 if (IsCpp) {

1456 auto *Identifier = FormatTok->Tok.getIdentifierInfo();

1457 auto it = Macros.find(Identifier);

1458 if ((Tokens.empty() || !Tokens.back()->Tok.getIdentifierInfo() ||

1459 Tokens.back()->Tok.getIdentifierInfo()->getPPKeywordID() !=

1460 tok::pp_define) &&

1461 it != Macros.end()) {

1462 FormatTok->setType(it->second);

1463 if (it->second == TT_IfMacro) {

1464

1465

1466

1467

1468 FormatTok->Tok.setKind(tok::kw_if);

1469 }

1470 } else if (FormatTok->is(tok::identifier)) {

1471 if (MacroBlockBeginRegex.match(Text))

1472 FormatTok->setType(TT_MacroBlockBegin);

1473 else if (MacroBlockEndRegex.match(Text))

1474 FormatTok->setType(TT_MacroBlockEnd);

1475 else if (MacrosSkippedByRemoveParentheses.contains(Identifier))

1476 FormatTok->setFinalizedType(TT_FunctionLikeMacro);

1477 else if (TemplateNames.contains(Identifier))

1478 FormatTok->setFinalizedType(TT_TemplateName);

1479 else if (TypeNames.contains(Identifier))

1480 FormatTok->setFinalizedType(TT_TypeName);

1481 else if (VariableTemplates.contains(Identifier))

1482 FormatTok->setFinalizedType(TT_VariableTemplate);

1483 }

1484 }

1485

1486 return FormatTok;

1487}

1488

1489bool FormatTokenLexer::readRawTokenVerilogSpecific(Token &Tok) {

1490 const char *Start = Lex->getBufferLocation();

1491 size_t Len;

1492 switch (Start[0]) {

1493

1494 case '\'':

1495 Len = 1;

1496 break;

1497

1498

1499 case '`':

1500 if (Start[1] == '`')

1501 Len = 2;

1502 else

1503 Len = 1;

1504 break;

1505

1506

1507

1508

1509

1510 case '\\':

1511

1512

1513 if (Start[1] == '\r' || Start[1] == '\n')

1514 return false;

1515 Len = 1;

1516 while (Start[Len] != '\0' && Start[Len] != '\f' && Start[Len] != '\n' &&

1517 Start[Len] != '\r' && Start[Len] != '\t' && Start[Len] != '\v' &&

1518 Start[Len] != ' ') {

1519

1520

1521 if (Start[Len] == '\\' && Start[Len + 1] == '\r' &&

1522 Start[Len + 2] == '\n') {

1523 Len += 3;

1524 } else if (Start[Len] == '\\' &&

1525 (Start[Len + 1] == '\r' || Start[Len + 1] == '\n')) {

1526 Len += 2;

1527 } else {

1528 Len += 1;

1529 }

1530 }

1531 break;

1532 default:

1533 return false;

1534 }

1535

1536

1537

1538

1539 Tok.setKind(tok::raw_identifier);

1540 Tok.setLength(Len);

1541 Tok.setLocation(Lex->getSourceLocation(Start, Len));

1542 Tok.setRawIdentifierData(Start);

1543 Lex->seek(Lex->getCurrentBufferOffset() + Len, false);

1544 return true;

1545}

1546

1547void FormatTokenLexer::readRawToken(FormatToken &Tok) {

1548

1549

1550 if (!Style.isVerilog() || !readRawTokenVerilogSpecific(Tok.Tok))

1551 Lex->LexFromRawLexer(Tok.Tok);

1552 Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),

1553 Tok.Tok.getLength());

1554

1555

1556 if (Tok.is(tok::unknown)) {

1557 if (Tok.TokenText.starts_with("\"")) {

1558 Tok.Tok.setKind(tok::string_literal);

1559 Tok.IsUnterminatedLiteral = true;

1560 } else if (Style.isJavaScript() && Tok.TokenText == "''") {

1561 Tok.Tok.setKind(tok::string_literal);

1562 }

1563 }

1564

1565 if ((Style.isJavaScript() || Style.isProto()) && Tok.is(tok::char_constant))

1566 Tok.Tok.setKind(tok::string_literal);

1567

1568 if (Tok.is(tok::comment) && isClangFormatOn(Tok.TokenText))

1569 FormattingDisabled = false;

1570

1571 Tok.Finalized = FormattingDisabled;

1572

1573 if (Tok.is(tok::comment) && isClangFormatOff(Tok.TokenText))

1574 FormattingDisabled = true;

1575}

1576

1577void FormatTokenLexer::resetLexer(unsigned Offset) {

1578 StringRef Buffer = SourceMgr.getBufferData(ID);

1579 Lex.reset(new Lexer(SourceMgr.getLocForStartOfFile(ID), LangOpts,

1580 Buffer.begin(), Buffer.begin() + Offset, Buffer.end()));

1581 Lex->SetKeepWhitespaceMode(true);

1582 TrailingWhitespace = 0;

1583}

1584

1585 } // namespace format

1586 } // namespace clang
