FormatTokenLexer.cpp Source File (original) (raw)

20#include "llvm/Support/Regex.h"

23namespace format {

28 llvm::SpecificBumpPtrAllocator &Allocator,

33 Style(Style), IdentTable(IdentTable), Keywords(IdentTable),

34 Encoding(Encoding), Allocator(Allocator), FirstInLineIndex(0),

35 FormattingDisabled(false), MacroBlockBeginRegex(Style.MacroBlockBegin),

37 Lex.reset(new Lexer(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts));

38 Lex->SetKeepWhitespaceMode(true);

40 for (const std::string &ForEachMacro : Style.ForEachMacros) {

41 auto Identifier = &IdentTable.get(ForEachMacro);

42 Macros.insert({Identifier, TT_ForEachMacro});

43 }

44 for (const std::string &IfMacro : Style.IfMacros) {

45 auto Identifier = &IdentTable.get(IfMacro);

46 Macros.insert({Identifier, TT_IfMacro});

47 }

48 for (const std::string &AttributeMacro : Style.AttributeMacros) {

49 auto Identifier = &IdentTable.get(AttributeMacro);

51 }

52 for (const std::string &StatementMacro : Style.StatementMacros) {

53 auto Identifier = &IdentTable.get(StatementMacro);

55 }

56 for (const std::string &TypenameMacro : Style.TypenameMacros) {

57 auto Identifier = &IdentTable.get(TypenameMacro);

59 }

60 for (const std::string &NamespaceMacro : Style.NamespaceMacros) {

61 auto Identifier = &IdentTable.get(NamespaceMacro);

63 }

64 for (const std::string &WhitespaceSensitiveMacro :

66 auto Identifier = &IdentTable.get(WhitespaceSensitiveMacro);

68 }

69 for (const std::string &StatementAttributeLikeMacro :

71 auto Identifier = &IdentTable.get(StatementAttributeLikeMacro);

73 }

75 for (const auto &TemplateName : Style.TemplateNames)

76 TemplateNames.insert(&IdentTable.get(TemplateName));

77 for (const auto &TypeName : Style.TypeNames)

78 TypeNames.insert(&IdentTable.get(TypeName));

80 VariableTemplates.insert(&IdentTable.get(VariableTemplate));

81}

84 assert(Tokens.empty());

85 assert(FirstInLineIndex == 0);

86 do {

87 Tokens.push_back(getNextToken());

89 tryParseJSRegexLiteral();

90 handleTemplateStrings();

91 }

93 tryParsePythonComment();

94 tryMergePreviousTokens();

98 handleCSharpVerbatimAndInterpolatedStrings();

99 }

101 handleTableGenMultilineString();

102 handleTableGenNumericLikeIdentifier();

103 }

104 if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)

105 FirstInLineIndex = Tokens.size() - 1;

106 } while (Tokens.back()->isNot(tok::eof));

108 auto &TokEOF = *Tokens.back();

109 if (TokEOF.NewlinesBefore == 0) {

110 TokEOF.NewlinesBefore = 1;

111 TokEOF.OriginalColumn = 0;

112 }

113 }

114 return Tokens;

115}

116

// Runs the token-merging rules, in order, against the most recently lexed
// tokens at the tail of Tokens and returns as soon as one rule reports that it
// applied.
// NOTE(review): this listing contains closing braces whose opening `if (...) {`
// guard lines are not present (before the FatArrow, CSharpNullConditionalLSquare,
// JSIdentity, JavaRightLogicalShiftAssign, Verilog and TableGen sections), and
// one statement is absent inside the shift-assign merge — restore them from the
// upstream file before compiling.
117void FormatTokenLexer::tryMergePreviousTokens() {
// Rules that are tried first, each handled by its own helper.
118 if (tryMerge_TMacro())
119 return;
120 if (tryMergeConflictMarkers())
121 return;
122 if (tryMergeLessLess())
123 return;
124 if (tryMergeGreaterGreater())
125 return;
126 if (tryMergeForEach())
127 return;
// The `try` transformation is only attempted when Style.isCpp() holds.
128 if (Style.isCpp() && tryTransformTryUsageForC())
129 return;
130
// Token-kind sequences for `??`, `?.` and `=>`.
132 static const tok::TokenKind NullishCoalescingOperator[] = {tok::question,
133 tok::question};
134 static const tok::TokenKind NullPropagatingOperator[] = {tok::question,
135 tok::period};
136 static const tok::TokenKind FatArrow[] = {tok::equal, tok::greater};
137
138 if (tryMergeTokens(FatArrow, TT_FatArrow))
139 return;
140 if (tryMergeTokens(NullishCoalescingOperator, TT_NullCoalescingOperator)) {
// The merged `??` token is given the token kind of `||`.
141
142 Tokens.back()->Tok.setKind(tok::pipepipe);
143 return;
144 }
145 if (tryMergeTokens(NullPropagatingOperator, TT_NullPropagatingOperator)) {
// The merged `?.` token is given the token kind of `.`.
146
147 Tokens.back()->Tok.setKind(tok::period);
148 return;
149 }
150 if (tryMergeNullishCoalescingEqual())
151 return;
152 }
153
// Token-kind sequence for `?[`.
155 static const tok::TokenKind CSharpNullConditionalLSquare[] = {
156 tok::question, tok::l_square};
157
158 if (tryMergeCSharpKeywordVariables())
159 return;
160 if (tryMergeCSharpStringLiteral())
161 return;
162 if (tryTransformCSharpForEach())
163 return;
164 if (tryMergeTokens(CSharpNullConditionalLSquare,
165 TT_CSharpNullConditionalLSquare)) {
// The merged `?[` token is given the token kind of `[`.
166
167 Tokens.back()->Tok.setKind(tok::l_square);
168 return;
169 }
170 }
171
172 if (tryMergeNSStringLiteral())
173 return;
174
// Token-kind sequences for `===`, `!==`, `>>>=`, `**`, `**=`, `||=` and `&&=`.
176 static const tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal};
177 static const tok::TokenKind JSNotIdentity[] = {tok::exclaimequal,
178 tok::equal};
179 static const tok::TokenKind JSShiftEqual[] = {tok::greater, tok::greater,
180 tok::greaterequal};
181 static const tok::TokenKind JSExponentiation[] = {tok::star, tok::star};
182 static const tok::TokenKind JSExponentiationEqual[] = {tok::star,
183 tok::starequal};
184 static const tok::TokenKind JSPipePipeEqual[] = {tok::pipepipe, tok::equal};
185 static const tok::TokenKind JSAndAndEqual[] = {tok::ampamp, tok::equal};
186
187
188 if (tryMergeTokens(JSIdentity, TT_BinaryOperator))
189 return;
190 if (tryMergeTokens(JSNotIdentity, TT_BinaryOperator))
191 return;
192 if (tryMergeTokens(JSShiftEqual, TT_BinaryOperator))
193 return;
194 if (tryMergeTokens(JSExponentiation, TT_JsExponentiation))
195 return;
// `**=` keeps the token kind of `*=`.
196 if (tryMergeTokens(JSExponentiationEqual, TT_JsExponentiationEqual)) {
197 Tokens.back()->Tok.setKind(tok::starequal);
198 return;
199 }
200 if (tryMergeTokens(JSAndAndEqual, TT_JsAndAndEqual) ||
201 tryMergeTokens(JSPipePipeEqual, TT_JsPipePipeEqual)) {
// `&&=` and `||=` are given the token kind of `=`.
202
203 Tokens.back()->Tok.setKind(tok::equal);
204 return;
205 }
206 if (tryMergeJSPrivateIdentifier())
207 return;
208 }
209
// Token-kind sequence for `>>>=`.
211 static const tok::TokenKind JavaRightLogicalShiftAssign[] = {
212 tok::greater, tok::greater, tok::greaterequal};
213 if (tryMergeTokens(JavaRightLogicalShiftAssign, TT_BinaryOperator))
214 return;
215 }
216
218
// After a TT_VerilogNumberBase token followed by a numeric constant, merge the
// last two tokens when the newest one is a number, identifier or `?`.
219 if (Tokens.size() >= 3 && Tokens.end()[-3]->is(TT_VerilogNumberBase) &&
220 Tokens.end()[-2]->is(tok::numeric_constant) &&
221 Tokens.back()->isOneOf(tok::numeric_constant, tok::identifier,
222 tok::question) &&
223 tryMergeTokens(2, TT_Unknown)) {
224 return;
225 }
226
// `-:` and `+:` become a single TT_BitFieldColon token.
227 if (tryMergeTokensAny({{tok::minus, tok::colon}, {tok::plus, tok::colon}},
228 TT_BitFieldColon)) {
229 return;
230 }
231
232
233
234
// `^~` / `~^` are merged only while the newest token is one character long; the
// result gets the token kind of `^`.
235 if (Tokens.back()->TokenText.size() == 1 &&
236 tryMergeTokensAny({{tok::caret, tok::tilde}, {tok::tilde, tok::caret}},
237 TT_BinaryOperator)) {
238 Tokens.back()->Tok.setKind(tok::caret);
239 return;
240 }
241
// Two adjacent `<` or `>` tokens become `<<` / `>>`.
242 if (tryMergeTokens({tok::less, tok::less}, TT_BinaryOperator)) {
243 Tokens.back()->Tok.setKind(tok::lessless);
244 return;
245 }
246 if (tryMergeTokens({tok::greater, tok::greater}, TT_BinaryOperator)) {
247 Tokens.back()->Tok.setKind(tok::greatergreater);
248 return;
249 }
// `<<=`, `<<<=`, `>>=`, `>>>=`, `:=` and `:/`.
250 if (tryMergeTokensAny({{tok::lessless, tok::equal},
251 {tok::lessless, tok::lessequal},
252 {tok::greatergreater, tok::equal},
253 {tok::greatergreater, tok::greaterequal},
254 {tok::colon, tok::equal},
255 {tok::colon, tok::slash}},
256 TT_BinaryOperator)) {
258 return;
259 }
260
// `**`, `<<<`, `>>>`, `!==`, `!=?`, `===` and `==?`.
261 if (tryMergeTokensAny({{tok::star, tok::star},
262 {tok::lessless, tok::less},
263 {tok::greatergreater, tok::greater},
264 {tok::exclaimequal, tok::equal},
265 {tok::exclaimequal, tok::question},
266 {tok::equalequal, tok::equal},
267 {tok::equalequal, tok::question}},
268 TT_BinaryOperator)) {
269 return;
270 }
271
272
// Arrow-like sequences (`+=>`, `+*>`, `-=>`, `-*>`, `<->`, `=>`, `*>`, `|=>`,
// `|->`, `#-#`, `#=#`) and a plain `->` all get prec::Comma as forced precedence.
273 if (tryMergeTokensAny({{tok::plusequal, tok::greater},
274 {tok::plus, tok::star, tok::greater},
275 {tok::minusequal, tok::greater},
276 {tok::minus, tok::star, tok::greater},
277 {tok::less, tok::arrow},
278 {tok::equal, tok::greater},
279 {tok::star, tok::greater},
280 {tok::pipeequal, tok::greater},
281 {tok::pipe, tok::arrow},
282 {tok::hash, tok::minus, tok::hash},
283 {tok::hash, tok::equal, tok::hash}},
284 TT_BinaryOperator) ||
285 Tokens.back()->is(tok::arrow)) {
286 Tokens.back()->ForcedPrecedence = prec::Comma;
287 return;
288 }
289 }
291
// `[{` is merged into one token that is finalized as a
// TT_TableGenMultiLineString and re-kinded as a string literal.
292 if (tryMergeTokens({tok::l_square, tok::l_brace},
293 TT_TableGenMultiLineString)) {
294
295 Tokens.back()->setFinalizedType(TT_TableGenMultiLineString);
296 Tokens.back()->Tok.setKind(tok::string_literal);
297 return;
298 }
299
300
// `!` followed by an identifier becomes one identifier-kind token; the spelling
// "!cond" gets its own finalized type.
301 if (tryMergeTokens({tok::exclaim, tok::identifier},
302 TT_TableGenBangOperator)) {
303 Tokens.back()->Tok.setKind(tok::identifier);
304 Tokens.back()->Tok.setIdentifierInfo(nullptr);
305 if (Tokens.back()->TokenText == "!cond")
306 Tokens.back()->setFinalizedType(TT_TableGenCondOperator);
307 else
308 Tokens.back()->setFinalizedType(TT_TableGenBangOperator);
309 return;
310 }
// `!if` needs its own rule because `if` carries the keyword kind tok::kw_if.
311 if (tryMergeTokens({tok::exclaim, tok::kw_if}, TT_TableGenBangOperator)) {
312
313
314 Tokens.back()->Tok.setKind(tok::identifier);
315 Tokens.back()->Tok.setIdentifierInfo(nullptr);
316 Tokens.back()->setFinalizedType(TT_TableGenBangOperator);
317 return;
318 }
319
// A sign directly followed by a numeric constant is folded into the constant.
320 if (tryMergeTokens({tok::plus, tok::numeric_constant}, TT_Unknown)) {
321 Tokens.back()->Tok.setKind(tok::numeric_constant);
322 return;
323 }
324 if (tryMergeTokens({tok::minus, tok::numeric_constant}, TT_Unknown)) {
325 Tokens.back()->Tok.setKind(tok::numeric_constant);
326 return;
327 }
328 }
329}

330

331bool FormatTokenLexer::tryMergeNSStringLiteral() {

332 if (Tokens.size() < 2)

333 return false;

334 auto &At = *(Tokens.end() - 2);

335 auto &String = *(Tokens.end() - 1);

336 if (At->isNot(tok::at) || String->isNot(tok::string_literal))

337 return false;

338 At->Tok.setKind(tok::string_literal);

339 At->TokenText = StringRef(At->TokenText.begin(),

340 String->TokenText.end() - At->TokenText.begin());

341 At->ColumnWidth += String->ColumnWidth;

342 At->setType(TT_ObjCStringLiteral);

343 Tokens.erase(Tokens.end() - 1);

344 return true;

345}

346

347bool FormatTokenLexer::tryMergeJSPrivateIdentifier() {

348

349

350 if (Tokens.size() < 2)

351 return false;

352 auto &Hash = *(Tokens.end() - 2);

353 auto &Identifier = *(Tokens.end() - 1);

354 if (Hash->isNot(tok::hash) || Identifier->isNot(tok::identifier))

355 return false;

356 Hash->Tok.setKind(tok::identifier);

357 Hash->TokenText =

358 StringRef(Hash->TokenText.begin(),

359 Identifier->TokenText.end() - Hash->TokenText.begin());

360 Hash->ColumnWidth += Identifier->ColumnWidth;

361 Hash->setType(TT_JsPrivateIdentifier);

362 Tokens.erase(Tokens.end() - 1);

363 return true;

364}

365

366

367

368

369

370

371bool FormatTokenLexer::tryMergeCSharpStringLiteral() {

372 if (Tokens.size() < 2)

373 return false;

374

375

376 const auto String = *(Tokens.end() - 1);

377 if (String->isNot(tok::string_literal))

378 return false;

379

380 auto Prefix = *(Tokens.end() - 2);

381 if (Prefix->isNot(tok::at) && Prefix->TokenText != "$")

382 return false;

383

384 if (Tokens.size() > 2) {

385 const auto Tok = *(Tokens.end() - 3);

386 if ((Tok->TokenText == "$" && Prefix->is(tok::at)) ||

387 (Tok->is(tok::at) && Prefix->TokenText == "$")) {

388

389 Tok->ColumnWidth += Prefix->ColumnWidth;

390 Tokens.erase(Tokens.end() - 2);

391 Prefix = Tok;

392 }

393 }

394

395

396 Prefix->Tok.setKind(tok::string_literal);

397 Prefix->TokenText =

398 StringRef(Prefix->TokenText.begin(),

399 String->TokenText.end() - Prefix->TokenText.begin());

400 Prefix->ColumnWidth += String->ColumnWidth;

401 Prefix->setType(TT_CSharpStringLiteral);

402 Tokens.erase(Tokens.end() - 1);

403 return true;

404}

405

406

407

408const llvm::StringSet<> FormatTokenLexer::CSharpAttributeTargets = {

409 "assembly", "module", "field", "event", "method",

410 "param", "property", "return", "type",

411};

412

413bool FormatTokenLexer::tryMergeNullishCoalescingEqual() {

414 if (Tokens.size() < 2)

415 return false;

416 auto &NullishCoalescing = *(Tokens.end() - 2);

417 auto &Equal = *(Tokens.end() - 1);

418 if (NullishCoalescing->isNot(TT_NullCoalescingOperator) ||

419 Equal->isNot(tok::equal)) {

420 return false;

421 }

422 NullishCoalescing->Tok.setKind(tok::equal);

423 NullishCoalescing->TokenText =

424 StringRef(NullishCoalescing->TokenText.begin(),

425 Equal->TokenText.end() - NullishCoalescing->TokenText.begin());

426 NullishCoalescing->ColumnWidth += Equal->ColumnWidth;

427 NullishCoalescing->setType(TT_NullCoalescingEqual);

428 Tokens.erase(Tokens.end() - 1);

429 return true;

430}

431

432bool FormatTokenLexer::tryMergeCSharpKeywordVariables() {

433 if (Tokens.size() < 2)

434 return false;

435 const auto At = *(Tokens.end() - 2);

436 if (At->isNot(tok::at))

437 return false;

438 const auto Keyword = *(Tokens.end() - 1);

439 if (Keyword->TokenText == "$")

440 return false;

442 return false;

443

444 At->Tok.setKind(tok::identifier);

445 At->TokenText = StringRef(At->TokenText.begin(),

446 Keyword->TokenText.end() - At->TokenText.begin());

447 At->ColumnWidth += Keyword->ColumnWidth;

448 At->setType(Keyword->getType());

449 Tokens.erase(Tokens.end() - 1);

450 return true;

451}

452

453

454bool FormatTokenLexer::tryTransformCSharpForEach() {

455 if (Tokens.size() < 1)

456 return false;

457 auto &Identifier = *(Tokens.end() - 1);

458 if (Identifier->isNot(tok::identifier))

459 return false;

460 if (Identifier->TokenText != "foreach")

461 return false;

462

463 Identifier->setType(TT_ForEachMacro);

464 Identifier->Tok.setKind(tok::kw_for);

465 return true;

466}

467

468bool FormatTokenLexer::tryMergeForEach() {

469 if (Tokens.size() < 2)

470 return false;

471 auto &For = *(Tokens.end() - 2);

472 auto &Each = *(Tokens.end() - 1);

473 if (For->isNot(tok::kw_for))

474 return false;

475 if (Each->isNot(tok::identifier))

476 return false;

477 if (Each->TokenText != "each")

478 return false;

479

480 For->setType(TT_ForEachMacro);

481 For->Tok.setKind(tok::kw_for);

482

483 For->TokenText = StringRef(For->TokenText.begin(),

484 Each->TokenText.end() - For->TokenText.begin());

485 For->ColumnWidth += Each->ColumnWidth;

486 Tokens.erase(Tokens.end() - 1);

487 return true;

488}

489

490bool FormatTokenLexer::tryTransformTryUsageForC() {

491 if (Tokens.size() < 2)

492 return false;

493 auto &Try = *(Tokens.end() - 2);

494 if (Try->isNot(tok::kw_try))

495 return false;

496 auto &Next = *(Tokens.end() - 1);

497 if (Next->isOneOf(tok::l_brace, tok::colon, tok::hash, tok::comment))

498 return false;

499

500 if (Tokens.size() > 2) {

501 auto &At = *(Tokens.end() - 3);

502 if (At->is(tok::at))

503 return false;

504 }

505

506 Try->Tok.setKind(tok::identifier);

507 return true;

508}

509

510bool FormatTokenLexer::tryMergeLessLess() {

511

512 if (Tokens.size() < 3)

513 return false;

514

515 auto First = Tokens.end() - 3;

516 if (First[0]->isNot(tok::less) || First[1]->isNot(tok::less))

517 return false;

518

519

520 if (First[1]->hasWhitespaceBefore())

521 return false;

522

523 auto X = Tokens.size() > 3 ? First[-1] : nullptr;

524 if (X && X->is(tok::less))

525 return false;

526

527 auto Y = First[2];

528 if (( || X->isNot(tok::kw_operator)) && Y->is(tok::less))

529 return false;

530

531 First[0]->Tok.setKind(tok::lessless);

532 First[0]->TokenText = "<<";

533 First[0]->ColumnWidth += 1;

534 Tokens.erase(Tokens.end() - 2);

535 return true;

536}

537

538bool FormatTokenLexer::tryMergeGreaterGreater() {

539

540 if (Tokens.size() < 2)

541 return false;

542

543 auto First = Tokens.end() - 2;

544 if (First[0]->isNot(tok::greater) || First[1]->isNot(tok::greater))

545 return false;

546

547

548 if (First[1]->hasWhitespaceBefore())

549 return false;

550

551 auto Tok = Tokens.size() > 2 ? First[-1] : nullptr;

552 if (Tok && Tok->isNot(tok::kw_operator))

553 return false;

554

555 First[0]->Tok.setKind(tok::greatergreater);

556 First[0]->TokenText = ">>";

557 First[0]->ColumnWidth += 1;

558 Tokens.erase(Tokens.end() - 1);

559 return true;

560}

561

562bool FormatTokenLexer::tryMergeTokens(ArrayReftok::TokenKind Kinds,

564 if (Tokens.size() < Kinds.size())

565 return false;

566

567 const auto *First = Tokens.end() - Kinds.size();

568 for (unsigned i = 0; i < Kinds.size(); ++i)

569 if (First[i]->isNot(Kinds[i]))

570 return false;

571

572 return tryMergeTokens(Kinds.size(), NewType);

573}

574

575bool FormatTokenLexer::tryMergeTokens(size_t Count, TokenType NewType) {

576 if (Tokens.size() < Count)

577 return false;

578

579 const auto *First = Tokens.end() - Count;

580 unsigned AddLength = 0;

581 for (size_t i = 1; i < Count; ++i) {

582

583

584 if (First[i]->hasWhitespaceBefore())

585 return false;

586 AddLength += First[i]->TokenText.size();

587 }

588

589 Tokens.resize(Tokens.size() - Count + 1);

590 First[0]->TokenText = StringRef(First[0]->TokenText.data(),

591 First[0]->TokenText.size() + AddLength);

592 First[0]->ColumnWidth += AddLength;

593 First[0]->setType(NewType);

594 return true;

595}

596

597bool FormatTokenLexer::tryMergeTokensAny(

598 ArrayRef<ArrayReftok::TokenKind> Kinds, TokenType NewType) {

599 return llvm::any_of(Kinds, [this, NewType](ArrayReftok::TokenKind Kinds) {

600 return tryMergeTokens(Kinds, NewType);

601 });

602}

603

604

// Returns true when Tok is one of the listed punctuators or keywords, or when
// Tok->isBinaryOperator() holds. canPrecedeRegexLiteral() uses this to decide
// whether a `/` after Tok may start a regex literal.
// NOTE(review): the second isOneOf(...) call is not closed in this listing; the
// line holding the rest of its keyword list is absent — restore it from the
// upstream file.
605bool FormatTokenLexer::precedesOperand(FormatToken *Tok) {
606
607
608
609 return Tok->isOneOf(tok::period, tok::l_paren, tok::comma, tok::l_brace,
610 tok::r_brace, tok::l_square, tok::semi, tok::exclaim,
611 tok::colon, tok::question, tok::tilde) ||
612 Tok->isOneOf(tok::kw_return, tok::kw_do, tok::kw_case, tok::kw_throw,
613 tok::kw_else, tok::kw_new, tok::kw_delete, tok::kw_void,
615 Tok->isBinaryOperator();
616}

617

618bool FormatTokenLexer::canPrecedeRegexLiteral(FormatToken *Prev) {

619 if (!Prev)

620 return true;

621

622

623

624

625

626

627

628 if (Prev->isOneOf(tok::plusplus, tok::minusminus, tok::exclaim))

629 return Tokens.size() < 3 || precedesOperand(Tokens[Tokens.size() - 3]);

630

631

632

633 if (!precedesOperand(Prev))

634 return false;

635

636 return true;

637}

638

639

640

641

642

643void FormatTokenLexer::tryParseJSRegexLiteral() {

644 FormatToken *RegexToken = Tokens.back();

645 if (!RegexToken->isOneOf(tok::slash, tok::slashequal))

646 return;

647

648 FormatToken *Prev = nullptr;

649 for (FormatToken *FT : llvm::drop_begin(llvm::reverse(Tokens))) {

650

651

652 if (FT->isNot(tok::comment)) {

653 Prev = FT;

654 break;

655 }

656 }

657

658 if (!canPrecedeRegexLiteral(Prev))

659 return;

660

661

662 const char *Offset = Lex->getBufferLocation();

663 const char *RegexBegin = Offset - RegexToken->TokenText.size();

664 StringRef Buffer = Lex->getBuffer();

665 bool InCharacterClass = false;

666 bool HaveClosingSlash = false;

667 for (; !HaveClosingSlash && Offset != Buffer.end(); ++Offset) {

668

669

670

671 switch (*Offset) {

672 case '\\':

673

674 ++Offset;

675 break;

676 case '[':

677 InCharacterClass = true;

678 break;

679 case ']':

680 InCharacterClass = false;

681 break;

682 case '/':

683 if (!InCharacterClass)

684 HaveClosingSlash = true;

685 break;

686 }

687 }

688

689 RegexToken->setType(TT_RegexLiteral);

690

691 RegexToken->Tok.setKind(tok::string_literal);

692 RegexToken->TokenText = StringRef(RegexBegin, Offset - RegexBegin);

693 RegexToken->ColumnWidth = RegexToken->TokenText.size();

694

695 resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset)));

696}

697

699 bool Interpolated) {

700 auto Repeated = [&Begin, End]() {

702 };

703

704

705

706

707

708

709

710

711

712

713

714

715

716 for (int UnmatchedOpeningBraceCount = 0; Begin < End; ++Begin) {

717 switch (*Begin) {

718 case '\\':

719 if (!Verbatim)

721 break;

722 case '{':

723 if (Interpolated) {

724

725 if (Repeated())

727 else

728 ++UnmatchedOpeningBraceCount;

729 }

730 break;

731 case '}':

732 if (Interpolated) {

733

734 if (Repeated())

736 else if (UnmatchedOpeningBraceCount > 0)

737 --UnmatchedOpeningBraceCount;

738 else

739 return End;

740 }

741 break;

742 case '"':

743 if (UnmatchedOpeningBraceCount > 0)

744 break;

745

746 if (Verbatim && Repeated()) {

748 break;

749 }

751 }

752 }

753

754 return End;

755}

756

// Re-lexes a TT_CSharpStringLiteral token whose text starts with `@"`, `$"`,
// `$@"` or `@$"`: the end of the literal is located with lexCSharpString(), the
// token text is extended to it, and the lexer is repositioned behind it.
// NOTE(review): the lines that begin the two column-width computations (before
// "FirstLineText, ..." and before "StartColumn, ...") are absent from this
// listing — restore them from the upstream file.
757void FormatTokenLexer::handleCSharpVerbatimAndInterpolatedStrings() {
758 FormatToken *CSharpStringLiteral = Tokens.back();
759
760 if (CSharpStringLiteral->isNot(TT_CSharpStringLiteral))
761 return;
762
763 auto &TokenText = CSharpStringLiteral->TokenText;
764
// Classify the literal by its prefix.
765 bool Verbatim = false;
766 bool Interpolated = false;
767 if (TokenText.starts_with(R"($@")") || TokenText.starts_with(R"(@$")")) {
768 Verbatim = true;
769 Interpolated = true;
770 } else if (TokenText.starts_with(R"(@")")) {
771 Verbatim = true;
772 } else if (TokenText.starts_with(R"($")")) {
773 Interpolated = true;
774 }
775
776
// Strings without either prefix need no special handling here.
777 if (!Verbatim && !Interpolated)
778 return;
779
// Start scanning right behind the prefix and the opening quote (3 or 2 chars).
780 const char *StrBegin = Lex->getBufferLocation() - TokenText.size();
781 const char *Offset = StrBegin;
782 if (Verbatim && Interpolated)
783 Offset += 3;
784 else
785 Offset += 2;
786
787 const auto End = Lex->getBuffer().end();
788 Offset = lexCSharpString(Offset, End, Verbatim, Interpolated);
789
790
791
// No end of string found before the end of the buffer: leave the token alone.
792 if (Offset >= End)
793 return;
794
// Offset is included in the literal (hence the + 1).
795 StringRef LiteralText(StrBegin, Offset - StrBegin + 1);
796 TokenText = LiteralText;
797
798
// Text up to the first line break, or the whole literal when there is none.
799 size_t FirstBreak = LiteralText.find('\n');
800 StringRef FirstLineText = FirstBreak == StringRef::npos
801 ? LiteralText
802 : LiteralText.substr(0, FirstBreak);
804 FirstLineText, CSharpStringLiteral->OriginalColumn, Style.TabWidth,
805 Encoding);
806 size_t LastBreak = LiteralText.rfind('\n');
807 if (LastBreak != StringRef::npos) {
808 CSharpStringLiteral->IsMultiline = true;
809 unsigned StartColumn = 0;
810 CSharpStringLiteral->LastLineColumnWidth =
812 StartColumn, Style.TabWidth, Encoding);
813 }
814
// Continue lexing at the character after Offset.
815 assert(Offset < End);
816 resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset + 1)));
817}

818

// Extends a TT_TableGenMultiLineString token so that its text runs from the
// opening two characters through the next "}]" in the buffer, and marks it as
// multiline when that text contains a line break.
// NOTE(review): several lines are absent from this listing (the start of the
// statement ending in "...Text.size())));", and the starts of the two
// statements that consume "Text.substr(LastBreak + 1), ..." and
// "FirstLineText, ...") — restore them from the upstream file.
819void FormatTokenLexer::handleTableGenMultilineString() {
820 FormatToken *MultiLineString = Tokens.back();
821 if (MultiLineString->isNot(TT_TableGenMultiLineString))
822 return;
823
// The opening delimiter is two characters long and was already consumed.
824 auto OpenOffset = Lex->getCurrentBufferOffset() - 2 ;
825
// Without a closing "}]" the token is left unchanged.
826 auto CloseOffset = Lex->getBuffer().find("}]", OpenOffset);
827 if (CloseOffset == StringRef::npos)
828 return;
// The + 2 includes the closing "}]" in the token text.
829 auto Text = Lex->getBuffer().substr(OpenOffset, CloseOffset - OpenOffset + 2);
830 MultiLineString->TokenText = Text;
832 Lex->getSourceLocation(Lex->getBufferLocation() - 2 + Text.size())));
833 auto FirstLineText = Text;
834 auto FirstBreak = Text.find('\n');
835
836 if (FirstBreak != StringRef::npos) {
837 MultiLineString->IsMultiline = true;
// Here the first line keeps its trailing '\n' (FirstBreak + 1).
838 FirstLineText = Text.substr(0, FirstBreak + 1);
839
840 auto LastBreak = Text.rfind('\n');
842 Text.substr(LastBreak + 1), MultiLineString->OriginalColumn,
844 }
845
847 FirstLineText, MultiLineString->OriginalColumn, Style.TabWidth, Encoding);
848}

849

850void FormatTokenLexer::handleTableGenNumericLikeIdentifier() {

851 FormatToken *Tok = Tokens.back();

852

853

854 if (Tok->isNot(tok::numeric_constant))

855 return;

856 StringRef Text = Tok->TokenText;

857

858

859

860

861

862

863

864

865 if (Text.size() < 1 || Text[0] == '+' || Text[0] == '-')

866 return;

867 const auto NonDigitPos = Text.find_if([](char C) { return !isdigit(C); });

868

869 if (NonDigitPos == StringRef::npos)

870 return;

871 char FirstNonDigit = Text[NonDigitPos];

872 if (NonDigitPos < Text.size() - 1) {

873 char TheNext = Text[NonDigitPos + 1];

874

875 if (FirstNonDigit == 'b' && (TheNext == '0' || TheNext == '1'))

876 return;

877

878 if (FirstNonDigit == 'x' && isxdigit(TheNext))

879 return;

880 }

881 if (isalpha(FirstNonDigit) || FirstNonDigit == '_') {

882

883 Tok->Tok.setKind(tok::identifier);

884 Tok->Tok.setIdentifierInfo(nullptr);

885 }

886}

887

// Handles backtick-delimited template strings: depending on the newest token
// and StateStack, scans the raw buffer up to the closing backtick or the next
// "${" and turns the token into a string-literal token typed TT_TemplateString.
// NOTE(review): several lines are absent from this listing (statements inside
// the `{`, `}` and backtick branches, one inside the "${" branch, the starts of
// the two column-width computations, and the statement after `loc` is
// computed) — restore them from the upstream file.
888void FormatTokenLexer::handleTemplateStrings() {
889 FormatToken *BacktickToken = Tokens.back();
890
891 if (BacktickToken->is(tok::l_brace)) {
893 return;
894 }
895 if (BacktickToken->is(tok::r_brace)) {
// With a single entry on the stack this `}` is not treated specially.
896 if (StateStack.size() == 1)
897 return;
898 StateStack.pop();
900 return;
901
902 } else if (BacktickToken->is(tok::unknown) &&
903 BacktickToken->TokenText == "`") {
905 } else {
906 return;
907 }
908
909
// Scan the raw buffer starting right behind the current token.
910 const char *Offset = Lex->getBufferLocation();
911 const char *TmplBegin = Offset - BacktickToken->TokenText.size();
912 for (; Offset != Lex->getBuffer().end(); ++Offset) {
// A backtick ends the template string; it is included in the token.
913 if (Offset[0] == '`') {
914 StateStack.pop();
915 ++Offset;
916 break;
917 }
// A backslash skips the following character.
918 if (Offset[0] == '\\') {
919 ++Offset;
920 } else if (Offset + 1 < Lex->getBuffer().end() && Offset[0] == '$' &&
921 Offset[1] == '{') {
922
// "${" ends this piece of the string; both characters are included.
924 Offset += 2;
925 break;
926 }
927 }
928
929 StringRef LiteralText(TmplBegin, Offset - TmplBegin);
930 BacktickToken->setType(TT_TemplateString);
931 BacktickToken->Tok.setKind(tok::string_literal);
932 BacktickToken->TokenText = LiteralText;
933
934
// Text up to the first line break, or the whole literal when there is none.
935 size_t FirstBreak = LiteralText.find('\n');
936 StringRef FirstLineText = FirstBreak == StringRef::npos
937 ? LiteralText
938 : LiteralText.substr(0, FirstBreak);
940 FirstLineText, BacktickToken->OriginalColumn, Style.TabWidth, Encoding);
941 size_t LastBreak = LiteralText.rfind('\n');
942 if (LastBreak != StringRef::npos) {
943 BacktickToken->IsMultiline = true;
944 unsigned StartColumn = 0;
945 BacktickToken->LastLineColumnWidth =
947 StartColumn, Style.TabWidth, Encoding);
948 }
949
950 SourceLocation loc = Lex->getSourceLocation(Offset);
952}

953

// Turns a trailing `#` or `##` token into a line comment (TT_LineComment, kind
// tok::comment) whose text runs to the end of the current line, or to the end
// of the buffer when no '\n' follows.
// NOTE(review): the end of this function is absent from this listing (the
// else-branch of the ternary that initializes Loc and whatever follows it) —
// restore it from the upstream file.
954void FormatTokenLexer::tryParsePythonComment() {
955 FormatToken *HashToken = Tokens.back();
956 if (!HashToken->isOneOf(tok::hash, tok::hashhash))
957 return;
958
// The comment starts where the hash token starts in the raw buffer.
959 const char *CommentBegin =
960 Lex->getBufferLocation() - HashToken->TokenText.size();
961 size_t From = CommentBegin - Lex->getBuffer().begin();
962 size_t To = Lex->getBuffer().find_first_of('\n', From);
963 if (To == StringRef::npos)
964 To = Lex->getBuffer().size();
965 size_t Len = To - From;
966 HashToken->setType(TT_LineComment);
967 HashToken->Tok.setKind(tok::comment);
// The '\n' itself is not part of the comment text.
968 HashToken->TokenText = Lex->getBuffer().substr(From, Len);
969 SourceLocation Loc = To < Lex->getBuffer().size()
970 ? Lex->getSourceLocation(CommentBegin + Len)
973}

974

// Collapses the four trailing tokens `_T`, `(`, single-line string literal, `)`
// into the string-literal token, which takes over the text span and the
// position/whitespace fields of the `_T` token.
// Returns true when the merge happened, false otherwise.
// NOTE(review): the line that begins the statement ending in
// "String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding);" is
// absent from this listing — restore it from the upstream file.
975bool FormatTokenLexer::tryMerge_TMacro() {
976 if (Tokens.size() < 4)
977 return false;
978 FormatToken *Last = Tokens.back();
979 if (Last->isNot(tok::r_paren))
980 return false;
981
982 FormatToken *String = Tokens[Tokens.size() - 2];
983 if (String->isNot(tok::string_literal) || String->IsMultiline)
984 return false;
985
986 if (Tokens[Tokens.size() - 3]->isNot(tok::l_paren))
987 return false;
988
989 FormatToken *Macro = Tokens[Tokens.size() - 4];
990 if (Macro->TokenText != "_T")
991 return false;
992
// The merged text spans from the start of `_T` to the end of `)`.
993 const char *Start = Macro->TokenText.data();
994 const char *End = Last->TokenText.data() + Last->TokenText.size();
995 String->TokenText = StringRef(Start, End - Start);
// The string token inherits the macro token's position information.
996 String->IsFirst = Macro->IsFirst;
997 String->LastNewlineOffset = Macro->LastNewlineOffset;
998 String->WhitespaceRange = Macro->WhitespaceRange;
999 String->OriginalColumn = Macro->OriginalColumn;
1001 String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding);
1002 String->NewlinesBefore = Macro->NewlinesBefore;
1003 String->HasUnescapedNewline = Macro->HasUnescapedNewline;
1004
// Drop `)`, the string and `(`, then put the string where `_T` was.
1005 Tokens.pop_back();
1006 Tokens.pop_back();
1007 Tokens.pop_back();
1008 Tokens.back() = String;
// Keep FirstInLineIndex within the shortened token list.
1009 if (FirstInLineIndex >= Tokens.size())
1010 FirstInLineIndex = Tokens.size() - 1;
1011 return true;
1012}

1013

1014bool FormatTokenLexer::tryMergeConflictMarkers() {

1015 if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(tok::eof))

1016 return false;

1017

1018

1019

1020

1021

1022

1023

1024

1025

1026

1027 FileID ID;

1028

1029 unsigned FirstInLineOffset;

1030 std::tie(ID, FirstInLineOffset) = SourceMgr.getDecomposedLoc(

1031 Tokens[FirstInLineIndex]->getStartOfNonWhitespace());

1032 StringRef Buffer = SourceMgr.getBufferOrFake(ID).getBuffer();

1033

1034 auto LineOffset = Buffer.rfind('\n', FirstInLineOffset);

1035 if (LineOffset == StringRef::npos)

1036 LineOffset = 0;

1037 else

1038 ++LineOffset;

1039

1040 auto FirstSpace = Buffer.find_first_of(" \n", LineOffset);

1041 StringRef LineStart;

1042 if (FirstSpace == StringRef::npos)

1043 LineStart = Buffer.substr(LineOffset);

1044 else

1045 LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset);

1046

1048 if (LineStart == "<<<<<<<" || LineStart == ">>>>") {

1049 Type = TT_ConflictStart;

1050 } else if (LineStart == "|||||||" || LineStart == "=======" ||

1051 LineStart == "====") {

1052 Type = TT_ConflictAlternative;

1053 } else if (LineStart == ">>>>>>>" || LineStart == "<<<<") {

1054 Type = TT_ConflictEnd;

1055 }

1056

1057 if (Type != TT_Unknown) {

1058 FormatToken *Next = Tokens.back();

1059

1060 Tokens.resize(FirstInLineIndex + 1);

1061

1062

1063

1064 Tokens.back()->setType(Type);

1065 Tokens.back()->Tok.setKind(tok::kw___unknown_anytype);

1066

1067 Tokens.push_back(Next);

1068 return true;

1069 }

1070

1071 return false;

1072}

1073

// Allocates a fresh FormatToken that carries the same Tok and TokenText as the
// current FormatTok, makes it the current FormatTok and returns it.
// NOTE(review): several lines are absent from this listing (the initializer of
// TokLocation and the statements after the TokenText assignment, which
// presumably consume OriginalColumn) — restore them from the upstream file.
1074FormatToken *FormatTokenLexer::getStashedToken() {
1075
// Copy what is needed from the old token before FormatTok is replaced.
1076 Token Tok = FormatTok->Tok;
1077 StringRef TokenText = FormatTok->TokenText;
1078
1079 unsigned OriginalColumn = FormatTok->OriginalColumn;
1080 FormatTok = new (Allocator.Allocate()) FormatToken;
1081 FormatTok->Tok = Tok;
1082 SourceLocation TokLocation =
// The new token gets an empty whitespace range at TokLocation.
1085 FormatTok->WhitespaceRange = SourceRange(TokLocation, TokLocation);
1086 FormatTok->TokenText = TokenText;
1089
1090 return FormatTok;
1091}

1092

1093

1094

1095

1096

1097

1098

// Repositions the lexer NewLen characters after the start of the current
// token, so that lexing resumes inside what was the current token's text.
// NewLen must not exceed the current token's text length.
// NOTE(review): the statements between the resetLexer() call and the closing
// brace are mostly absent from this listing (only "Encoding);" remains) —
// restore them from the upstream file.
1099void FormatTokenLexer::truncateToken(size_t NewLen) {
1100 assert(NewLen <= FormatTok->TokenText.size());
1101 resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(
1102 Lex->getBufferLocation() - FormatTok->TokenText.size() + NewLen)));
1106 Encoding);
1108}

1109

1110

1112

1113

1114

1115

1116

1117 const unsigned char *const Begin = Text.bytes_begin();

1118 const unsigned char *const End = Text.bytes_end();

1119 const unsigned char *Cur = Begin;

1120 while (Cur < End) {

1121 if (isspace(Cur[0])) {

1122 ++Cur;

1123 } else if (Cur[0] == '\\' && (Cur[1] == '\n' || Cur[1] == '\r')) {

1124

1125

1126

1127

1128

1129 assert(End - Cur >= 2);

1130 Cur += 2;

1131 } else if (Cur[0] == '?' && Cur[1] == '?' && Cur[2] == '/' &&

1132 (Cur[3] == '\n' || Cur[3] == '\r')) {

1133

1134

1135

1136 assert(End - Cur >= 4);

1137 Cur += 4;

1138 } else {

1139 break;

1140 }

1141 }

1142 return Cur - Begin;

1143}

1144

// Produces the next FormatToken: reads a raw token, folds any preceding
// whitespace into WhitespaceLength while tracking Column, applies the token
// kind/type adjustments visible below, and returns FormatTok.
// NOTE(review): several lines of this listing (e.g. 1146, 1154, 1164, 1169,
// 1242, 1255) are absent from this extract, so the comments below describe
// only the statements that are visible here.
1145FormatToken *FormatTokenLexer::getNextToken() {

1147 StateStack.pop();

1148 return getStashedToken();

1149 }

1150

1151 FormatTok = new (Allocator.Allocate()) FormatToken;

1152 readRawToken(*FormatTok);

1153 SourceLocation WhitespaceStart =

1155 FormatTok->IsFirst = IsFirstToken;

1156 IsFirstToken = false;

1157 // Consume whitespace: keep reading raw tokens until one has no leading

1158 // whitespace (or eof is reached). The size of each skipped Text is added to

1159 // WhitespaceLength, which starts out as the pending TrailingWhitespace.

1160

1161

1162 unsigned WhitespaceLength = TrailingWhitespace;

1163 while (FormatTok->isNot(tok::eof)) {

1165 if (LeadingWhitespace == 0)

1166 break;

1167 if (LeadingWhitespace < FormatTok->TokenText.size())

1168 truncateToken(LeadingWhitespace);

1170 bool InEscape = false;

1171 for (int i = 0, e = Text.size(); i != e; ++i) {

1172 switch (Text[i]) {

1173 case '\r':

1174 // A CR that is immediately followed by LF is skipped here, so the pair

1175 // is handled once by the '\n' case; a lone CR falls through to it.

1176

1177 if (i + 1 < e && Text[i + 1] == '\n')

1178 break;

1179 [[fallthrough]];

1180 case '\n':

1182 if (!InEscape)

1184 else

1185 InEscape = false;

1187 Column = 0;

1188 break;

1189 case '\f':

1191

1192 i > 0 && Text[i - 1] == '\n' &&

1193 ((i + 1 < e && Text[i + 1] == '\n') ||

1194 (i + 2 < e && Text[i + 1] == '\r' && Text[i + 2] == '\n'))) {

1196 }

1197 [[fallthrough]];

1198 case '\v':

1199 Column = 0;

1200 break;

1201 case ' ':

1202 ++Column;

1203 break;

1204 case '\t':

1205 Column +=

1207 break;

1208 case '\\':

1209 case '?':

1210 case '/':

1211 // These characters are only expected here as part of an escaped newline:

1212 // backslash + CR/LF, or the ??/ trigraph + CR/LF (checked by the assert).

1213 assert(Text.substr(i, 2) == "\\\r" || Text.substr(i, 2) == "\\\n" ||

1214 Text.substr(i, 4) == "\?\?/\r" ||

1215 Text.substr(i, 4) == "\?\?/\n" ||

1216 (i >= 1 && (Text.substr(i - 1, 4) == "\?\?/\r" ||

1217 Text.substr(i - 1, 4) == "\?\?/\n")) ||

1218 (i >= 2 && (Text.substr(i - 2, 4) == "\?\?/\r" ||

1219 Text.substr(i - 2, 4) == "\?\?/\n")));

1220 InEscape = true;

1221 break;

1222 default:

1223 // Any other character in a whitespace run is treated as a logic error.

1224 assert(false);

1225 break;

1226 }

1227 }

1228 WhitespaceLength += Text.size();

1229 readRawToken(*FormatTok);

1230 }

1231

1232 if (FormatTok->is(tok::unknown))

1233 FormatTok->setType(TT_ImplicitStringLiteral);

1234 // For a "//" comment, find the first backslash that is directly followed by

1235 // '\n' and truncate the token just past that backslash. (The first part of

1236 // the condition, listing line 1242, is missing from this extract.)

1237

1238

1239

1240

1241

1243 FormatTok->is(tok::comment) && FormatTok->TokenText.starts_with("//")) {

1244 size_t BackslashPos = FormatTok->TokenText.find('\\');

1245 while (BackslashPos != StringRef::npos) {

1246 if (BackslashPos + 1 < FormatTok->TokenText.size() &&

1247 FormatTok->TokenText[BackslashPos + 1] == '\n') {

1248 truncateToken(BackslashPos + 1);

1249 break;

1250 }

1251 BackslashPos = FormatTok->TokenText.find('\\', BackslashPos + 1);

1252 }

1253 }

1254

1256 static const llvm::Regex NumberBase("^s?[bdho]", llvm::Regex::IgnoreCase);

1257 SmallVector<StringRef, 1> Matches;

1258 // NumberBase matches, case-insensitively, an optional 's' followed by one of

1259 // b/d/h/o at the start of the text. (The guard that opens this section,

1260 // listing line 1255, is missing from this extract.)

1261

1262

1263 if (FormatTok->is(tok::numeric_constant)) {

1264 // Cut a numeric constant at its first single quote, if it contains one.

1265 auto Quote = FormatTok->TokenText.find('\'');

1266 if (Quote != StringRef::npos)

1267 truncateToken(Quote);

1268 } else if (FormatTok->isOneOf(tok::hash, tok::hashhash)) {

1269 FormatTok->Tok.setKind(tok::raw_identifier);

1270 } else if (FormatTok->is(tok::raw_identifier)) {

1271 if (FormatTok->TokenText == "`") {

1274 } else if (FormatTok->TokenText == "``") {

1276 FormatTok->Tok.setKind(tok::hashhash);

1277 } else if (Tokens.size() > 0 &&

1279 NumberBase.match(FormatTok->TokenText, &Matches)) {

1280 // Keep only the prefix matched by NumberBase; the remainder is cut off

1281 // the token by truncateToken.

1282

1283

1284 truncateToken(Matches[0].size());

1286 }

1287 }

1288 }

1289

1291 WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));

1292

1294

1295 TrailingWhitespace = 0;

1296 if (FormatTok->is(tok::comment)) {

1297 // TrailingWhitespace becomes the number of characters by which TokenText
     // shrank (the trimming statement, listing line 1299, is not shown here).

1298 StringRef UntrimmedText = FormatTok->TokenText;

1300 TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();

1301 } else if (FormatTok->is(tok::raw_identifier)) {

1302 IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);

1304 FormatTok->Tok.setKind(Info.getTokenID());

1306 FormatTok->isOneOf(tok::kw_struct, tok::kw_union, tok::kw_delete,

1307 tok::kw_operator)) {

1308 FormatTok->Tok.setKind(tok::identifier);

1311 FormatTok->isOneOf(tok::kw_struct, tok::kw_union,

1312 tok::kw_operator)) {

1313 FormatTok->Tok.setKind(tok::identifier);

1316 FormatTok->Tok.setKind(tok::identifier);

1318 }

1319 } else if (FormatTok->is(tok::greatergreater)) {

1320 FormatTok->Tok.setKind(tok::greater);

1322 ++Column;

1324 } else if (FormatTok->is(tok::lessless)) {

1327 ++Column;

1329 }

1330

1331 if (Style.isVerilog() && Tokens.size() > 0 &&

1332 Tokens.back()->is(TT_VerilogNumberBase) &&

1333 FormatTok->Tok.isOneOf(tok::identifier, tok::question)) {

1334 // Directly after a TT_VerilogNumberBase token, an identifier or '?' is re-kinded as a numeric constant.

1335 FormatTok->Tok.setKind(tok::numeric_constant);

1336 }

1337

1338

1339

1341 size_t FirstNewlinePos = Text.find('\n');

1342 if (FirstNewlinePos == StringRef::npos) {

1343

1344

1348 } else {

1350 // The call below (its first line is missing here) receives the text before

1351 // the first newline together with the current Column.

1353 Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding);

1354 // The call below (its first line is missing here) receives the text after

1355 // the last newline with a start column of 0.

1356

1358 Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth, Encoding);

1360 }

1361

1362 if (Style.isCpp()) {

1365 if (!(Tokens.size() > 0 && Tokens.back()->Tok.getIdentifierInfo() &&

1366 Tokens.back()->Tok.getIdentifierInfo()->getPPKeywordID() ==

1367 tok::pp_define) &&

1368 it != Macros.end()) {

1369 FormatTok->setType(it->second);

1370 if (it->second == TT_IfMacro) {

1371

1372

1373

1374

1376 }

1377 } else if (FormatTok->is(tok::identifier)) {

1378 if (MacroBlockBeginRegex.match(Text))

1379 FormatTok->setType(TT_MacroBlockBegin);

1380 else if (MacroBlockEndRegex.match(Text))

1381 FormatTok->setType(TT_MacroBlockEnd);

1382 else if (TemplateNames.contains(Identifier))

1384 else if (TypeNames.contains(Identifier))

1386 else if (VariableTemplates.contains(Identifier))

1388 }

1389 }

1390

1391 return FormatTok;

1392}

1393

// Tries to lex one Verilog-specific token directly from the raw buffer at the
// lexer's current position: a single quote, a backtick or double backtick, or
// a backslash-introduced sequence that runs up to the next whitespace or NUL.
// On a match, Tok is set up as a raw_identifier covering those bytes, the
// lexer is moved past them, and true is returned. Otherwise false is returned
// without modifying Tok or the lexer position.
1394bool FormatTokenLexer::readRawTokenVerilogSpecific(Token &Tok) {

1395 const char *Start = Lex->getBufferLocation();

1396 size_t Len;

1397 switch (Start[0]) {

1398 // A single quote becomes a one-character token of its own.

1399 case '\'':

1400 Len = 1;

1401 break;

1402 // A backtick is taken alone unless it is directly followed by a second

1403 // backtick, in which case both characters form one token.

1404 case '`':

1405 if (Start[1] == '`')

1406 Len = 2;

1407 else

1408 Len = 1;

1409 break;

1410 // A backslash starts a token that extends up to (not including) the next

1411 // NUL, form feed, LF, CR, tab, vertical tab or space -- presumably a Verilog

1412 // escaped identifier; confirm against the language specification.

1413

1414

1415 case '\\':

1416 // A backslash directly followed by CR or LF is not handled here; returning

1417 // false makes the caller fall back to the regular raw lexer.

1418 if (Start[1] == '\r' || Start[1] == '\n')

1419 return false;

1420 Len = 1;

1421 while (Start[Len] != '\0' && Start[Len] != '\f' && Start[Len] != '\n' &&

1422 Start[Len] != '\r' && Start[Len] != '\t' && Start[Len] != '\v' &&

1423 Start[Len] != ' ') {

1424 // Backslash + CRLF (3 bytes) or backslash + CR/LF (2 bytes) is skipped as a

1425 // unit and does not end the scan. NOTE(review): the look-ahead reads have no bounds check; presumably the buffer is NUL-terminated -- confirm.

1426 if (Start[Len] == '\\' && Start[Len + 1] == '\r' &&

1427 Start[Len + 2] == '\n') {

1428 Len += 3;

1429 } else if (Start[Len] == '\\' &&

1430 (Start[Len + 1] == '\r' || Start[Len + 1] == '\n')) {

1431 Len += 2;

1432 } else {

1433 Len += 1;

1434 }

1435 }

1436 break;

1437 default:

1438 return false;

1439 }

1440 // Publish the matched bytes as a raw_identifier token. The kind is set before

1441 // the length. NOTE(review): this order is presumably required by a check in

1442 // Token::setLength -- confirm before reordering.

1443

1444 Tok.setKind(tok::raw_identifier);

1445 Tok.setLength(Len);

1446 Tok.setLocation(Lex->getSourceLocation(Start, Len));

1447 Tok.setRawIdentifierData(Start);

1448 Lex->seek(Lex->getCurrentBufferOffset() + Len, false);

1449 return true;

1450}

1451

// Reads the next raw token into Tok, records its source text in
// Tok.TokenText, reclassifies some token kinds as string literals, and updates
// the FormattingDisabled state and Tok.Finalized from on/off comments.
1452void FormatTokenLexer::readRawToken(FormatToken &Tok) {

1453 // For Verilog, try the Verilog-specific reader first; the regular raw lexer

1454 // is used when the style is not Verilog or that reader returns false.

1455 if (!Style.isVerilog() || !readRawTokenVerilogSpecific(Tok.Tok))

1456 Lex->LexFromRawLexer(Tok.Tok);

1457 Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),

1458 Tok.Tok.getLength());

1459 // An unknown token that starts with a double quote is treated as a string

1460 // literal and flagged unterminated; in JavaScript a token that is exactly '' is also treated as a string literal.

1461 if (Tok.is(tok::unknown)) {

1462 if (Tok.TokenText.starts_with("\"")) {

1463 Tok.Tok.setKind(tok::string_literal);

1464 Tok.IsUnterminatedLiteral = true;

1465 } else if (Style.isJavaScript() && Tok.TokenText == "''") {

1466 Tok.Tok.setKind(tok::string_literal);

1467 }

1468 }

1469 // In JavaScript and Proto styles, char constants are re-kinded as string literals.

1470 if ((Style.isJavaScript() || Style.isProto()) && Tok.is(tok::char_constant))

1471 Tok.Tok.setKind(tok::string_literal);

1472 // The "on" check runs before Finalized is assigned, so a comment matched by isClangFormatOn gets Finalized == false.

1473 if (Tok.is(tok::comment) && isClangFormatOn(Tok.TokenText))

1474 FormattingDisabled = false;

1475

1476 Tok.Finalized = FormattingDisabled;

1477 // The "off" check runs after Finalized is assigned, so a comment matched by isClangFormatOff keeps the state in effect before it.

1478 if (Tok.is(tok::comment) && isClangFormatOff(Tok.TokenText))

1479 FormattingDisabled = true;

1480}

1481

// Visible effects: whitespace-keeping mode is enabled on Lex and
// TrailingWhitespace is cleared.
// NOTE(review): listing lines 1483-1484 are missing from this extract. The
// remaining argument list (Buffer.begin(), Buffer.begin() + Offset,
// Buffer.end()) suggests Lex is rebuilt to continue at Offset within the
// buffer -- confirm against the full source.
1482void FormatTokenLexer::resetLexer(unsigned Offset) {

1485 Buffer.begin(), Buffer.begin() + Offset, Buffer.end()));

1486 Lex->SetKeepWhitespaceMode(true);

1487 TrailingWhitespace = 0;

1488}

1489

1490}

1491}

This file contains FormatTokenLexer, which tokenizes a source file into a token stream suitable for C...

This file contains the declaration of the FormatToken, a wrapper around Token with additional informa...

Various functions to configurably format source code.

Defines the clang::SourceLocation class and associated facilities.

Defines the SourceManager interface.

An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...

Implements an efficient mapping from strings to IdentifierInfo nodes.

IdentifierInfo & get(StringRef Name)

Return the identifier token info for the specified named identifier.

Lexer - This provides a simple interface that turns a text buffer into a stream of tokens.

SourceLocation getLocWithOffset(IntTy Offset) const

Return a source location with the specified offset from this SourceLocation.

This class handles loading and caching of source files into memory.

unsigned getFileOffset(SourceLocation SpellingLoc) const

Returns the offset from the start of the file that the specified SourceLocation represents.

StringRef getBufferData(FileID FID, bool *Invalid=nullptr) const

Return a StringRef to the source buffer data for the specified FileID.

SourceLocation getLocForEndOfFile(FileID FID) const

Return the source location corresponding to the last byte of the specified file.

const char * getCharacterData(SourceLocation SL, bool *Invalid=nullptr) const

Return a pointer to the start of the specified location in the appropriate spelling MemoryBuffer.

llvm::MemoryBufferRef getBufferOrFake(FileID FID, SourceLocation Loc=SourceLocation()) const

Return the buffer for the specified FileID.

std::pair< FileID, unsigned > getDecomposedLoc(SourceLocation Loc) const

Decompose the specified location into a raw FileID + Offset pair.

SourceLocation getLocForStartOfFile(FileID FID) const

Return the source location corresponding to the first byte of the specified file.

IdentifierInfo * getIdentifierInfo() const

SourceLocation getLocation() const

Return a source location identifier for the specified offset in the current file.

void setLength(unsigned Len)

void setKind(tok::TokenKind K)

void setLocation(SourceLocation L)

bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const

void setIdentifierInfo(IdentifierInfo *II)

FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, unsigned Column, const FormatStyle &Style, encoding::Encoding Encoding, llvm::SpecificBumpPtrAllocator< FormatToken > &Allocator, IdentifierTable &IdentTable)

ArrayRef< FormatToken * > lex()

unsigned columnWidthWithTabs(StringRef Text, unsigned StartColumn, unsigned TabWidth, Encoding Encoding)

Returns the number of columns required to display the Text, starting from the StartColumn on a termin...

static auto lexCSharpString(const char *Begin, const char *End, bool Verbatim, bool Interpolated)

static size_t countLeadingWhitespace(StringRef Text)

Count the length of leading whitespace in a token.

bool isClangFormatOff(StringRef Comment)

LangOptions getFormattingLangOpts(const FormatStyle &Style=getLLVMStyle())

Returns the LangOpts that the formatter expects you to set.

bool isClangFormatOn(StringRef Comment)

TokenType

Determines the semantic type of a syntactic token, e.g.

TokenKind

Provides a simple uniform namespace for tokens from all C languages.

The JSON file list parser is used to communicate input to InstallAPI.

IdentifierInfo * kw_apostrophe

bool isTableGenKeyword(const FormatToken &Tok) const

bool isCSharpKeyword(const FormatToken &Tok) const

Returns true if Tok is a C# keyword, returns false if it is anything else.

IdentifierInfo * kw_instanceof

The FormatStyle is used to configure the formatting to follow specific guidelines.

@ LK_Java

Should be used for Java.

@ LK_TextProto

Should be used for Protocol Buffer messages in text format (https://developers.google....

std::vector< std::string > AttributeMacros

A vector of strings that should be interpreted as attributes/qualifiers instead of identifiers.

bool KeepFormFeed

Keep the form feed character if it's immediately preceded and followed by a newline.

std::vector< std::string > VariableTemplates

A vector of non-keyword identifiers that should be interpreted as variable template names.

std::string MacroBlockBegin

A regular expression matching macros that start a block.

LanguageKind Language

Language, this format style is targeted at.

unsigned TabWidth

The number of columns used for tab stops.

std::vector< std::string > StatementAttributeLikeMacros

Macros which are ignored in front of a statement, as if they were an attribute.

std::vector< std::string > IfMacros

A vector of macros that should be interpreted as conditionals instead of as function calls.

std::vector< std::string > ForEachMacros

A vector of macros that should be interpreted as foreach loops instead of as function calls.

std::vector< std::string > TypeNames

A vector of non-keyword identifiers that should be interpreted as type names.

std::vector< std::string > WhitespaceSensitiveMacros

A vector of macros which are whitespace-sensitive and should not be touched.

std::vector< std::string > TemplateNames

A vector of non-keyword identifiers that should be interpreted as template names.

bool isJavaScript() const

std::vector< std::string > NamespaceMacros

A vector of macros which are used to open namespace blocks.

std::vector< std::string > StatementMacros

A vector of macros that should be interpreted as complete statements.

std::string MacroBlockEnd

A regular expression matching macros that end a block.

bool InsertNewlineAtEOF

Insert a newline at end of file if missing.

std::vector< std::string > TypenameMacros

A vector of macros that should be interpreted as type declarations instead of as function calls.

unsigned OriginalColumn

The original 0-based column of this token, including expanded tabs.

StringRef TokenText

The raw text of the token.

unsigned LastNewlineOffset

The offset just past the last '\n' in this token's leading whitespace (relative to WhiteSpaceStart).

unsigned IsMultiline

Whether the token text contains newlines (escaped or not).

unsigned NewlinesBefore

The number of newlines immediately before the Token.

unsigned HasUnescapedNewline

Whether there is at least one unescaped newline before the Token.

unsigned ColumnWidth

The width of the non-whitespace parts of the token (or its first line for multi-line tokens) in colum...

void setType(TokenType T)

bool HasFormFeedBefore

Has "\n\f\n" or "\n\f\r\n" before TokenText.

bool is(tok::TokenKind Kind) const

unsigned LastLineColumnWidth

Contains the width in columns of the last line of a multi-line token.

bool isOneOf(A K1, B K2) const

unsigned IsFirst

Indicates that this is the first token of the file.

SourceRange WhitespaceRange

The range of the whitespace immediately preceding the Token.

void setFinalizedType(TokenType T)

Sets the type and also the finalized flag.