clang: lib/Format/BreakableToken.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

19#include "llvm/ADT/STLExtras.h"

20#include "llvm/Support/Debug.h"

21#include <algorithm>

22

23#define DEBUG_TYPE "format-token-breaker"

24

26namespace format {

27

28static constexpr StringRef Blanks = " \t\v\f\r";

30 switch (C) {

31 case ' ':

32 case '\t':

33 case '\v':

34 case '\f':

35 case '\r':

36 return true;

37 default:

38 return false;

39 }

40}

41

44 static constexpr StringRef KnownCStylePrefixes[] = {"///<", "//!<", "///",

45 "//!", "//:", "//"};

46 static constexpr StringRef KnownTextProtoPrefixes[] = {"####", "###", "##",

47 "//", "#"};

50 KnownPrefixes = KnownTextProtoPrefixes;

51

52 assert(

53 llvm::is_sorted(KnownPrefixes, [](StringRef Lhs, StringRef Rhs) noexcept {

54 return Lhs.size() > Rhs.size();

55 }));

56

57 for (StringRef KnownPrefix : KnownPrefixes) {

58 if (Comment.starts_with(KnownPrefix)) {

59 const auto PrefixLength =

60 Comment.find_first_not_of(' ', KnownPrefix.size());

61 return Comment.substr(0, PrefixLength);

62 }

63 }

64 return {};

65}

66

69 unsigned ColumnLimit, unsigned TabWidth,

71 bool DecorationEndsWithStar = false) {

72 LLVM_DEBUG(llvm::dbgs() << "Comment split: \"" << Text

73 << "\", Column limit: " << ColumnLimit

74 << ", Content start: " << ContentStartColumn << "\n");

75 if (ColumnLimit <= ContentStartColumn + 1)

77

78 unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1;

79 unsigned MaxSplitBytes = 0;

80

81 for (unsigned NumChars = 0;

82 NumChars < MaxSplit && MaxSplitBytes < Text.size();) {

83 unsigned BytesInChar =

86 Text.substr(MaxSplitBytes, BytesInChar), ContentStartColumn + NumChars,

87 TabWidth, Encoding);

88 MaxSplitBytes += BytesInChar;

89 }

90

91

92

93

95 StringRef::size_type SpaceOffset =

96 Text.find_first_of(Blanks, MaxSplitBytes);

97 if (SpaceOffset != StringRef::npos && SpaceOffset + 1 < Text.size() &&

98 Text[SpaceOffset + 1] == '{') {

99 MaxSplitBytes = SpaceOffset + 1;

100 }

101 }

102

103 StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes);

104

105 static const auto kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\.");

106

107 while (SpaceOffset != StringRef::npos) {

108

109

110

111

112

113 if (Style.isCpp()) {

114 StringRef::size_type LastNonBlank =

115 Text.find_last_not_of(Blanks, SpaceOffset);

116 if (LastNonBlank != StringRef::npos && Text[LastNonBlank] == '\\') {

117 SpaceOffset = Text.find_last_of(Blanks, LastNonBlank);

118 continue;

119 }

120 }

121

122

123

124 if (kNumberedListRegexp.match(Text.substr(SpaceOffset).ltrim(Blanks))) {

125 SpaceOffset = Text.find_last_of(Blanks, SpaceOffset);

126 continue;

127 }

128

129

131 (Text[SpaceOffset + 1] == '{' || Text[SpaceOffset + 1] == '@')) {

132 SpaceOffset = Text.find_last_of(Blanks, SpaceOffset);

133 continue;

134 }

135

136 break;

137 }

138

139 if (SpaceOffset == StringRef::npos ||

140

141 Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) {

142

143

144 StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(Blanks);

145 if (FirstNonWhitespace == StringRef::npos) {

146

148 }

149 SpaceOffset = Text.find_first_of(

150 Blanks, std::max(MaxSplitBytes, FirstNonWhitespace));

151 }

152 if (SpaceOffset != StringRef::npos && SpaceOffset != 0) {

153

154

155

156

157 if (SpaceOffset == 1 && Text[SpaceOffset - 1] == '*')

159 StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim(Blanks);

160 StringRef AfterCut = Text.substr(SpaceOffset);

161

162 if (!DecorationEndsWithStar || AfterCut.size() <= 1 || AfterCut[1] != '/')

163 AfterCut = AfterCut.ltrim(Blanks);

165 AfterCut.begin() - BeforeCut.end());

166 }

168}

169

173

174 if (Text.empty())

176 if (ColumnLimit <= UsedColumns)

178 unsigned MaxSplit = ColumnLimit - UsedColumns;

179 StringRef::size_type SpaceOffset = 0;

180 StringRef::size_type SlashOffset = 0;

181 StringRef::size_type WordStartOffset = 0;

182 StringRef::size_type SplitPoint = 0;

183 for (unsigned Chars = 0;;) {

184 unsigned Advance;

185 if (Text[0] == '\\') {

187 Chars += Advance;

188 } else {

191 Text.substr(0, Advance), UsedColumns + Chars, TabWidth, Encoding);

192 }

193

194 if (Chars > MaxSplit || Text.size() <= Advance)

195 break;

196

198 SpaceOffset = SplitPoint;

199 if (Text[0] == '/')

200 SlashOffset = SplitPoint;

202 WordStartOffset = SplitPoint;

203

204 SplitPoint += Advance;

206 }

207

208 if (SpaceOffset != 0)

210 if (SlashOffset != 0)

212 if (WordStartOffset != 0)

214 if (SplitPoint != 0)

217}

218

220 assert((Token.is(TT_BlockComment) || Token.is(TT_LineComment)) &&

221 "formatting regions are switched by comment tokens");

222 StringRef Content = Token.TokenText.substr(2).ltrim();

223 return Content.starts_with("clang-format on") ||

224 Content.starts_with("clang-format off");

225}

226

227unsigned

230

231

232

233

234

235

236

237

238

239

240

241 return RemainingTokenColumns + 1 - Split.second;

242}

243

245

247 unsigned Offset,

248 StringRef::size_type Length,

249 unsigned StartColumn) const {

250 llvm_unreachable("Getting the length of a part of the string literal "

251 "indicates that the code tries to reflow it.");

252}

253

254unsigned

256 unsigned StartColumn) const {

260}

261

263 bool Break) const {

265}

266

268 const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,

269 StringRef Postfix, unsigned UnbreakableTailLength, bool InPPDirective,

271 : BreakableToken(Tok, InPPDirective, Encoding, Style),

272 StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix),

273 UnbreakableTailLength(UnbreakableTailLength) {

277}

278

280 unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,

281 unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const {

284}

285

288 unsigned ContentIndent,

290 Whitespaces.replaceWhitespaceInToken(

293}

294

297 unsigned StartColumn, unsigned UnbreakableTailLength, bool InPPDirective,

300 Tok, StartColumn, QuoteStyle == SingleQuotes ? "'"

301 : QuoteStyle == AtDoubleQuotes ? "@\""

302 : "\"",

303 QuoteStyle == SingleQuotes ? "'" : "\"",

304 UnbreakableTailLength, InPPDirective, Encoding, Style),

305 BracesNeeded(Tok.isNot(TT_StringInConcatenation)),

306 QuoteStyle(QuoteStyle) {

307

308

309

310

311

312

313

314 bool SignOnNewLine =

317

319

320

326 } else {

327

328

332 Postfix = SignOnNewLine ? "'" : "' +";

333 Prefix = SignOnNewLine ? "+ '" : "'";

334 } else {

337 Prefix = SignOnNewLine ? "+ @\"" : "@\"";

338 } else {

340 Prefix = SignOnNewLine ? "+ \"" : "\"";

341 }

343 Postfix = SignOnNewLine ? "\"" : "\" +";

344 }

345 }

346

347

349

350

354 }

355}

356

358 unsigned LineIndex, unsigned Offset, unsigned StartColumn) const {

362}

363

364unsigned

366 bool Break) const {

367 return std::max(

368 0,

372 : 0) +

374}

375

377 unsigned LineIndex, unsigned TailOffset, Split Split,

379 Whitespaces.replaceWhitespaceInToken(

382 Split.second, Postfix,

384

386}

387

390

392 return;

393

394

395

396

397

398

399 Whitespaces.replaceWhitespaceInToken(

400 Tok, 0, 1, "",

402 0);

403 Whitespaces.replaceWhitespaceInToken(

404 Tok, Tok.TokenText.size() - 1, 1,

406 "", InPPDirective, 0, 0);

407}

408

410 unsigned StartColumn, bool InPPDirective,

414 StartColumn(StartColumn) {}

415

417

420 unsigned ColumnLimit, unsigned ContentStartColumn,

421 const llvm::Regex &CommentPragmasRegex) const {

422

424 return Split(StringRef::npos, 0);

428}

429

431 unsigned LineIndex, unsigned TailOffset, Split Split,

433 StringRef Text = Content[LineIndex].substr(TailOffset);

434

435

436

437

438 unsigned BreakOffsetInToken =

440 unsigned CharsToRemove = Split.second;

441 Whitespaces.replaceWhitespaceInToken(

442 tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", "",

443 false, 0, 1);

444}

445

448}

449

451 Content = Content.trim(Blanks);

452

453

454 bool hasSpecialMeaningPrefix = false;

455 for (StringRef Prefix :

456 {"@", "\\", "TODO", "FIXME", "XXX", "-# ", "- ", "+ ", "* "}) {

457 if (Content.starts_with(Prefix)) {

458 hasSpecialMeaningPrefix = true;

459 break;

460 }

461 }

462

463

464

465

466 static const auto kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\. ");

467 hasSpecialMeaningPrefix =

468 hasSpecialMeaningPrefix || kNumberedListRegexp.match(Content);

469

470

471

472

473 return Content.size() >= 2 && !hasSpecialMeaningPrefix &&

474 !Content.ends_with("\\") &&

475

476

478}

479

482 unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,

485 DelimitersOnNewline(false),

486 UnbreakableTailLength(Token.UnbreakableTailLength) {

487 assert(Tok.is(TT_BlockComment) &&

488 "block comment section must start with a block comment");

489

491 assert(TokenText.starts_with("/*") && TokenText.ends_with("*/"));

492 TokenText.substr(2, TokenText.size() - 4)

493 .split(Lines, UseCRLF ? "\r\n" : "\n");

494

495 int IndentDelta = StartColumn - OriginalStartColumn;

499

502 for (size_t i = 1; i < Lines.size(); ++i)

503 adjustWhitespace(i, IndentDelta);

504

505

506

508

509

510

511

512

513

514 if (Lines.size() >= 2 && Content[1].starts_with("**") &&

517 }

518

519 Decoration = "* ";

520 if (Lines.size() == 1 && !FirstInLine) {

521

522

523

524

525

526 Decoration = "";

527 }

528 for (size_t i = 1, e = Content.size(); i < e && !Decoration.empty(); ++i) {

530 if (i + 1 == e) {

531

532 if (Text.empty())

533 break;

534 } else if (Text.empty() && Decoration.starts_with(Text)) {

535 continue;

536 }

537 while (Text.starts_with(Decoration))

538 Decoration = Decoration.drop_back(1);

539 }

540

541 LastLineNeedsDecoration = true;

543 for (size_t i = 1, e = Lines.size(); i < e; ++i) {

544 if (Content[i].empty()) {

545 if (i + 1 == e) {

546

547

548

549 LastLineNeedsDecoration = false;

550

551 if (e >= 2 && !Decoration.empty())

553 } else if (Decoration.empty()) {

554

555

557 }

558 continue;

559 }

560

561

562

563

564

565 unsigned DecorationSize = Decoration.starts_with(Content[i])

567 : Decoration.size();

568 if (DecorationSize)

569 ContentColumn[i] = DecorationColumn + DecorationSize;

571 if (!Decoration.starts_with(Content[i])) {

572 IndentAtLineBreak =

573 std::min(IndentAtLineBreak, std::max(0, ContentColumn[i]));

574 }

575 }

576 IndentAtLineBreak = std::max(IndentAtLineBreak, Decoration.size());

577

578

580 if ((Lines[0] == "*" || Lines[0].starts_with("* ")) && Lines.size() > 1) {

581

582 DelimitersOnNewline = true;

583 } else if (Lines[0].starts_with("* ") && Lines.size() == 1) {

584

585

586

587 unsigned EndColumn =

591 2;

593 }

594 }

595

596 LLVM_DEBUG({

597 llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n";

598 llvm::dbgs() << "DelimitersOnNewline " << DelimitersOnNewline << "\n";

599 for (size_t i = 0; i < Lines.size(); ++i) {

600 llvm::dbgs() << i << " |" << Content[i] << "| "

602 << "IN=" << (Content[i].data() - Lines[i].data()) << "\n";

603 }

604 });

605}

606

608 unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,

609 unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const {

610

612 return Split(StringRef::npos, 0);

616}

617

618void BreakableBlockComment::adjustWhitespace(unsigned LineIndex,

619 int IndentDelta) {

620

621

622

623

624

625 size_t EndOfPreviousLine = Lines[LineIndex - 1].size();

627 --EndOfPreviousLine;

628

629

630 EndOfPreviousLine =

631 Lines[LineIndex - 1].find_last_not_of(Blanks, EndOfPreviousLine);

632 if (EndOfPreviousLine == StringRef::npos)

633 EndOfPreviousLine = 0;

634 else

635 ++EndOfPreviousLine;

636

637 size_t StartOfLine = Lines[LineIndex].find_first_not_of(Blanks);

638 if (StartOfLine == StringRef::npos)

639 StartOfLine = Lines[LineIndex].size();

640

641 StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine);

642

643 size_t PreviousContentOffset =

644 Content[LineIndex - 1].data() - Lines[LineIndex - 1].data();

645 Content[LineIndex - 1] = Lines[LineIndex - 1].substr(

646 PreviousContentOffset, EndOfPreviousLine - PreviousContentOffset);

647 Content[LineIndex] = Lines[LineIndex].substr(StartOfLine);

648

649

652 IndentDelta;

653}

654

656 unsigned Offset,

657 StringRef::size_type Length,

658 unsigned StartColumn) const {

662}

663

665 unsigned Offset,

666 unsigned StartColumn) const {

667 unsigned LineLength =

668 UnbreakableTailLength +

670 if (LineIndex + 1 == Lines.size()) {

671 LineLength += 2;

672

673 bool HasRemainingText = Offset < Content[LineIndex].size();

674 if (!HasRemainingText) {

675 bool HasDecoration = Lines[LineIndex].ltrim().starts_with(Decoration);

676 if (HasDecoration)

677 LineLength -= Decoration.size();

678 }

679 }

680 return LineLength;

681}

682

684 bool Break) const {

685 if (Break)

686 return IndentAtLineBreak;

688}

689

690const llvm::StringSet<>

692 "@param", "@return", "@returns", "@throws", "@type", "@template",

693 "@see", "@deprecated", "@define", "@exports", "@mods", "@private",

694};

695

698 return 0;

699

700

701

702 StringRef ContentWithNoDecoration = Content[LineIndex];

703 if (LineIndex == 0 && ContentWithNoDecoration.starts_with("*"))

704 ContentWithNoDecoration = ContentWithNoDecoration.substr(1).ltrim(Blanks);

705 StringRef FirstWord = ContentWithNoDecoration.substr(

706 0, ContentWithNoDecoration.find_first_of(Blanks));

709 return 0;

710}

711

715 StringRef Text = Content[LineIndex].substr(TailOffset);

716 StringRef Prefix = Decoration;

717

718

719

720 unsigned LocalIndentAtLineBreak = IndentAtLineBreak;

721 if (LineIndex + 1 == Lines.size() &&

723

724 Prefix = "";

725 if (LocalIndentAtLineBreak >= 2)

726 LocalIndentAtLineBreak -= 2;

727 }

728

729

730 unsigned BreakOffsetInToken =

732 unsigned CharsToRemove = Split.second;

733 assert(LocalIndentAtLineBreak >= Prefix.size());

734 std::string PrefixWithTrailingIndent = std::string(Prefix);

735 PrefixWithTrailingIndent.append(ContentIndent, ' ');

736 Whitespaces.replaceWhitespaceInToken(

737 tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "",

738 PrefixWithTrailingIndent, InPPDirective, 1,

739 LocalIndentAtLineBreak + ContentIndent -

740 PrefixWithTrailingIndent.size());

741}

742

744 unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {

745 if (mayReflow(LineIndex, CommentPragmasRegex))

746 return Split(StringRef::npos, 0);

747

748

749

750 size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks);

751 if (LineIndex) {

752 unsigned PreviousContentIndent = getContentIndent(LineIndex - 1);

753 if (PreviousContentIndent && Trimmed != StringRef::npos &&

754 Trimmed != PreviousContentIndent) {

755 return Split(StringRef::npos, 0);

756 }

757 }

758

759 return Split(0, Trimmed != StringRef::npos ? Trimmed : 0);

760}

761

763

764 return DelimitersOnNewline &&

765 Lines[0].substr(1).find_first_not_of(Blanks) != StringRef::npos;

766}

767

770 StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);

771

772 assert(Tokens[LineIndex - 1] == Tokens[LineIndex] &&

773 "Reflowing whitespace within a token");

774

775

776 unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +

777 Content[LineIndex - 1].size() -

779 unsigned WhitespaceLength = TrimmedContent.data() -

781 WhitespaceOffsetInToken;

782 Whitespaces.replaceWhitespaceInToken(

783 tokenAt(LineIndex), WhitespaceOffsetInToken,

784 WhitespaceLength, "",

786 0);

787}

788

791 if (LineIndex == 0) {

792 if (DelimitersOnNewline) {

793

794

795

796

797 size_t BreakLength = Lines[0].substr(1).find_first_not_of(Blanks);

798 if (BreakLength != StringRef::npos) {

799 insertBreak(LineIndex, 0, Split(1, BreakLength), 0,

800 Whitespaces);

801 }

802 }

803 return;

804 }

805

806

807 StringRef Prefix = Decoration;

808 if (Content[LineIndex].empty()) {

809 if (LineIndex + 1 == Lines.size()) {

810 if (!LastLineNeedsDecoration) {

811

812

813 Prefix = "";

814 }

815 } else if (!Decoration.empty()) {

816

817

818 Prefix = Prefix.substr(0, 1);

819 }

821

822 Prefix = Prefix.substr(0, 1);

823 }

824

825

826 unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +

827 Content[LineIndex - 1].size() -

829 unsigned WhitespaceLength = Content[LineIndex].data() -

831 WhitespaceOffsetInToken;

832 Whitespaces.replaceWhitespaceInToken(

833 tokenAt(LineIndex), WhitespaceOffsetInToken, WhitespaceLength, "", Prefix,

835}

836

839 if (DelimitersOnNewline) {

840

841

842

843 StringRef Line = Content.back().substr(TailOffset);

844 StringRef TrimmedLine = Line.rtrim(Blanks);

845 if (!TrimmedLine.empty())

846 return Split(TrimmedLine.size(), Line.size() - TrimmedLine.size());

847 }

848 return Split(StringRef::npos, 0);

849}

850

852 unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {

853

854

855 StringRef IndentContent = Content[LineIndex];

856 if (Lines[LineIndex].ltrim(Blanks).starts_with("*"))

857 IndentContent = Lines[LineIndex].ltrim(Blanks).substr(1);

859 !CommentPragmasRegex.match(IndentContent) &&

862}

863

865 const FormatToken &Token, unsigned StartColumn, bool InPPDirective,

868 assert(Tok.is(TT_LineComment) &&

869 "line comment section must start with a line comment");

872

873

874 int FirstLineSpaceChange = 0;

876 CurrentTok && CurrentTok->is(TT_LineComment);

877 CurrentTok = CurrentTok->Next) {

878 LastLineTok = LineTok;

879 StringRef TokenText(CurrentTok->TokenText);

880 assert((TokenText.starts_with("//") || TokenText.starts_with("#")) &&

881 "unsupported line comment prefix, '//' and '#' are supported");

882 size_t FirstLineIndex = Lines.size();

883 TokenText.split(Lines, "\n");

886 PrefixSpaceChange.resize(Lines.size());

888 Prefix.resize(Lines.size());

889 OriginalPrefix.resize(Lines.size());

890 for (size_t i = FirstLineIndex, e = Lines.size(); i < e; ++i) {

893 OriginalPrefix[i] = IndentPrefix;

894 const int SpacesInPrefix = llvm::count(IndentPrefix, ' ');

895

896

897

898 const auto NoSpaceBeforeFirstCommentChar = [&]() {

899 assert(Lines[i].size() > IndentPrefix.size());

900 const char FirstCommentChar = Lines[i][IndentPrefix.size()];

901 const unsigned FirstCharByteSize =

904 Lines[i].substr(IndentPrefix.size(), FirstCharByteSize),

906 return false;

907 }

908

909

910

911

912

913

914

915

916

917 if (FirstCommentChar == '#' && !TokenText.starts_with("#"))

918 return false;

919 return FirstCommentChar == '\\' || isPunctuation(FirstCommentChar) ||

921 };

922

923

924

925

926

927

928 if (i == 0 || OriginalPrefix[i].rtrim(Blanks) !=

929 OriginalPrefix[i - 1].rtrim(Blanks)) {

930 if (SpacesInPrefix < Minimum && Lines[i].size() > IndentPrefix.size() &&

931 !NoSpaceBeforeFirstCommentChar()) {

932 FirstLineSpaceChange = Minimum - SpacesInPrefix;

933 } else if (static_cast<unsigned>(SpacesInPrefix) >

935 FirstLineSpaceChange =

937 } else {

938 FirstLineSpaceChange = 0;

939 }

940 }

941

942 if (Lines[i].size() != IndentPrefix.size()) {

943 PrefixSpaceChange[i] = FirstLineSpaceChange;

944

945 if (SpacesInPrefix + PrefixSpaceChange[i] < Minimum) {

946 PrefixSpaceChange[i] +=

947 Minimum - (SpacesInPrefix + PrefixSpaceChange[i]);

948 }

949

950 assert(Lines[i].size() > IndentPrefix.size());

951 const auto FirstNonSpace = Lines[i][IndentPrefix.size()];

952 const bool IsFormatComment = LineTok && switchesFormatting(*LineTok);

953 const bool LineRequiresLeadingSpace =

954 !NoSpaceBeforeFirstCommentChar() ||

955 (FirstNonSpace == '}' && FirstLineSpaceChange != 0);

956 const bool AllowsSpaceChange =

957 !IsFormatComment &&

958 (SpacesInPrefix != 0 || LineRequiresLeadingSpace);

959

960 if (PrefixSpaceChange[i] > 0 && AllowsSpaceChange) {

961 Prefix[i] = IndentPrefix.str();

962 Prefix[i].append(PrefixSpaceChange[i], ' ');

963 } else if (PrefixSpaceChange[i] < 0 && AllowsSpaceChange) {

964 Prefix[i] = IndentPrefix

965 .drop_back(std::min<std::size_t>(

966 -PrefixSpaceChange[i], SpacesInPrefix))

967 .str();

968 } else {

969 Prefix[i] = IndentPrefix.str();

970 }

971 } else {

972

973

974 Prefix[i] = IndentPrefix.drop_back(SpacesInPrefix).str();

975 }

976

978 Content[i] = Lines[i].substr(IndentPrefix.size());

982

983

984 size_t EndOfLine = Content[i].find_last_not_of(Blanks);

985 if (EndOfLine == StringRef::npos)

986 EndOfLine = Content[i].size();

987 else

988 ++EndOfLine;

990 }

991 LineTok = CurrentTok->Next;

992 if (CurrentTok->Next && !CurrentTok->Next->ContinuesLineCommentSection) {

993

994

995

996

997

998

999

1000

1001

1002

1003

1004

1005

1006

1007 break;

1008 }

1009 }

1010}

1011

1012unsigned

1014 StringRef::size_type Length,

1015 unsigned StartColumn) const {

1019}

1020

1021unsigned

1023 bool ) const {

1025}

1026

1028 unsigned LineIndex, unsigned TailOffset, Split Split,

1030 StringRef Text = Content[LineIndex].substr(TailOffset);

1031

1032

1033 unsigned BreakOffsetInToken =

1035 unsigned CharsToRemove = Split.second;

1036 Whitespaces.replaceWhitespaceInToken(

1037 tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "",

1038 Prefix[LineIndex], InPPDirective, 1,

1039 ContentColumn[LineIndex] - Prefix[LineIndex].size());

1040}

1041

1043 unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {

1044 if (mayReflow(LineIndex, CommentPragmasRegex))

1045 return Split(StringRef::npos, 0);

1046

1047 size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks);

1048

1049

1050

1051

1052

1053 return Split(0, Trimmed != StringRef::npos ? Trimmed : 0);

1054}

1055

1058 if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) {

1059

1060

1061 Whitespaces.replaceWhitespace(

1062 *Tokens[LineIndex], 0, 0,

1063 StartColumn, true,

1064 false);

1065 } else if (LineIndex > 0) {

1066

1067

1068

1069

1070

1071

1072

1073

1074

1075

1076 unsigned Offset = Lines[LineIndex - 1].data() +

1077 Lines[LineIndex - 1].size() -

1079

1080

1081 unsigned WhitespaceLength =

1083 Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], Offset,

1084 WhitespaceLength,

1085 "",

1086 "",

1087 false,

1088 0,

1089 0);

1090 }

1091

1092 unsigned Offset =

1094 unsigned WhitespaceLength =

1095 Content[LineIndex].data() - Lines[LineIndex].data();

1096 Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], Offset,

1097 WhitespaceLength,

1098 "",

1100 false,

1101 0,

1102 0);

1103}

1104

1107

1108

1109

1110

1111

1112

1113 if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) {

1114

1115

1116

1117

1118

1119 unsigned LineColumn =

1121 (Content[LineIndex].data() - Lines[LineIndex].data()) +

1122 (OriginalPrefix[LineIndex].size() - Prefix[LineIndex].size());

1123

1124

1125

1126

1127

1128 Whitespaces.replaceWhitespace(*Tokens[LineIndex],

1129 1,

1130 LineColumn,

1131 LineColumn,

1132 true,

1133 false);

1134 }

1135 if (OriginalPrefix[LineIndex] != Prefix[LineIndex]) {

1136

1137 const auto SpacesToRemove = -std::min(PrefixSpaceChange[LineIndex], 0);

1138 const auto SpacesToAdd = std::max(PrefixSpaceChange[LineIndex], 0);

1139 Whitespaces.replaceWhitespaceInToken(

1140 tokenAt(LineIndex), OriginalPrefix[LineIndex].size() - SpacesToRemove,

1141 SpacesToRemove, "", "", false,

1142 0, SpacesToAdd);

1143 }

1144}

1145

1147 if (LastLineTok)

1148 State.NextToken = LastLineTok->Next;

1149}

1150

1152 unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {

1153

1154

1155 StringRef IndentContent = Content[LineIndex];

1156 if (Lines[LineIndex].starts_with("//"))

1157 IndentContent = Lines[LineIndex].substr(2);

1158

1159

1160

1161

1162

1163

1165 !CommentPragmasRegex.match(IndentContent) &&

1168 OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1];

1169}

1170

1171}

1172}

Declares BreakableToken, BreakableStringLiteral, BreakableComment, BreakableBlockComment and Breakabl...

This file implements an indenter that manages the indentation of continuations.

Various functions to configurably format source code.

Token - This structure provides full information about a lexed token.

bool is(tok::TokenKind K) const

is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....

BreakableStringLiteralUsingOperators(const FormatToken &Tok, QuoteStyleType QuoteStyle, bool UnindentPlus, unsigned StartColumn, unsigned UnbreakableTailLength, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)

Creates a breakable token for a single line string literal for C#, Java, JavaScript,...

QuoteStyleType QuoteStyle

void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, unsigned ContentIndent, WhitespaceManager &Whitespaces) const override

Emits the previously retrieved Split via Whitespaces.

StringRef RightBraceQuote

unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override

Returns the column at which content in line LineIndex starts, assuming no reflow.

void updateAfterBroken(WhitespaceManager &Whitespaces) const override

Adds replacements that are needed when the token is broken.

unsigned getRemainingLength(unsigned LineIndex, unsigned Offset, unsigned StartColumn) const override

Returns the number of columns required to format the text following the byte Offset in the line LineI...

Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const override

Returns a range (offset, length) at which to break the line at LineIndex, if previously broken at Tai...

unsigned UnbreakableTailLength

unsigned getRangeLength(unsigned LineIndex, unsigned Offset, StringRef::size_type Length, unsigned StartColumn) const override

Returns the number of columns required to format the text in the byte range [Offset,...

unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override

Returns the column at which content in line LineIndex starts, assuming no reflow.

unsigned getLineCount() const override

Returns the number of lines in this token in the original code.

unsigned getRemainingLength(unsigned LineIndex, unsigned Offset, unsigned StartColumn) const override

Returns the number of columns required to format the text following the byte Offset in the line LineI...

BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn, StringRef Prefix, StringRef Postfix, unsigned UnbreakableTailLength, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)

Creates a breakable token for a single line string literal.

void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, unsigned ContentIndent, WhitespaceManager &Whitespaces) const override

Emits the previously retrieved Split via Whitespaces.

Base class for tokens / ranges of tokens that can allow breaking within the tokens - for example,...

std::pair< StringRef::size_type, unsigned > Split

Contains starting character index and length of split.

const FormatStyle & Style

unsigned getLengthAfterCompression(unsigned RemainingTokenColumns, Split Split) const

Returns the number of columns needed to format RemainingTokenColumns, assuming that Split is within t...

const encoding::Encoding Encoding

Manages the whitespaces around tokens and their replacements.

unsigned columnWidthWithTabs(StringRef Text, unsigned StartColumn, unsigned TabWidth, Encoding Encoding)

Returns the number of columns required to display the Text, starting from the StartColumn on a termin...

unsigned getEscapeSequenceLength(StringRef Text)

Gets the length of an escape sequence inside a C++ string literal.

unsigned getCodePointNumBytes(char FirstChar, Encoding Encoding)

Gets the number of bytes in a sequence representing a single codepoint and starting with FirstChar in...

unsigned columnWidth(StringRef Text, Encoding Encoding)

Returns the number of columns required to display the Text on a generic Unicode-capable terminal.

static constexpr StringRef Blanks

bool switchesFormatting(const FormatToken &Token)

Checks if Token switches formatting, like /* clang-format off */.

static bool IsBlank(char C)

static BreakableToken::Split getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit, unsigned TabWidth, encoding::Encoding Encoding)

static StringRef getLineCommentIndentPrefix(StringRef Comment, const FormatStyle &Style)

static bool mayReflowContent(StringRef Content)

static BreakableToken::Split getCommentSplit(StringRef Text, unsigned ContentStartColumn, unsigned ColumnLimit, unsigned TabWidth, encoding::Encoding Encoding, const FormatStyle &Style, bool DecorationEndsWithStar=false)

The JSON file list parser is used to communicate input to InstallAPI.

LLVM_READONLY bool isAlphanumeric(unsigned char c)

Return true if this character is an ASCII letter or digit: [a-zA-Z0-9].

LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)

Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.

LLVM_READONLY bool isPunctuation(unsigned char c)

Return true if this character is an ASCII punctuation character.

bool Other

Put a space in parentheses not covered by preceding options.

The FormatStyle is used to configure the formatting to follow specific guidelines.

unsigned ContinuationIndentWidth

Indent width for line continuations.

@ LK_Java

Should be used for Java.

@ LK_TextProto

Should be used for Protocol Buffer messages in text format (https://developers.google....

bool Cpp11BracedListStyle

If true, format braced lists as best suited for C++11 braced lists.

BinaryOperatorStyle BreakBeforeBinaryOperators

The way to wrap binary operators.

@ BOS_None

Break after operators.

LanguageKind Language

Language, this format style is targeted at.

unsigned TabWidth

The number of columns used for tab stops.

OperandAlignmentStyle AlignOperands

If true, horizontally align operands of binary and ternary expressions.

SpacesInParensCustom SpacesInParensOptions

Control of individual spaces in parentheses.

SpacesInLineComment SpacesInLineCommentPrefix

How many spaces are allowed at the start of a line comment.

bool isJavaScript() const

@ OAS_AlignAfterOperator

Horizontally align operands of binary and ternary expressions.

unsigned ColumnLimit

The column limit.

A wrapper around a Token storing information about the whitespace characters preceding it.

StringRef TokenText

The raw text of the token.

unsigned Finalized

If true, this token has been fully formatted (indented and potentially re-formatted inside),...

FormatToken * Next

The next token in the unwrapped line.

bool is(tok::TokenKind Kind) const

The current state when indenting an unwrapped line.