clang: lib/Format/BreakableToken.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
19#include "llvm/ADT/STLExtras.h"
20#include "llvm/Support/Debug.h"
21#include <algorithm>
22
23#define DEBUG_TYPE "format-token-breaker"
24
26namespace format {
27
28static constexpr StringRef Blanks = " \t\v\f\r";
30 switch (C) {
31 case ' ':
32 case '\t':
33 case '\v':
34 case '\f':
35 case '\r':
36 return true;
37 default:
38 return false;
39 }
40}
41
44 static constexpr StringRef KnownCStylePrefixes[] = {"///<", "//!<", "///",
45 "//!", "//:", "//"};
46 static constexpr StringRef KnownTextProtoPrefixes[] = {"####", "###", "##",
47 "//", "#"};
50 KnownPrefixes = KnownTextProtoPrefixes;
51
52 assert(
53 llvm::is_sorted(KnownPrefixes, [](StringRef Lhs, StringRef Rhs) noexcept {
54 return Lhs.size() > Rhs.size();
55 }));
56
57 for (StringRef KnownPrefix : KnownPrefixes) {
58 if (Comment.starts_with(KnownPrefix)) {
59 const auto PrefixLength =
60 Comment.find_first_not_of(' ', KnownPrefix.size());
61 return Comment.substr(0, PrefixLength);
62 }
63 }
64 return {};
65}
66
69 unsigned ColumnLimit, unsigned TabWidth,
71 bool DecorationEndsWithStar = false) {
72 LLVM_DEBUG(llvm::dbgs() << "Comment split: \"" << Text
73 << "\", Column limit: " << ColumnLimit
74 << ", Content start: " << ContentStartColumn << "\n");
75 if (ColumnLimit <= ContentStartColumn + 1)
77
78 unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1;
79 unsigned MaxSplitBytes = 0;
80
81 for (unsigned NumChars = 0;
82 NumChars < MaxSplit && MaxSplitBytes < Text.size();) {
83 unsigned BytesInChar =
86 Text.substr(MaxSplitBytes, BytesInChar), ContentStartColumn + NumChars,
87 TabWidth, Encoding);
88 MaxSplitBytes += BytesInChar;
89 }
90
91
92
93
95 StringRef::size_type SpaceOffset =
96 Text.find_first_of(Blanks, MaxSplitBytes);
97 if (SpaceOffset != StringRef::npos && SpaceOffset + 1 < Text.size() &&
98 Text[SpaceOffset + 1] == '{') {
99 MaxSplitBytes = SpaceOffset + 1;
100 }
101 }
102
103 StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes);
104
105 static const auto kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\.");
106
107 while (SpaceOffset != StringRef::npos) {
108
109
110
111
112
113 if (Style.isCpp()) {
114 StringRef::size_type LastNonBlank =
115 Text.find_last_not_of(Blanks, SpaceOffset);
116 if (LastNonBlank != StringRef::npos && Text[LastNonBlank] == '\\') {
117 SpaceOffset = Text.find_last_of(Blanks, LastNonBlank);
118 continue;
119 }
120 }
121
122
123
124 if (kNumberedListRegexp.match(Text.substr(SpaceOffset).ltrim(Blanks))) {
125 SpaceOffset = Text.find_last_of(Blanks, SpaceOffset);
126 continue;
127 }
128
129
131 (Text[SpaceOffset + 1] == '{' || Text[SpaceOffset + 1] == '@')) {
132 SpaceOffset = Text.find_last_of(Blanks, SpaceOffset);
133 continue;
134 }
135
136 break;
137 }
138
139 if (SpaceOffset == StringRef::npos ||
140
141 Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) {
142
143
144 StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(Blanks);
145 if (FirstNonWhitespace == StringRef::npos) {
146
148 }
149 SpaceOffset = Text.find_first_of(
150 Blanks, std::max(MaxSplitBytes, FirstNonWhitespace));
151 }
152 if (SpaceOffset != StringRef::npos && SpaceOffset != 0) {
153
154
155
156
157 if (SpaceOffset == 1 && Text[SpaceOffset - 1] == '*')
159 StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim(Blanks);
160 StringRef AfterCut = Text.substr(SpaceOffset);
161
162 if (!DecorationEndsWithStar || AfterCut.size() <= 1 || AfterCut[1] != '/')
163 AfterCut = AfterCut.ltrim(Blanks);
165 AfterCut.begin() - BeforeCut.end());
166 }
168}
169
173
174 if (Text.empty())
176 if (ColumnLimit <= UsedColumns)
178 unsigned MaxSplit = ColumnLimit - UsedColumns;
179 StringRef::size_type SpaceOffset = 0;
180 StringRef::size_type SlashOffset = 0;
181 StringRef::size_type WordStartOffset = 0;
182 StringRef::size_type SplitPoint = 0;
183 for (unsigned Chars = 0;;) {
184 unsigned Advance;
185 if (Text[0] == '\\') {
187 Chars += Advance;
188 } else {
191 Text.substr(0, Advance), UsedColumns + Chars, TabWidth, Encoding);
192 }
193
194 if (Chars > MaxSplit || Text.size() <= Advance)
195 break;
196
198 SpaceOffset = SplitPoint;
199 if (Text[0] == '/')
200 SlashOffset = SplitPoint;
202 WordStartOffset = SplitPoint;
203
204 SplitPoint += Advance;
206 }
207
208 if (SpaceOffset != 0)
210 if (SlashOffset != 0)
212 if (WordStartOffset != 0)
214 if (SplitPoint != 0)
217}
218
220 assert((Token.is(TT_BlockComment) || Token.is(TT_LineComment)) &&
221 "formatting regions are switched by comment tokens");
222 StringRef Content = Token.TokenText.substr(2).ltrim();
223 return Content.starts_with("clang-format on") ||
224 Content.starts_with("clang-format off");
225}
226
227unsigned
230
231
232
233
234
235
236
237
238
239
240
241 return RemainingTokenColumns + 1 - Split.second;
242}
243
245
247 unsigned Offset,
248 StringRef::size_type Length,
249 unsigned StartColumn) const {
250 llvm_unreachable("Getting the length of a part of the string literal "
251 "indicates that the code tries to reflow it.");
252}
253
254unsigned
256 unsigned StartColumn) const {
260}
261
263 bool Break) const {
265}
266
268 const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
269 StringRef Postfix, unsigned UnbreakableTailLength, bool InPPDirective,
271 : BreakableToken(Tok, InPPDirective, Encoding, Style),
272 StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix),
273 UnbreakableTailLength(UnbreakableTailLength) {
277}
278
280 unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
281 unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const {
284}
285
288 unsigned ContentIndent,
290 Whitespaces.replaceWhitespaceInToken(
293}
294
297 unsigned StartColumn, unsigned UnbreakableTailLength, bool InPPDirective,
300 Tok, StartColumn, QuoteStyle == SingleQuotes ? "'"
301 : QuoteStyle == AtDoubleQuotes ? "@\""
302 : "\"",
303 QuoteStyle == SingleQuotes ? "'" : "\"",
304 UnbreakableTailLength, InPPDirective, Encoding, Style),
305 BracesNeeded(Tok.isNot(TT_StringInConcatenation)),
306 QuoteStyle(QuoteStyle) {
307
308
309
310
311
312
313
314 bool SignOnNewLine =
317
319
320
326 } else {
327
328
332 Postfix = SignOnNewLine ? "'" : "' +";
333 Prefix = SignOnNewLine ? "+ '" : "'";
334 } else {
337 Prefix = SignOnNewLine ? "+ @\"" : "@\"";
338 } else {
340 Prefix = SignOnNewLine ? "+ \"" : "\"";
341 }
343 Postfix = SignOnNewLine ? "\"" : "\" +";
344 }
345 }
346
347
349
350
354 }
355}
356
358 unsigned LineIndex, unsigned Offset, unsigned StartColumn) const {
362}
363
364unsigned
366 bool Break) const {
367 return std::max(
368 0,
372 : 0) +
374}
375
377 unsigned LineIndex, unsigned TailOffset, Split Split,
379 Whitespaces.replaceWhitespaceInToken(
384
386}
387
390
392 return;
393
394
395
396
397
398
399 Whitespaces.replaceWhitespaceInToken(
400 Tok, 0, 1, "",
402 0);
403 Whitespaces.replaceWhitespaceInToken(
404 Tok, Tok.TokenText.size() - 1, 1,
406 "", InPPDirective, 0, 0);
407}
408
410 unsigned StartColumn, bool InPPDirective,
414 StartColumn(StartColumn) {}
415
417
420 unsigned ColumnLimit, unsigned ContentStartColumn,
421 const llvm::Regex &CommentPragmasRegex) const {
422
424 return Split(StringRef::npos, 0);
428}
429
431 unsigned LineIndex, unsigned TailOffset, Split Split,
433 StringRef Text = Content[LineIndex].substr(TailOffset);
434
435
436
437
438 unsigned BreakOffsetInToken =
440 unsigned CharsToRemove = Split.second;
441 Whitespaces.replaceWhitespaceInToken(
442 tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", "",
443 false, 0, 1);
444}
445
448}
449
451 Content = Content.trim(Blanks);
452
453
454 bool hasSpecialMeaningPrefix = false;
455 for (StringRef Prefix :
456 {"@", "\\", "TODO", "FIXME", "XXX", "-# ", "- ", "+ ", "* "}) {
457 if (Content.starts_with(Prefix)) {
458 hasSpecialMeaningPrefix = true;
459 break;
460 }
461 }
462
463
464
465
466 static const auto kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\. ");
467 hasSpecialMeaningPrefix =
468 hasSpecialMeaningPrefix || kNumberedListRegexp.match(Content);
469
470
471
472
473 return Content.size() >= 2 && !hasSpecialMeaningPrefix &&
474 !Content.ends_with("\\") &&
475
476
478}
479
482 unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
485 DelimitersOnNewline(false),
486 UnbreakableTailLength(Token.UnbreakableTailLength) {
487 assert(Tok.is(TT_BlockComment) &&
488 "block comment section must start with a block comment");
489
491 assert(TokenText.starts_with("/*") && TokenText.ends_with("*/"));
492 TokenText.substr(2, TokenText.size() - 4)
493 .split(Lines, UseCRLF ? "\r\n" : "\n");
494
495 int IndentDelta = StartColumn - OriginalStartColumn;
499
502 for (size_t i = 1; i < Lines.size(); ++i)
503 adjustWhitespace(i, IndentDelta);
504
505
506
508
509
510
511
512
513
514 if (Lines.size() >= 2 && Content[1].starts_with("**") &&
517 }
518
519 Decoration = "* ";
520 if (Lines.size() == 1 && !FirstInLine) {
521
522
523
524
525
526 Decoration = "";
527 }
528 for (size_t i = 1, e = Content.size(); i < e && !Decoration.empty(); ++i) {
530 if (i + 1 == e) {
531
532 if (Text.empty())
533 break;
534 } else if (.empty() && Decoration.starts_with(Text)) {
535 continue;
536 }
537 while (.starts_with(Decoration))
538 Decoration = Decoration.drop_back(1);
539 }
540
541 LastLineNeedsDecoration = true;
543 for (size_t i = 1, e = Lines.size(); i < e; ++i) {
544 if (Content[i].empty()) {
545 if (i + 1 == e) {
546
547
548
549 LastLineNeedsDecoration = false;
550
551 if (e >= 2 && !Decoration.empty())
553 } else if (Decoration.empty()) {
554
555
557 }
558 continue;
559 }
560
561
562
563
564
565 unsigned DecorationSize = Decoration.starts_with(Content[i])
567 : Decoration.size();
568 if (DecorationSize)
569 ContentColumn[i] = DecorationColumn + DecorationSize;
571 if (!Decoration.starts_with(Content[i])) {
572 IndentAtLineBreak =
573 std::min(IndentAtLineBreak, std::max(0, ContentColumn[i]));
574 }
575 }
576 IndentAtLineBreak = std::max(IndentAtLineBreak, Decoration.size());
577
578
580 if ((Lines[0] == "*" || Lines[0].starts_with("* ")) && Lines.size() > 1) {
581
582 DelimitersOnNewline = true;
583 } else if (Lines[0].starts_with("* ") && Lines.size() == 1) {
584
585
586
587 unsigned EndColumn =
591 2;
593 }
594 }
595
596 LLVM_DEBUG({
597 llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n";
598 llvm::dbgs() << "DelimitersOnNewline " << DelimitersOnNewline << "\n";
599 for (size_t i = 0; i < Lines.size(); ++i) {
600 llvm::dbgs() << i << " |" << Content[i] << "| "
602 << "IN=" << (Content[i].data() - Lines[i].data()) << "\n";
603 }
604 });
605}
606
608 unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
609 unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const {
610
612 return Split(StringRef::npos, 0);
616}
617
618void BreakableBlockComment::adjustWhitespace(unsigned LineIndex,
619 int IndentDelta) {
620
621
622
623
624
625 size_t EndOfPreviousLine = Lines[LineIndex - 1].size();
627 --EndOfPreviousLine;
628
629
630 EndOfPreviousLine =
631 Lines[LineIndex - 1].find_last_not_of(Blanks, EndOfPreviousLine);
632 if (EndOfPreviousLine == StringRef::npos)
633 EndOfPreviousLine = 0;
634 else
635 ++EndOfPreviousLine;
636
637 size_t StartOfLine = Lines[LineIndex].find_first_not_of(Blanks);
638 if (StartOfLine == StringRef::npos)
639 StartOfLine = Lines[LineIndex].size();
640
641 StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine);
642
643 size_t PreviousContentOffset =
644 Content[LineIndex - 1].data() - Lines[LineIndex - 1].data();
645 Content[LineIndex - 1] = Lines[LineIndex - 1].substr(
646 PreviousContentOffset, EndOfPreviousLine - PreviousContentOffset);
647 Content[LineIndex] = Lines[LineIndex].substr(StartOfLine);
648
649
652 IndentDelta;
653}
654
656 unsigned Offset,
657 StringRef::size_type Length,
658 unsigned StartColumn) const {
662}
663
665 unsigned Offset,
666 unsigned StartColumn) const {
667 unsigned LineLength =
668 UnbreakableTailLength +
670 if (LineIndex + 1 == Lines.size()) {
671 LineLength += 2;
672
673 bool HasRemainingText = Offset < Content[LineIndex].size();
674 if (!HasRemainingText) {
675 bool HasDecoration = Lines[LineIndex].ltrim().starts_with(Decoration);
676 if (HasDecoration)
677 LineLength -= Decoration.size();
678 }
679 }
680 return LineLength;
681}
682
684 bool Break) const {
685 if (Break)
686 return IndentAtLineBreak;
688}
689
690const llvm::StringSet<>
692 "@param", "@return", "@returns", "@throws", "@type", "@template",
693 "@see", "@deprecated", "@define", "@exports", "@mods", "@private",
694};
695
698 return 0;
699
700
701
702 StringRef ContentWithNoDecoration = Content[LineIndex];
703 if (LineIndex == 0 && ContentWithNoDecoration.starts_with("*"))
704 ContentWithNoDecoration = ContentWithNoDecoration.substr(1).ltrim(Blanks);
705 StringRef FirstWord = ContentWithNoDecoration.substr(
706 0, ContentWithNoDecoration.find_first_of(Blanks));
709 return 0;
710}
711
715 StringRef Text = Content[LineIndex].substr(TailOffset);
716 StringRef Prefix = Decoration;
717
718
719
720 unsigned LocalIndentAtLineBreak = IndentAtLineBreak;
721 if (LineIndex + 1 == Lines.size() &&
723
724 Prefix = "";
725 if (LocalIndentAtLineBreak >= 2)
726 LocalIndentAtLineBreak -= 2;
727 }
728
729
730 unsigned BreakOffsetInToken =
732 unsigned CharsToRemove = Split.second;
733 assert(LocalIndentAtLineBreak >= Prefix.size());
734 std::string PrefixWithTrailingIndent = std::string(Prefix);
735 PrefixWithTrailingIndent.append(ContentIndent, ' ');
736 Whitespaces.replaceWhitespaceInToken(
737 tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "",
738 PrefixWithTrailingIndent, InPPDirective, 1,
739 LocalIndentAtLineBreak + ContentIndent -
740 PrefixWithTrailingIndent.size());
741}
742
744 unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
745 if ((LineIndex, CommentPragmasRegex))
746 return Split(StringRef::npos, 0);
747
748
749
750 size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks);
751 if (LineIndex) {
752 unsigned PreviousContentIndent = getContentIndent(LineIndex - 1);
753 if (PreviousContentIndent && Trimmed != StringRef::npos &&
754 Trimmed != PreviousContentIndent) {
755 return Split(StringRef::npos, 0);
756 }
757 }
758
759 return Split(0, Trimmed != StringRef::npos ? Trimmed : 0);
760}
761
763
764 return DelimitersOnNewline &&
765 Lines[0].substr(1).find_first_not_of(Blanks) != StringRef::npos;
766}
767
770 StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
771
772 assert(Tokens[LineIndex - 1] == Tokens[LineIndex] &&
773 "Reflowing whitespace within a token");
774
775
776 unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
777 Content[LineIndex - 1].size() -
779 unsigned WhitespaceLength = TrimmedContent.data() -
781 WhitespaceOffsetInToken;
782 Whitespaces.replaceWhitespaceInToken(
783 tokenAt(LineIndex), WhitespaceOffsetInToken,
784 WhitespaceLength, "",
786 0);
787}
788
791 if (LineIndex == 0) {
792 if (DelimitersOnNewline) {
793
794
795
796
797 size_t BreakLength = Lines[0].substr(1).find_first_not_of(Blanks);
798 if (BreakLength != StringRef::npos) {
799 insertBreak(LineIndex, 0, Split(1, BreakLength), 0,
800 Whitespaces);
801 }
802 }
803 return;
804 }
805
806
807 StringRef Prefix = Decoration;
808 if (Content[LineIndex].empty()) {
809 if (LineIndex + 1 == Lines.size()) {
810 if (!LastLineNeedsDecoration) {
811
812
813 Prefix = "";
814 }
815 } else if (!Decoration.empty()) {
816
817
818 Prefix = Prefix.substr(0, 1);
819 }
821
822 Prefix = Prefix.substr(0, 1);
823 }
824
825
826 unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
827 Content[LineIndex - 1].size() -
829 unsigned WhitespaceLength = Content[LineIndex].data() -
831 WhitespaceOffsetInToken;
832 Whitespaces.replaceWhitespaceInToken(
833 tokenAt(LineIndex), WhitespaceOffsetInToken, WhitespaceLength, "", Prefix,
835}
836
839 if (DelimitersOnNewline) {
840
841
842
843 StringRef Line = Content.back().substr(TailOffset);
844 StringRef TrimmedLine = Line.rtrim(Blanks);
845 if (!TrimmedLine.empty())
846 return Split(TrimmedLine.size(), Line.size() - TrimmedLine.size());
847 }
848 return Split(StringRef::npos, 0);
849}
850
852 unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
853
854
855 StringRef IndentContent = Content[LineIndex];
856 if (Lines[LineIndex].ltrim(Blanks).starts_with("*"))
857 IndentContent = Lines[LineIndex].ltrim(Blanks).substr(1);
859 !CommentPragmasRegex.match(IndentContent) &&
862}
863
865 const FormatToken &Token, unsigned StartColumn, bool InPPDirective,
868 assert(Tok.is(TT_LineComment) &&
869 "line comment section must start with a line comment");
872
873
874 int FirstLineSpaceChange = 0;
876 CurrentTok && CurrentTok->is(TT_LineComment);
877 CurrentTok = CurrentTok->Next) {
878 LastLineTok = LineTok;
879 StringRef TokenText(CurrentTok->TokenText);
880 assert((TokenText.starts_with("//") || TokenText.starts_with("#")) &&
881 "unsupported line comment prefix, '//' and '#' are supported");
882 size_t FirstLineIndex = Lines.size();
883 TokenText.split(Lines, "\n");
886 PrefixSpaceChange.resize(Lines.size());
888 Prefix.resize(Lines.size());
889 OriginalPrefix.resize(Lines.size());
890 for (size_t i = FirstLineIndex, e = Lines.size(); i < e; ++i) {
893 OriginalPrefix[i] = IndentPrefix;
894 const int SpacesInPrefix = llvm::count(IndentPrefix, ' ');
895
896
897
898 const auto NoSpaceBeforeFirstCommentChar = [&]() {
899 assert(Lines[i].size() > IndentPrefix.size());
900 const char FirstCommentChar = Lines[i][IndentPrefix.size()];
901 const unsigned FirstCharByteSize =
904 Lines[i].substr(IndentPrefix.size(), FirstCharByteSize),
906 return false;
907 }
908
909
910
911
912
913
914
915
916
917 if (FirstCommentChar == '#' && !TokenText.starts_with("#"))
918 return false;
919 return FirstCommentChar == '\\' || isPunctuation(FirstCommentChar) ||
921 };
922
923
924
925
926
927
928 if (i == 0 || OriginalPrefix[i].rtrim(Blanks) !=
929 OriginalPrefix[i - 1].rtrim(Blanks)) {
930 if (SpacesInPrefix < Minimum && Lines[i].size() > IndentPrefix.size() &&
931 !NoSpaceBeforeFirstCommentChar()) {
932 FirstLineSpaceChange = Minimum - SpacesInPrefix;
933 } else if (static_cast<unsigned>(SpacesInPrefix) >
935 FirstLineSpaceChange =
937 } else {
938 FirstLineSpaceChange = 0;
939 }
940 }
941
942 if (Lines[i].size() != IndentPrefix.size()) {
943 PrefixSpaceChange[i] = FirstLineSpaceChange;
944
945 if (SpacesInPrefix + PrefixSpaceChange[i] < Minimum) {
946 PrefixSpaceChange[i] +=
947 Minimum - (SpacesInPrefix + PrefixSpaceChange[i]);
948 }
949
950 assert(Lines[i].size() > IndentPrefix.size());
951 const auto FirstNonSpace = Lines[i][IndentPrefix.size()];
952 const bool IsFormatComment = LineTok && switchesFormatting(*LineTok);
953 const bool LineRequiresLeadingSpace =
954 !NoSpaceBeforeFirstCommentChar() ||
955 (FirstNonSpace == '}' && FirstLineSpaceChange != 0);
956 const bool AllowsSpaceChange =
957 !IsFormatComment &&
958 (SpacesInPrefix != 0 || LineRequiresLeadingSpace);
959
960 if (PrefixSpaceChange[i] > 0 && AllowsSpaceChange) {
961 Prefix[i] = IndentPrefix.str();
962 Prefix[i].append(PrefixSpaceChange[i], ' ');
963 } else if (PrefixSpaceChange[i] < 0 && AllowsSpaceChange) {
964 Prefix[i] = IndentPrefix
965 .drop_back(std::min<std::size_t>(
966 -PrefixSpaceChange[i], SpacesInPrefix))
967 .str();
968 } else {
969 Prefix[i] = IndentPrefix.str();
970 }
971 } else {
972
973
974 Prefix[i] = IndentPrefix.drop_back(SpacesInPrefix).str();
975 }
976
978 Content[i] = Lines[i].substr(IndentPrefix.size());
982
983
984 size_t EndOfLine = Content[i].find_last_not_of(Blanks);
985 if (EndOfLine == StringRef::npos)
986 EndOfLine = Content[i].size();
987 else
988 ++EndOfLine;
990 }
991 LineTok = CurrentTok->Next;
992 if (CurrentTok->Next && !CurrentTok->Next->ContinuesLineCommentSection) {
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007 break;
1008 }
1009 }
1010}
1011
1012unsigned
1014 StringRef::size_type Length,
1015 unsigned StartColumn) const {
1019}
1020
1021unsigned
1023 bool ) const {
1025}
1026
1028 unsigned LineIndex, unsigned TailOffset, Split Split,
1030 StringRef Text = Content[LineIndex].substr(TailOffset);
1031
1032
1033 unsigned BreakOffsetInToken =
1035 unsigned CharsToRemove = Split.second;
1036 Whitespaces.replaceWhitespaceInToken(
1037 tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "",
1038 Prefix[LineIndex], InPPDirective, 1,
1039 ContentColumn[LineIndex] - Prefix[LineIndex].size());
1040}
1041
1043 unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
1044 if ((LineIndex, CommentPragmasRegex))
1045 return Split(StringRef::npos, 0);
1046
1047 size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks);
1048
1049
1050
1051
1052
1053 return Split(0, Trimmed != StringRef::npos ? Trimmed : 0);
1054}
1055
1058 if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) {
1059
1060
1061 Whitespaces.replaceWhitespace(
1062 *Tokens[LineIndex], 0, 0,
1063 StartColumn, true,
1064 false);
1065 } else if (LineIndex > 0) {
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076 unsigned Offset = Lines[LineIndex - 1].data() +
1077 Lines[LineIndex - 1].size() -
1079
1080
1081 unsigned WhitespaceLength =
1083 Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], Offset,
1084 WhitespaceLength,
1085 "",
1086 "",
1087 false,
1088 0,
1089 0);
1090 }
1091
1092 unsigned Offset =
1094 unsigned WhitespaceLength =
1095 Content[LineIndex].data() - Lines[LineIndex].data();
1096 Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], Offset,
1097 WhitespaceLength,
1098 "",
1100 false,
1101 0,
1102 0);
1103}
1104
1107
1108
1109
1110
1111
1112
1113 if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) {
1114
1115
1116
1117
1118
1119 unsigned LineColumn =
1121 (Content[LineIndex].data() - Lines[LineIndex].data()) +
1122 (OriginalPrefix[LineIndex].size() - Prefix[LineIndex].size());
1123
1124
1125
1126
1127
1128 Whitespaces.replaceWhitespace(*Tokens[LineIndex],
1129 1,
1130 LineColumn,
1131 LineColumn,
1132 true,
1133 false);
1134 }
1135 if (OriginalPrefix[LineIndex] != Prefix[LineIndex]) {
1136
1137 const auto SpacesToRemove = -std::min(PrefixSpaceChange[LineIndex], 0);
1138 const auto SpacesToAdd = std::max(PrefixSpaceChange[LineIndex], 0);
1139 Whitespaces.replaceWhitespaceInToken(
1140 tokenAt(LineIndex), OriginalPrefix[LineIndex].size() - SpacesToRemove,
1141 SpacesToRemove, "", "", false,
1142 0, SpacesToAdd);
1143 }
1144}
1145
1147 if (LastLineTok)
1148 State.NextToken = LastLineTok->Next;
1149}
1150
1152 unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
1153
1154
1155 StringRef IndentContent = Content[LineIndex];
1156 if (Lines[LineIndex].starts_with("//"))
1157 IndentContent = Lines[LineIndex].substr(2);
1158
1159
1160
1161
1162
1163
1165 !CommentPragmasRegex.match(IndentContent) &&
1168 OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1];
1169}
1170
1171}
1172}
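Declares BreakableToken, BreakableStringLiteral, BreakableComment, BreakableBlockComment and BreakableLineCommentSection classes, which contain token type-specific logic to break long lines in tokens and reflow content between tokens.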
This file implements an indenter that manages the indentation of continuations.
Various functions to configurably format source code.
Token - This structure provides full information about a lexed token.
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {....
BreakableStringLiteralUsingOperators(const FormatToken &Tok, QuoteStyleType QuoteStyle, bool UnindentPlus, unsigned StartColumn, unsigned UnbreakableTailLength, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
Creates a breakable token for a single line string literal for C#, Java, JavaScript,...
QuoteStyleType QuoteStyle
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, unsigned ContentIndent, WhitespaceManager &Whitespaces) const override
Emits the previously retrieved Split via Whitespaces.
StringRef RightBraceQuote
unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override
Returns the column at which content in line LineIndex starts, assuming no reflow.
void updateAfterBroken(WhitespaceManager &Whitespaces) const override
Adds replacements that are needed when the token is broken.
unsigned getRemainingLength(unsigned LineIndex, unsigned Offset, unsigned StartColumn) const override
Returns the number of columns required to format the text following the byte Offset in the line LineI...
Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const override
Returns a range (offset, length) at which to break the line at LineIndex, if previously broken at Tai...
unsigned UnbreakableTailLength
unsigned getRangeLength(unsigned LineIndex, unsigned Offset, StringRef::size_type Length, unsigned StartColumn) const override
Returns the number of columns required to format the text in the byte range [Offset,...
unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override
Returns the column at which content in line LineIndex starts, assuming no reflow.
unsigned getLineCount() const override
Returns the number of lines in this token in the original code.
unsigned getRemainingLength(unsigned LineIndex, unsigned Offset, unsigned StartColumn) const override
Returns the number of columns required to format the text following the byte Offset in the line LineI...
BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn, StringRef Prefix, StringRef Postfix, unsigned UnbreakableTailLength, bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
Creates a breakable token for a single line string literal.
void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, unsigned ContentIndent, WhitespaceManager &Whitespaces) const override
Emits the previously retrieved Split via Whitespaces.
Base class for tokens / ranges of tokens that can allow breaking within the tokens - for example,...
std::pair< StringRef::size_type, unsigned > Split
Contains starting character index and length of split.
const FormatStyle & Style
unsigned getLengthAfterCompression(unsigned RemainingTokenColumns, Split Split) const
Returns the number of columns needed to format RemainingTokenColumns, assuming that Split is within t...
const encoding::Encoding Encoding
Manages the whitespaces around tokens and their replacements.
unsigned columnWidthWithTabs(StringRef Text, unsigned StartColumn, unsigned TabWidth, Encoding Encoding)
Returns the number of columns required to display the Text, starting from the StartColumn on a termin...
unsigned getEscapeSequenceLength(StringRef Text)
Gets the length of an escape sequence inside a C++ string literal.
unsigned getCodePointNumBytes(char FirstChar, Encoding Encoding)
Gets the number of bytes in a sequence representing a single codepoint and starting with FirstChar in...
unsigned columnWidth(StringRef Text, Encoding Encoding)
Returns the number of columns required to display the Text on a generic Unicode-capable terminal.
static constexpr StringRef Blanks
bool switchesFormatting(const FormatToken &Token)
Checks if Token switches formatting, like /* clang-format off */.
static bool IsBlank(char C)
static BreakableToken::Split getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit, unsigned TabWidth, encoding::Encoding Encoding)
static StringRef getLineCommentIndentPrefix(StringRef Comment, const FormatStyle &Style)
static bool mayReflowContent(StringRef Content)
static BreakableToken::Split getCommentSplit(StringRef Text, unsigned ContentStartColumn, unsigned ColumnLimit, unsigned TabWidth, encoding::Encoding Encoding, const FormatStyle &Style, bool DecorationEndsWithStar=false)
The JSON file list parser is used to communicate input to InstallAPI.
LLVM_READONLY bool isAlphanumeric(unsigned char c)
Return true if this character is an ASCII letter or digit: [a-zA-Z0-9].
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
LLVM_READONLY bool isPunctuation(unsigned char c)
Return true if this character is an ASCII punctuation character.
bool Other
Put a space in parentheses not covered by preceding options.
The FormatStyle is used to configure the formatting to follow specific guidelines.
unsigned ContinuationIndentWidth
Indent width for line continuations.
@ LK_Java
Should be used for Java.
@ LK_TextProto
Should be used for Protocol Buffer messages in text format (https://developers.google....
bool Cpp11BracedListStyle
If true, format braced lists as best suited for C++11 braced lists.
BinaryOperatorStyle BreakBeforeBinaryOperators
The way to wrap binary operators.
@ BOS_None
Break after operators.
LanguageKind Language
Language, this format style is targeted at.
unsigned TabWidth
The number of columns used for tab stops.
OperandAlignmentStyle AlignOperands
If true, horizontally align operands of binary and ternary expressions.
SpacesInParensCustom SpacesInParensOptions
Control of individual spaces in parentheses.
SpacesInLineComment SpacesInLineCommentPrefix
How many spaces are allowed at the start of a line comment.
bool isJavaScript() const
@ OAS_AlignAfterOperator
Horizontally align operands of binary and ternary expressions.
unsigned ColumnLimit
The column limit.
A wrapper around a Token storing information about the whitespace characters preceding it.
StringRef TokenText
The raw text of the token.
unsigned Finalized
If true, this token has been fully formatted (indented and potentially re-formatted inside),...
FormatToken * Next
The next token in the unwrapped line.
bool is(tok::TokenKind Kind) const
The current state when indenting an unwrapped line.