LLVM: lib/Support/YAMLParser.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
29#include
30#include
31#include
32#include
33#include
34#include
35#include <system_error>
36#include
37
38using namespace llvm;
39using namespace yaml;
40
49
50
51
52using EncodingInfo = std::pair<UnicodeEncodingForm, unsigned>;
53
54
55
56
57
58
59
61 if (Input.empty())
63
65 case 0x00:
66 if (Input.size() >= 4) {
67 if ( Input[1] == 0
73 }
74
75 if (Input.size() >= 2 && Input[1] != 0)
78 case 0xFF:
79 if ( Input.size() >= 4
84
88 case 0xFE:
92 case 0xEF:
93 if ( Input.size() >= 3
98 }
99
100
103
104 if (Input.size() >= 2 && Input[1] == 0)
106
108}
109
110
// Empty out-of-line definitions of anchor() for every node class.
// NOTE(review): presumably these exist only to pin each class's vtable to
// this translation unit -- confirm against the class declarations.
111void Node::anchor() {}
112void NullNode::anchor() {}
113void ScalarNode::anchor() {}
114void BlockScalarNode::anchor() {}
115void KeyValueNode::anchor() {}
116void MappingNode::anchor() {}
117void SequenceNode::anchor() {}
118void AliasNode::anchor() {}
119
120namespace llvm {
121namespace yaml {
122
123
150
151
152
154
155
157
159};
160
161}
162}
163
165
166namespace {
167
168
169
170
171
172
173
174
175
176
/// Record describing a candidate simple key. Equality (below) compares only
/// the Tok member.
/// NOTE(review): the Tok member declaration and the operator== signature are
/// not visible in this listing.
177struct SimpleKey {
 // Column recorded for the candidate.
179 unsigned Column = 0;
 // Line recorded for the candidate.
180 unsigned Line = 0;
 // Flow nesting level recorded for the candidate.
181 unsigned FlowLevel = 0;
 // When true, discarding this candidate as stale is reported as an error.
182 bool IsRequired = false;
183
185 return Tok == Other.Tok;
186 }
187};
188
189}
190
191
192
193
195
199
200
201 if (Position < End && (*Position & 0x80) == 0) {
202 return {*Position, 1};
203 }
204
205
206 if (Position + 1 < End && ((*Position & 0xE0) == 0xC0) &&
207 ((*(Position + 1) & 0xC0) == 0x80)) {
208 uint32_t codepoint = ((*Position & 0x1F) << 6) |
209 (*(Position + 1) & 0x3F);
210 if (codepoint >= 0x80)
211 return {codepoint, 2};
212 }
213
214
215 if (Position + 2 < End && ((*Position & 0xF0) == 0xE0) &&
216 ((*(Position + 1) & 0xC0) == 0x80) &&
217 ((*(Position + 2) & 0xC0) == 0x80)) {
218 uint32_t codepoint = ((*Position & 0x0F) << 12) |
219 ((*(Position + 1) & 0x3F) << 6) |
220 (*(Position + 2) & 0x3F);
221
222
223 if (codepoint >= 0x800 &&
224 (codepoint < 0xD800 || codepoint > 0xDFFF))
225 return {codepoint, 3};
226 }
227
228
229 if (Position + 3 < End && ((*Position & 0xF8) == 0xF0) &&
230 ((*(Position + 1) & 0xC0) == 0x80) &&
231 ((*(Position + 2) & 0xC0) == 0x80) &&
232 ((*(Position + 3) & 0xC0) == 0x80)) {
233 uint32_t codepoint = ((*Position & 0x07) << 18) |
234 ((*(Position + 1) & 0x3F) << 12) |
235 ((*(Position + 2) & 0x3F) << 6) |
236 (*(Position + 3) & 0x3F);
237 if (codepoint >= 0x10000 && codepoint <= 0x10FFFF)
238 return {codepoint, 4};
239 }
240 return {0, 0};
241}
242
243namespace llvm {
244namespace yaml {
245
246
248public:
250 std::error_code *EC = nullptr);
252 std::error_code *EC = nullptr);
253
254
256
257
259
264
266 if (Position >= End)
267 Position = End - 1;
268
269
270 if (EC)
272
273
274
275 if (!Failed)
277 Failed = true;
278 }
279
280
284
285private:
287
289 return StringRef(Current, End - Current);
290 }
291
292
293
294
295
296
297
299 return ::decodeUTF8(StringRef(Position, End - Position));
300 }
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
330
331
332
333
334
335
336
338
339
340
341
342
343
344
346
347
348
349
350
351
352
354
355
356
357
358
359
360
362
364
365
366
367
368
369
372
373
374
375 void advanceWhile(SkipWhileFunc Func);
376
377
378
379
380 void scan_ns_uri_char();
381
382
383
384
386
387
389
390
391
393
394
395
397
398
399 bool isLineEmpty(StringRef Line);
400
401
402
403
404 bool consumeLineBreakIfPresent();
405
406
408 , unsigned AtColumn
409 , bool IsRequired);
410
411
412
413
414
415 void removeStaleSimpleKeyCandidates();
416
417
418 void removeSimpleKeyCandidatesOnFlowLevel(unsigned Level);
419
420
421
422 bool unrollIndent(int ToColumn);
423
424
425
426 bool rollIndent( int ToColumn
429
430
431
433
434
435 void scanToNextToken();
436
437
438 bool scanStreamStart();
439
440
441 bool scanStreamEnd();
442
443
444 bool scanDirective();
445
446
447 bool scanDocumentIndicator(bool IsStart);
448
449
450 bool scanFlowCollectionStart(bool IsSequence);
451
452
453 bool scanFlowCollectionEnd(bool IsSequence);
454
455
456 bool scanFlowEntry();
457
458
459 bool scanBlockEntry();
460
461
462 bool scanKey();
463
464
465 bool scanValue();
466
467
468 bool scanFlowScalar(bool IsDoubleQuoted);
469
470
471 bool scanPlainScalar();
472
473
474 bool scanAliasOrAnchor(bool IsAlias);
475
476
477 bool scanBlockScalar(bool IsLiteral);
478
479
480
481
482
483
484
485 bool scanBlockScalarIndicators(char &StyleIndicator, char &ChompingIndicator,
486 unsigned &IndentIndicator, bool &IsDone);
487
488
489 char scanBlockStyleIndicator();
490
491
492 char scanBlockChompingIndicator();
493
494
495 unsigned scanBlockIndentationIndicator();
496
497
498
499
500 bool scanBlockScalarHeader(char &ChompingIndicator, unsigned &IndentIndicator,
501 bool &IsDone);
502
503
504
505
506 bool findBlockScalarIndent(unsigned &BlockIndent, unsigned BlockExitIndent,
507 unsigned &LineBreaks, bool &IsDone);
508
509
510
511
512 bool scanBlockScalarIndent(unsigned BlockIndent, unsigned BlockExitIndent,
513 bool &IsDone);
514
515
516 bool scanTag();
517
518
519 bool fetchMoreTokens();
520
521
523
524
526
527
529
530
532
533
534 int Indent;
535
536
537 unsigned Column;
538
539
540 unsigned Line;
541
542
543 unsigned FlowLevel;
544
545
546 bool IsStartOfStream;
547
548
549 bool IsSimpleKeyAllowed;
550
551
552
553 bool IsAdjacentValueAllowedInFlow;
554
555
557
558
559 bool ShowColors;
560
561
562
563
565
566
568
569
571
572 std::error_code *EC;
573};
574
575}
576}
577
578
581 if (UnicodeScalarValue <= 0x7F) {
582 Result.push_back(UnicodeScalarValue & 0x7F);
583 } else if (UnicodeScalarValue <= 0x7FF) {
584 uint8_t FirstByte = 0xC0 | ((UnicodeScalarValue & 0x7C0) >> 6);
585 uint8_t SecondByte = 0x80 | (UnicodeScalarValue & 0x3F);
586 Result.push_back(FirstByte);
587 Result.push_back(SecondByte);
588 } else if (UnicodeScalarValue <= 0xFFFF) {
589 uint8_t FirstByte = 0xE0 | ((UnicodeScalarValue & 0xF000) >> 12);
590 uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
591 uint8_t ThirdByte = 0x80 | (UnicodeScalarValue & 0x3F);
592 Result.push_back(FirstByte);
593 Result.push_back(SecondByte);
594 Result.push_back(ThirdByte);
595 } else if (UnicodeScalarValue <= 0x10FFFF) {
596 uint8_t FirstByte = 0xF0 | ((UnicodeScalarValue & 0x1F0000) >> 18);
597 uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0x3F000) >> 12);
598 uint8_t ThirdByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
599 uint8_t FourthByte = 0x80 | (UnicodeScalarValue & 0x3F);
600 Result.push_back(FirstByte);
601 Result.push_back(SecondByte);
602 Result.push_back(ThirdByte);
603 Result.push_back(FourthByte);
604 }
605}
606
610 while (true) {
612 switch (T.Kind) {
614 OS << "Stream-Start: ";
615 break;
617 OS << "Stream-End: ";
618 break;
620 OS << "Version-Directive: ";
621 break;
623 OS << "Tag-Directive: ";
624 break;
626 OS << "Document-Start: ";
627 break;
629 OS << "Document-End: ";
630 break;
632 OS << "Block-Entry: ";
633 break;
635 OS << "Block-End: ";
636 break;
638 OS << "Block-Sequence-Start: ";
639 break;
641 OS << "Block-Mapping-Start: ";
642 break;
644 OS << "Flow-Entry: ";
645 break;
647 OS << "Flow-Sequence-Start: ";
648 break;
650 OS << "Flow-Sequence-End: ";
651 break;
653 OS << "Flow-Mapping-Start: ";
654 break;
656 OS << "Flow-Mapping-End: ";
657 break;
659 OS << "Key: ";
660 break;
662 OS << "Value: ";
663 break;
665 OS << "Scalar: ";
666 break;
668 OS << "Block Scalar: ";
669 break;
671 OS << "Alias: ";
672 break;
674 OS << "Anchor: ";
675 break;
677 OS << "Tag: ";
678 break;
680 break;
681 }
682 OS << T.Range << "\n";
684 break;
686 return false;
687 }
688 return true;
689}
690
694 while (true) {
697 break;
699 return false;
700 }
701 return true;
702}
703
705 std::string EscapedInput;
707 if (*i == '\\')
708 EscapedInput += "\\\\";
709 else if (*i == '"')
710 EscapedInput += "\\\"";
711 else if (*i == 0)
712 EscapedInput += "\\0";
713 else if (*i == 0x07)
714 EscapedInput += "\\a";
715 else if (*i == 0x08)
716 EscapedInput += "\\b";
717 else if (*i == 0x09)
718 EscapedInput += "\\t";
719 else if (*i == 0x0A)
720 EscapedInput += "\\n";
721 else if (*i == 0x0B)
722 EscapedInput += "\\v";
723 else if (*i == 0x0C)
724 EscapedInput += "\\f";
725 else if (*i == 0x0D)
726 EscapedInput += "\\r";
727 else if (*i == 0x1B)
728 EscapedInput += "\\e";
729 else if ((unsigned char)*i < 0x20) {
730 std::string HexStr = utohexstr(*i);
731 EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr;
732 } else if (*i & 0x80) {
735 if (UnicodeScalarValue.second == 0) {
736
740
741 return EscapedInput;
742 }
743 if (UnicodeScalarValue.first == 0x85)
744 EscapedInput += "\\N";
745 else if (UnicodeScalarValue.first == 0xA0)
746 EscapedInput += "\\_";
747 else if (UnicodeScalarValue.first == 0x2028)
748 EscapedInput += "\\L";
749 else if (UnicodeScalarValue.first == 0x2029)
750 EscapedInput += "\\P";
751 else if (!EscapePrintable &&
753 EscapedInput += StringRef(i, UnicodeScalarValue.second);
754 else {
755 std::string HexStr = utohexstr(UnicodeScalarValue.first);
756 if (HexStr.size() <= 2)
757 EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr;
758 else if (HexStr.size() <= 4)
759 EscapedInput += "\\u" + std::string(4 - HexStr.size(), '0') + HexStr;
760 else if (HexStr.size() <= 8)
761 EscapedInput += "\\U" + std::string(8 - HexStr.size(), '0') + HexStr;
762 }
763 i += UnicodeScalarValue.second - 1;
764 } else {
765 EscapedInput.push_back(*i);
766 }
767 }
768 return EscapedInput;
769}
770
772 switch (S.size()) {
773 case 1:
774 switch (S.front()) {
775 case 'y':
776 case 'Y':
777 return true;
778 case 'n':
779 case 'N':
780 return false;
781 default:
782 return std::nullopt;
783 }
784 case 2:
785 switch (S.front()) {
786 case 'O':
787 if (S[1] == 'N')
788 return true;
789 [[fallthrough]];
790 case 'o':
791 if (S[1] == 'n')
792 return true;
793 return std::nullopt;
794 case 'N':
795 if (S[1] == 'O')
796 return false;
797 [[fallthrough]];
798 case 'n':
799 if (S[1] == 'o')
800 return false;
801 return std::nullopt;
802 default:
803 return std::nullopt;
804 }
805 case 3:
806 switch (S.front()) {
807 case 'O':
809 return false;
810 [[fallthrough]];
811 case 'o':
813 return false;
814 return std::nullopt;
815 case 'Y':
817 return true;
818 [[fallthrough]];
819 case 'y':
821 return true;
822 return std::nullopt;
823 default:
824 return std::nullopt;
825 }
826 case 4:
827 switch (S.front()) {
828 case 'T':
830 return true;
831 [[fallthrough]];
832 case 't':
833 if (S.drop_front() == "rue")
834 return true;
835 return std::nullopt;
836 default:
837 return std::nullopt;
838 }
839 case 5:
840 switch (S.front()) {
841 case 'F':
842 if (S.drop_front() == "ALSE")
843 return false;
844 [[fallthrough]];
845 case 'f':
846 if (S.drop_front() == "alse")
847 return false;
848 return std::nullopt;
849 default:
850 return std::nullopt;
851 }
852 default:
853 return std::nullopt;
854 }
855}
856
858 std::error_code *EC)
859 : SM(sm), ShowColors(ShowColors), EC(EC) {
861}
862
864 std::error_code *EC)
865 : SM(SM_), ShowColors(ShowColors), EC(EC) {
866 init(Buffer);
867}
868
870 InputBuffer = Buffer;
873 Indent = -1;
874 Column = 0;
875 Line = 0;
876 FlowLevel = 0;
877 IsStartOfStream = true;
878 IsSimpleKeyAllowed = true;
879 IsAdjacentValueAllowedInFlow = false;
881 std::unique_ptr InputBufferOwner =
884}
885
887
888
889 bool NeedMore = false;
890 while (true) {
891 if (TokenQueue.empty() || NeedMore) {
892 if (!fetchMoreTokens()) {
893 TokenQueue.clear();
894 SimpleKeys.clear();
895 TokenQueue.push_back(Token());
896 return TokenQueue.front();
897 }
898 }
899 assert(!TokenQueue.empty() &&
900 "fetchMoreTokens lied about getting tokens!");
901
902 removeStaleSimpleKeyCandidates();
903 SimpleKey SK;
904 SK.Tok = TokenQueue.begin();
906 break;
907 else
908 NeedMore = true;
909 }
910 return TokenQueue.front();
911}
912
915
916 if (!TokenQueue.empty())
917 TokenQueue.pop_front();
918
919
920
921 if (TokenQueue.empty())
922 TokenQueue.resetAlloc();
923
924 return Ret;
925}
926
928 if (Position == End)
929 return Position;
930
931 if ( *Position == 0x09
932 || (*Position >= 0x20 && *Position <= 0x7E))
933 return Position + 1;
934
935
936 if (uint8_t(*Position) & 0x80) {
938 if ( u8d.second != 0
939 && u8d.first != 0xFEFF
940 && ( u8d.first == 0x85
941 || ( u8d.first >= 0xA0
942 && u8d.first <= 0xD7FF)
943 || ( u8d.first >= 0xE000
944 && u8d.first <= 0xFFFD)
945 || ( u8d.first >= 0x10000
946 && u8d.first <= 0x10FFFF)))
947 return Position + u8d.second;
948 }
949 return Position;
950}
951
953 if (Position == End)
954 return Position;
955 if (*Position == 0x0D) {
956 if (Position + 1 != End && *(Position + 1) == 0x0A)
957 return Position + 2;
958 return Position + 1;
959 }
960
961 if (*Position == 0x0A)
962 return Position + 1;
963 return Position;
964}
965
967 if (Position == End)
968 return Position;
969 if (*Position == ' ')
970 return Position + 1;
971 return Position;
972}
973
975 if (Position == End)
976 return Position;
977 if (*Position == ' ' || *Position == '\t')
978 return Position + 1;
979 return Position;
980}
981
983 if (Position == End)
984 return Position;
985 if (*Position == ' ' || *Position == '\t')
986 return Position;
987 return skip_nb_char(Position);
988}
989
992 while (true) {
994 if (i == Position)
995 break;
996 Position = i;
997 }
998 return Position;
999}
1000
1001void Scanner::advanceWhile(SkipWhileFunc Func) {
1002 auto Final = skip_while(Func, Current);
1003 Column += Final - Current;
1004 Current = Final;
1005}
1006
1008
1010
/// Advances Current (and Column) one byte at a time for as long as the byte
/// at Current satisfies the URI-character condition, stopping at End or at
/// the first byte that does not.
/// NOTE(review): parts of the acceptance condition are not visible in this
/// listing; only the '%' prefix test and the punctuation set can be seen.
1011void Scanner::scan_ns_uri_char() {
1012 while (true) {
1013 if (Current == End)
1014 break;
1015 if ((*Current == '%' && Current + 2 < End &&
1018 StringRef(Current, 1).find_first_of("#;/?:@&=+$,_.!~*'()[]") !=
1020 ++Current;
1021 ++Column;
1022 } else {
1023 break;
1024 }
1025 }
1026}
1027
1028bool Scanner::consume(uint32_t Expected) {
1029 if (Expected >= 0x80) {
1030 setError("Cannot consume non-ascii characters", Current);
1031 return false;
1032 }
1033 if (Current == End)
1034 return false;
1035 if (uint8_t(*Current) >= 0x80) {
1036 setError("Cannot consume non-ascii characters", Current);
1037 return false;
1038 }
1039 if (uint8_t(*Current) == Expected) {
1040 ++Current;
1041 ++Column;
1042 return true;
1043 }
1044 return false;
1045}
1046
1047void Scanner::skip(uint32_t Distance) {
1048 Current += Distance;
1049 Column += Distance;
1050 assert(Current <= End && "Skipped past the end");
1051}
1052
1054 if (Position == End)
1055 return false;
1056 return *Position == ' ' || *Position == '\t' || *Position == '\r' ||
1057 *Position == '\n';
1058}
1059
1061 if (Position == End || isBlankOrBreak(Position))
1062 return false;
1063 if (FlowLevel &&
1064 StringRef(Position, 1).find_first_of(",[]{}") != StringRef::npos)
1065 return false;
1066 return true;
1067}
1068
1069bool Scanner::isLineEmpty(StringRef Line) {
1070 for (const auto *Position = Line.begin(); Position != Line.end(); ++Position)
1071 if (!isBlankOrBreak(Position))
1072 return false;
1073 return true;
1074}
1075
1076bool Scanner::consumeLineBreakIfPresent() {
1077 auto Next = skip_b_break(Current);
1078 if (Next == Current)
1079 return false;
1080 Column = 0;
1081 ++Line;
1082 Current = Next;
1083 return true;
1084}
1085
1087 , unsigned AtColumn
1088 , bool IsRequired) {
1089 if (IsSimpleKeyAllowed) {
1090 SimpleKey SK;
1091 SK.Tok = Tok;
1092 SK.Line = Line;
1093 SK.Column = AtColumn;
1094 SK.IsRequired = IsRequired;
1095 SK.FlowLevel = FlowLevel;
1096 SimpleKeys.push_back(SK);
1097 }
1098}
1099
/// Walks SimpleKeys and erases every candidate that was recorded on a line
/// other than the current Line, or whose Column lies more than 1024 columns
/// before the current Column. Erasing a candidate flagged IsRequired also
/// records an error at the start of its token.
/// NOTE(review): the first line of the loop header is not visible in this
/// listing.
1100void Scanner::removeStaleSimpleKeyCandidates() {
1102 i != SimpleKeys.end();) {
1103 if (i->Line != Line || i->Column + 1024 < Column) {
1104 if (i->IsRequired)
1105 setError( "Could not find expected : for simple key"
1106 , i->Tok->Range.begin());
 // erase() hands back the following element, so the iterator is not
 // incremented on this path.
1107 i = SimpleKeys.erase(i);
1108 } else {
1109 ++i;
1110 }
1111 }
1112}
1113
1114void Scanner::removeSimpleKeyCandidatesOnFlowLevel(unsigned Level) {
1115 if (!SimpleKeys.empty() && (SimpleKeys.end() - 1)->FlowLevel == Level)
1116 SimpleKeys.pop_back();
1117}
1118
/// Pops saved indentation levels until Indent is no greater than
/// \p ToColumn, queueing one token per level popped. Does nothing inside a
/// flow collection (FlowLevel != 0).
///
/// \returns always true in the visible code.
/// NOTE(review): the statement that sets the queued token's kind is not
/// visible in this listing.
1119bool Scanner::unrollIndent(int ToColumn) {
1120 Token T;
1121
1122 if (FlowLevel != 0)
1123 return true;
1124
1125 while (Indent > ToColumn) {
1127 T.Range = StringRef(Current, 1);
1128 TokenQueue.push_back(T);
 // Restore the enclosing indentation level.
1129 Indent = Indents.pop_back_val();
1130 }
1131
1132 return true;
1133}
1134
1135bool Scanner::rollIndent( int ToColumn
1138 if (FlowLevel)
1139 return true;
1140 if (Indent < ToColumn) {
1141 Indents.push_back(Indent);
1142 Indent = ToColumn;
1143
1144 Token T;
1146 T.Range = StringRef(Current, 0);
1147 TokenQueue.insert(InsertPoint, T);
1148 }
1149 return true;
1150}
1151
/// If Current points at '#', advances Current step by step (incrementing
/// Column once per step) until no further progress is made. Returns
/// immediately when Current is at End or is not '#'.
/// NOTE(review): the statement computing `I` is not visible in this listing,
/// so what ends the comment cannot be confirmed from here.
1152void Scanner::skipComment() {
1153 if (Current == End || *Current != '#')
1154 return;
1155 while (true) {
1156
1157
1159 if (I == Current)
1160 break;
1161 Current = I;
1162 ++Column;
1163 }
1164}
1165
/// Skips everything up to the next token: runs of spaces and tabs, an
/// optional comment, and then (if progress is made) moves to the start of
/// the following line and repeats. Each line advance increments Line, resets
/// Column to 0 and, outside flow collections, re-enables simple keys.
/// NOTE(review): the statement computing `i` is not visible in this listing;
/// it presumably skips one line break -- confirm.
1166void Scanner::scanToNextToken() {
1167 while (true) {
1168 while (Current != End && (*Current == ' ' || *Current == '\t')) {
1169 skip(1);
1170 }
1171
1172 skipComment();
1173
1174
1176 if (i == Current)
1177 break;
1178 Current = i;
1179 ++Line;
1180 Column = 0;
1181
1182 if (!FlowLevel)
1183 IsSimpleKeyAllowed = true;
1184 }
1185}
1186
/// Clears IsStartOfStream, queues a token whose range covers the first
/// EI.second bytes at Current, and advances Current past them.
///
/// \returns always true in the visible code.
/// NOTE(review): the computation of `EI` and the token kind assignment are
/// not visible in this listing. Column is not adjusted for the skipped bytes.
1187bool Scanner::scanStreamStart() {
1188 IsStartOfStream = false;
1189
1191
1192 Token T;
1194 T.Range = StringRef(Current, EI.second);
1195 TokenQueue.push_back(T);
1196 Current += EI.second;
1197 return true;
1198}
1199
/// Finishes the stream: unrolls all indentation, discards every simple-key
/// candidate, disables simple keys and adjacent values, and queues a
/// zero-length token at Current.
///
/// \returns always true in the visible code.
/// NOTE(review): the token kind assignment is not visible in this listing.
1200bool Scanner::scanStreamEnd() {
1201
 // If the input did not end at column 0, account for one more line.
1202 if (Column != 0) {
1203 Column = 0;
1204 ++Line;
1205 }
1206
1207 unrollIndent(-1);
1208 SimpleKeys.clear();
1209 IsSimpleKeyAllowed = false;
1210 IsAdjacentValueAllowedInFlow = false;
1211
1212 Token T;
1214 T.Range = StringRef(Current, 0);
1215 TokenQueue.push_back(T);
1216 return true;
1217}
1218
/// Scans a directive line. Indentation is fully unrolled and simple-key
/// state is reset first. The directive name is then read and two names are
/// recognised: "YAML" (followed by one run of non-space characters) and
/// "TAG" (followed by two such runs separated by white space). A token
/// spanning from Start to the final position is queued for either.
///
/// \returns true when a recognised directive was queued, false for any other
///          name (no error is recorded on that path in the visible code).
/// NOTE(review): the definitions of `Start` and `NameStart` and the token
/// kind assignments are not visible in this listing. Column is not updated
/// while Current advances here.
1219bool Scanner::scanDirective() {
1220
1221 unrollIndent(-1);
1222 SimpleKeys.clear();
1223 IsSimpleKeyAllowed = false;
1224 IsAdjacentValueAllowedInFlow = false;
1225
1229 Current = skip_while(&Scanner::skip_ns_char, Current);
1230 StringRef Name(NameStart, Current - NameStart);
1231 Current = skip_while(&Scanner::skip_s_white, Current);
1232
1233 Token T;
1234 if (Name == "YAML") {
1235 Current = skip_while(&Scanner::skip_ns_char, Current);
1237 T.Range = StringRef(Start, Current - Start);
1238 TokenQueue.push_back(T);
1239 return true;
1240 } else if(Name == "TAG") {
1241 Current = skip_while(&Scanner::skip_ns_char, Current);
1242 Current = skip_while(&Scanner::skip_s_white, Current);
1243 Current = skip_while(&Scanner::skip_ns_char, Current);
1245 T.Range = StringRef(Start, Current - Start);
1246 TokenQueue.push_back(T);
1247 return true;
1248 }
1249 return false;
1250}
1251
/// Consumes a three-byte document indicator at Current and queues a token
/// covering it, after unrolling all indentation and resetting simple-key
/// state.
///
/// \param IsStart not referenced by any visible line.
/// \returns always true in the visible code.
/// NOTE(review): the token kind assignment (presumably where IsStart is
/// used) is not visible in this listing.
1252bool Scanner::scanDocumentIndicator(bool IsStart) {
1253 unrollIndent(-1);
1254 SimpleKeys.clear();
1255 IsSimpleKeyAllowed = false;
1256 IsAdjacentValueAllowedInFlow = false;
1257
1258 Token T;
1260 T.Range = StringRef(Current, 3);
1261 skip(3);
1262 TokenQueue.push_back(T);
1263 return true;
1264}
1265
/// Consumes the single opening byte of a flow collection, queues a token for
/// it and increments FlowLevel.
///
/// \param IsSequence not referenced by any visible line.
/// \returns always true in the visible code.
/// NOTE(review): the token kind assignment (presumably where IsSequence is
/// used) is not visible in this listing.
1266bool Scanner::scanFlowCollectionStart(bool IsSequence) {
1267 Token T;
1270 T.Range = StringRef(Current, 1);
1271 skip(1);
1272 TokenQueue.push_back(T);
1273
1274
 // The collection itself may turn out to be a key; Column - 1 is the column
 // of the byte just consumed.
1275 saveSimpleKeyCandidate(--TokenQueue.end(), Column - 1, false);
1276
1277
1278 IsSimpleKeyAllowed = true;
1279
1280 IsAdjacentValueAllowedInFlow = false;
1281 ++FlowLevel;
1282 return true;
1283}
1284
/// Consumes the single closing byte of a flow collection and queues a token
/// for it. The newest simple-key candidate at the current flow level is
/// dropped, simple keys are disabled, adjacent values are enabled, and
/// FlowLevel is decremented unless it is already 0.
///
/// \param IsSequence not referenced by any visible line.
/// \returns always true in the visible code.
/// NOTE(review): the token kind assignment is not visible in this listing.
1285bool Scanner::scanFlowCollectionEnd(bool IsSequence) {
1286 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1287 IsSimpleKeyAllowed = false;
1288 IsAdjacentValueAllowedInFlow = true;
1289 Token T;
1292 T.Range = StringRef(Current, 1);
1293 skip(1);
1294 TokenQueue.push_back(T);
1295 if (FlowLevel)
1296 --FlowLevel;
1297 return true;
1298}
1299
/// Consumes one byte at Current as a flow entry separator and queues a token
/// for it. Drops the newest simple-key candidate at the current flow level,
/// re-enables simple keys and disables adjacent values.
///
/// \returns always true in the visible code.
/// NOTE(review): the token kind assignment is not visible in this listing.
1300bool Scanner::scanFlowEntry() {
1301 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1302 IsSimpleKeyAllowed = true;
1303 IsAdjacentValueAllowedInFlow = false;
1304 Token T;
1306 T.Range = StringRef(Current, 1);
1307 skip(1);
1308 TokenQueue.push_back(T);
1309 return true;
1310}
1311
/// Consumes one byte at Current as a block entry indicator and queues a
/// token for it. Drops the newest simple-key candidate at the current flow
/// level, re-enables simple keys and disables adjacent values.
///
/// \returns always true in the visible code.
/// NOTE(review): the first statement of the body and the token kind
/// assignment are not visible in this listing.
1312bool Scanner::scanBlockEntry() {
1314 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1315 IsSimpleKeyAllowed = true;
1316 IsAdjacentValueAllowedInFlow = false;
1317 Token T;
1319 T.Range = StringRef(Current, 1);
1320 skip(1);
1321 TokenQueue.push_back(T);
1322 return true;
1323}
1324
/// Consumes one byte at Current as an explicit key indicator and queues a
/// token for it. Drops the newest simple-key candidate at the current flow
/// level; afterwards simple keys are allowed only outside flow collections.
///
/// \returns always true in the visible code.
/// NOTE(review): the statement guarded by `if (!FlowLevel)` and the token
/// kind assignment are not visible in this listing.
1325bool Scanner::scanKey() {
1326 if (!FlowLevel)
1328
1329 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1330 IsSimpleKeyAllowed = !FlowLevel;
1331 IsAdjacentValueAllowedInFlow = false;
1332
1333 Token T;
1335 T.Range = StringRef(Current, 1);
1336 skip(1);
1337 TokenQueue.push_back(T);
1338 return true;
1339}
1340
/// Consumes one byte at Current as a value indicator and queues a token for
/// it. If a simple-key candidate is pending, the newest one is popped and an
/// extra token carrying the candidate's range is inserted into the queue
/// immediately before the candidate's token.
///
/// \returns true on success; false (with Failed set) when the candidate's
///          token can no longer be found in TokenQueue.
/// NOTE(review): several statements are not visible in this listing: both
/// token kind assignments, the declaration of `i`/`e`, the statement after
/// the insert, and the statement guarded by `if (!FlowLevel)`.
1341bool Scanner::scanValue() {
1342
1343
1344 if (!SimpleKeys.empty()) {
1345 SimpleKey SK = SimpleKeys.pop_back_val();
1346 Token T;
1348 T.Range = SK.Tok->Range;
 // Linear search for the queue position of the candidate's token.
1350 for (i = TokenQueue.begin(), e = TokenQueue.end(); i != e; ++i) {
1351 if (i == SK.Tok)
1352 break;
1353 }
 // Not found: fail without recording an error message.
1354 if (i == e) {
1355 Failed = true;
1356 return false;
1357 }
1358 i = TokenQueue.insert(i, T);
1359
1360
1362
1363 IsSimpleKeyAllowed = false;
1364 } else {
1365 if (!FlowLevel)
1367 IsSimpleKeyAllowed = !FlowLevel;
1368 }
1369 IsAdjacentValueAllowedInFlow = false;
1370
1371 Token T;
1373 T.Range = StringRef(Current, 1);
1374 skip(1);
1375 TokenQueue.push_back(T);
1376 return true;
1377}
1378
1379
1380
1383
1384
1385
1390
1391
1392 while (I >= First && *I == '\\') --I;
1393
1394
1395 return (Position - 1 - I) % 2 == 1;
1396}
1397
/// Scans a quoted scalar beginning at Current and queues a token spanning
/// from Start through the closing quote. The token is saved as a simple-key
/// candidate at the starting column; afterwards simple keys are disabled and
/// adjacent values are enabled.
///
/// \param IsDoubleQuoted selects the double-quoted branch ('"' terminator)
///        over the single-quoted branch ('\'' terminator, with '' treated as
///        an escaped quote).
/// \returns true on success; false with an error recorded when End is
///          reached before the closing quote.
/// NOTE(review): the definition of `Start`, the last clause of the do/while
/// condition, the statement computing `i` in the single-quoted loop and the
/// token kind assignment are not visible in this listing.
1398bool Scanner::scanFlowScalar(bool IsDoubleQuoted) {
1400 unsigned ColStart = Column;
1401 if (IsDoubleQuoted) {
 // Note: this branch advances Current directly and leaves Column and Line
 // unchanged while scanning the body.
1402 do {
1403 ++Current;
1404 while (Current != End && *Current != '"')
1405 ++Current;
1406
1407
1408 } while ( Current != End
1409 && *(Current - 1) == '\\'
1411 } else {
1412 skip(1);
1413 while (Current != End) {
1414
 // Two consecutive single quotes are skipped as a pair.
1415 if (Current + 1 < End && *Current == '\'' && *(Current + 1) == '\'') {
1416 skip(2);
1417 continue;
1418 } else if (*Current == '\'')
1419 break;
1421 if (i == Current) {
 // No progress from the first attempt; try a line break instead.
1422 i = skip_b_break(Current);
1423 if (i == Current)
1424 break;
1425 Current = i;
1426 Column = 0;
1427 ++Line;
1428 } else {
1429 if (i == End)
1430 break;
1431 Current = i;
1432 ++Column;
1433 }
1434 }
1435 }
1436
1437 if (Current == End) {
1438 setError("Expected quote at end of scalar", Current);
1439 return false;
1440 }
1441
 // Step over the closing quote.
1442 skip(1);
1443 Token T;
1445 T.Range = StringRef(Start, Current - Start);
1446 TokenQueue.push_back(T);
1447
1448 saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
1449
1450 IsSimpleKeyAllowed = false;
1451 IsAdjacentValueAllowedInFlow = true;
1452
1453 return true;
1454}
1455
/// Scans an unquoted scalar beginning at Current and queues a token spanning
/// from Start to the final value of Current. The token is saved as a
/// simple-key candidate at the starting column; afterwards both simple keys
/// and adjacent values are disabled.
///
/// \returns true on success; false with an error recorded when nothing was
///          consumed or when a tab is found in indentation after a line
///          break.
/// NOTE(review): the definitions of `Start` and `Tmp`, the statements
/// computing `i`, and the token kind assignment are not visible in this
/// listing.
1456bool Scanner::scanPlainScalar() {
1458 unsigned ColStart = Column;
1459 unsigned LeadingBlanks = 0;
1460 assert(Indent >= -1 && "Indent must be >= -1 !");
 // Minimum column a continuation line must reach (used outside flow
 // collections, see below).
1461 unsigned indent = static_cast<unsigned>(Indent + 1);
1462 while (Current != End) {
 // A '#' at this point ends the scalar.
1463 if (*Current == '#')
1464 break;
1465
 // Consume content; a ':' is content only when the byte after it passes
 // isPlainSafeNonBlank().
1466 while (Current != End &&
1467 ((*Current != ':' && isPlainSafeNonBlank(Current)) ||
1468 (*Current == ':' && isPlainSafeNonBlank(Current + 1)))) {
1470 if (i == Current)
1471 break;
1472 Current = i;
1473 ++Column;
1474 }
1475
1476
1477 if (!isBlankOrBreak(Current))
1478 break;
1479
1480
 // Look ahead over blanks and line breaks using Tmp; Current is only
 // committed further down.
1482 while (isBlankOrBreak(Tmp)) {
1484 if (i != Tmp) {
1485 if (LeadingBlanks && (Column < indent) && *Tmp == '\t') {
1486 setError("Found invalid tab character in indentation", Tmp);
1487 return false;
1488 }
1489 Tmp = i;
1490 ++Column;
1491 } else {
1492 i = skip_b_break(Tmp);
1493 if (!LeadingBlanks)
1494 LeadingBlanks = 1;
1495 Tmp = i;
1496 Column = 0;
1497 ++Line;
1498 }
1499 }
1500
 // Outside flow collections, a line indented less than `indent` ends the
 // scalar without committing the look-ahead.
1501 if (!FlowLevel && Column < indent)
1502 break;
1503
1504 Current = Tmp;
1505 }
1506 if (Start == Current) {
1507 setError("Got empty plain scalar", Start);
1508 return false;
1509 }
1510 Token T;
1512 T.Range = StringRef(Start, Current - Start);
1513 TokenQueue.push_back(T);
1514
1515
1516 saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
1517
1518 IsSimpleKeyAllowed = false;
1519 IsAdjacentValueAllowedInFlow = false;
1520
1521 return true;
1522}
1523
/// Scans an alias or anchor: skips the one-byte indicator, then consumes
/// name bytes until End, one of `[ ] { } , :`, or a byte on which the
/// per-byte advance makes no progress. Queues a token spanning from Start to
/// Current and saves it as a simple-key candidate; afterwards both simple
/// keys and adjacent values are disabled.
///
/// \param IsAlias not referenced by any visible line.
/// \returns true on success; false with an error recorded when the name is
///          empty.
/// NOTE(review): the definition of `Start`, the statement computing `i` and
/// the token kind assignment (presumably where IsAlias is used) are not
/// visible in this listing.
1524bool Scanner::scanAliasOrAnchor(bool IsAlias) {
1526 unsigned ColStart = Column;
1527 skip(1);
1528 while (Current != End) {
1529 if ( *Current == '[' || *Current == ']'
1530 || *Current == '{' || *Current == '}'
1531 || *Current == ','
1532 || *Current == ':')
1533 break;
1535 if (i == Current)
1536 break;
1537 Current = i;
1538 ++Column;
1539 }
1540
 // Only the indicator byte was consumed, so there is no name.
1541 if (Start + 1 == Current) {
1542 setError("Got empty alias or anchor", Start);
1543 return false;
1544 }
1545
1546 Token T;
1548 T.Range = StringRef(Start, Current - Start);
1549 TokenQueue.push_back(T);
1550
1551
1552 saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
1553
1554 IsSimpleKeyAllowed = false;
1555 IsAdjacentValueAllowedInFlow = false;
1556
1557 return true;
1558}
1559
1560bool Scanner::scanBlockScalarIndicators(char &StyleIndicator,
1561 char &ChompingIndicator,
1562 unsigned &IndentIndicator,
1563 bool &IsDone) {
1564 StyleIndicator = scanBlockStyleIndicator();
1565 if (!scanBlockScalarHeader(ChompingIndicator, IndentIndicator, IsDone))
1566 return false;
1567 return true;
1568}
1569
1570char Scanner::scanBlockStyleIndicator() {
1571 char Indicator = ' ';
1572 if (Current != End && (*Current == '>' || *Current == '|')) {
1573 Indicator = *Current;
1574 skip(1);
1575 }
1576 return Indicator;
1577}
1578
1579char Scanner::scanBlockChompingIndicator() {
1580 char Indicator = ' ';
1581 if (Current != End && (*Current == '+' || *Current == '-')) {
1582 Indicator = *Current;
1583 skip(1);
1584 }
1585 return Indicator;
1586}
1587
1588
1589
1590
1591
1593 unsigned LineBreaks, StringRef Str) {
1594 if (ChompingIndicator == '-')
1595 return 0;
1596 if (ChompingIndicator == '+')
1597 return LineBreaks;
1598
1599 return Str.empty() ? 0 : 1;
1600}
1601
1602unsigned Scanner::scanBlockIndentationIndicator() {
1603 unsigned Indent = 0;
1604 if (Current != End && (*Current >= '1' && *Current <= '9')) {
1605 Indent = unsigned(*Current - '0');
1606 skip(1);
1607 }
1608 return Indent;
1609}
1610
/// Scans what follows the block scalar style indicator: an optional chomping
/// indicator and an optional indentation indicator in either order, then
/// trailing white space, an optional comment and the line break.
///
/// \param[out] ChompingIndicator '+', '-' or ' ' when none was found.
/// \param[out] IndentIndicator 1..9, or 0 when none was found.
/// \param[out] IsDone set to true when the input ends right after the
///             header; a token covering the header is queued in that case.
/// \returns true on success; false with an error recorded when the header is
///          followed by something other than a line break.
/// NOTE(review): the token kind assignment is not visible in this listing.
1611bool Scanner::scanBlockScalarHeader(char &ChompingIndicator,
1612 unsigned &IndentIndicator, bool &IsDone) {
1613 auto Start = Current;
1614
1615 ChompingIndicator = scanBlockChompingIndicator();
1616 IndentIndicator = scanBlockIndentationIndicator();
1617
 // The chomping indicator may also come after the indentation indicator.
1618 if (ChompingIndicator == ' ')
1619 ChompingIndicator = scanBlockChompingIndicator();
1620 Current = skip_while(&Scanner::skip_s_white, Current);
1621 skipComment();
1622
1623 if (Current == End) {
1624 Token T;
1626 T.Range = StringRef(Start, Current - Start);
1627 TokenQueue.push_back(T);
1628 IsDone = true;
1629 return true;
1630 }
1631
1632 if (!consumeLineBreakIfPresent()) {
1633 setError("Expected a line break after block scalar header", Current);
1634 return false;
1635 }
1636 return true;
1637}
1638
/// Determines the indentation of a block scalar that has no explicit
/// indentation indicator by scanning forward to the first line that has
/// content after its leading spaces.
///
/// \param[out] BlockIndent set to the column of that first content.
/// \param BlockExitIndent a content line at or below this column ends the
///        scalar instead of defining its indentation.
/// \param[in,out] LineBreaks incremented once per line break consumed.
/// \param[out] IsDone set to true when the scalar ends before any content is
///             found (exit indent reached, End reached, or no line break).
/// \returns true on success; false when an earlier spaces-only line was
///          longer than the detected indentation.
/// NOTE(review): the declaration of `LongestAllSpaceLine` and the call that
/// receives the error message below are not visible in this listing.
1639bool Scanner::findBlockScalarIndent(unsigned &BlockIndent,
1640 unsigned BlockExitIndent,
1641 unsigned &LineBreaks, bool &IsDone) {
1642 unsigned MaxAllSpaceLineCharacters = 0;
1644
1645 while (true) {
1646 advanceWhile(&Scanner::skip_s_space);
1647 if (skip_nb_char(Current) != Current) {
1648
1649 if (Column <= BlockExitIndent) {
1650 IsDone = true;
1651 return true;
1652 }
1653
1654 BlockIndent = Column;
1655 if (MaxAllSpaceLineCharacters > BlockIndent) {
1657 "Leading all-spaces line must be smaller than the block indent",
1658 LongestAllSpaceLine);
1659 return false;
1660 }
1661 return true;
1662 }
 // The line held only spaces; remember the longest such line seen so far.
1663 if (skip_b_break(Current) != Current &&
1664 Column > MaxAllSpaceLineCharacters) {
1665
1666
1667 MaxAllSpaceLineCharacters = Column;
1668 LongestAllSpaceLine = Current;
1669 }
1670
1671
1672 if (Current == End) {
1673 IsDone = true;
1674 return true;
1675 }
1676
1677 if (!consumeLineBreakIfPresent()) {
1678 IsDone = true;
1679 return true;
1680 }
1681 ++LineBreaks;
1682 }
1683 return true;
1684}
1685
1686bool Scanner::scanBlockScalarIndent(unsigned BlockIndent,
1687 unsigned BlockExitIndent, bool &IsDone) {
1688
1689 while (Column < BlockIndent) {
1690 auto I = skip_s_space(Current);
1691 if (I == Current)
1692 break;
1693 Current = I;
1694 ++Column;
1695 }
1696
1697 if (skip_nb_char(Current) == Current)
1698 return true;
1699
1700 if (Column <= BlockExitIndent) {
1701 IsDone = true;
1702 return true;
1703 }
1704
1705 if (Column < BlockIndent) {
1706 if (Current != End && *Current == '#') {
1707 IsDone = true;
1708 return true;
1709 }
1710 setError("A text line is less indented than the block scalar", Current);
1711 return false;
1712 }
1713 return true;
1714}
1715
/// Scans a block scalar starting at its '|' or '>' indicator, accumulates
/// its text in a local buffer, and queues a token whose Range covers the
/// body and whose Value holds the accumulated text.
///
/// \param IsLiteral not referenced by any visible line; folding is decided
///        from the scanned style indicator instead.
/// \returns true on success (including the case where the header scan
///          already finished the scalar); false when any helper fails.
/// NOTE(review): the statement that follows the final LineBreaks adjustment
/// and the token kind assignment are not visible in this listing.
1716bool Scanner::scanBlockScalar(bool IsLiteral) {
1717 assert(*Current == '|' || *Current == '>');
1718 char StyleIndicator;
1719 char ChompingIndicator;
 // Filled in by the header scan; 0 means no explicit indentation indicator.
1720 unsigned BlockIndent;
1721 bool IsDone = false;
1722 if (!scanBlockScalarIndicators(StyleIndicator, ChompingIndicator, BlockIndent,
1723 IsDone))
1724 return false;
1725 if (IsDone)
1726 return true;
1727 bool IsFolded = StyleIndicator == '>';
1728
1729 const auto *Start = Current;
1730 unsigned BlockExitIndent = Indent < 0 ? 0 : (unsigned)Indent;
1731 unsigned LineBreaks = 0;
 // No explicit indentation: detect it from the first content line.
1732 if (BlockIndent == 0) {
1733 if (!findBlockScalarIndent(BlockIndent, BlockExitIndent, LineBreaks,
1734 IsDone))
1735 return false;
1736 }
1737
1738
1739 SmallString<256> Str;
1740 while (!IsDone) {
1741 if (!scanBlockScalarIndent(BlockIndent, BlockExitIndent, IsDone))
1742 return false;
1743 if (IsDone)
1744 break;
1745
1746
1747 auto LineStart = Current;
1748 advanceWhile(&Scanner::skip_nb_char);
1749 if (LineStart != Current) {
1750 if (LineBreaks && IsFolded && !Scanner::isLineEmpty(Str)) {
1751
1752
1753
 // In a folded scalar a single pending break becomes a space, or a
 // newline when the new line is blank; one pending break is then
 // dropped from the count in either case.
1754 if (LineBreaks == 1) {
1755 Str.append(LineBreaks,
1756 isLineEmpty(StringRef(LineStart, Current - LineStart))
1757 ? '\n'
1758 : ' ');
1759 }
1760
1761
1762
1763
1764 LineBreaks--;
1765 }
 // Emit the remaining pending breaks, then the line's text.
1766 Str.append(LineBreaks, '\n');
1767 Str.append(StringRef(LineStart, Current - LineStart));
1768 LineBreaks = 0;
1769 }
1770
1771
1772 if (Current == End)
1773 break;
1774
1775 if (!consumeLineBreakIfPresent())
1776 break;
1777 ++LineBreaks;
1778 }
1779
 // Input ended without a trailing break: count one.
1780 if (Current == End && !LineBreaks)
1781
1782 LineBreaks = 1;
1784
1785
1786 if (!FlowLevel)
1787 IsSimpleKeyAllowed = true;
1788 IsAdjacentValueAllowedInFlow = false;
1789
1790 Token T;
1792 T.Range = StringRef(Start, Current - Start);
1793 T.Value = std::string(Str);
1794 TokenQueue.push_back(T);
1795 return true;
1796}
1797
/// Scans a tag: skips the one-byte indicator, then consumes the tag body.
/// Queues a token spanning from Start to Current and saves it as a
/// simple-key candidate; afterwards both simple keys and adjacent values are
/// disabled.
///
/// \returns true on success; false on the failure path of the '<' form.
/// NOTE(review): the definition of `Start`, the condition guarding the
/// `return false` in the '<' branch and the token kind assignment are not
/// visible in this listing.
1798bool Scanner::scanTag() {
1800 unsigned ColStart = Column;
1801 skip(1);
 // Deliberately empty statement: an indicator followed by End or a
 // blank/break has no body to consume.
1802 if (Current == End || isBlankOrBreak(Current));
1803 else if (*Current == '<') {
1804 skip(1);
1805 scan_ns_uri_char();
1807 return false;
1808 } else {
1809
 // Any other form: consume up to the next white space. Column is not
 // updated on this path.
1810 Current = skip_while(&Scanner::skip_ns_char, Current);
1811 }
1812
1813 Token T;
1815 T.Range = StringRef(Start, Current - Start);
1816 TokenQueue.push_back(T);
1817
1818
1819 saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
1820
1821 IsSimpleKeyAllowed = false;
1822 IsAdjacentValueAllowedInFlow = false;
1823
1824 return true;
1825}
1826
/// Scans the next token (or tokens) into TokenQueue by dispatching on the
/// byte at Current to the matching scan* routine.
///
/// Order of work: emit the stream start if not yet done; skip to the next
/// token; emit the stream end at End; drop stale simple-key candidates;
/// unroll indentation to the current column; then test each token form in
/// turn.
///
/// \returns the result of the selected scan* routine; false with an error
///          recorded when no form matches.
/// NOTE(review): one line of the plain-scalar condition near the end is not
/// visible in this listing.
1827bool Scanner::fetchMoreTokens() {
1828 if (IsStartOfStream)
1829 return scanStreamStart();
1830
1831 scanToNextToken();
1832
1833 if (Current == End)
1834 return scanStreamEnd();
1835
1836 removeStaleSimpleKeyCandidates();
1837
1838 unrollIndent(Column);
1839
 // Directives and document indicators are recognised only at column 0.
1840 if (Column == 0 && *Current == '%')
1841 return scanDirective();
1842
 // NOTE(review): with the `Current + 4 <= End` guard, the
 // `Current + 3 == End` alternative below can never hold, so an indicator
 // that ends the input exactly is not matched here -- confirm intended.
1843 if (Column == 0 && Current + 4 <= End
1844 && *Current == '-'
1845 && *(Current + 1) == '-'
1846 && *(Current + 2) == '-'
1847 && (Current + 3 == End || isBlankOrBreak(Current + 3)))
1848 return scanDocumentIndicator(true);
1849
1850 if (Column == 0 && Current + 4 <= End
1851 && *Current == '.'
1852 && *(Current + 1) == '.'
1853 && *(Current + 2) == '.'
1854 && (Current + 3 == End || isBlankOrBreak(Current + 3)))
1855 return scanDocumentIndicator(false);
1856
1857 if (*Current == '[')
1858 return scanFlowCollectionStart(true);
1859
1860 if (*Current == '{')
1861 return scanFlowCollectionStart(false);
1862
1863 if (*Current == ']')
1864 return scanFlowCollectionEnd(true);
1865
1866 if (*Current == '}')
1867 return scanFlowCollectionEnd(false);
1868
1869 if (*Current == ',')
1870 return scanFlowEntry();
1871
 // '-' and '?' are indicators only when followed by a blank, a break, or
 // the end of input.
1872 if (*Current == '-' && (isBlankOrBreak(Current + 1) || Current + 1 == End))
1873 return scanBlockEntry();
1874
1875 if (*Current == '?' && (Current + 1 == End || isBlankOrBreak(Current + 1)))
1876 return scanKey();
1877
 // ':' starts a value unless the next byte passes isPlainSafeNonBlank();
 // that restriction is lifted when IsAdjacentValueAllowedInFlow is set.
1878 if (*Current == ':' &&
1879 (!isPlainSafeNonBlank(Current + 1) || IsAdjacentValueAllowedInFlow))
1880 return scanValue();
1881
1882 if (*Current == '*')
1883 return scanAliasOrAnchor(true);
1884
1885 if (*Current == '&')
1886 return scanAliasOrAnchor(false);
1887
1888 if (*Current == '!')
1889 return scanTag();
1890
 // Block scalars are recognised only outside flow collections.
1891 if (*Current == '|' && !FlowLevel)
1892 return scanBlockScalar(true);
1893
1894 if (*Current == '>' && !FlowLevel)
1895 return scanBlockScalar(false);
1896
1897 if (*Current == '\'')
1898 return scanFlowScalar(false);
1899
1900 if (*Current == '"')
1901 return scanFlowScalar(true);
1902
1903
 // Anything that is not a blank, a break or one of the listed indicator
 // bytes starts a plain scalar.
1904 StringRef FirstChar(Current, 1);
1905 if ((!isBlankOrBreak(Current) &&
1906 FirstChar.find_first_of("-?:,[]{}#&*!|>'\"%@`") == StringRef::npos) ||
1908 isPlainSafeNonBlank(Current + 1)))
1909 return scanPlainScalar();
1910
1911 setError("Unrecognized character while tokenizing.", Current);
1912 return false;
1913}
1914
1916 std::error_code *EC)
1917 : scanner(new Scanner(Input, SM, ShowColors, EC)) {}
1918
1920 std::error_code *EC)
1921 : scanner(new Scanner(InputBuffer, SM, ShowColors, EC)) {}
1922
1924
1926
1930
1933 scanner->printError(Range.Start, Kind, Msg, Range);
1934}
1935
1937 if (CurrentDoc)
1939
1940
1941 scanner->getNext();
1942
1943 CurrentDoc.reset(new Document(*this));
1945}
1946
1950
1955
1958 : Doc(D), TypeID(Type), Anchor(A), Tag(T) {
1960 SourceRange = SMRange(Start, Start);
1961}
1962
1965 if (!Raw.empty() && Raw != "!") {
1966 std::string Ret;
1968 Ret = std::string(Doc->getTagMap().find("!")->second);
1969 Ret += Raw.substr(1);
1970 return Ret;
1972 Ret = std::string(Doc->getTagMap().find("!!")->second);
1973 Ret += Raw.substr(2);
1974 return Ret;
1975 } else {
1977 std::map<StringRef, StringRef>::const_iterator It =
1978 Doc->getTagMap().find(TagHandle);
1979 if (It != Doc->getTagMap().end())
1980 Ret = std::string(It->second);
1981 else {
1984 T.Range = TagHandle;
1985 setError(Twine("Unknown tag handle ") + TagHandle, T);
1986 }
1988 return Ret;
1989 }
1990 }
1991
1994 return "tag:yaml.org,2002:null";
1997
1998 return "tag:yaml.org,2002:str";
2000 return "tag:yaml.org,2002:map";
2002 return "tag:yaml.org,2002:seq";
2003 }
2004
2005 return "";
2006}
2007
2009 return Doc->peekNext();
2010}
2011
2013 return Doc->getNext();
2014}
2015
2017 return Doc->parseBlockNode();
2018}
2019
2021 return Doc->NodeAllocator;
2022}
2023
2025 Doc->setError(Msg, Tok);
2026}
2027
2029 return Doc->failed();
2030}
2031
2033 if (Value[0] == '"')
2034 return getDoubleQuotedValue(Value, Storage);
2035 if (Value[0] == '\'')
2036 return getSingleQuotedValue(Value, Storage);
2037 return getPlainValue(Value, Storage);
2038}
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2058 UnescapeCallback) {
2059 size_t I = UnquotedValue.find_first_of(LookupChars);
2061 return UnquotedValue;
2062
2063 Storage.clear();
2065 char LastNewLineAddedAs = '\0';
2067 if (UnquotedValue[I] != '\r' && UnquotedValue[I] != '\n') {
2069 UnquotedValue = UnescapeCallback(UnquotedValue.drop_front(I), Storage);
2070 LastNewLineAddedAs = '\0';
2071 continue;
2072 }
2073 if (size_t LastNonSWhite = UnquotedValue.find_last_not_of(" \t", I);
2077 LastNewLineAddedAs = ' ';
2078 } else {
2079
2080
2081
2082
2083 switch (LastNewLineAddedAs) {
2084 case ' ':
2086 Storage.back() = '\n';
2087 LastNewLineAddedAs = '\n';
2088 break;
2089 case '\n':
2092 break;
2093 default:
2095 LastNewLineAddedAs = ' ';
2096 break;
2097 }
2098 }
2099
2100 if (UnquotedValue.substr(I, 2) == "\r\n")
2101 I++;
2102 UnquotedValue = UnquotedValue.drop_front(I + 1).ltrim(" \t");
2103 }
2106}
2107
// Produces the value of a double-quoted scalar. RawValue must include both
// surrounding '"' characters (asserted below). The quotes are stripped and the
// remainder is handed to parseScalarValue() together with a callback that
// handles backslash escapes; Storage is the scratch buffer passed through to
// both.
// NOTE(review): this listing is missing many lines (most case bodies and the
// tail of the lambda), so only the visible control flow is described here.
2108StringRef
2109ScalarNode::getDoubleQuotedValue(StringRef RawValue,
2110 SmallVectorImpl &Storage) const {
2111 assert(RawValue.size() >= 2 && RawValue.front() == '"' &&
2112 RawValue.back() == '"');
// Drop the opening and closing quote.
2113 StringRef UnquotedValue = RawValue.substr(1, RawValue.size() - 2);
2114
// Escape handler. Captures `this` because it reports errors via setError().
// Presumably invoked with text that starts at a backslash, since '\\' is one
// of the lookup characters passed to parseScalarValue() below -- confirm
// against parseScalarValue.
2115 auto UnescapeFunc = [this](StringRef UnquotedValue,
2116 SmallVectorImpl &Storage) {
// A single remaining character means nothing follows the escape introducer:
// report it, discard what was accumulated, and return an empty StringRef.
2118 if (UnquotedValue.size() == 1) {
2119 Token T;
2120 T.Range = UnquotedValue;
2121 setError("Unrecognized escape code", T);
2122 Storage.clear();
2123 return StringRef();
2124 }
// Step past the introducer so UnquotedValue[0] is the escape selector.
2125 UnquotedValue = UnquotedValue.drop_front(1);
2126 switch (UnquotedValue[0]) {
// Unknown selector: same error handling as the truncated-escape case above.
2127 default: {
2128 Token T;
2130 setError("Unrecognized escape code", T);
2131 Storage.clear();
2132 return StringRef();
2133 }
// Escaped line break: a "\r\n" pair is consumed as one unit by dropping the
// '\r' before falling through to the '\n' case.
2134 case '\r':
2135
2136 if (UnquotedValue.size() >= 2 && UnquotedValue[1] == '\n')
2137 UnquotedValue = UnquotedValue.drop_front(1);
2138 [[fallthrough]];
2139 case '\n':
// Single-character escapes follow; their bodies are not visible in this
// listing.
2141 case '0':
2143 break;
2144 case 'a':
2146 break;
2147 case 'b':
2149 break;
// 't' and a literal tab byte (0x09) share one case body.
2150 case 't':
2151 case 0x09:
2153 break;
2154 case 'n':
2156 break;
2157 case 'v':
2159 break;
2160 case 'f':
2162 break;
2163 case 'r':
2165 break;
2166 case 'e':
2168 break;
2169 case ' ':
2171 break;
2172 case '"':
2174 break;
2175 case '/':
2177 break;
2178 case '\\':
2180 break;
2181 case 'N':
2183 break;
2184 case '_':
2186 break;
2187 case 'L':
2189 break;
2190 case 'P':
2192 break;
// Numeric escapes: 'x', 'u' and 'U' read 2, 4 and 8 hex digits respectively.
// If fewer characters remain than required, the case just breaks without
// reporting anything. When getAsInteger() returns true the value is replaced
// by 0xFFFD before being encoded into Storage.
2193 case 'x': {
2194 if (UnquotedValue.size() < 3)
2195
2196 break;
2197 unsigned int UnicodeScalarValue;
2198 if (UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue))
2199
2200 UnicodeScalarValue = 0xFFFD;
2201 encodeUTF8(UnicodeScalarValue, Storage);
2203 }
2204 case 'u': {
2205 if (UnquotedValue.size() < 5)
2206
2207 break;
2208 unsigned int UnicodeScalarValue;
2209 if (UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue))
2210
2211 UnicodeScalarValue = 0xFFFD;
2212 encodeUTF8(UnicodeScalarValue, Storage);
2214 }
2215 case 'U': {
2216 if (UnquotedValue.size() < 9)
2217
2218 break;
2219 unsigned int UnicodeScalarValue;
2220 if (UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue))
2221
2222 UnicodeScalarValue = 0xFFFD;
2223 encodeUTF8(UnicodeScalarValue, Storage);
2225 }
2226 }
2228 };
2229
// Lookup characters: backslash (escapes) plus '\r' and '\n' (line breaks).
2230 return parseScalarValue(UnquotedValue, Storage, "\\\r\n", UnescapeFunc);
2231}
2232
// Produces the value of a single-quoted scalar. RawValue must include both
// surrounding '\'' characters (asserted below). The quotes are stripped and
// the remainder is handed to parseScalarValue() with '\'', '\r' and '\n' as
// the lookup characters.
// NOTE(review): the body of UnescapeFunc is not visible in this listing.
2233StringRef ScalarNode::getSingleQuotedValue(StringRef RawValue,
2234 SmallVectorImpl &Storage) {
2235 assert(RawValue.size() >= 2 && RawValue.front() == '\'' &&
2236 RawValue.back() == '\'');
// Drop the opening and closing quote.
2237 StringRef UnquotedValue = RawValue.substr(1, RawValue.size() - 2);
2238
// Capture-less callback passed to parseScalarValue() below.
2239 auto UnescapeFunc = [](StringRef UnquotedValue,
2240 SmallVectorImpl &Storage) {
2244 };
2245
2246 return parseScalarValue(UnquotedValue, Storage, "'\r\n", UnescapeFunc);
2247}
2248
// Produces the value of a plain (unquoted) scalar. Trailing '\r', '\n', ' '
// and '\t' characters are trimmed first; the rest is handed to
// parseScalarValue() with only line breaks as lookup characters and no
// unescape callback (nullptr).
2249StringRef ScalarNode::getPlainValue(StringRef RawValue,
2250 SmallVectorImpl &Storage) {
2251
2252
2253
2254 RawValue = RawValue.rtrim("\r\n \t");
2255 return parseScalarValue(RawValue, Storage, "\r\n", nullptr);
2256}
2257
2259 if (Key)
2260 return Key;
2261
2262 {
2268 }
2270 getNext();
2271 }
2272
2273
2277 }
2278
2279
2281}
2282
2284 if (Value)
2285 return Value;
2286
2288 Key->skip();
2289 else {
2292 }
2293
2296
2297
2298 {
2306 }
2307
2309 setError("Unexpected token in Key Value.", t);
2311 }
2312 getNext();
2313 }
2314
2315
2319 }
2320
2321
2323}
2324
// Advances the mapping's iteration state. Every terminal path visible here
// sets IsAtEnd to true and clears CurrentEntry; unexpected tokens are also
// reported through setError().
// NOTE(review): most conditions and case labels of this function are missing
// from the listing, so the comments below describe only the visible
// statements.
2325void MappingNode::increment() {
2327 IsAtEnd = true;
2328 CurrentEntry = nullptr;
2329 return;
2330 }
// The current entry, if any, is skipped before moving on.
2331 if (CurrentEntry) {
2332 CurrentEntry->skip();
2334 IsAtEnd = true;
2335 CurrentEntry = nullptr;
2336 return;
2337 }
2338 }
2341
2343 } else if (Type == MT_Block) {
// Block mapping: the default case reports the error and then falls through
// into the label that ends the iteration.
2344 switch (T.Kind) {
2347 IsAtEnd = true;
2348 CurrentEntry = nullptr;
2349 break;
2350 default:
2351 setError("Unexpected token. Expected Key or Block End", T);
2352 [[fallthrough]];
2354 IsAtEnd = true;
2355 CurrentEntry = nullptr;
2356 }
2357 } else {
// Remaining mapping type (the error text below refers to flow mappings).
2358 switch (T.Kind) {
2360
// One case re-enters increment() recursively.
2362 return increment();
2365 [[fallthrough]];
2367
2368 IsAtEnd = true;
2369 CurrentEntry = nullptr;
2370 break;
2371 default:
2372 setError( "Unexpected token. Expected Key, Flow Entry, or Flow "
2373 "Mapping End."
2374 , T);
2375 IsAtEnd = true;
2376 CurrentEntry = nullptr;
2377 }
2378 }
2379}
2380
2383 IsAtEnd = true;
2384 CurrentEntry = nullptr;
2385 return;
2386 }
2387 if (CurrentEntry)
2388 CurrentEntry->skip();
2391 switch (T.Kind) {
2395 if (!CurrentEntry) {
2396 IsAtEnd = true;
2397 CurrentEntry = nullptr;
2398 }
2399 break;
2402 IsAtEnd = true;
2403 CurrentEntry = nullptr;
2404 break;
2405 default:
2406 setError( "Unexpected token. Expected Block Entry or Block End."
2407 , T);
2408 [[fallthrough]];
2410 IsAtEnd = true;
2411 CurrentEntry = nullptr;
2412 }
2414 switch (T.Kind) {
2418 if (!CurrentEntry) {
2419 IsAtEnd = true;
2420 CurrentEntry = nullptr;
2421 }
2422 break;
2423 default:
2425 IsAtEnd = true;
2426 CurrentEntry = nullptr;
2427 }
2428 } else if (SeqType == ST_Flow) {
2429 switch (T.Kind) {
2431
2433 WasPreviousTokenFlowEntry = true;
2437 [[fallthrough]];
2439
2440 IsAtEnd = true;
2441 CurrentEntry = nullptr;
2442 break;
2446 setError("Could not find closing ]!", T);
2447
2448 IsAtEnd = true;
2449 CurrentEntry = nullptr;
2450 break;
2451 default:
2452 if (!WasPreviousTokenFlowEntry) {
2453 setError("Expected , between entries!", T);
2454 IsAtEnd = true;
2455 CurrentEntry = nullptr;
2456 break;
2457 }
2458
2460 if (!CurrentEntry) {
2461 IsAtEnd = true;
2462 }
2463 WasPreviousTokenFlowEntry = false;
2464 break;
2465 }
2466 }
2467}
2468
2470
2471 TagMap["!"] = "!";
2472 TagMap["!!"] = "tag:yaml.org,2002:";
2473
2474 if (parseDirectives())
2478 getNext();
2479}
2480
2482 if (stream.scanner->failed())
2483 return false;
2484 if (!Root && ())
2485 return false;
2486 Root->skip();
2489 return false;
2491 getNext();
2492 return skip();
2493 }
2494 return true;
2495}
2496
// Forwards to the owning stream's scanner and returns the reference it
// yields.
2497Token &Document::peekNext() {
2498 return stream.scanner->peekNext();
2499}
2500
// Forwards to the owning stream's scanner and returns the token by value.
2501Token Document::getNext() {
2502 return stream.scanner->getNext();
2503}
2504
// Reports Message through the stream's scanner, using the start of
// Location's Range as the error position.
2505void Document::setError(const Twine &Message, Token &Location) const {
2506 stream.scanner->setError(Message, Location.Range.begin());
2507}
2508
// Returns the failure state of the stream's scanner.
2509bool Document::failed() const {
2510 return stream.scanner->failed();
2511}
2512
2515
2516 Token AnchorInfo;
2518parse_property:
2519 switch (T.Kind) {
2521 getNext();
2522 return new (NodeAllocator) AliasNode(stream.CurrentDoc, T.Range.substr(1));
2525 setError("Already encountered an anchor for this node!", T);
2526 return nullptr;
2527 }
2528 AnchorInfo = getNext();
2529 T = peekNext();
2530 goto parse_property;
2533 setError("Already encountered a tag for this node!", T);
2534 return nullptr;
2535 }
2536 TagInfo = getNext();
2537 T = peekNext();
2538 goto parse_property;
2539 default:
2540 break;
2541 }
2542
2543 switch (T.Kind) {
2545
2546
2547
2548 return new (NodeAllocator) SequenceNode( stream.CurrentDoc
2553 getNext();
2554 return new (NodeAllocator)
2560 getNext();
2561 return new (NodeAllocator)
2567 getNext();
2568 return new (NodeAllocator)
2574 getNext();
2575 return new (NodeAllocator)
2581 getNext();
2582 return new (NodeAllocator)
2586 , T.Range);
2588 getNext();
2589 StringRef NullTerminatedStr(T.Value.c_str(), T.Value.length() + 1);
2591 return new (NodeAllocator)
2593 TagInfo.Range, StrCopy, T.Range);
2594 }
2596
2597 return new (NodeAllocator)
2605 default:
2606
2607
2608 return new (NodeAllocator) NullNode(stream.CurrentDoc);
2613 return new (NodeAllocator) NullNode(stream.CurrentDoc);
2614
2615 setError("Unexpected token", T);
2616 return nullptr;
2617 }
2619 return nullptr;
2620 }
2622 return nullptr;
2623}
2624
// Handles a run of directives, looping until the else branch breaks out.
// Returns true if at least one TAG or YAML directive was handled, false
// otherwise.
// NOTE(review): the conditions that choose between the two directive kinds are
// missing from this listing.
2625bool Document::parseDirectives() {
2626 bool isDirective = false;
2627 while (true) {
2630 parseTAGDirective();
2631 isDirective = true;
2633 parseYAMLDirective();
2634 isDirective = true;
2635 } else {
// Anything else ends the directive section.
2636 break;
2637 }
2638 }
2639 return isDirective;
2640}
2641
// Consumes the next token and discards it; nothing else is done with the
// directive here.
2642void Document::parseYAMLDirective() {
2643 getNext();
2644}
2645
// Consumes a TAG directive token and records its handle -> prefix mapping in
// TagMap.
// NOTE(review): the declaration of T is missing from this listing; presumably
// it is derived from the consumed token -- confirm in the full file.
2646void Document::parseTAGDirective() {
2647 Token Tag = getNext();
2649
// Drop everything before the first blank, then the blanks themselves, so T
// starts at the handle.
2650 T = T.substr(T.find_first_of(" \t")).ltrim(" \t");
// The handle runs up to the next blank; the prefix is whatever follows once
// leading blanks are trimmed.
// NOTE(review): if no blank follows the handle, HandleEnd is npos; presumably
// substr() then yields an empty prefix -- confirm against StringRef::substr.
2651 std::size_t HandleEnd = T.find_first_of(" \t");
2652 StringRef TagHandle = T.substr(0, HandleEnd);
2653 StringRef TagPrefix = T.substr(HandleEnd).ltrim(" \t");
// operator[] inserts the handle, or overwrites an existing entry for it.
2654 TagMap[TagHandle] = TagPrefix;
2655}
2656
// Consumes the next token and checks its kind.
// Returns true if the token's Kind equals TK. Otherwise reports
// "Unexpected token" at that token and returns false. The token is consumed
// in both cases.
2657bool Document::expectToken(int TK) {
2658 Token T = getNext();
2659 if (T.Kind != TK) {
2660 setError("Unexpected token", T);
2661 return false;
2662 }
2663 return true;
2664}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_ATTRIBUTE_NOINLINE
LLVM_ATTRIBUTE_NOINLINE - On compilers where we have a directive to do so, mark a method "not for inl...
static Cursor skipComment(Cursor C)
Skip a line comment and return the updated cursor.
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
static bool encodeUTF8(size_t CodePoint, char *Output)
This file defines the SmallString class.
This file defines the SmallVector class.
static EncodingInfo getUnicodeEncoding(StringRef Input)
getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode encoding form of Input.
Definition YAMLParser.cpp:60
static bool is_ns_hex_digit(const char C)
Definition YAMLParser.cpp:1007
static bool is_ns_word_char(const char C)
Definition YAMLParser.cpp:1009
static unsigned getChompedLineBreaks(char ChompingIndicator, unsigned LineBreaks, StringRef Str)
Get the number of line breaks after chomping.
Definition YAMLParser.cpp:1592
std::pair< uint32_t, unsigned > UTF8Decoded
The Unicode scalar value of a UTF-8 minimal well-formed code unit subsequence and the subsequence's l...
Definition YAMLParser.cpp:194
UnicodeEncodingForm
Definition YAMLParser.cpp:41
@ UEF_UTF32_LE
UTF-32 Little Endian.
Definition YAMLParser.cpp:42
@ UEF_UTF16_BE
UTF-16 Big Endian.
Definition YAMLParser.cpp:45
@ UEF_UTF16_LE
UTF-16 Little Endian.
Definition YAMLParser.cpp:44
@ UEF_UTF32_BE
UTF-32 Big Endian.
Definition YAMLParser.cpp:43
@ UEF_UTF8
UTF-8 or ascii.
Definition YAMLParser.cpp:46
@ UEF_Unknown
Not a valid Unicode encoding.
Definition YAMLParser.cpp:47
static LLVM_ATTRIBUTE_NOINLINE bool wasEscaped(StringRef::iterator First, StringRef::iterator Position)
Definition YAMLParser.cpp:1386
static StringRef parseScalarValue(StringRef UnquotedValue, SmallVectorImpl< char > &Storage, StringRef LookupChars, std::function< StringRef(StringRef, SmallVectorImpl< char > &)> UnescapeCallback)
parseScalarValue - A common parsing routine for all flow scalar styles.
Definition YAMLParser.cpp:2055
std::pair< UnicodeEncodingForm, unsigned > EncodingInfo
EncodingInfo - Holds the encoding type and length of the byte order mark if it exists.
Definition YAMLParser.cpp:52
BumpPtrList< Token > TokenQueueT
Definition YAMLParser.cpp:164
static UTF8Decoded decodeUTF8(StringRef Range)
Definition YAMLParser.cpp:196
IteratorImpl< T, typename list_type::iterator > iterator
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Tagged union holding either a T or a Error.
const char * getBufferStart() const
const char * getBufferEnd() const
static std::unique_ptr< MemoryBuffer > getMemBuffer(StringRef InputData, StringRef BufferName="", bool RequiresNullTerminator=true)
Open the specified memory range as a MemoryBuffer.
Represents a location in source code.
static SMLoc getFromPointer(const char *Ptr)
Represents a range in source code.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void reserve(size_type N)
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This owns the files read by a parser, handles include stacks, and handles diagnostic wrangling.
LLVM_ABI void PrintMessage(raw_ostream &OS, SMLoc Loc, DiagKind Kind, const Twine &Msg, ArrayRef< SMRange > Ranges={}, ArrayRef< SMFixIt > FixIts={}, bool ShowColors=true) const
Emit a message about the specified location with the specified string.
unsigned AddNewSourceBuffer(std::unique_ptr< MemoryBuffer > F, SMLoc IncludeLoc)
Add a new source buffer to this source manager.
StringRef - Represent a constant reference to a string, i.e.
LLVM_ABI size_t find_last_not_of(char C, size_t From=npos) const
Find the last character in the string that is not C, or npos if not found.
static constexpr size_t npos
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
constexpr bool empty() const
empty - Check if the string is empty.
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
char back() const
back - Get the last character in the string.
constexpr size_t size() const
size - Get the string size.
char front() const
front - Get the first character in the string.
size_t find_last_of(char C, size_t From=npos) const
Find the last character in the string that is C, or npos if not found.
StringRef ltrim(char Char) const
Return string with consecutive Char characters starting from the left removed.
size_t find_first_of(char C, size_t From=0) const
Find the first character in the string that is C, or npos if not found.
StringRef rtrim(char Char) const
Return string with consecutive Char characters starting from the right removed.
StringRef take_front(size_t N=1) const
Return a StringRef equal to 'this' but with only the first N elements remaining.
StringRef copy(Allocator &A) const
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
This class implements an extremely fast bulk output stream that can only output to a stream.
Represents an alias to a Node with an anchor.
A block scalar node is an opaque datum that can be presented as a series of zero or more Unicode scal...
LLVM_ABI Node * parseBlockNode()
Root for parsing a node. Returns a single node.
Definition YAMLParser.cpp:2513
LLVM_ABI bool skip()
Finish parsing the current document and return true if there are more.
Definition YAMLParser.cpp:2481
Node * getRoot()
Parse and return the root level node.
LLVM_ABI Document(Stream &ParentStream)
Definition YAMLParser.cpp:2469
The Input class is used to parse a yaml document into in-memory structs and vectors.
Node * getValue()
Parse and return the value.
Definition YAMLParser.cpp:2283
Node * getKey()
Parse and return the key.
Definition YAMLParser.cpp:2258
Represents a YAML map created from either a block map or a flow map.
@ MT_Inline
An inline mapping node is used for "[key: value]".
Abstract base class for all Nodes.
StringRef getRawTag() const
Get the tag as it was written in the document.
unsigned int getType() const
std::string getVerbatimTag() const
Get the verbatim tag for a given Node.
Definition YAMLParser.cpp:1963
bool failed() const
Definition YAMLParser.cpp:2028
std::unique_ptr< Document > & Doc
Node(unsigned int Type, std::unique_ptr< Document > &, StringRef Anchor, StringRef Tag)
BumpPtrAllocator & getAllocator()
Definition YAMLParser.cpp:2020
Node * parseBlockNode()
Definition YAMLParser.cpp:2016
void setError(const Twine &Message, Token &Location) const
Definition YAMLParser.cpp:2024
Token getNext()
Definition YAMLParser.cpp:2012
Token & peekNext()
Definition YAMLParser.cpp:2008
A scalar node is an opaque datum that can be presented as a series of zero or more Unicode scalar val...
StringRef getValue(SmallVectorImpl< char > &Storage) const
Gets the value of this node as a StringRef.
Definition YAMLParser.cpp:2032
Scans YAML tokens from a MemoryBuffer.
Definition YAMLParser.cpp:247
Scanner(StringRef Input, SourceMgr &SM, bool ShowColors=true, std::error_code *EC=nullptr)
Definition YAMLParser.cpp:857
void setError(const Twine &Message, StringRef::iterator Position)
Definition YAMLParser.cpp:265
Token getNext()
Parse the next token and pop it from the queue.
Definition YAMLParser.cpp:913
bool failed()
Returns true if an error occurred while parsing.
Definition YAMLParser.cpp:281
void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message, ArrayRef< SMRange > Ranges={})
Definition YAMLParser.cpp:260
Token & peekNext()
Parse the next token and return it without popping it.
Definition YAMLParser.cpp:886
Represents a YAML sequence created from either a block sequence or a flow sequence.
void increment()
Definition YAMLParser.cpp:2381
This class represents a YAML stream potentially containing multiple documents.
LLVM_ABI document_iterator end()
Definition YAMLParser.cpp:1947
LLVM_ABI document_iterator begin()
Definition YAMLParser.cpp:1936
LLVM_ABI Stream(StringRef Input, SourceMgr &, bool ShowColors=true, std::error_code *EC=nullptr)
This keeps a reference to the string referenced by Input.
Definition YAMLParser.cpp:1915
LLVM_ABI bool failed()
Definition YAMLParser.cpp:1925
LLVM_ABI void printError(Node *N, const Twine &Msg, SourceMgr::DiagKind Kind=SourceMgr::DK_Error)
Definition YAMLParser.cpp:1927
LLVM_ABI void skip()
Definition YAMLParser.cpp:1951
Iterator abstraction for Documents over a Stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
NodeAddr< FuncNode * > Func
LLVM_ABI bool isPrintable(int UCS)
Determines if a character is likely to be displayed correctly on the terminal.
LLVM_ABI bool dumpTokens(StringRef Input, raw_ostream &)
Dump all the tokens in this stream to OS.
Definition YAMLParser.cpp:607
LLVM_ABI std::optional< bool > parseBool(StringRef S)
Parse S as a bool according to https://yaml.org/type/bool.html.
Definition YAMLParser.cpp:771
LLVM_ABI bool scanTokens(StringRef Input)
Scans all tokens in input without outputting anything.
Definition YAMLParser.cpp:691
void skip(CollectionType &C)
LLVM_ABI std::string escape(StringRef Input, bool EscapePrintable=true)
Escape Input for a double quoted scalar; if EscapePrintable is true, all UTF8 sequences will be escap...
Definition YAMLParser.cpp:704
This is an optimization pass for GlobalISel generic memory operations.
std::error_code make_error_code(BitcodeError E)
testing::Matcher< const detail::ErrorHolder & > Failed()
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
std::string utohexstr(uint64_t X, bool LowerCase=false, unsigned Width=0)
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
bool isAlpha(char C)
Checks if character C is a valid letter as classified by "C" locale.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool isAlnum(char C)
Checks whether character C is either a decimal digit or an uppercase or lowercase letter as classifie...
bool isa(const From &Val)
isa - Return true if the parameter to the template is an instance of one of the template type argu...
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
FunctionAddr VTableAddr Next
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
BumpPtrAllocatorImpl<> BumpPtrAllocator
The standard BumpPtrAllocator which just uses the default template parameters.
AllocatorList< T, BumpPtrAllocator > BumpPtrList
Token - A single YAML token.
Definition YAMLParser.cpp:124
TokenKind
Definition YAMLParser.cpp:125
@ TK_StreamEnd
Definition YAMLParser.cpp:128
@ TK_BlockMappingStart
Definition YAMLParser.cpp:136
@ TK_BlockSequenceStart
Definition YAMLParser.cpp:135
@ TK_FlowEntry
Definition YAMLParser.cpp:137
@ TK_DocumentStart
Definition YAMLParser.cpp:131
@ TK_Value
Definition YAMLParser.cpp:143
@ TK_Tag
Definition YAMLParser.cpp:148
@ TK_VersionDirective
Definition YAMLParser.cpp:129
@ TK_StreamStart
Definition YAMLParser.cpp:127
@ TK_Key
Definition YAMLParser.cpp:142
@ TK_Error
Definition YAMLParser.cpp:126
@ TK_BlockScalar
Definition YAMLParser.cpp:145
@ TK_BlockEntry
Definition YAMLParser.cpp:133
@ TK_BlockEnd
Definition YAMLParser.cpp:134
@ TK_Anchor
Definition YAMLParser.cpp:147
@ TK_Scalar
Definition YAMLParser.cpp:144
@ TK_DocumentEnd
Definition YAMLParser.cpp:132
@ TK_FlowSequenceStart
Definition YAMLParser.cpp:138
@ TK_TagDirective
Definition YAMLParser.cpp:130
@ TK_FlowSequenceEnd
Definition YAMLParser.cpp:139
@ TK_FlowMappingEnd
Definition YAMLParser.cpp:141
@ TK_FlowMappingStart
Definition YAMLParser.cpp:140
@ TK_Alias
Definition YAMLParser.cpp:146
enum llvm::yaml::Token::TokenKind Kind
std::string Value
The value of a block scalar node.
Definition YAMLParser.cpp:156
StringRef Range
A string of length 0 or more whose begin() points to the logical location of the token in the input.
Definition YAMLParser.cpp:153