LLVM: lib/Support/YAMLParser.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

29#include

30#include

31#include

32#include

33#include

34#include

35#include <system_error>

36#include

37

38using namespace llvm;

39using namespace yaml;

40

49

50

51

52using EncodingInfo = std::pair<UnicodeEncodingForm, unsigned>;

53

54

55

56

57

58

59

61 if (Input.empty())

63

65 case 0x00:

66 if (Input.size() >= 4) {

67 if ( Input[1] == 0

73 }

74

75 if (Input.size() >= 2 && Input[1] != 0)

78 case 0xFF:

79 if ( Input.size() >= 4

84

88 case 0xFE:

92 case 0xEF:

93 if ( Input.size() >= 3

98 }

99

100

103

104 if (Input.size() >= 2 && Input[1] == 0)

106

108}

109

110

// Empty out-of-line definitions of anchor() for each YAML node class.
// NOTE(review): presumably these follow the LLVM "virtual method anchor"
// convention, giving each class one out-of-line virtual so its vtable is
// emitted in this translation unit only -- confirm against the class
// declarations in the header.
111void Node::anchor() {}

112void NullNode::anchor() {}

113void ScalarNode::anchor() {}

114void BlockScalarNode::anchor() {}

115void KeyValueNode::anchor() {}

116void MappingNode::anchor() {}

117void SequenceNode::anchor() {}

118void AliasNode::anchor() {}

119

120namespace llvm {

121namespace yaml {

122

123

150

151

152

154

155

157

159};

160

161}

162}

163

165

166namespace {

167

168

169

170

171

172

173

174

175

176

177struct SimpleKey {

179 unsigned Column = 0;

180 unsigned Line = 0;

181 unsigned FlowLevel = 0;

182 bool IsRequired = false;

183

185 return Tok == Other.Tok;

186 }

187};

188

189}

190

191

192

193

195

199

200

201 if (Position < End && (*Position & 0x80) == 0) {

202 return {*Position, 1};

203 }

204

205

206 if (Position + 1 < End && ((*Position & 0xE0) == 0xC0) &&

207 ((*(Position + 1) & 0xC0) == 0x80)) {

208 uint32_t codepoint = ((*Position & 0x1F) << 6) |

209 (*(Position + 1) & 0x3F);

210 if (codepoint >= 0x80)

211 return {codepoint, 2};

212 }

213

214

215 if (Position + 2 < End && ((*Position & 0xF0) == 0xE0) &&

216 ((*(Position + 1) & 0xC0) == 0x80) &&

217 ((*(Position + 2) & 0xC0) == 0x80)) {

218 uint32_t codepoint = ((*Position & 0x0F) << 12) |

219 ((*(Position + 1) & 0x3F) << 6) |

220 (*(Position + 2) & 0x3F);

221

222

223 if (codepoint >= 0x800 &&

224 (codepoint < 0xD800 || codepoint > 0xDFFF))

225 return {codepoint, 3};

226 }

227

228

229 if (Position + 3 < End && ((*Position & 0xF8) == 0xF0) &&

230 ((*(Position + 1) & 0xC0) == 0x80) &&

231 ((*(Position + 2) & 0xC0) == 0x80) &&

232 ((*(Position + 3) & 0xC0) == 0x80)) {

233 uint32_t codepoint = ((*Position & 0x07) << 18) |

234 ((*(Position + 1) & 0x3F) << 12) |

235 ((*(Position + 2) & 0x3F) << 6) |

236 (*(Position + 3) & 0x3F);

237 if (codepoint >= 0x10000 && codepoint <= 0x10FFFF)

238 return {codepoint, 4};

239 }

240 return {0, 0};

241}

242

243namespace llvm {

244namespace yaml {

245

246

248public:

250 std::error_code *EC = nullptr);

252 std::error_code *EC = nullptr);

253

254

256

257

259

264

266 if (Position >= End)

267 Position = End - 1;

268

269

270 if (EC)

272

273

274

275 if (!Failed)

277 Failed = true;

278 }

279

280

284

285private:

287

289 return StringRef(Current, End - Current);

290 }

291

292

293

294

295

296

297

299 return ::decodeUTF8(StringRef(Position, End - Position));

300 }

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

330

331

332

333

334

335

336

338

339

340

341

342

343

344

346

347

348

349

350

351

352

354

355

356

357

358

359

360

362

364

365

366

367

368

369

372

373

374

375 void advanceWhile(SkipWhileFunc Func);

376

377

378

379

380 void scan_ns_uri_char();

381

382

383

384

386

387

389

390

391

393

394

395

397

398

399 bool isLineEmpty(StringRef Line);

400

401

402

403

404 bool consumeLineBreakIfPresent();

405

406

408 , unsigned AtColumn

409 , bool IsRequired);

410

411

412

413

414

415 void removeStaleSimpleKeyCandidates();

416

417

418 void removeSimpleKeyCandidatesOnFlowLevel(unsigned Level);

419

420

421

422 bool unrollIndent(int ToColumn);

423

424

425

426 bool rollIndent( int ToColumn

429

430

431

433

434

435 void scanToNextToken();

436

437

438 bool scanStreamStart();

439

440

441 bool scanStreamEnd();

442

443

444 bool scanDirective();

445

446

447 bool scanDocumentIndicator(bool IsStart);

448

449

450 bool scanFlowCollectionStart(bool IsSequence);

451

452

453 bool scanFlowCollectionEnd(bool IsSequence);

454

455

456 bool scanFlowEntry();

457

458

459 bool scanBlockEntry();

460

461

462 bool scanKey();

463

464

465 bool scanValue();

466

467

468 bool scanFlowScalar(bool IsDoubleQuoted);

469

470

471 bool scanPlainScalar();

472

473

474 bool scanAliasOrAnchor(bool IsAlias);

475

476

477 bool scanBlockScalar(bool IsLiteral);

478

479

480

481

482

483

484

485 bool scanBlockScalarIndicators(char &StyleIndicator, char &ChompingIndicator,

486 unsigned &IndentIndicator, bool &IsDone);

487

488

489 char scanBlockStyleIndicator();

490

491

492 char scanBlockChompingIndicator();

493

494

495 unsigned scanBlockIndentationIndicator();

496

497

498

499

500 bool scanBlockScalarHeader(char &ChompingIndicator, unsigned &IndentIndicator,

501 bool &IsDone);

502

503

504

505

506 bool findBlockScalarIndent(unsigned &BlockIndent, unsigned BlockExitIndent,

507 unsigned &LineBreaks, bool &IsDone);

508

509

510

511

512 bool scanBlockScalarIndent(unsigned BlockIndent, unsigned BlockExitIndent,

513 bool &IsDone);

514

515

516 bool scanTag();

517

518

519 bool fetchMoreTokens();

520

521

523

524

526

527

529

530

532

533

534 int Indent;

535

536

537 unsigned Column;

538

539

540 unsigned Line;

541

542

543 unsigned FlowLevel;

544

545

546 bool IsStartOfStream;

547

548

549 bool IsSimpleKeyAllowed;

550

551

552

553 bool IsAdjacentValueAllowedInFlow;

554

555

557

558

559 bool ShowColors;

560

561

562

563

565

566

568

569

571

572 std::error_code *EC;

573};

574

575}

576}

577

578

581 if (UnicodeScalarValue <= 0x7F) {

582 Result.push_back(UnicodeScalarValue & 0x7F);

583 } else if (UnicodeScalarValue <= 0x7FF) {

584 uint8_t FirstByte = 0xC0 | ((UnicodeScalarValue & 0x7C0) >> 6);

585 uint8_t SecondByte = 0x80 | (UnicodeScalarValue & 0x3F);

586 Result.push_back(FirstByte);

587 Result.push_back(SecondByte);

588 } else if (UnicodeScalarValue <= 0xFFFF) {

589 uint8_t FirstByte = 0xE0 | ((UnicodeScalarValue & 0xF000) >> 12);

590 uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);

591 uint8_t ThirdByte = 0x80 | (UnicodeScalarValue & 0x3F);

592 Result.push_back(FirstByte);

593 Result.push_back(SecondByte);

594 Result.push_back(ThirdByte);

595 } else if (UnicodeScalarValue <= 0x10FFFF) {

596 uint8_t FirstByte = 0xF0 | ((UnicodeScalarValue & 0x1F0000) >> 18);

597 uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0x3F000) >> 12);

598 uint8_t ThirdByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);

599 uint8_t FourthByte = 0x80 | (UnicodeScalarValue & 0x3F);

600 Result.push_back(FirstByte);

601 Result.push_back(SecondByte);

602 Result.push_back(ThirdByte);

603 Result.push_back(FourthByte);

604 }

605}

606

610 while (true) {

612 switch (T.Kind) {

614 OS << "Stream-Start: ";

615 break;

617 OS << "Stream-End: ";

618 break;

620 OS << "Version-Directive: ";

621 break;

623 OS << "Tag-Directive: ";

624 break;

626 OS << "Document-Start: ";

627 break;

629 OS << "Document-End: ";

630 break;

632 OS << "Block-Entry: ";

633 break;

635 OS << "Block-End: ";

636 break;

638 OS << "Block-Sequence-Start: ";

639 break;

641 OS << "Block-Mapping-Start: ";

642 break;

644 OS << "Flow-Entry: ";

645 break;

647 OS << "Flow-Sequence-Start: ";

648 break;

650 OS << "Flow-Sequence-End: ";

651 break;

653 OS << "Flow-Mapping-Start: ";

654 break;

656 OS << "Flow-Mapping-End: ";

657 break;

659 OS << "Key: ";

660 break;

662 OS << "Value: ";

663 break;

665 OS << "Scalar: ";

666 break;

668 OS << "Block Scalar: ";

669 break;

671 OS << "Alias: ";

672 break;

674 OS << "Anchor: ";

675 break;

677 OS << "Tag: ";

678 break;

680 break;

681 }

682 OS << T.Range << "\n";

684 break;

686 return false;

687 }

688 return true;

689}

690

694 while (true) {

697 break;

699 return false;

700 }

701 return true;

702}

703

705 std::string EscapedInput;

707 if (*i == '\\')

708 EscapedInput += "\\\\";

709 else if (*i == '"')

710 EscapedInput += "\\\"";

711 else if (*i == 0)

712 EscapedInput += "\\0";

713 else if (*i == 0x07)

714 EscapedInput += "\\a";

715 else if (*i == 0x08)

716 EscapedInput += "\\b";

717 else if (*i == 0x09)

718 EscapedInput += "\\t";

719 else if (*i == 0x0A)

720 EscapedInput += "\\n";

721 else if (*i == 0x0B)

722 EscapedInput += "\\v";

723 else if (*i == 0x0C)

724 EscapedInput += "\\f";

725 else if (*i == 0x0D)

726 EscapedInput += "\\r";

727 else if (*i == 0x1B)

728 EscapedInput += "\\e";

729 else if ((unsigned char)*i < 0x20) {

730 std::string HexStr = utohexstr(*i);

731 EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr;

732 } else if (*i & 0x80) {

735 if (UnicodeScalarValue.second == 0) {

736

740

741 return EscapedInput;

742 }

743 if (UnicodeScalarValue.first == 0x85)

744 EscapedInput += "\\N";

745 else if (UnicodeScalarValue.first == 0xA0)

746 EscapedInput += "\\_";

747 else if (UnicodeScalarValue.first == 0x2028)

748 EscapedInput += "\\L";

749 else if (UnicodeScalarValue.first == 0x2029)

750 EscapedInput += "\\P";

751 else if (!EscapePrintable &&

753 EscapedInput += StringRef(i, UnicodeScalarValue.second);

754 else {

755 std::string HexStr = utohexstr(UnicodeScalarValue.first);

756 if (HexStr.size() <= 2)

757 EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr;

758 else if (HexStr.size() <= 4)

759 EscapedInput += "\\u" + std::string(4 - HexStr.size(), '0') + HexStr;

760 else if (HexStr.size() <= 8)

761 EscapedInput += "\\U" + std::string(8 - HexStr.size(), '0') + HexStr;

762 }

763 i += UnicodeScalarValue.second - 1;

764 } else {

765 EscapedInput.push_back(*i);

766 }

767 }

768 return EscapedInput;

769}

770

772 switch (S.size()) {

773 case 1:

774 switch (S.front()) {

775 case 'y':

776 case 'Y':

777 return true;

778 case 'n':

779 case 'N':

780 return false;

781 default:

782 return std::nullopt;

783 }

784 case 2:

785 switch (S.front()) {

786 case 'O':

787 if (S[1] == 'N')

788 return true;

789 [[fallthrough]];

790 case 'o':

791 if (S[1] == 'n')

792 return true;

793 return std::nullopt;

794 case 'N':

795 if (S[1] == 'O')

796 return false;

797 [[fallthrough]];

798 case 'n':

799 if (S[1] == 'o')

800 return false;

801 return std::nullopt;

802 default:

803 return std::nullopt;

804 }

805 case 3:

806 switch (S.front()) {

807 case 'O':

809 return false;

810 [[fallthrough]];

811 case 'o':

813 return false;

814 return std::nullopt;

815 case 'Y':

817 return true;

818 [[fallthrough]];

819 case 'y':

821 return true;

822 return std::nullopt;

823 default:

824 return std::nullopt;

825 }

826 case 4:

827 switch (S.front()) {

828 case 'T':

830 return true;

831 [[fallthrough]];

832 case 't':

833 if (S.drop_front() == "rue")

834 return true;

835 return std::nullopt;

836 default:

837 return std::nullopt;

838 }

839 case 5:

840 switch (S.front()) {

841 case 'F':

842 if (S.drop_front() == "ALSE")

843 return false;

844 [[fallthrough]];

845 case 'f':

846 if (S.drop_front() == "alse")

847 return false;

848 return std::nullopt;

849 default:

850 return std::nullopt;

851 }

852 default:

853 return std::nullopt;

854 }

855}

856

858 std::error_code *EC)

859 : SM(sm), ShowColors(ShowColors), EC(EC) {

861}

862

864 std::error_code *EC)

865 : SM(SM_), ShowColors(ShowColors), EC(EC) {

866 init(Buffer);

867}

868

870 InputBuffer = Buffer;

873 Indent = -1;

874 Column = 0;

875 Line = 0;

876 FlowLevel = 0;

877 IsStartOfStream = true;

878 IsSimpleKeyAllowed = true;

879 IsAdjacentValueAllowedInFlow = false;

881 std::unique_ptr InputBufferOwner =

884}

885

887

888

889 bool NeedMore = false;

890 while (true) {

891 if (TokenQueue.empty() || NeedMore) {

892 if (!fetchMoreTokens()) {

893 TokenQueue.clear();

894 SimpleKeys.clear();

895 TokenQueue.push_back(Token());

896 return TokenQueue.front();

897 }

898 }

899 assert(!TokenQueue.empty() &&

900 "fetchMoreTokens lied about getting tokens!");

901

902 removeStaleSimpleKeyCandidates();

903 SimpleKey SK;

904 SK.Tok = TokenQueue.begin();

906 break;

907 else

908 NeedMore = true;

909 }

910 return TokenQueue.front();

911}

912

915

916 if (!TokenQueue.empty())

917 TokenQueue.pop_front();

918

919

920

921 if (TokenQueue.empty())

922 TokenQueue.resetAlloc();

923

924 return Ret;

925}

926

928 if (Position == End)

929 return Position;

930

931 if ( *Position == 0x09

932 || (*Position >= 0x20 && *Position <= 0x7E))

933 return Position + 1;

934

935

936 if (uint8_t(*Position) & 0x80) {

938 if ( u8d.second != 0

939 && u8d.first != 0xFEFF

940 && ( u8d.first == 0x85

941 || ( u8d.first >= 0xA0

942 && u8d.first <= 0xD7FF)

943 || ( u8d.first >= 0xE000

944 && u8d.first <= 0xFFFD)

945 || ( u8d.first >= 0x10000

946 && u8d.first <= 0x10FFFF)))

947 return Position + u8d.second;

948 }

949 return Position;

950}

951

953 if (Position == End)

954 return Position;

955 if (*Position == 0x0D) {

956 if (Position + 1 != End && *(Position + 1) == 0x0A)

957 return Position + 2;

958 return Position + 1;

959 }

960

961 if (*Position == 0x0A)

962 return Position + 1;

963 return Position;

964}

965

967 if (Position == End)

968 return Position;

969 if (*Position == ' ')

970 return Position + 1;

971 return Position;

972}

973

975 if (Position == End)

976 return Position;

977 if (*Position == ' ' || *Position == '\t')

978 return Position + 1;

979 return Position;

980}

981

983 if (Position == End)

984 return Position;

985 if (*Position == ' ' || *Position == '\t')

986 return Position;

987 return skip_nb_char(Position);

988}

989

992 while (true) {

994 if (i == Position)

995 break;

996 Position = i;

997 }

998 return Position;

999}

1000

1001void Scanner::advanceWhile(SkipWhileFunc Func) {

1002 auto Final = skip_while(Func, Current);

1003 Column += Final - Current;

1004 Current = Final;

1005}

1006

1008

1010

1011void Scanner::scan_ns_uri_char() {

1012 while (true) {

1013 if (Current == End)

1014 break;

1015 if ((*Current == '%' && Current + 2 < End &&

1018 StringRef(Current, 1).find_first_of("#;/?:@&=+$,_.!~*'()[]") !=

1020 ++Current;

1021 ++Column;

1022 } else {

1023 break;

1024 }

1025 }

1026}

1027

1028bool Scanner::consume(uint32_t Expected) {

1029 if (Expected >= 0x80) {

1030 setError("Cannot consume non-ascii characters", Current);

1031 return false;

1032 }

1033 if (Current == End)

1034 return false;

1035 if (uint8_t(*Current) >= 0x80) {

1036 setError("Cannot consume non-ascii characters", Current);

1037 return false;

1038 }

1039 if (uint8_t(*Current) == Expected) {

1040 ++Current;

1041 ++Column;

1042 return true;

1043 }

1044 return false;

1045}

1046

1047void Scanner::skip(uint32_t Distance) {

1048 Current += Distance;

1049 Column += Distance;

1050 assert(Current <= End && "Skipped past the end");

1051}

1052

1054 if (Position == End)

1055 return false;

1056 return *Position == ' ' || *Position == '\t' || *Position == '\r' ||

1057 *Position == '\n';

1058}

1059

1061 if (Position == End || isBlankOrBreak(Position))

1062 return false;

1063 if (FlowLevel &&

1064 StringRef(Position, 1).find_first_of(",[]{}") != StringRef::npos)

1065 return false;

1066 return true;

1067}

1068

1069bool Scanner::isLineEmpty(StringRef Line) {

1070 for (const auto *Position = Line.begin(); Position != Line.end(); ++Position)

1071 if (!isBlankOrBreak(Position))

1072 return false;

1073 return true;

1074}

1075

1076bool Scanner::consumeLineBreakIfPresent() {

1077 auto Next = skip_b_break(Current);

1078 if (Next == Current)

1079 return false;

1080 Column = 0;

1081 ++Line;

1082 Current = Next;

1083 return true;

1084}

1085

1087 , unsigned AtColumn

1088 , bool IsRequired) {

1089 if (IsSimpleKeyAllowed) {

1090 SimpleKey SK;

1091 SK.Tok = Tok;

1092 SK.Line = Line;

1093 SK.Column = AtColumn;

1094 SK.IsRequired = IsRequired;

1095 SK.FlowLevel = FlowLevel;

1096 SimpleKeys.push_back(SK);

1097 }

1098}

1099

1100void Scanner::removeStaleSimpleKeyCandidates() {

1102 i != SimpleKeys.end();) {

1103 if (i->Line != Line || i->Column + 1024 < Column) {

1104 if (i->IsRequired)

1105 setError( "Could not find expected : for simple key"

1106 , i->Tok->Range.begin());

1107 i = SimpleKeys.erase(i);

1108 } else {

1109 ++i;

1110 }

1111 }

1112}

1113

1114void Scanner::removeSimpleKeyCandidatesOnFlowLevel(unsigned Level) {

1115 if (!SimpleKeys.empty() && (SimpleKeys.end() - 1)->FlowLevel == Level)

1116 SimpleKeys.pop_back();

1117}

1118

1119bool Scanner::unrollIndent(int ToColumn) {

1120 Token T;

1121

1122 if (FlowLevel != 0)

1123 return true;

1124

1125 while (Indent > ToColumn) {

1127 T.Range = StringRef(Current, 1);

1128 TokenQueue.push_back(T);

1129 Indent = Indents.pop_back_val();

1130 }

1131

1132 return true;

1133}

1134

1135bool Scanner::rollIndent( int ToColumn

1138 if (FlowLevel)

1139 return true;

1140 if (Indent < ToColumn) {

1141 Indents.push_back(Indent);

1142 Indent = ToColumn;

1143

1144 Token T;

1146 T.Range = StringRef(Current, 0);

1147 TokenQueue.insert(InsertPoint, T);

1148 }

1149 return true;

1150}

1151

1152void Scanner::skipComment() {

1153 if (Current == End || *Current != '#')

1154 return;

1155 while (true) {

1156

1157

1159 if (I == Current)

1160 break;

1161 Current = I;

1162 ++Column;

1163 }

1164}

1165

1166void Scanner::scanToNextToken() {

1167 while (true) {

1168 while (Current != End && (*Current == ' ' || *Current == '\t')) {

1169 skip(1);

1170 }

1171

1172 skipComment();

1173

1174

1176 if (i == Current)

1177 break;

1178 Current = i;

1179 ++Line;

1180 Column = 0;

1181

1182 if (!FlowLevel)

1183 IsSimpleKeyAllowed = true;

1184 }

1185}

1186

1187bool Scanner::scanStreamStart() {

1188 IsStartOfStream = false;

1189

1191

1192 Token T;

1194 T.Range = StringRef(Current, EI.second);

1195 TokenQueue.push_back(T);

1196 Current += EI.second;

1197 return true;

1198}

1199

1200bool Scanner::scanStreamEnd() {

1201

1202 if (Column != 0) {

1203 Column = 0;

1204 ++Line;

1205 }

1206

1207 unrollIndent(-1);

1208 SimpleKeys.clear();

1209 IsSimpleKeyAllowed = false;

1210 IsAdjacentValueAllowedInFlow = false;

1211

1212 Token T;

1214 T.Range = StringRef(Current, 0);

1215 TokenQueue.push_back(T);

1216 return true;

1217}

1218

1219bool Scanner::scanDirective() {

1220

1221 unrollIndent(-1);

1222 SimpleKeys.clear();

1223 IsSimpleKeyAllowed = false;

1224 IsAdjacentValueAllowedInFlow = false;

1225

1229 Current = skip_while(&Scanner::skip_ns_char, Current);

1230 StringRef Name(NameStart, Current - NameStart);

1231 Current = skip_while(&Scanner::skip_s_white, Current);

1232

1233 Token T;

1234 if (Name == "YAML") {

1235 Current = skip_while(&Scanner::skip_ns_char, Current);

1237 T.Range = StringRef(Start, Current - Start);

1238 TokenQueue.push_back(T);

1239 return true;

1240 } else if(Name == "TAG") {

1241 Current = skip_while(&Scanner::skip_ns_char, Current);

1242 Current = skip_while(&Scanner::skip_s_white, Current);

1243 Current = skip_while(&Scanner::skip_ns_char, Current);

1245 T.Range = StringRef(Start, Current - Start);

1246 TokenQueue.push_back(T);

1247 return true;

1248 }

1249 return false;

1250}

1251

1252bool Scanner::scanDocumentIndicator(bool IsStart) {

1253 unrollIndent(-1);

1254 SimpleKeys.clear();

1255 IsSimpleKeyAllowed = false;

1256 IsAdjacentValueAllowedInFlow = false;

1257

1258 Token T;

1260 T.Range = StringRef(Current, 3);

1261 skip(3);

1262 TokenQueue.push_back(T);

1263 return true;

1264}

1265

1266bool Scanner::scanFlowCollectionStart(bool IsSequence) {

1267 Token T;

1270 T.Range = StringRef(Current, 1);

1271 skip(1);

1272 TokenQueue.push_back(T);

1273

1274

1275 saveSimpleKeyCandidate(--TokenQueue.end(), Column - 1, false);

1276

1277

1278 IsSimpleKeyAllowed = true;

1279

1280 IsAdjacentValueAllowedInFlow = false;

1281 ++FlowLevel;

1282 return true;

1283}

1284

1285bool Scanner::scanFlowCollectionEnd(bool IsSequence) {

1286 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);

1287 IsSimpleKeyAllowed = false;

1288 IsAdjacentValueAllowedInFlow = true;

1289 Token T;

1292 T.Range = StringRef(Current, 1);

1293 skip(1);

1294 TokenQueue.push_back(T);

1295 if (FlowLevel)

1296 --FlowLevel;

1297 return true;

1298}

1299

1300bool Scanner::scanFlowEntry() {

1301 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);

1302 IsSimpleKeyAllowed = true;

1303 IsAdjacentValueAllowedInFlow = false;

1304 Token T;

1306 T.Range = StringRef(Current, 1);

1307 skip(1);

1308 TokenQueue.push_back(T);

1309 return true;

1310}

1311

1312bool Scanner::scanBlockEntry() {

1314 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);

1315 IsSimpleKeyAllowed = true;

1316 IsAdjacentValueAllowedInFlow = false;

1317 Token T;

1319 T.Range = StringRef(Current, 1);

1320 skip(1);

1321 TokenQueue.push_back(T);

1322 return true;

1323}

1324

1325bool Scanner::scanKey() {

1326 if (!FlowLevel)

1328

1329 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);

1330 IsSimpleKeyAllowed = !FlowLevel;

1331 IsAdjacentValueAllowedInFlow = false;

1332

1333 Token T;

1335 T.Range = StringRef(Current, 1);

1336 skip(1);

1337 TokenQueue.push_back(T);

1338 return true;

1339}

1340

1341bool Scanner::scanValue() {

1342

1343

1344 if (!SimpleKeys.empty()) {

1345 SimpleKey SK = SimpleKeys.pop_back_val();

1346 Token T;

1348 T.Range = SK.Tok->Range;

1350 for (i = TokenQueue.begin(), e = TokenQueue.end(); i != e; ++i) {

1351 if (i == SK.Tok)

1352 break;

1353 }

1354 if (i == e) {

1355 Failed = true;

1356 return false;

1357 }

1358 i = TokenQueue.insert(i, T);

1359

1360

1362

1363 IsSimpleKeyAllowed = false;

1364 } else {

1365 if (!FlowLevel)

1367 IsSimpleKeyAllowed = !FlowLevel;

1368 }

1369 IsAdjacentValueAllowedInFlow = false;

1370

1371 Token T;

1373 T.Range = StringRef(Current, 1);

1374 skip(1);

1375 TokenQueue.push_back(T);

1376 return true;

1377}

1378

1379

1380

1383

1384

1385

1390

1391

1392 while (I >= First && *I == '\\') --I;

1393

1394

1395 return (Position - 1 - I) % 2 == 1;

1396}

1397

1398bool Scanner::scanFlowScalar(bool IsDoubleQuoted) {

1400 unsigned ColStart = Column;

1401 if (IsDoubleQuoted) {

1402 do {

1403 ++Current;

1404 while (Current != End && *Current != '"')

1405 ++Current;

1406

1407

1408 } while ( Current != End

1409 && *(Current - 1) == '\\'

1411 } else {

1412 skip(1);

1413 while (Current != End) {

1414

1415 if (Current + 1 < End && *Current == '\'' && *(Current + 1) == '\'') {

1416 skip(2);

1417 continue;

1418 } else if (*Current == '\'')

1419 break;

1421 if (i == Current) {

1422 i = skip_b_break(Current);

1423 if (i == Current)

1424 break;

1425 Current = i;

1426 Column = 0;

1427 ++Line;

1428 } else {

1429 if (i == End)

1430 break;

1431 Current = i;

1432 ++Column;

1433 }

1434 }

1435 }

1436

1437 if (Current == End) {

1438 setError("Expected quote at end of scalar", Current);

1439 return false;

1440 }

1441

1442 skip(1);

1443 Token T;

1445 T.Range = StringRef(Start, Current - Start);

1446 TokenQueue.push_back(T);

1447

1448 saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);

1449

1450 IsSimpleKeyAllowed = false;

1451 IsAdjacentValueAllowedInFlow = true;

1452

1453 return true;

1454}

1455

1456bool Scanner::scanPlainScalar() {

1458 unsigned ColStart = Column;

1459 unsigned LeadingBlanks = 0;

1460 assert(Indent >= -1 && "Indent must be >= -1 !");

1461 unsigned indent = static_cast<unsigned>(Indent + 1);

1462 while (Current != End) {

1463 if (*Current == '#')

1464 break;

1465

1466 while (Current != End &&

1467 ((*Current != ':' && isPlainSafeNonBlank(Current)) ||

1468 (*Current == ':' && isPlainSafeNonBlank(Current + 1)))) {

1470 if (i == Current)

1471 break;

1472 Current = i;

1473 ++Column;

1474 }

1475

1476

1477 if (!isBlankOrBreak(Current))

1478 break;

1479

1480

1482 while (isBlankOrBreak(Tmp)) {

1484 if (i != Tmp) {

1485 if (LeadingBlanks && (Column < indent) && *Tmp == '\t') {

1486 setError("Found invalid tab character in indentation", Tmp);

1487 return false;

1488 }

1489 Tmp = i;

1490 ++Column;

1491 } else {

1492 i = skip_b_break(Tmp);

1493 if (!LeadingBlanks)

1494 LeadingBlanks = 1;

1495 Tmp = i;

1496 Column = 0;

1497 ++Line;

1498 }

1499 }

1500

1501 if (!FlowLevel && Column < indent)

1502 break;

1503

1504 Current = Tmp;

1505 }

1506 if (Start == Current) {

1507 setError("Got empty plain scalar", Start);

1508 return false;

1509 }

1510 Token T;

1512 T.Range = StringRef(Start, Current - Start);

1513 TokenQueue.push_back(T);

1514

1515

1516 saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);

1517

1518 IsSimpleKeyAllowed = false;

1519 IsAdjacentValueAllowedInFlow = false;

1520

1521 return true;

1522}

1523

1524bool Scanner::scanAliasOrAnchor(bool IsAlias) {

1526 unsigned ColStart = Column;

1527 skip(1);

1528 while (Current != End) {

1529 if ( *Current == '[' || *Current == ']'

1530 || *Current == '{' || *Current == '}'

1531 || *Current == ','

1532 || *Current == ':')

1533 break;

1535 if (i == Current)

1536 break;

1537 Current = i;

1538 ++Column;

1539 }

1540

1541 if (Start + 1 == Current) {

1542 setError("Got empty alias or anchor", Start);

1543 return false;

1544 }

1545

1546 Token T;

1548 T.Range = StringRef(Start, Current - Start);

1549 TokenQueue.push_back(T);

1550

1551

1552 saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);

1553

1554 IsSimpleKeyAllowed = false;

1555 IsAdjacentValueAllowedInFlow = false;

1556

1557 return true;

1558}

1559

1560bool Scanner::scanBlockScalarIndicators(char &StyleIndicator,

1561 char &ChompingIndicator,

1562 unsigned &IndentIndicator,

1563 bool &IsDone) {

1564 StyleIndicator = scanBlockStyleIndicator();

1565 if (!scanBlockScalarHeader(ChompingIndicator, IndentIndicator, IsDone))

1566 return false;

1567 return true;

1568}

1569

1570char Scanner::scanBlockStyleIndicator() {

1571 char Indicator = ' ';

1572 if (Current != End && (*Current == '>' || *Current == '|')) {

1573 Indicator = *Current;

1574 skip(1);

1575 }

1576 return Indicator;

1577}

1578

1579char Scanner::scanBlockChompingIndicator() {

1580 char Indicator = ' ';

1581 if (Current != End && (*Current == '+' || *Current == '-')) {

1582 Indicator = *Current;

1583 skip(1);

1584 }

1585 return Indicator;

1586}

1587

1588

1589

1590

1591

1593 unsigned LineBreaks, StringRef Str) {

1594 if (ChompingIndicator == '-')

1595 return 0;

1596 if (ChompingIndicator == '+')

1597 return LineBreaks;

1598

1599 return Str.empty() ? 0 : 1;

1600}

1601

1602unsigned Scanner::scanBlockIndentationIndicator() {

1603 unsigned Indent = 0;

1604 if (Current != End && (*Current >= '1' && *Current <= '9')) {

1605 Indent = unsigned(*Current - '0');

1606 skip(1);

1607 }

1608 return Indent;

1609}

1610

1611bool Scanner::scanBlockScalarHeader(char &ChompingIndicator,

1612 unsigned &IndentIndicator, bool &IsDone) {

1613 auto Start = Current;

1614

1615 ChompingIndicator = scanBlockChompingIndicator();

1616 IndentIndicator = scanBlockIndentationIndicator();

1617

1618 if (ChompingIndicator == ' ')

1619 ChompingIndicator = scanBlockChompingIndicator();

1620 Current = skip_while(&Scanner::skip_s_white, Current);

1621 skipComment();

1622

1623 if (Current == End) {

1624 Token T;

1626 T.Range = StringRef(Start, Current - Start);

1627 TokenQueue.push_back(T);

1628 IsDone = true;

1629 return true;

1630 }

1631

1632 if (!consumeLineBreakIfPresent()) {

1633 setError("Expected a line break after block scalar header", Current);

1634 return false;

1635 }

1636 return true;

1637}

1638

1639bool Scanner::findBlockScalarIndent(unsigned &BlockIndent,

1640 unsigned BlockExitIndent,

1641 unsigned &LineBreaks, bool &IsDone) {

1642 unsigned MaxAllSpaceLineCharacters = 0;

1644

1645 while (true) {

1646 advanceWhile(&Scanner::skip_s_space);

1647 if (skip_nb_char(Current) != Current) {

1648

1649 if (Column <= BlockExitIndent) {

1650 IsDone = true;

1651 return true;

1652 }

1653

1654 BlockIndent = Column;

1655 if (MaxAllSpaceLineCharacters > BlockIndent) {

1657 "Leading all-spaces line must be smaller than the block indent",

1658 LongestAllSpaceLine);

1659 return false;

1660 }

1661 return true;

1662 }

1663 if (skip_b_break(Current) != Current &&

1664 Column > MaxAllSpaceLineCharacters) {

1665

1666

1667 MaxAllSpaceLineCharacters = Column;

1668 LongestAllSpaceLine = Current;

1669 }

1670

1671

1672 if (Current == End) {

1673 IsDone = true;

1674 return true;

1675 }

1676

1677 if (!consumeLineBreakIfPresent()) {

1678 IsDone = true;

1679 return true;

1680 }

1681 ++LineBreaks;

1682 }

1683 return true;

1684}

1685

1686bool Scanner::scanBlockScalarIndent(unsigned BlockIndent,

1687 unsigned BlockExitIndent, bool &IsDone) {

1688

1689 while (Column < BlockIndent) {

1690 auto I = skip_s_space(Current);

1691 if (I == Current)

1692 break;

1693 Current = I;

1694 ++Column;

1695 }

1696

1697 if (skip_nb_char(Current) == Current)

1698 return true;

1699

1700 if (Column <= BlockExitIndent) {

1701 IsDone = true;

1702 return true;

1703 }

1704

1705 if (Column < BlockIndent) {

1706 if (Current != End && *Current == '#') {

1707 IsDone = true;

1708 return true;

1709 }

1710 setError("A text line is less indented than the block scalar", Current);

1711 return false;

1712 }

1713 return true;

1714}

1715

1716bool Scanner::scanBlockScalar(bool IsLiteral) {

1717 assert(*Current == '|' || *Current == '>');

1718 char StyleIndicator;

1719 char ChompingIndicator;

1720 unsigned BlockIndent;

1721 bool IsDone = false;

1722 if (!scanBlockScalarIndicators(StyleIndicator, ChompingIndicator, BlockIndent,

1723 IsDone))

1724 return false;

1725 if (IsDone)

1726 return true;

1727 bool IsFolded = StyleIndicator == '>';

1728

1729 const auto *Start = Current;

1730 unsigned BlockExitIndent = Indent < 0 ? 0 : (unsigned)Indent;

1731 unsigned LineBreaks = 0;

1732 if (BlockIndent == 0) {

1733 if (!findBlockScalarIndent(BlockIndent, BlockExitIndent, LineBreaks,

1734 IsDone))

1735 return false;

1736 }

1737

1738

1739 SmallString<256> Str;

1740 while (!IsDone) {

1741 if (!scanBlockScalarIndent(BlockIndent, BlockExitIndent, IsDone))

1742 return false;

1743 if (IsDone)

1744 break;

1745

1746

1747 auto LineStart = Current;

1748 advanceWhile(&Scanner::skip_nb_char);

1749 if (LineStart != Current) {

1750 if (LineBreaks && IsFolded && !Scanner::isLineEmpty(Str)) {

1751

1752

1753

1754 if (LineBreaks == 1) {

1755 Str.append(LineBreaks,

1756 isLineEmpty(StringRef(LineStart, Current - LineStart))

1757 ? '\n'

1758 : ' ');

1759 }

1760

1761

1762

1763

1764 LineBreaks--;

1765 }

1766 Str.append(LineBreaks, '\n');

1767 Str.append(StringRef(LineStart, Current - LineStart));

1768 LineBreaks = 0;

1769 }

1770

1771

1772 if (Current == End)

1773 break;

1774

1775 if (!consumeLineBreakIfPresent())

1776 break;

1777 ++LineBreaks;

1778 }

1779

1780 if (Current == End && !LineBreaks)

1781

1782 LineBreaks = 1;

1784

1785

1786 if (!FlowLevel)

1787 IsSimpleKeyAllowed = true;

1788 IsAdjacentValueAllowedInFlow = false;

1789

1790 Token T;

1792 T.Range = StringRef(Start, Current - Start);

1793 T.Value = std::string(Str);

1794 TokenQueue.push_back(T);

1795 return true;

1796}

1797

/// Scan a tag token at the current position and append it to TokenQueue.
///
/// fetchMoreTokens() dispatches here when the current character is '!'.
///
/// \returns true once the token has been queued and the scanner flags have
/// been updated; false on the failure exit inside the '<' branch.
1798bool Scanner::scanTag() {

// Column at which the tag begins; handed to saveSimpleKeyCandidate() below.
1800 unsigned ColStart = Column;

// Step over the first character of the tag.
1801 skip(1);

// End of input or a blank/line break follows: the empty statement means
// nothing more is consumed for this tag.
1802 if (Current == End || isBlankOrBreak(Current));

// A '<' directly after the first character: step over it and let
// scan_ns_uri_char() consume what follows (presumably the URI text, judging by
// the helper's name).
1803 else if (*Current == '<') {

1804 skip(1);

1805 scan_ns_uri_char();

// NOTE(review): the condition guarding this early return is not present in
// this listing (a line is missing between the two statements) -- confirm
// against the upstream file.
1807 return false;

1808 } else {

1809

// Any other form: move Current to wherever skip_while() stops when driven by
// skip_ns_char.
1810 Current = skip_while(&Scanner::skip_ns_char, Current);

1811 }

1812

// NOTE(review): the declaration of Start and the assignment of T's kind are
// not visible in this listing; Start presumably marks where the tag began --
// confirm.
1813 Token T;

// The token's range runs from Start up to the current position.
1815 T.Range = StringRef(Start, Current - Start);

1816 TokenQueue.push_back(T);

1817

1818

// Offer the token just queued (the last element of TokenQueue) as a simple key
// candidate beginning at ColStart.
// NOTE(review): the trailing `false` presumably corresponds to
// SimpleKey::IsRequired -- confirm.
1819 saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);

1820

// Both flags are cleared after a tag has been scanned.
1821 IsSimpleKeyAllowed = false;

1822 IsAdjacentValueAllowedInFlow = false;

1823

1824 return true;

1825}

1826

/// Scan the next token(s) by dispatching on the character at Current.
///
/// The checks run in a fixed order and the first one that matches decides
/// which scan routine is used.
///
/// \returns the result of the selected scan routine, or false after reporting
/// "Unrecognized character while tokenizing." when nothing matches.
1827bool Scanner::fetchMoreTokens() {

// The very first call only produces the stream-start token.
1828 if (IsStartOfStream)

1829 return scanStreamStart();

1830

1831 scanToNextToken();

1832

// No input left: produce the stream-end token.
1833 if (Current == End)

1834 return scanStreamEnd();

1835

1836 removeStaleSimpleKeyCandidates();

1837

1838 unrollIndent(Column);

1839

// A '%' in column 0 starts a directive.
1840 if (Column == 0 && *Current == '%')

1841 return scanDirective();

1842

// "---" in column 0 followed by a blank or line break: document indicator,
// scanned with argument true.
// NOTE(review): because Current + 4 <= End is required first, the
// Current + 3 == End alternative can never be true here, so a "---" that is
// the last three bytes of the input is not matched by this branch -- confirm
// whether that is intended.
1843 if (Column == 0 && Current + 4 <= End

1844 && *Current == '-'

1845 && *(Current + 1) == '-'

1846 && *(Current + 2) == '-'

1847 && (Current + 3 == End || isBlankOrBreak(Current + 3)))

1848 return scanDocumentIndicator(true);

1849

// "..." in column 0 under the same conditions: document indicator, scanned
// with argument false. The note above applies here as well.
1850 if (Column == 0 && Current + 4 <= End

1851 && *Current == '.'

1852 && *(Current + 1) == '.'

1853 && *(Current + 2) == '.'

1854 && (Current + 3 == End || isBlankOrBreak(Current + 3)))

1855 return scanDocumentIndicator(false);

1856

// Flow collection delimiters: the boolean argument is true for the square
// bracket forms and false for the curly brace forms.
1857 if (*Current == '[')

1858 return scanFlowCollectionStart(true);

1859

1860 if (*Current == '{')

1861 return scanFlowCollectionStart(false);

1862

1863 if (*Current == ']')

1864 return scanFlowCollectionEnd(true);

1865

1866 if (*Current == '}')

1867 return scanFlowCollectionEnd(false);

1868

1869 if (*Current == ',')

1870 return scanFlowEntry();

1871

// '-' counts as a block entry only when a blank, a line break or the end of
// input comes right after it.
1872 if (*Current == '-' && (isBlankOrBreak(Current + 1) || Current + 1 == End))

1873 return scanBlockEntry();

1874

// '?' counts as a key indicator under the same following-character rule.
1875 if (*Current == '?' && (Current + 1 == End || isBlankOrBreak(Current + 1)))

1876 return scanKey();

1877

// ':' counts as a value indicator when the next character is not accepted by
// isPlainSafeNonBlank(), or when IsAdjacentValueAllowedInFlow is set.
1878 if (*Current == ':' &&

1879 (!isPlainSafeNonBlank(Current + 1) || IsAdjacentValueAllowedInFlow))

1880 return scanValue();

1881

// '*' and '&' share one routine; the argument is true for '*' and false
// for '&'.
1882 if (*Current == '*')

1883 return scanAliasOrAnchor(true);

1884

1885 if (*Current == '&')

1886 return scanAliasOrAnchor(false);

1887

1888 if (*Current == '!')

1889 return scanTag();

1890

// '|' and '>' start a block scalar only when FlowLevel is zero; the argument
// is true for '|' and false for '>'.
1891 if (*Current == '|' && !FlowLevel)

1892 return scanBlockScalar(true);

1893

1894 if (*Current == '>' && !FlowLevel)

1895 return scanBlockScalar(false);

1896

// Quoted scalars: the argument is false for a single quote and true for a
// double quote.
1897 if (*Current == '\'')

1898 return scanFlowScalar(false);

1899

1900 if (*Current == '"')

1901 return scanFlowScalar(true);

1902

1903

// Plain scalar: the first alternative accepts any character that is neither
// blank/line break nor one of the listed indicator characters.
// NOTE(review): one line of this condition is missing from this listing, so
// the second alternative is incomplete as shown -- confirm against the
// upstream file.
1904 StringRef FirstChar(Current, 1);

1905 if ((!isBlankOrBreak(Current) &&

1906 FirstChar.find_first_of("-?:,[]{}#&*!|>'\"%@`") == StringRef::npos) ||

1908 isPlainSafeNonBlank(Current + 1)))

1909 return scanPlainScalar();

1910

// Nothing matched: report the error at the current position.
1911 setError("Unrecognized character while tokenizing.", Current);

1912 return false;

1913}

1914

1916 std::error_code *EC)

1917 : scanner(new Scanner(Input, SM, ShowColors, EC)) {}

1918

1920 std::error_code *EC)

1921 : scanner(new Scanner(InputBuffer, SM, ShowColors, EC)) {}

1922

1924

1926

1930

1933 scanner->printError(Range.Start, Kind, Msg, Range);

1934}

1935

1937 if (CurrentDoc)

1939

1940

1941 scanner->getNext();

1942

1943 CurrentDoc.reset(new Document(*this));

1945}

1946

1950

1955

1958 : Doc(D), TypeID(Type), Anchor(A), Tag(T) {

1960 SourceRange = SMRange(Start, Start);

1961}

1962

1965 if (!Raw.empty() && Raw != "!") {

1966 std::string Ret;

1968 Ret = std::string(Doc->getTagMap().find("!")->second);

1969 Ret += Raw.substr(1);

1970 return Ret;

1972 Ret = std::string(Doc->getTagMap().find("!!")->second);

1973 Ret += Raw.substr(2);

1974 return Ret;

1975 } else {

1977 std::map<StringRef, StringRef>::const_iterator It =

1978 Doc->getTagMap().find(TagHandle);

1979 if (It != Doc->getTagMap().end())

1980 Ret = std::string(It->second);

1981 else {

1984 T.Range = TagHandle;

1985 setError(Twine("Unknown tag handle ") + TagHandle, T);

1986 }

1988 return Ret;

1989 }

1990 }

1991

1994 return "tag:yaml.org,2002:null";

1997

1998 return "tag:yaml.org,2002:str";

2000 return "tag:yaml.org,2002:map";

2002 return "tag:yaml.org,2002:seq";

2003 }

2004

2005 return "";

2006}

2007

2009 return Doc->peekNext();

2010}

2011

2013 return Doc->getNext();

2014}

2015

2017 return Doc->parseBlockNode();

2018}

2019

2021 return Doc->NodeAllocator;

2022}

2023

2025 Doc->setError(Msg, Tok);

2026}

2027

2029 return Doc->failed();

2030}

2031

2033 if (Value[0] == '"')

2034 return getDoubleQuotedValue(Value, Storage);

2035 if (Value[0] == '\'')

2036 return getSingleQuotedValue(Value, Storage);

2037 return getPlainValue(Value, Storage);

2038}

2039

2040

2041

2042

2043

2044

2045

2046

2047

2048

2049

2050

2051

2052

2053

2058 UnescapeCallback) {

2059 size_t I = UnquotedValue.find_first_of(LookupChars);

2061 return UnquotedValue;

2062

2063 Storage.clear();

2065 char LastNewLineAddedAs = '\0';

2067 if (UnquotedValue[I] != '\r' && UnquotedValue[I] != '\n') {

2069 UnquotedValue = UnescapeCallback(UnquotedValue.drop_front(I), Storage);

2070 LastNewLineAddedAs = '\0';

2071 continue;

2072 }

2073 if (size_t LastNonSWhite = UnquotedValue.find_last_not_of(" \t", I);

2077 LastNewLineAddedAs = ' ';

2078 } else {

2079

2080

2081

2082

2083 switch (LastNewLineAddedAs) {

2084 case ' ':

2086 Storage.back() = '\n';

2087 LastNewLineAddedAs = '\n';

2088 break;

2089 case '\n':

2092 break;

2093 default:

2095 LastNewLineAddedAs = ' ';

2096 break;

2097 }

2098 }

2099

2100 if (UnquotedValue.substr(I, 2) == "\r\n")

2101 I++;

2102 UnquotedValue = UnquotedValue.drop_front(I + 1).ltrim(" \t");

2103 }

2106}

2107

/// Produce the value of a double-quoted scalar.
///
/// The surrounding quote characters are removed and the remainder is handed to
/// parseScalarValue() together with a callback that handles backslash escape
/// sequences. The lookup set passed along is backslash, '\r' and '\n'.
///
/// \param RawValue the scalar including both quote characters; the assert
/// requires at least two characters, beginning and ending with '"'.
/// \param Storage buffer passed through to parseScalarValue() and to the
/// escape callback.
/// \returns whatever parseScalarValue() returns.
2108StringRef

2109ScalarNode::getDoubleQuotedValue(StringRef RawValue,

2110 SmallVectorImpl &Storage) const {

2111 assert(RawValue.size() >= 2 && RawValue.front() == '"' &&

2112 RawValue.back() == '"');

// Drop the opening and the closing quote.
2113 StringRef UnquotedValue = RawValue.substr(1, RawValue.size() - 2);

2114

// Callback for a single escape sequence.
// NOTE(review): several lines of this lambda (most case bodies and its final
// return) are missing from this listing -- confirm details against the
// upstream file.
2115 auto UnescapeFunc = [this](StringRef UnquotedValue,

2116 SmallVectorImpl &Storage) {

// Only one character is left, so nothing follows the escape introducer:
// report an error, discard what was collected and return an empty StringRef.
2118 if (UnquotedValue.size() == 1) {

2119 Token T;

2120 T.Range = UnquotedValue;

2121 setError("Unrecognized escape code", T);

2122 Storage.clear();

2123 return StringRef();

2124 }

// Step over the first character so that UnquotedValue[0] is the escape code.
2125 UnquotedValue = UnquotedValue.drop_front(1);

2126 switch (UnquotedValue[0]) {

// Unknown escape code: same error handling as above.
2127 default: {

2128 Token T;

2130 setError("Unrecognized escape code", T);

2131 Storage.clear();

2132 return StringRef();

2133 }

// Escaped '\r': when a '\n' follows, drop the '\r' so that the pair is handled
// by the '\n' case below.
2134 case '\r':

2135

2136 if (UnquotedValue.size() >= 2 && UnquotedValue[1] == '\n')

2137 UnquotedValue = UnquotedValue.drop_front(1);

2138 [[fallthrough]];

2139 case '\n':

// Single-character escape codes. The statements that emit the replacement
// characters are not visible in this listing.
2141 case '0':

2143 break;

2144 case 'a':

2146 break;

2147 case 'b':

2149 break;

2150 case 't':

2151 case 0x09:

2153 break;

2154 case 'n':

2156 break;

2157 case 'v':

2159 break;

2160 case 'f':

2162 break;

2163 case 'r':

2165 break;

2166 case 'e':

2168 break;

2169 case ' ':

2171 break;

2172 case '"':

2174 break;

2175 case '/':

2177 break;

2178 case '\\':

2180 break;

2181 case 'N':

2183 break;

2184 case '_':

2186 break;

2187 case 'L':

2189 break;

2190 case 'P':

2192 break;

// 'x' takes the two characters after the code as a base-16 number. With fewer
// than three characters left the case breaks without output; when
// getAsInteger() returns true (presumably a parse failure -- confirm) U+FFFD
// is encoded instead.
2193 case 'x': {

2194 if (UnquotedValue.size() < 3)

2195

2196 break;

2197 unsigned int UnicodeScalarValue;

2198 if (UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue))

2199

2200 UnicodeScalarValue = 0xFFFD;

2201 encodeUTF8(UnicodeScalarValue, Storage);

2203 }

// 'u' works like 'x' but reads four characters.
2204 case 'u': {

2205 if (UnquotedValue.size() < 5)

2206

2207 break;

2208 unsigned int UnicodeScalarValue;

2209 if (UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue))

2210

2211 UnicodeScalarValue = 0xFFFD;

2212 encodeUTF8(UnicodeScalarValue, Storage);

2214 }

// 'U' works like 'x' but reads eight characters.
2215 case 'U': {

2216 if (UnquotedValue.size() < 9)

2217

2218 break;

2219 unsigned int UnicodeScalarValue;

2220 if (UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue))

2221

2222 UnicodeScalarValue = 0xFFFD;

2223 encodeUTF8(UnicodeScalarValue, Storage);

2225 }

2226 }

2228 };

2229

2230 return parseScalarValue(UnquotedValue, Storage, "\\\r\n", UnescapeFunc);

2231}

2232

/// Produce the value of a single-quoted scalar.
///
/// The surrounding quote characters are removed and the remainder is handed to
/// parseScalarValue() with the lookup set "'", '\r' and '\n' and a callback
/// for the quote character.
///
/// \param RawValue the scalar including both quote characters; the assert
/// requires at least two characters, beginning and ending with '\''.
/// \param Storage buffer passed through to parseScalarValue().
/// \returns whatever parseScalarValue() returns.
2233StringRef ScalarNode::getSingleQuotedValue(StringRef RawValue,

2234 SmallVectorImpl &Storage) {

2235 assert(RawValue.size() >= 2 && RawValue.front() == '\'' &&

2236 RawValue.back() == '\'');

// Drop the opening and the closing quote.
2237 StringRef UnquotedValue = RawValue.substr(1, RawValue.size() - 2);

2238

// NOTE(review): the body of this callback is missing from this listing;
// presumably it handles a doubled single quote -- confirm against the
// upstream file.
2239 auto UnescapeFunc = [](StringRef UnquotedValue,

2240 SmallVectorImpl &Storage) {

2244 };

2245

2246 return parseScalarValue(UnquotedValue, Storage, "'\r\n", UnescapeFunc);

2247}

2248

2249StringRef ScalarNode::getPlainValue(StringRef RawValue,

2250 SmallVectorImpl &Storage) {

2251

2252

2253

2254 RawValue = RawValue.rtrim("\r\n \t");

2255 return parseScalarValue(RawValue, Storage, "\r\n", nullptr);

2256}

2257

2259 if (Key)

2260 return Key;

2261

2262 {

2268 }

2270 getNext();

2271 }

2272

2273

2277 }

2278

2279

2281}

2282

2284 if (Value)

2285 return Value;

2286

2288 Key->skip();

2289 else {

2292 }

2293

2296

2297

2298 {

2306 }

2307

2309 setError("Unexpected token in Key Value.", t);

2311 }

2312 getNext();

2313 }

2314

2315

2319 }

2320

2321

2323}

2324

/// Advance the mapping iteration by one step.
///
/// Every terminating path visible here ends the iteration the same way: it
/// sets IsAtEnd to true and resets CurrentEntry to nullptr.
///
/// NOTE(review): a large part of this function (the first guard, the token
/// lookup, the key handling and most case labels) is missing from this
/// listing -- confirm details against the upstream file.
2325void MappingNode::increment() {

// Early exit; its guarding condition is not visible in this listing.
2327 IsAtEnd = true;

2328 CurrentEntry = nullptr;

2329 return;

2330 }

// Make sure the previous entry has been fully consumed before moving on.
2331 if (CurrentEntry) {

2332 CurrentEntry->skip();

// Second early exit nested in this block; its condition is also not visible
// here.
2334 IsAtEnd = true;

2335 CurrentEntry = nullptr;

2336 return;

2337 }

2338 }

2341

// Block mappings: any token not handled by a (missing) case label reports an
// error and then falls through to the code that ends the iteration.
2343 } else if (Type == MT_Block) {

2344 switch (T.Kind) {

2347 IsAtEnd = true;

2348 CurrentEntry = nullptr;

2349 break;

2350 default:

2351 setError("Unexpected token. Expected Key or Block End", T);

2352 [[fallthrough]];

2354 IsAtEnd = true;

2355 CurrentEntry = nullptr;

2356 }

// Remaining mapping types: one visible path restarts increment(), the others
// end the iteration; an unexpected token reports an error first.
2357 } else {

2358 switch (T.Kind) {

2360

2362 return increment();

2365 [[fallthrough]];

2367

2368 IsAtEnd = true;

2369 CurrentEntry = nullptr;

2370 break;

2371 default:

2372 setError( "Unexpected token. Expected Key, Flow Entry, or Flow "

2373 "Mapping End."

2374 , T);

2375 IsAtEnd = true;

2376 CurrentEntry = nullptr;

2377 }

2378 }

2379}

2380

2383 IsAtEnd = true;

2384 CurrentEntry = nullptr;

2385 return;

2386 }

2387 if (CurrentEntry)

2388 CurrentEntry->skip();

2391 switch (T.Kind) {

2395 if (!CurrentEntry) {

2396 IsAtEnd = true;

2397 CurrentEntry = nullptr;

2398 }

2399 break;

2402 IsAtEnd = true;

2403 CurrentEntry = nullptr;

2404 break;

2405 default:

2406 setError( "Unexpected token. Expected Block Entry or Block End."

2407 , T);

2408 [[fallthrough]];

2410 IsAtEnd = true;

2411 CurrentEntry = nullptr;

2412 }

2414 switch (T.Kind) {

2418 if (!CurrentEntry) {

2419 IsAtEnd = true;

2420 CurrentEntry = nullptr;

2421 }

2422 break;

2423 default:

2425 IsAtEnd = true;

2426 CurrentEntry = nullptr;

2427 }

2428 } else if (SeqType == ST_Flow) {

2429 switch (T.Kind) {

2431

2433 WasPreviousTokenFlowEntry = true;

2437 [[fallthrough]];

2439

2440 IsAtEnd = true;

2441 CurrentEntry = nullptr;

2442 break;

2446 setError("Could not find closing ]!", T);

2447

2448 IsAtEnd = true;

2449 CurrentEntry = nullptr;

2450 break;

2451 default:

2452 if (!WasPreviousTokenFlowEntry) {

2453 setError("Expected , between entries!", T);

2454 IsAtEnd = true;

2455 CurrentEntry = nullptr;

2456 break;

2457 }

2458

2460 if (!CurrentEntry) {

2461 IsAtEnd = true;

2462 }

2463 WasPreviousTokenFlowEntry = false;

2464 break;

2465 }

2466 }

2467}

2468

2470

2471 TagMap["!"] = "!";

2472 TagMap["!!"] = "tag:yaml.org,2002:";

2473

2474 if (parseDirectives())

2476 Token &T = peekNext();

2478 getNext();

2479}

2480

2482 if (stream.scanner->failed())

2483 return false;

2484 if (!Root && getRoot())

2485 return false;

2486 Root->skip();

2487 Token &T = peekNext();

2489 return false;

2491 getNext();

2492 return skip();

2493 }

2494 return true;

2495}

2496

2497Token &Document::peekNext() {

2498 return stream.scanner->peekNext();

2499}

2500

2501Token Document::getNext() {

2502 return stream.scanner->getNext();

2503}

2504

2505void Document::setError(const Twine &Message, Token &Location) const {

2506 stream.scanner->setError(Message, Location.Range.begin());

2507}

2508

2509bool Document::failed() const {

2510 return stream.scanner->failed();

2511}

2512

2514 Token T = peekNext();

2515

2516 Token AnchorInfo;

2518parse_property:

2519 switch (T.Kind) {

2521 getNext();

2522 return new (NodeAllocator) AliasNode(stream.CurrentDoc, T.Range.substr(1));

2525 setError("Already encountered an anchor for this node!", T);

2526 return nullptr;

2527 }

2528 AnchorInfo = getNext();

2529 T = peekNext();

2530 goto parse_property;

2533 setError("Already encountered a tag for this node!", T);

2534 return nullptr;

2535 }

2536 TagInfo = getNext();

2537 T = peekNext();

2538 goto parse_property;

2539 default:

2540 break;

2541 }

2542

2543 switch (T.Kind) {

2545

2546

2547

2548 return new (NodeAllocator) SequenceNode( stream.CurrentDoc

2553 getNext();

2554 return new (NodeAllocator)

2560 getNext();

2561 return new (NodeAllocator)

2567 getNext();

2568 return new (NodeAllocator)

2574 getNext();

2575 return new (NodeAllocator)

2581 getNext();

2582 return new (NodeAllocator)

2586 , T.Range);

2588 getNext();

2589 StringRef NullTerminatedStr(T.Value.c_str(), T.Value.length() + 1);

2591 return new (NodeAllocator)

2593 TagInfo.Range, StrCopy, T.Range);

2594 }

2596

2597 return new (NodeAllocator)

2605 default:

2606

2607

2608 return new (NodeAllocator) NullNode(stream.CurrentDoc);

2613 return new (NodeAllocator) NullNode(stream.CurrentDoc);

2614

2615 setError("Unexpected token", T);

2616 return nullptr;

2617 }

2619 return nullptr;

2620 }

2622 return nullptr;

2623}

2624

/// Handle the directive tokens at the front of the token stream.
///
/// Tokens are peeked one at a time; each one taken by a directive branch is
/// passed to parseTAGDirective() or parseYAMLDirective(), and the loop stops
/// at the first token that matches neither branch.
///
/// \returns true if at least one directive was handled, false otherwise.
2625bool Document::parseDirectives() {

2626 bool isDirective = false;

2627 while (true) {

2628 Token T = peekNext();

// NOTE(review): the conditions of both branches are missing from this listing;
// presumably they compare T.Kind with Token::TK_TagDirective and
// Token::TK_VersionDirective -- confirm against the upstream file.
2630 parseTAGDirective();

2631 isDirective = true;

2633 parseYAMLDirective();

2634 isDirective = true;

2635 } else {

2636 break;

2637 }

2638 }

2639 return isDirective;

2640}

2641

2642void Document::parseYAMLDirective() {

2643 getNext();

2644}

2645

2646void Document::parseTAGDirective() {

2647 Token Tag = getNext();

2648 StringRef T = Tag.Range;

2649

2650 T = T.substr(T.find_first_of(" \t")).ltrim(" \t");

2651 std::size_t HandleEnd = T.find_first_of(" \t");

2652 StringRef TagHandle = T.substr(0, HandleEnd);

2653 StringRef TagPrefix = T.substr(HandleEnd).ltrim(" \t");

2654 TagMap[TagHandle] = TagPrefix;

2655}

2656

2657bool Document::expectToken(int TK) {

2658 Token T = getNext();

2659 if (T.Kind != TK) {

2660 setError("Unexpected token", T);

2661 return false;

2662 }

2663 return true;

2664}

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")

#define LLVM_ATTRIBUTE_NOINLINE

LLVM_ATTRIBUTE_NOINLINE - On compilers where we have a directive to do so, mark a method "not for inl...

static Cursor skipComment(Cursor C)

Skip a line comment and return the updated cursor.

ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))

static bool encodeUTF8(size_t CodePoint, char *Output)

This file defines the SmallString class.

This file defines the SmallVector class.

static EncodingInfo getUnicodeEncoding(StringRef Input)

getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode encoding form of Input.

Definition YAMLParser.cpp:60

static bool is_ns_hex_digit(const char C)

Definition YAMLParser.cpp:1007

static bool is_ns_word_char(const char C)

Definition YAMLParser.cpp:1009

static unsigned getChompedLineBreaks(char ChompingIndicator, unsigned LineBreaks, StringRef Str)

Get the number of line breaks after chomping.

Definition YAMLParser.cpp:1592

std::pair< uint32_t, unsigned > UTF8Decoded

The Unicode scalar value of a UTF-8 minimal well-formed code unit subsequence and the subsequence's l...

Definition YAMLParser.cpp:194

UnicodeEncodingForm

Definition YAMLParser.cpp:41

@ UEF_UTF32_LE

UTF-32 Little Endian.

Definition YAMLParser.cpp:42

@ UEF_UTF16_BE

UTF-16 Big Endian.

Definition YAMLParser.cpp:45

@ UEF_UTF16_LE

UTF-16 Little Endian.

Definition YAMLParser.cpp:44

@ UEF_UTF32_BE

UTF-32 Big Endian.

Definition YAMLParser.cpp:43

@ UEF_UTF8

UTF-8 or ascii.

Definition YAMLParser.cpp:46

@ UEF_Unknown

Not a valid Unicode encoding.

Definition YAMLParser.cpp:47

static LLVM_ATTRIBUTE_NOINLINE bool wasEscaped(StringRef::iterator First, StringRef::iterator Position)

Definition YAMLParser.cpp:1386

static StringRef parseScalarValue(StringRef UnquotedValue, SmallVectorImpl< char > &Storage, StringRef LookupChars, std::function< StringRef(StringRef, SmallVectorImpl< char > &)> UnescapeCallback)

parseScalarValue - A common parsing routine for all flow scalar styles.

Definition YAMLParser.cpp:2055

std::pair< UnicodeEncodingForm, unsigned > EncodingInfo

EncodingInfo - Holds the encoding type and length of the byte order mark if it exists.

Definition YAMLParser.cpp:52

BumpPtrList< Token > TokenQueueT

Definition YAMLParser.cpp:164

static UTF8Decoded decodeUTF8(StringRef Range)

Definition YAMLParser.cpp:196

IteratorImpl< T, typename list_type::iterator > iterator

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

Tagged union holding either a T or a Error.

const char * getBufferStart() const

const char * getBufferEnd() const

static std::unique_ptr< MemoryBuffer > getMemBuffer(StringRef InputData, StringRef BufferName="", bool RequiresNullTerminator=true)

Open the specified memory range as a MemoryBuffer.

Represents a location in source code.

static SMLoc getFromPointer(const char *Ptr)

Represents a range in source code.

SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...

This class consists of common code factored out of the SmallVector class to reduce code duplication b...

void reserve(size_type N)

typename SuperClass::iterator iterator

void push_back(const T &Elt)

This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.

This owns the files read by a parser, handles include stacks, and handles diagnostic wrangling.

LLVM_ABI void PrintMessage(raw_ostream &OS, SMLoc Loc, DiagKind Kind, const Twine &Msg, ArrayRef< SMRange > Ranges={}, ArrayRef< SMFixIt > FixIts={}, bool ShowColors=true) const

Emit a message about the specified location with the specified string.

unsigned AddNewSourceBuffer(std::unique_ptr< MemoryBuffer > F, SMLoc IncludeLoc)

Add a new source buffer to this source manager.

StringRef - Represent a constant reference to a string, i.e.

LLVM_ABI size_t find_last_not_of(char C, size_t From=npos) const

Find the last character in the string that is not C, or npos if not found.

static constexpr size_t npos

bool getAsInteger(unsigned Radix, T &Result) const

Parse the current string as an integer of the specified radix.

constexpr StringRef substr(size_t Start, size_t N=npos) const

Return a reference to the substring from [Start, Start + N).

bool starts_with(StringRef Prefix) const

Check if this string starts with the given Prefix.

constexpr bool empty() const

empty - Check if the string is empty.

StringRef drop_front(size_t N=1) const

Return a StringRef equal to 'this' but with the first N elements dropped.

char back() const

back - Get the last character in the string.

constexpr size_t size() const

size - Get the string size.

char front() const

front - Get the first character in the string.

size_t find_last_of(char C, size_t From=npos) const

Find the last character in the string that is C, or npos if not found.

StringRef ltrim(char Char) const

Return string with consecutive Char characters starting from the left removed.

size_t find_first_of(char C, size_t From=0) const

Find the first character in the string that is C, or npos if not found.

StringRef rtrim(char Char) const

Return string with consecutive Char characters starting from the right removed.

StringRef take_front(size_t N=1) const

Return a StringRef equal to 'this' but with only the first N elements remaining.

StringRef copy(Allocator &A) const

StringRef drop_back(size_t N=1) const

Return a StringRef equal to 'this' but with the last N elements dropped.

Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...

The instances of the Type class are immutable: once they are created, they are never changed.

This class implements an extremely fast bulk output stream that can only output to a stream.

Represents an alias to a Node with an anchor.

A block scalar node is an opaque datum that can be presented as a series of zero or more Unicode scal...

LLVM_ABI Node * parseBlockNode()

Root for parsing a node. Returns a single node.

Definition YAMLParser.cpp:2513

LLVM_ABI bool skip()

Finish parsing the current document and return true if there are more.

Definition YAMLParser.cpp:2481

Node * getRoot()

Parse and return the root level node.

LLVM_ABI Document(Stream &ParentStream)

Definition YAMLParser.cpp:2469

The Input class is used to parse a yaml document into in-memory structs and vectors.

Node * getValue()

Parse and return the value.

Definition YAMLParser.cpp:2283

Node * getKey()

Parse and return the key.

Definition YAMLParser.cpp:2258

Represents a YAML map created from either a block map or a flow map.

@ MT_Inline

An inline mapping node is used for "[key: value]".

Abstract base class for all Nodes.

StringRef getRawTag() const

Get the tag as it was written in the document.

unsigned int getType() const

std::string getVerbatimTag() const

Get the verbatim tag for a given Node.

Definition YAMLParser.cpp:1963

bool failed() const

Definition YAMLParser.cpp:2028

std::unique_ptr< Document > & Doc

Node(unsigned int Type, std::unique_ptr< Document > &, StringRef Anchor, StringRef Tag)

BumpPtrAllocator & getAllocator()

Definition YAMLParser.cpp:2020

Node * parseBlockNode()

Definition YAMLParser.cpp:2016

void setError(const Twine &Message, Token &Location) const

Definition YAMLParser.cpp:2024

Token getNext()

Definition YAMLParser.cpp:2012

Token & peekNext()

Definition YAMLParser.cpp:2008

A scalar node is an opaque datum that can be presented as a series of zero or more Unicode scalar val...

StringRef getValue(SmallVectorImpl< char > &Storage) const

Gets the value of this node as a StringRef.

Definition YAMLParser.cpp:2032

Scans YAML tokens from a MemoryBuffer.

Definition YAMLParser.cpp:247

Scanner(StringRef Input, SourceMgr &SM, bool ShowColors=true, std::error_code *EC=nullptr)

Definition YAMLParser.cpp:857

void setError(const Twine &Message, StringRef::iterator Position)

Definition YAMLParser.cpp:265

Token getNext()

Parse the next token and pop it from the queue.

Definition YAMLParser.cpp:913

bool failed()

Returns true if an error occurred while parsing.

Definition YAMLParser.cpp:281

void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message, ArrayRef< SMRange > Ranges={})

Definition YAMLParser.cpp:260

Token & peekNext()

Parse the next token and return it without popping it.

Definition YAMLParser.cpp:886

Represents a YAML sequence created from either a block sequence or a flow sequence.

void increment()

Definition YAMLParser.cpp:2381

This class represents a YAML stream potentially containing multiple documents.

LLVM_ABI document_iterator end()

Definition YAMLParser.cpp:1947

LLVM_ABI document_iterator begin()

Definition YAMLParser.cpp:1936

LLVM_ABI Stream(StringRef Input, SourceMgr &, bool ShowColors=true, std::error_code *EC=nullptr)

This keeps a reference to the string referenced by Input.

Definition YAMLParser.cpp:1915

LLVM_ABI bool failed()

Definition YAMLParser.cpp:1925

LLVM_ABI void printError(Node *N, const Twine &Msg, SourceMgr::DiagKind Kind=SourceMgr::DK_Error)

Definition YAMLParser.cpp:1927

LLVM_ABI void skip()

Definition YAMLParser.cpp:1951

Iterator abstraction for Documents over a Stream.

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

@ C

The default llvm calling convention, compatible with C.

NodeAddr< FuncNode * > Func

LLVM_ABI bool isPrintable(int UCS)

Determines if a character is likely to be displayed correctly on the terminal.

LLVM_ABI bool dumpTokens(StringRef Input, raw_ostream &)

Dump all the tokens in this stream to OS.

Definition YAMLParser.cpp:607

LLVM_ABI std::optional< bool > parseBool(StringRef S)

Parse S as a bool according to https://yaml.org/type/bool.html.

Definition YAMLParser.cpp:771

LLVM_ABI bool scanTokens(StringRef Input)

Scans all tokens in input without outputting anything.

Definition YAMLParser.cpp:691

void skip(CollectionType &C)

LLVM_ABI std::string escape(StringRef Input, bool EscapePrintable=true)

Escape Input for a double quoted scalar; if EscapePrintable is true, all UTF8 sequences will be escap...

Definition YAMLParser.cpp:704

This is an optimization pass for GlobalISel generic memory operations.

std::error_code make_error_code(BitcodeError E)

testing::Matcher< const detail::ErrorHolder & > Failed()

void append_range(Container &C, Range &&R)

Wrapper function to append range R to container C.

std::string utohexstr(uint64_t X, bool LowerCase=false, unsigned Width=0)

bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)

bool isAlpha(char C)

Checks if character C is a valid letter as classified by "C" locale.

LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)

bool isAlnum(char C)

Checks whether character C is either a decimal digit or an uppercase or lowercase letter as classifie...

bool isa(const From &Val)

isa - Return true if the parameter to the template is an instance of one of the template type argu...

@ First

Helpers to iterate all locations in the MemoryEffectsBase class.

FunctionAddr VTableAddr Next

bool is_contained(R &&Range, const E &Element)

Returns true if Element is found in Range.

BumpPtrAllocatorImpl<> BumpPtrAllocator

The standard BumpPtrAllocator which just uses the default template parameters.

AllocatorList< T, BumpPtrAllocator > BumpPtrList

Token - A single YAML token.

Definition YAMLParser.cpp:124

TokenKind

Definition YAMLParser.cpp:125

@ TK_StreamEnd

Definition YAMLParser.cpp:128

@ TK_BlockMappingStart

Definition YAMLParser.cpp:136

@ TK_BlockSequenceStart

Definition YAMLParser.cpp:135

@ TK_FlowEntry

Definition YAMLParser.cpp:137

@ TK_DocumentStart

Definition YAMLParser.cpp:131

@ TK_Value

Definition YAMLParser.cpp:143

@ TK_Tag

Definition YAMLParser.cpp:148

@ TK_VersionDirective

Definition YAMLParser.cpp:129

@ TK_StreamStart

Definition YAMLParser.cpp:127

@ TK_Key

Definition YAMLParser.cpp:142

@ TK_Error

Definition YAMLParser.cpp:126

@ TK_BlockScalar

Definition YAMLParser.cpp:145

@ TK_BlockEntry

Definition YAMLParser.cpp:133

@ TK_BlockEnd

Definition YAMLParser.cpp:134

@ TK_Anchor

Definition YAMLParser.cpp:147

@ TK_Scalar

Definition YAMLParser.cpp:144

@ TK_DocumentEnd

Definition YAMLParser.cpp:132

@ TK_FlowSequenceStart

Definition YAMLParser.cpp:138

@ TK_TagDirective

Definition YAMLParser.cpp:130

@ TK_FlowSequenceEnd

Definition YAMLParser.cpp:139

@ TK_FlowMappingEnd

Definition YAMLParser.cpp:141

@ TK_FlowMappingStart

Definition YAMLParser.cpp:140

@ TK_Alias

Definition YAMLParser.cpp:146

enum llvm::yaml::Token::TokenKind Kind

std::string Value

The value of a block scalar node.

Definition YAMLParser.cpp:156

StringRef Range

A string of length 0 or more whose begin() points to the logical location of the token in the input.

Definition YAMLParser.cpp:153