LLVM: lib/TableGen/TGLexer.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

18#include "llvm/Config/config.h"

23#include

24#include

25#include

26#include

27

28using namespace llvm;

29

30namespace {

31

32

33struct PreprocessorDir {

35 StringRef Word;

36};

37}

38

39

40

41

42

43

49

55

56

57

60

61

62 const char *Next = Str.begin();

65

67

68

69 const char *End = Str.end();

73}

74

76 CurBuffer = SrcMgr.getMainFileID();

77 CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();

78 CurPtr = CurBuf.begin();

79 TokStart = nullptr;

80

81

82 PrepIncludeStack.emplace_back();

83

84

85

86 for (StringRef MacroName : Macros) {

88 if (End != MacroName.end())

90 "` specified on command line");

91

92 DefinedMacros.insert(MacroName);

93 }

94}

95

97

101

102

103

107}

108

111}

112

113bool TGLexer::processEOF() {

114 SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);

115 if (ParentIncludeLoc != SMLoc()) {

116

117

118

119

120 if (!prepExitInclude(false))

121 return false;

122

123 CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);

124 CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();

125 CurPtr = ParentIncludeLoc.getPointer();

126

127

128

129 TokStart = CurPtr;

130 return true;

131 }

132

133

134

135

136 prepExitInclude(true);

137 return false;

138}

139

140int TGLexer::getNextChar() {

141 char CurChar = *CurPtr++;

142 switch (CurChar) {

143 default:

144 return (unsigned char)CurChar;

145

146 case 0: {

147

148

149 if (CurPtr - 1 == CurBuf.end()) {

150 --CurPtr;

151 return EOF;

152 }

154 "NUL character is invalid in source; treated as space");

155 return ' ';

156 }

157

158 case '\n':

159 case '\r':

160

161

162

163 if ((*CurPtr == '\n' || (*CurPtr == '\r')) && *CurPtr != CurChar)

164 ++CurPtr;

165 return '\n';

166 }

167}

168

169int TGLexer::peekNextChar(int Index) const { return *(CurPtr + Index); }

170

171tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {

172 while (true) {

173 TokStart = CurPtr;

174

175 int CurChar = getNextChar();

176

177 switch (CurChar) {

178 default:

179

181 return LexIdentifier();

182

183

184 return ReturnError(TokStart, "unexpected character");

185 case EOF:

186

187 if (processEOF()) {

188

189

190 FileOrLineStart = false;

191 break;

192 }

193

194

196

197 case ':':

199 case ';':

201 case ',':

203 case '<':

205 case '>':

207 case ']':

209 case '{':

211 case '}':

213 case '(':

215 case ')':

217 case '=':

219 case '?':

221 case '#':

222 if (FileOrLineStart) {

225 return lexPreprocessor(Kind);

226 }

227

229

230

231

232 case '.':

233 if (peekNextChar(0) == '.') {

234 ++CurPtr;

235 if (peekNextChar(0) == '.') {

236 ++CurPtr;

238 }

239 return ReturnError(TokStart, "invalid '..' punctuation");

240 }

242

243 case '\r':

245

246 case ' ':

247 case '\t':

248

249 break;

250 case '\n':

251

252 FileOrLineStart = true;

253 break;

254 case '/':

255

256

257 if (*CurPtr == '/')

258 SkipBCPLComment();

259 else if (*CurPtr == '*') {

260 if (SkipCComment())

262 } else

263 return ReturnError(TokStart, "unexpected character");

264 break;

265 case '-':

266 case '+':

267 case '0':

268 case '1':

269 case '2':

270 case '3':

271 case '4':

272 case '5':

273 case '6':

274 case '7':

275 case '8':

276 case '9': {

277 int NextChar = 0;

279

280

281

282 int i = 0;

283 do {

284 NextChar = peekNextChar(i++);

285 } while (isDigit(NextChar));

286

287 if (NextChar == 'x' || NextChar == 'b') {

288

289

290 int NextNextChar = peekNextChar(i);

291 switch (NextNextChar) {

292 default:

293 break;

294 case '0':

295 case '1':

296 if (NextChar == 'b')

297 return LexNumber();

298 [[fallthrough]];

299 case '2':

300 case '3':

301 case '4':

302 case '5':

303 case '6':

304 case '7':

305 case '8':

306 case '9':

307 case 'a':

308 case 'b':

309 case 'c':

310 case 'd':

311 case 'e':

312 case 'f':

313 case 'A':

314 case 'B':

315 case 'C':

316 case 'D':

317 case 'E':

318 case 'F':

319 if (NextChar == 'x')

320 return LexNumber();

321 break;

322 }

323 }

324 }

325

327 return LexIdentifier();

328

329 return LexNumber();

330 }

331 case '"':

332 return LexString();

333 case '$':

334 return LexVarName();

335 case '[':

336 return LexBracket();

337 case '!':

338 return LexExclaim();

339 }

340 }

341}

342

343

345 const char *StrStart = CurPtr;

346

347 CurStrVal = "";

348

349 while (*CurPtr != '"') {

350

351 if (*CurPtr == 0 && CurPtr == CurBuf.end())

352 return ReturnError(StrStart, "end of file in string literal");

353

354 if (*CurPtr == '\n' || *CurPtr == '\r')

355 return ReturnError(StrStart, "end of line in string literal");

356

357 if (*CurPtr != '\\') {

358 CurStrVal += *CurPtr++;

359 continue;

360 }

361

362 ++CurPtr;

363

364 switch (*CurPtr) {

365 case '\\':

366 case '\'':

367 case '"':

368

369 CurStrVal += *CurPtr++;

370 break;

371 case 't':

372 CurStrVal += '\t';

373 ++CurPtr;

374 break;

375 case 'n':

376 CurStrVal += '\n';

377 ++CurPtr;

378 break;

379

380 case '\n':

381 case '\r':

382 return ReturnError(CurPtr, "escaped newlines not supported in tblgen");

383

384

385 case '\0':

386 if (CurPtr == CurBuf.end())

387 return ReturnError(StrStart, "end of file in string literal");

388 [[fallthrough]];

389 default:

390 return ReturnError(CurPtr, "invalid escape in string literal");

391 }

392 }

393

394 ++CurPtr;

396}

397

400 return ReturnError(TokStart, "invalid variable name");

401

402

403 const char *VarNameStart = CurPtr++;

404

406 ++CurPtr;

407

408 CurStrVal.assign(VarNameStart, CurPtr);

410}

411

413

414 const char *IdentStart = TokStart;

415

416

418 ++CurPtr;

419

420

421 StringRef Str(IdentStart, CurPtr - IdentStart);

422

451

452

453 switch (Kind) {

455 if (LexInclude())

457 return Lex();

459 CurStrVal.assign(Str.begin(), Str.end());

460 break;

461 default:

462 break;

463 }

464

466}

467

468

469

470bool TGLexer::LexInclude() {

471

474 return true;

477 return true;

478 }

479

480

481 std::string Filename = CurStrVal;

482 std::string IncludedFile;

483

485 IncludedFile);

486 if (!CurBuffer) {

487 PrintError(getLoc(), "could not find include file '" + Filename + "'");

488 return true;

489 }

490

491 Dependencies.insert(IncludedFile);

492

493 CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();

494 CurPtr = CurBuf.begin();

495

496 PrepIncludeStack.emplace_back();

497 return false;

498}

499

500

501

502void TGLexer::SkipBCPLComment() {

503 ++CurPtr;

504 auto EOLPos = CurBuf.find_first_of("\r\n", CurPtr - CurBuf.data());

505 CurPtr = (EOLPos == StringRef::npos) ? CurBuf.end() : CurBuf.data() + EOLPos;

506}

507

508

509

510bool TGLexer::SkipCComment() {

511 ++CurPtr;

512 unsigned CommentDepth = 1;

513

514 while (true) {

515 int CurChar = getNextChar();

516 switch (CurChar) {

517 case EOF:

518 PrintError(TokStart, "unterminated comment");

519 return true;

520 case '*':

521

522 if (CurPtr[0] != '/')

523 break;

524

525 ++CurPtr;

526 if (--CommentDepth == 0)

527 return false;

528 break;

529 case '/':

530

531 if (CurPtr[0] != '*')

532 break;

533 ++CurPtr;

534 ++CommentDepth;

535 break;

536 }

537 }

538}

539

540

541

542

543

545 unsigned Base = 0;

546 const char *NumStart;

547

548

549 if (CurPtr[-1] == '0') {

550 NumStart = CurPtr + 1;

551 if (CurPtr[0] == 'x') {

553 do

554 ++CurPtr;

556 } else if (CurPtr[0] == 'b') {

558 do

559 ++CurPtr;

560 while (CurPtr[0] == '0' || CurPtr[0] == '1');

561 }

562 }

563

564

565 bool IsMinus = false;

566

567

568 if (Base == 0) {

569

570 if (isDigit(CurPtr[0])) {

571 if (CurPtr[-1] == '-')

573 else if (CurPtr[-1] == '+')

575 }

576

578 NumStart = TokStart;

579 IsMinus = CurPtr[-1] == '-';

580

581 while (isDigit(CurPtr[0]))

582 ++CurPtr;

583 }

584

585

586 if (CurPtr == NumStart)

587 return ReturnError(TokStart, "invalid number");

588

589 errno = 0;

590 if (IsMinus)

591 CurIntVal = strtoll(NumStart, nullptr, Base);

592 else

593 CurIntVal = strtoull(NumStart, nullptr, Base);

594

595 if (errno == EINVAL)

596 return ReturnError(TokStart, "invalid number");

597 if (errno == ERANGE)

598 return ReturnError(TokStart, "number out of range");

599

601}

602

603

604

606 if (CurPtr[0] != '{')

608 ++CurPtr;

609 const char *CodeStart = CurPtr;

610 while (true) {

611 int Char = getNextChar();

612 if (Char == EOF)

613 break;

614

615 if (Char != '}')

616 continue;

617

618 Char = getNextChar();

619 if (Char == EOF)

620 break;

621 if (Char == ']') {

622 CurStrVal.assign(CodeStart, CurPtr - 2);

624 }

625 }

626

627 return ReturnError(CodeStart - 2, "unterminated code block");

628}

629

630

633 return ReturnError(CurPtr - 1, "invalid \"!operator\"");

634

635 const char *Start = CurPtr++;

637 ++CurPtr;

638

639

641 StringSwitchtgtok::TokKind(StringRef(Start, CurPtr - Start))

685 .Cases({"setdagop", "setop"},

687 .Cases({"getdagop", "getop"},

701

703 : ReturnError(Start - 1, "unknown operator");

704}

705

706bool TGLexer::prepExitInclude(bool IncludeStackMustBeEmpty) {

707

708

709 if (!PrepIncludeStack.back().empty()) {

710 prepReportPreprocessorStackError();

711

712 return false;

713 }

714

715

716 PrepIncludeStack.pop_back();

717

718 if (IncludeStackMustBeEmpty) {

719 assert(PrepIncludeStack.empty() &&

720 "preprocessor include stack is not empty");

721 } else {

722 assert(!PrepIncludeStack.empty() && "preprocessor include stack is empty");

723 }

724

725 return true;

726}

727

730 if (StringRef(CurPtr, Word.size()) != Word)

731 continue;

732 int NextChar = peekNextChar(Word.size());

733

734

735

736

737

738

739 if (NextChar == ' ' || NextChar == '\t' || NextChar == EOF ||

740 NextChar == '\n' ||

741

742

743

744 NextChar == '\r')

746

747

748

749

750

751

752

753

754

755

756

757

758

759 if (NextChar == '/') {

760 NextChar = peekNextChar(Word.size() + 1);

761

762 if (NextChar == '*' || NextChar == '/')

764

765

766 }

767 }

768

770}

771

772void TGLexer::prepEatPreprocessorDirective(tgtok::TokKind Kind) {

773 TokStart = CurPtr;

774

776 if (PKind == Kind) {

777

778 CurPtr += PWord.size();

779 return;

780 }

781 }

782

784 "unsupported preprocessing token in prepEatPreprocessorDirective()");

785}

786

788 bool ReturnNextLiveToken) {

789

790 prepEatPreprocessorDirective(Kind);

791

793 StringRef MacroName = prepLexMacroName();

794 StringRef IfTokName = Kind == tgtok::Ifdef ? "#ifdef" : "#ifndef";

795 if (MacroName.empty())

796 return ReturnError(TokStart, "expected macro name after " + IfTokName);

797

798 bool MacroIsDefined = DefinedMacros.count(MacroName) != 0;

799

800

802 MacroIsDefined = !MacroIsDefined;

803

804

805

806

807 PrepIncludeStack.back().push_back(

809

810 if (!prepSkipDirectiveEnd())

811 return ReturnError(CurPtr, "only comments are supported after " +

812 IfTokName + " NAME");

813

814

815

816 if (!ReturnNextLiveToken)

818

819

820

821 if (MacroIsDefined)

822 return LexToken();

823

824

825

826

827

828 if (prepSkipRegion(ReturnNextLiveToken))

829 return LexToken();

830

833

834

835 if (PrepIncludeStack.back().empty())

836 return ReturnError(TokStart, "#else without #ifdef or #ifndef");

837

838 PreprocessorControlDesc IfdefEntry = PrepIncludeStack.back().back();

839

841 PrintError(TokStart, "double #else");

842 return ReturnError(IfdefEntry.SrcPos, "previous #else is here");

843 }

844

845

846

847 PrepIncludeStack.back().back() = {Kind, !IfdefEntry.IsDefined,

849

850 if (!prepSkipDirectiveEnd())

851 return ReturnError(CurPtr, "only comments are supported after #else");

852

853

854

855 if (ReturnNextLiveToken) {

856 if (prepSkipRegion(ReturnNextLiveToken))

857 return LexToken();

858

860 }

861

862

865

866

867 if (PrepIncludeStack.back().empty())

868 return ReturnError(TokStart, "#endif without #ifdef");

869

870 [[maybe_unused]] auto &IfdefOrElseEntry = PrepIncludeStack.back().back();

871

874 "invalid preprocessor control on the stack");

875

876 if (!prepSkipDirectiveEnd())

877 return ReturnError(CurPtr, "only comments are supported after #endif");

878

879 PrepIncludeStack.back().pop_back();

880

881

882

883 if (ReturnNextLiveToken) {

884 return LexToken();

885 }

886

887

890 StringRef MacroName = prepLexMacroName();

891 if (MacroName.empty())

892 return ReturnError(TokStart, "expected macro name after #define");

893

894 if (!DefinedMacros.insert(MacroName).second)

896 "duplicate definition of macro: " + Twine(MacroName));

897

898 if (!prepSkipDirectiveEnd())

899 return ReturnError(CurPtr,

900 "only comments are supported after #define NAME");

901

902 assert(ReturnNextLiveToken &&

903 "#define must be ignored during the lines skipping");

904

905 return LexToken();

906 }

907

908 llvm_unreachable("preprocessing directive is not supported");

909}

910

911bool TGLexer::prepSkipRegion(bool MustNeverBeFalse) {

912 assert(MustNeverBeFalse && "invalid recursion.");

913

914 do {

915

916 while (*CurPtr != '\n')

917 ++CurPtr;

918

919

920 if (!prepSkipLineBegin())

921 return false;

922

923

924

925

926

927 if (*CurPtr == '#')

928 ++CurPtr;

929 else

930 continue;

931

933

934

935

936

938 continue;

939

940 tgtok::TokKind ProcessedKind = lexPreprocessor(Kind, false);

941

942

943

944

946 return false;

947

948 assert(Kind == ProcessedKind && "prepIsDirective() and lexPreprocessor() "

949 "returned different token kinds");

950

951

952

953

954

955 if (prepIsProcessingEnabled()) {

957 "tokens processing was enabled by an unexpected preprocessing "

958 "directive");

959

960 return true;

961 }

962 } while (CurPtr != CurBuf.end());

963

964

965

966 prepReportPreprocessorStackError();

967 return false;

968}

969

970StringRef TGLexer::prepLexMacroName() {

971

972 while (*CurPtr == ' ' || *CurPtr == '\t')

973 ++CurPtr;

974

975 TokStart = CurPtr;

976 CurPtr = lexMacroName(StringRef(CurPtr, CurBuf.end() - CurPtr));

977 return StringRef(TokStart, CurPtr - TokStart);

978}

979

980bool TGLexer::prepSkipLineBegin() {

981 while (CurPtr != CurBuf.end()) {

982 switch (*CurPtr) {

983 case ' ':

984 case '\t':

985 case '\n':

986 case '\r':

987 break;

988

989 case '/': {

990 int NextChar = peekNextChar(1);

991 if (NextChar == '*') {

992

993

994

995

996

997

998

999

1000

1001

1002 TokStart = CurPtr;

1003

1004

1005 ++CurPtr;

1006 if (SkipCComment())

1007 return false;

1008 } else {

1009

1010 return true;

1011 }

1012

1013

1014 continue;

1015 }

1016

1017 default:

1018 return true;

1019 }

1020

1021 ++CurPtr;

1022 }

1023

1024

1025

1026 return true;

1027}

1028

1029bool TGLexer::prepSkipDirectiveEnd() {

1030 while (CurPtr != CurBuf.end()) {

1031 switch (*CurPtr) {

1032 case ' ':

1033 case '\t':

1034 break;

1035

1036 case '\n':

1037 case '\r':

1038 return true;

1039

1040 case '/': {

1041 int NextChar = peekNextChar(1);

1042 if (NextChar == '/') {

1043

1044

1045

1046 ++CurPtr;

1047 SkipBCPLComment();

1048 } else if (NextChar == '*') {

1049

1050

1051

1052

1053

1054

1055

1056

1057

1058

1059

1060

1061

1062

1063

1064 TokStart = CurPtr;

1065 ++CurPtr;

1066 if (SkipCComment())

1067 return false;

1068 } else {

1069 TokStart = CurPtr;

1070 PrintError(CurPtr, "unexpected character");

1071 return false;

1072 }

1073

1074

1075 continue;

1076 }

1077

1078 default:

1079

1080 TokStart = CurPtr;

1081 return false;

1082 }

1083

1084 ++CurPtr;

1085 }

1086

1087 return true;

1088}

1089

1090bool TGLexer::prepIsProcessingEnabled() {

1091 return all_of(PrepIncludeStack.back(),

1092 [](const PreprocessorControlDesc &I) { return I.IsDefined; });

1093}

1094

1095void TGLexer::prepReportPreprocessorStackError() {

1096 auto &PrepControl = PrepIncludeStack.back().back();

1097 PrintError(CurBuf.end(), "reached EOF without matching #endif");

1098 PrintError(PrepControl.SrcPos, "the latest preprocessor control is here");

1099

1100 TokStart = CurPtr;

1101}

assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...

constexpr PreprocessorDir PreprocessorDirs[]

Definition TGLexer.cpp:50

static bool isValidIDChar(char C, bool First)

Returns true if C is a valid character in an identifier.

Definition TGLexer.cpp:44

static const char * lexMacroName(StringRef Str)

Definition TGLexer.cpp:58

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...

Represents a location in source code.

static SMLoc getFromPointer(const char *Ptr)

constexpr const char * getPointer() const

Represents a range in source code.

This owns the files read by a parser, handles include stacks, and handles diagnostic wrangling.

StringRef - Represent a constant reference to a string, i.e.

static constexpr size_t npos

constexpr bool empty() const

empty - Check if the string is empty.

SMRange getLocRange() const

Definition TGLexer.cpp:98

SMLoc getLoc() const

Definition TGLexer.cpp:96

TGLexer(SourceMgr &SrcMgr, ArrayRef< std::string > Macros)

Definition TGLexer.cpp:75

Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

@ C

The default llvm calling convention, compatible with C.

support::ulittle32_t Word

This is an optimization pass for GlobalISel generic memory operations.

bool all_of(R &&range, UnaryPredicate P)

Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.

void PrintFatalError(const Twine &Msg)

void PrintError(const Twine &Msg)

void PrintWarning(const Twine &Msg)

bool isAlpha(char C)

Checks if character C is a valid letter as classified by "C" locale.

bool isDigit(char C)

Checks if character C is one of the 10 decimal digits.

@ First

Helpers to iterate all locations in the MemoryEffectsBase class.

FunctionAddr VTableAddr Next

bool isHexDigit(char C)

Checks if character C is a hexadecimal numeric character.