LLVM: lib/MC/MCParser/AsmLexer.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

24#include

25#include

26#include

27#include

28#include

29#include

30#include

31

32using namespace llvm;

33

37}

38

40

42 bool EndStatementAtEOF) {

43 CurBuf = Buf;

44

45 if (ptr)

46 CurPtr = ptr;

47 else

48 CurPtr = CurBuf.begin();

49

51 this->EndStatementAtEOF = EndStatementAtEOF;

52}

53

54

55

56AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {

58

60}

61

62int AsmLexer::getNextChar() {

63 if (CurPtr == CurBuf.end())

64 return EOF;

65 return (unsigned char)*CurPtr++;

66}

67

68int AsmLexer::peekNextChar() {

69 if (CurPtr == CurBuf.end())

70 return EOF;

71 return (unsigned char)*CurPtr;

72}

73

74

75

76

77AsmToken AsmLexer::LexFloatLiteral() {

78

80 ++CurPtr;

81

82 if (*CurPtr == '-' || *CurPtr == '+')

83 return ReturnError(CurPtr, "invalid sign in float literal");

84

85

86 if ((*CurPtr == 'e' || *CurPtr == 'E')) {

87 ++CurPtr;

88

89 if (*CurPtr == '-' || *CurPtr == '+')

90 ++CurPtr;

91

93 ++CurPtr;

94 }

95

98}

99

100

101

102

103

104

105

106AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) {

107 assert((*CurPtr == 'p' || *CurPtr == 'P' || *CurPtr == '.') &&

108 "unexpected parse state in floating hex");

109 bool NoFracDigits = true;

110

111

112 if (*CurPtr == '.') {

113 ++CurPtr;

114

115 const char *FracStart = CurPtr;

117 ++CurPtr;

118

119 NoFracDigits = CurPtr == FracStart;

120 }

121

122 if (NoIntDigits && NoFracDigits)

123 return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "

124 "expected at least one significand digit");

125

126

127 if (*CurPtr != 'p' && *CurPtr != 'P')

128 return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "

129 "expected exponent part 'p'");

130 ++CurPtr;

131

132 if (*CurPtr == '+' || *CurPtr == '-')

133 ++CurPtr;

134

135

136 const char *ExpStart = CurPtr;

138 ++CurPtr;

139

140 if (CurPtr == ExpStart)

141 return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "

142 "expected at least one exponent digit");

143

145}

146

147

149 return isAlnum(C) || C == '_' || C == '$' || C == '.' || C == '?' ||

150 (AllowAt && C == '@') || (AllowHash && C == '#');

151}

152

153AsmToken AsmLexer::LexIdentifier() {

154

155 if (CurPtr[-1] == '.' && isDigit(*CurPtr)) {

156

158 ++CurPtr;

159

162 *CurPtr == 'e' || *CurPtr == 'E')

163 return LexFloatLiteral();

164 }

165

167 ++CurPtr;

168

169

172

174}

175

176

177

178

179AsmToken AsmLexer::LexSlash() {

181 IsAtStartOfStatement = false;

183 }

184

185 switch (*CurPtr) {

186 case '*':

187 IsAtStartOfStatement = false;

188 break;

189 case '/':

190 ++CurPtr;

191 return LexLineComment();

192 default:

193 IsAtStartOfStatement = false;

195 }

196

197

198 ++CurPtr;

199 const char *CommentTextStart = CurPtr;

200 while (CurPtr != CurBuf.end()) {

201 switch (*CurPtr++) {

202 case '*':

203

204 if (*CurPtr != '/')

205 break;

206

210 StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart));

211 }

212 ++CurPtr;

215 }

216 }

217 return ReturnError(TokStart, "unterminated comment");

218}

219

220

221

222AsmToken AsmLexer::LexLineComment() {

223

224

225

226

227 const char *CommentTextStart = CurPtr;

228 int CurChar = getNextChar();

229 while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF)

230 CurChar = getNextChar();

231 const char *NewlinePtr = CurPtr;

232 if (CurChar == '\r' && CurPtr != CurBuf.end() && *CurPtr == '\n')

233 ++CurPtr;

234

235

239 StringRef(CommentTextStart, NewlinePtr - 1 - CommentTextStart));

240 }

241

242 IsAtStartOfLine = true;

243

244 if (IsAtStartOfStatement)

247 IsAtStartOfStatement = true;

248

251}

252

254

255 if (CurPtr[0] == 'U' || CurPtr[0] == 'u')

256 ++CurPtr;

257 if (CurPtr[0] == 'L' || CurPtr[0] == 'l')

258 ++CurPtr;

259 if (CurPtr[0] == 'L' || CurPtr[0] == 'l')

260 ++CurPtr;

261}

262

263

264

265static unsigned doHexLookAhead(const char *&CurPtr, unsigned DefaultRadix,

266 bool LexHex) {

267 const char *FirstNonDec = nullptr;

268 const char *LookAhead = CurPtr;

269 while (true) {

270 if (isDigit(*LookAhead)) {

271 ++LookAhead;

272 } else {

273 if (!FirstNonDec)

274 FirstNonDec = LookAhead;

275

276

277 if (LexHex && isHexDigit(*LookAhead))

278 ++LookAhead;

279 else

280 break;

281 }

282 }

283 bool isHex = LexHex && (*LookAhead == 'h' || *LookAhead == 'H');

284 CurPtr = isHex || !FirstNonDec ? LookAhead : FirstNonDec;

285 if (isHex)

286 return 16;

287 return DefaultRadix;

288}

289

290static const char *findLastDigit(const char *CurPtr, unsigned DefaultRadix) {

291 while (hexDigitValue(*CurPtr) < DefaultRadix) {

292 ++CurPtr;

293 }

294 return CurPtr;

295}

296

298 if (Value.isIntN(64))

301}

302

/// Produce a human-readable name for a numeric radix, for use in diagnostics.
///
/// \param Radix  The numeric base.
/// \returns "binary", "octal", "decimal" or "hexadecimal" for 2, 8, 10 and 16
///          respectively; "base-<Radix>" for any other value.
static std::string radixName(unsigned Radix) {
  if (Radix == 2)
    return "binary";
  if (Radix == 8)
    return "octal";
  if (Radix == 10)
    return "decimal";
  if (Radix == 16)
    return "hexadecimal";

  // No conventional name: fall back to a generic description.
  std::string Generic = "base-";
  Generic += std::to_string(Radix);
  return Generic;
}

317

318

319

320

321

322

323

324

325AsmToken AsmLexer::LexDigit() {

326

327

328

329

331 const char *FirstNonBinary =

332 (CurPtr[-1] != '0' && CurPtr[-1] != '1') ? CurPtr - 1 : nullptr;

333 const char *FirstNonDecimal =

334 (CurPtr[-1] < '0' || CurPtr[-1] > '9') ? CurPtr - 1 : nullptr;

335 const char *OldCurPtr = CurPtr;

337 switch (*CurPtr) {

338 default:

339 if (!FirstNonDecimal) {

340 FirstNonDecimal = CurPtr;

341 }

342 [[fallthrough]];

343 case '9':

344 case '8':

345 case '7':

346 case '6':

347 case '5':

348 case '4':

349 case '3':

350 case '2':

351 if (!FirstNonBinary) {

352 FirstNonBinary = CurPtr;

353 }

354 break;

355 case '1':

356 case '0':

357 break;

358 }

359 ++CurPtr;

360 }

361 if (*CurPtr == '.') {

362

363

364 ++CurPtr;

365 return LexFloatLiteral();

366 }

367

368 if (LexMasmHexFloats && (*CurPtr == 'r' || *CurPtr == 'R')) {

369 ++CurPtr;

371 }

372

373 unsigned Radix = 0;

374 if (*CurPtr == 'h' || *CurPtr == 'H') {

375

376 ++CurPtr;

377 Radix = 16;

378 } else if (*CurPtr == 't' || *CurPtr == 'T') {

379

380 ++CurPtr;

381 Radix = 10;

382 } else if (*CurPtr == 'o' || *CurPtr == 'O' || *CurPtr == 'q' ||

383 *CurPtr == 'Q') {

384

385 ++CurPtr;

386 Radix = 8;

387 } else if (*CurPtr == 'y' || *CurPtr == 'Y') {

388

389 ++CurPtr;

390 Radix = 2;

391 } else if (FirstNonDecimal && FirstNonDecimal + 1 == CurPtr &&

393 (*FirstNonDecimal == 'd' || *FirstNonDecimal == 'D')) {

394 Radix = 10;

395 } else if (FirstNonBinary && FirstNonBinary + 1 == CurPtr &&

397 (*FirstNonBinary == 'b' || *FirstNonBinary == 'B')) {

398 Radix = 2;

399 }

400

401 if (Radix) {

404

405 if (Result.drop_back().getAsInteger(Radix, Value))

406 return ReturnError(TokStart, "invalid " + radixName(Radix) + " number");

407

408

410

412 }

413

414

415 CurPtr = OldCurPtr;

416 }

417

418

419

423

428 }

429

431 }

432

433

435 const char *NumStart = CurPtr;

437 ++CurPtr;

438

440 if (StringRef(NumStart, CurPtr - NumStart).getAsInteger(16, Result))

441 return ReturnError(TokStart, "invalid hexadecimal number");

442

444 }

445

446

448 const char *NumStart = CurPtr;

449 while (*CurPtr == '0' || *CurPtr == '1')

450 ++CurPtr;

451

453 if (StringRef(NumStart, CurPtr - NumStart).getAsInteger(2, Result))

454 return ReturnError(TokStart, "invalid binary number");

455

457 }

458

459

460

461

462

463 if (LexHLASMIntegers || CurPtr[-1] != '0' || CurPtr[0] == '.') {

465

467 bool IsHex = Radix == 16;

468

469 if (!IsHex && (*CurPtr == '.' || *CurPtr == 'e' || *CurPtr == 'E')) {

470 if (*CurPtr == '.')

471 ++CurPtr;

472 return LexFloatLiteral();

473 }

474 }

475

477

480 return ReturnError(TokStart, "invalid " + radixName(Radix) + " number");

481

483

484

486

488 }

489

490 if (LexMasmIntegers && ((*CurPtr == 'b') || (*CurPtr == 'B'))) {

491 ++CurPtr;

492

493 if (isDigit(CurPtr[0])) {

494 --CurPtr;

497 }

498 const char *NumStart = CurPtr;

499 while (CurPtr[0] == '0' || CurPtr[0] == '1')

500 ++CurPtr;

501

502

503 if (CurPtr == NumStart)

504 return ReturnError(TokStart, "invalid binary number");

505

507

509 if (Result.substr(2).getAsInteger(2, Value))

510 return ReturnError(TokStart, "invalid binary number");

511

512

513

515

517 }

518

519 if ((*CurPtr == 'x') || (*CurPtr == 'X')) {

520 ++CurPtr;

521 const char *NumStart = CurPtr;

523 ++CurPtr;

524

525

526

527 if (CurPtr[0] == '.' || CurPtr[0] == 'p' || CurPtr[0] == 'P')

528 return LexHexFloatLiteral(NumStart == CurPtr);

529

530

531 if (CurPtr == NumStart)

532 return ReturnError(CurPtr-2, "invalid hexadecimal number");

533

536 return ReturnError(TokStart, "invalid hexadecimal number");

537

538

539 if (LexMasmIntegers && (*CurPtr == 'h' || *CurPtr == 'H'))

540 ++CurPtr;

541

542

543

545

547 }

548

549

554 return ReturnError(TokStart, "invalid " + radixName(Radix) + " number");

555

556

557 if (Radix == 16)

558 ++CurPtr;

559

560

561

563

565}

566

567

568AsmToken AsmLexer::LexSingleQuote() {

569 int CurChar = getNextChar();

570

572 return ReturnError(TokStart, "invalid usage of character literals");

573

575 while (CurChar != EOF) {

576 if (CurChar != '\'') {

577 CurChar = getNextChar();

578 } else if (peekNextChar() == '\'') {

579

580

581 (void)getNextChar();

582 CurChar = getNextChar();

583 } else {

584 break;

585 }

586 }

587 if (CurChar == EOF)

588 return ReturnError(TokStart, "unterminated string constant");

590 }

591

592 if (CurChar == '\\')

593 CurChar = getNextChar();

594

595 if (CurChar == EOF)

596 return ReturnError(TokStart, "unterminated single quote");

597

598 CurChar = getNextChar();

599

600 if (CurChar != '\'')

601 return ReturnError(TokStart, "single quote way too long");

602

603

604

607

609 char theChar = Res[2];

610 switch (theChar) {

611 default: Value = theChar; break;

612 case '\'': Value = '\''; break;

613 case 't': Value = '\t'; break;

614 case 'n': Value = '\n'; break;

615 case 'b': Value = '\b'; break;

616 case 'f': Value = '\f'; break;

617 case 'r': Value = '\r'; break;

618 }

619 } else

621

623}

624

625

626AsmToken AsmLexer::LexQuote() {

627 int CurChar = getNextChar();

629 return ReturnError(TokStart, "invalid usage of string literals");

630

632 while (CurChar != EOF) {

633 if (CurChar != '"') {

634 CurChar = getNextChar();

635 } else if (peekNextChar() == '"') {

636

637

638 (void)getNextChar();

639 CurChar = getNextChar();

640 } else {

641 break;

642 }

643 }

644 if (CurChar == EOF)

645 return ReturnError(TokStart, "unterminated string constant");

647 }

648

649 while (CurChar != '"') {

650 if (CurChar == '\\') {

651

652 CurChar = getNextChar();

653 }

654

655 if (CurChar == EOF)

656 return ReturnError(TokStart, "unterminated string constant");

657

658 CurChar = getNextChar();

659 }

660

662}

663

666

667 while (!isAtStartOfComment(CurPtr) &&

668 !isAtStatementSeparator(CurPtr) &&

669 *CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) {

670 ++CurPtr;

671 }

673}

674

675StringRef AsmLexer::LexUntilEndOfLine() {

677

678 while (*CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) {

679 ++CurPtr;

680 }

682}

683

685 bool ShouldSkipSpace) {

689 SaveAndRestore SavedAtStartOfStatement(IsAtStartOfStatement);

692 std::string SavedErr = getErr();

694

695 size_t ReadCount;

696 for (ReadCount = 0; ReadCount < Buf.size(); ++ReadCount) {

698

699 Buf[ReadCount] = Token;

700

702 break;

703 }

704

705 SetError(SavedErrLoc, SavedErr);

706 return ReadCount;

707}

708

709bool AsmLexer::isAtStartOfComment(const char *Ptr) {

710 if (MAI.isHLASM() && !IsAtStartOfStatement)

711 return false;

712

714

715 if (CommentString.size() == 1)

716 return CommentString[0] == Ptr[0];

717

718

719 if (CommentString[1] == '#')

720 return CommentString[0] == Ptr[0];

721

722 return strncmp(Ptr, CommentString.data(), CommentString.size()) == 0;

723}

724

725bool AsmLexer::isAtStatementSeparator(const char *Ptr) {

728}

729

732

733 int CurChar = getNextChar();

734

735 if (!IsPeeking && CurChar == '#' && IsAtStartOfStatement) {

736

737

741

744 CurPtr = TokStart;

745 StringRef s = LexUntilEndOfLine();

746 UnLex(TokenBuf[1]);

747 UnLex(TokenBuf[0]);

749 }

750

752 return LexLineComment();

753 }

754

755 if (isAtStartOfComment(TokStart))

756 return LexLineComment();

757

758 if (isAtStatementSeparator(TokStart)) {

760 IsAtStartOfLine = true;

761 IsAtStartOfStatement = true;

764 }

765

766

767

768 if (CurChar == EOF && !IsAtStartOfStatement && EndStatementAtEOF) {

769 IsAtStartOfLine = true;

770 IsAtStartOfStatement = true;

772 }

773 IsAtStartOfLine = false;

774 bool OldIsAtStartOfStatement = IsAtStartOfStatement;

775 IsAtStartOfStatement = false;

776 switch (CurChar) {

777 default:

778

779

780

781

782 if (isalpha(CurChar) || CurChar == '_' || CurChar == '.')

783 return LexIdentifier();

784

785

786 return ReturnError(TokStart, "invalid character in input");

787 case EOF:

788 if (EndStatementAtEOF) {

789 IsAtStartOfLine = true;

790 IsAtStartOfStatement = true;

791 }

793 case 0:

794 case ' ':

795 case '\t':

796 IsAtStartOfStatement = OldIsAtStartOfStatement;

797 while (*CurPtr == ' ' || *CurPtr == '\t')

798 CurPtr++;

800 return LexToken();

801 else

803 case '\r': {

804 IsAtStartOfLine = true;

805 IsAtStartOfStatement = true;

806

807 if (CurPtr != CurBuf.end() && *CurPtr == '\n')

808 ++CurPtr;

811 }

812 case '\n':

813 IsAtStartOfLine = true;

814 IsAtStartOfStatement = true;

827 case '$': {

829 return LexDigit();

831 return LexIdentifier();

833 }

834 case '@':

836 return LexIdentifier();

838 case '#':

840 return LexIdentifier();

842 case '?':

844 return LexIdentifier();

847 case '=':

848 if (*CurPtr == '=') {

849 ++CurPtr;

851 }

853 case '-':

854 if (*CurPtr == '>') {

855 ++CurPtr;

857 }

859 case '|':

860 if (*CurPtr == '|') {

861 ++CurPtr;

863 }

866 case '&':

867 if (*CurPtr == '&') {

868 ++CurPtr;

870 }

872 case '!':

873 if (*CurPtr == '=') {

874 ++CurPtr;

876 }

878 case '%':

880 return LexDigit();

881 }

882

885 unsigned OperatorLength;

886

887 std::tie(Operator, OperatorLength) =

915

917 CurPtr += OperatorLength - 1;

919 }

920 }

922 case '/':

923 IsAtStartOfStatement = OldIsAtStartOfStatement;

924 return LexSlash();

925 case '\'': return LexSingleQuote();

926 case '"': return LexQuote();

927 case '0': case '1': case '2': case '3': case '4':

928 case '5': case '6': case '7': case '8': case '9':

929 return LexDigit();

930 case '<':

931 switch (*CurPtr) {

932 case '<':

933 ++CurPtr;

935 case '=':

936 ++CurPtr;

938 case '>':

939 ++CurPtr;

941 default:

943 }

944 case '>':

945 switch (*CurPtr) {

946 case '>':

947 ++CurPtr;

949 case '=':

950 ++CurPtr;

952 default:

954 }

955

956

957

958

959

960 }

961}

This file implements a class to represent arbitrary precision integral constant values and operations...

static std::string radixName(unsigned Radix)

static void SkipIgnoredIntegerSuffix(const char *&CurPtr)

static unsigned doHexLookAhead(const char *&CurPtr, unsigned DefaultRadix, bool LexHex)

static AsmToken intToken(StringRef Ref, APInt &Value)

static const char * findLastDigit(const char *CurPtr, unsigned DefaultRadix)

static bool isIdentifierChar(char C)

Return true if the given character satisfies the following regular expression: [-a-zA-Z$....

static bool isDigit(const char C)

static bool isHexDigit(const char C)

assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

This file provides utility classes that use RAII to save and restore values.

This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...

Class for arbitrary precision integers.

size_t size() const

size - Get the array size.

size_t peekTokens(MutableArrayRef< AsmToken > Buf, bool ShouldSkipSpace=true) override

Look ahead an arbitrary number of tokens.

AsmLexer(const MCAsmInfo &MAI)

StringRef LexUntilEndOfStatement() override

void setBuffer(StringRef Buf, const char *ptr=nullptr, bool EndStatementAtEOF=true)

AsmToken LexToken() override

LexToken - Read the next token and return its code.

Target independent representation for an assembler token.

bool is(TokenKind K) const

This class is intended to be used as a base class for asm properties and features specific to the tar...

bool doesAllowDollarAtStartOfIdentifier() const

bool hasMipsExpressions() const

bool shouldUseMotorolaIntegers() const

StringRef getCommentString() const

const char * getSeparatorString() const

bool doesAllowAtAtStartOfIdentifier() const

bool shouldAllowAdditionalComments() const

bool doesAllowQuestionAtStartOfIdentifier() const

void UnLex(AsmToken const &Token)

AsmCommentConsumer * CommentConsumer

SMLoc getErrLoc()

Get the current error location.

bool is(AsmToken::TokenKind K) const

Check if the current token has kind K.

void SetError(SMLoc errLoc, const std::string &err)

bool AllowHashInIdentifier

const std::string & getErr()

Get the current error string.

MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...

This is a utility class that provides an abstraction for the common functionality between Instruction...

Represents a location in source code.

static SMLoc getFromPointer(const char *Ptr)

StringRef - Represent a constant reference to a string, i.e.

bool starts_with(StringRef Prefix) const

Check if this string starts with the given Prefix.

constexpr size_t size() const

size - Get the string size.

constexpr const char * data() const

data - Get a pointer to the start of the string (which may not be null terminated).

A switch()-like statement whose cases are string literals.

LLVM Value Representation.

@ C

The default llvm calling convention, compatible with C.

This is an optimization pass for GlobalISel generic memory operations.

@ Ref

The access may reference the value stored in memory.

@ Default

The result values are uniform if and only if all operands are uniform.

A utility class that uses RAII to save and restore the value of a variable.