LLVM: lib/Target/X86/Disassembler/X86Disassembler.cpp Source File (original) (raw)

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

90

91using namespace llvm;

93

94#define DEBUG_TYPE "x86-disassembler"

95

96#define debug(s) LLVM_DEBUG(dbgs() << __LINE__ << ": " << s);

97

98

99

100

104};

105

106

107

110};

111

112

113

114

115

116

119};

120

121#include "X86GenDisassemblerTables.inc"

122

126

127 switch (type) {

129 dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];

130 break;

132 dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];

133 break;

135 dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];

136 break;

138 dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];

139 break;

141 dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];

142 break;

144 dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];

145 break;

147 dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];

148 break;

150 dec =

151 &THREEDNOW_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];

152 break;

154 dec = &MAP4_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];

155 break;

157 dec = &MAP5_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];

158 break;

160 dec = &MAP6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];

161 break;

163 dec = &MAP7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];

164 break;

165 }

166

168 default:

170 return 0;

171 case MODRM_ONEENTRY:

173 case MODRM_SPLITRM:

177 case MODRM_SPLITREG:

179 return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3) + 8];

180 return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)];

181 case MODRM_SPLITMISC:

183 return modRMTable[dec->instructionIDs + (modRM & 0x3f) + 8];

184 return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)];

185 case MODRM_FULL:

187 }

188}

189

192 if (offset >= insn->bytes.size())

193 return true;

194 byte = insn->bytes[offset];

195 return false;

196}

197

199 auto r = insn->bytes;

201 if (offset + sizeof(T) > r.size())

202 return true;

205 return false;

206}

207

209 return insn->mode == MODE_64BIT && prefix >= 0x40 && prefix <= 0x4f;

210}

211

214}

215

216

217

218

219

220

225

227

229

230

232 break;

233

234

235

237 break;

238

239 if ((byte == 0xf2 || byte == 0xf3) && peek(insn, nextByte)) {

240

241

242

243

244

248 if (!(byte == 0xf3 && nextByte == 0x90))

249 break;

250 }

251

252

253

254

258 break;

259 }

262

263 if (consume(insn, nnextByte))

264 return -1;

265

266 if (peek(insn, nnextByte))

267 return -1;

269 }

270 }

271

272 switch (byte) {

273 case 0xf0:

275 break;

276 case 0xf2:

277 case 0xf3: {

280 break;

281

282

283

284

285

286

287

288

291

294 break;

295 }

296 case 0x2e:

298 break;

299 case 0x36:

301 break;

302 case 0x3e:

304 break;

305 case 0x26:

307 break;

308 case 0x64:

310 break;

311 case 0x65:

313 break;

314 case 0x66: {

318 break;

319

322 break;

323 }

324 case 0x67:

326 break;

327 default:

329 break;

330 }

331

332 if (isREX(insn, byte)) {

338 }

339

342 }

343

345

346 if (byte == 0x62) {

348 if (consume(insn, byte1)) {

349 LLVM_DEBUG(dbgs() << "Couldn't read second byte of EVEX prefix");

350 return -1;

351 }

352

353 if (peek(insn, byte2)) {

354 LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix");

355 return -1;

356 }

357

358 if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)) {

360 } else {

363 }

364

369 LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix");

370 return -1;

371 }

373 LLVM_DEBUG(dbgs() << "Couldn't read fourth byte of EVEX prefix");

374 return -1;

375 }

376

378

384

385

390 }

391

394 "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",

397 }

398 } else if (byte == 0xc4) {

400 if (peek(insn, byte1)) {

401 LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX");

402 return -1;

403 }

404

405 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)

407 else

409

414

415

416

423

428 }

429 } else if (byte == 0xc5) {

431 if (peek(insn, byte1)) {

432 LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX");

433 return -1;

434 }

435

436 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)

438 else

440

444

448

450 default:

451 break;

454 break;

455 }

456

460 }

461 } else if (byte == 0x8f) {

463 if (peek(insn, byte1)) {

464 LLVM_DEBUG(dbgs() << "Couldn't read second byte of XOP");

465 return -1;

466 }

467

468 if ((byte1 & 0x38) != 0x0)

470 else

472

477

478

479

486

488 default:

489 break;

492 break;

493 }

494

499 }

500 } else if (isREX2(insn, byte)) {

502 if (peek(insn, byte1)) {

503 LLVM_DEBUG(dbgs() << "Couldn't read second byte of REX2");

504 return -1;

505 }

508

509

517 } else

519

537 } else {

541 }

542 }

543

544 return 0;

545}

546

547

551

554 case 2:

555 default:

556 llvm_unreachable("SIB-based addressing doesn't work in 16-bit mode");

557 case 4:

559 sibBaseBase = SIB_BASE_EAX;

560 break;

561 case 8:

563 sibBaseBase = SIB_BASE_RAX;

564 break;

565 }

566

568 return -1;

569

572

573 if (index == 0x4) {

575 } else {

577 }

578

580

583

584 switch (base) {

585 case 0x5:

586 case 0xd:

588 case 0x0:

591 break;

592 case 0x1:

595 break;

596 case 0x2:

599 break;

600 default:

602 }

603 break;

604 default:

606 break;

607 }

608

609 return 0;

610}

611

613 int8_t d8;

614 int16_t d16;

615 int32_t d32;

617

621 break;

624 return -1;

626 break;

629 return -1;

631 break;

634 return -1;

636 break;

637 }

638

639 return 0;

640}

641

642

646

648 return 0;

649

651 return -1;

653

657

658

659

660

662 case 2:

663 insn->regBase = MODRM_REG_AX;

665 break;

666 case 4:

667 insn->regBase = MODRM_REG_EAX;

669 break;

670 case 8:

671 insn->regBase = MODRM_REG_RAX;

673 break;

674 }

675

680

683

685

687 case 2: {

688 EABase eaBaseBase = EA_BASE_BX_SI;

689

690 switch (mod) {

691 case 0x0:

692 if (rm == 0x6) {

696 return -1;

697 } else {

700 }

701 break;

702 case 0x1:

707 return -1;

708 break;

709 case 0x2:

713 return -1;

714 break;

715 case 0x3:

718 return -1;

719 break;

720 }

721 break;

722 }

723 case 4:

724 case 8: {

725 EABase eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);

726

727 switch (mod) {

728 case 0x0:

730

731

732

733 switch (rm & 7) {

734 case 0x4:

735 insn->eaBase = (insn->addressSize == 4 ? EA_BASE_sib : EA_BASE_sib64);

737 return -1;

738 break;

739 case 0x5:

743 return -1;

744 break;

745 default:

747 break;

748 }

749 break;

750 case 0x1:

752 [[fallthrough]];

753 case 0x2:

755 switch (rm & 7) {

756 case 0x4:

757 insn->eaBase = EA_BASE_sib;

759 return -1;

760 break;

761 default:

764 return -1;

765 break;

766 }

767 break;

768 case 0x3:

771 break;

772 }

773 break;

774 }

775 }

776

777 return 0;

778}

779

780#define GENERIC_FIXUP_FUNC(name, base, prefix) \

781 static uint16_t name(struct InternalInstruction *insn, OperandType type, \

782 uint8_t index, uint8_t *valid) { \

783 *valid = 1; \

784 switch (type) { \

785 default: \

786 debug("Unhandled register type"); \

787 *valid = 0; \

788 return 0; \

789 case TYPE_Rv: \

790 return base + index; \

791 case TYPE_R8: \

792 if (insn->rexPrefix && index >= 4 && index <= 7) \

793 return prefix##_SPL + (index - 4); \

794 else \

795 return prefix##_AL + index; \

796 case TYPE_R16: \

797 return prefix##_AX + index; \

798 case TYPE_R32: \

799 return prefix##_EAX + index; \

800 case TYPE_R64: \

801 return prefix##_RAX + index; \

802 case TYPE_ZMM: \

803 return prefix##_ZMM0 + index; \

804 case TYPE_YMM: \

805 return prefix##_YMM0 + index; \

806 case TYPE_XMM: \

807 return prefix##_XMM0 + index; \

808 case TYPE_TMM: \

809 if (index > 7) \

810 *valid = 0; \

811 return prefix##_TMM0 + index; \

812 case TYPE_TMM_PAIR: \

813 if (index > 7) \

814 *valid = 0; \

815 return prefix##_TMM0_TMM1 + (index / 2); \

816 case TYPE_VK: \

817 index &= 0xf; \

818 if (index > 7) \

819 *valid = 0; \

820 return prefix##_K0 + index; \

821 case TYPE_VK_PAIR: \

822 if (index > 7) \

823 *valid = 0; \

824 return prefix##_K0_K1 + (index / 2); \

825 case TYPE_MM64: \

826 return prefix##_MM0 + (index & 0x7); \

827 case TYPE_SEGMENTREG: \

828 if ((index & 7) > 5) \

829 *valid = 0; \

830 return prefix##_ES + (index & 7); \

831 case TYPE_DEBUGREG: \

832 if (index > 15) \

833 *valid = 0; \

834 return prefix##_DR0 + index; \

835 case TYPE_CONTROLREG: \

836 if (index > 15) \

837 *valid = 0; \

838 return prefix##_CR0 + index; \

839 case TYPE_MVSIBX: \

840 return prefix##_XMM0 + index; \

841 case TYPE_MVSIBY: \

842 return prefix##_YMM0 + index; \

843 case TYPE_MVSIBZ: \

844 return prefix##_ZMM0 + index; \

845 } \

846 }

847

848

849

850

851

852

853

854

855

856

857

860

861

862

863

864

865

866

867

872

874 default:

875 debug("Expected a REG or R/M encoding in fixupReg");

876 return -1;

877 case ENCODING_VVVV:

878 insn->vvvv =

879 (Reg)fixupRegValue(insn, (OperandType)op->type, insn->vvvv, &valid);

880 if (!valid)

881 return -1;

882 break;

883 case ENCODING_REG:

885 insn->reg - insn->regBase, &valid);

886 if (!valid)

887 return -1;

888 break;

890 if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&

892

893

894

895

896

897

898 switch (op->type) {

899 case TYPE_Rv:

900 case TYPE_R8:

901 case TYPE_R16:

902 case TYPE_R32:

903 case TYPE_R64:

904 break;

905 default:

906 insn->eaBase =

907 (EABase)(insn->eaBase +

908 (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4));

909 break;

910 }

911 }

912 [[fallthrough]];

913 case ENCODING_SIB:

914 if (insn->eaBase >= insn->eaRegBase) {

915 insn->eaBase = (EABase)fixupRMValue(

916 insn, (OperandType)op->type, insn->eaBase - insn->eaRegBase, &valid);

917 if (!valid)

918 return -1;

919 }

920 break;

921 }

922

923 return 0;

924}

925

926

927

931

935 default:

937 dbgs() << format("Unhandled mmm field for instruction (0x%hhx)",

939 return true;

961 }

964 default:

966 dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)",

968 return true;

987 }

993 default:

995 dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)",

997 return true;

1007 }

1009

1012 }

1013

1014 if (consume(insn, current))

1015 return true;

1016

1017 if (current == 0x0f) {

1019 dbgs() << format("Found a two-byte escape prefix (0x%hhx)", current));

1020 if (consume(insn, current))

1021 return true;

1022

1023 if (current == 0x38) {

1025 current));

1026 if (consume(insn, current))

1027 return true;

1028

1030 } else if (current == 0x3a) {

1032 current));

1033 if (consume(insn, current))

1034 return true;

1035

1037 } else if (current == 0x0f) {

1039 dbgs() << format("Found a 3dnow escape prefix (0x%hhx)", current));

1040

1041

1043 return true;

1044

1045 if (consume(insn, current))

1046 return true;

1047

1049 } else {

1050 LLVM_DEBUG(dbgs() << "Didn't find a three-byte escape prefix");

1052 }

1054

1055

1057

1058

1059

1060 insn->opcode = current;

1061

1062 return false;

1063}

1064

1065

1067 for (int i = 0;; i++) {

1068 if (orig[i] == '\0' && equiv[i] == '\0')

1069 return true;

1070 if (orig[i] == '\0' || equiv[i] == '\0')

1071 return false;

1072 if (orig[i] != equiv[i]) {

1073 if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')

1074 continue;

1075 if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')

1076 continue;

1077 if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')

1078 continue;

1079 return false;

1080 }

1081 }

1082}

1083

1084

1086 for (int i = 0;; ++i) {

1087 if (name[i] == '\0')

1088 return false;

1089 if (name[i] == '6' && name[i + 1] == '4')

1090 return true;

1091 }

1092}

1093

1094

1095

1099 auto insnCtx = InstructionContext(x86DisassemblerContexts[attrMask]);

1104 break;

1107 break;

1110 break;

1113 break;

1116 break;

1119 break;

1122 break;

1125 break;

1128 break;

1131 break;

1134 break;

1137 break;

1138 }

1139

1144 return -1;

1145 *instructionID =

1147 } else {

1149 }

1150

1151 return 0;

1152}

1153

1156 return false;

1158 return true;

1159 switch (insn->opcode & 0xfe) {

1160 default:

1161 return false;

1162 case 0x38:

1163 case 0x3a:

1164 case 0x84:

1165 return true;

1166 case 0x80:

1168 case 0xf6:

1170 }

1171}

1172

1175 return false;

1177 return true;

1178

1181 switch (insn->opcode) {

1182 case 0xf2:

1183 case 0xf3:

1184 case 0xf5:

1185 case 0xf7:

1186 return true;

1187 default:

1188 break;

1189 }

1190 }

1191 return false;

1192}

1193

1194

1195

1196

1201

1203

1205

1208

1211

1216 break;

1219 break;

1222 break;

1223 }

1224

1228 isCCMPOrCTEST(insn))

1230

1239 }

1248 break;

1251 break;

1254 break;

1255 }

1256

1265 break;

1268 break;

1271 break;

1272 }

1273

1280 break;

1283 break;

1286 break;

1287 }

1288

1291 } else {

1292 return -1;

1293 }

1295

1302

1304 } else {

1309 }

1310 } else {

1312 case 0xf2:

1314 break;

1315 case 0xf3:

1317 break;

1318 case 0x66:

1323 break;

1324 case 0x67:

1326 break;

1327 }

1328 }

1329

1332 attrMask &= ~ATTR_ADSIZE;

1333 }

1334

1335

1337 (insn->opcode == 0xA1 || (insn->opcode & 0xf0) == 0x50))

1339

1341

1342

1345

1346

1347

1349 (insn->opcode == 0xE8 || insn->opcode == 0xE9))

1351

1353 insn->opcode >= 0x80 && insn->opcode <= 0x8F)

1355 }

1356

1357

1359 return -1;

1360

1361

1362

1365

1366

1373

1374 uint16_t instructionIDWithREXW;

1379 return 0;

1380 }

1381

1382 auto SpecName = mii->getName(instructionIDWithREXW);

1383

1384 if (is64Bit(SpecName.data())) {

1387 return 0;

1388 }

1389 }

1390 }

1391

1392

1393

1394

1395

1396

1401

1406

1407

1410

1411

1414 }

1415

1417 return -1;

1418

1421 return 0;

1422 }

1423

1426

1427

1428

1429

1430

1432 uint16_t instructionIDWithOpsize;

1434

1436

1439

1440

1442 insn->spec = spec;

1443 return 0;

1444 }

1445

1446 specName = mii->getName(instructionID);

1447 specWithOpSizeName = mii->getName(instructionIDWithOpsize);

1448

1453 } else {

1455 insn->spec = spec;

1456 }

1457 return 0;

1458 }

1459

1462

1463

1465 uint16_t instructionIDWithNewOpcode;

1467

1469

1470

1471 insn->opcode = 0x91;

1472

1474 attrMask)) {

1475 insn->opcode = 0x90;

1476

1478 insn->spec = spec;

1479 return 0;

1480 }

1481

1482 specWithNewOpcode = &INSTRUCTIONS_SYM[instructionIDWithNewOpcode];

1483

1484

1485 insn->opcode = 0x90;

1486

1487 insn->instructionID = instructionIDWithNewOpcode;

1488 insn->spec = specWithNewOpcode;

1489

1490 return 0;

1491 }

1492

1495

1496 return 0;

1497}

1498

1499

1500

1501

1502

1503

1504

1505

1506

1509

1510 if (size == 0)

1512

1513 auto setOpcodeRegister = [&](unsigned base) {

1517 (insn->opcode & 7)));

1518 };

1519

1520 switch (size) {

1521 case 1:

1522 setOpcodeRegister(MODRM_REG_AL);

1526 (Reg)(MODRM_REG_SPL + (insn->opcodeRegister - MODRM_REG_AL - 4));

1527 }

1528

1529 break;

1530 case 2:

1531 setOpcodeRegister(MODRM_REG_AX);

1532 break;

1533 case 4:

1534 setOpcodeRegister(MODRM_REG_EAX);

1535 break;

1536 case 8:

1537 setOpcodeRegister(MODRM_REG_RAX);

1538 break;

1539 }

1540

1541 return 0;

1542}

1543

1544

1545

1546

1547

1548

1549

1550

1556

1558

1560

1563

1564 switch (size) {

1565 case 1:

1566 if (consume(insn, imm8))

1567 return -1;

1569 break;

1570 case 2:

1571 if (consume(insn, imm16))

1572 return -1;

1574 break;

1575 case 4:

1576 if (consume(insn, imm32))

1577 return -1;

1579 break;

1580 case 8:

1581 if (consume(insn, imm64))

1582 return -1;

1584 break;

1585 default:

1587 }

1588

1590

1591 return 0;

1592}

1593

1594

1597

1598 int vvvv;

1608 else

1609 return -1;

1610

1612 vvvv &= 0xf;

1613

1614 insn->vvvv = static_cast<Reg>(vvvv);

1615 return 0;

1616}

1617

1618

1619

1620

1621

1624

1626 return -1;

1627

1630 return 0;

1631}

1632

1633

1634

1636 int hasVVVV, needVVVV;

1637 int sawRegImm = 0;

1638

1640

1641

1643 needVVVV = hasVVVV && (insn->vvvv != 0);

1644

1645 for (const auto &Op : x86OperandSets[insn->spec->operands]) {

1646 switch (Op.encoding) {

1647 case ENCODING_NONE:

1648 case ENCODING_SI:

1649 case ENCODING_DI:

1650 break;

1652

1653 if (needVVVV)

1654 needVVVV = hasVVVV & ((insn->vvvv & 0xf) != 0);

1656 return -1;

1657

1658

1659 if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)

1660 return -1;

1661

1662

1665

1666

1670

1671

1673 default:

1674 debug("Unhandled VSIB index type");

1675 return -1;

1676 case TYPE_MVSIBX:

1679 break;

1680 case TYPE_MVSIBY:

1683 break;

1684 case TYPE_MVSIBZ:

1687 break;

1688 }

1689

1690

1692 insn->displacement *= 1 << (Op.encoding - ENCODING_VSIB);

1693 break;

1694 case ENCODING_SIB:

1695

1696 if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)

1697 return -1;

1699 return -1;

1701 return -1;

1702 break;

1703 case ENCODING_REG:

1706 return -1;

1708 return -1;

1709

1711 insn->displacement *= 1 << (Op.encoding - ENCODING_RM);

1712 break;

1713 case ENCODING_IB:

1714 if (sawRegImm) {

1715

1716

1720 break;

1721 }

1723 return -1;

1724 if (Op.type == TYPE_XMM || Op.type == TYPE_YMM)

1725 sawRegImm = 1;

1726 break;

1727 case ENCODING_IW:

1729 return -1;

1730 break;

1731 case ENCODING_ID:

1733 return -1;

1734 break;

1735 case ENCODING_IO:

1737 return -1;

1738 break;

1739 case ENCODING_Iv:

1741 return -1;

1742 break;

1743 case ENCODING_Ia:

1745 return -1;

1746 break;

1747 case ENCODING_IRC:

1750 break;

1751 case ENCODING_RB:

1753 return -1;

1754 break;

1755 case ENCODING_RW:

1757 return -1;

1758 break;

1759 case ENCODING_RD:

1761 return -1;

1762 break;

1763 case ENCODING_RO:

1765 return -1;

1766 break;

1767 case ENCODING_Rv:

1769 return -1;

1770 break;

1771 case ENCODING_CF:

1773 needVVVV = false;

1774 break;

1775 case ENCODING_CC:

1778 else

1780 break;

1781 case ENCODING_FP:

1782 break;

1783 case ENCODING_VVVV:

1784 needVVVV = 0;

1785 if (!hasVVVV)

1786 return -1;

1788 insn->vvvv = static_cast<Reg>(insn->vvvv & 0x7);

1790 return -1;

1791 break;

1792 case ENCODING_WRITEMASK:

1794 return -1;

1795 break;

1796 case ENCODING_DUP:

1797 break;

1798 default:

1799 LLVM_DEBUG(dbgs() << "Encountered an operand with an unknown encoding.");

1800 return -1;

1801 }

1802 }

1803

1804

1805 if (needVVVV)

1806 return -1;

1807

1808 return 0;

1809}

1810

1811namespace llvm {

1812

1813

1814

1815

1816namespace X86 {

1817 enum {

1825}

1826

1827}

1828

1832

1833namespace {

1834

1835

1836

1837

1838class X86GenericDisassembler : public MCDisassembler {

1839 std::unique_ptr MII;

1840public:

1842 std::unique_ptr MII);

1843public:

1847

1848private:

1850};

1851

1852}

1853

1854X86GenericDisassembler::X86GenericDisassembler(

1857 std::unique_ptr MII)

1860 if (FB[X86::Is16Bit]) {

1862 return;

1863 } else if (FB[X86::Is32Bit]) {

1865 return;

1866 } else if (FB[X86::Is64Bit]) {

1868 return;

1869 }

1870

1872}

1873

1877 CommentStream = &CStream;

1878

1881 Insn.bytes = Bytes;

1884 Insn.mode = fMode;

1885

1890 return Fail;

1891 }

1892

1893 Insn.operands = x86OperandSets[Insn.spec->operands];

1894 Insn.length = Insn.readerCursor - Insn.startLocation;

1896 if (Size > 15)

1897 LLVM_DEBUG(dbgs() << "Instruction exceeds 15-byte limit");

1898

1900 if (!Ret) {

1902 if (Insn.hasAdSize)

1904 if (Insn.mandatoryPrefix) {

1905 if (Insn.hasOpSize)

1907 if (Insn.repeatPrefix == 0xf2)

1909 else if (Insn.repeatPrefix == 0xf3 &&

1910

1911 Insn.opcode != 0x90)

1913 if (Insn.hasLockPrefix)

1915 }

1916 Instr.setFlags(Flags);

1917 }

1919}

1920

1921

1922

1923

1924

1925

1926

1927

1928

1929

1931#define ENTRY(x) X86::x,

1933#undef ENTRY

1934

1935 MCPhysReg llvmRegnum = llvmRegnums[reg];

1937}

1938

1940 0,

1941 X86::CS,

1942 X86::SS,

1943 X86::DS,

1944 X86::ES,

1945 X86::FS,

1946 X86::GS

1947};

1948

1949

1950

1951

1952

1954 unsigned baseRegNo;

1955

1957 baseRegNo = insn.hasAdSize ? X86::ESI : X86::RSI;

1959 baseRegNo = insn.hasAdSize ? X86::SI : X86::ESI;

1960 else {

1962 baseRegNo = insn.hasAdSize ? X86::ESI : X86::SI;

1963 }

1966

1970 return false;

1971}

1972

1973

1974

1975

1976

1977

1979 unsigned baseRegNo;

1980

1982 baseRegNo = insn.hasAdSize ? X86::EDI : X86::RDI;

1984 baseRegNo = insn.hasAdSize ? X86::DI : X86::EDI;

1985 else {

1987 baseRegNo = insn.hasAdSize ? X86::EDI : X86::DI;

1988 }

1991 return false;

1992}

1993

1994

1995

1996

1997

1998

1999

2004

2005

2007

2010 if (type == TYPE_REL) {

2014 default:

2015 break;

2016 case ENCODING_Iv:

2018 default:

2019 break;

2020 case 1:

2021 if(immediate & 0x80)

2022 immediate |= ~(0xffull);

2023 break;

2024 case 2:

2025 if(immediate & 0x8000)

2026 immediate |= ~(0xffffull);

2027 break;

2028 case 4:

2029 if(immediate & 0x80000000)

2030 immediate |= ~(0xffffffffull);

2031 break;

2032 case 8:

2033 break;

2034 }

2035 break;

2036 case ENCODING_IB:

2037 if(immediate & 0x80)

2038 immediate |= ~(0xffull);

2039 break;

2040 case ENCODING_IW:

2041 if(immediate & 0x8000)

2042 immediate |= ~(0xffffull);

2043 break;

2044 case ENCODING_ID:

2045 if(immediate & 0x80000000)

2046 immediate |= ~(0xffffffffull);

2047 break;

2048 }

2049 }

2050

2051 else if (type == TYPE_IMM) {

2053 default:

2054 break;

2055 case ENCODING_IB:

2056 if(immediate & 0x80)

2057 immediate |= ~(0xffull);

2058 break;

2059 case ENCODING_IW:

2060 if(immediate & 0x8000)

2061 immediate |= ~(0xffffull);

2062 break;

2063 case ENCODING_ID:

2064 if(immediate & 0x80000000)

2065 immediate |= ~(0xffffffffull);

2066 break;

2067 case ENCODING_IO:

2068 break;

2069 }

2070 }

2071

2072 switch (type) {

2073 case TYPE_XMM:

2075 return;

2076 case TYPE_YMM:

2078 return;

2079 case TYPE_ZMM:

2081 return;

2082 default:

2083

2084 break;

2085 }

2086

2091

2092 if (type == TYPE_MOFFS) {

2096 }

2097}

2098

2099

2100

2101

2102

2103

2104

2107 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {

2108 debug("A R/M register operand may not have a SIB byte");

2109 return true;

2110 }

2111

2112 switch (insn.eaBase) {

2113 default:

2114 debug("Unexpected EA base register");

2115 return true;

2117 debug("EA_BASE_NONE for ModR/M base");

2118 return true;

2119#define ENTRY(x) case EA_BASE_##x:

2121#undef ENTRY

2122 debug("A R/M register operand may not have a base; "

2123 "the operand must be a register.");

2124 return true;

2125#define ENTRY(x) \

2126 case EA_REG_##x: \

2127 mcInst.addOperand(MCOperand::createReg(X86::x)); break;

2129#undef ENTRY

2130 }

2131

2132 return false;

2133}

2134

2135

2136

2137

2138

2139

2140

2141

2142

2143

2146 bool ForceSIB = false) {

2147

2148

2149

2150

2151

2152

2153

2154

2155

2156

2157

2158

2165

2166 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {

2169 default:

2170 debug("Unexpected sibBase");

2171 return true;

2172#define ENTRY(x) \

2173 case SIB_BASE_##x: \

2174 baseReg = MCOperand::createReg(X86::x); break;

2176#undef ENTRY

2177 }

2178 } else {

2180 }

2181

2184 default:

2185 debug("Unexpected sibIndex");

2186 return true;

2187#define ENTRY(x) \

2188 case SIB_INDEX_##x: \

2189 indexReg = MCOperand::createReg(X86::x); break;

2195#undef ENTRY

2196 }

2197 } else {

2198

2199

2200

2201

2202

2203

2204

2205 if (!ForceSIB &&

2209 insn.sibBase != SIB_BASE_ESP && insn.sibBase != SIB_BASE_RSP &&

2210 insn.sibBase != SIB_BASE_R12D && insn.sibBase != SIB_BASE_R12))) {

2212 X86::RIZ);

2213 } else

2215 }

2216

2218 } else {

2219 switch (insn.eaBase) {

2222 debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base");

2223 return true;

2224 }

2230

2232 X86::RIP);

2233 }

2234 else

2236

2238 break;

2239 case EA_BASE_BX_SI:

2242 break;

2243 case EA_BASE_BX_DI:

2246 break;

2247 case EA_BASE_BP_SI:

2250 break;

2251 case EA_BASE_BP_DI:

2254 break;

2255 default:

2257 switch (insn.eaBase) {

2258 default:

2259 debug("Unexpected eaBase");

2260 return true;

2261

2262

2263

2264

2265#define ENTRY(x) \

2266 case EA_BASE_##x: \

2267 baseReg = MCOperand::createReg(X86::x); break;

2269#undef ENTRY

2270#define ENTRY(x) case EA_REG_##x:

2272#undef ENTRY

2273 debug("A R/M memory operand may not be a register; "

2274 "the base field must be a base.");

2275 return true;

2276 }

2277 }

2278

2280 }

2281

2283

2285

2289

2290 const uint8_t dispSize =

2292

2298 return false;

2299}

2300

2301

2302

2303

2304

2305

2306

2307

2308

2311 switch (operand.type) {

2312 default:

2313 debug("Unexpected type for a R/M operand");

2314 return true;

2315 case TYPE_R8:

2316 case TYPE_R16:

2317 case TYPE_R32:

2318 case TYPE_R64:

2319 case TYPE_Rv:

2320 case TYPE_MM64:

2321 case TYPE_XMM:

2322 case TYPE_YMM:

2323 case TYPE_ZMM:

2324 case TYPE_TMM:

2325 case TYPE_TMM_PAIR:

2326 case TYPE_VK_PAIR:

2327 case TYPE_VK:

2328 case TYPE_DEBUGREG:

2329 case TYPE_CONTROLREG:

2330 case TYPE_BNDR:

2332 case TYPE_M:

2333 case TYPE_MVSIBX:

2334 case TYPE_MVSIBY:

2335 case TYPE_MVSIBZ:

2337 case TYPE_MSIB:

2339 }

2340}

2341

2342

2343

2344

2345

2346

2350}

2351

2352

2353

2354

2355

2356

2357

2360 if (maskRegNum >= 8) {

2361 debug("Invalid mask register number");

2362 return true;

2363 }

2364

2366 return false;

2367}

2368

2369

2370

2371

2372

2373

2374

2375

2380 default:

2381 debug("Unhandled operand encoding during translation");

2382 return true;

2383 case ENCODING_REG:

2385 return false;

2386 case ENCODING_WRITEMASK:

2388 case ENCODING_SIB:

2391 return translateRM(mcInst, operand, insn, Dis);

2392 case ENCODING_IB:

2393 case ENCODING_IW:

2394 case ENCODING_ID:

2395 case ENCODING_IO:

2396 case ENCODING_Iv:

2397 case ENCODING_Ia:

2400 operand,

2401 insn,

2402 Dis);

2403 return false;

2404 case ENCODING_IRC:

2406 return false;

2407 case ENCODING_SI:

2409 case ENCODING_DI:

2411 case ENCODING_RB:

2412 case ENCODING_RW:

2413 case ENCODING_RD:

2414 case ENCODING_RO:

2415 case ENCODING_Rv:

2417 return false;

2418 case ENCODING_CF:

2420 return false;

2421 case ENCODING_CC:

2424 else

2426 return false;

2427 case ENCODING_FP:

2429 return false;

2430 case ENCODING_VVVV:

2432 return false;

2433 case ENCODING_DUP:

2435 insn, Dis);

2436 }

2437}

2438

2439

2440

2441

2442

2443

2444

2448 if (!insn.spec) {

2449 debug("Instruction has no specification");

2450 return true;

2451 }

2452

2455

2456

2457

2459 if(mcInst.getOpcode() == X86::REP_PREFIX)

2460 mcInst.setOpcode(X86::XRELEASE_PREFIX);

2461 else if(mcInst.getOpcode() == X86::REPNE_PREFIX)

2462 mcInst.setOpcode(X86::XACQUIRE_PREFIX);

2463 }

2464

2466

2467 for (const auto &Op : insn.operands) {

2468 if (Op.encoding != ENCODING_NONE) {

2470 return true;

2471 }

2472 }

2473 }

2474

2475 return false;

2476}

2477

2481 std::unique_ptr MII(T.createMCInstrInfo());

2482 return new X86GenericDisassembler(STI, Ctx, std::move(MII));

2483}

2484

2486

2491}

SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn

static bool isBranch(unsigned Opcode)

assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

static uint8_t readOpcode(WasmObjectFile::ReadContext &Ctx)

static int nextByte(ArrayRef< uint8_t > Bytes, uint64_t &Size)

static bool isPrefix(unsigned Opcode, const MCInstrInfo &MCII)

Check if the instruction is a prefix.

#define CASE_ENCODING_VSIB

#define THREEDNOW_MAP_SYM

#define rFromEVEX2of4(evex)

#define lFromEVEX4of4(evex)

#define l2FromEVEX4of4(evex)

#define rFromVEX2of3(vex)

#define zFromEVEX4of4(evex)

#define bFromXOP2of3(xop)

#define xFromVEX2of3(vex)

#define mmmmmFromVEX2of3(vex)

#define rmFromModRM(modRM)

#define bFromEVEX4of4(evex)

#define rFromVEX2of2(vex)

#define ppFromEVEX3of4(evex)

#define v2FromEVEX4of4(evex)

#define modFromModRM(modRM)

#define rFromXOP2of3(xop)

#define lFromXOP3of3(xop)

#define lFromVEX2of2(vex)

#define scFromEVEX4of4(evex)

#define scaleFromSIB(sib)

#define regFromModRM(modRM)

#define b2FromEVEX2of4(evex)

#define vvvvFromVEX2of2(vex)

#define nfFromEVEX4of4(evex)

#define ppFromXOP3of3(xop)

#define vvvvFromVEX3of3(vex)

#define r2FromEVEX2of4(evex)

#define uFromEVEX3of4(evex)

#define xFromXOP2of3(xop)

#define wFromEVEX3of4(evex)

#define bFromVEX2of3(vex)

#define wFromVEX3of3(vex)

#define mmmmmFromXOP2of3(xop)

#define aaaFromEVEX4of4(evex)

#define lFromVEX3of3(vex)

#define mmmFromEVEX2of4(evex)

#define ppFromVEX3of3(vex)

#define bFromEVEX2of4(evex)

#define xFromEVEX2of4(evex)

#define ppFromVEX2of2(vex)

#define indexFromSIB(sib)

#define vvvvFromXOP3of3(xop)

#define wFromXOP3of3(xop)

#define oszcFromEVEX3of4(evex)

#define vvvvFromEVEX3of4(evex)

static void translateRegister(MCInst &mcInst, Reg reg)

translateRegister - Translates an internal register to the appropriate LLVM register,...

static bool isREX2(struct InternalInstruction *insn, uint8_t prefix)

static int getInstructionID(struct InternalInstruction *insn, const MCInstrInfo *mii)

static bool readOpcode(struct InternalInstruction *insn)

static MCDisassembler * createX86Disassembler(const Target &T, const MCSubtargetInfo &STI, MCContext &Ctx)

static bool translateMaskRegister(MCInst &mcInst, uint8_t maskRegNum)

translateMaskRegister - Translates a 3-bit mask register number to LLVM form, and appends it to an MC...

static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn)

translateDstIndex - Appends a destination index operand to an MCInst.

static void translateImmediate(MCInst &mcInst, uint64_t immediate, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis)

translateImmediate - Appends an immediate operand to an MCInst.

static int readOperands(struct InternalInstruction *insn)

static void translateFPRegister(MCInst &mcInst, uint8_t stackPos)

translateFPRegister - Translates a stack position on the FPU stack to its LLVM form,...

static bool is64Bit(const char *name)

static const uint8_t segmentRegnums[SEG_OVERRIDE_max]

static int readImmediate(struct InternalInstruction *insn, uint8_t size)

static int getInstructionIDWithAttrMask(uint16_t *instructionID, struct InternalInstruction *insn, uint16_t attrMask)

static int readSIB(struct InternalInstruction *insn)

static bool isREX(struct InternalInstruction *insn, uint8_t prefix)

static int readVVVV(struct InternalInstruction *insn)

static bool isNF(InternalInstruction *insn)

static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn)

translateSrcIndex - Appends a source index operand to an MCInst.

#define GENERIC_FIXUP_FUNC(name, base, prefix)

static int readMaskRegister(struct InternalInstruction *insn)

static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis)

translateRM - Translates an operand stored in the R/M (and possibly SIB) byte of an instruction to LL...

static InstrUID decode(OpcodeType type, InstructionContext insnContext, uint8_t opcode, uint8_t modRM)

static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size)

static int readDisplacement(struct InternalInstruction *insn)

static bool isCCMPOrCTEST(InternalInstruction *insn)

LLVM_C_ABI void LLVMInitializeX86Disassembler()

static int fixupReg(struct InternalInstruction *insn, const struct OperandSpecifier *op)

static int readModRM(struct InternalInstruction *insn)

static bool is16BitEquivalent(const char *orig, const char *equiv)

static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, const MCDisassembler *Dis, bool ForceSIB=false)

translateRMMemory - Translates a memory operand stored in the Mod and R/M fields of an internal instruction (and possibly its SIB byte) to a memory operand in LLVM's format, and appends it to an MCInst.

static bool translateInstruction(MCInst &target, InternalInstruction &source, const MCDisassembler *Dis)

translateInstruction - Translates an internal instruction and all its operands to an MCInst.

static bool translateRMRegister(MCInst &mcInst, InternalInstruction &insn)

translateRMRegister - Translates a register stored in the R/M field of the ModR/M byte to its LLVM equivalent and appends it to an MCInst.

static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis)

translateOperand - Translates an operand stored in an internal instruction to LLVM's format and appends it to an MCInst.

static int readPrefixes(struct InternalInstruction *insn)

static bool peek(struct InternalInstruction *insn, uint8_t &byte)

ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.

size_t size() const

size - Get the array size.

bool empty() const

empty - Check if the array is empty.

This class represents an Operation in the Expression.

Container class for subtarget features.

Context object for machine code objects.

Superclass for all disassemblers.

bool tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address, bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) const

void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const

DecodeStatus

Ternary decode status.

virtual DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef< uint8_t > Bytes, uint64_t Address, raw_ostream &CStream) const =0

Returns the disassembly of a single instruction.

Instances of this class represent a single low-level machine instruction.

unsigned getOpcode() const

void addOperand(const MCOperand Op)

void setOpcode(unsigned Op)

Interface to description of machine instruction set.

StringRef getName(unsigned Opcode) const

Returns the name for the instructions with the given opcode.

Instances of this class represent operands of the MCInst class.

static MCOperand createReg(MCRegister Reg)

static MCOperand createImm(int64_t Val)

Generic base class for all target subtargets.

const FeatureBitset & getFeatureBits() const

StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.

constexpr const char * data() const

data - Get a pointer to the start of the string (which may not be null terminated).

Target - Wrapper for Target specific information.

This class implements an extremely fast bulk output stream that can only output to a stream.

#define llvm_unreachable(msg)

Marks that the current location is not supposed to be reachable.

@ X86

Windows x64, Windows Itanium (IA-64)

EABase

All possible values of the base field for effective-address computations, a.k.a. the Mod and R/M fields of the ModR/M byte.

Reg

All possible values of the reg field in the ModR/M byte.

DisassemblerMode

Decoding mode for the Intel disassembler.

SIBBase

All possible values of the SIB base field.

SIBIndex

All possible values of the SIB index field.

NodeAddr< InstrNode * > Instr

This is an optimization pass for GlobalISel generic memory operations.

LLVM_ATTRIBUTE_ALWAYS_INLINE DynamicAPInt mod(const DynamicAPInt &LHS, const DynamicAPInt &RHS)

is always non-negative.

auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)

Get the size of a range.

Target & getTheX86_32Target()

raw_ostream & dbgs()

dbgs() - This returns a reference to a raw_ostream for debugging messages.

format_object< Ts... > format(const char *Fmt, const Ts &... Vals)

These are helper functions used to produce formatted output.

OutputIt move(R &&Range, OutputIt Out)

Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.

Target & getTheX86_64Target()

Implement std::hash so that hash_code can be used in STL containers.

OpcodeDecision opcodeDecisions[IC_max]

ModRMDecision modRMDecisions[256]

static void RegisterMCDisassembler(Target &T, Target::MCDisassemblerCtorTy Fn)

RegisterMCDisassembler - Register a MCDisassembler implementation for the given target.

The specification for how to extract and interpret a full instruction and its operands.

The x86 internal instruction, which is produced by the decoder.

ArrayRef< OperandSpecifier > operands

EADisplacement eaDisplacement

uint8_t rex2ExtensionPrefix[2]

uint8_t vectorExtensionPrefix[4]

SegmentOverride segmentOverride

uint8_t numImmediatesConsumed

llvm::ArrayRef< uint8_t > bytes

uint8_t numImmediatesTranslated

const InstructionSpecifier * spec

VectorExtensionType vectorExtensionType

uint8_t displacementOffset

The specification for how to extract and interpret one operand.