LLVM: lib/Target/X86/Disassembler/X86Disassembler.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
91
92using namespace llvm;
94
95#define DEBUG_TYPE "x86-disassembler"
96
// debug(s) - Debug-logging helper. Streams the current source line number
// (__LINE__), the separator ": ", and then s to dbgs(), with the whole
// statement wrapped in LLVM_DEBUG.
// Note: the expansion already ends in ';', so a call written as `debug(x);`
// expands to the statement followed by an extra empty statement.
97#define debug(s) LLVM_DEBUG(dbgs() << __LINE__ << ": " << s);
98
99
100
101
106
107
108
112
113
114
115
116
117
121
122#include "X86GenDisassemblerTables.inc"
123
127
128 switch (type) {
130 dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
131 break;
133 dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
134 break;
136 dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
137 break;
139 dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
140 break;
142 dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
143 break;
145 dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
146 break;
148 dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
149 break;
151 dec =
152 &THREEDNOW_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
153 break;
155 dec = &MAP4_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
156 break;
158 dec = &MAP5_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
159 break;
161 dec = &MAP6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
162 break;
164 dec = &MAP7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
165 break;
166 }
167
169 default:
171 return 0;
172 case MODRM_ONEENTRY:
174 case MODRM_SPLITRM:
178 case MODRM_SPLITREG:
180 return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3) + 8];
181 return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)];
182 case MODRM_SPLITMISC:
184 return modRMTable[dec->instructionIDs + (modRM & 0x3f) + 8];
185 return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)];
186 case MODRM_FULL:
188 }
189}
190
// NOTE(review): the signature and the computation of `offset` were lost in
// this extract; only the bounds check and the read are visible here.
// Past the end of the available bytes: report failure (true) without
// writing to `byte`.
193 if (offset >= insn->bytes.size())
194 return true;
// In range: copy the byte at `offset` into the out-parameter and report
// success (false).
195 byte = insn->bytes[offset];
196 return false;
197}
198
200 auto r = insn->bytes;
202 if (offset + sizeof(T) > r.size())
203 return true;
206 return false;
207}
208
// True only when the instruction is being decoded in MODE_64BIT and `prefix`
// lies in the inclusive range 0x40..0x4f; false in every other case.
210 return insn->mode == MODE_64BIT && prefix >= 0x40 && prefix <= 0x4f;
211}
212
216
217
218
219
220
221
226
228
230
231
233 break;
234
235
236
238 break;
239
240 if ((byte == 0xf2 || byte == 0xf3) && (insn, nextByte)) {
241
242
243
244
245
249 if (!(byte == 0xf3 && nextByte == 0x90))
250 break;
251 }
252
253
254
255
259 break;
260 }
263
264 if (consume(insn, nnextByte))
265 return -1;
266
267 if (peek(insn, nnextByte))
268 return -1;
270 }
271 }
272
273 switch (byte) {
274 case 0xf0:
276 break;
277 case 0xf2:
278 case 0xf3: {
281 break;
282
283
284
285
286
287
288
289
292
295 break;
296 }
297 case 0x2e:
299 break;
300 case 0x36:
302 break;
303 case 0x3e:
305 break;
306 case 0x26:
308 break;
309 case 0x64:
311 break;
312 case 0x65:
314 break;
315 case 0x66: {
319 break;
320
323 break;
324 }
325 case 0x67:
327 break;
328 default:
330 break;
331 }
332
333 if (isREX(insn, byte)) {
339 }
340
343 }
344
346
347 if (byte == 0x62) {
349 if (consume(insn, byte1)) {
350 LLVM_DEBUG(dbgs() << "Couldn't read second byte of EVEX prefix");
351 return -1;
352 }
353
354 if (peek(insn, byte2)) {
355 LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix");
356 return -1;
357 }
358
359 if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)) {
361 } else {
364 }
365
370 LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix");
371 return -1;
372 }
374 LLVM_DEBUG(dbgs() << "Couldn't read fourth byte of EVEX prefix");
375 return -1;
376 }
377
379
385
386
391 }
392
395 "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",
398 }
399 } else if (byte == 0xc4) {
401 if (peek(insn, byte1)) {
402 LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX");
403 return -1;
404 }
405
406 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)
408 else
410
415
416
417
424
429 }
430 } else if (byte == 0xc5) {
432 if (peek(insn, byte1)) {
433 LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX");
434 return -1;
435 }
436
437 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)
439 else
441
445
449
451 default:
452 break;
455 break;
456 }
457
461 }
462 } else if (byte == 0x8f) {
464 if (peek(insn, byte1)) {
465 LLVM_DEBUG(dbgs() << "Couldn't read second byte of XOP");
466 return -1;
467 }
468
469 if ((byte1 & 0x38) != 0x0)
471 else
473
478
479
480
487
489 default:
490 break;
493 break;
494 }
495
500 }
501 } else if (isREX2(insn, byte)) {
503 if (peek(insn, byte1)) {
504 LLVM_DEBUG(dbgs() << "Couldn't read second byte of REX2");
505 return -1;
506 }
509
510
518 } else
520
538 } else {
542 }
543 }
544
545 return 0;
546}
547
548
552
555 case 2:
556 default:
557 llvm_unreachable("SIB-based addressing doesn't work in 16-bit mode");
558 case 4:
560 sibBaseBase = SIB_BASE_EAX;
561 break;
562 case 8:
564 sibBaseBase = SIB_BASE_RAX;
565 break;
566 }
567
569 return -1;
570
573
574 if (index == 0x4) {
576 } else {
578 }
579
581
584
585 switch (base) {
586 case 0x5:
587 case 0xd:
589 case 0x0:
592 break;
593 case 0x1:
596 break;
597 case 0x2:
600 break;
601 default:
603 }
604 break;
605 default:
607 break;
608 }
609
610 return 0;
611}
612
614 int8_t d8;
615 int16_t d16;
616 int32_t d32;
618
622 break;
625 return -1;
627 break;
630 return -1;
632 break;
635 return -1;
637 break;
638 }
639
640 return 0;
641}
642
643
647
649 return 0;
650
652 return -1;
654
658
659
660
661
663 case 2:
664 insn->regBase = MODRM_REG_AX;
666 break;
667 case 4:
668 insn->regBase = MODRM_REG_EAX;
670 break;
671 case 8:
672 insn->regBase = MODRM_REG_RAX;
674 break;
675 }
676
681
684
686
688 case 2: {
689 EABase eaBaseBase = EA_BASE_BX_SI;
690
691 switch (mod) {
692 case 0x0:
693 if (rm == 0x6) {
697 return -1;
698 } else {
701 }
702 break;
703 case 0x1:
708 return -1;
709 break;
710 case 0x2:
714 return -1;
715 break;
716 case 0x3:
719 return -1;
720 break;
721 }
722 break;
723 }
724 case 4:
725 case 8: {
726 EABase eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
727
728 switch (mod) {
729 case 0x0:
731
732
733
734 switch (rm & 7) {
735 case 0x4:
736 insn->eaBase = (insn->addressSize == 4 ? EA_BASE_sib : EA_BASE_sib64);
738 return -1;
739 break;
740 case 0x5:
744 return -1;
745 break;
746 default:
748 break;
749 }
750 break;
751 case 0x1:
753 [[fallthrough]];
754 case 0x2:
756 switch (rm & 7) {
757 case 0x4:
758 insn->eaBase = EA_BASE_sib;
760 return -1;
761 break;
762 default:
765 return -1;
766 break;
767 }
768 break;
769 case 0x3:
772 break;
773 }
774 break;
775 }
776 }
777
778 return 0;
779}
780
// GENERIC_FIXUP_FUNC(name, base, prefix) - Defines a static function `name`
// that converts an (operand type, index) pair into a value from the
// prefix##_* enumerator family.
//
//   insn  - instruction being decoded; only insn->rexPrefix is read by the
//           macro body itself (`base` is supplied by the macro user and may
//           refer to insn as well).
//   type  - operand type that selects the register class.
//   index - register number relative to the start of that class.
//   valid - out-parameter; set to 1 on entry and cleared to 0 when `type` is
//           not handled or `index` is out of range for the class.
//
// Per-type behaviour, as written below:
//   * TYPE_Rv returns base + index.
//   * TYPE_R8 returns prefix##_SPL + (index - 4) when insn->rexPrefix is
//     non-zero and index is 4..7; otherwise prefix##_AL + index.
//   * TYPE_TMM and TYPE_VK_PAIR reject index > 7; TYPE_VK first masks index
//     to its low four bits and then rejects values above 7.
//   * TYPE_MM64 uses only the low three bits of index.
//   * TYPE_SEGMENTREG uses the low three bits and rejects values above 5.
//   * TYPE_DEBUGREG and TYPE_CONTROLREG reject index > 15.
//   * TYPE_MVSIBX/Y/Z map to the XMM/YMM/ZMM families respectively.
//
// When a range check clears *valid the function still returns the computed
// enumerator value, so callers must test *valid before using the result.
// Only the unhandled-type path returns 0.
781#define GENERIC_FIXUP_FUNC(name, base, prefix) \
782 static uint16_t name(struct InternalInstruction *insn, OperandType type, \
783 uint8_t index, uint8_t *valid) { \
784 *valid = 1; \
785 switch (type) { \
786 default: \
787 debug("Unhandled register type"); \
788 *valid = 0; \
789 return 0; \
790 case TYPE_Rv: \
791 return base + index; \
792 case TYPE_R8: \
793 if (insn->rexPrefix && index >= 4 && index <= 7) \
794 return prefix##_SPL + (index - 4); \
795 else \
796 return prefix##_AL + index; \
797 case TYPE_R16: \
798 return prefix##_AX + index; \
799 case TYPE_R32: \
800 return prefix##_EAX + index; \
801 case TYPE_R64: \
802 return prefix##_RAX + index; \
803 case TYPE_ZMM: \
804 return prefix##_ZMM0 + index; \
805 case TYPE_YMM: \
806 return prefix##_YMM0 + index; \
807 case TYPE_XMM: \
808 return prefix##_XMM0 + index; \
809 case TYPE_TMM: \
810 if (index > 7) \
811 *valid = 0; \
812 return prefix##_TMM0 + index; \
813 case TYPE_VK: \
814 index &= 0xf; \
815 if (index > 7) \
816 *valid = 0; \
817 return prefix##_K0 + index; \
818 case TYPE_VK_PAIR: \
819 if (index > 7) \
820 *valid = 0; \
821 return prefix##_K0_K1 + (index / 2); \
822 case TYPE_MM64: \
823 return prefix##_MM0 + (index & 0x7); \
824 case TYPE_SEGMENTREG: \
825 if ((index & 7) > 5) \
826 *valid = 0; \
827 return prefix##_ES + (index & 7); \
828 case TYPE_DEBUGREG: \
829 if (index > 15) \
830 *valid = 0; \
831 return prefix##_DR0 + index; \
832 case TYPE_CONTROLREG: \
833 if (index > 15) \
834 *valid = 0; \
835 return prefix##_CR0 + index; \
836 case TYPE_MVSIBX: \
837 return prefix##_XMM0 + index; \
838 case TYPE_MVSIBY: \
839 return prefix##_YMM0 + index; \
840 case TYPE_MVSIBZ: \
841 return prefix##_ZMM0 + index; \
842 } \
843 }
844
845
846
847
848
849
850
851
852
853
854
857
858
859
860
861
862
863
864
869
871 default:
872 debug("Expected a REG or R/M encoding in fixupReg");
873 return -1;
874 case ENCODING_VVVV:
875 insn->vvvv =
876 (Reg)fixupRegValue(insn, (OperandType)op->type, insn->vvvv, &valid);
877 if (!valid)
878 return -1;
879 break;
880 case ENCODING_REG:
882 insn->reg - insn->regBase, &valid);
883 if (!valid)
884 return -1;
885 break;
887 if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&
889
890
891
892
893
894
895 switch (op->type) {
896 case TYPE_Rv:
897 case TYPE_R8:
898 case TYPE_R16:
899 case TYPE_R32:
900 case TYPE_R64:
901 break;
902 default:
903 insn->eaBase =
904 (EABase)(insn->eaBase +
905 (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4));
906 break;
907 }
908 }
909 [[fallthrough]];
910 case ENCODING_SIB:
911 if (insn->eaBase >= insn->eaRegBase) {
912 insn->eaBase = (EABase)fixupRMValue(
913 insn, (OperandType)op->type, insn->eaBase - insn->eaRegBase, &valid);
914 if (!valid)
915 return -1;
916 }
917 break;
918 }
919
920 return 0;
921}
922
923
924
928
932 default:
934 dbgs() << format("Unhandled mmm field for instruction (0x%hhx)",
936 return true;
958 }
961 default:
963 dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)",
965 return true;
984 }
990 default:
992 dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)",
994 return true;
1004 }
1006
1009 }
1010
1011 if (consume(insn, current))
1012 return true;
1013
1014 if (current == 0x0f) {
1016 dbgs() << format("Found a two-byte escape prefix (0x%hhx)", current));
1017 if (consume(insn, current))
1018 return true;
1019
1020 if (current == 0x38) {
1022 current));
1023 if (consume(insn, current))
1024 return true;
1025
1027 } else if (current == 0x3a) {
1029 current));
1030 if (consume(insn, current))
1031 return true;
1032
1034 } else if (current == 0x0f) {
1036 dbgs() << format("Found a 3dnow escape prefix (0x%hhx)", current));
1037
1038
1040 return true;
1041
1042 if (consume(insn, current))
1043 return true;
1044
1046 } else {
1047 LLVM_DEBUG(dbgs() << "Didn't find a three-byte escape prefix");
1049 }
1051
1052
1054
1055
1056
1057 insn->opcode = current;
1058
1059 return false;
1060}
1061
1062
// Walk both NUL-terminated strings in lock step, comparing position by
// position. The loop has no exit condition of its own; every path out is one
// of the returns below.
1064 for (int i = 0;; i++) {
// Both strings ended at the same position with no disqualifying mismatch.
1065 if (orig[i] == '\0' && equiv[i] == '\0')
1066 return true;
// Exactly one string ended: the lengths differ, so they cannot match.
1067 if (orig[i] == '\0' || equiv[i] == '\0')
1068 return false;
1069 if (orig[i] != equiv[i]) {
// A differing character is tolerated only for these substitutions
// (orig -> equiv): 'Q' or 'L' -> 'W', '6' or '3' -> '1', '4' or '2' -> '6'.
// Together the digit rules let "64" or "32" in orig line up with "16" in
// equiv. Each position is checked independently, so mixed pairs such as
// "62" against "16" are accepted as well.
1070 if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
1071 continue;
1072 if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
1073 continue;
1074 if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
1075 continue;
// Any other difference means the names are not equivalent.
1076 return false;
1077 }
1078 }
1079}
1080
1081
// Scan the NUL-terminated string for the two-character sequence "64".
1083 for (int i = 0;; ++i) {
// Reached the terminator without finding "64".
1084 if (name[i] == '\0')
1085 return false;
// name[i + 1] is only read when name[i] is '6' (short-circuit &&), so the
// look-ahead never goes past the terminator.
1086 if (name[i] == '6' && name[i + 1] == '4')
1087 return true;
1088 }
1089}
1090
1091
1092
1096 auto insnCtx = InstructionContext(x86DisassemblerContexts[attrMask]);
1101 break;
1104 break;
1107 break;
1110 break;
1113 break;
1116 break;
1119 break;
1122 break;
1125 break;
1128 break;
1131 break;
1134 break;
1135 }
1136
1141 return -1;
1142 *instructionID =
1144 } else {
1146 }
1147
1148 return 0;
1149}
1150
1153 return false;
1155 return true;
1156 switch (insn->opcode & 0xfe) {
1157 default:
1158 return false;
1159 case 0x38:
1160 case 0x3a:
1161 case 0x84:
1162 return true;
1163 case 0x80:
1165 case 0xf6:
1167 }
1168}
1169
1172 return false;
1174 return true;
1175
1178 switch (insn->opcode) {
1179 case 0xf2:
1180 case 0xf3:
1181 case 0xf5:
1182 case 0xf7:
1183 return true;
1184 default:
1185 break;
1186 }
1187 }
1188 return false;
1189}
1190
1191
1192
1193
1198
1200
1202
1205
1208
1213 break;
1216 break;
1219 break;
1220 }
1221
1225 (insn))
1227
1236 }
1245 break;
1248 break;
1251 break;
1252 }
1253
1262 break;
1265 break;
1268 break;
1269 }
1270
1277 break;
1280 break;
1283 break;
1284 }
1285
1288 } else {
1289 return -1;
1290 }
1292
1299
1301 } else {
1306 }
1307 } else {
1309 case 0xf2:
1311 break;
1312 case 0xf3:
1314 break;
1315 case 0x66:
1320 break;
1321 case 0x67:
1323 break;
1324 }
1325 }
1326
1330 }
1331
1332
1334 (insn->opcode == 0xA1 || (insn->opcode & 0xf0) == 0x50))
1336
1338
1339
1342
1343
1344
1346 (insn->opcode == 0xE8 || insn->opcode == 0xE9))
1348
1350 insn->opcode >= 0x80 && insn->opcode <= 0x8F)
1352 }
1353
1354
1356 return -1;
1357
1358
1359
1362
1363
1370
1371 uint16_t instructionIDWithREXW;
1376 return 0;
1377 }
1378
1379 auto SpecName = mii->getName(instructionIDWithREXW);
1380
1381 if ((SpecName.data())) {
1384 return 0;
1385 }
1386 }
1387 }
1388
1389
1390
1391
1392
1393
1398
1403
1404
1407
1408
1411 }
1412
1414 return -1;
1415
1418 return 0;
1419 }
1420
1423
1424
1425
1426
1427
1429 uint16_t instructionIDWithOpsize;
1431
1433
1436
1437
1439 insn->spec = spec;
1440 return 0;
1441 }
1442
1443 specName = mii->getName(instructionID);
1444 specWithOpSizeName = mii->getName(instructionIDWithOpsize);
1445
1450 } else {
1452 insn->spec = spec;
1453 }
1454 return 0;
1455 }
1456
1459
1460
1462 uint16_t instructionIDWithNewOpcode;
1464
1466
1467
1468 insn->opcode = 0x91;
1469
1471 attrMask)) {
1472 insn->opcode = 0x90;
1473
1475 insn->spec = spec;
1476 return 0;
1477 }
1478
1479 specWithNewOpcode = &INSTRUCTIONS_SYM[instructionIDWithNewOpcode];
1480
1481
1482 insn->opcode = 0x90;
1483
1484 insn->instructionID = instructionIDWithNewOpcode;
1485 insn->spec = specWithNewOpcode;
1486
1487 return 0;
1488 }
1489
1492
1493 return 0;
1494}
1495
1496
1497
1498
1499
1500
1501
1502
1503
1506
1507 if (size == 0)
1509
1510 auto setOpcodeRegister = [&](unsigned base) {
1514 (insn->opcode & 7)));
1515 };
1516
1517 switch (size) {
1518 case 1:
1519 setOpcodeRegister(MODRM_REG_AL);
1523 (Reg)(MODRM_REG_SPL + (insn->opcodeRegister - MODRM_REG_AL - 4));
1524 }
1525
1526 break;
1527 case 2:
1528 setOpcodeRegister(MODRM_REG_AX);
1529 break;
1530 case 4:
1531 setOpcodeRegister(MODRM_REG_EAX);
1532 break;
1533 case 8:
1534 setOpcodeRegister(MODRM_REG_RAX);
1535 break;
1536 }
1537
1538 return 0;
1539}
1540
1541
1542
1543
1544
1545
1546
1547
1553
1555
1557
1560
1561 switch (size) {
1562 case 1:
1563 if (consume(insn, imm8))
1564 return -1;
1566 break;
1567 case 2:
1568 if (consume(insn, imm16))
1569 return -1;
1571 break;
1572 case 4:
1573 if (consume(insn, imm32))
1574 return -1;
1576 break;
1577 case 8:
1578 if (consume(insn, imm64))
1579 return -1;
1581 break;
1582 default:
1584 }
1585
1587
1588 return 0;
1589}
1590
1591
1594
1595 int vvvv;
1605 else
1606 return -1;
1607
1609 vvvv &= 0xf;
1610
1611 insn->vvvv = static_cast<Reg>(vvvv);
1612 return 0;
1613}
1614
1615
1616
1617
1618
1621
1623 return -1;
1624
1627 return 0;
1628}
1629
1630
1631
1633 int hasVVVV, needVVVV;
1634 int sawRegImm = 0;
1635
1637
1638
1640 needVVVV = hasVVVV && (insn->vvvv != 0);
1641
1642 for (const auto &Op : x86OperandSets[insn->spec->operands]) {
1643 switch (Op.encoding) {
1644 case ENCODING_NONE:
1645 case ENCODING_SI:
1646 case ENCODING_DI:
1647 break;
1649
1650 if (needVVVV)
1651 needVVVV = hasVVVV & ((insn->vvvv & 0xf) != 0);
1653 return -1;
1654
1655
1656 if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
1657 return -1;
1658
1659
1662
1663
1667
1668
1670 default:
1671 debug("Unhandled VSIB index type");
1672 return -1;
1673 case TYPE_MVSIBX:
1676 break;
1677 case TYPE_MVSIBY:
1680 break;
1681 case TYPE_MVSIBZ:
1684 break;
1685 }
1686
1687
1689 insn->displacement *= 1 << (Op.encoding - ENCODING_VSIB);
1690 break;
1691 case ENCODING_SIB:
1692
1693 if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
1694 return -1;
1696 return -1;
1698 return -1;
1699 break;
1700 case ENCODING_REG:
1703 return -1;
1705 return -1;
1706
1708 insn->displacement *= 1 << (Op.encoding - ENCODING_RM);
1709 break;
1710 case ENCODING_IB:
1711 if (sawRegImm) {
1712
1713
1717 break;
1718 }
1720 return -1;
1721 if (Op.type == TYPE_XMM || Op.type == TYPE_YMM)
1722 sawRegImm = 1;
1723 break;
1724 case ENCODING_IW:
1726 return -1;
1727 break;
1728 case ENCODING_ID:
1730 return -1;
1731 break;
1732 case ENCODING_IO:
1734 return -1;
1735 break;
1736 case ENCODING_Iv:
1738 return -1;
1739 break;
1740 case ENCODING_Ia:
1742 return -1;
1743 break;
1744 case ENCODING_IRC:
1747 break;
1748 case ENCODING_RB:
1750 return -1;
1751 break;
1752 case ENCODING_RW:
1754 return -1;
1755 break;
1756 case ENCODING_RD:
1758 return -1;
1759 break;
1760 case ENCODING_RO:
1762 return -1;
1763 break;
1764 case ENCODING_Rv:
1766 return -1;
1767 break;
1768 case ENCODING_CF:
1770 needVVVV = false;
1771 break;
1772 case ENCODING_CC:
1775 else
1777 break;
1778 case ENCODING_FP:
1779 break;
1780 case ENCODING_VVVV:
1781 needVVVV = 0;
1782 if (!hasVVVV)
1783 return -1;
1785 insn->vvvv = static_cast<Reg>(insn->vvvv & 0x7);
1787 return -1;
1788 break;
1789 case ENCODING_WRITEMASK:
1791 return -1;
1792 break;
1793 case ENCODING_DUP:
1794 break;
1795 default:
1796 LLVM_DEBUG(dbgs() << "Encountered an operand with an unknown encoding.");
1797 return -1;
1798 }
1799 }
1800
1801
1802 if (needVVVV)
1803 return -1;
1804
1805 return 0;
1806}
1807
1808namespace llvm {
1809
1810
1811
1812
1813namespace X86 {
1814 enum {
1821 };
1822}
1823
1824}
1825
1829
1830namespace {
1831
1832
1833
1834
1835class X86GenericDisassembler : public MCDisassembler {
1836 std::unique_ptr MII;
1837public:
1838 X86GenericDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
1839 std::unique_ptr MII);
1840public:
1842 ArrayRef<uint8_t> Bytes, uint64_t Address,
1843 raw_ostream &cStream) const override;
1844
1845private:
1847};
1848
1849}
1850
1851X86GenericDisassembler::X86GenericDisassembler(
1854 std::unique_ptr MII)
1857 if (FB[X86::Is16Bit]) {
1859 return;
1860 } else if (FB[X86::Is32Bit]) {
1862 return;
1863 } else if (FB[X86::Is64Bit]) {
1865 return;
1866 }
1867
1869}
1870
1874 CommentStream = &CStream;
1875
1876 InternalInstruction Insn;
1877 memset(&Insn, 0, sizeof(InternalInstruction));
1878 Insn.bytes = Bytes;
1881 Insn.mode = fMode;
1882
1887 return Fail;
1888 }
1889
1893 if (Size > 15)
1894 LLVM_DEBUG(dbgs() << "Instruction exceeds 15-byte limit");
1895
1897 if (!Ret) {
1907
1908 Insn.opcode != 0x90)
1912 }
1913 Instr.setFlags(Flags);
1914 }
1916}
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1928#define ENTRY(x) X86::x,
1930#undef ENTRY
1931
1932 MCPhysReg llvmRegnum = llvmRegnums[reg];
1934}
1935
1937 0,
1938 X86::CS,
1939 X86::SS,
1940 X86::DS,
1941 X86::ES,
1942 X86::FS,
1943 X86::GS
1944};
1945
1946
1947
1948
1949
1951 unsigned baseRegNo;
1952
1954 baseRegNo = insn.hasAdSize ? X86::ESI : X86::RSI;
1956 baseRegNo = insn.hasAdSize ? X86::SI : X86::ESI;
1957 else {
1959 baseRegNo = insn.hasAdSize ? X86::ESI : X86::SI;
1960 }
1963
1967 return false;
1968}
1969
1970
1971
1972
1973
1974
1976 unsigned baseRegNo;
1977
1979 baseRegNo = insn.hasAdSize ? X86::EDI : X86::RDI;
1981 baseRegNo = insn.hasAdSize ? X86::DI : X86::EDI;
1982 else {
1984 baseRegNo = insn.hasAdSize ? X86::EDI : X86::DI;
1985 }
1988 return false;
1989}
1990
1991
1992
1993
1994
1995
1996
2001
2002
2004
2007 if (type == TYPE_REL) {
2011 default:
2012 break;
2013 case ENCODING_Iv:
2015 default:
2016 break;
2017 case 1:
2018 if(immediate & 0x80)
2019 immediate |= ~(0xffull);
2020 break;
2021 case 2:
2022 if(immediate & 0x8000)
2023 immediate |= ~(0xffffull);
2024 break;
2025 case 4:
2026 if(immediate & 0x80000000)
2027 immediate |= ~(0xffffffffull);
2028 break;
2029 case 8:
2030 break;
2031 }
2032 break;
2033 case ENCODING_IB:
2034 if(immediate & 0x80)
2035 immediate |= ~(0xffull);
2036 break;
2037 case ENCODING_IW:
2038 if(immediate & 0x8000)
2039 immediate |= ~(0xffffull);
2040 break;
2041 case ENCODING_ID:
2042 if(immediate & 0x80000000)
2043 immediate |= ~(0xffffffffull);
2044 break;
2045 }
2046 }
2047
2048 else if (type == TYPE_IMM) {
2050 default:
2051 break;
2052 case ENCODING_IB:
2053 if(immediate & 0x80)
2054 immediate |= ~(0xffull);
2055 break;
2056 case ENCODING_IW:
2057 if(immediate & 0x8000)
2058 immediate |= ~(0xffffull);
2059 break;
2060 case ENCODING_ID:
2061 if(immediate & 0x80000000)
2062 immediate |= ~(0xffffffffull);
2063 break;
2064 case ENCODING_IO:
2065 break;
2066 }
2067 }
2068
2069 switch (type) {
2070 case TYPE_XMM:
2072 return;
2073 case TYPE_YMM:
2075 return;
2076 case TYPE_ZMM:
2078 return;
2079 default:
2080
2081 break;
2082 }
2083
2088
2089 if (type == TYPE_MOFFS) {
2093 }
2094}
2095
2096
2097
2098
2099
2100
2101
2104 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
2105 debug("A R/M register operand may not have a SIB byte");
2106 return true;
2107 }
2108
2109 switch (insn.eaBase) {
2110 default:
2111 debug("Unexpected EA base register");
2112 return true;
2114 debug("EA_BASE_NONE for ModR/M base");
2115 return true;
2116#define ENTRY(x) case EA_BASE_##x:
2118#undef ENTRY
2119 debug("A R/M register operand may not have a base; "
2120 "the operand must be a register.");
2121 return true;
2122#define ENTRY(x) \
2123 case EA_REG_##x: \
2124 mcInst.addOperand(MCOperand::createReg(X86::x)); break;
2126#undef ENTRY
2127 }
2128
2129 return false;
2130}
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2143 bool ForceSIB = false) {
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2162
2163 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
2166 default:
2167 debug("Unexpected sibBase");
2168 return true;
2169#define ENTRY(x) \
2170 case SIB_BASE_##x: \
2171 baseReg = MCOperand::createReg(X86::x); break;
2173#undef ENTRY
2174 }
2175 } else {
2177 }
2178
2181 default:
2182 debug("Unexpected sibIndex");
2183 return true;
2184#define ENTRY(x) \
2185 case SIB_INDEX_##x: \
2186 indexReg = MCOperand::createReg(X86::x); break;
2192#undef ENTRY
2193 }
2194 } else {
2195
2196
2197
2198
2199
2200
2201
2202 if (!ForceSIB &&
2206 insn.sibBase != SIB_BASE_ESP && insn.sibBase != SIB_BASE_RSP &&
2207 insn.sibBase != SIB_BASE_R12D && insn.sibBase != SIB_BASE_R12))) {
2209 X86::RIZ);
2210 } else
2212 }
2213
2215 } else {
2216 switch (insn.eaBase) {
2219 debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
2220 return true;
2221 }
2227
2229 X86::RIP);
2230 }
2231 else
2233
2235 break;
2236 case EA_BASE_BX_SI:
2239 break;
2240 case EA_BASE_BX_DI:
2243 break;
2244 case EA_BASE_BP_SI:
2247 break;
2248 case EA_BASE_BP_DI:
2251 break;
2252 default:
2254 switch (insn.eaBase) {
2255 default:
2256 debug("Unexpected eaBase");
2257 return true;
2258
2259
2260
2261
2262#define ENTRY(x) \
2263 case EA_BASE_##x: \
2264 baseReg = MCOperand::createReg(X86::x); break;
2266#undef ENTRY
2267#define ENTRY(x) case EA_REG_##x:
2269#undef ENTRY
2270 debug("A R/M memory operand may not be a register; "
2271 "the base field must be a base.");
2272 return true;
2273 }
2274 }
2275
2277 }
2278
2280
2282
2286
2287 const uint8_t dispSize =
2289
2295 return false;
2296}
2297
2298
2299
2300
2301
2302
2303
2304
2305
2308 switch (operand.type) {
2309 default:
2310 debug("Unexpected type for a R/M operand");
2311 return true;
2312 case TYPE_R8:
2313 case TYPE_R16:
2314 case TYPE_R32:
2315 case TYPE_R64:
2316 case TYPE_Rv:
2317 case TYPE_MM64:
2318 case TYPE_XMM:
2319 case TYPE_YMM:
2320 case TYPE_ZMM:
2321 case TYPE_TMM:
2322 case TYPE_VK_PAIR:
2323 case TYPE_VK:
2324 case TYPE_DEBUGREG:
2325 case TYPE_CONTROLREG:
2326 case TYPE_BNDR:
2328 case TYPE_M:
2329 case TYPE_MVSIBX:
2330 case TYPE_MVSIBY:
2331 case TYPE_MVSIBZ:
2333 case TYPE_MSIB:
2335 }
2336}
2337
2338
2339
2340
2341
2342
2347
2348
2349
2350
2351
2352
2353
2356 if (maskRegNum >= 8) {
2357 debug("Invalid mask register number");
2358 return true;
2359 }
2360
2362 return false;
2363}
2364
2365
2366
2367
2368
2369
2370
2371
2376 default:
2377 debug("Unhandled operand encoding during translation");
2378 return true;
2379 case ENCODING_REG:
2381 return false;
2382 case ENCODING_WRITEMASK:
2384 case ENCODING_SIB:
2387 return translateRM(mcInst, operand, insn, Dis);
2388 case ENCODING_IB:
2389 case ENCODING_IW:
2390 case ENCODING_ID:
2391 case ENCODING_IO:
2392 case ENCODING_Iv:
2393 case ENCODING_Ia:
2396 operand,
2397 insn,
2398 Dis);
2399 return false;
2400 case ENCODING_IRC:
2402 return false;
2403 case ENCODING_SI:
2405 case ENCODING_DI:
2407 case ENCODING_RB:
2408 case ENCODING_RW:
2409 case ENCODING_RD:
2410 case ENCODING_RO:
2411 case ENCODING_Rv:
2413 return false;
2414 case ENCODING_CF:
2416 return false;
2417 case ENCODING_CC:
2420 else
2422 return false;
2423 case ENCODING_FP:
2425 return false;
2426 case ENCODING_VVVV:
2428 return false;
2429 case ENCODING_DUP:
2431 insn, Dis);
2432 }
2433}
2434
2435
2436
2437
2438
2439
2440
2444 if (!insn.spec) {
2445 debug("Instruction has no specification");
2446 return true;
2447 }
2448
2451
2452
2453
2455 if(mcInst.getOpcode() == X86::REP_PREFIX)
2456 mcInst.setOpcode(X86::XRELEASE_PREFIX);
2457 else if(mcInst.getOpcode() == X86::REPNE_PREFIX)
2458 mcInst.setOpcode(X86::XACQUIRE_PREFIX);
2459 }
2460
2462
2463 for (const auto &Op : insn.operands) {
2464 if (Op.encoding != ENCODING_NONE) {
2466 return true;
2467 }
2468 }
2469 }
2470
2471 return false;
2472}
2473
2477 std::unique_ptr MII(T.createMCInstrInfo());
2478 return new X86GenericDisassembler(STI, Ctx, std::move(MII));
2479}
2480
MCDisassembler::DecodeStatus DecodeStatus
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool isBranch(unsigned Opcode)
#define LLVM_C_ABI
LLVM_C_ABI is the export/visibility macro used to mark symbols declared in llvm-c as exported when built as a shared library.
static uint8_t readOpcode(WasmObjectFile::ReadContext &Ctx)
static int nextByte(ArrayRef< uint8_t > Bytes, uint64_t &Size)
static bool isPrefix(unsigned Opcode, const MCInstrInfo &MCII)
Check if the instruction is a prefix.
#define CASE_ENCODING_VSIB
#define THREEDNOW_MAP_SYM
#define rFromEVEX2of4(evex)
#define lFromEVEX4of4(evex)
#define l2FromEVEX4of4(evex)
#define rFromVEX2of3(vex)
#define zFromEVEX4of4(evex)
#define bFromXOP2of3(xop)
#define xFromVEX2of3(vex)
#define mmmmmFromVEX2of3(vex)
#define rmFromModRM(modRM)
#define bFromEVEX4of4(evex)
#define rFromVEX2of2(vex)
#define ppFromEVEX3of4(evex)
#define v2FromEVEX4of4(evex)
#define modFromModRM(modRM)
#define rFromXOP2of3(xop)
#define lFromXOP3of3(xop)
#define lFromVEX2of2(vex)
#define scFromEVEX4of4(evex)
#define scaleFromSIB(sib)
#define regFromModRM(modRM)
#define b2FromEVEX2of4(evex)
#define vvvvFromVEX2of2(vex)
#define nfFromEVEX4of4(evex)
#define ppFromXOP3of3(xop)
#define vvvvFromVEX3of3(vex)
#define r2FromEVEX2of4(evex)
#define uFromEVEX3of4(evex)
#define xFromXOP2of3(xop)
#define wFromEVEX3of4(evex)
#define bFromVEX2of3(vex)
#define wFromVEX3of3(vex)
#define mmmmmFromXOP2of3(xop)
#define aaaFromEVEX4of4(evex)
#define lFromVEX3of3(vex)
#define mmmFromEVEX2of4(evex)
#define ppFromVEX3of3(vex)
#define bFromEVEX2of4(evex)
#define xFromEVEX2of4(evex)
#define ppFromVEX2of2(vex)
#define indexFromSIB(sib)
#define vvvvFromXOP3of3(xop)
#define wFromXOP3of3(xop)
#define oszcFromEVEX3of4(evex)
#define vvvvFromEVEX3of4(evex)
static void translateRegister(MCInst &mcInst, Reg reg)
translateRegister - Translates an internal register to the appropriate LLVM register, and appends it as an operand to an MCInst.
Definition X86Disassembler.cpp:1927
static bool isREX2(struct InternalInstruction *insn, uint8_t prefix)
Definition X86Disassembler.cpp:213
static int getInstructionID(struct InternalInstruction *insn, const MCInstrInfo *mii)
Definition X86Disassembler.cpp:1194
static bool readOpcode(struct InternalInstruction *insn)
Definition X86Disassembler.cpp:925
static MCDisassembler * createX86Disassembler(const Target &T, const MCSubtargetInfo &STI, MCContext &Ctx)
Definition X86Disassembler.cpp:2474
static bool translateMaskRegister(MCInst &mcInst, uint8_t maskRegNum)
translateMaskRegister - Translates a 3-bit mask register number to LLVM form, and appends it to an MCInst.
Definition X86Disassembler.cpp:2354
static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn)
translateDstIndex - Appends a destination index operand to an MCInst.
Definition X86Disassembler.cpp:1975
static void translateImmediate(MCInst &mcInst, uint64_t immediate, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis)
translateImmediate - Appends an immediate operand to an MCInst.
Definition X86Disassembler.cpp:1997
static int readOperands(struct InternalInstruction *insn)
Definition X86Disassembler.cpp:1632
static void translateFPRegister(MCInst &mcInst, uint8_t stackPos)
translateFPRegister - Translates a stack position on the FPU stack to its LLVM form, and appends it to an MCInst.
Definition X86Disassembler.cpp:2343
static bool is64Bit(const char *name)
Definition X86Disassembler.cpp:1082
static const uint8_t segmentRegnums[SEG_OVERRIDE_max]
Definition X86Disassembler.cpp:1936
static int readImmediate(struct InternalInstruction *insn, uint8_t size)
Definition X86Disassembler.cpp:1548
static int getInstructionIDWithAttrMask(uint16_t *instructionID, struct InternalInstruction *insn, uint16_t attrMask)
Definition X86Disassembler.cpp:1093
static int readSIB(struct InternalInstruction *insn)
Definition X86Disassembler.cpp:549
static bool isREX(struct InternalInstruction *insn, uint8_t prefix)
Definition X86Disassembler.cpp:209
static int readVVVV(struct InternalInstruction *insn)
Definition X86Disassembler.cpp:1592
static bool isNF(InternalInstruction *insn)
Definition X86Disassembler.cpp:1170
static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn)
translateSrcIndex - Appends a source index operand to an MCInst.
Definition X86Disassembler.cpp:1950
#define GENERIC_FIXUP_FUNC(name, base, prefix)
Definition X86Disassembler.cpp:781
static int readMaskRegister(struct InternalInstruction *insn)
Definition X86Disassembler.cpp:1619
static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis)
translateRM - Translates an operand stored in the R/M (and possibly SIB) byte of an instruction to LLVM form, and appends it to an MCInst.
Definition X86Disassembler.cpp:2306
static InstrUID decode(OpcodeType type, InstructionContext insnContext, uint8_t opcode, uint8_t modRM)
Definition X86Disassembler.cpp:124
static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size)
Definition X86Disassembler.cpp:1504
static int readDisplacement(struct InternalInstruction *insn)
Definition X86Disassembler.cpp:613
static bool isCCMPOrCTEST(InternalInstruction *insn)
Definition X86Disassembler.cpp:1151
LLVM_C_ABI void LLVMInitializeX86Disassembler()
Definition X86Disassembler.cpp:2481
static int fixupReg(struct InternalInstruction *insn, const struct OperandSpecifier *op)
Definition X86Disassembler.cpp:865
#define debug(s)
Definition X86Disassembler.cpp:97
static int readModRM(struct InternalInstruction *insn)
Definition X86Disassembler.cpp:644
static bool is16BitEquivalent(const char *orig, const char *equiv)
Definition X86Disassembler.cpp:1063
static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, const MCDisassembler *Dis, bool ForceSIB=false)
translateRMMemory - Translates a memory operand stored in the Mod and R/M fields of an internal instruction (and possibly its SIB byte) to a memory operand in LLVM's format, and appends it to an MCInst.
Definition X86Disassembler.cpp:2141
static bool translateInstruction(MCInst &target, InternalInstruction &source, const MCDisassembler *Dis)
translateInstruction - Translates an internal instruction and all its operands to an MCInst.
Definition X86Disassembler.cpp:2441
static bool translateRMRegister(MCInst &mcInst, InternalInstruction &insn)
translateRMRegister - Translates a register stored in the R/M field of the ModR/M byte to its LLVM equivalent and appends it to an MCInst.
Definition X86Disassembler.cpp:2102
static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis)
translateOperand - Translates an operand stored in an internal instruction to LLVM's format and appends it to an MCInst.
Definition X86Disassembler.cpp:2372
static int readPrefixes(struct InternalInstruction *insn)
Definition X86Disassembler.cpp:222
static bool peek(struct InternalInstruction *insn, uint8_t &byte)
Definition X86Disassembler.cpp:191
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
Context object for machine code objects.
Superclass for all disassemblers.
bool tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address, bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) const
void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const
DecodeStatus
Ternary decode status.
Instances of this class represent a single low-level machine instruction.
unsigned getOpcode() const
void addOperand(const MCOperand Op)
void setOpcode(unsigned Op)
Interface to description of machine instruction set.
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
Instances of this class represent operands of the MCInst class.
static MCOperand createReg(MCRegister Reg)
static MCOperand createImm(int64_t Val)
Generic base class for all target subtargets.
const FeatureBitset & getFeatureBits() const
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Target - Wrapper for Target specific information.
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
EABase
All possible values of the base field for effective-address computations, a.k.a. the Mod and R/M fields of the ModR/M byte.
Reg
All possible values of the reg field in the ModR/M byte.
DisassemblerMode
Decoding mode for the Intel disassembler.
SIBBase
All possible values of the SIB base field.
SIBIndex
All possible values of the SIB index field.
Define some predicates that are used for node matching.
@ BX_SI
Definition X86Disassembler.cpp:1815
@ BP_DI
Definition X86Disassembler.cpp:1818
@ sib64
Definition X86Disassembler.cpp:1820
@ BX_DI
Definition X86Disassembler.cpp:1816
@ BP_SI
Definition X86Disassembler.cpp:1817
@ sib
Definition X86Disassembler.cpp:1819
NodeAddr< InstrNode * > Instr
value_type read(const void *memory, endianness endian)
Read a value of a particular endianness from memory.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ATTRIBUTE_ALWAYS_INLINE DynamicAPInt mod(const DynamicAPInt &LHS, const DynamicAPInt &RHS)
The result is always non-negative.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Target & getTheX86_32Target()
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
@ Success
The lock was released successfully.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtual registers.
DWARFExpression::Operation Op
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Target & getTheX86_64Target()
Implement std::hash so that hash_code can be used in STL containers.
Definition X86Disassembler.cpp:118
OpcodeDecision opcodeDecisions[IC_max]
Definition X86Disassembler.cpp:119
uint16_t instructionIDs
Definition X86Disassembler.cpp:104
uint8_t modrm_type
Definition X86Disassembler.cpp:103
ModRMDecision modRMDecisions[256]
Definition X86Disassembler.cpp:110
static void RegisterMCDisassembler(Target &T, Target::MCDisassemblerCtorTy Fn)
RegisterMCDisassembler - Register a MCDisassembler implementation for the given target.
The specification for how to extract and interpret a full instruction and its operands.
The x86 internal instruction, which is produced by the decoder.
ArrayRef< OperandSpecifier > operands
EADisplacement eaDisplacement
uint8_t rex2ExtensionPrefix[2]
uint8_t vectorExtensionPrefix[4]
SegmentOverride segmentOverride
uint8_t numImmediatesConsumed
llvm::ArrayRef< uint8_t > bytes
uint8_t numImmediatesTranslated
const InstructionSpecifier * spec
VectorExtensionType vectorExtensionType
uint8_t displacementOffset
The specification for how to extract and interpret one operand.