LLVM: lib/Target/X86/Disassembler/X86Disassembler.cpp Source File (original) (raw)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
90
91using namespace llvm;
93
94#define DEBUG_TYPE "x86-disassembler"
95
// Debug-logging helper: streams `s` to dbgs() through LLVM_DEBUG, prefixed with
// the current source line number (__LINE__) and ": ".
// NOTE(review): the expansion itself ends in ';', so a call written as
// `debug(x);` produces an extra empty statement. Keep calls inside braces; an
// unbraced `if (...) debug(x); else ...` would not parse.
96#define debug(s) LLVM_DEBUG(dbgs() << __LINE__ << ": " << s);
97
98
99
100
104};
105
106
107
110};
111
112
113
114
115
116
119};
120
121#include "X86GenDisassemblerTables.inc"
122
126
127 switch (type) {
129 dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
130 break;
132 dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
133 break;
135 dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
136 break;
138 dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
139 break;
141 dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
142 break;
144 dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
145 break;
147 dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
148 break;
150 dec =
151 &THREEDNOW_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
152 break;
154 dec = &MAP4_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
155 break;
157 dec = &MAP5_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
158 break;
160 dec = &MAP6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
161 break;
163 dec = &MAP7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
164 break;
165 }
166
168 default:
170 return 0;
171 case MODRM_ONEENTRY:
173 case MODRM_SPLITRM:
177 case MODRM_SPLITREG:
179 return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3) + 8];
180 return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)];
181 case MODRM_SPLITMISC:
183 return modRMTable[dec->instructionIDs + (modRM & 0x3f) + 8];
184 return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)];
185 case MODRM_FULL:
187 }
188}
189
192 if (offset >= insn->bytes.size())
193 return true;
194 byte = insn->bytes[offset];
195 return false;
196}
197
199 auto r = insn->bytes;
201 if (offset + sizeof(T) > r.size())
202 return true;
205 return false;
206}
207
// Body of isREX(insn, prefix); the signature line is absent from this listing.
// Returns true only when the instruction is being decoded in 64-bit mode
// (insn->mode == MODE_64BIT) and `prefix` lies in the inclusive range
// 0x40..0x4f. In every other mode the result is false regardless of `prefix`.
209 return insn->mode == MODE_64BIT && prefix >= 0x40 && prefix <= 0x4f;
210}
211
214}
215
216
217
218
219
220
225
227
229
230
232 break;
233
234
235
237 break;
238
239 if ((byte == 0xf2 || byte == 0xf3) && (insn, nextByte)) {
240
241
242
243
244
248 if (!(byte == 0xf3 && nextByte == 0x90))
249 break;
250 }
251
252
253
254
258 break;
259 }
262
263 if (consume(insn, nnextByte))
264 return -1;
265
266 if (peek(insn, nnextByte))
267 return -1;
269 }
270 }
271
272 switch (byte) {
273 case 0xf0:
275 break;
276 case 0xf2:
277 case 0xf3: {
280 break;
281
282
283
284
285
286
287
288
291
294 break;
295 }
296 case 0x2e:
298 break;
299 case 0x36:
301 break;
302 case 0x3e:
304 break;
305 case 0x26:
307 break;
308 case 0x64:
310 break;
311 case 0x65:
313 break;
314 case 0x66: {
318 break;
319
322 break;
323 }
324 case 0x67:
326 break;
327 default:
329 break;
330 }
331
332 if (isREX(insn, byte)) {
338 }
339
342 }
343
345
346 if (byte == 0x62) {
348 if (consume(insn, byte1)) {
349 LLVM_DEBUG(dbgs() << "Couldn't read second byte of EVEX prefix");
350 return -1;
351 }
352
353 if (peek(insn, byte2)) {
354 LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix");
355 return -1;
356 }
357
358 if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)) {
360 } else {
363 }
364
369 LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix");
370 return -1;
371 }
373 LLVM_DEBUG(dbgs() << "Couldn't read fourth byte of EVEX prefix");
374 return -1;
375 }
376
378
384
385
390 }
391
394 "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",
397 }
398 } else if (byte == 0xc4) {
400 if (peek(insn, byte1)) {
401 LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX");
402 return -1;
403 }
404
405 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)
407 else
409
414
415
416
423
428 }
429 } else if (byte == 0xc5) {
431 if (peek(insn, byte1)) {
432 LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX");
433 return -1;
434 }
435
436 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)
438 else
440
444
448
450 default:
451 break;
454 break;
455 }
456
460 }
461 } else if (byte == 0x8f) {
463 if (peek(insn, byte1)) {
464 LLVM_DEBUG(dbgs() << "Couldn't read second byte of XOP");
465 return -1;
466 }
467
468 if ((byte1 & 0x38) != 0x0)
470 else
472
477
478
479
486
488 default:
489 break;
492 break;
493 }
494
499 }
500 } else if (isREX2(insn, byte)) {
502 if (peek(insn, byte1)) {
503 LLVM_DEBUG(dbgs() << "Couldn't read second byte of REX2");
504 return -1;
505 }
508
509
517 } else
519
537 } else {
541 }
542 }
543
544 return 0;
545}
546
547
551
554 case 2:
555 default:
556 llvm_unreachable("SIB-based addressing doesn't work in 16-bit mode");
557 case 4:
559 sibBaseBase = SIB_BASE_EAX;
560 break;
561 case 8:
563 sibBaseBase = SIB_BASE_RAX;
564 break;
565 }
566
568 return -1;
569
572
573 if (index == 0x4) {
575 } else {
577 }
578
580
583
584 switch (base) {
585 case 0x5:
586 case 0xd:
588 case 0x0:
591 break;
592 case 0x1:
595 break;
596 case 0x2:
599 break;
600 default:
602 }
603 break;
604 default:
606 break;
607 }
608
609 return 0;
610}
611
613 int8_t d8;
614 int16_t d16;
615 int32_t d32;
617
621 break;
624 return -1;
626 break;
629 return -1;
631 break;
634 return -1;
636 break;
637 }
638
639 return 0;
640}
641
642
646
648 return 0;
649
651 return -1;
653
657
658
659
660
662 case 2:
663 insn->regBase = MODRM_REG_AX;
665 break;
666 case 4:
667 insn->regBase = MODRM_REG_EAX;
669 break;
670 case 8:
671 insn->regBase = MODRM_REG_RAX;
673 break;
674 }
675
680
683
685
687 case 2: {
688 EABase eaBaseBase = EA_BASE_BX_SI;
689
690 switch (mod) {
691 case 0x0:
692 if (rm == 0x6) {
696 return -1;
697 } else {
700 }
701 break;
702 case 0x1:
707 return -1;
708 break;
709 case 0x2:
713 return -1;
714 break;
715 case 0x3:
718 return -1;
719 break;
720 }
721 break;
722 }
723 case 4:
724 case 8: {
725 EABase eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
726
727 switch (mod) {
728 case 0x0:
730
731
732
733 switch (rm & 7) {
734 case 0x4:
735 insn->eaBase = (insn->addressSize == 4 ? EA_BASE_sib : EA_BASE_sib64);
737 return -1;
738 break;
739 case 0x5:
743 return -1;
744 break;
745 default:
747 break;
748 }
749 break;
750 case 0x1:
752 [[fallthrough]];
753 case 0x2:
755 switch (rm & 7) {
756 case 0x4:
757 insn->eaBase = EA_BASE_sib;
759 return -1;
760 break;
761 default:
764 return -1;
765 break;
766 }
767 break;
768 case 0x3:
771 break;
772 }
773 break;
774 }
775 }
776
777 return 0;
778}
779
// GENERIC_FIXUP_FUNC(name, base, prefix) defines a file-local function
//
//   static uint16_t name(struct InternalInstruction *insn, OperandType type,
//                        uint8_t index, uint8_t *valid);
//
// that converts a raw register index into a register enumerator chosen by the
// operand type.
//
//   name   - identifier of the generated function.
//   base   - expression added to `index` for TYPE_Rv operands.
//   prefix - token pasted in front of the register-group names
//            (prefix##_AL, prefix##_XMM0, ...), selecting the enum family.
//
// Behavior of the generated function:
//   * `*valid` is set to 1 on entry and cleared to 0 when the type is not
//     handled (the function then returns 0) or when the index is out of range
//     for its group. In the out-of-range cases the computed value is still
//     returned, so callers must test `*valid` before using the result.
//   * TYPE_R8: when insn->rexPrefix is non-zero and index is 4..7, the result
//     is taken from the prefix##_SPL group (offset index - 4); otherwise from
//     prefix##_AL.
//   * TYPE_TMM, TYPE_TMM_PAIR, TYPE_VK_PAIR: index > 7 is invalid; the pair
//     types use index / 2.
//   * TYPE_VK: index is first masked with 0xf, then > 7 is invalid.
//   * TYPE_MM64: index is masked with 0x7 (never flagged invalid).
//   * TYPE_SEGMENTREG: only the low three bits are used; a value > 5 is invalid.
//   * TYPE_DEBUGREG, TYPE_CONTROLREG: index > 15 is invalid.
//   * TYPE_MVSIBX / TYPE_MVSIBY / TYPE_MVSIBZ map onto the XMM0 / YMM0 / ZMM0
//     groups respectively.
//
// No comments are placed inside the macro body because every line ends in a
// continuation backslash.
780#define GENERIC_FIXUP_FUNC(name, base, prefix) \
781 static uint16_t name(struct InternalInstruction *insn, OperandType type, \
782 uint8_t index, uint8_t *valid) { \
783 *valid = 1; \
784 switch (type) { \
785 default: \
786 debug("Unhandled register type"); \
787 *valid = 0; \
788 return 0; \
789 case TYPE_Rv: \
790 return base + index; \
791 case TYPE_R8: \
792 if (insn->rexPrefix && index >= 4 && index <= 7) \
793 return prefix##_SPL + (index - 4); \
794 else \
795 return prefix##_AL + index; \
796 case TYPE_R16: \
797 return prefix##_AX + index; \
798 case TYPE_R32: \
799 return prefix##_EAX + index; \
800 case TYPE_R64: \
801 return prefix##_RAX + index; \
802 case TYPE_ZMM: \
803 return prefix##_ZMM0 + index; \
804 case TYPE_YMM: \
805 return prefix##_YMM0 + index; \
806 case TYPE_XMM: \
807 return prefix##_XMM0 + index; \
808 case TYPE_TMM: \
809 if (index > 7) \
810 *valid = 0; \
811 return prefix##_TMM0 + index; \
812 case TYPE_TMM_PAIR: \
813 if (index > 7) \
814 *valid = 0; \
815 return prefix##_TMM0_TMM1 + (index / 2); \
816 case TYPE_VK: \
817 index &= 0xf; \
818 if (index > 7) \
819 *valid = 0; \
820 return prefix##_K0 + index; \
821 case TYPE_VK_PAIR: \
822 if (index > 7) \
823 *valid = 0; \
824 return prefix##_K0_K1 + (index / 2); \
825 case TYPE_MM64: \
826 return prefix##_MM0 + (index & 0x7); \
827 case TYPE_SEGMENTREG: \
828 if ((index & 7) > 5) \
829 *valid = 0; \
830 return prefix##_ES + (index & 7); \
831 case TYPE_DEBUGREG: \
832 if (index > 15) \
833 *valid = 0; \
834 return prefix##_DR0 + index; \
835 case TYPE_CONTROLREG: \
836 if (index > 15) \
837 *valid = 0; \
838 return prefix##_CR0 + index; \
839 case TYPE_MVSIBX: \
840 return prefix##_XMM0 + index; \
841 case TYPE_MVSIBY: \
842 return prefix##_YMM0 + index; \
843 case TYPE_MVSIBZ: \
844 return prefix##_ZMM0 + index; \
845 } \
846 }
847
848
849
850
851
852
853
854
855
856
857
860
861
862
863
864
865
866
867
872
874 default:
875 debug("Expected a REG or R/M encoding in fixupReg");
876 return -1;
877 case ENCODING_VVVV:
878 insn->vvvv =
879 (Reg)fixupRegValue(insn, (OperandType)op->type, insn->vvvv, &valid);
880 if (!valid)
881 return -1;
882 break;
883 case ENCODING_REG:
885 insn->reg - insn->regBase, &valid);
886 if (!valid)
887 return -1;
888 break;
890 if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&
892
893
894
895
896
897
898 switch (op->type) {
899 case TYPE_Rv:
900 case TYPE_R8:
901 case TYPE_R16:
902 case TYPE_R32:
903 case TYPE_R64:
904 break;
905 default:
906 insn->eaBase =
907 (EABase)(insn->eaBase +
908 (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4));
909 break;
910 }
911 }
912 [[fallthrough]];
913 case ENCODING_SIB:
914 if (insn->eaBase >= insn->eaRegBase) {
915 insn->eaBase = (EABase)fixupRMValue(
916 insn, (OperandType)op->type, insn->eaBase - insn->eaRegBase, &valid);
917 if (!valid)
918 return -1;
919 }
920 break;
921 }
922
923 return 0;
924}
925
926
927
931
935 default:
937 dbgs() << format("Unhandled mmm field for instruction (0x%hhx)",
939 return true;
961 }
964 default:
966 dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)",
968 return true;
987 }
993 default:
995 dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)",
997 return true;
1007 }
1009
1012 }
1013
1014 if (consume(insn, current))
1015 return true;
1016
1017 if (current == 0x0f) {
1019 dbgs() << format("Found a two-byte escape prefix (0x%hhx)", current));
1020 if (consume(insn, current))
1021 return true;
1022
1023 if (current == 0x38) {
1025 current));
1026 if (consume(insn, current))
1027 return true;
1028
1030 } else if (current == 0x3a) {
1032 current));
1033 if (consume(insn, current))
1034 return true;
1035
1037 } else if (current == 0x0f) {
1039 dbgs() << format("Found a 3dnow escape prefix (0x%hhx)", current));
1040
1041
1043 return true;
1044
1045 if (consume(insn, current))
1046 return true;
1047
1049 } else {
1050 LLVM_DEBUG(dbgs() << "Didn't find a three-byte escape prefix");
1052 }
1054
1055
1057
1058
1059
1060 insn->opcode = current;
1061
1062 return false;
1063}
1064
1065
// Body of is16BitEquivalent(orig, equiv); the signature line is absent from
// this listing. Walks the two NUL-terminated names in lockstep and reports
// whether `equiv` equals `orig` once a fixed set of per-character swaps is
// allowed. Each position is judged independently of its neighbours.
1067 for (int i = 0;; i++) {
// Both names ended at the same position: every character matched or was an
// allowed substitution.
1068 if (orig[i] == '\0' && equiv[i] == '\0')
1069 return true;
// Exactly one name ended: the lengths differ, so they cannot be equivalent.
1070 if (orig[i] == '\0' || equiv[i] == '\0')
1071 return false;
1072 if (orig[i] != equiv[i]) {
// Size-suffix letter: 'Q' or 'L' in orig may appear as 'W' in equiv.
1073 if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
1074 continue;
// First digit of a width: '6' or '3' in orig may appear as '1' in equiv.
1075 if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
1076 continue;
// Second digit of a width: '4' or '2' in orig may appear as '6' in equiv.
1077 if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
1078 continue;
// Any other mismatch means the names are unrelated.
1079 return false;
1080 }
1081 }
1082}
1083
1084
// Body of is64Bit(name); the signature line is absent from this listing.
// Scans the NUL-terminated `name` and returns true as soon as the two
// consecutive characters "64" are found, false if the end is reached first.
1086 for (int i = 0;; ++i) {
1087 if (name[i] == '\0')
1088 return false;
// name[i] is known to be non-NUL here, so reading name[i + 1] stays within the
// string (at worst it reads the terminator).
1089 if (name[i] == '6' && name[i + 1] == '4')
1090 return true;
1091 }
1092}
1093
1094
1095
1099 auto insnCtx = InstructionContext(x86DisassemblerContexts[attrMask]);
1104 break;
1107 break;
1110 break;
1113 break;
1116 break;
1119 break;
1122 break;
1125 break;
1128 break;
1131 break;
1134 break;
1137 break;
1138 }
1139
1144 return -1;
1145 *instructionID =
1147 } else {
1149 }
1150
1151 return 0;
1152}
1153
1156 return false;
1158 return true;
1159 switch (insn->opcode & 0xfe) {
1160 default:
1161 return false;
1162 case 0x38:
1163 case 0x3a:
1164 case 0x84:
1165 return true;
1166 case 0x80:
1168 case 0xf6:
1170 }
1171}
1172
1175 return false;
1177 return true;
1178
1181 switch (insn->opcode) {
1182 case 0xf2:
1183 case 0xf3:
1184 case 0xf5:
1185 case 0xf7:
1186 return true;
1187 default:
1188 break;
1189 }
1190 }
1191 return false;
1192}
1193
1194
1195
1196
1201
1203
1205
1208
1211
1216 break;
1219 break;
1222 break;
1223 }
1224
1228 (insn))
1230
1239 }
1248 break;
1251 break;
1254 break;
1255 }
1256
1265 break;
1268 break;
1271 break;
1272 }
1273
1280 break;
1283 break;
1286 break;
1287 }
1288
1291 } else {
1292 return -1;
1293 }
1295
1302
1304 } else {
1309 }
1310 } else {
1312 case 0xf2:
1314 break;
1315 case 0xf3:
1317 break;
1318 case 0x66:
1323 break;
1324 case 0x67:
1326 break;
1327 }
1328 }
1329
1332 attrMask &= ~ATTR_ADSIZE;
1333 }
1334
1335
1337 (insn->opcode == 0xA1 || (insn->opcode & 0xf0) == 0x50))
1339
1341
1342
1345
1346
1347
1349 (insn->opcode == 0xE8 || insn->opcode == 0xE9))
1351
1353 insn->opcode >= 0x80 && insn->opcode <= 0x8F)
1355 }
1356
1357
1359 return -1;
1360
1361
1362
1365
1366
1373
1374 uint16_t instructionIDWithREXW;
1379 return 0;
1380 }
1381
1382 auto SpecName = mii->getName(instructionIDWithREXW);
1383
1384 if ((SpecName.data())) {
1387 return 0;
1388 }
1389 }
1390 }
1391
1392
1393
1394
1395
1396
1401
1406
1407
1410
1411
1414 }
1415
1417 return -1;
1418
1421 return 0;
1422 }
1423
1426
1427
1428
1429
1430
1432 uint16_t instructionIDWithOpsize;
1434
1436
1439
1440
1442 insn->spec = spec;
1443 return 0;
1444 }
1445
1446 specName = mii->getName(instructionID);
1447 specWithOpSizeName = mii->getName(instructionIDWithOpsize);
1448
1453 } else {
1455 insn->spec = spec;
1456 }
1457 return 0;
1458 }
1459
1462
1463
1465 uint16_t instructionIDWithNewOpcode;
1467
1469
1470
1471 insn->opcode = 0x91;
1472
1474 attrMask)) {
1475 insn->opcode = 0x90;
1476
1478 insn->spec = spec;
1479 return 0;
1480 }
1481
1482 specWithNewOpcode = &INSTRUCTIONS_SYM[instructionIDWithNewOpcode];
1483
1484
1485 insn->opcode = 0x90;
1486
1487 insn->instructionID = instructionIDWithNewOpcode;
1488 insn->spec = specWithNewOpcode;
1489
1490 return 0;
1491 }
1492
1495
1496 return 0;
1497}
1498
1499
1500
1501
1502
1503
1504
1505
1506
1509
1510 if (size == 0)
1512
1513 auto setOpcodeRegister = [&](unsigned base) {
1517 (insn->opcode & 7)));
1518 };
1519
1520 switch (size) {
1521 case 1:
1522 setOpcodeRegister(MODRM_REG_AL);
1526 (Reg)(MODRM_REG_SPL + (insn->opcodeRegister - MODRM_REG_AL - 4));
1527 }
1528
1529 break;
1530 case 2:
1531 setOpcodeRegister(MODRM_REG_AX);
1532 break;
1533 case 4:
1534 setOpcodeRegister(MODRM_REG_EAX);
1535 break;
1536 case 8:
1537 setOpcodeRegister(MODRM_REG_RAX);
1538 break;
1539 }
1540
1541 return 0;
1542}
1543
1544
1545
1546
1547
1548
1549
1550
1556
1558
1560
1563
1564 switch (size) {
1565 case 1:
1566 if (consume(insn, imm8))
1567 return -1;
1569 break;
1570 case 2:
1571 if (consume(insn, imm16))
1572 return -1;
1574 break;
1575 case 4:
1576 if (consume(insn, imm32))
1577 return -1;
1579 break;
1580 case 8:
1581 if (consume(insn, imm64))
1582 return -1;
1584 break;
1585 default:
1587 }
1588
1590
1591 return 0;
1592}
1593
1594
1597
1598 int vvvv;
1608 else
1609 return -1;
1610
1612 vvvv &= 0xf;
1613
1614 insn->vvvv = static_cast<Reg>(vvvv);
1615 return 0;
1616}
1617
1618
1619
1620
1621
1624
1626 return -1;
1627
1630 return 0;
1631}
1632
1633
1634
1636 int hasVVVV, needVVVV;
1637 int sawRegImm = 0;
1638
1640
1641
1643 needVVVV = hasVVVV && (insn->vvvv != 0);
1644
1645 for (const auto &Op : x86OperandSets[insn->spec->operands]) {
1646 switch (Op.encoding) {
1647 case ENCODING_NONE:
1648 case ENCODING_SI:
1649 case ENCODING_DI:
1650 break;
1652
1653 if (needVVVV)
1654 needVVVV = hasVVVV & ((insn->vvvv & 0xf) != 0);
1656 return -1;
1657
1658
1659 if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
1660 return -1;
1661
1662
1665
1666
1670
1671
1673 default:
1674 debug("Unhandled VSIB index type");
1675 return -1;
1676 case TYPE_MVSIBX:
1679 break;
1680 case TYPE_MVSIBY:
1683 break;
1684 case TYPE_MVSIBZ:
1687 break;
1688 }
1689
1690
1692 insn->displacement *= 1 << (Op.encoding - ENCODING_VSIB);
1693 break;
1694 case ENCODING_SIB:
1695
1696 if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
1697 return -1;
1699 return -1;
1701 return -1;
1702 break;
1703 case ENCODING_REG:
1706 return -1;
1708 return -1;
1709
1711 insn->displacement *= 1 << (Op.encoding - ENCODING_RM);
1712 break;
1713 case ENCODING_IB:
1714 if (sawRegImm) {
1715
1716
1720 break;
1721 }
1723 return -1;
1724 if (Op.type == TYPE_XMM || Op.type == TYPE_YMM)
1725 sawRegImm = 1;
1726 break;
1727 case ENCODING_IW:
1729 return -1;
1730 break;
1731 case ENCODING_ID:
1733 return -1;
1734 break;
1735 case ENCODING_IO:
1737 return -1;
1738 break;
1739 case ENCODING_Iv:
1741 return -1;
1742 break;
1743 case ENCODING_Ia:
1745 return -1;
1746 break;
1747 case ENCODING_IRC:
1750 break;
1751 case ENCODING_RB:
1753 return -1;
1754 break;
1755 case ENCODING_RW:
1757 return -1;
1758 break;
1759 case ENCODING_RD:
1761 return -1;
1762 break;
1763 case ENCODING_RO:
1765 return -1;
1766 break;
1767 case ENCODING_Rv:
1769 return -1;
1770 break;
1771 case ENCODING_CF:
1773 needVVVV = false;
1774 break;
1775 case ENCODING_CC:
1778 else
1780 break;
1781 case ENCODING_FP:
1782 break;
1783 case ENCODING_VVVV:
1784 needVVVV = 0;
1785 if (!hasVVVV)
1786 return -1;
1788 insn->vvvv = static_cast<Reg>(insn->vvvv & 0x7);
1790 return -1;
1791 break;
1792 case ENCODING_WRITEMASK:
1794 return -1;
1795 break;
1796 case ENCODING_DUP:
1797 break;
1798 default:
1799 LLVM_DEBUG(dbgs() << "Encountered an operand with an unknown encoding.");
1800 return -1;
1801 }
1802 }
1803
1804
1805 if (needVVVV)
1806 return -1;
1807
1808 return 0;
1809}
1810
1811namespace llvm {
1812
1813
1814
1815
1816namespace X86 {
1817 enum {
1825}
1826
1827}
1828
1832
1833namespace {
1834
1835
1836
1837
1838class X86GenericDisassembler : public MCDisassembler {
1839 std::unique_ptr MII;
1840public:
1842 std::unique_ptr MII);
1843public:
1847
1848private:
1850};
1851
1852}
1853
1854X86GenericDisassembler::X86GenericDisassembler(
1857 std::unique_ptr MII)
1860 if (FB[X86::Is16Bit]) {
1862 return;
1863 } else if (FB[X86::Is32Bit]) {
1865 return;
1866 } else if (FB[X86::Is64Bit]) {
1868 return;
1869 }
1870
1872}
1873
1877 CommentStream = &CStream;
1878
1881 Insn.bytes = Bytes;
1884 Insn.mode = fMode;
1885
1890 return Fail;
1891 }
1892
1893 Insn.operands = x86OperandSets[Insn.spec->operands];
1894 Insn.length = Insn.readerCursor - Insn.startLocation;
1896 if (Size > 15)
1897 LLVM_DEBUG(dbgs() << "Instruction exceeds 15-byte limit");
1898
1900 if (!Ret) {
1902 if (Insn.hasAdSize)
1904 if (.mandatoryPrefix) {
1905 if (Insn.hasOpSize)
1907 if (Insn.repeatPrefix == 0xf2)
1909 else if (Insn.repeatPrefix == 0xf3 &&
1910
1911 Insn.opcode != 0x90)
1913 if (Insn.hasLockPrefix)
1915 }
1916 Instr.setFlags(Flags);
1917 }
1919}
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1931#define ENTRY(x) X86::x,
1933#undef ENTRY
1934
1935 MCPhysReg llvmRegnum = llvmRegnums[reg];
1937}
1938
// Initializer list of segmentRegnums; the declaration line is absent from this
// listing. Seven entries: a leading 0 followed by the X86 segment registers in
// the order CS, SS, DS, ES, FS, GS.
// NOTE(review): presumably indexed by a SegmentOverride value, with entry 0
// standing for "no override" -- confirm against the declaration and its users.
1940 0,
1941 X86::CS,
1942 X86::SS,
1943 X86::DS,
1944 X86::ES,
1945 X86::FS,
1946 X86::GS
1947};
1948
1949
1950
1951
1952
1954 unsigned baseRegNo;
1955
1957 baseRegNo = insn.hasAdSize ? X86::ESI : X86::RSI;
1959 baseRegNo = insn.hasAdSize ? X86::SI : X86::ESI;
1960 else {
1962 baseRegNo = insn.hasAdSize ? X86::ESI : X86::SI;
1963 }
1966
1970 return false;
1971}
1972
1973
1974
1975
1976
1977
1979 unsigned baseRegNo;
1980
1982 baseRegNo = insn.hasAdSize ? X86::EDI : X86::RDI;
1984 baseRegNo = insn.hasAdSize ? X86::DI : X86::EDI;
1985 else {
1987 baseRegNo = insn.hasAdSize ? X86::EDI : X86::DI;
1988 }
1991 return false;
1992}
1993
1994
1995
1996
1997
1998
1999
2004
2005
2007
2010 if (type == TYPE_REL) {
2014 default:
2015 break;
2016 case ENCODING_Iv:
2018 default:
2019 break;
2020 case 1:
2021 if(immediate & 0x80)
2022 immediate |= ~(0xffull);
2023 break;
2024 case 2:
2025 if(immediate & 0x8000)
2026 immediate |= ~(0xffffull);
2027 break;
2028 case 4:
2029 if(immediate & 0x80000000)
2030 immediate |= ~(0xffffffffull);
2031 break;
2032 case 8:
2033 break;
2034 }
2035 break;
2036 case ENCODING_IB:
2037 if(immediate & 0x80)
2038 immediate |= ~(0xffull);
2039 break;
2040 case ENCODING_IW:
2041 if(immediate & 0x8000)
2042 immediate |= ~(0xffffull);
2043 break;
2044 case ENCODING_ID:
2045 if(immediate & 0x80000000)
2046 immediate |= ~(0xffffffffull);
2047 break;
2048 }
2049 }
2050
2051 else if (type == TYPE_IMM) {
2053 default:
2054 break;
2055 case ENCODING_IB:
2056 if(immediate & 0x80)
2057 immediate |= ~(0xffull);
2058 break;
2059 case ENCODING_IW:
2060 if(immediate & 0x8000)
2061 immediate |= ~(0xffffull);
2062 break;
2063 case ENCODING_ID:
2064 if(immediate & 0x80000000)
2065 immediate |= ~(0xffffffffull);
2066 break;
2067 case ENCODING_IO:
2068 break;
2069 }
2070 }
2071
2072 switch (type) {
2073 case TYPE_XMM:
2075 return;
2076 case TYPE_YMM:
2078 return;
2079 case TYPE_ZMM:
2081 return;
2082 default:
2083
2084 break;
2085 }
2086
2091
2092 if (type == TYPE_MOFFS) {
2096 }
2097}
2098
2099
2100
2101
2102
2103
2104
2107 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
2108 debug("A R/M register operand may not have a SIB byte");
2109 return true;
2110 }
2111
2112 switch (insn.eaBase) {
2113 default:
2114 debug("Unexpected EA base register");
2115 return true;
2117 debug("EA_BASE_NONE for ModR/M base");
2118 return true;
2119#define ENTRY(x) case EA_BASE_##x:
2121#undef ENTRY
2122 debug("A R/M register operand may not have a base; "
2123 "the operand must be a register.");
2124 return true;
2125#define ENTRY(x) \
2126 case EA_REG_##x: \
2127 mcInst.addOperand(MCOperand::createReg(X86::x)); break;
2129#undef ENTRY
2130 }
2131
2132 return false;
2133}
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2146 bool ForceSIB = false) {
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2165
2166 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
2169 default:
2170 debug("Unexpected sibBase");
2171 return true;
2172#define ENTRY(x) \
2173 case SIB_BASE_##x: \
2174 baseReg = MCOperand::createReg(X86::x); break;
2176#undef ENTRY
2177 }
2178 } else {
2180 }
2181
2184 default:
2185 debug("Unexpected sibIndex");
2186 return true;
2187#define ENTRY(x) \
2188 case SIB_INDEX_##x: \
2189 indexReg = MCOperand::createReg(X86::x); break;
2195#undef ENTRY
2196 }
2197 } else {
2198
2199
2200
2201
2202
2203
2204
2205 if (!ForceSIB &&
2209 insn.sibBase != SIB_BASE_ESP && insn.sibBase != SIB_BASE_RSP &&
2210 insn.sibBase != SIB_BASE_R12D && insn.sibBase != SIB_BASE_R12))) {
2212 X86::RIZ);
2213 } else
2215 }
2216
2218 } else {
2219 switch (insn.eaBase) {
2222 debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
2223 return true;
2224 }
2230
2232 X86::RIP);
2233 }
2234 else
2236
2238 break;
2239 case EA_BASE_BX_SI:
2242 break;
2243 case EA_BASE_BX_DI:
2246 break;
2247 case EA_BASE_BP_SI:
2250 break;
2251 case EA_BASE_BP_DI:
2254 break;
2255 default:
2257 switch (insn.eaBase) {
2258 default:
2259 debug("Unexpected eaBase");
2260 return true;
2261
2262
2263
2264
2265#define ENTRY(x) \
2266 case EA_BASE_##x: \
2267 baseReg = MCOperand::createReg(X86::x); break;
2269#undef ENTRY
2270#define ENTRY(x) case EA_REG_##x:
2272#undef ENTRY
2273 debug("A R/M memory operand may not be a register; "
2274 "the base field must be a base.");
2275 return true;
2276 }
2277 }
2278
2280 }
2281
2283
2285
2289
2290 const uint8_t dispSize =
2292
2298 return false;
2299}
2300
2301
2302
2303
2304
2305
2306
2307
2308
2311 switch (operand.type) {
2312 default:
2313 debug("Unexpected type for a R/M operand");
2314 return true;
2315 case TYPE_R8:
2316 case TYPE_R16:
2317 case TYPE_R32:
2318 case TYPE_R64:
2319 case TYPE_Rv:
2320 case TYPE_MM64:
2321 case TYPE_XMM:
2322 case TYPE_YMM:
2323 case TYPE_ZMM:
2324 case TYPE_TMM:
2325 case TYPE_TMM_PAIR:
2326 case TYPE_VK_PAIR:
2327 case TYPE_VK:
2328 case TYPE_DEBUGREG:
2329 case TYPE_CONTROLREG:
2330 case TYPE_BNDR:
2332 case TYPE_M:
2333 case TYPE_MVSIBX:
2334 case TYPE_MVSIBY:
2335 case TYPE_MVSIBZ:
2337 case TYPE_MSIB:
2339 }
2340}
2341
2342
2343
2344
2345
2346
2350}
2351
2352
2353
2354
2355
2356
2357
2360 if (maskRegNum >= 8) {
2361 debug("Invalid mask register number");
2362 return true;
2363 }
2364
2366 return false;
2367}
2368
2369
2370
2371
2372
2373
2374
2375
2380 default:
2381 debug("Unhandled operand encoding during translation");
2382 return true;
2383 case ENCODING_REG:
2385 return false;
2386 case ENCODING_WRITEMASK:
2388 case ENCODING_SIB:
2391 return translateRM(mcInst, operand, insn, Dis);
2392 case ENCODING_IB:
2393 case ENCODING_IW:
2394 case ENCODING_ID:
2395 case ENCODING_IO:
2396 case ENCODING_Iv:
2397 case ENCODING_Ia:
2400 operand,
2401 insn,
2402 Dis);
2403 return false;
2404 case ENCODING_IRC:
2406 return false;
2407 case ENCODING_SI:
2409 case ENCODING_DI:
2411 case ENCODING_RB:
2412 case ENCODING_RW:
2413 case ENCODING_RD:
2414 case ENCODING_RO:
2415 case ENCODING_Rv:
2417 return false;
2418 case ENCODING_CF:
2420 return false;
2421 case ENCODING_CC:
2424 else
2426 return false;
2427 case ENCODING_FP:
2429 return false;
2430 case ENCODING_VVVV:
2432 return false;
2433 case ENCODING_DUP:
2435 insn, Dis);
2436 }
2437}
2438
2439
2440
2441
2442
2443
2444
2448 if (!insn.spec) {
2449 debug("Instruction has no specification");
2450 return true;
2451 }
2452
2455
2456
2457
2459 if(mcInst.getOpcode() == X86::REP_PREFIX)
2460 mcInst.setOpcode(X86::XRELEASE_PREFIX);
2461 else if(mcInst.getOpcode() == X86::REPNE_PREFIX)
2462 mcInst.setOpcode(X86::XACQUIRE_PREFIX);
2463 }
2464
2466
2467 for (const auto &Op : insn.operands) {
2468 if (Op.encoding != ENCODING_NONE) {
2470 return true;
2471 }
2472 }
2473 }
2474
2475 return false;
2476}
2477
2481 std::unique_ptr MII(T.createMCInstrInfo());
2482 return new X86GenericDisassembler(STI, Ctx, std::move(MII));
2483}
2484
2486
2491}
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
static bool isBranch(unsigned Opcode)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static uint8_t readOpcode(WasmObjectFile::ReadContext &Ctx)
static int nextByte(ArrayRef< uint8_t > Bytes, uint64_t &Size)
static bool isPrefix(unsigned Opcode, const MCInstrInfo &MCII)
Check if the instruction is a prefix.
#define CASE_ENCODING_VSIB
#define THREEDNOW_MAP_SYM
#define rFromEVEX2of4(evex)
#define lFromEVEX4of4(evex)
#define l2FromEVEX4of4(evex)
#define rFromVEX2of3(vex)
#define zFromEVEX4of4(evex)
#define bFromXOP2of3(xop)
#define xFromVEX2of3(vex)
#define mmmmmFromVEX2of3(vex)
#define rmFromModRM(modRM)
#define bFromEVEX4of4(evex)
#define rFromVEX2of2(vex)
#define ppFromEVEX3of4(evex)
#define v2FromEVEX4of4(evex)
#define modFromModRM(modRM)
#define rFromXOP2of3(xop)
#define lFromXOP3of3(xop)
#define lFromVEX2of2(vex)
#define scFromEVEX4of4(evex)
#define scaleFromSIB(sib)
#define regFromModRM(modRM)
#define b2FromEVEX2of4(evex)
#define vvvvFromVEX2of2(vex)
#define nfFromEVEX4of4(evex)
#define ppFromXOP3of3(xop)
#define vvvvFromVEX3of3(vex)
#define r2FromEVEX2of4(evex)
#define uFromEVEX3of4(evex)
#define xFromXOP2of3(xop)
#define wFromEVEX3of4(evex)
#define bFromVEX2of3(vex)
#define wFromVEX3of3(vex)
#define mmmmmFromXOP2of3(xop)
#define aaaFromEVEX4of4(evex)
#define lFromVEX3of3(vex)
#define mmmFromEVEX2of4(evex)
#define ppFromVEX3of3(vex)
#define bFromEVEX2of4(evex)
#define xFromEVEX2of4(evex)
#define ppFromVEX2of2(vex)
#define indexFromSIB(sib)
#define vvvvFromXOP3of3(xop)
#define wFromXOP3of3(xop)
#define oszcFromEVEX3of4(evex)
#define vvvvFromEVEX3of4(evex)
static void translateRegister(MCInst &mcInst, Reg reg)
translateRegister - Translates an internal register to the appropriate LLVM register,...
static bool isREX2(struct InternalInstruction *insn, uint8_t prefix)
static int getInstructionID(struct InternalInstruction *insn, const MCInstrInfo *mii)
static bool readOpcode(struct InternalInstruction *insn)
static MCDisassembler * createX86Disassembler(const Target &T, const MCSubtargetInfo &STI, MCContext &Ctx)
static bool translateMaskRegister(MCInst &mcInst, uint8_t maskRegNum)
translateMaskRegister - Translates a 3-bit mask register number to LLVM form, and appends it to an MC...
static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn)
translateDstIndex - Appends a destination index operand to an MCInst.
static void translateImmediate(MCInst &mcInst, uint64_t immediate, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis)
translateImmediate - Appends an immediate operand to an MCInst.
static int readOperands(struct InternalInstruction *insn)
static void translateFPRegister(MCInst &mcInst, uint8_t stackPos)
translateFPRegister - Translates a stack position on the FPU stack to its LLVM form,...
static bool is64Bit(const char *name)
static const uint8_t segmentRegnums[SEG_OVERRIDE_max]
static int readImmediate(struct InternalInstruction *insn, uint8_t size)
static int getInstructionIDWithAttrMask(uint16_t *instructionID, struct InternalInstruction *insn, uint16_t attrMask)
static int readSIB(struct InternalInstruction *insn)
static bool isREX(struct InternalInstruction *insn, uint8_t prefix)
static int readVVVV(struct InternalInstruction *insn)
static bool isNF(InternalInstruction *insn)
static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn)
translateSrcIndex - Appends a source index operand to an MCInst.
#define GENERIC_FIXUP_FUNC(name, base, prefix)
static int readMaskRegister(struct InternalInstruction *insn)
static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis)
translateRM - Translates an operand stored in the R/M (and possibly SIB) byte of an instruction to LL...
static InstrUID decode(OpcodeType type, InstructionContext insnContext, uint8_t opcode, uint8_t modRM)
static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size)
static int readDisplacement(struct InternalInstruction *insn)
static bool isCCMPOrCTEST(InternalInstruction *insn)
LLVM_C_ABI void LLVMInitializeX86Disassembler()
static int fixupReg(struct InternalInstruction *insn, const struct OperandSpecifier *op)
static int readModRM(struct InternalInstruction *insn)
static bool is16BitEquivalent(const char *orig, const char *equiv)
static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, const MCDisassembler *Dis, bool ForceSIB=false)
translateRMMemory - Translates a memory operand stored in the Mod and R/M fields of an internal instr...
static bool translateInstruction(MCInst &target, InternalInstruction &source, const MCDisassembler *Dis)
translateInstruction - Translates an internal instruction and all its operands to an MCInst.
static bool translateRMRegister(MCInst &mcInst, InternalInstruction &insn)
translateRMRegister - Translates a register stored in the R/M field of the ModR/M byte to its LLVM eq...
static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, InternalInstruction &insn, const MCDisassembler *Dis)
translateOperand - Translates an operand stored in an internal instruction to LLVM's format and appen...
static int readPrefixes(struct InternalInstruction *insn)
static bool peek(struct InternalInstruction *insn, uint8_t &byte)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
This class represents an Operation in the Expression.
Container class for subtarget features.
Context object for machine code objects.
Superclass for all disassemblers.
bool tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address, bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) const
void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const
DecodeStatus
Ternary decode status.
virtual DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, ArrayRef< uint8_t > Bytes, uint64_t Address, raw_ostream &CStream) const =0
Returns the disassembly of a single instruction.
Instances of this class represent a single low-level machine instruction.
unsigned getOpcode() const
void addOperand(const MCOperand Op)
void setOpcode(unsigned Op)
Interface to description of machine instruction set.
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
Instances of this class represent operands of the MCInst class.
static MCOperand createReg(MCRegister Reg)
static MCOperand createImm(int64_t Val)
Generic base class for all target subtargets.
const FeatureBitset & getFeatureBits() const
StringRef - Represent a constant reference to a string, i.e.
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Target - Wrapper for Target specific information.
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ X86
Windows x64, Windows Itanium (IA-64)
EABase
All possible values of the base field for effective-address computations, a.k.a.
Reg
All possible values of the reg field in the ModR/M byte.
DisassemblerMode
Decoding mode for the Intel disassembler.
SIBBase
All possible values of the SIB base field.
SIBIndex
All possible values of the SIB index field.
NodeAddr< InstrNode * > Instr
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ATTRIBUTE_ALWAYS_INLINE DynamicAPInt mod(const DynamicAPInt &LHS, const DynamicAPInt &RHS)
is always non-negative.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Target & getTheX86_32Target()
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Target & getTheX86_64Target()
Implement std::hash so that hash_code can be used in STL containers.
OpcodeDecision opcodeDecisions[IC_max]
ModRMDecision modRMDecisions[256]
static void RegisterMCDisassembler(Target &T, Target::MCDisassemblerCtorTy Fn)
RegisterMCDisassembler - Register a MCDisassembler implementation for the given target.
The specification for how to extract and interpret a full instruction and its operands.
The x86 internal instruction, which is produced by the decoder.
ArrayRef< OperandSpecifier > operands
EADisplacement eaDisplacement
uint8_t rex2ExtensionPrefix[2]
uint8_t vectorExtensionPrefix[4]
SegmentOverride segmentOverride
uint8_t numImmediatesConsumed
llvm::ArrayRef< uint8_t > bytes
uint8_t numImmediatesTranslated
const InstructionSpecifier * spec
VectorExtensionType vectorExtensionType
uint8_t displacementOffset
The specification for how to extract and interpret one operand.