From 367e86e8476d6373a00d0e56a29b03c4b8f3e2ee Mon Sep 17 00:00:00 2001 From: bellard Date: Sat, 1 Mar 2003 17:13:26 +0000 Subject: [PATCH] new x86 CPU core git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@14 c046a42c-6fe2-441c-8c8c-71466251a162 --- Makefile | 46 +- TODO | 5 +- cpu-i386.h | 148 ++++ dyngen.c | 521 +++++++++++ gen-i386.h | 8 + linux-user/main.c | 36 + op-i386.c | 813 +++++++---------- ops_template.h | 628 +++++++++++++ tests/Makefile | 20 +- thunk.h | 22 +- translate-i386.c | 2133 +++++++++++++++++++++++++++++++++++++++++++++ 11 files changed, 3834 insertions(+), 546 deletions(-) create mode 100644 cpu-i386.h create mode 100644 dyngen.c create mode 100644 gen-i386.h create mode 100644 ops_template.h create mode 100644 translate-i386.c diff --git a/Makefile b/Makefile index 9f71211333..397ddf1342 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,9 @@ ARCH=i386 #ARCH=ppc +HOST_CC=gcc ifeq ($(ARCH),i386) -CFLAGS=-Wall -O2 -g +CFLAGS=-Wall -O2 -g -fomit-frame-pointer LDFLAGS=-g LIBS= CC=gcc @@ -27,38 +28,59 @@ endif ######################################################### -DEFINES+=-D_GNU_SOURCE -DGEMU -DDOSEMU #-DNO_TRACE_MSGS +DEFINES+=-D_GNU_SOURCE -DGEMU -DDOSEMU -DNO_TRACE_MSGS +DEFINES+=-DCONFIG_PREFIX=\"/usr/local\" LDSCRIPT=$(ARCH).ld +LIBS+=-ldl OBJS= i386/fp87.o i386/interp_main.o i386/interp_modrm.o i386/interp_16_32.o \ i386/interp_32_16.o i386/interp_32_32.o i386/emu-utils.o \ i386/dis8086.o i386/emu-ldt.o +OBJS+=translate-i386.o op-i386.o OBJS+= elfload.o main.o thunk.o syscall.o - SRCS = $(OBJS:.o=.c) all: gemu gemu: $(OBJS) - $(CC) -Wl,-T,$(LDSCRIPT) $(LDFLAGS) -o $@ $(OBJS) $(LIBS) + $(CC) -Wl,-T,$(LDSCRIPT) $(LDFLAGS) -o $@ $^ $(LIBS) depend: $(SRCS) $(CC) -MM $(CFLAGS) $^ 1>.depend +# old i386 emulator +i386/interp_32_32.o: i386/interp_32_32.c i386/interp_gen.h + +i386/interp_gen.h: i386/gencode + ./i386/gencode > $@ + +i386/gencode: i386/gencode.c + $(CC) -O2 -Wall -g $< -o $@ + +# new i386 emulator +dyngen: dyngen.c + $(HOST_CC) 
-O2 -Wall -g $< -o $@ + +translate-i386.o: translate-i386.c op-i386.h cpu-i386.h + +op-i386.h: op-i386.o dyngen + ./dyngen -o $@ $< + +op-i386.o: op-i386.c opreg_template.h ops_template.h + $(CC) $(CFLAGS) $(DEFINES) -c -o $@ $< + %.o: %.c $(CC) $(CFLAGS) $(DEFINES) -c -o $@ $< clean: - rm -f *.o *~ i386/*.o i386/*~ gemu hello test1 test2 TAGS + rm -f *.o *~ i386/*.o i386/*~ gemu TAGS -hello: hello.c - $(CC) -nostdlib $(CFLAGS) -static $(LDFLAGS) -o $@ $< +# various test targets +test speed: gemu + make -C tests $@ -test1: test1.c - $(CC) $(CFLAGS) -static $(LDFLAGS) -o $@ $< - -test2: test2.c - $(CC) $(CFLAGS) -static $(LDFLAGS) -o $@ $< +TAGS: + etags *.[ch] i386/*.[ch] ifneq ($(wildcard .depend),) include .depend diff --git a/TODO b/TODO index 045f877f46..7ba6ab4a79 100644 --- a/TODO +++ b/TODO @@ -1,2 +1,5 @@ -- swap all elf paramters +- tests +- signals +- threads - fix printf for doubles (fp87.c bug ?) +- make it self runnable (use same trick as ld.so : include its own relocator and libc) diff --git a/cpu-i386.h b/cpu-i386.h new file mode 100644 index 0000000000..a857efb7df --- /dev/null +++ b/cpu-i386.h @@ -0,0 +1,148 @@ +#ifndef CPU_I386_H +#define CPU_I386_H + +#define R_EAX 0 +#define R_ECX 1 +#define R_EDX 2 +#define R_EBX 3 +#define R_ESP 4 +#define R_EBP 5 +#define R_ESI 6 +#define R_EDI 7 + +#define R_AL 0 +#define R_CL 1 +#define R_DL 2 +#define R_BL 3 +#define R_AH 4 +#define R_CH 5 +#define R_DH 6 +#define R_BH 7 + +#define R_ES 0 +#define R_CS 1 +#define R_SS 2 +#define R_DS 3 +#define R_FS 4 +#define R_GS 5 + +#define CC_C 0x0001 +#define CC_P 0x0004 +#define CC_A 0x0010 +#define CC_Z 0x0040 +#define CC_S 0x0080 +#define CC_O 0x0800 + +#define TRAP_FLAG 0x0100 +#define INTERRUPT_FLAG 0x0200 +#define DIRECTION_FLAG 0x0400 +#define IOPL_FLAG_MASK 0x3000 +#define NESTED_FLAG 0x4000 +#define BYTE_FL 0x8000 /* Intel reserved! 
#define RF_FLAG        0x10000
#define VM_FLAG        0x20000
/* AC                  0x40000 */

/*
 * Values stored in CPU86State.cc_op.  Each one names the kind of operation
 * that last set the condition codes, so that the flags can be derived from
 * cc_src/cc_dst when they are needed.  The B/W/L suffix is the operand
 * size (8, 16 or 32 bits).
 */
enum {
    CC_OP_DYNAMIC, /* must use dynamic code to get cc_op */
    CC_OP_EFLAGS,  /* all cc are explicitly computed, CC_SRC = flags */
    CC_OP_MUL,     /* modify all flags, C, O = (CC_SRC != 0) */

    CC_OP_ADDB,    /* modify all flags, CC_DST = res, CC_SRC = src1 */
    CC_OP_ADDW,
    CC_OP_ADDL,

    CC_OP_SUBB,    /* modify all flags, CC_DST = res, CC_SRC = src1 */
    CC_OP_SUBW,
    CC_OP_SUBL,

    CC_OP_LOGICB,  /* modify all flags, CC_DST = res */
    CC_OP_LOGICW,
    CC_OP_LOGICL,

    CC_OP_INCB,    /* modify all flags except C, CC_DST = res */
    CC_OP_INCW,
    CC_OP_INCL,

    CC_OP_DECB,    /* modify all flags except C, CC_DST = res */
    CC_OP_DECW,
    CC_OP_DECL,

    CC_OP_SHLB,    /* modify all flags, CC_DST = res, CC_SRC.lsb = C */
    CC_OP_SHLW,
    CC_OP_SHLL,

    CC_OP_NB,      /* number of CC_OP_xxx values (size of the cc table) */
};

/* Complete state of the emulated x86 CPU. */
typedef struct CPU86State {
    /* standard registers */
    uint32_t regs[8]; /* eight 32 bit registers (one slot per R_Exx) */
    uint32_t pc;      /* cs_base + eip value */

    /* eflags handling */
    uint32_t eflags;
    uint32_t cc_src;  /* operand saved by the last flag-setting operation */
    uint32_t cc_dst;  /* result saved by the last flag-setting operation */
    uint32_t cc_op;   /* one of the CC_OP_xxx values */
    int32_t df;       /* D flag : 1 if D = 0, -1 if D = 1 */

    /* segments (six slots, one per R_ES .. R_GS) */
    uint8_t *segs_base[6];
    uint32_t segs[6];

    /* emulator internal variables */
    uint32_t t0; /* temporary t0 storage */
    uint32_t t1; /* temporary t1 storage */
    uint32_t a0; /* temporary a0 storage (address) */
} CPU86State;

/*
 * Memory access helpers.  Each one reads or writes a value of the given
 * width directly through 'ptr' in host byte order; no alignment or bounds
 * handling is done.  The load helpers never write through 'ptr', hence the
 * const qualifier (callers passing non-const pointers are unaffected).
 */

/* load an unsigned byte, zero extended */
static inline int ldub(const void *ptr)
{
    return *(const uint8_t *)ptr;
}

/* load a signed byte, sign extended */
static inline int ldsb(const void *ptr)
{
    return *(const int8_t *)ptr;
}

/* load an unsigned 16 bit word, zero extended */
static inline int lduw(const void *ptr)
{
    return *(const uint16_t *)ptr;
}

/* load a signed 16 bit word, sign extended */
static inline int ldsw(const void *ptr)
{
    return *(const int16_t *)ptr;
}

/* load a 32 bit long */
static inline int ldl(const void *ptr)
{
    return *(const uint32_t *)ptr;
}


/* store the low 8 bits of v */
static inline void stb(void *ptr, int v)
{
    *(uint8_t *)ptr = v;
}

/* store the low 16 bits of v */
static inline void stw(void *ptr, int v)
{
    *(uint16_t *)ptr = v;
}

/* store v as a 32 bit long */
static inline void stl(void *ptr, int v)
{
    *(uint32_t *)ptr = v;
}

/* I/O port write hook; only declared here, the definition lives elsewhere */
void port_outb(int addr, int val);
val); +void port_outl(int addr, int val); +int port_inb(int addr); +int port_inw(int addr); +int port_inl(int addr); + +#endif /* CPU_I386_H */ diff --git a/dyngen.c b/dyngen.c new file mode 100644 index 0000000000..ff10891b9e --- /dev/null +++ b/dyngen.c @@ -0,0 +1,521 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "thunk.h" + +/* all dynamically generated functions begin with this code */ +#define OP_PREFIX "op" + +int elf_must_swap(Elf32_Ehdr *h) +{ + union { + uint32_t i; + uint8_t b[4]; + } swaptest; + + swaptest.i = 1; + return (h->e_ident[EI_DATA] == ELFDATA2MSB) != + (swaptest.b[0] == 0); +} + +void swab16s(uint16_t *p) +{ + *p = bswap16(*p); +} + +void swab32s(uint32_t *p) +{ + *p = bswap32(*p); +} + +void swab64s(uint32_t *p) +{ + *p = bswap64(*p); +} + +void elf_swap_ehdr(Elf32_Ehdr *h) +{ + swab16s(&h->e_type); /* Object file type */ + swab16s(&h-> e_machine); /* Architecture */ + swab32s(&h-> e_version); /* Object file version */ + swab32s(&h-> e_entry); /* Entry point virtual address */ + swab32s(&h-> e_phoff); /* Program header table file offset */ + swab32s(&h-> e_shoff); /* Section header table file offset */ + swab32s(&h-> e_flags); /* Processor-specific flags */ + swab16s(&h-> e_ehsize); /* ELF header size in bytes */ + swab16s(&h-> e_phentsize); /* Program header table entry size */ + swab16s(&h-> e_phnum); /* Program header table entry count */ + swab16s(&h-> e_shentsize); /* Section header table entry size */ + swab16s(&h-> e_shnum); /* Section header table entry count */ + swab16s(&h-> e_shstrndx); /* Section header string table index */ +} + +void elf_swap_shdr(Elf32_Shdr *h) +{ + swab32s(&h-> sh_name); /* Section name (string tbl index) */ + swab32s(&h-> sh_type); /* Section type */ + swab32s(&h-> sh_flags); /* Section flags */ + swab32s(&h-> sh_addr); /* Section virtual addr at execution */ + swab32s(&h-> sh_offset); /* Section file offset */ + swab32s(&h-> sh_size); /* Section size in bytes */ + 
swab32s(&h-> sh_link); /* Link to another section */ + swab32s(&h-> sh_info); /* Additional section information */ + swab32s(&h-> sh_addralign); /* Section alignment */ + swab32s(&h-> sh_entsize); /* Entry size if section holds table */ +} + +void elf_swap_phdr(Elf32_Phdr *h) +{ + swab32s(&h->p_type); /* Segment type */ + swab32s(&h->p_offset); /* Segment file offset */ + swab32s(&h->p_vaddr); /* Segment virtual address */ + swab32s(&h->p_paddr); /* Segment physical address */ + swab32s(&h->p_filesz); /* Segment size in file */ + swab32s(&h->p_memsz); /* Segment size in memory */ + swab32s(&h->p_flags); /* Segment flags */ + swab32s(&h->p_align); /* Segment alignment */ +} + +int do_swap; +int e_machine; + +uint16_t get16(uint16_t *p) +{ + uint16_t val; + val = *p; + if (do_swap) + val = bswap16(val); + return val; +} + +uint32_t get32(uint32_t *p) +{ + uint32_t val; + val = *p; + if (do_swap) + val = bswap32(val); + return val; +} + +void put16(uint16_t *p, uint16_t val) +{ + if (do_swap) + val = bswap16(val); + *p = val; +} + +void put32(uint32_t *p, uint32_t val) +{ + if (do_swap) + val = bswap32(val); + *p = val; +} + +void __attribute__((noreturn)) error(const char *fmt, ...) 
+{ + va_list ap; + va_start(ap, fmt); + fprintf(stderr, "dyngen: "); + vfprintf(stderr, fmt, ap); + fprintf(stderr, "\n"); + va_end(ap); + exit(1); +} + + +Elf32_Shdr *find_elf_section(Elf32_Shdr *shdr, int shnum, const char *shstr, + const char *name) +{ + int i; + const char *shname; + Elf32_Shdr *sec; + + for(i = 0; i < shnum; i++) { + sec = &shdr[i]; + if (!sec->sh_name) + continue; + shname = shstr + sec->sh_name; + if (!strcmp(shname, name)) + return sec; + } + return NULL; +} + +void *load_data(int fd, long offset, unsigned int size) +{ + char *data; + + data = malloc(size); + if (!data) + return NULL; + lseek(fd, offset, SEEK_SET); + if (read(fd, data, size) != size) { + free(data); + return NULL; + } + return data; +} + +int strstart(const char *str, const char *val, const char **ptr) +{ + const char *p, *q; + p = str; + q = val; + while (*q != '\0') { + if (*p != *q) + return 0; + p++; + q++; + } + if (ptr) + *ptr = p; + return 1; +} + +#define MAX_ARGS 3 + +/* generate op code */ +void gen_code(const char *name, unsigned long offset, unsigned long size, + FILE *outfile, uint8_t *text, void *relocs, int nb_relocs, int reloc_sh_type, + Elf32_Sym *symtab, char *strtab) +{ + int copy_size = 0; + uint8_t *p_start, *p_end; + int nb_args, i; + uint8_t args_present[MAX_ARGS]; + const char *sym_name, *p; + + /* compute exact size excluding return instruction */ + p_start = text + offset; + p_end = p_start + size; + switch(e_machine) { + case EM_386: + { + uint8_t *p; + p = p_end - 1; + /* find ret */ + while (p > p_start && *p != 0xc3) + p--; + /* skip double ret */ + if (p > p_start && p[-1] == 0xc3) + p--; + if (p == p_start) + error("empty code for %s", name); + copy_size = p - p_start; + } + break; + case EM_PPC: + { + uint8_t *p; + p = (void *)(p_end - 4); + /* find ret */ + while (p > p_start && get32((uint32_t *)p) != 0x4e800020) + p -= 4; + /* skip double ret */ + if (p > p_start && get32((uint32_t *)(p - 4)) == 0x4e800020) + p -= 4; + if (p == p_start) + 
error("empty code for %s", name); + copy_size = p - p_start; + } + break; + default: + error("unsupported CPU (%d)", e_machine); + } + + /* compute the number of arguments by looking at the relocations */ + for(i = 0;i < MAX_ARGS; i++) + args_present[i] = 0; + + if (reloc_sh_type == SHT_REL) { + Elf32_Rel *rel; + int n; + for(i = 0, rel = relocs;i < nb_relocs; i++, rel++) { + if (rel->r_offset >= offset && rel->r_offset < offset + copy_size) { + sym_name = strtab + symtab[ELF32_R_SYM(rel->r_info)].st_name; + if (strstart(sym_name, "__op_param", &p)) { + n = strtoul(p, NULL, 10); + if (n >= MAX_ARGS) + error("too many arguments in %s", name); + args_present[n - 1] = 1; + } + } + } + } else { + Elf32_Rela *rel; + int n; + for(i = 0, rel = relocs;i < nb_relocs; i++, rel++) { + if (rel->r_offset >= offset && rel->r_offset < offset + copy_size) { + sym_name = strtab + symtab[ELF32_R_SYM(rel->r_info)].st_name; + if (strstart(sym_name, "__op_param", &p)) { + n = strtoul(p, NULL, 10); + if (n >= MAX_ARGS) + error("too many arguments in %s", name); + args_present[n - 1] = 1; + } + } + } + } + + nb_args = 0; + while (nb_args < MAX_ARGS && args_present[nb_args]) + nb_args++; + for(i = nb_args; i < MAX_ARGS; i++) { + if (args_present[i]) + error("inconsistent argument numbering in %s", name); + } + + /* output C code */ + fprintf(outfile, "extern void %s();\n", name); + fprintf(outfile, "static inline void gen_%s(", name); + if (nb_args == 0) { + fprintf(outfile, "void"); + } else { + for(i = 0; i < nb_args; i++) { + if (i != 0) + fprintf(outfile, ", "); + fprintf(outfile, "long param%d", i + 1); + } + } + fprintf(outfile, ")\n"); + fprintf(outfile, "{\n"); + fprintf(outfile, " memcpy(gen_code_ptr, &%s, %d);\n", name, copy_size); + + /* patch relocations */ + switch(e_machine) { + case EM_386: + { + Elf32_Rel *rel; + char name[256]; + int type; + long addend; + for(i = 0, rel = relocs;i < nb_relocs; i++, rel++) { + if (rel->r_offset >= offset && rel->r_offset < offset + 
copy_size) { + sym_name = strtab + symtab[ELF32_R_SYM(rel->r_info)].st_name; + if (strstart(sym_name, "__op_param", &p)) { + snprintf(name, sizeof(name), "param%s", p); + } else { + snprintf(name, sizeof(name), "(long)(&%s)", sym_name); + } + type = ELF32_R_TYPE(rel->r_info); + addend = get32((uint32_t *)(text + rel->r_offset)); + switch(type) { + case R_386_32: + fprintf(outfile, " *(uint32_t *)(gen_code_ptr + %ld) = %s + %ld;\n", + rel->r_offset - offset, name, addend); + break; + case R_386_PC32: + fprintf(outfile, " *(uint32_t *)(gen_code_ptr + %ld) = %s - (long)(gen_code_ptr + %ld) + %ld;\n", + rel->r_offset - offset, name, rel->r_offset - offset, addend); + break; + default: + error("unsupported i386 relocation (%d)", type); + } + } + } + } + break; + default: + error("unsupported CPU for relocations (%d)", e_machine); + } + + + fprintf(outfile, " gen_code_ptr += %d;\n", copy_size); + fprintf(outfile, "}\n\n"); +} + +/* load an elf object file */ +int load_elf(const char *filename, FILE *outfile) +{ + int fd; + Elf32_Ehdr ehdr; + Elf32_Shdr *sec, *shdr, *symtab_sec, *strtab_sec, *text_sec; + int i, j, nb_syms; + Elf32_Sym *symtab, *sym; + const char *cpu_name; + char *shstr, *strtab; + uint8_t *text; + void *relocs; + int nb_relocs, reloc_sh_type; + + fd = open(filename, O_RDONLY); + if (fd < 0) + error("can't open file '%s'", filename); + + /* Read ELF header. */ + if (read(fd, &ehdr, sizeof (ehdr)) != sizeof (ehdr)) + error("unable to read file header"); + + /* Check ELF identification. 
*/ + if (ehdr.e_ident[EI_MAG0] != ELFMAG0 + || ehdr.e_ident[EI_MAG1] != ELFMAG1 + || ehdr.e_ident[EI_MAG2] != ELFMAG2 + || ehdr.e_ident[EI_MAG3] != ELFMAG3 + || ehdr.e_ident[EI_CLASS] != ELFCLASS32 + || ehdr.e_ident[EI_VERSION] != EV_CURRENT) { + error("bad ELF header"); + } + + do_swap = elf_must_swap(&ehdr); + if (do_swap) + elf_swap_ehdr(&ehdr); + if (ehdr.e_type != ET_REL) + error("ELF object file expected"); + if (ehdr.e_version != EV_CURRENT) + error("Invalid ELF version"); + e_machine = ehdr.e_machine; + + /* read section headers */ + shdr = load_data(fd, ehdr.e_shoff, ehdr.e_shnum * sizeof(Elf32_Shdr)); + if (do_swap) { + for(i = 0; i < ehdr.e_shnum; i++) { + elf_swap_shdr(&shdr[i]); + } + } + + sec = &shdr[ehdr.e_shstrndx]; + shstr = load_data(fd, sec->sh_offset, sec->sh_size); + + /* text section */ + + text_sec = find_elf_section(shdr, ehdr.e_shnum, shstr, ".text"); + if (!text_sec) + error("could not find .text section"); + text = load_data(fd, text_sec->sh_offset, text_sec->sh_size); + + /* find text relocations, if any */ + nb_relocs = 0; + relocs = NULL; + reloc_sh_type = 0; + for(i = 0; i < ehdr.e_shnum; i++) { + sec = &shdr[i]; + if ((sec->sh_type == SHT_REL || sec->sh_type == SHT_RELA) && + sec->sh_info == (text_sec - shdr)) { + reloc_sh_type = sec->sh_type; + relocs = load_data(fd, sec->sh_offset, sec->sh_size); + nb_relocs = sec->sh_size / sec->sh_entsize; + if (do_swap) { + if (sec->sh_type == SHT_REL) { + Elf32_Rel *rel = relocs; + for(j = 0, rel = relocs; j < nb_relocs; j++, rel++) { + swab32s(&rel->r_offset); + swab32s(&rel->r_info); + } + } else { + Elf32_Rela *rel = relocs; + for(j = 0, rel = relocs; j < nb_relocs; j++, rel++) { + swab32s(&rel->r_offset); + swab32s(&rel->r_info); + swab32s(&rel->r_addend); + } + } + } + break; + } + } + + symtab_sec = find_elf_section(shdr, ehdr.e_shnum, shstr, ".symtab"); + if (!symtab_sec) + error("could not find .symtab section"); + strtab_sec = &shdr[symtab_sec->sh_link]; + + symtab = load_data(fd, 
symtab_sec->sh_offset, symtab_sec->sh_size); + strtab = load_data(fd, strtab_sec->sh_offset, strtab_sec->sh_size); + + nb_syms = symtab_sec->sh_size / sizeof(Elf32_Sym); + if (do_swap) { + for(i = 0, sym = symtab; i < nb_syms; i++, sym++) { + swab32s(&sym->st_name); + swab32s(&sym->st_value); + swab32s(&sym->st_size); + swab16s(&sym->st_shndx); + } + } + + switch(e_machine) { + case EM_386: + cpu_name = "i386"; + break; + case EM_PPC: + cpu_name = "ppc"; + break; + case EM_MIPS: + cpu_name = "mips"; + break; + case EM_ARM: + cpu_name = "arm"; + break; + case EM_SPARC: + cpu_name = "sparc"; + break; + default: + error("unsupported CPU (e_machine=%d)", e_machine); + } + + fprintf(outfile, "#include \"gen-%s.h\"\n\n", cpu_name); + + for(i = 0, sym = symtab; i < nb_syms; i++, sym++) { + const char *name; + name = strtab + sym->st_name; + if (strstart(name, "op_", NULL) || + strstart(name, "op1_", NULL) || + strstart(name, "op2_", NULL) || + strstart(name, "op3_", NULL)) { +#if 0 + printf("%4d: %s pos=0x%08x len=%d\n", + i, name, sym->st_value, sym->st_size); +#endif + if (sym->st_shndx != (text_sec - shdr)) + error("invalid section for opcode (0x%x)", sym->st_shndx); + gen_code(name, sym->st_value, sym->st_size, outfile, + text, relocs, nb_relocs, reloc_sh_type, symtab, strtab); + } + } + + close(fd); + return 0; +} + +void usage(void) +{ + printf("dyngen (c) 2003 Fabrice Bellard\n" + "usage: dyngen [-o outfile] objfile\n" + "Generate a dynamic code generator from an object file\n"); + exit(1); +} + +int main(int argc, char **argv) +{ + int c; + const char *filename, *outfilename; + FILE *outfile; + + outfilename = "out.c"; + for(;;) { + c = getopt(argc, argv, "ho:"); + if (c == -1) + break; + switch(c) { + case 'h': + usage(); + break; + case 'o': + outfilename = optarg; + break; + } + } + if (optind >= argc) + usage(); + filename = argv[optind]; + outfile = fopen(outfilename, "w"); + if (!outfile) + error("could not open '%s'", outfilename); + load_elf(filename, 
outfile); + fclose(outfile); + return 0; +} diff --git a/gen-i386.h b/gen-i386.h new file mode 100644 index 0000000000..a5d7f59898 --- /dev/null +++ b/gen-i386.h @@ -0,0 +1,8 @@ +static inline void gen_start(void) +{ +} + +static inline void gen_end(void) +{ + *gen_code_ptr++ = 0xc3; /* ret */ +} diff --git a/linux-user/main.c b/linux-user/main.c index 544953eb25..1d76d4d7cc 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -191,6 +191,41 @@ void INT_handler(int num, void *env) } /***********************************************************/ +/* new CPU core */ + +void port_outb(int addr, int val) +{ + fprintf(stderr, "outb: port=0x%04x, data=%02x\n", addr, val); +} + +void port_outw(int addr, int val) +{ + fprintf(stderr, "outw: port=0x%04x, data=%04x\n", addr, val); +} + +void port_outl(int addr, int val) +{ + fprintf(stderr, "outl: port=0x%04x, data=%08x\n", addr, val); +} + +int port_inb(int addr) +{ + fprintf(stderr, "inb: port=0x%04x\n", addr); + return 0; +} + +int port_inw(int addr) +{ + fprintf(stderr, "inw: port=0x%04x\n", addr); + return 0; +} + +int port_inl(int addr) +{ + fprintf(stderr, "inl: port=0x%04x\n", addr); + return 0; +} + /* XXX: currently we use LDT entries */ #define __USER_CS (0x23|4) @@ -270,6 +305,7 @@ int main(int argc, char **argv) LDT[__USER_DS >> 3].dwSelLimit = 0xfffff; LDT[__USER_DS >> 3].lpSelBase = NULL; init_npu(); + build_decode_tables(); for(;;) { int err; diff --git a/op-i386.c b/op-i386.c index fdd2fa5ade..8607cf4552 100644 --- a/op-i386.c +++ b/op-i386.c @@ -8,6 +8,8 @@ typedef signed short int16_t; typedef signed int int32_t; typedef signed long long int64_t; +#define NULL 0 + #ifdef __i386__ register int T0 asm("esi"); register int T1 asm("ebx"); @@ -74,13 +76,12 @@ extern int __op_param1, __op_param2, __op_param3; #include "cpu-i386.h" typedef struct CCTable { - int (*compute_c)(void); /* return the C flag */ - int (*compute_z)(void); /* return the Z flag */ - int (*compute_s)(void); /* return the S flag */ - int 
/* modulo 17 table: entry i holds i % 17 */
const uint8_t rclw_table[32] = {
     0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
    16,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
};

/* modulo 9 table: entry i holds i % 9 */
const uint8_t rclb_table[32] = {
    0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 1, 2, 3, 4, 5, 6,
    7, 8, 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 1, 2, 3, 4,
};

/*
 * Shift x left by n bits when n is positive or zero, right by -n bits
 * when n is negative.
 * n must be a constant to be efficient
 */
static inline int lshift(int x, int n)
{
    return (n < 0) ? (x >> (-n)) : (x << n);
}
pf = parity_table[(uint8_t)CC_DST]; - af = (CC_DST ^ src1 ^ src2) & 0x10; - zf = ((uint8_t)CC_DST != 0) << 6; - sf = CC_DST & 0x80; - of = ((src1 ^ src2 ^ -1) & (src1 ^ CC_DST) & 0x80) << 4; - return cf | pf | af | zf | sf | of; -} - -static int compute_eflags_shlb(void) -{ - cf = CC_SRC; - pf = parity_table[(uint8_t)CC_DST]; - af = 0; /* undefined */ - zf = ((uint8_t)CC_DST != 0) << 6; - sf = CC_DST & 0x80; - of = 0; /* undefined */ - return cf | pf | af | zf | sf | of; -} - -static int compute_eflags_shrb(void) -{ - cf = CC_SRC & 1; - pf = parity_table[(uint8_t)CC_DST]; - af = 0; /* undefined */ - zf = ((uint8_t)CC_DST != 0) << 6; - sf = CC_DST & 0x80; - of = sf << 4; - return cf | pf | af | zf | sf | of; -} - -static int compute_eflags_mul(void) -{ - cf = (CC_SRC != 0); - pf = 0; /* undefined */ - af = 0; /* undefined */ - zf = 0; /* undefined */ - sf = 0; /* undefined */ - of = cf << 11; - return cf | pf | af | zf | sf | of; -} - -CTable cc_table[CC_OP_NB] = { - [CC_OP_DYNAMIC] = { NULL, NULL, NULL }, - [CC_OP_EFLAGS] = { NULL, NULL, NULL }, - +/* modulo 17 table */ +const uint8_t rclw_table[32] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9,10,11,12,13,14,15, + 16, 0, 1, 2, 3, 4, 5, 6, + 7, 8, 9,10,11,12,13,14, }; +/* modulo 9 table */ +const uint8_t rclb_table[32] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 0, 1, 2, 3, 4, 5, 6, + 7, 8, 0, 1, 2, 3, 4, 5, + 6, 7, 8, 0, 1, 2, 3, 4, +}; + +/* n must be a constant to be efficient */ +static inline int lshift(int x, int n) +{ + if (n >= 0) + return x << n; + else + return x >> (-n); +} + /* we define the various pieces of code used by the JIT */ #define REG EAX @@ -365,338 +276,6 @@ void OPPROTO op_testl_T0_T1_cc(void) CC_DST = T0 & T1; } -/* shifts */ - -void OPPROTO op_roll_T0_T1_cc(void) -{ - int count; - count = T1 & 0x1f; - if (count) { - CC_SRC = T0; - T0 = (T0 << count) | (T0 >> (32 - count)); - CC_DST = T0; - CC_OP = CC_OP_ROLL; - } -} - -void OPPROTO op_rolw_T0_T1_cc(void) -{ - int count; - count = T1 & 0xf; - if (count) { - 
T0 = T0 & 0xffff; - CC_SRC = T0; - T0 = (T0 << count) | (T0 >> (16 - count)); - CC_DST = T0; - CC_OP = CC_OP_ROLW; - } -} - -void OPPROTO op_rolb_T0_T1_cc(void) -{ - int count; - count = T1 & 0x7; - if (count) { - T0 = T0 & 0xff; - CC_SRC = T0; - T0 = (T0 << count) | (T0 >> (8 - count)); - CC_DST = T0; - CC_OP = CC_OP_ROLB; - } -} - -void OPPROTO op_rorl_T0_T1_cc(void) -{ - int count; - count = T1 & 0x1f; - if (count) { - CC_SRC = T0; - T0 = (T0 >> count) | (T0 << (32 - count)); - CC_DST = T0; - CC_OP = CC_OP_RORB; - } -} - -void OPPROTO op_rorw_T0_T1_cc(void) -{ - int count; - count = T1 & 0xf; - if (count) { - CC_SRC = T0; - T0 = (T0 >> count) | (T0 << (16 - count)); - CC_DST = T0; - CC_OP = CC_OP_RORW; - } -} - -void OPPROTO op_rorb_T0_T1_cc(void) -{ - int count; - count = T1 & 0x7; - if (count) { - CC_SRC = T0; - T0 = (T0 >> count) | (T0 << (8 - count)); - CC_DST = T0; - CC_OP = CC_OP_RORL; - } -} - -/* modulo 17 table */ -const uint8_t rclw_table[32] = { - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9,10,11,12,13,14,15, - 16, 0, 1, 2, 3, 4, 5, 6, - 7, 8, 9,10,11,12,13,14, -}; - -/* modulo 9 table */ -const uint8_t rclb_table[32] = { - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 0, 1, 2, 3, 4, 5, 6, - 7, 8, 0, 1, 2, 3, 4, 5, - 6, 7, 8, 0, 1, 2, 3, 4, -}; - -void helper_rcll_T0_T1_cc(void) -{ - int count, res; - - count = T1 & 0x1f; - if (count) { - CC_SRC = T0; - res = (T0 << count) | (cc_table[CC_OP].compute_c() << (count - 1)); - if (count > 1) - res |= T0 >> (33 - count); - T0 = res; - CC_DST = T0 ^ CC_SRC; /* O is in bit 31 */ - CC_SRC >>= (32 - count); /* CC is in bit 0 */ - CC_OP = CC_OP_RCLL; - } -} - -void OPPROTO op_rcll_T0_T1_cc(void) -{ - helper_rcll_T0_T1_cc(); -} - -void OPPROTO op_rclw_T0_T1_cc(void) -{ - int count; - count = rclw_table[T1 & 0x1f]; - if (count) { - T0 = T0 & 0xffff; - CC_SRC = T0; - T0 = (T0 << count) | (cc_table[CC_OP].compute_c() << (count - 1)) | - (T0 >> (17 - count)); - CC_DST = T0 ^ CC_SRC; - CC_SRC >>= (16 - count); - CC_OP = CC_OP_RCLW; - } -} - -void 
OPPROTO op_rclb_T0_T1_cc(void) -{ - int count; - count = rclb_table[T1 & 0x1f]; - if (count) { - T0 = T0 & 0xff; - CC_SRC = T0; - T0 = (T0 << count) | (cc_table[CC_OP].compute_c() << (count - 1)) | - (T0 >> (9 - count)); - CC_DST = T0 ^ CC_SRC; - CC_SRC >>= (8 - count); - CC_OP = CC_OP_RCLB; - } -} - -void OPPROTO op_rcrl_T0_T1_cc(void) -{ - int count, res; - count = T1 & 0x1f; - if (count) { - CC_SRC = T0; - res = (T0 >> count) | (cc_table[CC_OP].compute_c() << (32 - count)); - if (count > 1) - res |= T0 << (33 - count); - T0 = res; - CC_DST = T0 ^ CC_SRC; - CC_SRC >>= (count - 1); - CC_OP = CC_OP_RCLL; - } -} - -void OPPROTO op_rcrw_T0_T1_cc(void) -{ - int count; - count = rclw_table[T1 & 0x1f]; - if (count) { - T0 = T0 & 0xffff; - CC_SRC = T0; - T0 = (T0 >> count) | (cc_table[CC_OP].compute_c() << (16 - count)) | - (T0 << (17 - count)); - CC_DST = T0 ^ CC_SRC; - CC_SRC >>= (count - 1); - CC_OP = CC_OP_RCLW; - } -} - -void OPPROTO op_rcrb_T0_T1_cc(void) -{ - int count; - count = rclb_table[T1 & 0x1f]; - if (count) { - T0 = T0 & 0xff; - CC_SRC = T0; - T0 = (T0 >> count) | (cc_table[CC_OP].compute_c() << (8 - count)) | - (T0 << (9 - count)); - CC_DST = T0 ^ CC_SRC; - CC_SRC >>= (count - 1); - CC_OP = CC_OP_RCLB; - } -} - -void OPPROTO op_shll_T0_T1_cc(void) -{ - int count; - count = T1 & 0x1f; - if (count == 1) { - CC_SRC = T0; - T0 = T0 << 1; - CC_DST = T0; - CC_OP = CC_OP_ADDL; - } else if (count) { - CC_SRC = T0 >> (32 - count); - T0 = T0 << count; - CC_DST = T0; - CC_OP = CC_OP_SHLL; - } -} - -void OPPROTO op_shlw_T0_T1_cc(void) -{ - int count; - count = T1 & 0x1f; - if (count == 1) { - CC_SRC = T0; - T0 = T0 << 1; - CC_DST = T0; - CC_OP = CC_OP_ADDW; - } else if (count) { - CC_SRC = T0 >> (16 - count); - T0 = T0 << count; - CC_DST = T0; - CC_OP = CC_OP_SHLW; - } -} - -void OPPROTO op_shlb_T0_T1_cc(void) -{ - int count; - count = T1 & 0x1f; - if (count == 1) { - CC_SRC = T0; - T0 = T0 << 1; - CC_DST = T0; - CC_OP = CC_OP_ADDB; - } else if (count) { - CC_SRC = 
T0 >> (8 - count); - T0 = T0 << count; - CC_DST = T0; - CC_OP = CC_OP_SHLB; - } -} - -void OPPROTO op_shrl_T0_T1_cc(void) -{ - int count; - count = T1 & 0x1f; - if (count == 1) { - CC_SRC = T0; - T0 = T0 >> 1; - CC_DST = T0; - CC_OP = CC_OP_SHRL; - } else if (count) { - CC_SRC = T0 >> (count - 1); - T0 = T0 >> count; - CC_DST = T0; - CC_OP = CC_OP_SHLL; - } -} - -void OPPROTO op_shrw_T0_T1_cc(void) -{ - int count; - count = T1 & 0x1f; - if (count == 1) { - T0 = T0 & 0xffff; - CC_SRC = T0; - T0 = T0 >> 1; - CC_DST = T0; - CC_OP = CC_OP_SHRW; - } else if (count) { - T0 = T0 & 0xffff; - CC_SRC = T0 >> (count - 1); - T0 = T0 >> count; - CC_DST = T0; - CC_OP = CC_OP_SHLW; - } -} - -void OPPROTO op_shrb_T0_T1_cc(void) -{ - int count; - count = T1 & 0x1f; - if (count == 1) { - T0 = T0 & 0xff; - CC_SRC = T0; - T0 = T0 >> 1; - CC_DST = T0; - CC_OP = CC_OP_SHRB; - } else if (count) { - T0 = T0 & 0xff; - CC_SRC = T0 >> (count - 1); - T0 = T0 >> count; - CC_DST = T0; - CC_OP = CC_OP_SHLB; - } -} - -void OPPROTO op_sarl_T0_T1_cc(void) -{ - int count; - count = T1 & 0x1f; - if (count) { - CC_SRC = (int32_t)T0 >> (count - 1); - T0 = (int32_t)T0 >> count; - CC_DST = T0; - CC_OP = CC_OP_SHLL; - } -} - -void OPPROTO op_sarw_T0_T1_cc(void) -{ - int count; - count = T1 & 0x1f; - if (count) { - CC_SRC = (int16_t)T0 >> (count - 1); - T0 = (int16_t)T0 >> count; - CC_DST = T0; - CC_OP = CC_OP_SHLW; - } -} - -void OPPROTO op_sarb_T0_T1_cc(void) -{ - int count; - count = T1 & 0x1f; - if (count) { - CC_SRC = (int8_t)T0 >> (count - 1); - T0 = (int8_t)T0 >> count; - CC_DST = T0; - CC_OP = CC_OP_SHLB; - } -} - /* multiply/divide */ void OPPROTO op_mulb_AL_T0(void) { @@ -924,41 +503,6 @@ void OPPROTO op_stl_T0_A0(void) stl((uint8_t *)A0, T0); } -/* flags */ - -void OPPROTO op_set_cc_op(void) -{ - CC_OP = PARAM1; -} - -void OPPROTO op_movl_eflags_T0(void) -{ - CC_SRC = T0; - DF = (T0 & DIRECTION_FLAG) ? 
/* flags handling */

/*
 * Every op below that needs the flags obtains them through
 * cc_table[CC_OP], i.e. from the helper matching the operation recorded
 * in CC_OP.
 */

/* slow jumps cases (compute x86 flags) */
/* Each jump adds PARAM1 to PC when its condition holds, else PARAM2. */

/* jump if O is set */
void OPPROTO op_jo_cc(void)
{
    int eflags;
    eflags = cc_table[CC_OP].compute_all();
    if (eflags & CC_O)
        PC += PARAM1;
    else
        PC += PARAM2;
}

/* jump if C is set (only the carry helper is called) */
void OPPROTO op_jb_cc(void)
{
    if (cc_table[CC_OP].compute_c())
        PC += PARAM1;
    else
        PC += PARAM2;
}

/* jump if Z is set */
void OPPROTO op_jz_cc(void)
{
    int eflags;
    eflags = cc_table[CC_OP].compute_all();
    if (eflags & CC_Z)
        PC += PARAM1;
    else
        PC += PARAM2;
}

/* jump if Z or C is set */
void OPPROTO op_jbe_cc(void)
{
    int eflags;
    eflags = cc_table[CC_OP].compute_all();
    if (eflags & (CC_Z | CC_C))
        PC += PARAM1;
    else
        PC += PARAM2;
}

/* jump if S is set */
void OPPROTO op_js_cc(void)
{
    int eflags;
    eflags = cc_table[CC_OP].compute_all();
    if (eflags & CC_S)
        PC += PARAM1;
    else
        PC += PARAM2;
}

/* jump if P is set */
void OPPROTO op_jp_cc(void)
{
    int eflags;
    eflags = cc_table[CC_OP].compute_all();
    if (eflags & CC_P)
        PC += PARAM1;
    else
        PC += PARAM2;
}

/* jump if S != O: shifting by 4 moves CC_O (0x0800) onto CC_S (0x0080) */
void OPPROTO op_jl_cc(void)
{
    int eflags;
    eflags = cc_table[CC_OP].compute_all();
    if ((eflags ^ (eflags >> 4)) & 0x80)
        PC += PARAM1;
    else
        PC += PARAM2;
}

/* jump if S != O or Z is set */
void OPPROTO op_jle_cc(void)
{
    int eflags;
    eflags = cc_table[CC_OP].compute_all();
    if (((eflags ^ (eflags >> 4)) & 0x80) || (eflags & CC_Z))
        PC += PARAM1;
    else
        PC += PARAM2;
}

/* slow set cases (compute x86 flags) */
/* Each op stores the truth value of its condition in T0. */

/* T0 = O (bit 11 of the flags) */
void OPPROTO op_seto_T0_cc(void)
{
    int eflags;
    eflags = cc_table[CC_OP].compute_all();
    T0 = (eflags >> 11) & 1;
}

/* T0 = value returned by the carry helper */
void OPPROTO op_setb_T0_cc(void)
{
    T0 = cc_table[CC_OP].compute_c();
}

/* T0 = Z (bit 6 of the flags) */
void OPPROTO op_setz_T0_cc(void)
{
    int eflags;
    eflags = cc_table[CC_OP].compute_all();
    T0 = (eflags >> 6) & 1;
}

/* T0 = 1 if Z or C is set */
void OPPROTO op_setbe_T0_cc(void)
{
    int eflags;
    eflags = cc_table[CC_OP].compute_all();
    T0 = (eflags & (CC_Z | CC_C)) != 0;
}

/* T0 = S (bit 7 of the flags) */
void OPPROTO op_sets_T0_cc(void)
{
    int eflags;
    eflags = cc_table[CC_OP].compute_all();
    T0 = (eflags >> 7) & 1;
}

/* T0 = P (bit 2 of the flags) */
void OPPROTO op_setp_T0_cc(void)
{
    int eflags;
    eflags = cc_table[CC_OP].compute_all();
    T0 = (eflags >> 2) & 1;
}

/* T0 = S ^ O (O is first moved onto the bit position of S) */
void OPPROTO op_setl_T0_cc(void)
{
    int eflags;
    eflags = cc_table[CC_OP].compute_all();
    T0 = ((eflags ^ (eflags >> 4)) >> 7) & 1;
}

/* T0 = 1 if S != O or Z is set */
void OPPROTO op_setle_T0_cc(void)
{
    int eflags;
    eflags = cc_table[CC_OP].compute_all();
    T0 = (((eflags ^ (eflags >> 4)) & 0x80) || (eflags & CC_Z)) != 0;
}

/* invert bit 0 of T0 (presumably used to negate a set result - confirm
   against the translator) */
void OPPROTO op_xor_T0_1(void)
{
    T0 ^= 1;
}

/* CC_OP = PARAM1 */
void OPPROTO op_set_cc_op(void)
{
    CC_OP = PARAM1;
}

/* load the flags from T0: CC_SRC gets T0 unchanged, DF gets 1 when bit 10
   (DIRECTION_FLAG) of T0 is clear and -1 when it is set */
void OPPROTO op_movl_eflags_T0(void)
{
    CC_SRC = T0;
    DF = 1 - (2 * ((T0 >> 10) & 1));
}

/* CC_SRC = T0 combined with the current O flag */
/* XXX: compute only O flag */
void OPPROTO op_movb_eflags_T0(void)
{
    int of;
    of = cc_table[CC_OP].compute_all() & CC_O;
    CC_SRC = T0 | of;
}

/* T0 = computed flags plus the direction flag: DF is 1 or -1, and only
   -1 has the DIRECTION_FLAG bit set */
void OPPROTO op_movl_T0_eflags(void)
{
    T0 = cc_table[CC_OP].compute_all();
    T0 |= (DF & DIRECTION_FLAG);
}

/* clear the direction flag (DF = 1) */
void OPPROTO op_cld(void)
{
    DF = 1;
}

/* set the direction flag (DF = -1) */
void OPPROTO op_std(void)
{
    DF = -1;
}

/* compute all the flags, clear C and store the result in CC_SRC */
void OPPROTO op_clc(void)
{
    int eflags;
    eflags = cc_table[CC_OP].compute_all();
    eflags &= ~CC_C;
    CC_SRC = eflags;
}

/* compute all the flags, set C and store the result in CC_SRC */
void OPPROTO op_stc(void)
{
    int eflags;
    eflags = cc_table[CC_OP].compute_all();
    eflags |= CC_C;
    CC_SRC = eflags;
}

/* compute all the flags, invert C and store the result in CC_SRC */
void OPPROTO op_cmc(void)
{
    int eflags;
    eflags = cc_table[CC_OP].compute_all();
    eflags ^= CC_C;
    CC_SRC = eflags;
}

/* CC_OP_EFLAGS: CC_SRC already holds the flags */
static int compute_all_eflags(void)
{
    return CC_SRC;
}

/* CC_OP_EFLAGS: C is bit 0 of CC_SRC */
static int compute_c_eflags(void)
{
    return CC_SRC & CC_C;
}

/* CC_OP_MUL: C = (CC_SRC != 0) */
static int compute_c_mul(void)
{
    int cf;
    cf = (CC_SRC != 0);
    return cf;
}

/* CC_OP_MUL: C and O = (CC_SRC != 0), the other flags are returned as 0 */
static int compute_all_mul(void)
{
    int cf, pf, af, zf, sf, of;
    cf = (CC_SRC != 0);
    pf = 0; /* undefined */
    af = 0; /* undefined */
    zf = 0; /* undefined */
    sf = 0; /* undefined */
    of = cf << 11; /* bit 11 is CC_O */
    return cf | pf | af | zf | sf | of;
}

/*
 * Flag helpers indexed by CC_OP_xxx: { compute_all, compute_c }.
 * The helpers with a b/w/l suffix are not defined in this part of the
 * file (presumably generated by the three inclusions of ops_template.h).
 * NOTE(review): the DEC entries use the INC carry helpers; this looks
 * intentional since both operations leave C unchanged - confirm.
 */
CCTable cc_table[CC_OP_NB] = {
    [CC_OP_DYNAMIC] = { /* should never happen */ },

    [CC_OP_EFLAGS] = { compute_all_eflags, compute_c_eflags },

    [CC_OP_MUL] = { compute_all_mul, compute_c_mul },

    [CC_OP_ADDB] = { compute_all_addb, compute_c_addb },
    [CC_OP_ADDW] = { compute_all_addw, compute_c_addw },
    [CC_OP_ADDL] = { compute_all_addl, compute_c_addl },

    [CC_OP_SUBB] = { compute_all_subb, compute_c_subb },
    [CC_OP_SUBW] = { compute_all_subw, compute_c_subw },
    [CC_OP_SUBL] = { compute_all_subl, compute_c_subl },

    [CC_OP_LOGICB] = { compute_all_logicb, compute_c_logicb },
    [CC_OP_LOGICW] = { compute_all_logicw, compute_c_logicw },
    [CC_OP_LOGICL] = { compute_all_logicl, compute_c_logicl },

    [CC_OP_INCB] = { compute_all_incb, compute_c_incb },
    [CC_OP_INCW] = { compute_all_incw, compute_c_incw },
    [CC_OP_INCL] = { compute_all_incl, compute_c_incl },

    [CC_OP_DECB] = { compute_all_decb, compute_c_incb },
    [CC_OP_DECW] = { compute_all_decw, compute_c_incw },
    [CC_OP_DECL] = { compute_all_decl, compute_c_incl },

    [CC_OP_SHLB] = { compute_all_shlb, compute_c_shlb },
    [CC_OP_SHLW] = { compute_all_shlw, compute_c_shlw },
    [CC_OP_SHLL] = { compute_all_shll, compute_c_shll },
};
compute_all_incl, compute_c_incl }, + + [CC_OP_DECB] = { compute_all_decb, compute_c_incb }, + [CC_OP_DECW] = { compute_all_decw, compute_c_incw }, + [CC_OP_DECL] = { compute_all_decl, compute_c_incl }, + + [CC_OP_SHLB] = { compute_all_shlb, compute_c_shlb }, + [CC_OP_SHLW] = { compute_all_shlw, compute_c_shlw }, + [CC_OP_SHLL] = { compute_all_shll, compute_c_shll }, +}; diff --git a/ops_template.h b/ops_template.h new file mode 100644 index 0000000000..4032472947 --- /dev/null +++ b/ops_template.h @@ -0,0 +1,628 @@ + +#define DATA_BITS (1 << (3 + SHIFT)) +#define SHIFT_MASK (DATA_BITS - 1) +#define SIGN_MASK (1 << (DATA_BITS - 1)) + +#if DATA_BITS == 8 +#define SUFFIX b +#define DATA_TYPE uint8_t +#define DATA_STYPE int8_t +#define DATA_MASK 0xff +#elif DATA_BITS == 16 +#define SUFFIX w +#define DATA_TYPE uint16_t +#define DATA_STYPE int16_t +#define DATA_MASK 0xffff +#elif DATA_BITS == 32 +#define SUFFIX l +#define DATA_TYPE uint32_t +#define DATA_STYPE int32_t +#define DATA_MASK 0xffffffff +#else +#error unhandled operand size +#endif + +/* dynamic flags computation */ + +static int glue(compute_all_add, SUFFIX)(void) +{ + int cf, pf, af, zf, sf, of; + int src1, src2; + src1 = CC_SRC; + src2 = CC_DST - CC_SRC; + cf = (DATA_TYPE)CC_DST < (DATA_TYPE)src1; + pf = parity_table[(uint8_t)CC_DST]; + af = (CC_DST ^ src1 ^ src2) & 0x10; + zf = ((DATA_TYPE)CC_DST != 0) << 6; + sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; + of = lshift((src1 ^ src2 ^ -1) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O; + return cf | pf | af | zf | sf | of; +} + +static int glue(compute_c_add, SUFFIX)(void) +{ + int src1, cf; + src1 = CC_SRC; + cf = (DATA_TYPE)CC_DST < (DATA_TYPE)src1; + return cf; +} + +static int glue(compute_all_sub, SUFFIX)(void) +{ + int cf, pf, af, zf, sf, of; + int src1, src2; + src1 = CC_SRC; + src2 = CC_SRC - CC_DST; + cf = (DATA_TYPE)src1 < (DATA_TYPE)src2; + pf = parity_table[(uint8_t)CC_DST]; + af = (CC_DST ^ src1 ^ src2) & 0x10; + zf = ((DATA_TYPE)CC_DST != 0) << 6; + 
sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; + of = lshift((src1 ^ src2 ^ -1) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O; + return cf | pf | af | zf | sf | of; +} + +static int glue(compute_c_sub, SUFFIX)(void) +{ + int src1, src2, cf; + src1 = CC_SRC; + src2 = CC_SRC - CC_DST; + cf = (DATA_TYPE)src1 < (DATA_TYPE)src1; + return cf; +} + +static int glue(compute_all_logic, SUFFIX)(void) +{ + int cf, pf, af, zf, sf, of; + cf = 0; + pf = parity_table[(uint8_t)CC_DST]; + af = 0; + zf = ((DATA_TYPE)CC_DST != 0) << 6; + sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; + of = 0; + return cf | pf | af | zf | sf | of; +} + +static int glue(compute_c_logic, SUFFIX)(void) +{ + return 0; +} + +static int glue(compute_all_inc, SUFFIX)(void) +{ + int cf, pf, af, zf, sf, of; + int src1, src2; + src1 = CC_DST - 1; + src2 = 1; + cf = CC_SRC; + pf = parity_table[(uint8_t)CC_DST]; + af = (CC_DST ^ src1 ^ src2) & 0x10; + zf = ((DATA_TYPE)CC_DST != 0) << 6; + sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; + of = lshift((src1 ^ src2 ^ -1) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O; + return cf | pf | af | zf | sf | of; +} + +static int glue(compute_c_inc, SUFFIX)(void) +{ + return CC_SRC; +} + +static int glue(compute_all_dec, SUFFIX)(void) +{ + int cf, pf, af, zf, sf, of; + int src1, src2; + src1 = CC_DST + 1; + src2 = 1; + cf = CC_SRC; + pf = parity_table[(uint8_t)CC_DST]; + af = (CC_DST ^ src1 ^ src2) & 0x10; + zf = ((DATA_TYPE)CC_DST != 0) << 6; + sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; + of = lshift((src1 ^ src2 ^ -1) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O; + return cf | pf | af | zf | sf | of; +} + +static int glue(compute_all_shl, SUFFIX)(void) +{ + int cf, pf, af, zf, sf, of; + cf = CC_SRC & 1; + pf = parity_table[(uint8_t)CC_DST]; + af = 0; /* undefined */ + zf = ((DATA_TYPE)CC_DST != 0) << 6; + sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80; + of = sf << 4; /* only meaniful for shr with count == 1 */ + return cf | pf | af | zf | sf | of; +} + +static int glue(compute_c_shl, SUFFIX)(void) 
+{ + return CC_SRC & 1; +} + +/* various optimized jumps cases */ + +void OPPROTO glue(op_jb_sub, SUFFIX)(void) +{ + int src1, src2; + src1 = CC_SRC; + src2 = CC_SRC - CC_DST; + + if ((DATA_TYPE)src1 < (DATA_TYPE)src2) + PC += PARAM1; + else + PC += PARAM2; + FORCE_RET(); +} + +void OPPROTO glue(op_jz_sub, SUFFIX)(void) +{ + if ((DATA_TYPE)CC_DST == 0) /* Z condition: the subtraction result is zero */ + PC += PARAM1; + else + PC += PARAM2; + FORCE_RET(); +} + +void OPPROTO glue(op_jbe_sub, SUFFIX)(void) +{ + int src1, src2; + src1 = CC_SRC; + src2 = CC_SRC - CC_DST; + + if ((DATA_TYPE)src1 <= (DATA_TYPE)src2) + PC += PARAM1; + else + PC += PARAM2; + FORCE_RET(); +} + +void OPPROTO glue(op_js_sub, SUFFIX)(void) +{ + if (CC_DST & SIGN_MASK) + PC += PARAM1; + else + PC += PARAM2; + FORCE_RET(); +} + +void OPPROTO glue(op_jl_sub, SUFFIX)(void) +{ + int src1, src2; + src1 = CC_SRC; + src2 = CC_SRC - CC_DST; + + if ((DATA_STYPE)src1 < (DATA_STYPE)src2) + PC += PARAM1; + else + PC += PARAM2; + FORCE_RET(); +} + +void OPPROTO glue(op_jle_sub, SUFFIX)(void) +{ + int src1, src2; + src1 = CC_SRC; + src2 = CC_SRC - CC_DST; + + if ((DATA_STYPE)src1 <= (DATA_STYPE)src2) + PC += PARAM1; + else + PC += PARAM2; + FORCE_RET(); +} + +/* various optimized set cases */ + +void OPPROTO glue(op_setb_T0_sub, SUFFIX)(void) +{ + int src1, src2; + src1 = CC_SRC; + src2 = CC_SRC - CC_DST; + + T0 = ((DATA_TYPE)src1 < (DATA_TYPE)src2); +} + +void OPPROTO glue(op_setz_T0_sub, SUFFIX)(void) +{ + T0 = ((DATA_TYPE)CC_DST == 0); /* Z condition: the subtraction result is zero */ +} + +void OPPROTO glue(op_setbe_T0_sub, SUFFIX)(void) +{ + int src1, src2; + src1 = CC_SRC; + src2 = CC_SRC - CC_DST; + + T0 = ((DATA_TYPE)src1 <= (DATA_TYPE)src2); +} + +void OPPROTO glue(op_sets_T0_sub, SUFFIX)(void) +{ + T0 = lshift(CC_DST, -(DATA_BITS - 1)) & 1; +} + +void OPPROTO glue(op_setl_T0_sub, SUFFIX)(void) +{ + int src1, src2; + src1 = CC_SRC; + src2 = CC_SRC - CC_DST; + + T0 = ((DATA_STYPE)src1 < (DATA_STYPE)src2); +} + +void OPPROTO glue(op_setle_T0_sub, SUFFIX)(void) +{ + int src1, src2; + src1 = CC_SRC; + 
src2 = CC_SRC - CC_DST; + + T0 = ((DATA_STYPE)src1 <= (DATA_STYPE)src2); +} + +/* shifts */ + +void OPPROTO glue(glue(op_rol, SUFFIX), _T0_T1_cc)(void) +{ + int count, src; + count = T1 & SHIFT_MASK; + if (count) { + CC_SRC = cc_table[CC_OP].compute_all() & ~(CC_O | CC_C); + src = T0; + T0 &= DATA_MASK; + T0 = (T0 << count) | (T0 >> (DATA_BITS - count)); + CC_SRC |= (lshift(src ^ T0, 11 - (DATA_BITS - 1)) & CC_O) | + (T0 & CC_C); + CC_OP = CC_OP_EFLAGS; + } +} + +void OPPROTO glue(glue(op_ror, SUFFIX), _T0_T1_cc)(void) +{ + int count, src; + count = T1 & SHIFT_MASK; + if (count) { + CC_SRC = cc_table[CC_OP].compute_all() & ~(CC_O | CC_C); + src = T0; + T0 &= DATA_MASK; + T0 = (T0 >> count) | (T0 << (DATA_BITS - count)); + CC_SRC |= (lshift(src ^ T0, 11 - (DATA_BITS - 1)) & CC_O) | + ((T0 >> (DATA_BITS - 1)) & CC_C); + CC_OP = CC_OP_EFLAGS; + } +} + +void OPPROTO glue(glue(op_rcl, SUFFIX), _T0_T1_cc)(void) +{ + int count, res, eflags; + unsigned int src; + + count = T1 & 0x1f; +#if DATA_BITS == 16 + count = rclw_table[count]; +#elif DATA_BITS == 8 + count = rclb_table[count]; +#endif + if (count) { + eflags = cc_table[CC_OP].compute_all(); + src = T0; + res = (T0 << count) | ((eflags & CC_C) << (count - 1)); + if (count > 1) + res |= T0 >> (DATA_BITS + 1 - count); + T0 = res; + CC_SRC = (eflags & ~(CC_C | CC_O)) | + (lshift(src ^ T0, 11 - (DATA_BITS - 1)) & CC_O) | + ((src >> (DATA_BITS - count)) & CC_C); + CC_OP = CC_OP_EFLAGS; + } +} + +void OPPROTO glue(glue(op_rcr, SUFFIX), _T0_T1_cc)(void) +{ + int count, res, eflags; + unsigned int src; + + count = T1 & 0x1f; +#if DATA_BITS == 16 + count = rclw_table[count]; +#elif DATA_BITS == 8 + count = rclb_table[count]; +#endif + if (count) { + eflags = cc_table[CC_OP].compute_all(); + src = T0; + res = (T0 >> count) | ((eflags & CC_C) << (DATA_BITS - count)); + if (count > 1) + res |= T0 << (DATA_BITS + 1 - count); + T0 = res; + CC_SRC = (eflags & ~(CC_C | CC_O)) | + (lshift(src ^ T0, 11 - (DATA_BITS - 1)) & CC_O) | + 
((src >> (count - 1)) & CC_C); + CC_OP = CC_OP_EFLAGS; + } +} + +void OPPROTO glue(glue(op_shl, SUFFIX), _T0_T1_cc)(void) +{ + int count; + count = T1 & 0x1f; + if (count == 1) { + CC_SRC = T0; + T0 = T0 << 1; + CC_DST = T0; + CC_OP = CC_OP_ADDB + SHIFT; + } else if (count) { + CC_SRC = T0 >> (DATA_BITS - count); + T0 = T0 << count; + CC_DST = T0; + CC_OP = CC_OP_SHLB + SHIFT; + } +} + +void OPPROTO glue(glue(op_shr, SUFFIX), _T0_T1_cc)(void) +{ + int count; + count = T1 & 0x1f; + if (count) { + T0 &= DATA_MASK; + CC_SRC = T0 >> (count - 1); + T0 = T0 >> count; + CC_DST = T0; + CC_OP = CC_OP_SHLB + SHIFT; + } +} + +void OPPROTO glue(glue(op_sar, SUFFIX), _T0_T1_cc)(void) +{ + int count, src; + count = T1 & 0x1f; + if (count) { + src = (DATA_STYPE)T0; + CC_SRC = src >> (count - 1); + T0 = src >> count; + CC_DST = T0; + CC_OP = CC_OP_SHLB + SHIFT; + } +} + +/* string operations */ +/* XXX: maybe use lower level instructions to ease exception handling */ + +void OPPROTO glue(op_movs, SUFFIX)(void) +{ + int v; + v = glue(ldu, SUFFIX)((void *)ESI); + glue(st, SUFFIX)((void *)EDI, v); + ESI += (DF << SHIFT); + EDI += (DF << SHIFT); +} + +void OPPROTO glue(op_rep_movs, SUFFIX)(void) +{ + int v, inc; + inc = (DF << SHIFT); + while (ECX != 0) { + v = glue(ldu, SUFFIX)((void *)ESI); + glue(st, SUFFIX)((void *)EDI, v); + ESI += inc; + EDI += inc; + ECX--; + } +} + +void OPPROTO glue(op_stos, SUFFIX)(void) +{ + glue(st, SUFFIX)((void *)EDI, EAX); + EDI += (DF << SHIFT); +} + +void OPPROTO glue(op_rep_stos, SUFFIX)(void) +{ + int inc; + inc = (DF << SHIFT); + while (ECX != 0) { + glue(st, SUFFIX)((void *)EDI, EAX); + EDI += inc; + ECX--; + } +} + +void OPPROTO glue(op_lods, SUFFIX)(void) +{ + int v; + v = glue(ldu, SUFFIX)((void *)ESI); +#if SHIFT == 0 + EAX = (EAX & ~0xff) | v; +#elif SHIFT == 1 + EAX = (EAX & ~0xffff) | v; +#else + EAX = v; +#endif + ESI += (DF << SHIFT); +} + +/* don't know if it is used */ +void OPPROTO glue(op_rep_lods, SUFFIX)(void) +{ + int v, inc; + inc 
= (DF << SHIFT); + while (ECX != 0) { + v = glue(ldu, SUFFIX)((void *)ESI); +#if SHIFT == 0 + EAX = (EAX & ~0xff) | v; +#elif SHIFT == 1 + EAX = (EAX & ~0xffff) | v; +#else + EAX = v; +#endif + ESI += inc; + ECX--; + } +} + +void OPPROTO glue(op_scas, SUFFIX)(void) +{ + int v; + + v = glue(ldu, SUFFIX)((void *)EDI); /* SCAS compares the accumulator with memory at EDI */ + EDI += (DF << SHIFT); + CC_SRC = EAX; + CC_DST = EAX - v; +} + +void OPPROTO glue(op_repz_scas, SUFFIX)(void) +{ + int v1, v2, inc; + + if (ECX != 0) { + /* NOTE: the flags are not modified if ECX == 0 */ +#if SHIFT == 0 + v1 = EAX & 0xff; +#elif SHIFT == 1 + v1 = EAX & 0xffff; +#else + v1 = EAX; +#endif + inc = (DF << SHIFT); + do { + v2 = glue(ldu, SUFFIX)((void *)EDI); /* SCAS scans memory at EDI */ + if (v1 != v2) + break; + EDI += inc; + ECX--; + } while (ECX != 0); + CC_SRC = v1; + CC_DST = v1 - v2; + CC_OP = CC_OP_SUBB + SHIFT; + } +} + +void OPPROTO glue(op_repnz_scas, SUFFIX)(void) +{ + int v1, v2, inc; + + if (ECX != 0) { + /* NOTE: the flags are not modified if ECX == 0 */ +#if SHIFT == 0 + v1 = EAX & 0xff; +#elif SHIFT == 1 + v1 = EAX & 0xffff; +#else + v1 = EAX; +#endif + inc = (DF << SHIFT); + do { + v2 = glue(ldu, SUFFIX)((void *)EDI); /* SCAS scans memory at EDI */ + if (v1 == v2) + break; + EDI += inc; + ECX--; + } while (ECX != 0); + CC_SRC = v1; + CC_DST = v1 - v2; + CC_OP = CC_OP_SUBB + SHIFT; + } +} + +void OPPROTO glue(op_cmps, SUFFIX)(void) +{ + int v1, v2; + v1 = glue(ldu, SUFFIX)((void *)ESI); + v2 = glue(ldu, SUFFIX)((void *)EDI); + ESI += (DF << SHIFT); + EDI += (DF << SHIFT); + CC_SRC = v1; + CC_DST = v1 - v2; +} + +void OPPROTO glue(op_repz_cmps, SUFFIX)(void) +{ + int v1, v2, inc; + if (ECX != 0) { + inc = (DF << SHIFT); + do { + v1 = glue(ldu, SUFFIX)((void *)ESI); + v2 = glue(ldu, SUFFIX)((void *)EDI); + if (v1 != v2) + break; + ESI += inc; + EDI += inc; + ECX--; + } while (ECX != 0); + CC_SRC = v1; + CC_DST = v1 - v2; + CC_OP = CC_OP_SUBB + SHIFT; + } +} + +void OPPROTO glue(op_repnz_cmps, SUFFIX)(void) +{ + int v1, v2, inc; + if (ECX != 0) { + inc = (DF << SHIFT); + do { + v1 = 
glue(ldu, SUFFIX)((void *)ESI); + v2 = glue(ldu, SUFFIX)((void *)EDI); + if (v1 == v2) + break; + ESI += inc; + EDI += inc; + ECX--; + } while (ECX != 0); + CC_SRC = v1; + CC_DST = v1 - v2; + CC_OP = CC_OP_SUBB + SHIFT; + } +} + +void OPPROTO glue(op_outs, SUFFIX)(void) +{ + int v, dx; + dx = EDX & 0xffff; + v = glue(ldu, SUFFIX)((void *)ESI); + glue(port_out, SUFFIX)(dx, v); + ESI += (DF << SHIFT); +} + +void OPPROTO glue(op_rep_outs, SUFFIX)(void) +{ + int v, dx, inc; + inc = (DF << SHIFT); + dx = EDX & 0xffff; + while (ECX != 0) { + v = glue(ldu, SUFFIX)((void *)ESI); + glue(port_out, SUFFIX)(dx, v); + ESI += inc; + ECX--; + } +} + +void OPPROTO glue(op_ins, SUFFIX)(void) +{ + int v, dx; + dx = EDX & 0xffff; + v = glue(port_in, SUFFIX)(dx); + glue(st, SUFFIX)((void *)EDI, v); + EDI += (DF << SHIFT); +} + +void OPPROTO glue(op_rep_ins, SUFFIX)(void) +{ + int v, dx, inc; + inc = (DF << SHIFT); + dx = EDX & 0xffff; + while (ECX != 0) { + v = glue(port_in, SUFFIX)(dx); + glue(st, SUFFIX)((void *)EDI, v); + EDI += (DF << SHIFT); + ECX--; + } +} + +#undef DATA_BITS +#undef SHIFT_MASK +#undef SIGN_MASK +#undef DATA_TYPE +#undef DATA_STYPE +#undef DATA_MASK +#undef SUFFIX diff --git a/tests/Makefile b/tests/Makefile index c6347edfcc..2c2b059df4 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -2,7 +2,9 @@ CC=gcc CFLAGS=-Wall -O2 -g LDFLAGS= -TESTS=hello test1 test2 sha1 test-i386 +TESTS=hello test1 test2 sha1 test-i386 +TESTS+=op-i386.o #op-i386.o op-ppc.o op-arm.o op-mips.o op-sparc.o + GEMU=../gemu all: $(TESTS) @@ -25,6 +27,22 @@ test: test-i386 $(GEMU) test-i386 > test-i386.out @if diff -u test-i386.ref test-i386.out ; then echo "Auto Test OK"; fi +# dyngen tests +op-i386.o: op.c + gcc $(CFLAGS) -c -o $@ $< + +op-ppc.o: op.c + powerpc-linux-gcc $(CFLAGS) -c -o $@ $< + +op-arm.o: op.c + arm-linux-gcc $(CFLAGS) -c -o $@ $< + +op-mips.o: op.c + mips-linux-gcc $(CFLAGS) -mno-abicalls -c -o $@ $< + +op-sparc.o: op.c + sparc-linux-gcc $(CFLAGS) -mflat -c -o $@ $< + # 
speed test sha1: sha1.c $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< diff --git a/thunk.h b/thunk.h index 0b83d202af..5e5d9dd18d 100644 --- a/thunk.h +++ b/thunk.h @@ -28,16 +28,16 @@ #define bswap_64(x) \ ({ \ - __u64 __x = (x); \ - ((__u64)( \ - (__u64)(((__u64)(__x) & (__u64)0x00000000000000ffULL) << 56) | \ - (__u64)(((__u64)(__x) & (__u64)0x000000000000ff00ULL) << 40) | \ - (__u64)(((__u64)(__x) & (__u64)0x0000000000ff0000ULL) << 24) | \ - (__u64)(((__u64)(__x) & (__u64)0x00000000ff000000ULL) << 8) | \ - (__u64)(((__u64)(__x) & (__u64)0x000000ff00000000ULL) >> 8) | \ - (__u64)(((__u64)(__x) & (__u64)0x0000ff0000000000ULL) >> 24) | \ - (__u64)(((__u64)(__x) & (__u64)0x00ff000000000000ULL) >> 40) | \ - (__u64)(((__u64)(__x) & (__u64)0xff00000000000000ULL) >> 56) )); \ + uint64_t __x = (x); \ + ((uint64_t)( \ + (uint64_t)(((uint64_t)(__x) & (uint64_t)0x00000000000000ffULL) << 56) | \ + (uint64_t)(((uint64_t)(__x) & (uint64_t)0x000000000000ff00ULL) << 40) | \ + (uint64_t)(((uint64_t)(__x) & (uint64_t)0x0000000000ff0000ULL) << 24) | \ + (uint64_t)(((uint64_t)(__x) & (uint64_t)0x00000000ff000000ULL) << 8) | \ + (uint64_t)(((uint64_t)(__x) & (uint64_t)0x000000ff00000000ULL) >> 8) | \ + (uint64_t)(((uint64_t)(__x) & (uint64_t)0x0000ff0000000000ULL) >> 24) | \ + (uint64_t)(((uint64_t)(__x) & (uint64_t)0x00ff000000000000ULL) >> 40) | \ + (uint64_t)(((uint64_t)(__x) & (uint64_t)0xff00000000000000ULL) >> 56) )); \ }) #endif @@ -51,7 +51,7 @@ #define BSWAP_NEEDED #endif -/* XXX: auto autoconf */ +/* XXX: autoconf */ #define TARGET_I386 #define TARGET_LONG_BITS 32 diff --git a/translate-i386.c b/translate-i386.c new file mode 100644 index 0000000000..0c1b95a923 --- /dev/null +++ b/translate-i386.c @@ -0,0 +1,2133 @@ +#include +#include +#include +#include +#include +#include + +#include "cpu-i386.h" + +static uint8_t *gen_code_ptr; +int __op_param1, __op_param2, __op_param3; + +/* supress that */ +static void error(const char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + exit(1); +} + +#define PREFIX_REPZ 1 +#define PREFIX_REPNZ 2 +#define PREFIX_LOCK 4 +#define PREFIX_CS 8 +#define PREFIX_SS 0x10 +#define PREFIX_DS 0x20 +#define PREFIX_ES 0x40 +#define PREFIX_FS 0x80 +#define PREFIX_GS 0x100 +#define PREFIX_DATA 0x200 +#define PREFIX_ADR 0x400 +#define PREFIX_FWAIT 0x800 + +typedef struct DisasContext { + /* current insn context */ + int prefix; + int aflag, dflag; + uint8_t *pc; /* current pc */ + uint8_t *runtime_pc; /* current pc in the runtime generated code */ + int cc_op; /* current CC operation */ + int f_st; +} DisasContext; + +/* i386 arith/logic operations */ +enum { + OP_ADDL, + OP_ORL, + OP_ADCL, + OP_SBBL, + OP_ANDL, + OP_SUBL, + OP_XORL, + OP_CMPL, +}; + +/* i386 shift ops */ +enum { + OP_ROL, + OP_ROR, + OP_RCL, + OP_RCR, + OP_SHL, + OP_SHR, + OP_SHL1, /* undocumented */ + OP_SAR = 7, +}; + + +static const int fp_ops[8] = { +#if 0 + OP_FADDQ, OP_FMULQ, OP_CMP, OP_CMP, + OP_FSUBQ, OP_FSUBQ, OP_FDIVQ, OP_FDIVQ +#endif +}; + +extern char cc_table, rclw_table, rclb_table; +extern char helper_rcll_T0_T1_cc; +extern char __udivdi3, __umoddi3; + +#include "op-i386.h" + +/* operand size */ +enum { + OT_BYTE = 0, + OT_WORD, + OT_LONG, + OT_QUAD, +}; + +enum { + /* I386 int registers */ + OR_EAX, /* MUST be even numbered */ + OR_ECX, + OR_EDX, + OR_EBX, + OR_ESP, + OR_EBP, + OR_ESI, + OR_EDI, + + /* I386 float registers */ + OR_ST0, + OR_ST1, + OR_ST2, + OR_ST3, + OR_ST4, + OR_ST5, + OR_ST6, + OR_ST7, + OR_TMP0, /* temporary operand register */ + OR_TMP1, + OR_A0, /* temporary register used when doing address evaluation */ + OR_EFLAGS, /* cpu flags */ + OR_ITMP0, /* used for byte/word insertion */ + OR_ITMP1, /* used for byte/word insertion */ + OR_ITMP2, /* used for byte/word insertion */ + OR_FTMP0, /* float temporary */ + OR_DF, /* D flag, for string ops */ + OR_ZERO, /* fixed zero register */ + OR_IM, /* dummy immediate value register */ 
+ NB_OREGS, +}; + +#if 0 +static const double tab_const[7] = { + 1.0, + 3.32192809488736234789, /* log2(10) */ + M_LOG2E, + M_PI, + 0.30102999566398119521, /* log10(2) */ + M_LN2, + 0.0 +}; +#endif + +typedef void (GenOpFunc)(void); +typedef void (GenOpFunc1)(long); +typedef void (GenOpFunc2)(long, long); + +static GenOpFunc *gen_op_mov_reg_T0[3][8] = { + [OT_BYTE] = { + gen_op_movb_EAX_T0, + gen_op_movb_ECX_T0, + gen_op_movb_EDX_T0, + gen_op_movb_EBX_T0, + gen_op_movh_EAX_T0, + gen_op_movh_ECX_T0, + gen_op_movh_EDX_T0, + gen_op_movh_EBX_T0, + }, + [OT_WORD] = { + gen_op_movw_EAX_T0, + gen_op_movw_ECX_T0, + gen_op_movw_EDX_T0, + gen_op_movw_EBX_T0, + gen_op_movw_ESP_T0, + gen_op_movw_EBP_T0, + gen_op_movw_ESI_T0, + gen_op_movw_EDI_T0, + }, + [OT_LONG] = { + gen_op_movl_EAX_T0, + gen_op_movl_ECX_T0, + gen_op_movl_EDX_T0, + gen_op_movl_EBX_T0, + gen_op_movl_ESP_T0, + gen_op_movl_EBP_T0, + gen_op_movl_ESI_T0, + gen_op_movl_EDI_T0, + }, +}; + +static GenOpFunc *gen_op_mov_reg_T1[3][8] = { + [OT_BYTE] = { + gen_op_movb_EAX_T1, + gen_op_movb_ECX_T1, + gen_op_movb_EDX_T1, + gen_op_movb_EBX_T1, + gen_op_movh_EAX_T1, + gen_op_movh_ECX_T1, + gen_op_movh_EDX_T1, + gen_op_movh_EBX_T1, + }, + [OT_WORD] = { + gen_op_movw_EAX_T1, + gen_op_movw_ECX_T1, + gen_op_movw_EDX_T1, + gen_op_movw_EBX_T1, + gen_op_movw_ESP_T1, + gen_op_movw_EBP_T1, + gen_op_movw_ESI_T1, + gen_op_movw_EDI_T1, + }, + [OT_LONG] = { + gen_op_movl_EAX_T1, + gen_op_movl_ECX_T1, + gen_op_movl_EDX_T1, + gen_op_movl_EBX_T1, + gen_op_movl_ESP_T1, + gen_op_movl_EBP_T1, + gen_op_movl_ESI_T1, + gen_op_movl_EDI_T1, + }, +}; + +static GenOpFunc *gen_op_mov_reg_A0[2][8] = { + [0] = { + gen_op_movw_EAX_A0, + gen_op_movw_ECX_A0, + gen_op_movw_EDX_A0, + gen_op_movw_EBX_A0, + gen_op_movw_ESP_A0, + gen_op_movw_EBP_A0, + gen_op_movw_ESI_A0, + gen_op_movw_EDI_A0, + }, + [1] = { + gen_op_movl_EAX_A0, + gen_op_movl_ECX_A0, + gen_op_movl_EDX_A0, + gen_op_movl_EBX_A0, + gen_op_movl_ESP_A0, + gen_op_movl_EBP_A0, + gen_op_movl_ESI_A0, 
+ gen_op_movl_EDI_A0, + }, +}; + +static GenOpFunc *gen_op_mov_TN_reg[3][2][8] = +{ + [OT_BYTE] = { + { + gen_op_movl_T0_EAX, + gen_op_movl_T0_ECX, + gen_op_movl_T0_EDX, + gen_op_movl_T0_EBX, + gen_op_movh_T0_EAX, + gen_op_movh_T0_ECX, + gen_op_movh_T0_EDX, + gen_op_movh_T0_EBX, + }, + { + gen_op_movl_T1_EAX, + gen_op_movl_T1_ECX, + gen_op_movl_T1_EDX, + gen_op_movl_T1_EBX, + gen_op_movh_T1_EAX, + gen_op_movh_T1_ECX, + gen_op_movh_T1_EDX, + gen_op_movh_T1_EBX, + }, + }, + [OT_WORD] = { + { + gen_op_movl_T0_EAX, + gen_op_movl_T0_ECX, + gen_op_movl_T0_EDX, + gen_op_movl_T0_EBX, + gen_op_movl_T0_ESP, + gen_op_movl_T0_EBP, + gen_op_movl_T0_ESI, + gen_op_movl_T0_EDI, + }, + { + gen_op_movl_T1_EAX, + gen_op_movl_T1_ECX, + gen_op_movl_T1_EDX, + gen_op_movl_T1_EBX, + gen_op_movl_T1_ESP, + gen_op_movl_T1_EBP, + gen_op_movl_T1_ESI, + gen_op_movl_T1_EDI, + }, + }, + [OT_LONG] = { + { + gen_op_movl_T0_EAX, + gen_op_movl_T0_ECX, + gen_op_movl_T0_EDX, + gen_op_movl_T0_EBX, + gen_op_movl_T0_ESP, + gen_op_movl_T0_EBP, + gen_op_movl_T0_ESI, + gen_op_movl_T0_EDI, + }, + { + gen_op_movl_T1_EAX, + gen_op_movl_T1_ECX, + gen_op_movl_T1_EDX, + gen_op_movl_T1_EBX, + gen_op_movl_T1_ESP, + gen_op_movl_T1_EBP, + gen_op_movl_T1_ESI, + gen_op_movl_T1_EDI, + }, + }, +}; + +static GenOpFunc *gen_op_movl_A0_reg[8] = { + gen_op_movl_A0_EAX, + gen_op_movl_A0_ECX, + gen_op_movl_A0_EDX, + gen_op_movl_A0_EBX, + gen_op_movl_A0_ESP, + gen_op_movl_A0_EBP, + gen_op_movl_A0_ESI, + gen_op_movl_A0_EDI, +}; + +static GenOpFunc *gen_op_addl_A0_reg_sN[4][8] = { + [0] = { + gen_op_addl_A0_EAX, + gen_op_addl_A0_ECX, + gen_op_addl_A0_EDX, + gen_op_addl_A0_EBX, + gen_op_addl_A0_ESP, + gen_op_addl_A0_EBP, + gen_op_addl_A0_ESI, + gen_op_addl_A0_EDI, + }, + [1] = { + gen_op_addl_A0_EAX_s1, + gen_op_addl_A0_ECX_s1, + gen_op_addl_A0_EDX_s1, + gen_op_addl_A0_EBX_s1, + gen_op_addl_A0_ESP_s1, + gen_op_addl_A0_EBP_s1, + gen_op_addl_A0_ESI_s1, + gen_op_addl_A0_EDI_s1, + }, + [2] = { + gen_op_addl_A0_EAX_s2, + 
gen_op_addl_A0_ECX_s2, + gen_op_addl_A0_EDX_s2, + gen_op_addl_A0_EBX_s2, + gen_op_addl_A0_ESP_s2, + gen_op_addl_A0_EBP_s2, + gen_op_addl_A0_ESI_s2, + gen_op_addl_A0_EDI_s2, + }, + [3] = { + gen_op_addl_A0_EAX_s3, + gen_op_addl_A0_ECX_s3, + gen_op_addl_A0_EDX_s3, + gen_op_addl_A0_EBX_s3, + gen_op_addl_A0_ESP_s3, + gen_op_addl_A0_EBP_s3, + gen_op_addl_A0_ESI_s3, + gen_op_addl_A0_EDI_s3, + }, +}; + +static GenOpFunc *gen_op_arith_T0_T1_cc[8] = { + gen_op_addl_T0_T1_cc, + gen_op_orl_T0_T1_cc, + gen_op_adcl_T0_T1_cc, + gen_op_sbbl_T0_T1_cc, + gen_op_andl_T0_T1_cc, + gen_op_subl_T0_T1_cc, + gen_op_xorl_T0_T1_cc, + gen_op_cmpl_T0_T1_cc, +}; + +static const int cc_op_arithb[8] = { + CC_OP_ADDB, + CC_OP_LOGICB, + CC_OP_ADDB, + CC_OP_SUBB, + CC_OP_LOGICB, + CC_OP_SUBB, + CC_OP_LOGICB, + CC_OP_SUBB, +}; + +static GenOpFunc *gen_op_shift_T0_T1_cc[3][8] = { + [OT_BYTE] = { + gen_op_rolb_T0_T1_cc, + gen_op_rorb_T0_T1_cc, + gen_op_rclb_T0_T1_cc, + gen_op_rcrb_T0_T1_cc, + gen_op_shlb_T0_T1_cc, + gen_op_shrb_T0_T1_cc, + gen_op_shlb_T0_T1_cc, + gen_op_sarb_T0_T1_cc, + }, + [OT_WORD] = { + gen_op_rolw_T0_T1_cc, + gen_op_rorw_T0_T1_cc, + gen_op_rclw_T0_T1_cc, + gen_op_rcrw_T0_T1_cc, + gen_op_shlw_T0_T1_cc, + gen_op_shrw_T0_T1_cc, + gen_op_shlw_T0_T1_cc, + gen_op_sarw_T0_T1_cc, + }, + [OT_LONG] = { + gen_op_roll_T0_T1_cc, + gen_op_rorl_T0_T1_cc, + gen_op_rcll_T0_T1_cc, + gen_op_rcrl_T0_T1_cc, + gen_op_shll_T0_T1_cc, + gen_op_shrl_T0_T1_cc, + gen_op_shll_T0_T1_cc, + gen_op_sarl_T0_T1_cc, + }, +}; + +static GenOpFunc *gen_op_lds_T0_A0[3] = { + gen_op_ldsb_T0_A0, + gen_op_ldsw_T0_A0, +}; + +static GenOpFunc *gen_op_ldu_T0_A0[3] = { + gen_op_ldub_T0_A0, + gen_op_lduw_T0_A0, +}; + +/* sign does not matter */ +static GenOpFunc *gen_op_ld_T0_A0[3] = { + gen_op_ldub_T0_A0, + gen_op_lduw_T0_A0, + gen_op_ldl_T0_A0, +}; + +static GenOpFunc *gen_op_ld_T1_A0[3] = { + gen_op_ldub_T1_A0, + gen_op_lduw_T1_A0, + gen_op_ldl_T1_A0, +}; + +static GenOpFunc *gen_op_st_T0_A0[3] = { + gen_op_stb_T0_A0, + 
gen_op_stw_T0_A0, + gen_op_stl_T0_A0, +}; + +static GenOpFunc *gen_op_movs[6] = { + gen_op_movsb, + gen_op_movsw, + gen_op_movsl, + gen_op_rep_movsb, + gen_op_rep_movsw, + gen_op_rep_movsl, +}; + +static GenOpFunc *gen_op_stos[6] = { + gen_op_stosb, + gen_op_stosw, + gen_op_stosl, + gen_op_rep_stosb, + gen_op_rep_stosw, + gen_op_rep_stosl, +}; + +static GenOpFunc *gen_op_lods[6] = { + gen_op_lodsb, + gen_op_lodsw, + gen_op_lodsl, + gen_op_rep_lodsb, + gen_op_rep_lodsw, + gen_op_rep_lodsl, +}; + +static GenOpFunc *gen_op_scas[9] = { + gen_op_scasb, + gen_op_scasw, + gen_op_scasl, + gen_op_repz_scasb, + gen_op_repz_scasw, + gen_op_repz_scasl, + gen_op_repnz_scasb, + gen_op_repnz_scasw, + gen_op_repnz_scasl, +}; + +static GenOpFunc *gen_op_cmps[9] = { + gen_op_cmpsb, + gen_op_cmpsw, + gen_op_cmpsl, + gen_op_repz_cmpsb, + gen_op_repz_cmpsw, + gen_op_repz_cmpsl, + gen_op_repnz_cmpsb, + gen_op_repnz_cmpsw, + gen_op_repnz_cmpsl, +}; + +static GenOpFunc *gen_op_ins[6] = { + gen_op_insb, + gen_op_insw, + gen_op_insl, + gen_op_rep_insb, + gen_op_rep_insw, + gen_op_rep_insl, +}; + + +static GenOpFunc *gen_op_outs[6] = { + gen_op_outsb, + gen_op_outsw, + gen_op_outsl, + gen_op_rep_outsb, + gen_op_rep_outsw, + gen_op_rep_outsl, +}; + +enum { + JCC_O, + JCC_B, + JCC_Z, + JCC_BE, + JCC_S, + JCC_P, + JCC_L, + JCC_LE, +}; + +static GenOpFunc2 *gen_jcc_slow[8] = { + gen_op_jo_cc, + gen_op_jb_cc, + gen_op_jz_cc, + gen_op_jbe_cc, + gen_op_js_cc, + gen_op_jp_cc, + gen_op_jl_cc, + gen_op_jle_cc, +}; + +static GenOpFunc2 *gen_jcc_sub[3][8] = { + [OT_BYTE] = { + NULL, + gen_op_jb_subb, + gen_op_jz_subb, + gen_op_jbe_subb, + gen_op_js_subb, + NULL, + gen_op_jl_subb, + gen_op_jle_subb, + }, + [OT_WORD] = { + NULL, + gen_op_jb_subw, + gen_op_jz_subw, + gen_op_jbe_subw, + gen_op_js_subw, + NULL, + gen_op_jl_subw, + gen_op_jle_subw, + }, + [OT_LONG] = { + NULL, + gen_op_jb_subl, + gen_op_jz_subl, + gen_op_jbe_subl, + gen_op_js_subl, + NULL, + gen_op_jl_subl, + gen_op_jle_subl, + }, +}; + 
+static GenOpFunc *gen_setcc_slow[8] = { + gen_op_seto_T0_cc, + gen_op_setb_T0_cc, + gen_op_setz_T0_cc, + gen_op_setbe_T0_cc, + gen_op_sets_T0_cc, + gen_op_setp_T0_cc, + gen_op_setl_T0_cc, + gen_op_setle_T0_cc, +}; + +static GenOpFunc *gen_setcc_sub[3][8] = { + [OT_BYTE] = { + NULL, + gen_op_setb_T0_subb, + gen_op_setz_T0_subb, + gen_op_setbe_T0_subb, + gen_op_sets_T0_subb, + NULL, + gen_op_setl_T0_subb, + gen_op_setle_T0_subb, + }, + [OT_WORD] = { + NULL, + gen_op_setb_T0_subw, + gen_op_setz_T0_subw, + gen_op_setbe_T0_subw, + gen_op_sets_T0_subw, + NULL, + gen_op_setl_T0_subw, + gen_op_setle_T0_subw, + }, + [OT_LONG] = { + NULL, + gen_op_setb_T0_subl, + gen_op_setz_T0_subl, + gen_op_setbe_T0_subl, + gen_op_sets_T0_subl, + NULL, + gen_op_setl_T0_subl, + gen_op_setle_T0_subl, + }, +}; + +static void gen_op(DisasContext *s1, int op, int ot, int d, int s) +{ + if (d != OR_TMP0) + gen_op_mov_TN_reg[ot][0][d](); + if (s != OR_TMP1) + gen_op_mov_TN_reg[ot][1][s](); + if ((op == OP_ADCL || op == OP_SBBL) && s1->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s1->cc_op); + gen_op_arith_T0_T1_cc[op](); + if (d != OR_TMP0 && op != OP_CMPL) + gen_op_mov_reg_T0[ot][d](); + s1->cc_op = cc_op_arithb[op] + ot; +} + +static void gen_opi(DisasContext *s1, int op, int ot, int d, int c) +{ + gen_op1_movl_T1_im(c); + gen_op(s1, op, ot, d, OR_TMP1); /* the immediate is already in T1: OR_TMP1 stops gen_op from reloading T1 from a register */ +} + +static void gen_inc(DisasContext *s1, int ot, int d, int c) +{ + if (d != OR_TMP0) + gen_op_mov_TN_reg[ot][0][d](); + if (s1->cc_op != CC_OP_DYNAMIC) + gen_op_set_cc_op(s1->cc_op); + if (c > 0) + gen_op_incl_T0_cc(); + else + gen_op_decl_T0_cc(); + if (d != OR_TMP0) + gen_op_mov_reg_T0[ot][d](); +} + +static void gen_shift(DisasContext *s1, int op, int ot, int d, int s) +{ + if (d != OR_TMP0) + gen_op_mov_TN_reg[ot][0][d](); + if (s != OR_TMP1) + gen_op_mov_TN_reg[ot][1][s](); + switch(op) { + case OP_ROL: + case OP_ROR: + case OP_RCL: + case OP_RCR: + /* only C and O are modified, so we must update flags dynamically */ + if (s1->cc_op != 
CC_OP_DYNAMIC) + gen_op_set_cc_op(s1->cc_op); + gen_op_shift_T0_T1_cc[ot][op](); + break; + default: + gen_op_shift_T0_T1_cc[ot][op](); + break; + } + if (d != OR_TMP0) + gen_op_mov_reg_T0[ot][d](); + s1->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */ +} + +static void gen_shifti(DisasContext *s1, int op, int ot, int d, int c) +{ + /* currently not optimized */ + gen_op1_movl_T1_im(c); + gen_shift(s1, op, ot, d, OR_TMP1); +} + +static void gen_lea_modrm(DisasContext *s, int modrm, int *reg_ptr, int *offset_ptr) +{ + int havesib; + int havebase; + int base, disp; + int index = 0; + int scale = 0; + int reg1, reg2, opreg; + int mod, rm, code; + + mod = (modrm >> 6) & 3; + rm = modrm & 7; + + if (s->aflag) { + + havesib = 0; + havebase = 1; + base = rm; + + if (base == 4) { + havesib = 1; + code = ldub(s->pc++); + scale = (code >> 6) & 3; + index = (code >> 3) & 7; + base = code & 7; + } + + switch (mod) { + case 0: + if (base == 5) { + havebase = 0; + disp = ldl(s->pc); + s->pc += 4; + } else { + disp = 0; + } + break; + case 1: + disp = (int8_t)ldub(s->pc++); + break; + default: + case 2: + disp = ldl(s->pc); + s->pc += 4; + break; + } + + reg1 = OR_ZERO; + reg2 = OR_ZERO; + + if (havebase || (havesib && (index != 4 || scale != 0))) { + if (havebase) + reg1 = OR_EAX + base; + if (havesib && index != 4) { + if (havebase) + reg2 = index + OR_EAX; + else + reg1 = index + OR_EAX; + } + } + /* XXX: disp only ? 
*/ + if (reg2 == OR_ZERO) { + /* op: disp + (reg1 << scale) */ + if (reg1 == OR_ZERO) { + gen_op1_movl_A0_im(disp); + } else if (scale == 0 && disp == 0) { + gen_op_movl_A0_reg[reg1](); + } else { + /* A0 must hold disp before the scaled register is added to it */ + gen_op1_movl_A0_im(disp); + gen_op_addl_A0_reg_sN[scale][reg1](); + } + } else { + /* op: disp + reg1 + (reg2 << scale) */ + if (disp != 0) { + gen_op1_movl_A0_im(disp); + gen_op_addl_A0_reg_sN[0][reg1](); + } else { + gen_op_movl_A0_reg[reg1](); + } + gen_op_addl_A0_reg_sN[scale][reg2](); + } + opreg = OR_A0; + } else { + fprintf(stderr, "16 bit addressing not supported\n"); + disp = 0; + opreg = 0; + } + *reg_ptr = opreg; + *offset_ptr = disp; +} + +/* generate modrm memory load or store of 'reg'. TMP0 is used if reg != + OR_TMP0 */ +static void gen_ldst_modrm(DisasContext *s, int modrm, int ot, int reg, int is_store) +{ + int mod, rm, opreg, disp; + + mod = (modrm >> 6) & 3; + rm = modrm & 7; + if (mod == 3) { + if (is_store) { + if (reg != OR_TMP0) + gen_op_mov_TN_reg[ot][0][reg](); + gen_op_mov_reg_T0[ot][rm](); + } else { + gen_op_mov_TN_reg[ot][0][rm](); + if (reg != OR_TMP0) + gen_op_mov_reg_T0[ot][reg](); + } + } else { + gen_lea_modrm(s, modrm, &opreg, &disp); + if (is_store) { + if (reg != OR_TMP0) + gen_op_mov_TN_reg[ot][0][reg](); + gen_op_st_T0_A0[ot](); + } else { + gen_op_ld_T0_A0[ot](); + if (reg != OR_TMP0) + gen_op_mov_reg_T0[ot][reg](); + } + } +} + +static inline uint32_t insn_get(DisasContext *s, int ot) +{ + uint32_t ret; + + switch(ot) { + case OT_BYTE: + ret = ldub(s->pc); + s->pc++; + break; + case OT_WORD: + ret = lduw(s->pc); + s->pc += 2; + break; + default: + case OT_LONG: + ret = ldl(s->pc); + s->pc += 4; + break; + } + return ret; +} + +static void gen_jcc(DisasContext *s, int b, int val) +{ + int inv, jcc_op; + GenOpFunc2 *func; + + inv = b & 1; + jcc_op = (b >> 1) & 7; + switch(s->cc_op) { + /* we optimize the cmp/jcc case */ + case CC_OP_SUBB: + case CC_OP_SUBW: + case CC_OP_SUBL: + func = gen_jcc_sub[s->cc_op - CC_OP_SUBB][jcc_op]; + if (!func) + goto slow_jcc; 
+ break; + + /* some jumps are easy to compute */ + case CC_OP_ADDB: + case CC_OP_ADDW: + case CC_OP_ADDL: + case CC_OP_LOGICB: + case CC_OP_LOGICW: + case CC_OP_LOGICL: + case CC_OP_INCB: + case CC_OP_INCW: + case CC_OP_INCL: + case CC_OP_DECB: + case CC_OP_DECW: + case CC_OP_DECL: + case CC_OP_SHLB: + case CC_OP_SHLW: + case CC_OP_SHLL: + switch(jcc_op) { + case JCC_Z: + func = gen_jcc_sub[(s->cc_op - CC_OP_ADDB) % 3][jcc_op]; + break; + case JCC_S: + func = gen_jcc_sub[(s->cc_op - CC_OP_ADDB) % 3][jcc_op]; + break; + default: + goto slow_jcc; + } + break; + default: + slow_jcc: + if (s->cc_op != CC_OP_DYNAMIC) + op_set_cc_op(s->cc_op); + func = gen_jcc_slow[jcc_op]; + break; + } + if (!inv) { + func(val, (long)s->pc); + } else { + func((long)s->pc, val); + } +} + +static void gen_setcc(DisasContext *s, int b) +{ + int inv, jcc_op; + GenOpFunc *func; + + inv = b & 1; + jcc_op = (b >> 1) & 7; + switch(s->cc_op) { + /* we optimize the cmp/jcc case */ + case CC_OP_SUBB: + case CC_OP_SUBW: + case CC_OP_SUBL: + func = gen_setcc_sub[s->cc_op - CC_OP_SUBB][jcc_op]; + if (!func) + goto slow_jcc; + break; + + /* some jumps are easy to compute */ + case CC_OP_ADDB: + case CC_OP_ADDW: + case CC_OP_ADDL: + case CC_OP_LOGICB: + case CC_OP_LOGICW: + case CC_OP_LOGICL: + case CC_OP_INCB: + case CC_OP_INCW: + case CC_OP_INCL: + case CC_OP_DECB: + case CC_OP_DECW: + case CC_OP_DECL: + case CC_OP_SHLB: + case CC_OP_SHLW: + case CC_OP_SHLL: + switch(jcc_op) { + case JCC_Z: + func = gen_setcc_sub[s->cc_op - CC_OP_ADDB][jcc_op]; + break; + case JCC_S: + func = gen_setcc_sub[s->cc_op - CC_OP_ADDB][jcc_op]; + break; + default: + goto slow_jcc; + } + break; + default: + slow_jcc: + if (s->cc_op != CC_OP_DYNAMIC) + op_set_cc_op(s->cc_op); + func = gen_setcc_slow[jcc_op]; + break; + } + func(); + if (inv) { + gen_op_xor_T0_1(); + } +} + +/* return the size of the intruction. 
Return -1 if no insn found.
+
+   Decode one x86 instruction starting at 'pc_start' and emit the
+   corresponding micro-operations through the gen_op_* generators.
+   Prefix bytes are accumulated first, then the opcode is dispatched;
+   two-byte opcodes (0x0f xx) are dispatched as 0x100 | xx. The decoded
+   prefixes and the address/data size flags are stored in 's'.
+
+   NOTE(review): on success the code returns (long)s->pc converted to int,
+   i.e. the address just past the decoded instruction rather than a byte
+   count - confirm which one callers expect. */
+int disas_insn(DisasContext *s, uint8_t *pc_start)
+{
+    int b, prefixes, aflag, dflag;
+    int shift, ot;
+    int modrm, reg, rm, mod, reg_addr, op, opreg, offset_addr, val;
+
+    s->pc = pc_start;
+    prefixes = 0;
+    aflag = 1;
+    dflag = 1;
+    // cur_pc = s->pc; /* for insn generation */
+ next_byte:
+    b = ldub(s->pc);
+    /* NOTE(review): only reachable if ldub() can return a negative
+       value - confirm against its definition */
+    if (b < 0)
+        return -1;
+    s->pc++;
+    /* check prefixes */
+    switch (b) {
+    case 0xf3:
+        prefixes |= PREFIX_REPZ;
+        goto next_byte;
+    case 0xf2:
+        prefixes |= PREFIX_REPNZ;
+        goto next_byte;
+    case 0xf0:
+        prefixes |= PREFIX_LOCK;
+        goto next_byte;
+    case 0x2e:
+        prefixes |= PREFIX_CS;
+        goto next_byte;
+    case 0x36:
+        prefixes |= PREFIX_SS;
+        goto next_byte;
+    case 0x3e:
+        prefixes |= PREFIX_DS;
+        goto next_byte;
+    case 0x26:
+        prefixes |= PREFIX_ES;
+        goto next_byte;
+    case 0x64:
+        prefixes |= PREFIX_FS;
+        goto next_byte;
+    case 0x65:
+        prefixes |= PREFIX_GS;
+        goto next_byte;
+    case 0x66:
+        prefixes |= PREFIX_DATA;
+        goto next_byte;
+    case 0x67:
+        prefixes |= PREFIX_ADR;
+        goto next_byte;
+    case 0x9b:
+        prefixes |= PREFIX_FWAIT;
+        goto next_byte;
+    }
+
+    /* the size prefixes toggle the default (32 bit) operand/address size */
+    if (prefixes & PREFIX_DATA)
+        dflag ^= 1;
+    if (prefixes & PREFIX_ADR)
+        aflag ^= 1;
+
+    s->prefix = prefixes;
+    s->aflag = aflag;
+    s->dflag = dflag;
+
+    /* now check op code */
+ reswitch:
+    switch(b) {
+    case 0x0f:
+        /**************************/
+        /* extended op code */
+        b = ldub(s->pc++) | 0x100;
+        goto reswitch;
+
+        /**************************/
+        /* arith & logic */
+    case 0x00 ... 0x05:
+    case 0x08 ... 0x0d:
+    case 0x10 ... 0x15:
+    case 0x18 ... 0x1d:
+    case 0x20 ... 0x25:
+    case 0x28 ... 0x2d:
+    case 0x30 ... 0x35:
+    case 0x38 ... 0x3d:
+        {
+            int op, f, val;
+            op = (b >> 3) & 7;
+            f = (b >> 1) & 3;
+
+            if ((b & 1) == 0)
+                ot = OT_BYTE;
+            else
+                ot = dflag ? OT_LONG : OT_WORD;
+
+            switch(f) {
+            case 0: /* OP Ev, Gv */
+                modrm = ldub(s->pc++);
+                reg = ((modrm >> 3) & 7) + OR_EAX;
+                mod = (modrm >> 6) & 3;
+                rm = modrm & 7;
+                if (mod != 3) {
+                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                    gen_op_ld_T0_A0[ot]();
+                    opreg = OR_TMP0;
+                } else {
+                    opreg = OR_EAX + rm;
+                }
+                gen_op(s, op, ot, opreg, reg);
+                /* op 7 is skipped: no write back to memory for it */
+                if (mod != 3 && op != 7) {
+                    gen_op_st_T0_A0[ot]();
+                }
+                break;
+            case 1: /* OP Gv, Ev */
+                modrm = ldub(s->pc++);
+                mod = (modrm >> 6) & 3;
+                reg = ((modrm >> 3) & 7) + OR_EAX;
+                rm = modrm & 7;
+                if (mod != 3) {
+                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                    gen_op_ld_T1_A0[ot]();
+                    opreg = OR_TMP1;
+                } else {
+                    opreg = OR_EAX + rm;
+                }
+                gen_op(s, op, ot, reg, opreg);
+                break;
+            case 2: /* OP A, Iv */
+                val = insn_get(s, ot);
+                gen_opi(s, op, ot, OR_EAX, val);
+                break;
+            }
+        }
+        break;
+
+    case 0x80: /* GRP1 */
+    case 0x81:
+    case 0x83:
+        {
+            int val;
+
+            if ((b & 1) == 0)
+                ot = OT_BYTE;
+            else
+                ot = dflag ? OT_LONG : OT_WORD;
+
+            modrm = ldub(s->pc++);
+            mod = (modrm >> 6) & 3;
+            rm = modrm & 7;
+            op = (modrm >> 3) & 7;
+
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_op_ld_T0_A0[ot]();
+                opreg = OR_TMP0;
+            } else {
+                opreg = rm + OR_EAX;
+            }
+
+            switch(b) {
+            default:
+            case 0x80:
+            case 0x81:
+                val = insn_get(s, ot);
+                break;
+            case 0x83:
+                /* byte immediate, sign extended */
+                val = (int8_t)insn_get(s, OT_BYTE);
+                break;
+            }
+
+            gen_opi(s, op, ot, opreg, val);
+            if (op != 7 && mod != 3) {
+                gen_op_st_T0_A0[ot]();
+            }
+        }
+        break;
+
+        /**************************/
+        /* inc, dec, and other misc arith */
+    case 0x40 ... 0x47: /* inc Gv */
+        ot = dflag ? OT_LONG : OT_WORD;
+        gen_inc(s, ot, OR_EAX + (b & 7), 1);
+        break;
+    case 0x48 ... 0x4f: /* dec Gv */
+        ot = dflag ? OT_LONG : OT_WORD;
+        gen_inc(s, ot, OR_EAX + (b & 7), -1);
+        break;
+    case 0xf6: /* GRP3 */
+    case 0xf7:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+
+        modrm = ldub(s->pc++);
+        mod = (modrm >> 6) & 3;
+        rm = modrm & 7;
+        op = (modrm >> 3) & 7;
+        if (mod != 3) {
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_op_ld_T0_A0[ot]();
+        } else {
+            gen_op_mov_TN_reg[ot][0][rm]();
+        }
+
+        switch(op) {
+        case 0: /* test */
+            val = insn_get(s, ot);
+            gen_op1_movl_T1_im(val);
+            gen_op_testl_T0_T1_cc();
+            s->cc_op = CC_OP_LOGICB + ot;
+            break;
+        case 2: /* not */
+            gen_op_notl_T0();
+            if (mod != 3) {
+                gen_op_st_T0_A0[ot]();
+            } else {
+                gen_op_mov_reg_T0[ot][rm]();
+            }
+            break;
+        case 3: /* neg */
+            gen_op_negl_T0_cc();
+            if (mod != 3) {
+                gen_op_st_T0_A0[ot]();
+            } else {
+                gen_op_mov_reg_T0[ot][rm]();
+            }
+            s->cc_op = CC_OP_SUBB + ot;
+            break;
+        case 4: /* mul */
+            switch(ot) {
+            case OT_BYTE:
+                gen_op_mulb_AL_T0();
+                break;
+            case OT_WORD:
+                gen_op_mulw_AX_T0();
+                break;
+            default:
+            case OT_LONG:
+                gen_op_mull_EAX_T0();
+                break;
+            }
+            break;
+        case 5: /* imul */
+            switch(ot) {
+            case OT_BYTE:
+                gen_op_imulb_AL_T0();
+                break;
+            case OT_WORD:
+                gen_op_imulw_AX_T0();
+                break;
+            default:
+            case OT_LONG:
+                gen_op_imull_EAX_T0();
+                break;
+            }
+            break;
+        case 6: /* div */
+            switch(ot) {
+            case OT_BYTE:
+                gen_op_divb_AL_T0();
+                break;
+            case OT_WORD:
+                gen_op_divw_AX_T0();
+                break;
+            default:
+            case OT_LONG:
+                gen_op_divl_EAX_T0();
+                break;
+            }
+            break;
+        case 7: /* idiv */
+            switch(ot) {
+            case OT_BYTE:
+                gen_op_idivb_AL_T0();
+                break;
+            case OT_WORD:
+                gen_op_idivw_AX_T0();
+                break;
+            default:
+            case OT_LONG:
+                gen_op_idivl_EAX_T0();
+                break;
+            }
+            break;
+        default:
+            error("GRP3: bad instruction");
+            return -1;
+        }
+        break;
+
+    case 0xfe: /* GRP4 */
+    case 0xff: /* GRP5 */
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+
+        modrm = ldub(s->pc++);
+        mod = (modrm >> 6) & 3;
+        rm = modrm & 7;
+        op = (modrm >> 3) & 7;
+        /* GRP4 (byte form) only provides inc and dec */
+        if (op >= 2 && b == 0xfe) {
+            error("GRP4: bad instruction");
+            return -1;
+        }
+        if (mod != 3) {
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_op_ld_T0_A0[ot]();
+        } else {
+            gen_op_mov_TN_reg[ot][0][rm]();
+        }
+
+        switch(op) {
+        case 0: /* inc Ev */
+            gen_inc(s, ot, OR_TMP0, 1);
+            /* the result is in T0: write it back to the operand, which
+               is a register when mod == 3 (same scheme as not/neg) */
+            if (mod != 3)
+                gen_op_st_T0_A0[ot]();
+            else
+                gen_op_mov_reg_T0[ot][rm]();
+            break;
+        case 1: /* dec Ev */
+            gen_inc(s, ot, OR_TMP0, -1);
+            if (mod != 3)
+                gen_op_st_T0_A0[ot]();
+            else
+                gen_op_mov_reg_T0[ot][rm]();
+            break;
+        case 2: /* call Ev */
+            /* push the address of the next instruction, then jump */
+            gen_op1_movl_T1_im((long)s->pc);
+            gen_op_pushl_T1();
+            gen_op_jmp_T0();
+            break;
+        case 4: /* jmp Ev */
+            gen_op_jmp_T0();
+            break;
+        case 6: /* push Ev */
+            gen_op_pushl_T0();
+            break;
+        default:
+            error("GRP5: bad instruction");
+            return -1;
+        }
+        break;
+
+    case 0x84: /* test Ev, Gv */
+    case 0x85:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+
+        modrm = ldub(s->pc++);
+        mod = (modrm >> 6) & 3;
+        rm = modrm & 7;
+        reg = (modrm >> 3) & 7;
+
+        gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
+        gen_op_mov_TN_reg[ot][1][reg + OR_EAX]();
+        gen_op_testl_T0_T1_cc();
+        s->cc_op = CC_OP_LOGICB + ot;
+        break;
+
+    case 0xa8: /* test eAX, Iv */
+    case 0xa9:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        val = insn_get(s, ot);
+
+        gen_op_mov_TN_reg[ot][0][OR_EAX]();
+        gen_op1_movl_T1_im(val);
+        gen_op_testl_T0_T1_cc();
+        s->cc_op = CC_OP_LOGICB + ot;
+        break;
+
+    case 0x98: /* CWDE/CBW */
+        if (dflag)
+            gen_op_movswl_EAX_AX();
+        else
+            gen_op_movsbw_AX_AL();
+        break;
+    case 0x99: /* CDQ/CWD */
+        if (dflag)
+            gen_op_movslq_EDX_EAX();
+        else
+            gen_op_movswl_DX_AX();
+        break;
+    case 0x1af: /* imul Gv, Ev */
+    case 0x69: /* imul Gv, Ev, I */
+    case 0x6b:
+        ot = dflag ? OT_LONG : OT_WORD;
+        modrm = ldub(s->pc++);
+        reg = ((modrm >> 3) & 7) + OR_EAX;
+
+        gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
+        if (b == 0x69) {
+            val = insn_get(s, ot);
+            gen_op1_movl_T1_im(val);
+        } else if (b == 0x6b) {
+            /* the byte immediate is sign extended, as for the other
+               imm8 forms (0x83, 0x6a, 0xeb) */
+            val = (int8_t)insn_get(s, OT_BYTE);
+            gen_op1_movl_T1_im(val);
+        } else {
+            gen_op_mov_TN_reg[ot][1][reg]();
+        }
+
+        /* NOTE(review): these two calls use the op_ prefix while every
+           other argument-less micro-op in this function is emitted
+           through a gen_op_ wrapper - confirm whether
+           gen_op_imull_T0_T1/gen_op_imulw_T0_T1 was intended. */
+        if (ot == OT_LONG) {
+            op_imull_T0_T1();
+        } else {
+            op_imulw_T0_T1();
+        }
+        gen_op_mov_reg_T0[ot][reg]();
+        break;
+
+        /**************************/
+        /* push/pop */
+    case 0x50 ... 0x57: /* push */
+        gen_op_mov_TN_reg[OT_LONG][0][(b & 7)]();
+        gen_op_pushl_T0();
+        break;
+    case 0x58 ... 0x5f: /* pop */
+        gen_op_popl_T0();
+        /* the destination register is encoded in the low 3 bits of the
+           opcode; 'reg' is not set on this path */
+        gen_op_mov_reg_T0[OT_LONG][b & 7]();
+        break;
+    case 0x68: /* push Iv */
+    case 0x6a:
+        ot = dflag ? OT_LONG : OT_WORD;
+        if (b == 0x68)
+            val = insn_get(s, ot);
+        else
+            val = (int8_t)insn_get(s, OT_BYTE);
+        gen_op1_movl_T0_im(val);
+        gen_op_pushl_T0();
+        break;
+    case 0x8f: /* pop Ev */
+        ot = dflag ? OT_LONG : OT_WORD;
+        modrm = ldub(s->pc++);
+        gen_op_popl_T0();
+        gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1);
+        break;
+    case 0xc9: /* leave */
+        /* ESP = EBP, then pop EBP */
+        gen_op_mov_TN_reg[OT_LONG][0][R_EBP]();
+        gen_op_mov_reg_T0[OT_LONG][R_ESP]();
+        gen_op_popl_T0();
+        gen_op_mov_reg_T0[OT_LONG][R_EBP]();
+        break;
+        /**************************/
+        /* mov */
+    case 0x88:
+    case 0x89: /* mov Gv, Ev */
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        modrm = ldub(s->pc++);
+        reg = (modrm >> 3) & 7;
+
+        /* generate a generic store */
+        gen_ldst_modrm(s, modrm, ot, OR_EAX + reg, 1);
+        break;
+    case 0xc6:
+    case 0xc7: /* mov Ev, Iv */
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        modrm = ldub(s->pc++);
+        mod = (modrm >> 6) & 3;
+
+        /* The addressing bytes precede the immediate in the instruction
+           stream, so the address must be decoded before the immediate is
+           fetched, as every other immediate-carrying case here does.
+           (Assumes gen_lea_modrm() consumes those bytes from s->pc.) */
+        if (mod != 3)
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+        val = insn_get(s, ot);
+        gen_op1_movl_T0_im(val);
+        if (mod != 3)
+            gen_op_st_T0_A0[ot]();
+        else
+            gen_op_mov_reg_T0[ot][modrm & 7]();
+        break;
+    case 0x8a:
+    case 0x8b: /* mov Ev, Gv */
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        modrm = ldub(s->pc++);
+        reg = (modrm >> 3) & 7;
+
+        gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
+        gen_op_mov_reg_T0[ot][reg]();
+        break;
+
+    case 0x1b6: /* movzbS Gv, Eb */
+    case 0x1b7: /* movzwS Gv, Eb */
+    case 0x1be: /* movsbS Gv, Eb */
+    case 0x1bf: /* movswS Gv, Eb */
+        {
+            int d_ot;
+            /* d_ot is the size of destination */
+            d_ot = dflag + OT_WORD;
+            /* ot is the size of source */
+            ot = (b & 1) + OT_BYTE;
+            modrm = ldub(s->pc++);
+            reg = ((modrm >> 3) & 7) + OR_EAX;
+            mod = (modrm >> 6) & 3;
+            rm = modrm & 7;
+
+            if (mod == 3) {
+                gen_op_mov_TN_reg[ot][0][rm]();
+                /* bit 3 of the opcode selects sign extension */
+                switch(ot | (b & 8)) {
+                case OT_BYTE:
+                    gen_op_movzbl_T0_T0();
+                    break;
+                case OT_BYTE | 8:
+                    gen_op_movsbl_T0_T0();
+                    break;
+                case OT_WORD:
+                    gen_op_movzwl_T0_T0();
+                    break;
+                default:
+                case OT_WORD | 8:
+                    gen_op_movswl_T0_T0();
+                    break;
+                }
+                gen_op_mov_reg_T0[d_ot][reg]();
+            } else {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                if (b & 8) {
+                    gen_op_lds_T0_A0[ot]();
+                } else {
+                    gen_op_ldu_T0_A0[ot]();
+                }
+                gen_op_mov_reg_T0[d_ot][reg]();
+            }
+        }
+        break;
+
+    case 0x8d: /* lea */
+        ot = dflag ? OT_LONG : OT_WORD;
+        modrm = ldub(s->pc++);
+        reg = (modrm >> 3) & 7;
+
+        gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+        gen_op_mov_reg_A0[ot - OT_WORD][reg]();
+        break;
+
+    case 0xa0: /* mov EAX, Ov */
+    case 0xa1:
+    case 0xa2: /* mov Ov, EAX */
+    case 0xa3:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        if (s->aflag)
+            offset_addr = insn_get(s, OT_LONG);
+        else
+            offset_addr = insn_get(s, OT_WORD);
+        /* the ld/st ops below address through A0: load it with the
+           absolute offset that was just fetched */
+        gen_op1_movl_A0_im(offset_addr);
+
+        if ((b & 2) == 0) {
+            gen_op_ld_T0_A0[ot]();
+            gen_op_mov_reg_T0[ot][R_EAX]();
+        } else {
+            gen_op_mov_TN_reg[ot][0][R_EAX]();
+            gen_op_st_T0_A0[ot]();
+        }
+        break;
+
+    case 0xb0 ... 0xb7: /* mov R, Ib */
+        val = insn_get(s, OT_BYTE);
+        gen_op1_movl_T0_im(val);
+        gen_op_mov_reg_T0[OT_BYTE][b & 7]();
+        break;
+    case 0xb8 ... 0xbf: /* mov R, Iv */
+        ot = dflag ? OT_LONG : OT_WORD;
+        val = insn_get(s, ot);
+        reg = OR_EAX + (b & 7);
+        gen_op1_movl_T0_im(val);
+        gen_op_mov_reg_T0[ot][reg]();
+        break;
+
+    case 0x91 ... 0x97: /* xchg R, EAX */
+        ot = dflag ? OT_LONG : OT_WORD;
+        reg = b & 7;
+        gen_op_mov_TN_reg[ot][0][reg]();
+        gen_op_mov_TN_reg[ot][1][R_EAX]();
+        gen_op_mov_reg_T0[ot][R_EAX]();
+        gen_op_mov_reg_T1[ot][reg]();
+        break;
+    case 0x86:
+    case 0x87: /* xchg Ev, Gv */
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        modrm = ldub(s->pc++);
+        reg = (modrm >> 3) & 7;
+        mod = (modrm >> 6) & 3;
+
+        if (mod == 3) {
+            /* register, register form: no effective address to compute,
+               exchange the two registers through T0/T1 */
+            rm = modrm & 7;
+            gen_op_mov_TN_reg[ot][0][reg]();
+            gen_op_mov_TN_reg[ot][1][rm]();
+            gen_op_mov_reg_T0[ot][rm]();
+            gen_op_mov_reg_T1[ot][reg]();
+        } else {
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_op_mov_TN_reg[ot][0][reg]();
+            gen_op_ld_T1_A0[ot]();
+            gen_op_st_T0_A0[ot]();
+            gen_op_mov_reg_T1[ot][reg]();
+        }
+        break;
+
+        /************************/
+        /* shifts */
+    case 0xc0:
+    case 0xc1:
+        /* shift Ev,Ib */
+        shift = 2;
+    grp2:
+        /* shift: 0 = count in CL, 1 = count of 1, 2 = byte immediate
+           count fetched below */
+        {
+            if ((b & 1) == 0)
+                ot = OT_BYTE;
+            else
+                ot = dflag ? OT_LONG : OT_WORD;
+
+            modrm = ldub(s->pc++);
+            mod = (modrm >> 6) & 3;
+            rm = modrm & 7;
+            op = (modrm >> 3) & 7;
+
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_op_ld_T0_A0[ot]();
+                opreg = OR_TMP0;
+            } else {
+                opreg = rm + OR_EAX;
+            }
+
+            /* simpler op */
+            if (shift == 0) {
+                gen_shift(s, op, ot, opreg, OR_ECX);
+            } else {
+                if (shift == 2) {
+                    shift = ldub(s->pc++);
+                }
+                gen_shifti(s, op, ot, opreg, shift);
+            }
+
+            if (mod != 3) {
+                gen_op_st_T0_A0[ot]();
+            }
+        }
+        break;
+    case 0xd0:
+    case 0xd1:
+        /* shift Ev,1 */
+        shift = 1;
+        goto grp2;
+    case 0xd2:
+    case 0xd3:
+        /* shift Ev,cl */
+        shift = 0;
+        goto grp2;
+
+        /************************/
+        /* floats */
+#if 0
+    case 0xd8 ... 0xdf:
+        modrm = ldub(s->pc++);
+        mod = (modrm >> 6) & 3;
+        rm = modrm & 7;
+        op = ((b & 7) << 3) | ((modrm >> 3) & 7);
+
+        if (mod != 3) {
+            /* memory op */
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            switch(op) {
+            case 0x00 ... 0x07: /* fxxxs */
+            case 0x10 ... 0x17: /* fixxxl */
+            case 0x20 ... 0x27: /* fxxxl */
+            case 0x30 ... 0x37: /* fixxx */
+                {
+                    int op1, swap;
+                    op1 = fp_ops[op & 7];
+
+                    swap = 0;
+                    if ((op & 7) == 5 || (op & 7) == 7)
+                        swap = 1;
+
+                    switch(op >> 4) {
+                    case 0:
+                        ot = OT_LONG;
+                        is_int = 0;
+                        break;
+                    case 1:
+                        ot = OT_LONG;
+                        is_int = 1;
+                        break;
+                    case 2:
+                        ot = OT_QUAD;
+                        is_int = 0;
+                        break;
+                    case 3:
+                    default:
+                        ot = OT_WORD;
+                        is_int = 1;
+                        break;
+                    }
+
+                    /* if integer, needs to convert to float */
+                    if (is_int) {
+                        /* XXX: potential loss of precision if large integer */
+                        gen_ld(OP_LDUB + ot, OR_TMP0, reg_addr, offset_addr);
+                        gen_insn2(OP_I2FL, OR_FTMP0, OR_TMP0);
+                    } else {
+                        gen_ld(OP_LDUB + ot, OR_FTMP0, reg_addr, offset_addr);
+                    }
+                    if (ot != OT_QUAD)
+                        op1 += OP_FADDL - OP_FADDQ;
+
+                    if (!swap)
+                        gen_insn3(op1, OR_ST0, OR_ST0, OR_FTMP0);
+                    else
+                        gen_insn3(op1, OR_ST0, OR_FTMP0, OR_ST0);
+
+                    if ((op & 7) == 3) {
+                        /* fcomp needs pop */
+                        gen_insn0(OP_FPOP);
+                    }
+                }
+                break;
+            case 0x08: /* flds */
+            case 0x0a: /* fsts */
+            case 0x0b: /* fstps */
+            case 0x18: /* fildl */
+            case 0x1a: /* fistl */
+            case 0x1b: /* fistpl */
+            case 0x28: /* fldl */
+            case 0x2a: /* fstl */
+            case 0x2b: /* fstpl */
+            case 0x38: /* filds */
+            case 0x3a: /* fists */
+            case 0x3b: /* fistps */
+
+                switch(op >> 4) {
+                case 0:
+                    ot = OT_LONG;
+                    is_int = 0;
+                    break;
+                case 1:
+                    ot = OT_LONG;
+                    is_int = 1;
+                    break;
+                case 2:
+                    ot = OT_QUAD;
+                    is_int = 0;
+                    break;
+                case 3:
+                default:
+                    ot = OT_WORD;
+                    is_int = 1;
+                    break;
+                }
+
+                switch(op & 7) {
+                case 0:
+                    gen_insn0(OP_FPUSH);
+                    if (is_int) {
+                        /* XXX: potential loss of precision */
+                        gen_ld(OP_LDUB + ot, OR_TMP0, reg_addr, offset_addr);
+                        gen_insn2(OP_I2FL, OR_ST0, OR_TMP0);
+                    } else {
+                        gen_ld(OP_LDUB + ot, OR_ST0, reg_addr, offset_addr);
+                    }
+                    break;
+                default:
+                    if (is_int) {
+                        gen_insn2(OP_F2IL, OR_TMP0, OR_ST0);
+                        gen_st(OP_STB + ot, OR_TMP0, reg_addr, offset_addr);
+                    } else {
+                        gen_st(OP_STB + ot, OR_ST0, reg_addr, offset_addr);
+                    }
+                    if ((op & 7) == 3)
+                        gen_insn0(OP_FPOP);
+                    break;
+                }
+                break;
+            case 0x2f: /* fnstsw mem */
+                gen_insn3(OP_FNSTS, OR_TMP0, OR_ZERO, OR_ZERO);
+                gen_st(OP_STW, OR_TMP0, reg_addr, offset_addr);
+                break;
+
+            case 0x3c: /* fbld */
+            case 0x3e: /* fbstp */
+                error("float BCD not hanlded");
+                return -1;
+            case 0x3d: /* fildll */
+                gen_insn0(OP_FPUSH);
+                gen_ld(OP_LDQ, OR_TMP0, reg_addr, offset_addr);
+                gen_insn2(OP_I2FQ, OR_ST0, OR_TMP0);
+                break;
+            case 0x3f: /* fistpll */
+                gen_insn2(OP_F2IQ, OR_TMP0, OR_ST0);
+                gen_st(OP_STQ, OR_TMP0, reg_addr, offset_addr);
+                gen_insn0(OP_FPOP);
+                break;
+            default:
+                error("unhandled memory FP\n");
+                return -1;
+            }
+        } else {
+            /* register float ops */
+            opreg = rm + OR_ST0;
+
+            switch(op) {
+            case 0x08: /* fld sti */
+                gen_insn0(OP_FPUSH);
+                gen_mov(OR_ST0, OR_ST0 + ((rm + 1) & 7));
+                break;
+            case 0x09: /* fxchg sti */
+                gen_mov(OR_TMP0, OR_ST0);
+                gen_mov(OR_ST0, opreg);
+                gen_mov(opreg, OR_TMP0);
+                break;
+            case 0x0a: /* grp d9/2 */
+                switch(rm) {
+                case 0: /* fnop */
+                    gen_insn0(OP_NOP);
+                    break;
+                default:
+                    error("unhandled FP GRP d9/2\n");
+                    return -1;
+                }
+                break;
+            case 0x0c: /* grp d9/4 */
+                switch(rm) {
+                case 0: /* fchs */
+                    gen_insn3(OP_FSUBQ, OR_ST0, OR_ZERO, OR_ST0);
+                    break;
+                case 1: /* fabs */
+                    gen_insn2(OP_FABSQ, OR_ST0, OR_ST0);
+                    break;
+                case 4: /* ftst */
+                    gen_insn3(OP_CMP, OR_ZERO, OR_ST0, OR_ZERO);
+                    break;
+                case 5: /* fxam */
+                    gen_insn3(OP_FSPECIAL, OR_ZERO, OR_ST0, OR_ZERO);
+                    break;
+                default:
+                    return -1;
+                }
+                break;
+            case 0x0d: /* grp d9/5 */
+                {
+                    if (rm == 7) {
+                        error("bad GRP d9/5");
+                        return -1;
+                    }
+                    /* XXX: needs constant load or symbol table */
+                    gen_insn0(OP_FPUSH);
+                    gen_ld(OP_LDQ, OR_ST0, OR_ZERO,
+                           (rm * 8) + FLOAT_CONST_ADDR);
+                }
+                break;
+            case 0x0e: /* grp d9/6 */
+                switch(rm) {
+                case 0: /* f2xm1 */
+                    gen_insn3(OP_FSPECIAL, OR_ST0, OR_ST0, OR_ZERO);
+                    break;
+                case 1: /* fyl2x */
+                    gen_insn3(OP_FSPECIAL, OR_ST1, OR_ST0, OR_ST1);
+                    gen_insn0(OP_FPOP);
+                    break;
+                case 2: /* fptan */
+                    gen_insn3(OP_FSPECIAL, OR_ST0, OR_ST0, OR_ZERO);
+                    gen_insn0(OP_FPUSH);
+                    /* load one */
+                    gen_ld(OP_LDQ, OR_ST0, OR_ZERO,
+                           (0 * 8) + FLOAT_CONST_ADDR);
+                    break;
+                case 3: /* fpatan */
+                    gen_insn3(OP_FSPECIAL, OR_ST1, OR_ST0, OR_ST1);
+                    gen_insn0(OP_FPOP);
+                    break;
+                case 4: /* fxtract */
+                    gen_insn0(OP_FPUSH);
+                    gen_insn3(OP_FSPECIAL, OR_ST0, OR_ST1, OR_ZERO);
+                    gen_insn3(OP_FSPECIAL, OR_ST1, OR_ST1, OR_ZERO);
+                    break;
+                case 5: /* fprem1 */
+                    gen_insn3(OP_FSPECIAL, OR_ST0, OR_ST0, OR_ST1);
+                    break;
+                case 6: /* fdecstp */
+                    gen_insn0(OP_FPUSH);
+                    break;
+                default:
+                case 7: /* fincstp */
+                    gen_insn0(OP_FPOP);
+                    break;
+                }
+                break;
+            case 0x0f: /* grp d9/7 */
+                switch(rm) {
+                case 0: /* fprem */
+                    gen_insn3(OP_FSPECIAL, OR_ST0, OR_ST0, OR_ST1);
+                    break;
+                case 1: /* fyl2xp1 */
+                    gen_insn3(OP_FSPECIAL, OR_ST1, OR_ST0, OR_ST1);
+                    gen_insn0(OP_FPOP);
+                    break;
+                case 3: /* fsincos */
+                    gen_insn0(OP_FPUSH);
+                    gen_insn3(OP_FSPECIAL, OR_ST0, OR_ST1, OR_ZERO);
+                    gen_insn3(OP_FSPECIAL, OR_ST1, OR_ST1, OR_ZERO);
+                    break;
+                case 5: /* fscale */
+                    gen_insn3(OP_FSPECIAL, OR_ST0, OR_ST0, OR_ST1);
+                    break;
+                case 2: /* fsqrt */
+                case 4: /* frndint */
+                case 6: /* fsin */
+                default:
+                case 7: /* fcos */
+                    gen_insn3(OP_FSPECIAL, OR_ST0, OR_ST0, OR_ZERO);
+                    break;
+                }
+                break;
+            case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */
+            case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */
+            case 0x30: case 0x31: case 0x34 ... 0x37: /* fxxxp sti, st */
+                {
+                    int op1, swap;
+
+                    op1 = fp_ops[op & 7];
+                    swap = 0;
+                    if ((op & 7) == 5 || (op & 7) == 7)
+                        swap = 1;
+                    if (op >= 0x20) {
+                        if (swap)
+                            gen_insn3(op1, opreg, OR_ST0, opreg);
+                        else
+                            gen_insn3(op1, opreg, opreg, OR_ST0);
+                    } else {
+                        if (swap)
+                            gen_insn3(op1, OR_ST0, opreg, OR_ST0);
+                        else
+                            gen_insn3(op1, OR_ST0, OR_ST0, opreg);
+                    }
+                    if (op >= 0x30)
+                        gen_insn0(OP_FPOP);
+                }
+                break;
+            case 0x02: /* fcom */
+                gen_insn3(OP_CMP, OR_ZERO, OR_ST0, opreg);
+                break;
+            case 0x03: /* fcomp */
+                gen_insn3(OP_CMP, OR_ZERO, OR_ST0, opreg);
+                gen_insn0(OP_FPOP);
+                break;
+            case 0x15: /* da/5 */
+                switch(rm) {
+                case 1: /* fucompp */
+                    gen_insn3(OP_CMP, OR_ZERO, OR_ST0, opreg);
+                    gen_insn0(OP_FPOP);
+                    gen_insn0(OP_FPOP);
+                    break;
+                default:
+                    return -1;
+                }
+                break;
+            case 0x2a: /* fst sti */
+                gen_mov(opreg, OR_ST0);
+                break;
+            case 0x2b: /* fstp sti */
+                gen_mov(opreg, OR_ST0);
+                gen_insn0(OP_FPOP);
+                break;
+            case 0x33: /* de/3 */
+                switch(rm) {
+                case 1: /* fcompp */
+                    gen_insn3(OP_CMP, OR_ZERO, OR_ST0, opreg);
+                    gen_insn0(OP_FPOP);
+                    gen_insn0(OP_FPOP);
+                    break;
+                default:
+                    return -1;
+                }
+                break;
+            case 0x3c: /* df/4 */
+                switch(rm) {
+                case 0:
+                    gen_insn3(OP_FNSTS, OR_EAX, OR_ZERO, OR_ZERO);
+                    break;
+                default:
+                    return -1;
+                }
+                break;
+            default:
+                error("unhandled FP\n");
+                return -1;
+            }
+        }
+        break;
+#endif
+        /************************/
+        /* string ops */
+        /* the string op tables are indexed by operand size, with the
+           REPZ variants at offset 3 and the REPNZ variants at offset 6 */
+    case 0xa4: /* movsS */
+    case 0xa5:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        if (prefixes & PREFIX_REPZ) {
+            gen_op_movs[3 + ot]();
+        } else {
+            gen_op_movs[ot]();
+        }
+        break;
+
+    case 0xaa: /* stosS */
+    case 0xab:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        if (prefixes & PREFIX_REPZ) {
+            gen_op_stos[3 + ot]();
+        } else {
+            gen_op_stos[ot]();
+        }
+        break;
+    case 0xac: /* lodsS */
+    case 0xad:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        if (prefixes & PREFIX_REPZ) {
+            gen_op_lods[3 + ot]();
+        } else {
+            gen_op_lods[ot]();
+        }
+        break;
+    case 0xae: /* scasS */
+    case 0xaf:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        if (prefixes & PREFIX_REPNZ) {
+            gen_op_scas[6 + ot]();
+        } else if (prefixes & PREFIX_REPZ) {
+            gen_op_scas[3 + ot]();
+        } else {
+            gen_op_scas[ot]();
+        }
+        break;
+
+    case 0xa6: /* cmpsS */
+    case 0xa7:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        if (prefixes & PREFIX_REPNZ) {
+            gen_op_cmps[6 + ot]();
+        } else if (prefixes & PREFIX_REPZ) {
+            gen_op_cmps[3 + ot]();
+        } else {
+            gen_op_cmps[ot]();
+        }
+        break;
+
+    case 0x6c: /* insS */
+    case 0x6d:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        if (prefixes & PREFIX_REPZ) {
+            gen_op_ins[3 + ot]();
+        } else {
+            gen_op_ins[ot]();
+        }
+        break;
+    case 0x6e: /* outsS */
+    case 0x6f:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        if (prefixes & PREFIX_REPZ) {
+            gen_op_outs[3 + ot]();
+        } else {
+            gen_op_outs[ot]();
+        }
+        break;
+
+        /************************/
+        /* control */
+    case 0xc2: /* ret im */
+        /* XXX: handle stack pop ? */
+        val = ldsw(s->pc);
+        s->pc += 2;
+        gen_op_popl_T0();
+        gen_op_addl_ESP_im(val);
+        gen_op_jmp_T0();
+        break;
+    case 0xc3: /* ret */
+        gen_op_popl_T0();
+        gen_op_jmp_T0();
+        break;
+    case 0xe8: /* call */
+        /* the displacement is relative to the next instruction */
+        val = insn_get(s, OT_LONG);
+        val += (long)s->pc;
+        gen_op1_movl_T1_im((long)s->pc);
+        gen_op_pushl_T1();
+        gen_op_jmp_im(val);
+        break;
+    case 0xe9: /* jmp */
+        val = insn_get(s, OT_LONG);
+        val += (long)s->pc;
+        gen_op_jmp_im(val);
+        break;
+    case 0xeb: /* jmp Jb */
+        val = (int8_t)insn_get(s, OT_BYTE);
+        val += (long)s->pc;
+        gen_op_jmp_im(val);
+        break;
+    case 0x70 ... 0x7f: /* jcc Jb */
+        val = (int8_t)insn_get(s, OT_BYTE);
+        val += (long)s->pc;
+        goto do_jcc;
+    case 0x180 ... 0x18f: /* jcc Jv */
+        if (dflag) {
+            val = insn_get(s, OT_LONG);
+        } else {
+            val = (int16_t)insn_get(s, OT_WORD);
+        }
+        val += (long)s->pc; /* XXX: fix 16 bit wrap */
+    do_jcc:
+        gen_jcc(s, b, val);
+        break;
+
+    case 0x190 ... 0x19f: /* setcc Eb */
+        modrm = ldub(s->pc++);
+        gen_setcc(s, b);
+        gen_ldst_modrm(s, modrm, OT_BYTE, OR_TMP0, 1);
+        break;
+
+        /************************/
+        /* flags */
+    case 0x9c: /* pushf */
+        gen_op_movl_T0_eflags();
+        gen_op_pushl_T0();
+        break;
+    case 0x9d: /* popf */
+        gen_op_popl_T0();
+        gen_op_movl_eflags_T0();
+        s->cc_op = CC_OP_EFLAGS;
+        break;
+    case 0x9e: /* sahf */
+        gen_op_mov_TN_reg[OT_BYTE][0][R_AH]();
+        if (s->cc_op != CC_OP_DYNAMIC)
+            op_set_cc_op(s->cc_op);
+        gen_op_movb_eflags_T0();
+        s->cc_op = CC_OP_EFLAGS;
+        break;
+    case 0x9f: /* lahf */
+        if (s->cc_op != CC_OP_DYNAMIC)
+            op_set_cc_op(s->cc_op);
+        gen_op_movl_T0_eflags();
+        gen_op_mov_reg_T0[OT_BYTE][R_AH]();
+        break;
+    case 0xf5: /* cmc */
+        if (s->cc_op != CC_OP_DYNAMIC)
+            op_set_cc_op(s->cc_op);
+        gen_op_cmc();
+        s->cc_op = CC_OP_EFLAGS;
+        break;
+    case 0xf8: /* clc */
+        if (s->cc_op != CC_OP_DYNAMIC)
+            op_set_cc_op(s->cc_op);
+        gen_op_clc();
+        s->cc_op = CC_OP_EFLAGS;
+        break;
+    case 0xf9: /* stc */
+        if (s->cc_op != CC_OP_DYNAMIC)
+            op_set_cc_op(s->cc_op);
+        gen_op_stc();
+        s->cc_op = CC_OP_EFLAGS;
+        break;
+    case 0xfc: /* cld */
+        gen_op_cld();
+        break;
+    case 0xfd: /* std */
+        gen_op_std();
+        break;
+
+        /************************/
+        /* misc */
+    case 0x90: /* nop */
+        break;
+
+#if 0
+    case 0x1a2: /* cpuid */
+        gen_insn0(OP_ASM);
+        break;
+#endif
+    default:
+        error("unknown opcode %x", b);
+        return -1;
+    }
+    return (long)s->pc;
+}
+