From 6302e9b4159c87fc66c5e413b2f7afa40d8fade4 Mon Sep 17 00:00:00 2001 From: Zoe Roux Date: Wed, 5 Jul 2023 11:10:43 +0900 Subject: [PATCH] Start to implement mov --- Makefile | 3 +- src/dasm.c | 10 ++-- src/dasm.h | 10 +++- src/instructions.c | 61 ++++++++++++------------ src/instructions.h | 5 ++ src/instructions/memory.c | 98 +++++++++++++++++++++++++++++++++++++++ src/interpretor.c | 4 +- src/interpretor.h | 6 +++ 8 files changed, 158 insertions(+), 39 deletions(-) create mode 100644 src/instructions.h create mode 100644 src/instructions/memory.c diff --git a/Makefile b/Makefile index f9b9c05..6131866 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,8 @@ LDFLAGS = SRC = src/main.c \ src/dasm.c \ src/instructions.c \ - src/interpretor.c + src/interpretor.c \ + src/instructions/memory.c OBJ = $(SRC:%.c=%.o) diff --git a/src/dasm.c b/src/dasm.c index eb1ca76..0cd5dce 100644 --- a/src/dasm.c +++ b/src/dasm.c @@ -34,7 +34,7 @@ int print_rm_operand(u_int8_t *binary, int imm_idx, bool is16bit) if (mod == 0b01) { int8_t disp_v = binary[imm_idx]; imm_offset++; - snprintf(disp, 20, "%c%x", disp_v < 0 ? '-' : '+', disp_v < 0 ? -disp_v : disp_v); + snprintf(disp, sizeof(disp), "%c%x", disp_v < 0 ? '-' : '+', disp_v < 0 ? -disp_v : disp_v); } switch (rm) @@ -62,10 +62,10 @@ unsigned read_size(u_int8_t *binary, unsigned size) return ret; } -bool has_reg(instruction_t inst) +bool has_reg(const instruction_t *inst) { - for (int i = 0; inst.mode[i] != END; i++) { - if (inst.mode[i] == REG8 || inst.mode[i] == REG16) + for (int i = 0; inst->mode[i] != END; i++) { + if (inst->mode[i] == REG8 || inst->mode[i] == REG16) return true; } return false; @@ -77,7 +77,7 @@ void print_instruction(unsigned addr, instruction_t inst, unsigned inst_size, u_ // if the instruction has already a param in it (ex `in al`), we need to directly add a comma. // `jmp short` is an exception as it is the only instruction with a space in it that is not a parameter. 
bool need_comma = !last_param && strchr(inst.name, ' ') && inst.opcode != 0xEB; - int imm_idx = 1 + (inst.extended != -1 || has_reg(inst)); + int imm_idx = 1 + (inst.extended != -1 || has_reg(&inst)); printf("%04x:%s%0*x%-*s", addr, space ? " " : "", inst_size * 2, read_size(binary, inst_size), 14 - inst_size * 2, ""); if (last_param) diff --git a/src/dasm.h b/src/dasm.h index 2a2b056..96ac61d 100644 --- a/src/dasm.h +++ b/src/dasm.h @@ -4,6 +4,8 @@ #include #include +#define FALLTHROUGHT __attribute__((fallthrough)); + int dasm(u_int8_t *binary, unsigned long size); int interpret(u_int8_t *binary, unsigned long size); @@ -24,6 +26,10 @@ typedef enum addressing_mode { REG8, // 16bit register REG16, + // 8bit register but stored inside the opcode + OPREG8, + // 16bit register but stored inside the opcode + OPREG16, // Register or 8bit of memory R_M8, // Register or 16bit of memory @@ -45,7 +51,7 @@ typedef struct instruction { const char *name; addressing_mode_t mode[5]; int extended; - void (*exec)(state_t *state); + void (*exec)(const struct instruction *self, state_t *state); } instruction_t; extern const instruction_t instructions[]; @@ -55,4 +61,4 @@ extern const instruction_t invalid_instruction; instruction_t parse_inst(u_int8_t *binary, unsigned long size); unsigned get_inst_size(instruction_t inst, u_int8_t *binary, unsigned bin_size); void print_instruction(unsigned addr, instruction_t inst, unsigned inst_size, u_int8_t *binary, bool space); - +bool has_reg(const instruction_t *inst); diff --git a/src/instructions.c b/src/instructions.c index 2c1690c..aa777b9 100644 --- a/src/instructions.c +++ b/src/instructions.c @@ -1,6 +1,7 @@ #include #include #include "dasm.h" +#include "instructions.h" instruction_t parse_inst(u_int8_t *binary, unsigned long size) { @@ -27,8 +28,8 @@ const instruction_t instructions[] = { {.opcode = 0x01, .extended = -1, .name = "add", .mode = {R_M16, REG16, END}, .exec = NULL}, {.opcode = 0x02, .extended = -1, .name = "add", .mode 
= {REG8, R_M8, END}, .exec = NULL}, {.opcode = 0x03, .extended = -1, .name = "add", .mode = {REG16, R_M16, END}, .exec = NULL}, - {.opcode = 0x04, .extended = -1, .name = "add al", .mode = {IMM8, END}, .exec = NULL}, - {.opcode = 0x05, .extended = -1, .name = "add ax", .mode = {IMM16, END}, .exec = NULL}, + {.opcode = 0x04, .extended = -1, .name = "add al", .mode = {OPREG8, IMM8, END}, .exec = NULL}, + {.opcode = 0x05, .extended = -1, .name = "add ax", .mode = {OPREG16, IMM16, END}, .exec = NULL}, {.opcode = 0x06, .extended = -1, .name = "push es", .mode = {END}, .exec = NULL}, {.opcode = 0x07, .extended = -1, .name = "pop es", .mode = {END}, .exec = NULL}, {.opcode = 0x08, .extended = -1, .name = "or", .mode = {R_M8, REG8, END}, .exec = NULL}, @@ -143,13 +144,13 @@ const instruction_t instructions[] = { {.opcode = 0x85, .extended = -1, .name = "test", .mode = {R_M16, REG16, END}, .exec = NULL}, {.opcode = 0x86, .extended = -1, .name = "xchg", .mode = {R_M8, REG8, END}, .exec = NULL}, {.opcode = 0x87, .extended = -1, .name = "xchg", .mode = {R_M16, REG16, END}, .exec = NULL}, - {.opcode = 0x88, .extended = -1, .name = "mov", .mode = {R_M8, REG8, END}, .exec = NULL}, - {.opcode = 0x89, .extended = -1, .name = "mov", .mode = {R_M16, REG16, END}, .exec = NULL}, - {.opcode = 0x8A, .extended = -1, .name = "mov", .mode = {REG8, R_M8, END}, .exec = NULL}, - {.opcode = 0x8B, .extended = -1, .name = "mov", .mode = {REG16, R_M16, END}, .exec = NULL}, - // {.opcode = 0x8C, .extended = -1, .name = "mov", .mode = {R_M16, sreg, END}, .exec = NULL}, + {.opcode = 0x88, .extended = -1, .name = "mov", .mode = {R_M8, REG8, END}, .exec = &mov}, + {.opcode = 0x89, .extended = -1, .name = "mov", .mode = {R_M16, REG16, END}, .exec = &mov}, + {.opcode = 0x8A, .extended = -1, .name = "mov", .mode = {REG8, R_M8, END}, .exec = &mov}, + {.opcode = 0x8B, .extended = -1, .name = "mov", .mode = {REG16, R_M16, END}, .exec = &mov}, + // {.opcode = 0x8C, .extended = -1, .name = "mov", .mode = 
{R_M16, sreg, END}, .exec = &mov}, {.opcode = 0x8D, .extended = -1, .name = "lea", .mode = {REG16, R_M16, END}, .exec = NULL}, - // {.opcode = 0x8E, .extended = -1, .name = "mov", .mode = {sreg, R_M16, END}, .exec = NULL}, + // {.opcode = 0x8E, .extended = -1, .name = "mov", .mode = {sreg, R_M16, END}, .exec = &mov}, {.opcode = 0x8F, .extended = -1, .name = "pop", .mode = {R_M16, END}, .exec = NULL}, {.opcode = 0x90, .extended = -1, .name = "xchg ax, ax", .mode = {END}, .exec = NULL}, {.opcode = 0x91, .extended = -1, .name = "xchg cx, ax", .mode = {END}, .exec = NULL}, @@ -167,10 +168,10 @@ const instruction_t instructions[] = { {.opcode = 0x9D, .extended = -1, .name = "popf", .mode = {END}, .exec = NULL}, {.opcode = 0x9E, .extended = -1, .name = "sahf", .mode = {END}, .exec = NULL}, {.opcode = 0x9F, .extended = -1, .name = "lahf", .mode = {END}, .exec = NULL}, - // {.opcode = 0xA0, .extended = -1, .name = "mov al", .mode = {moffs16, END}, .exec = NULL}, - // {.opcode = 0xA1, .extended = -1, .name = "mov ax", .mode = {moffs16, END}, .exec = NULL}, - // {.opcode = 0xA2, .extended = -1, .name = "mov %s, al", .mode = {moffs16, END}, .exec = NULL}, - // {.opcode = 0xA3, .extended = -1, .name = "mov %s, ax", .mode = {moffs16, END}, .exec = NULL}, + // {.opcode = 0xA0, .extended = -1, .name = "mov al", .mode = {moffs16, END}, .exec = &mov}, + // {.opcode = 0xA1, .extended = -1, .name = "mov ax", .mode = {moffs16, END}, .exec = &mov}, + // {.opcode = 0xA2, .extended = -1, .name = "mov %s, al", .mode = {moffs16, END}, .exec = &mov}, + // {.opcode = 0xA3, .extended = -1, .name = "mov %s, ax", .mode = {moffs16, END}, .exec = &mov}, {.opcode = 0xA4, .extended = -1, .name = "movsb", .mode = {END}, .exec = NULL}, {.opcode = 0xA5, .extended = -1, .name = "movsw", .mode = {END}, .exec = NULL}, {.opcode = 0xA6, .extended = -1, .name = "cmpsb", .mode = {END}, .exec = NULL}, @@ -183,29 +184,29 @@ const instruction_t instructions[] = { {.opcode = 0xAD, .extended = -1, .name = 
"lodsw", .mode = {END}, .exec = NULL}, {.opcode = 0xAE, .extended = -1, .name = "scasb", .mode = {END}, .exec = NULL}, {.opcode = 0xAF, .extended = -1, .name = "scasw", .mode = {END}, .exec = NULL}, - {.opcode = 0xB0, .extended = -1, .name = "mov al", .mode = {IMM8, END}, .exec = NULL}, - {.opcode = 0xB1, .extended = -1, .name = "mov cl", .mode = {IMM8, END}, .exec = NULL}, - {.opcode = 0xB2, .extended = -1, .name = "mov dl", .mode = {IMM8, END}, .exec = NULL}, - {.opcode = 0xB3, .extended = -1, .name = "mov bl", .mode = {IMM8, END}, .exec = NULL}, - {.opcode = 0xB4, .extended = -1, .name = "mov ah", .mode = {IMM8, END}, .exec = NULL}, - {.opcode = 0xB5, .extended = -1, .name = "mov ch", .mode = {IMM8, END}, .exec = NULL}, - {.opcode = 0xB6, .extended = -1, .name = "mov dh", .mode = {IMM8, END}, .exec = NULL}, - {.opcode = 0xB7, .extended = -1, .name = "mov bh", .mode = {IMM8, END}, .exec = NULL}, - {.opcode = 0xB8, .extended = -1, .name = "mov ax", .mode = {IMM16, END}, .exec = NULL}, - {.opcode = 0xB9, .extended = -1, .name = "mov cx", .mode = {IMM16, END}, .exec = NULL}, - {.opcode = 0xBA, .extended = -1, .name = "mov dx", .mode = {IMM16, END}, .exec = NULL}, - {.opcode = 0xBB, .extended = -1, .name = "mov bx", .mode = {IMM16, END}, .exec = NULL}, - {.opcode = 0xBC, .extended = -1, .name = "mov sp", .mode = {IMM16, END}, .exec = NULL}, - {.opcode = 0xBD, .extended = -1, .name = "mov bp", .mode = {IMM16, END}, .exec = NULL}, - {.opcode = 0xBE, .extended = -1, .name = "mov si", .mode = {IMM16, END}, .exec = NULL}, - {.opcode = 0xBF, .extended = -1, .name = "mov di", .mode = {IMM16, END}, .exec = NULL}, + {.opcode = 0xB0, .extended = -1, .name = "mov al", .mode = {IMM8, END}, .exec = &mov}, + {.opcode = 0xB1, .extended = -1, .name = "mov cl", .mode = {IMM8, END}, .exec = &mov}, + {.opcode = 0xB2, .extended = -1, .name = "mov dl", .mode = {IMM8, END}, .exec = &mov}, + {.opcode = 0xB3, .extended = -1, .name = "mov bl", .mode = {IMM8, END}, .exec = &mov}, + {.opcode = 
0xB4, .extended = -1, .name = "mov ah", .mode = {IMM8, END}, .exec = &mov}, + {.opcode = 0xB5, .extended = -1, .name = "mov ch", .mode = {IMM8, END}, .exec = &mov}, + {.opcode = 0xB6, .extended = -1, .name = "mov dh", .mode = {IMM8, END}, .exec = &mov}, + {.opcode = 0xB7, .extended = -1, .name = "mov bh", .mode = {IMM8, END}, .exec = &mov}, + {.opcode = 0xB8, .extended = -1, .name = "mov ax", .mode = {IMM16, END}, .exec = &mov}, + {.opcode = 0xB9, .extended = -1, .name = "mov cx", .mode = {IMM16, END}, .exec = &mov}, + {.opcode = 0xBA, .extended = -1, .name = "mov dx", .mode = {IMM16, END}, .exec = &mov}, + {.opcode = 0xBB, .extended = -1, .name = "mov bx", .mode = {IMM16, END}, .exec = &mov}, + {.opcode = 0xBC, .extended = -1, .name = "mov sp", .mode = {IMM16, END}, .exec = &mov}, + {.opcode = 0xBD, .extended = -1, .name = "mov bp", .mode = {IMM16, END}, .exec = &mov}, + {.opcode = 0xBE, .extended = -1, .name = "mov si", .mode = {IMM16, END}, .exec = &mov}, + {.opcode = 0xBF, .extended = -1, .name = "mov di", .mode = {IMM16, END}, .exec = &mov}, {.opcode = 0xC2, .extended = -1, .name = "ret", .mode = {IMM16, END}, .exec = NULL}, {.opcode = 0xC3, .extended = -1, .name = "ret", .mode = {END}, .exec = NULL}, {.opcode = 0xC4, .extended = -1, .name = "les", .mode = {REG16, R_M16, END}, .exec = NULL}, {.opcode = 0xC5, .extended = -1, .name = "lds", .mode = {R_M16, REG16, END}, .exec = NULL}, - {.opcode = 0xC6, .extended = -1, .name = "mov", .mode = {R_M16, IMM8, END}, .exec = NULL}, - {.opcode = 0xC7, .extended = -1, .name = "mov", .mode = {R_M16, IMM16, END}, .exec = NULL}, + {.opcode = 0xC6, .extended = -1, .name = "mov", .mode = {R_M16, IMM8, END}, .exec = &mov}, + {.opcode = 0xC7, .extended = -1, .name = "mov", .mode = {R_M16, IMM16, END}, .exec = &mov}, {.opcode = 0xCA, .extended = -1, .name = "retf", .mode = {IMM16, END}, .exec = NULL}, {.opcode = 0xCB, .extended = -1, .name = "retf", .mode = {END}, .exec = NULL}, {.opcode = 0xCC, .extended = -1, .name = "int3", 
.mode = {END}, .exec = NULL},
diff --git a/src/instructions.h b/src/instructions.h
new file mode 100644
index 0000000..f85177e
--- /dev/null
+++ b/src/instructions.h
@@ -0,0 +1,5 @@
+#pragma once
+
+#include "dasm.h"
+
+void mov(const instruction_t *self, state_t *state);
diff --git a/src/instructions/memory.c b/src/instructions/memory.c
new file mode 100644
index 0000000..0778668
--- /dev/null
+++ b/src/instructions/memory.c
@@ -0,0 +1,161 @@
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include "../dasm.h"
+
+// Offset of the ModRM byte from the first byte of the current instruction.
+#define MODRM_OFFSET 1
+
+// Return a pointer to the register with the given 3-bit encoding, taken from
+// the 16-bit set (ax, cx, dx, bx, sp, bp, si, di) or from the 8-bit set
+// (al, cl, dl, bl, ah, ch, dh, bh).
+static unsigned *get_register(state_t *state, unsigned index, bool is16bit)
+{
+	uint8_t *registers8[8] = {
+		&state->al,
+		&state->cl,
+		&state->dl,
+		&state->bl,
+		&state->ah,
+		&state->ch,
+		&state->dh,
+		&state->bh,
+	};
+	uint16_t *registers16[8] = {
+		&state->ax,
+		&state->cx,
+		&state->dx,
+		&state->bx,
+		&state->sp,
+		&state->bp,
+		&state->si,
+		&state->di,
+	};
+
+	if (is16bit)
+		return (void *)registers16[index & 0b111];
+	return (void *)registers8[index & 0b111];
+}
+
+// Register operand, selected by the reg field (bits 3-5) of the ModRM byte.
+// The pointer designates 1 or 2 bytes of storage only: callers must not read
+// a whole `unsigned` through it.
+unsigned *get_reg_operand(state_t *state, bool is16bit)
+{
+	unsigned modrm = state->binary[state->pc + MODRM_OFFSET];
+
+	return get_register(state, modrm >> 3, is16bit);
+}
+
+// Register-or-memory operand, selected by the mod (bits 6-7) and r/m
+// (bits 0-2) fields of the ModRM byte. *imm_idx is increased by the number of
+// displacement bytes consumed. Returns NULL for the addressing forms that are
+// not decoded yet.
+unsigned *get_rm_operand(const instruction_t *inst, state_t *state, unsigned *imm_idx, bool is16bit)
+{
+	unsigned modrm = state->binary[state->pc + MODRM_OFFSET];
+	unsigned mod = modrm >> 6;
+	unsigned rm = modrm & 0b111;
+
+	(void)inst;
+	// mod == 11: the register is named by the r/m field, not by the reg field.
+	if (mod == 0b11)
+		return get_register(state, rm, is16bit);
+
+	// mod == 00 and r/m == 110: direct address, a 16-bit displacement follows.
+	if (mod == 0 && rm == 0b110) {
+		unsigned disp_idx = state->parse_data.imm_idx + *imm_idx;
+
+		*imm_idx += 2;
+		// TODO: this designates the displacement bytes themselves, not the
+		// memory they address.
+		return (void *)&state->binary[state->pc + disp_idx];
+	}
+	// TODO: remaining addressing forms.
+	return NULL;
+}
+
+// Decode the i-th operand of the instruction located at state->pc.
+// Immediates are found through state->parse_data.imm_idx, which is then
+// advanced by the number of bytes the operand used. Returns NULL when the
+// operand kind is not decoded.
+unsigned *get_operand(const instruction_t *inst, unsigned i, state_t *state)
+{
+	unsigned *ret = NULL;
+	unsigned imm_idx = 0;
+
+	switch (inst->mode[i]) {
+	case IMM16:
+		imm_idx++;
+		FALLTHROUGHT;
+	case IMM8:
+		imm_idx++;
+		ret = (void *)&state->binary[state->pc + state->parse_data.imm_idx];
+		break;
+	case REL16:
+		imm_idx++;
+		FALLTHROUGHT;
+	case REL8:
+		imm_idx++;
+		// TODO:
+		break;
+	case REG8:
+		ret = get_reg_operand(state, false);
+		break;
+	case REG16:
+		ret = get_reg_operand(state, true);
+		break;
+	case R_M8:
+		ret = get_rm_operand(inst, state, &imm_idx, false);
+		break;
+	case R_M16:
+		ret = get_rm_operand(inst, state, &imm_idx, true);
+		break;
+	case END:
+		printf("Invalid parameter read.");
+		break;
+	default:
+		// OPREG8, OPREG16 and any other kind are not decoded yet.
+		break;
+	}
+	state->parse_data.imm_idx += imm_idx;
+	return ret;
+}
+
+// Tell whether an operand of the given kind is 16 bits wide.
+static bool is_16bit_mode(addressing_mode_t mode)
+{
+	return mode == IMM16 || mode == REL16 || mode == REG16 || mode == OPREG16 || mode == R_M16;
+}
+
+// Read the 1 or 2 bytes an operand pointer designates. memcpy is used since
+// the pointer may target an 8-bit register or any byte of the binary.
+static unsigned read_operand(const unsigned *operand, bool is16bit)
+{
+	uint16_t word = 0;
+	uint8_t byte = 0;
+
+	if (is16bit) {
+		memcpy(&word, operand, sizeof(word));
+		return word;
+	}
+	memcpy(&byte, operand, sizeof(byte));
+	return byte;
+}
+
+// mov: for now, only decodes both operands and traces them.
+void mov(const instruction_t *self, state_t *state)
+{
+	// Operand 0 is the destination, operand 1 the source.
+	unsigned *dst = get_operand(self, 0, state);
+	unsigned *src = get_operand(self, 1, state);
+
+	printf("mov %p %p\n", (void *)dst, (void *)src);
+	if (!dst || !src) {
+		printf("mov: operand not decoded.\n");
+		return;
+	}
+	printf("mov %x %x\n", read_operand(dst, is_16bit_mode(self->mode[0])),
+		read_operand(src, is_16bit_mode(self->mode[1])));
+}
diff --git a/src/interpretor.c b/src/interpretor.c
index d7ff87d..5455a4d 100644
--- a/src/interpretor.c
+++ b/src/interpretor.c
@@ -37,6 +37,7 @@ int interpret(u_int8_t *binary, unsigned long size)
 	}
 
 	binary += header_size;
+	state->binary = binary;
 
 	printf(" AX BX CX DX SP BP SI DI FLAGS IP\n");
 	while (state->pc < size) {
@@ -47,8 +48,9 @@ int interpret(u_int8_t *binary, unsigned long size)
 		}
 		print_state(state);
 		print_instruction(state->pc, inst, inst_size, binary, false);
+		state->parse_data.imm_idx = 1 + (inst.extended != -1 || has_reg(&inst));
 		if (inst.exec)
-			inst.exec(state);
+			inst.exec(&inst, state);
 		else
 			printf("Not implemented.\n");
 		state->pc += inst_size;
diff --git a/src/interpretor.h b/src/interpretor.h
index 9eb5828..3b82307 100644
--- a/src/interpretor.h
+++ b/src/interpretor.h
@@ -53,4 +53,10 @@ typedef struct state {
 	};
 	unsigned flags:10;
 	};
+
+	uint8_t *binary;
+
+	struct {
+		unsigned imm_idx;
+	} parse_data;
 } state_t;