From e9b2a773acc73c40e4cc2db827161c1380a45ad5 Mon Sep 17 00:00:00 2001 From: Zoe Roux Date: Wed, 31 May 2023 16:08:56 +0900 Subject: [PATCH] Parse header and first instruction --- .clang-format | 1 + .editorconfig | 9 +++++++++ Makefile | 5 +++-- src/dasm.c | 44 ++++++++++++++++++++++++++++++++++++++------ src/dasm.h | 34 +++++++++++++++++++++++++++++++++- src/instructions.c | 24 ++++++++++++++++++++++++ src/main.c | 23 ++++++++++++----------- 7 files changed, 120 insertions(+), 20 deletions(-) create mode 100644 .clang-format create mode 100644 .editorconfig create mode 100644 src/instructions.c diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..67bd190 --- /dev/null +++ b/.clang-format @@ -0,0 +1 @@ +UseTab: Always diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..b696824 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,9 @@ +root = true + +[*] +charset = utf-8 +end_of_line = lf +trim_trailing_whitespace = true +insert_final_newline = true +indent_style = tab +indent_size = tab diff --git a/Makefile b/Makefile index 5b959a3..76769e1 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,10 @@ NAME = dasm CFLAGS = -Wall -Wextra -Wshadow -LDFLAGS = +LDFLAGS = SRC = src/main.c \ - src/dasm.c + src/dasm.c \ + src/instructions.c OBJ = $(SRC:%.c=%.o) diff --git a/src/dasm.c b/src/dasm.c index d029e7a..e70c255 100644 --- a/src/dasm.c +++ b/src/dasm.c @@ -2,19 +2,51 @@ #include #include "dasm.h" -void print_instruction(unsigned addr) +void print_instruction(unsigned addr, instruction_t inst, unsigned raw) { - printf("%X\n", addr); + printf("%04x: %-14x %s\n", addr, raw, inst.name); } -int dasm(char *binary) +unsigned read_size(u_int8_t *binary, unsigned size) +{ + unsigned ret = binary[0]; + + for (unsigned i = 1; i < size; i++) { + ret |= binary[i] << (8 * i); + } + return ret; +} + +instruction_t parse_inst(u_int8_t *binary) +{ + for (int i = 0; instructions[i].name; i++) { + if (instructions[i].opcode == *binary) + return instructions[i]; + } + return (const instruction_t){ + .opcode = 0xFF, .name = "unknown", .mode = {END}, .size = 1 + }; +} + +int dasm(u_int8_t *binary, unsigned long size) { int pc = 0; + int header_size = 0; - while (*binary) { - instruction_t inst = *binary; - print_instruction(pc); + if (binary[0] == 0xEB && binary[1] == 0x0E) + header_size = 16; + else if (binary[0] == 0x01 && binary[1] == 0x03) + header_size = binary[4]; + + printf("Header size: %d\n", header_size); + binary += header_size; + + while (pc < size - header_size) { + instruction_t inst = parse_inst(binary); + unsigned long raw = read_size(binary, inst.size); + print_instruction(pc, inst, raw); pc += inst.size; + binary += inst.size; } return 0; } diff --git a/src/dasm.h b/src/dasm.h index 5da5f6f..cb35b57 100644 --- a/src/dasm.h +++ b/src/dasm.h @@ -1,6 +1,38 @@ -int dasm(char *binary); +#include + +int dasm(u_int8_t *binary, unsigned long size); + +// typedef enum addressing_mode { +// /// Param is the data (MOV AL, ~68FE~) +// IMMEDIATE, +// REGISTER, +// DIRECT, +// REGISTER_INDIRECT, +// BASED, +// INDEXED, +// BASED_INDEX, +// BASED_INDEX_DISPLACEMENT, +// } addressing_mode_t; + +typedef enum addressing_mode { + // Register or 8bit of memory + R_M8, + // 8bit register + REG8, + // 8bit immediate data + IMM8, + // 16bit immediate data + IMM16, + // Special item just to mark the end + END, +} addressing_mode_t; typedef struct instruction { + u_int8_t opcode; const char *name; unsigned size; + addressing_mode_t mode[5]; } instruction_t; + +extern const instruction_t instructions[]; + diff --git a/src/instructions.c b/src/instructions.c new file mode 100644 index 0000000..50642d8 --- /dev/null +++ b/src/instructions.c @@ -0,0 +1,24 @@ +#include +#include "dasm.h" + +const instruction_t instructions[] = { + {.opcode = 0x00, .name = "add", .mode = {R_M8, REG8, END}, .size = 2}, + {.opcode = 0xBB, .name = "mov bf", .mode = {R_M8, REG8, END}, .size = 2}, + {.opcode = 0xB0, .name = "mov al", .mode = {IMM8}, .size = 2}, + {.opcode = 0xB1, .name = "mov cl", .mode = {IMM8}, .size = 2}, + {.opcode = 0xB2, .name = "mov dl", .mode = {IMM8}, .size = 2}, + {.opcode = 0xB3, .name = "mov bl", .mode = {IMM8}, .size = 2}, + {.opcode = 0xB4, .name = "mov ah", .mode = {IMM8}, .size = 2}, + {.opcode = 0xB5, .name = "mov ch", .mode = {IMM8}, .size = 2}, + {.opcode = 0xB6, .name = "mov dh", .mode = {IMM8}, .size = 2}, + {.opcode = 0xB7, .name = "mov bh", .mode = {IMM8}, .size = 2}, + {.opcode = 0xB8, .name = "mov ax", .mode = {IMM16}, .size = 3}, + {.opcode = 0xB9, .name = "mov cx", .mode = {IMM16}, .size = 3}, + {.opcode = 0xBA, .name = "mov dx", .mode = {IMM16}, .size = 3}, + {.opcode = 0xBB, .name = "mov bx", .mode = {IMM16}, .size = 3}, + {.opcode = 0xBC, .name = "mov sp", .mode = {IMM16}, .size = 3}, + {.opcode = 0xBD, .name = "mov bp", .mode = {IMM16}, .size = 3}, + {.opcode = 0xBE, .name = "mov si", .mode = {IMM16}, .size = 3}, + {.opcode = 0xBF, .name = "mov di", .mode = {IMM16}, .size = 3}, + {.name = NULL} +}; diff --git a/src/main.c b/src/main.c index ed43cfb..981d7b9 100644 --- a/src/main.c +++ b/src/main.c @@ -2,33 +2,33 @@ #include #include #include "dasm.h" +#include "sys/types.h" -char *open_and_read(char *path) +unsigned long open_and_read(char *path, u_int8_t **out) { + *out = NULL; FILE *file = fopen(path, "r"); if (!file) { dprintf(2, "Invalid path: %s. File does not exist.\n", path); - return NULL; + return 0; } int rc = fseek(file, 0L, SEEK_END); if (rc != 0) { fclose(file); - return NULL; + return 0; } long size = ftell(file); if (size == -1) { fclose(file); - return NULL; + return 0; } rewind(file); - char *ret = malloc((size + 1) * sizeof(char)); - size = fread(ret, sizeof(char), size, file); - ret[size] = '\0'; - + *out = malloc(size * sizeof(u_int8_t)); + size = fread(*out, sizeof(u_int8_t), size, file); fclose(file); - return ret; + return size; } int main(int argc, char **argv) @@ -38,13 +38,14 @@ int main(int argc, char **argv) return 2; } - char *binary = open_and_read(argv[2]); + u_int8_t *binary; + unsigned long size = open_and_read(argv[2], &binary); if (!binary) { puts("Could not read binary file."); return 2; } - int ret = dasm(binary); + int ret = dasm(binary, size); free(binary); return ret; }