compiler

Unnamed Compiled Systems Language Project
git clone http://frotz.net/git/compiler.git
Log | Files | Refs

commit 8ecfb0be1f42f3f88bc7117830ed054dd9ebe8d0
parent 0acd25e23eb849d8570612c2eb44d864e6ad04eb
Author: Brian Swetland <swetland@frotz.net>
Date:   Mon,  2 Mar 2020 13:10:55 -0800

new disassembler

- based on rv32 table driven disassembler
- library-ized

Diffstat:
MMakefile | 15++++++++++++---
Asrc/mkinstab.c | 38++++++++++++++++++++++++++++++++++++++
Asrc/r5d.c | 94+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Dsrc/r5dis.c | 206-------------------------------------------------------------------------------
Asrc/risc5.h | 4++++
Asrc/risc5dis.c | 80+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/risc5ins.txt | 33+++++++++++++++++++++++++++++++++
7 files changed, 261 insertions(+), 209 deletions(-)

diff --git a/Makefile b/Makefile @@ -1,5 +1,5 @@ -all: bin/tlc bin/fs bin/r5dis +all: bin/tlc bin/fs bin/r5d bin/mkinstab clean: rm -rf bin out @@ -15,6 +15,15 @@ bin/fs: src/fs.c src/fs.h @mkdir -p bin $(CC) -o $@ $(CFLAGS) src/fs.c -bin/r5dis: src/r5dis.c +bin/r5d: src/r5d.c src/risc5dis.c src/risc5.h out/risc5ins.h @mkdir -p bin - $(CC) -o $@ $(CFLAGS) src/r5dis.c + $(CC) -o $@ $(CFLAGS) src/r5d.c src/risc5dis.c src/risc5.h + +bin/mkinstab: src/mkinstab.c + @mkdir -p bin + $(CC) -o $@ $(CFLAGS) src/mkinstab.c + +out/risc5ins.h: src/risc5ins.txt bin/mkinstab + @mkdir -p out + bin/mkinstab < src/risc5ins.txt > $@ + diff --git a/src/mkinstab.c b/src/mkinstab.c @@ -0,0 +1,38 @@ +// Copyright 2019, Brian Swetland <swetland@frotz.net> +// Licensed under the Apache License, Version 2.0. + +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <ctype.h> + +int main(int argc, char** argv) { + char line[128]; + while (fgets(line, sizeof(line), stdin) != NULL) { + unsigned end = strlen(line); + while (end > 0) { + end--; + if (!isspace(line[end])) break; + line[end] = 0; + } + if ((line[0] == 0) || (line[0] == '#') || + isspace(line[0]) || (end < 34)) { + continue; + } + uint32_t mask = 0, bits = 0; + for (unsigned n = 0; n < 32; n++) { + uint32_t bit = 1U << (31 - n); + switch (line[n]) { + case '1': + mask |= bit; + bits |= bit; + break; + case '0': + mask |= bit; + break; + } + } + printf("{ 0x%08x, 0x%08x, \"%s\" },\n", mask, bits, line + 33); + } + return 0; +} diff --git a/src/r5d.c b/src/r5d.c @@ -0,0 +1,94 @@ +// Copyright 2020, Brian Swetland <swetland@frotz.net> +// Licensed under the Apache License, Version 2.0. 
+ +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <unistd.h> +#include <fcntl.h> +#include <string.h> + +#include "risc5.h" + +void disasm(uint32_t pc, uint32_t ins) { + char buf[256]; + risc5dis(pc, ins, buf); + printf("%08x: %08x %s\n", pc, ins, buf); +} + +const char* readname(int fd) { + static char name[64]; + for (int n = 0; n < 64; n++) { + if (read(fd, name + n, 1) != 1) exit(1); + if (name[n] == 0) return name; + } + exit(1); + return NULL; +} + +uint32_t readint(int fd) { + uint32_t n; + if (read(fd, &n, 4) != 4) exit(1); + return n; +} + +uint32_t readbyte(int fd) { + uint8_t n; + if (read(fd, &n, 1) != 1) exit(1); + return n; +} + +uint32_t dishdr(int fd) { + const char* name = readname(fd); + uint32_t key = readint(fd); + uint32_t cls = readbyte(fd); + uint32_t size = readint(fd); + printf("[ name='%s', key=%08x, class=%02x, size=%u ]\n", + name, key, cls, size); + printf("[ imports:"); + for (;;) { + name = readname(fd); + if (name[0] == 0) break; + printf(" %s(%08x)", name, readint(fd)); + } + printf(" ]\n"); + // type descrs? + uint32_t n = readint(fd) / 4; + printf("[ typedesc=%u", n); + while (n > 0) { readint(fd); n--; } + // data? 
+ n = readint(fd); + printf(", data=%08x", n); + // stringdata + n = readint(fd); + printf(", stringdata=%u", n); + while (n > 0) { readbyte(fd); n--; } + // instructions + n = readint(fd); + printf(", instructions=%u ]\n", n); + return n; +} + +int main(int argc, char** argv) { + int fd; + if (argc != 2) return -1; + if ((fd = open(argv[1], O_RDONLY)) < 0) return -1; + + uint32_t count; + count = strlen(argv[1]); + if ((count > 5) && (!strcmp(argv[1] + count - 4, ".rsc"))) { + count = dishdr(fd); + } else { + count = 0xffffffff; + } + + uint32_t ins; + uint32_t pc = 0; + while (count > 0) { + count--; + if (read(fd, &ins, sizeof(ins)) != sizeof(ins)) break; + disasm(pc, ins); + pc += 4; + } + return 0; +} diff --git a/src/r5dis.c b/src/r5dis.c @@ -1,206 +0,0 @@ -// Copyright 2020, Brian Swetland <swetland@frotz.net> -// Licensed under the Apache License, Version 2.0. - -#include <stdio.h> -#include <stdlib.h> -#include <stdint.h> -#include <unistd.h> -#include <fcntl.h> -#include <string.h> - -static const char* opname[16] = { - "MOV", "LSL", "ASR", "ROR", "AND", "ANN", "IOR", "XOR", - "ADD", "SUB", "MUL", "DIV", "FAD", "FSB", "FML", "FDV", -}; -const char* OP(uint32_t n) { return opname[n & 15]; } - -static const char* ccname[16] = { - "MI", "EQ", "CS", "VS", "LS", "LT", "LE", "", - "PL", "NE", "CC", "VC", "HI", "GE", "GT", "NV", -}; -const char* CC(uint32_t n) { return ccname[n & 15]; } - -static const char* regname[16] = { - "R0", "R1", "R2", "R3", "R4", "R5", "R6", "R7", - "R8", "R9", "R10", "R11", "MT", "SB", "SP", "LR", -}; -const char* R(uint32_t n) { return regname[n & 15]; } - -void disasm(uint32_t pc, uint32_t ins) { - uint32_t t = ins >> 28; - uint32_t a = (ins >> 24) & 15; - uint32_t b = (ins >> 20) & 15; - uint32_t o = (ins >> 16) & 15; - uint32_t c = ins & 15; - uint32_t i16 = ins & 0xffff; - uint32_t i20 = ins & 0xfffff; - uint32_t i24 = ins & 0xffffff; - if (i16 & 0x8000) i16 |= 0xffff0000; - if (i20 & 0x80000) i20 |= 0xfff00000; - if (i24 & 
0x800000) i24 |= 0xff000000; - - printf("%08x: %08x ", pc, ins); - switch (t) { - // --uv - case 0b0000: - case 0b0001: - case 0b0010: - case 0b0011: - if (o == 0) { // MOV ignores Rb - switch (t & 3) { - case 0b00: - case 0b01: - printf("MOV %s, %s\n", R(a), R(c)); - break; - case 0b10: - printf("MOV %s, H\n", R(a)); - break; - case 0b11: - printf("MOV %s, NZCF\n", R(a)); - break; - } - } else { - const char* op = OP(o); - switch (o) { // u-bit modifiers - case 8: if (t & 2) op = "ADC"; break; - case 9: if (t & 2) op = "SBC"; break; - case 10: if (t & 2) op = "UMUL"; break; - } - printf("%s %s, %s, %s\n", op, R(a), R(b), R(c)); - } - break; - case 0b0100: - case 0b0101: - case 0b0110: - case 0b0111: - if (o == 0) { - printf("MOV %s, %d\n", R(a), (t & 1) ? (i16 << 16) : i16); - } else { - const char* op = OP(o); - switch (o) { // u-bit modifiers - case 8: if (t & 2) op = "ADC"; break; - case 9: if (t & 2) op = "SBC"; break; - case 10: if (t & 2) op = "UMUL"; break; - } - printf("%s %s, %s, %d\n", op, R(a), R(b), i16); - } - break; - case 0b1000: - printf("LW %s, [%s, %d]\n", R(a), R(b), i20); - break; - case 0b1001: - printf("LB %s, [%s, %d]\n", R(a), R(b), i20); - break; - case 0b1010: - printf("SW %s, [%s, %d]\n", R(a), R(b), i20); - break; - case 0b1011: - printf("SB %s, [%s, %d]\n", R(a), R(b), i20); - break; - case 0b1100: - switch ((ins >> 4) & 15) { - case 0b0000: - printf("B%s %s\n", CC(a), R(c)); - break; - case 0b0001: - printf("RTI %s\n", R(c)); - break; - case 0b0010: - printf("%s\n", ins & 1 ? "STI" : "CLI"); - break; - default: - printf("??? %08x\n", ins); - break; - } - break; - case 0b1101: - printf("BL%s %s\n", CC(a), R(c)); - break; - case 0b1110: - printf("B%s %d\n", CC(a), i24); - break; - case 0b1111: - printf("BL%s %d\n", CC(a), i24); - break; - default: - printf("??? 
%08x\n", ins); - break; - } -} - -const char* readname(int fd) { - static char name[64]; - for (int n = 0; n < 64; n++) { - if (read(fd, name + n, 1) != 1) exit(1); - if (name[n] == 0) return name; - } - exit(1); - return NULL; -} - -uint32_t readint(int fd) { - uint32_t n; - if (read(fd, &n, 4) != 4) exit(1); - return n; -} - -uint32_t readbyte(int fd) { - uint8_t n; - if (read(fd, &n, 1) != 1) exit(1); - return n; -} - -uint32_t dishdr(int fd) { - const char* name = readname(fd); - uint32_t key = readint(fd); - uint32_t cls = readbyte(fd); - uint32_t size = readint(fd); - printf("[ name='%s', key=%08x, class=%02x, size=%u ]\n", - name, key, cls, size); - printf("[ imports:"); - for (;;) { - name = readname(fd); - if (name[0] == 0) break; - printf(" %s(%08x)", name, readint(fd)); - } - printf(" ]\n"); - // type descrs? - uint32_t n = readint(fd) / 4; - printf("[ typedesc=%u", n); - while (n > 0) { readint(fd); n--; } - // data? - n = readint(fd); - printf(", data=%08x", n); - // stringdata - n = readint(fd); - printf(", stringdata=%u", n); - while (n > 0) { readbyte(fd); n--; } - // instructions - n = readint(fd); - printf(", instructions=%u ]\n", n); - return n; -} - -int main(int argc, char** argv) { - int fd; - if (argc != 2) return -1; - if ((fd = open(argv[1], O_RDONLY)) < 0) return -1; - - uint32_t count; - count = strlen(argv[1]); - if ((count > 5) && (!strcmp(argv[1] + count - 4, ".rsc"))) { - count = dishdr(fd); - } else { - count = 0xffffffff; - } - - uint32_t ins; - uint32_t pc = 0; - while (count > 0) { - count--; - if (read(fd, &ins, sizeof(ins)) != sizeof(ins)) break; - disasm(pc, ins); - pc += 4; - } - return 0; -} diff --git a/src/risc5.h b/src/risc5.h @@ -0,0 +1,4 @@ + +#include <stdint.h> + +void risc5dis(uint32_t pc, uint32_t ins, char *out); diff --git a/src/risc5dis.c b/src/risc5dis.c @@ -0,0 +1,80 @@ +// Copyright 2020, Brian Swetland <swetland@frotz.net> +// Licensed under the Apache License, Version 2.0. 
+ +#include <string.h> +#include <stdio.h> + +#include "risc5.h" + +static char *append_str(char *buf, const char *s) { + while (*s) *buf++ = *s++; + return buf; +} + +static char *append_i32(char *buf, int32_t n) { + return buf + sprintf(buf, "%d", n); +} + +static char *append_u32(char *buf, int32_t n) { + return buf + sprintf(buf, "0x%x", n); +} + +static const char* regname[16] = { + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", + "r8", "r9", "r10", "r11", "mt", "sb", "sp", "lr", +}; +#define R(n) regname[(n) & 15] + +static const char* opname[16] = { + "mov", "lsl", "asr", "ror", "and", "ann", "ior", "xor", + "add", "sub", "mul", "div", "fad", "fsb", "fml", "fdv", +}; +#define OP(n) opname[(n) & 15] + +static const char* ccname[16] = { + "mi", "eq", "cs", "vs", "ls", "lt", "le", "", + "pl", "ne", "cc", "vc", "hi", "ge", "gt", "nv", +}; +#define CC(n) ccname[(n) & 15] + +typedef struct { + uint32_t mask; + uint32_t bits; + const char* fmt; +} ins_t; + +static ins_t instab[] = { +#include "../out/risc5ins.h" +}; + +void risc5dis(uint32_t pc, uint32_t ins, char *out) { + unsigned n = 0; + while ((ins & instab[n].mask) != instab[n].bits) n++; + const char* fmt = instab[n].fmt; + char x; + + unsigned mo = ins & 0xFFFFF; + unsigned bo = ins & 0xFFFFFF; + if (mo & 0x80000) mo |= 0xFFF00000; + if (bo & 0x800000) bo |= 0xFF000000; + + while ((x = *fmt++) != 0) { + if (x != '%') { + *out++ = x; + continue; + } + switch (*fmt++) { + case 'C': out = append_str(out, CC((ins >> 24) & 15)); break; + case 'a': out = append_str(out, R((ins >> 24) & 15)); break; + case 'b': out = append_str(out, R((ins >> 20) & 15)); break; + case 'o': out = append_str(out, OP((ins >> 16) & 15)); break; + case 'c': out = append_str(out, R(ins & 15)); break; + case 'n': out = append_i32(out, ins & 0xFFFF); break; + case 's': out = append_u32(out, (ins & 0xFFFF) | 0xFFFF0000); break; + case 'N': out = append_u32(out, (ins & 0xFFFF) << 16); break; + case 'm': out = append_i32(out, mo); break; + 
case 'B': out = append_u32(out, pc + 4 + (bo << 2)); break; + } + } + *out = 0; +} diff --git a/src/risc5ins.txt b/src/risc5ins.txt @@ -0,0 +1,33 @@ +# Copyright 2020, Brian Swetland <swetland@frotz.net> +# Licensed under the Apache License, Version 2.0. + +0000aaaa----0000------------cccc mov %a, c +0010aaaa----0000---------------- mov %a, h +0011aaaa----0000---------------- mov %a, nzcv +0010aaaabbbb1000------------cccc adc %a, %b, %c +0010aaaabbbb1001------------cccc sbc %a, %b, %c +0010aaaabbbb1010------------cccc uml %a, %b, %c +0000aaaabbbboooo------------cccc %o %a, %b, %c +0100aaaa----0000nnnnnnnnnnnnnnnn mov %a, %n +0101aaaa----0000nnnnnnnnnnnnnnnn mov %a, %s +0100aaaabbbboooonnnnnnnnnnnnnnnn %o %a, %b, %n +0101aaaabbbboooossssssssssssssss %o %a, %b, %s +0110aaaa----0000NNNNNNNNNNNNNNNN mhi %a, %N +0111aaaa----0000NNNNNNNNNNNNNNNN mhi %a, %N +0110aaaabbbb1000nnnnnnnnnnnnnnnn adc %a, %b, %n +0111aaaabbbb1000ssssssssssssssss adc %a, %b, %s +0110aaaabbbb1001nnnnnnnnnnnnnnnn sbc %a, %b, %n +0111aaaabbbb1001ssssssssssssssss sbc %a, %b, %s +0110aaaabbbb1010nnnnnnnnnnnnnnnn uml %a, %b, %n +0111aaaabbbb1010ssssssssssssssss uml %%a, %b, %s +1000aaaabbbbmmmmmmmmmmmmmmmmmmmm lw %a, [%b, %m] +1001aaaabbbbmmmmmmmmmmmmmmmmmmmm lb %a, [%b, %m] +1010aaaabbbbmmmmmmmmmmmmmmmmmmmm sw %a, [%b, %m] +1011aaaabbbbmmmmmmmmmmmmmmmmmmmm sb %a, [%b, %m] +11000111-------------------1---- rti +11001111------------------1----0 sti +11001111------------------1----1 cli +1100CCCC--------------------cccc b%C %c +1101CCCC--------------------cccc bl%C %c +1110CCCCBBBBBBBBBBBBBBBBBBBBBBBB b%C %B +1111CCCCBBBBBBBBBBBBBBBBBBBBBBBB bl%C %B