compiler

Unnamed Compiled Systems Language Project
git clone http://frotz.net/git/compiler.git
Log | Files | Refs

commit 823f7bf3b95488383ae412406fb57acac9a47955
Author: Brian Swetland <swetland@frotz.net>
Date:   Mon,  2 Mar 2020 11:31:16 -0800

initial

Diffstat:
A.gitignore | 3+++
Adocs/notes.oberon.compiler.txt | 29+++++++++++++++++++++++++++++
Adocs/notes.project.oberon.txt | 83+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Adocs/project-oberon-risc5-architecture.txt | 137+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/fs.c | 74++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asrc/fs.h | 30++++++++++++++++++++++++++++++
Asrc/r5dis.c | 205+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
7 files changed, 561 insertions(+), 0 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -0,0 +1,3 @@ +.* +bin +out diff --git a/docs/notes.oberon.compiler.txt b/docs/notes.oberon.compiler.txt @@ -0,0 +1,29 @@ + +ObjDesc + class: byte // Const Var Par Fld Typ SProc SFunc Mod + val: int // value adr adr off tda num num key + // tda -> type descriptor address + exno: byte + expo: bool // exported + rdo: bool // readonly + lev: int // ?depth + next: Object + dsc: Object + type: Type + name: Ident + +TypeDesc + form: int // Pointer Proc Array Record + dsc: Object // list: n/a params n/a fields + base: Type // of: derefobj result elem + ref: int + mno: int + len: int // arrays: < 0 = open, records adr of descriptor + typobj: Object + size: int + + +0x'8000'2301 + +0b'1000'1010'0011'0100 + diff --git a/docs/notes.project.oberon.txt b/docs/notes.project.oberon.txt @@ -0,0 +1,83 @@ + +mailing list: http://lists.inf.ethz.ch/pipermail/oberon/ + + +sector numbers (from PO.System) +------------------------------- +3. Sector pointers are represented by sector numbers of type INTEGER. Actually, +we use the numbers multiplied by 29. This implies that any single-bit error +leads to a number which is not a multiple of 29, and hence can easily be +detected. Thereby the crucial sector addresses are software parity checked and +are safe (against single-bit errors) even on computers without hardware parity +check. The check is performed by procedures Kernel.GetSector and +Kernel.PutSector. 
+ + +traps (from PO.Application) +--------------------------- +1 array index out of range +2 type guard failure +3 array or string copy overflow +4 access via NIL pointer +5 illegal procedure call +6 integer division by zero +7 assertion violated + +handled by BLR MT +- first word of module table is trap handler entry +- trap number encoded in bits 4:7 of instruction +- trap 0 used for proc New (sort of a syscall mechanism) + + + + +compiler/ORB.Mod registers builtin funcs, procs, types +UML, SBC, ADC, ROR, ADR, LSL, LEN, CHR, ORD, FLT, FLOOR, ODD, ABS, +LED, UNPK, PACK, NEW, ASSERT, EXCL, INCL, DEC, INC, +SET, BOOLEAN, BYTE, CHAR, LONGREAL, REAL, LONGINT, INTEGER + +SYSTEM module setup with: +H, COND, SIZE, ADR, VAL, REG, BIT, +LDREG, LDPSR, COPY, PUT, GET + +LEN(x) length of array + +SYSTEM module has built-in low-level/unsafe ops +------------------------------------------------ +v (var), x,a,n (expr) + +ADR(v) INTEGER address of Variable +SIZE(T) INTEGER size in bytes of Type + +VAL(T, n) raw cast n to T if SIZE(n) <= SIZE(T) +ADC(m, n) add w/ carry C +SBC(m, n) sub w/ carry C +UML(m, n) unsigned mul +COND(n) IF Cond(n) Then... ? set cc? 
+ +LED(n) display on LEDs + +BIT(a, n) BOOLEAN bit n of mem[a] +GET(a, v) v := mem[a] +PUT(a, x) mem[a] := x +COPY(src, dst, n) memcpy(dst, src, n * sizeof(WORD)) + +|> maybe split to builtin.* and unsafe.* + + + +core/Display.Mod +---------------- +(*a pattern is an array of bytes; the first is its width (< 32), the second its height, the rest the raster*) +arrow := SYSTEM.ADR($0F0F 0060 0070 0038 001C 000E 0007 8003 C101 E300 7700 3F00 1F00 3F00 7F00 FF00$); + + +joerg.straube on mailing list +-------------------- +You can ask the RISC processor to reveal its version with this code + + cpu := SYSTEM.H(2019) MOD 80H; + IF cpu = 53H THEN (* RISC5: with interrupts + floating-point, 31.8.2018 *) + ELSIF cpu = 54H THEN (* RISC5a: no interrupts, no floating-point, 1.9.2018*) + ELSIF cpu = A0H THEN (* RISC0, 26.12.2013 *) + END; diff --git a/docs/project-oberon-risc5-architecture.txt b/docs/project-oberon-risc5-architecture.txt @@ -0,0 +1,137 @@ +Project Oberon RISC5 Architecture +--------------------------------- + +The various documents describing Project Oberon's RISC5 architecture +are not entirely in agreement with each other, and (maybe most critically) +not entirely in agreement with the verilog implementation of the CPU +in RISC.v. This document attempts to be the "most correct" version, +looking to the verilog implementation RISC.v as "most authoritative". + +0. Processor Resources +---------------------- + +Program Counter PC +General Registers R0..R15 +Flag Bits N C V Z +Mul/Div Result Reg H +Special Irq Reg SPC captures NCVZ, PC on IRQ, restores on RTI +Memory Mem[] byte or word addressable main memory + +R12 is known as MT, the Module Table pointer (system globals) +R13 is known as SB, the Static Base pointer (module globals) +R14 is known as SP, the Stack Pointer +R15 is known as LNK, the Link Register + +The first three are Project Oberon software conventions and have +no special meaning in hardware. 
R15 is where branch-with-link +instructions store the return address. + +1. Register Instructions (F0 & F1) +---------------------------------- + 4 4 4 4 4 4 + +------+-------+-------+-------+-------------+-------+-------+ +F0 | 00u0 | a | b | op | | 0000 | c | + +------+-------+-------+-------+-------------+-------+-------+ + n=Rc + + 4 4 4 4 16 + +------+-------+-------+-------+-----------------------------+ +F1 | 01uv | a | b | op | n | + +------+-------+-------+-------+-----------------------------+ + v=0: 0-extend n, v=1: 1-extend n + + 0 MOV a, n Ra = n + 1 LSL a, b, n Ra = Rb << n (shift left) + 2 ASR a, b, n Ra = Rb >> n (shift right, sign extending) + 3 ROR a, b, n Ra = Rb rot n (rotate right) + 4 AND a, b, n Ra = Rb & n logical operations + 5 ANN a, b, n Ra = Rb & ~n + 6 IOR a, b, n Ra = Rb | n + 7 XOR a, b, n Ra = Rb ^ n + 8 ADD a, b, n Ra = Rb + n integer arithmetic + 9 SUB a, b, n Ra = Rb - n +10 MUL a, b, n Ra = Rb * n H = result high 32 bits +11 DIV a, b, n Ra = Rb / n H = remainder +12 FAD a, b, n Ra = Rb + n floating point arithmetic +13 FSB a, b, n Ra = Rb - n +14 FML a, b, n Ra = Rb * n +15 FDV a, b, n Ra = Rb / n + + u=1 modifies some ops: + 0 MOV a, H Ra = H v=0 + 0 MOV a, NZCV Ra = (NZCV<<28)|INFO v=1 + 0 MHI a, n Ra = n << 16 + 8 ADC a, b, n Ra = Rb + n + C + 9 SBC a, b, n Ra = Rb - n - C +10 UMUL a, b, n Ra = Rb * n (unsigned multiply) + +INFO is 0x00000053 on the 2018 FPGA RISC5 implementation. + +1a. Flag Bits +------------- + +N and Z are set on *any* register write (arithmetic or load) + +C and V are set on integer ADD or SUB operations. + + +2. Memory Instructions (F2) +--------------------------- + 4 4 4 20 + +------+-------+-------+-------------------------------------+ +F2 | 10uv | a | b | off | + +------+-------+-------+-------------------------------------+ + u=0: load, u=1: store v=0: word, v=1: byte + + LD a, b, off Ra = Mem[Rb + off] + ST a, b, off Mem[Rb + off] = Ra + +3. 
Branch Instructions (F3) +--------------------------- + 4 4 4 4 + +------+-------+-----------------------------+-------+-------+ +F3 | 110v | cond | | 0000 | c | + +------+-------+-----------------------------+-------+-------+ + 4 4 24 + +------+-------+---------------------------------------------+ +F3 | 111v | cond | off | + +------+-------+---------------------------------------------+ + v=0: no link, v=1: link + + B<cond> c PC = Rc (low two bits 0'd) + BL<cond> c R15 = PC + 4, PC = Rc (low two bits 0'd) + B<cond> off PC = PC + 4 + off * 4 + BL<cond> off R15 = PC + 4, PC = PC + 4 + off * 4 + +0000 MI negative (minus) N 1000 PL positive ~N +0001 EQ equal (zero) Z 1001 NE not equal ~Z +0010 CS carry set C 1010 CC carry clear ~C +0011 VS overflow set V 1011 VC overflow clear ~V +0100 LS less or same C|Z 1100 HI high ~(C|Z) +0101 LT less than (N^V) 1101 GE greater or equal ~(N^V) +0110 LE less or equal (N^V)|Z 1110 GT greater than ~((N^V)|Z) +0111 always T 1111 never F + + +4. Interrupts (Special F3 Encodings) +------------------------------------ + 4 4 4 4 + +------+-------+-----------------------------+-------+-------+ +F3 | 1100 | 0111 | | 0001 | xxxx | + +------+-------+-----------------------------+-------+-------+ + RTI Return from IRQ, restoring PC and Flags + + 4 4 4 4 + +------+-------+-----------------------------+-------+-------+ +F3 | 1100 | 1111 | | 0010 | 000e | + +------+-------+-----------------------------+-------+-------+ + STI set irq enable, allowing irqs (e=1) + CLI clear irq enable, masking irqs (e=0) + +On interrupt, the flags and PC are saved and execution continues +at address 0x00000004. + +On RTI, the flags and PC are restored. + +The H register is neither saved nor restored (MUL/DIV during an +irq handler is thus unsafe). diff --git a/src/fs.c b/src/fs.c @@ -0,0 +1,74 @@ +// Copyright 2020, Brian Swetland <swetland@frotz.net> +// Licensed under the Apache License, Version 2.0. 
+ +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> + +#include "fs.h" + +int read_sector(int fd, uint32_t sector, void* buffer) { + if (sector % 29) { + fprintf(stderr, "invalid sector $%u\n", sector); + } + sector = (sector / 29) - 1; + if (pread(fd, buffer, 1024, 1024 * sector) != 1024) { + fprintf(stderr, "cannot read sector #%u\n", sector); + return -1; + } else { + return 0; + } +} + +char* fixfn(char* fn) { + for (int i = 0; i < 32; i++) { + if ((fn[i] < ' ') || (fn[i] > 127)) fn[i] = '.'; + } + fn[31] = 0; + return fn; +} + +int dump_dir(int fd, uint32_t sector) { + ofs_dir_page dp; + ofs_file f; + if (read_sector(fd, sector, &dp)) return -1; + if (dp.mark != OFS_DIR_MARK) { + fprintf(stderr, "dir page @%u bad mark 0x%08x\n", sector, dp.mark); + return -1; + } + printf(" m=%u p0=%u\n", dp.m, dp.p0); + if (dp.p0) dump_dir(fd, dp.p0); + for (int e = 0; e < 24; e++) { + printf("e[%02d] fn='%s' adr=%u p=%u\n", e, + dp.e[e].filename, dp.e[e].adr, dp.e[e].p); + if (dp.e[e].p) dump_dir(fd, dp.e[e].p); +#if 0 + if (read_sector(fd, dp.e[e].adr, &f)) return -1; + if (f.mark != OFS_FILE_MARK) { + fprintf(stderr, "file sector @%u bad mark 0x%08x\n", + dp.e[e].adr, f.mark); + return -1; + } + printf(" aleng=%u bleng=%u date=%u\n", + f.aleng, f.bleng, f.date); + printf(" sectors ["); + for (int s = 0; s < 64; s++) { + if (f.sector[s]) printf(" %u", f.sector[s]); + } + printf(" ]\n"); +#endif + } + return 0; +} + +int main(int argc, char** argv) { + int fd; + if (argc != 2) return -1; + if ((fd = open(argv[1], O_RDONLY)) < 0) { + fprintf(stderr, "cannot open '%s'\n", argv[1]); + return -1; + } + + dump_dir(fd, 29); +} diff --git a/src/fs.h b/src/fs.h @@ -0,0 +1,30 @@ +// Copyright 2020, Brian Swetland <swetland@frotz.net> +// Licensed under the Apache License, Version 2.0. 
// ---- src/fs.h: on-disk structures read by fs.c ----

#ifndef FS_H
#define FS_H

#include <stdint.h>

// Values expected in the `mark` field of a file header sector and of a
// directory page respectively.
#define OFS_FILE_MARK 0x9BA71D86
#define OFS_DIR_MARK 0x9B1EA38D

// File header sector.
typedef struct {
	uint32_t mark;              // OFS_FILE_MARK
	uint8_t filename[32];
	uint32_t aleng, bleng, date;
	uint32_t extension[12];
	uint32_t sector[64];        // sector numbers; 0 entries are skipped when dumped
	// The fields above occupy 352 bytes, so 672 *bytes* of payload make the
	// header exactly one 1024-byte sector -- the amount read_sector() writes.
	uint8_t data[672];
} ofs_file;

// One directory entry (40 bytes).
typedef struct {
	uint8_t filename[32];
	uint32_t adr;               // sector number of the file header
	uint32_t p;                 // sector number of a child directory page, 0 if none
} ofs_dir_entry;

// Directory page: 64 bytes of header and padding plus 24 entries of 40 bytes
// each, 1024 bytes in total.
typedef struct {
	uint32_t mark;              // OFS_DIR_MARK
	uint32_t m;
	uint32_t p0;                // sector number of a child directory page, 0 if none
	uint8_t pad[52];
	ofs_dir_entry e[24];
} ofs_dir_page;

#endif // FS_H

// ---- diff --git a/src/r5dis.c b/src/r5dis.c @@ -0,0 +1,205 @@ ----
// Copyright 2020, Brian Swetland <swetland@frotz.net>
// Licensed under the Apache License, Version 2.0.

// src/r5dis.c -- disassembler for 32-bit RISC5 instruction words.

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

// Mnemonics of the sixteen register-instruction opcodes, indexed by opcode.
static const char* opname[16] = {
	"MOV", "LSL", "ASR", "ROR", "AND", "ANN", "IOR", "XOR",
	"ADD", "SUB", "MUL", "DIV", "FAD", "FSB", "FML", "FDV",
};
// Returns the mnemonic for the opcode in the low four bits of n.
const char* OP(uint32_t n) { return opname[n & 15]; }

// Branch condition suffixes, indexed by condition code.  Code 7 ("always")
// has an empty suffix so an unconditional branch prints as plain B / BL.
static const char* ccname[16] = {
	"MI", "EQ", "CS", "VS", "LS", "LT", "LE", "",
	"PL", "NE", "CC", "VC", "HI", "GE", "GT", "NV",
};
// Returns the condition suffix for the code in the low four bits of n.
const char* CC(uint32_t n) { return ccname[n & 15]; }

// Register names; R12..R15 are shown by their conventional aliases.
static const char* regname[16] = {
	"R0", "R1", "R2", "R3", "R4", "R5", "R6", "R7",
	"R8", "R9", "R10", "R11", "MT", "SB", "SP", "LR",
};
// Returns the name of the register numbered by the low four bits of n.
const char* R(uint32_t n) { return regname[n & 15]; }

// Returns the mnemonic for register-instruction opcode `op`, where `t` is the
// top nibble of the instruction.  When the u bit (t & 2) is set, ADD, SUB and
// MUL become ADC, SBC and UMUL.
static const char* alu_name(uint32_t t, uint32_t op) {
	if (t & 2) {
		switch (op) {
		case 8: return "ADC";
		case 9: return "SBC";
		case 10: return "UMUL";
		default: break;
		}
	}
	return OP(op);
}

// Prints one line of disassembly for instruction word `ins` located at
// address `pc`: the address, the raw word, then the decoded instruction.
void disasm(uint32_t pc, uint32_t ins) {
	uint32_t t = ins >> 28;         // format bits plus the u/v modifier bits
	uint32_t a = (ins >> 24) & 15;
	uint32_t b = (ins >> 20) & 15;
	uint32_t o = (ins >> 16) & 15;
	uint32_t c = ins & 15;
	uint32_t i16 = ins & 0xffff;
	uint32_t i20 = ins & 0xfffff;
	uint32_t i24 = ins & 0xffffff;
	// memory and branch offsets are signed
	if (i20 & 0x80000) i20 |= 0xfff00000;
	if (i24 & 0x800000) i24 |= 0xff000000;

	printf("%08x: %08x ", pc, ins);
	switch (t) {
	// F0: 00uv, second operand is register c
	case 0b0000:
	case 0b0001:
	case 0b0010:
	case 0b0011:
		if (o != 0) {
			printf("%s %s, %s, %s\n", alu_name(t, o), R(a), R(b), R(c));
		} else if (!(t & 2)) { // MOV ignores Rb
			printf("MOV %s, %s\n", R(a), R(c));
		} else if (t & 1) {
			printf("MOV %s, NZCV\n", R(a));
		} else {
			printf("MOV %s, H\n", R(a));
		}
		break;
	// F1: 01uv, second operand is a 16-bit immediate
	case 0b0100:
	case 0b0101:
	case 0b0110:
	case 0b0111: {
		// the v bit, not bit 15 of the immediate, selects whether the upper
		// half is filled with ones or zeros
		uint32_t n = (t & 1) ? (i16 | 0xffff0000) : i16;
		if (o != 0) {
			printf("%s %s, %s, %d\n", alu_name(t, o), R(a), R(b), (int32_t)n);
		} else {
			// with the u bit set, MOV loads the immediate into the high half
			if (t & 2) n = i16 << 16;
			printf("MOV %s, %d\n", R(a), (int32_t)n);
		}
		break;
	}
	// F2: 10uv, u selects load/store and v selects word/byte
	case 0b1000:
		printf("LW %s, [%s, %d]\n", R(a), R(b), (int32_t)i20);
		break;
	case 0b1001:
		printf("LB %s, [%s, %d]\n", R(a), R(b), (int32_t)i20);
		break;
	case 0b1010:
		printf("SW %s, [%s, %d]\n", R(a), R(b), (int32_t)i20);
		break;
	case 0b1011:
		printf("SB %s, [%s, %d]\n", R(a), R(b), (int32_t)i20);
		break;
	// F3: branch to register without link; bits 7:4 select the special
	// RTI and STI/CLI encodings
	case 0b1100:
		switch ((ins >> 4) & 15) {
		case 0b0000:
			printf("B%s %s\n", CC(a), R(c));
			break;
		case 0b0001:
			printf("RTI %s\n", R(c));
			break;
		case 0b0010:
			printf("%s\n", ins & 1 ? "STI" : "CLI");
			break;
		default:
			printf("??? %08x\n", ins);
			break;
		}
		break;
	case 0b1101:
		printf("BL%s %s\n", CC(a), R(c));
		break;
	case 0b1110:
		printf("B%s %d\n", CC(a), (int32_t)i24);
		break;
	case 0b1111:
		printf("BL%s %d\n", CC(a), (int32_t)i24);
		break;
	default:
		printf("??? %08x\n", ins);
		break;
	}
}

// Reads a NUL-terminated name of at most 64 bytes (terminator included) from
// fd.  Returns a pointer to a static buffer that the next call overwrites.
// Exits the process with status 1 on a read failure or an unterminated name.
const char* readname(int fd) {
	static char name[64];
	for (int n = 0; n < 64; n++) {
		if (read(fd, name + n, 1) != 1) exit(1);
		if (name[n] == 0) return name;
	}
	exit(1);
	return NULL; // not reached; keeps compilers that don't know exit() quiet
}

// Reads one 32-bit word from fd in host byte order; exits with status 1 on a
// short read.
uint32_t readint(int fd) {
	uint32_t n;
	if (read(fd, &n, 4) != 4) exit(1);
	return n;
}

// Reads one byte from fd; exits with status 1 on a short read.
uint32_t readbyte(int fd) {
	uint8_t n;
	if (read(fd, &n, 1) != 1) exit(1);
	return n;
}

// Reads and prints the header of an object file from fd: name, key, class and
// size, the import list (terminated by an empty name), then the type
// descriptor, data and string sections, which are skipped.  Leaves fd
// positioned at the first instruction and returns the instruction count.
uint32_t dishdr(int fd) {
	const char* name = readname(fd);
	uint32_t key = readint(fd);
	uint32_t cls = readbyte(fd);
	uint32_t size = readint(fd);
	printf("[ name='%s', key=%08x, class=%02x, size=%u ]\n",
		name, key, cls, size);
	printf("[ imports:");
	for (;;) {
		name = readname(fd);
		if (name[0] == 0) break;
		uint32_t impkey = readint(fd);
		printf(" %s(%08x)", name, impkey);
	}
	printf(" ]\n");
	// type descrs? (length is in bytes, skipped a word at a time)
	uint32_t n = readint(fd) / 4;
	printf("[ typedesc=%u", n);
	while (n > 0) { readint(fd); n--; }
	// data?
	n = readint(fd);
	printf(", data=%08x", n);
	// stringdata
	n = readint(fd);
	printf(", stringdata=%u", n);
	while (n > 0) { readbyte(fd); n--; }
	// instructions
	n = readint(fd);
	printf(", instructions=%u ]\n", n);
	return n;
}

// Usage: <program> <file>
// A file whose name ends in ".rsc" is treated as an object file: its header
// is printed and exactly the number of instructions it declares is
// disassembled.  Any other file is disassembled as raw instruction words
// until end of file.
int main(int argc, char** argv) {
	if (argc != 2) return -1;
	int fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		fprintf(stderr, "cannot open '%s'\n", argv[1]);
		return -1;
	}

	uint32_t count = 0xffffffff;
	size_t len = strlen(argv[1]);
	if ((len > 4) && (!strcmp(argv[1] + len - 4, ".rsc"))) {
		count = dishdr(fd);
	}

	uint32_t ins;
	uint32_t pc = 0;
	while (count > 0) {
		count--;
		if (read(fd, &ins, sizeof(ins)) != sizeof(ins)) break;
		disasm(pc, ins);
		pc += 4;
	}
	close(fd);
	return 0;
}