compiler

Unnamed Compiled Systems Language Project
git clone http://frotz.net/git/compiler.git
Log | Files | Refs

commit 7d28c912594987b5dc380f51b0647252e30a70ad
parent 0abd0555d53c600b54f1731d44ceda56eebe4a48
Author: Brian Swetland <swetland@frotz.net>
Date:   Wed, 11 Mar 2020 00:12:45 -0700

new fibonacci test, new codegen bugfixes

- increase max emulator cycles quite a bit
- reserve stack space for spilling tmp regs
- save/restore tmp regs around function calls
- shuffle r0 (func return) into a tmp reg immediately
  to avoid stepping on it in the case of back-to-back
  calls like: fib(n-1) + fib(n-2)
- track the total stack size needed for the current func
- patch the stack reservation in the prologue to reflect
  this total (which may grow due to locals, etc) when
  emitting the epilogue
- add some notes about nonoptimal codegen patterns
- ensure we un-reserve tmp regs if we discard function
  results (via new gen_discard())
- backend gains tmp_reg_{count,first,last}
- add ctx.last_target_pc to track fixed-up forward jumps
- use this to re-enable elide-branch-to-next optimization
  (when it's safe to do so)

Diffstat:
Mdocs/notebook.md | 34++++++++++++++++++++++++++++++++++
Msrc/compiler.c | 102+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------
Msrc/r5e.c | 2+-
Atest/1025-fibonacci.log | 31+++++++++++++++++++++++++++++++
Atest/1025-fibonacci.src | 20++++++++++++++++++++
5 files changed, 173 insertions(+), 16 deletions(-)

diff --git a/docs/notebook.md b/docs/notebook.md @@ -2,6 +2,40 @@ A place to jot down ideas and/or commentary about the work in progress. +### 2020 Mar 10 - Non-optimal codegen + +When a break / continue / return takes control out of a scope, we're +not smart enough to discard stuff like the jump-around-else emitted +at the end of an if block, resulting in a dead branch op: + +```text + if (n < 2) { + return n; + +00000020: 80e00004 ldw r0, [sp, 4] +00000024: e700000d b 0x5c +00000028: e700000c b 0x5c +``` + +We move return values from r0 to a temp register to cover cases where +we may immediately need r0 again (another call, etc) but that means +extra instructions and copies in other cases like `return fib(n - 1) + fib(n - 2);` + +```text +0000002c: 88e00004 ldw r8, [sp, 4] # get n +00000030: 40890001 sub r0, r8, 1 # r0 = n - 1 +00000034: f7fffff3 bl 0x4 # r0 = fib(r0) +00000038: 08000000 mov r8, r0 # r0 -> tmp0 +0000003c: 89e00004 ldw r9, [sp, 4] # get n +00000040: 40990002 sub r0, r9, 2 # r0 = n - 2 +00000044: a8e00008 stw r8, [sp, 8] # save tmp0 +00000048: f7ffffee bl 0x4 # r0 = fib(r0) +0000004c: 88e00008 ldw r8, [sp, 8] # restore tmp0 +00000050: 09000000 mov r9, r0 # r0 -> tmp1 +00000054: 00880009 add r0, r8, r9 # r0 = tmp0 + tmp1 +00000058: e7000000 b 0x5c # jump to epilogue (return) +``` + ### 2020 Mar 09 - Simplifying the Code Up til now I had been passing around a "compiler context" pointer, so diff --git a/src/compiler.c b/src/compiler.c @@ -233,7 +233,10 @@ struct CtxRec { Object typetab; // TODO: hashtable Scope scope; // scope stack ScopeRec global; + Object fn; // function being compiled if non-nil + u32 spill_stack; // where to spill temp regs (TODO: dynamic) + u32 local_stack; // how much stack we use (adjusted for vars, etc) Type type_void; Type type_byte; @@ -245,6 +248,11 @@ struct CtxRec { u32 code[8192]; u32 pc; + // The latest pc that a forward branch has been + // fixed up to. cannot safely move code at or + // before this address. + u32 last_target_pc; + u32 regbits; u32 xref[8192]; @@ -274,6 +282,9 @@ void gen_unary_op(u32 op, Item x); // stores val into var, consuming both void gen_store(Item val, Item var); +// release any registers or other resources held by this item +void gen_discard(Item val); + // sets up call param #n, consuming val void gen_param(u32 n, Item val); @@ -1214,6 +1225,8 @@ void parse_block() { ItemRec y; set_item(&y, iConst, ctx.type_int32, 0, 1, 0); next(); + } else { + gen_discard(&x); } require(tSEMI); } @@ -1407,12 +1420,15 @@ void parse_program() { // ================================================================ +#define tmp_reg_count 4 +#define tmp_reg_first 8 +#define tmp_reg_last 11 + u32 get_reg_tmp() { - u32 n = 8; - while (n < 12) { + u32 n = tmp_reg_first; + while (n <= tmp_reg_last) { if (!(ctx.regbits & (1 << n))) { ctx.regbits |= (1 << n); - //printf("GET REG %u\n", n); return n; } n++; @@ -1422,8 +1438,7 @@ u32 get_reg_tmp() { } void put_reg(u32 r) { - //printf("PUT REG %u\n", r); - if (r < 8) { + if ((r < tmp_reg_first) || (r > tmp_reg_last)) { // currently we don't strictly track r0..r7 // they are used for function calls and returns return; @@ -1575,6 +1590,12 @@ void gen_load_reg(Item x, u32 r) { x->r = r; } +void gen_discard(Item x) { + if (x->kind == iReg) { + put_reg(x->r); + } +} + // convert an item to value-in-register format // if it's not already in that format void gen_load(Item x) { @@ -1652,8 +1673,34 @@ void gen_builtin(u32 id) { } } +void gen_save_regs() { + u32 n = tmp_reg_first; + u32 off = ctx.spill_stack; + while (n <= tmp_reg_last) { + if (ctx.regbits & (1 << n)) { + emit_mem(STW, n, SP, off); + off += 4; + } + n++; + } +} + +void gen_restore_regs() { + u32 n = tmp_reg_first; + u32 off = ctx.spill_stack; + while (n <= tmp_reg_last) { + if (ctx.regbits & (1 << n)) { + emit_mem(LDW, n, SP, off); + off += 4; + } + n++; + } +} + void gen_call(Item x) { + gen_save_regs(); if (x->type->obj->flags & ofBuiltin) { + // OPT: not all builtins will require save/restore regs gen_builtin(x->type->obj->value); } else if (x->type->obj->flags & ofDefined) { u32 fnpc = x->type->obj->value; @@ -1662,14 +1709,19 @@ void gen_call(Item x) { emit_bi(AL|L, 0); add_object_fixup(x->type->obj); } + gen_restore_regs(); + // item becomes the return value x->type = x->type->base; if (x->type == ctx.type_void) { x->kind = iConst; } else { x->kind = iReg; + x->r = R0; + // OPT if we tracked r0 usage we could + // avoid moving from R0 to a tmp reg here + gen_load_reg(x, get_reg_tmp()); } - x->r = R0; x->a = 0; x->b = 0; } @@ -1756,6 +1808,8 @@ void gen_mul_op(u32 op, Item x, Item y) { void gen_rel_op(u32 op, Item x, Item y) { gen_load(x); gen_load(y); + + // fuse x and y items into the new Comparison Item x x->kind = iComp; x->a = x->r; x->b = y->r; @@ -1789,22 +1843,25 @@ void gen_unary_op(u32 op, Item x) { // patch branch instruction at addr to // branch to current pc void fixup_branch_fwd(u32 addr) { -#if 0 - // disabled for now as this gets tripped up - // by stuff like test/1030-flow-control.src (if (n==3) ...) - if (addr == ctx.pc - 4) { + if ((addr == ctx.pc - 4) && (ctx.last_target_pc < ctx.pc)) { // if the branch to be patched is the // instruction just previously emitted, we // can simply erase it + // + // provided it's safe to do so (no branches are + // already arriving here...) ctx.pc -= 4; fprintf(stderr, "DELETED BRANCH @ 0x%x\n", ctx.pc); - } else -#endif - { + } else { u32 off = (ctx.pc - addr - 4) >> 2; u32 ins = ctx.code[addr >> 2] & 0xFF000000; ctx.code[addr >> 2] = ins | (off & 0x00FFFFFF); } + + // note that we have forward branches to this pc + // (to prevent optimizations from moving code + // behind them) + ctx.last_target_pc = ctx.pc; } void fixup_branches_fwd(Fixup fixup) { @@ -1820,17 +1877,32 @@ void gen_prologue(Object fn) { emit_mem(STW, LR, SP, 0); Object param = fn->first; + u32 off = 4; u32 r = 0; while (param != nil) { - emit_mem(STW, r, SP, (r + 1) * 4); + emit_mem(STW, r, SP, off); r++; + off += 4; param = param->next; } + // set aside space to spill temporary regs + // OPT: would be nice to only reserve space we need + ctx.spill_stack = off; + ctx.local_stack = off + 4 * tmp_reg_count; } void gen_epilogue(Object fn) { + if (ctx.local_stack > 0xFFFF) { + error("using too much stack"); + } + + // patch prologue + u32 ins = ctx.code[fn->value / 4]; + ctx.code[fn->value / 4] = (ins & 0xFFFF0000) | ctx.local_stack; + + // emit epilogue emit_mem(LDW, LR, SP, 0); - emit_opi(ADD, SP, SP, 4 + fn->type->len * 4); + emit_opi(ADD, SP, SP, ctx.local_stack); emit_br(AL, LR); } diff --git a/src/r5e.c b/src/r5e.c @@ -89,6 +89,6 @@ int main(int argc, char** argv) { // set SP risc_set_register(r, 14, sp); - risc_run(r, 100000); + risc_run(r, 100000000); return 0; } diff --git a/test/1025-fibonacci.log b/test/1025-fibonacci.log @@ -0,0 +1,31 @@ +D 00000000 +D 00000001 +D 00000001 +D 00000002 +D 00000003 +D 00000005 +D 00000008 +D 0000000d +D 00000015 +D 00000022 +D 00000037 +D 00000059 +D 00000090 +D 000000e9 +D 00000179 +D 00000262 +D 000003db +D 0000063d +D 00000a18 +D 00001055 +D 00001a6d +D 00002ac2 +D 0000452f +D 00006ff1 +D 0000b520 +D 00012511 +D 0001da31 +D 0002ff42 +D 0004d973 +D 0007d8b5 +X 00000007 diff --git a/test/1025-fibonacci.src b/test/1025-fibonacci.src @@ -0,0 +1,20 @@ + +func fib(n i32) i32 { + if (n < 2) { + return n; + } else { + return fib(n - 1) + fib(n - 2); + } +} + +func test(n i32, end i32) { + while (n < end) { + _hexout_(fib(n)); + n = n + 1; + } +} + +func start() i32 { + test(0, 30); + return 7; +}