commit 7d28c912594987b5dc380f51b0647252e30a70ad
parent 0abd0555d53c600b54f1731d44ceda56eebe4a48
Author: Brian Swetland <swetland@frotz.net>
Date: Wed, 11 Mar 2020 00:12:45 -0700
new fibonacci test, new codegen bugfixes
- increase max emulator cycles quite a bit
- reserve stack space for spilling tmp regs
- save/restore tmp regs around function calls
- shuffle r0 (func return) into a tmp reg immediately
to avoid stepping on it in the case of back-to-back
calls like: fib(n-1) + fib(n-2)
- track the total stack size needed for the current func
- patch the stack reservation in the prologue to reflect
this total (which may grow due to locals, etc) when
emitting the epilogue
- add some notes about nonoptimal codegen patterns
- ensure we un-reserve tmp regs if we discard function
results (via new gen_discard())
- backend gains tmp_reg_{count,first,last}
- add ctx.last_target_pc to track fixed-up forward jumps
- use this to re-enable elide-branch-to-next optimization
(when it's safe to do so)
Diffstat:
5 files changed, 173 insertions(+), 16 deletions(-)
diff --git a/docs/notebook.md b/docs/notebook.md
@@ -2,6 +2,40 @@
A place to jot down ideas and/or commentary about the work in progress.
+### 2020 Mar 10 - Non-optimal codegen
+
+When a break / continue / return takes control out of a scope, we're
+not smart enough to discard stuff like the jump-around-else emitted
+at the end of an if block, resulting in a dead branch op:
+
+```text
+ if (n < 2) {
+ return n;
+
+00000020: 80e00004 ldw r0, [sp, 4]
+00000024: e700000d b 0x5c
+00000028: e700000c b 0x5c
+```
+
+We move return values from r0 to a temp register to cover cases where
+we may immediately need r0 again (another call, etc) but that means
+extra instructions and copies in other cases like `return fib(n - 1) + fib(n - 2);`
+
+```text
+0000002c: 88e00004 ldw r8, [sp, 4] # get n
+00000030: 40890001 sub r0, r8, 1 # r0 = n - 1
+00000034: f7fffff3 bl 0x4 # r0 = fib(r0)
+00000038: 08000000 mov r8, r0 # r0 -> tmp0
+0000003c: 89e00004 ldw r9, [sp, 4] # get n
+00000040: 40990002 sub r0, r9, 2 # r0 = n - 2
+00000044: a8e00008 stw r8, [sp, 8] # save tmp0
+00000048: f7ffffee bl 0x4 # r0 = fib(r0)
+0000004c: 88e00008 ldw r8, [sp, 8] # restore tmp0
+00000050: 09000000 mov r9, r0 # r0 -> tmp1
+00000054: 00880009 add r0, r8, r9 # r0 = tmp0 + tmp1
+00000058: e7000000 b 0x5c # jump to epilogue (return)
+```
+
### 2020 Mar 09 - Simplifying the Code
Up til now I had been passing around a "compiler context" pointer, so
diff --git a/src/compiler.c b/src/compiler.c
@@ -233,7 +233,10 @@ struct CtxRec {
Object typetab; // TODO: hashtable
Scope scope; // scope stack
ScopeRec global;
+
Object fn; // function being compiled if non-nil
+ u32 spill_stack; // where to spill temp regs (TODO: dynamic)
+ u32 local_stack; // how much stack we use (adjusted for vars, etc)
Type type_void;
Type type_byte;
@@ -245,6 +248,11 @@ struct CtxRec {
u32 code[8192];
u32 pc;
+ // The latest pc that a forward branch has been
+ // fixed up to. cannot safely move code at or
+ // before this address.
+ u32 last_target_pc;
+
u32 regbits;
u32 xref[8192];
@@ -274,6 +282,9 @@ void gen_unary_op(u32 op, Item x);
// stores val into var, consuming both
void gen_store(Item val, Item var);
+// release any registers or other resources held by this item
+void gen_discard(Item val);
+
// sets up call param #n, consuming val
void gen_param(u32 n, Item val);
@@ -1214,6 +1225,8 @@ void parse_block() {
ItemRec y;
set_item(&y, iConst, ctx.type_int32, 0, 1, 0);
next();
+ } else {
+ gen_discard(&x);
}
require(tSEMI);
}
@@ -1407,12 +1420,15 @@ void parse_program() {
// ================================================================
+#define tmp_reg_count 4
+#define tmp_reg_first 8
+#define tmp_reg_last 11
+
u32 get_reg_tmp() {
- u32 n = 8;
- while (n < 12) {
+ u32 n = tmp_reg_first;
+ while (n <= tmp_reg_last) {
if (!(ctx.regbits & (1 << n))) {
ctx.regbits |= (1 << n);
- //printf("GET REG %u\n", n);
return n;
}
n++;
@@ -1422,8 +1438,7 @@ u32 get_reg_tmp() {
}
void put_reg(u32 r) {
- //printf("PUT REG %u\n", r);
- if (r < 8) {
+ if ((r < tmp_reg_first) || (r > tmp_reg_last)) {
// currently we don't strictly track r0..r7
// they are used for function calls and returns
return;
@@ -1575,6 +1590,12 @@ void gen_load_reg(Item x, u32 r) {
x->r = r;
}
+void gen_discard(Item x) {
+ if (x->kind == iReg) {
+ put_reg(x->r);
+ }
+}
+
// convert an item to value-in-register format
// if it's not already in that format
void gen_load(Item x) {
@@ -1652,8 +1673,34 @@ void gen_builtin(u32 id) {
}
}
+void gen_save_regs() {
+ u32 n = tmp_reg_first;
+ u32 off = ctx.spill_stack;
+ while (n <= tmp_reg_last) {
+ if (ctx.regbits & (1 << n)) {
+ emit_mem(STW, n, SP, off);
+ off += 4;
+ }
+ n++;
+ }
+}
+
+void gen_restore_regs() {
+ u32 n = tmp_reg_first;
+ u32 off = ctx.spill_stack;
+ while (n <= tmp_reg_last) {
+ if (ctx.regbits & (1 << n)) {
+ emit_mem(LDW, n, SP, off);
+ off += 4;
+ }
+ n++;
+ }
+}
+
void gen_call(Item x) {
+ gen_save_regs();
if (x->type->obj->flags & ofBuiltin) {
+ // OPT: not all builtins will require save/restore regs
gen_builtin(x->type->obj->value);
} else if (x->type->obj->flags & ofDefined) {
u32 fnpc = x->type->obj->value;
@@ -1662,14 +1709,19 @@ void gen_call(Item x) {
emit_bi(AL|L, 0);
add_object_fixup(x->type->obj);
}
+ gen_restore_regs();
+
// item becomes the return value
x->type = x->type->base;
if (x->type == ctx.type_void) {
x->kind = iConst;
} else {
x->kind = iReg;
+ x->r = R0;
+ // OPT if we tracked r0 usage we could
+ // avoid moving from R0 to a tmp reg here
+ gen_load_reg(x, get_reg_tmp());
}
- x->r = R0;
x->a = 0;
x->b = 0;
}
@@ -1756,6 +1808,8 @@ void gen_mul_op(u32 op, Item x, Item y) {
void gen_rel_op(u32 op, Item x, Item y) {
gen_load(x);
gen_load(y);
+
+ // fuse x and y items into the new Comparison Item x
x->kind = iComp;
x->a = x->r;
x->b = y->r;
@@ -1789,22 +1843,25 @@ void gen_unary_op(u32 op, Item x) {
// patch branch instruction at addr to
// branch to current pc
void fixup_branch_fwd(u32 addr) {
-#if 0
- // disabled for now as this gets tripped up
- // by stuff like test/1030-flow-control.src (if (n==3) ...)
- if (addr == ctx.pc - 4) {
+ if ((addr == ctx.pc - 4) && (ctx.last_target_pc < ctx.pc)) {
// if the branch to be patched is the
// instruction just previously emitted, we
// can simply erase it
+ //
+ // provided it's safe to do so (no branches are
+ // already arriving here...)
ctx.pc -= 4;
fprintf(stderr, "DELETED BRANCH @ 0x%x\n", ctx.pc);
- } else
-#endif
- {
+ } else {
u32 off = (ctx.pc - addr - 4) >> 2;
u32 ins = ctx.code[addr >> 2] & 0xFF000000;
ctx.code[addr >> 2] = ins | (off & 0x00FFFFFF);
}
+
+ // note that we have forward branches to this pc
+ // (to prevent optimizations from moving code
+ // behind them)
+ ctx.last_target_pc = ctx.pc;
}
void fixup_branches_fwd(Fixup fixup) {
@@ -1820,17 +1877,32 @@ void gen_prologue(Object fn) {
emit_mem(STW, LR, SP, 0);
Object param = fn->first;
+ u32 off = 4;
u32 r = 0;
while (param != nil) {
- emit_mem(STW, r, SP, (r + 1) * 4);
+ emit_mem(STW, r, SP, off);
r++;
+ off += 4;
param = param->next;
}
+ // set aside space to spill temporary regs
+ // OPT: would be nice to only reserve space we need
+ ctx.spill_stack = off;
+ ctx.local_stack = off + 4 * tmp_reg_count;
}
void gen_epilogue(Object fn) {
+ if (ctx.local_stack > 0xFFFF) {
+ error("using too much stack");
+ }
+
+ // patch prologue
+ u32 ins = ctx.code[fn->value / 4];
+ ctx.code[fn->value / 4] = (ins & 0xFFFF0000) | ctx.local_stack;
+
+ // emit epilogue
emit_mem(LDW, LR, SP, 0);
- emit_opi(ADD, SP, SP, 4 + fn->type->len * 4);
+ emit_opi(ADD, SP, SP, ctx.local_stack);
emit_br(AL, LR);
}
diff --git a/src/r5e.c b/src/r5e.c
@@ -89,6 +89,6 @@ int main(int argc, char** argv) {
// set SP
risc_set_register(r, 14, sp);
- risc_run(r, 100000);
+ risc_run(r, 100000000);
return 0;
}
diff --git a/test/1025-fibonacci.log b/test/1025-fibonacci.log
@@ -0,0 +1,31 @@
+D 00000000
+D 00000001
+D 00000001
+D 00000002
+D 00000003
+D 00000005
+D 00000008
+D 0000000d
+D 00000015
+D 00000022
+D 00000037
+D 00000059
+D 00000090
+D 000000e9
+D 00000179
+D 00000262
+D 000003db
+D 0000063d
+D 00000a18
+D 00001055
+D 00001a6d
+D 00002ac2
+D 0000452f
+D 00006ff1
+D 0000b520
+D 00012511
+D 0001da31
+D 0002ff42
+D 0004d973
+D 0007d8b5
+X 00000007
diff --git a/test/1025-fibonacci.src b/test/1025-fibonacci.src
@@ -0,0 +1,20 @@
+
+func fib(n i32) i32 {
+ if (n < 2) {
+ return n;
+ } else {
+ return fib(n - 1) + fib(n - 2);
+ }
+}
+
+func test(n i32, end i32) {
+ while (n < end) {
+ _hexout_(fib(n));
+ n = n + 1;
+ }
+}
+
+func start() i32 {
+ test(0, 30);
+ return 7;
+}