new fibonacci test, new codegen bugfixes - compiler - Unnamed Compiled Systems Language Project

commit 7d28c912594987b5dc380f51b0647252e30a70ad
parent 0abd0555d53c600b54f1731d44ceda56eebe4a48
Author: Brian Swetland <swetland@frotz.net>
Date:   Wed, 11 Mar 2020 00:12:45 -0700

new fibonacci test, new codegen bugfixes

- increase max emulator cycles quite a bit
- reserve stack space for spilling tmp regs
- save/restore tmp regs around function calls
- shuffle r0 (func return) into a tmp reg immediately
  to avoid stepping on it in the case of back-to-back
  calls like: fib(n-1) + fib(n-2)
- track the total stack size needed for the current func
- patch the stack reservation in the prologue to reflect
  this total (which may grow due to locals, etc) when
  emitting the epilogue
- add some notes about nonoptimal codegen patterns
- ensure we un-reserve tmp regs if we discard function
  results (via new gen_discard())
- backend gains tmp_reg_{count,first,last}
- add ctx.last_target_pc to track fixed-up forward jumps
- use this to re-enable elide-branch-to-next optimization
  (when it's safe to do so)

Diffstat:
M docs/notebook.md  | 34 ++++++++++++++++++++++++++++++++++
M src/compiler.c  | 102 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------
M src/r5e.c  | 2 +-
A test/1025-fibonacci.log  | 31 +++++++++++++++++++++++++++++++
A test/1025-fibonacci.src  | 20 ++++++++++++++++++++

5 files changed, 173 insertions(+), 16 deletions(-)
diff --git a/docs/notebook.md b/docs/notebook.md
@@ -2,6 +2,40 @@
 
 A place to jot down ideas and/or commentary about the work in progress.
 
+### 2020 Mar 10 - Non-optimal codegen
+
+When a break / continue / return takes control out of a scope, we're
+not smart enough to discard stuff like the jump-around-else emitted
+at the end of an if block, resulting in a dead branch op:
+
+```text
+	if (n < 2) {
+		return n;
+
+00000020: 80e00004  ldw r0, [sp, 4]
+00000024: e700000d  b 0x5c
+00000028: e700000c  b 0x5c
+```
+
+We move return values from r0 to a temp register to cover cases where
+we may immediately need r0 again (another call, etc) but that means
+extra instructions and copies in other cases like `return fib(n - 1) + fib(n - 2);`
+
+```text
+0000002c: 88e00004  ldw r8, [sp, 4]   # get n
+00000030: 40890001  sub r0, r8, 1     # r0 = n - 1
+00000034: f7fffff3  bl 0x4            # r0 = fib(r0)
+00000038: 08000000  mov r8, r0        # r0 -> tmp0
+0000003c: 89e00004  ldw r9, [sp, 4]   # get n
+00000040: 40990002  sub r0, r9, 2     # r0 = n - 2
+00000044: a8e00008  stw r8, [sp, 8]   # save tmp0
+00000048: f7ffffee  bl 0x4            # r0 = fib(r0)
+0000004c: 88e00008  ldw r8, [sp, 8]   # restore tmp0
+00000050: 09000000  mov r9, r0        # r0 -> tmp1
+00000054: 00880009  add r0, r8, r9    # r0 = tmp0 + tmp1
+00000058: e7000000  b 0x5c            # jump to epilogue (return)
+```
+
 ### 2020 Mar 09 - Simplifying the Code
 
 Up til now I had been passing around a "compiler context" pointer, so
diff --git a/src/compiler.c b/src/compiler.c
@@ -233,7 +233,10 @@ struct CtxRec {
 	Object typetab;        // TODO: hashtable
 	Scope scope;           // scope stack
 	ScopeRec global;
+
 	Object fn;             // function being compiled if non-nil
+	u32 spill_stack;       // where to spill temp regs (TODO: dynamic)
+	u32 local_stack;       // how much stack we use (adjusted for vars, etc)
 
 	Type type_void;
 	Type type_byte;
@@ -245,6 +248,11 @@ struct CtxRec {
 	u32 code[8192];
 	u32 pc;
 
+	// The latest pc that a forward branch has been
+	// fixed up to.  cannot safely move code at or
+	// before this address.
+	u32 last_target_pc;
+
 	u32 regbits;
 
 	u32 xref[8192];
@@ -274,6 +282,9 @@ void gen_unary_op(u32 op, Item x);
 // stores val into var, consuming both
 void gen_store(Item val, Item var);
 
+// release any registers or other resources held by this item
+void gen_discard(Item val);
+
 // sets up call param #n, consuming val
 void gen_param(u32 n, Item val);
 
@@ -1214,6 +1225,8 @@ void parse_block() {
 				ItemRec y;
 				set_item(&y, iConst, ctx.type_int32, 0, 1, 0);
 				next();
+			} else {
+				gen_discard(&x);
 			}
 			require(tSEMI);
 		}
@@ -1407,12 +1420,15 @@ void parse_program() {
 
 // ================================================================
 
+#define tmp_reg_count  4
+#define tmp_reg_first  8
+#define tmp_reg_last  11
+
 u32 get_reg_tmp() {
-	u32 n = 8;
-	while (n < 12) {
+	u32 n = tmp_reg_first;
+	while (n <= tmp_reg_last) {
 		if (!(ctx.regbits & (1 << n))) {
 			ctx.regbits |= (1 << n);
-			//printf("GET REG %u\n", n);
 			return n;
 		}
 		n++;
@@ -1422,8 +1438,7 @@ u32 get_reg_tmp() {
 }
 
 void put_reg(u32 r) {
-	//printf("PUT REG %u\n", r);
-	if (r < 8) {
+	if ((r < tmp_reg_first) || (r > tmp_reg_last)) {
 		// currently we don't strictly track r0..r7
 		// they are used for function calls and returns
 		return;
@@ -1575,6 +1590,12 @@ void gen_load_reg(Item x, u32 r) {
 	x->r = r;
 }
 
+void gen_discard(Item x) {
+	if (x->kind == iReg) {
+		put_reg(x->r);
+	}
+}
+
 // convert an item to value-in-register format
 // if it's not already in that format
 void gen_load(Item x) {
@@ -1652,8 +1673,34 @@ void gen_builtin(u32 id) {
 	}
 }
 
+void gen_save_regs() {
+	u32 n = tmp_reg_first;
+	u32 off = ctx.spill_stack;
+	while (n <= tmp_reg_last) {
+		if (ctx.regbits & (1 << n)) {
+			emit_mem(STW, n, SP, off);
+			off += 4;
+		}
+		n++;
+	}
+}
+
+void gen_restore_regs() {
+	u32 n = tmp_reg_first;
+	u32 off = ctx.spill_stack;
+	while (n <= tmp_reg_last) {
+		if (ctx.regbits & (1 << n)) {
+			emit_mem(LDW, n, SP, off);
+			off += 4;
+		}
+		n++;
+	}
+}
+
 void gen_call(Item x) {
+	gen_save_regs();
 	if (x->type->obj->flags & ofBuiltin) {
+		// OPT: not all builtins will require save/restore regs
 		gen_builtin(x->type->obj->value);
 	} else if (x->type->obj->flags & ofDefined) {
 		u32 fnpc = x->type->obj->value;
@@ -1662,14 +1709,19 @@ void gen_call(Item x) {
 		emit_bi(AL|L, 0);
 		add_object_fixup(x->type->obj);
 	}
+	gen_restore_regs();
+
 	// item becomes the return value
 	x->type = x->type->base;
 	if (x->type == ctx.type_void) {
 		x->kind = iConst;
 	} else {
 		x->kind = iReg;
+		x->r = R0;
+		// OPT if we tracked r0 usage we could
+		// avoid moving from R0 to a tmp reg here
+		gen_load_reg(x, get_reg_tmp());
 	}
-	x->r = R0;
 	x->a = 0;
 	x->b = 0;
 }
@@ -1756,6 +1808,8 @@ void gen_mul_op(u32 op, Item x, Item y) {
 void gen_rel_op(u32 op, Item x, Item y) {
 	gen_load(x);
 	gen_load(y);
+
+	// fuse x and y items into the new Comparison Item x
 	x->kind = iComp;
 	x->a = x->r;
 	x->b = y->r;
@@ -1789,22 +1843,25 @@ void gen_unary_op(u32 op, Item x) {
 // patch branch instruction at addr to 
 // branch to current pc
 void fixup_branch_fwd(u32 addr) {
-#if 0
-	// disabled for now as this gets tripped up
-	// by stuff like test/1030-flow-control.src (if (n==3) ...)
-	if (addr == ctx.pc - 4) {
+	if ((addr == ctx.pc - 4) && (ctx.last_target_pc < ctx.pc)) {
 		// if the branch to be patched is the
 		// instruction just previously emitted, we
 		// can simply erase it
+		//
+		// provided it's safe to do so (no branches are
+		// already arriving here...)
 		ctx.pc -= 4;
 		fprintf(stderr, "DELETED BRANCH @ 0x%x\n", ctx.pc);
-	} else
-#endif
-       	{
+	} else {
 		u32 off = (ctx.pc - addr - 4) >> 2;
 		u32 ins = ctx.code[addr >> 2] & 0xFF000000;
 		ctx.code[addr >> 2] = ins | (off & 0x00FFFFFF);
 	}
+
+	// note that we have forward branches to this pc
+	// (to prevent optimizations from moving code
+	// behind them)
+	ctx.last_target_pc = ctx.pc;
 }
 
 void fixup_branches_fwd(Fixup fixup) {
@@ -1820,17 +1877,32 @@ void gen_prologue(Object fn) {
 	emit_mem(STW, LR, SP, 0);
 
 	Object param = fn->first;
+	u32 off = 4;
 	u32 r = 0;
 	while (param != nil) {
-		emit_mem(STW, r, SP, (r + 1) * 4);
+		emit_mem(STW, r, SP, off);
 		r++;
+		off += 4;
 		param = param->next;
 	}
+	// set aside space to spill temporary regs
+	// OPT: would be nice to only reserve space we need
+	ctx.spill_stack = off;
+	ctx.local_stack = off + 4 * tmp_reg_count;
 }
 
 void gen_epilogue(Object fn) {
+	if (ctx.local_stack > 0xFFFF) {
+		error("using too much stack");
+	}
+
+	// patch prologue
+	u32 ins = ctx.code[fn->value / 4];
+	ctx.code[fn->value / 4] = (ins & 0xFFFF0000) | ctx.local_stack;
+
+	// emit epilogue
 	emit_mem(LDW, LR, SP, 0);
-	emit_opi(ADD, SP, SP, 4 + fn->type->len * 4);
+	emit_opi(ADD, SP, SP, ctx.local_stack);
 	emit_br(AL, LR);
 }
 
diff --git a/src/r5e.c b/src/r5e.c
@@ -89,6 +89,6 @@ int main(int argc, char** argv) {
 	// set SP
 	risc_set_register(r, 14, sp);
 
-	risc_run(r, 100000);
+	risc_run(r, 100000000);
 	return 0;
 }
diff --git a/test/1025-fibonacci.log b/test/1025-fibonacci.log
@@ -0,0 +1,31 @@
+D 00000000
+D 00000001
+D 00000001
+D 00000002
+D 00000003
+D 00000005
+D 00000008
+D 0000000d
+D 00000015
+D 00000022
+D 00000037
+D 00000059
+D 00000090
+D 000000e9
+D 00000179
+D 00000262
+D 000003db
+D 0000063d
+D 00000a18
+D 00001055
+D 00001a6d
+D 00002ac2
+D 0000452f
+D 00006ff1
+D 0000b520
+D 00012511
+D 0001da31
+D 0002ff42
+D 0004d973
+D 0007d8b5
+X 00000007
diff --git a/test/1025-fibonacci.src b/test/1025-fibonacci.src
@@ -0,0 +1,20 @@
+
+func fib(n i32) i32 {
+	if (n < 2) {
+		return n;
+	} else {
+		return fib(n - 1) + fib(n - 2);
+	}
+}
+
+func test(n i32, end i32) {
+	while (n < end) {
+		_hexout_(fib(n));
+		n = n + 1;
+	}
+}
+	
+func start() i32 {
+	test(0, 30);
+	return 7;
+}

	compiler Unnamed Compiled Systems Language Project
	git clone http://frotz.net/git/compiler.git
	Log \| Files \| Refs

M	docs/notebook.md	\|	34	++++++++++++++++++++++++++++++++++
M	src/compiler.c	\|	102	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------
M	src/r5e.c	\|	2	+-
A	test/1025-fibonacci.log	\|	31	+++++++++++++++++++++++++++++++
A	test/1025-fibonacci.src	\|	20	++++++++++++++++++++