commit bfc333de1a5fa0062255bbe0e3bb1d348b33aed0
parent b80237b20cacb6453dd89e5b396bd43aa9425385
Author: Brian Swetland <swetland@frotz.net>
Date:   Fri, 10 Dec 2021 02:23:12 -0800
compiler2: disentangle lexer/parser/codegen a bit
- toss AST nodes BINOP and UNOP that stowed lexer tokens in ival
  in favor of distinct AST nodes for the various operations
- these nodes more closely track the operation names, avoid
  confusion between bitwise and boolean ops, etc
- adjusted all places where BINOP/UNOP ival was used in favor
  of new distinct node types
- no more need for txnames table of printables for graph dumper
Diffstat:
2 files changed, 139 insertions(+), 141 deletions(-)
diff --git a/src/codegen-risc5-simple.c b/src/codegen-risc5-simple.c
@@ -149,7 +149,7 @@ void emit_bi(u32 op, u32 off) {
 
 u8 rel_op_to_cc_tab[6] = { EQ, NE, LT, LE, GT, GE };
 u32 add_op_to_ins_tab[4] = { ADD, SUB, IOR, XOR };
-u32 mul_op_to_ins_tab[7] = { MUL, DIV, MOD, AND, ANN, LSL, ASR };
+u32 mul_op_to_ins_tab[6] = { MUL, DIV, MOD, AND, LSL, ASR };
 
 // ------------------------------------------------------------------
 
@@ -400,7 +400,7 @@ u32 gen_relop(Ast node, u32 cc) {
 	return res;
 }
 
-u32 gen_logical_op(Ast node, u32 cc, u32 sc) {
+u32 gen_short_circuit_op(Ast node, u32 cc, u32 sc) {
 	u32 r = gen_expr(node->c0);
 	emit_mov(R11, r); // set z flag
 	put_reg(r);
@@ -459,11 +459,12 @@ u32 gen_expr(Ast node) {
 	err_ast = node;
 	gen_src_xref(node);
 	gen_trace("gen_expr()");
-	if (node->kind == AST_CONST) {
+	u32 kind = node->kind;
+	if (kind == AST_CONST) {
 		u32 r = get_reg_tmp();
 		emit_movi(r, node->ival);
 		return r;
-	} else if (node->kind == AST_SYMBOL) {
+	} else if (kind == AST_SYMBOL) {
 		u32 r = get_reg_tmp();
 		// XXX type checking here or before
 		if (node->sym->kind == SYM_CONST) {
@@ -475,42 +476,36 @@ u32 gen_expr(Ast node) {
 			emit_mem(LDW, r, base, offset);
 		}
 		return r;
-	} else if (node->kind == AST_BINOP) {
-		u32 op = node->ival;
-		if (op == tASSIGN) {
-			return gen_assign(node->c0, node->c1);
-		} else if ((op & tcMASK) == tcRELOP) {
-			return gen_relop(node, rel_op_to_cc_tab[op - tEQ]);
-		} else if ((op & tcMASK) == tcADDOP) {
-			return gen_binop(node, add_op_to_ins_tab[op - tPLUS]);
-		} else if ((op & tcMASK) == tcMULOP) {
-			return gen_binop(node, mul_op_to_ins_tab[op - tSTAR]);
-		} else if (op == tOR) {
-			return gen_logical_op(node, NE, 1);
-		} else if (op == tAND) {
-			return gen_logical_op(node, EQ, 0);
-		} else {
-			error("gen_expr cannot handle binop %s\n", tnames[op]);
-		}
-	} else if (node->kind == AST_UNOP) {
-		u32 op = node->ival;
+	} else if (ast_kind_is_relop(kind)) {
+		return gen_relop(node, rel_op_to_cc_tab[kind - AST_EQ]);
+	} else if (ast_kind_is_addop(kind)) {
+		return gen_binop(node, add_op_to_ins_tab[kind - AST_ADD]);
+	} else if (ast_kind_is_mulop(kind)) {
+		return gen_binop(node, mul_op_to_ins_tab[kind - AST_MUL]);
+	} else if (kind == AST_BOOL_OR) {
+		return gen_short_circuit_op(node, NE, 1);
+	} else if (kind == AST_BOOL_AND) {
+		return gen_short_circuit_op(node, EQ, 0);
+	} else if (kind == AST_ASSIGN) {
+		return gen_assign(node->c0, node->c1);
+	} else if (kind == AST_NEG) {
 		u32 r = gen_expr(node->c0);
-		if (op == tMINUS) {
-			emit_movi(R11, 0);
-			emit_op(SUB, r, R11, r);
-		} else if (op == tNOT) {
-			emit_opi(XOR, r, r, 0xffffffff);
-		} else if (op == tBANG) {
-			emit_opi(XOR, r, r, r);
-		} else {
-			error("gen_expr cannot handle unop %s\n", tnames[op]);
-		}
+		emit_movi(R11, 0);
+		emit_op(SUB, r, R11, r);
 		return r;
-	} else if (node->kind == AST_CALL) {
+	} else if (kind == AST_NOT) {
+		u32 r = gen_expr(node->c0);
+		emit_opi(XOR, r, r, 0xffffffff);
+		return r;
+	} else if (kind == AST_BOOL_NOT) {
+		u32 r = gen_expr(node->c0);
+		emit_opi(XOR, r, r, 1); // logical not: flip bit 0 (assumes bools are 0/1 - confirm); the old immediate was the register index r
+		return r;
+	} else if (kind == AST_CALL) {
 		return gen_call(node);
-	} else if (node->kind == AST_INDEX) {
+	} else if (kind == AST_INDEX) {
 		return gen_array_read(node);
-	} else if (node->kind == AST_FIELD) {
+	} else if (kind == AST_FIELD) {
 		return gen_struct_read(node);
 	} else {
 		error("gen_expr cannot handle %s\n", ast_kind[node->kind]);
diff --git a/src/compiler2.c b/src/compiler2.c
@@ -61,40 +61,66 @@ struct StringRec {
 };
 
 enum {
-// expression parts
-	AST_SYMBOL,
-	AST_CONST,
-	AST_STRING,
-	AST_BINOP,    // c0=EXPR c1=EXPR
-	AST_UNOP,     // c0=EXPR
-	AST_DEREF,    // c0=EXPR type: pointer-to-...
-	AST_INDEX,    // c0=EXPR type: array-of-...  c1=EXPR index
-	AST_FIELD,    // c0=EXPR type: struct        c1=SYMBOL field
-	AST_ADDROF,   // c0=EXPR type: lvalue
+// top node
+	AST_PROGRAM,  // c2=FUNC*
+// program components (chained into a list by c2)
+	AST_FUNC,     // c0=BLOCK
 // container of statements
 	AST_BLOCK,    // c0=STMT
 // statements (chained into a list by c2)
 	AST_EXPR,     // c0=EXPR
-	AST_CALL,     // c0=NAME c2=EXPR*
 	AST_WHILE,    // c0=EXPR c1=BLOCK
-	AST_IF,       // c0=IFELSE
-	AST_RETURN,   // c0=EXPR
 	AST_BREAK,
 	AST_CONTINUE,
+	AST_RETURN,   // c0=EXPR
+	AST_IF,       // c0=IFELSE
 // sub-part of if
 	AST_IFELSE,   // c0=EXPR c1=BLOCKthen c2=BLOCKelse|IFELSE
-// program components (chained into a list by c2)
-	AST_FUNC,     // c0=BLOCK
-// top node
-	AST_PROGRAM,  // c2=(TYPEDEF | ENUMDEF | FUNC | GLOBAL)*
+// expression parts
+	AST_SYMBOL,
+	AST_CONST,
+	AST_STRING,
+	AST_DEREF,    // c0=EXPR type: pointer-to-...
+	AST_INDEX,    // c0=EXPR type: array-of-...  c1=EXPR index
+	AST_FIELD,    // c0=EXPR type: struct        c1=SYMBOL field
+	AST_ADDROF,   // c0=EXPR type: lvalue
+	AST_CALL,     // c0=NAME c2=EXPR*
+	AST_ASSIGN,
+
+	// Rel Ops (maintain order matched w/ lexer)
+	AST_EQ, AST_NE, AST_LT, AST_LE, AST_GT, AST_GE,
+	// Add Ops (maintain order matched w/ lexer)
+	AST_ADD, AST_SUB, AST_OR, AST_XOR,
+	// Mul Ops (maintain order matched w/ lexer)
+	AST_MUL, AST_DIV, AST_MOD, AST_AND, AST_LSL, AST_LSR,
+	// uncategorized ops
+	AST_NOT, AST_BOOL_AND, AST_BOOL_OR, AST_BOOL_NOT, AST_NEG,
+	AST_KIND_COUNT
 };
 
-str ast_kind[AST_PROGRAM + 1] = {
-	"SYMBOL", "CONST", "STR", "BINOP", "UNOP",
-	"DEREF", "INDEX", "FIELD", "ADDROF",
-	"BLOCK", "EXPR", "CALL", "WHILE", "IF",
-	"RETURN", "BREAK", "CONTINUE", "IFELSE",
-	"FUNC", "PROGRAM",
+bool ast_kind_is_relop(u32 kind) {
+	return (kind >= AST_EQ) && (kind <= AST_GE);
+}
+bool ast_kind_is_addop(u32 kind) {
+	return (kind >= AST_ADD) && (kind <= AST_XOR);
+}
+bool ast_kind_is_mulop(u32 kind) {
+	return (kind >= AST_MUL) && (kind <= AST_LSR);
+}
+bool ast_kind_is_binop(u32 kind) {
+	return (kind >= AST_EQ) && (kind <= AST_LSR);
+}
+
+str ast_kind[AST_KIND_COUNT] = {
+	"PROGRAM", "FUNC",
+	"BLOCK", "EXPR", "WHILE", "BREAK", "CONTINUE",
+	"RETURN", "IF", "IFELSE",
+	"SYMBOL", "CONST", "STR",
+	"DEREF", "INDEX", "FIELD", "ADDROF", "CALL", "ASSIGN",
+	"EQ", "NE", "LT", "LE", "GT", "GE",
+	"ADD", "SUB", "OR", "XOR",
+	"MUL", "DIV", "MOD", "AND", "LSL", "LSR",
+	"NOT", "BOOL AND", "BOOL OR", "BOOL NOT", "NEG",
 };
 
 struct AstRec {
@@ -298,15 +324,15 @@ Ast ast_make(ast_t kind, u32 ival, String name, Symbol sym, Type type) {
 	return a;
 }
 
-Ast ast_make_binop(u32 op, Ast left, Ast right) {
-	Ast node = ast_make(AST_BINOP, op, nil, nil, nil);
+Ast ast_make_binop(u32 kind, Ast left, Ast right) {
+	Ast node = ast_make(kind, 0, nil, nil, nil);
 	node->c0 = left;
 	node->c1 = right;
 	return node;
 }
 
-Ast ast_make_unop(u32 op, Ast child) {
-	Ast node = ast_make(AST_UNOP, op, nil, nil, nil);
+Ast ast_make_unop(u32 kind, Ast child) {
+	Ast node = ast_make(kind, 0, nil, nil, nil);
 	node->c0 = child;
 	return node;
 }
@@ -672,10 +698,10 @@ enum {
 	// AddOps (do not reorder)
 	tPLUS, tMINUS, tPIPE, tCARET, tx14, tx15, tx16, tx17,
 	// MulOps (do not reorder)
-	tSTAR, tSLASH, tPERCENT, tAMP, tANDNOT, tLEFT, tRIGHT, tx1F,
+	tSTAR, tSLASH, tPERCENT, tAMP, tLEFT, tRIGHT, tx1E, tx1F,
 	// AsnOps (do not reorder)
 	tADDEQ, tSUBEQ, tOREQ, tXOREQ, tx24, tx25, tx26, tx27,
-	tMULEQ, tDIVEQ, tMODEQ, tANDEQ, tANNEQ, tLSEQ, tRSEQ, t2F,
+	tMULEQ, tDIVEQ, tMODEQ, tANDEQ, tLSEQ, tRSEQ, t2E, t2F,
 	// Various, UnaryNot, LogicalOps,
 	tSEMI, tCOLON, tDOT, tCOMMA, tNOT, tAND, tOR, tBANG,
 	tASSIGN, tINC, tDEC,
@@ -708,25 +734,6 @@ str tnames[] = {
 	"<SPC>", "<INV>", "<DQT>", "<SQT>", "<MSC>",
 };
 
-// used by ast graph printer
-str txnames[] = {
-	"<EOF>", "<EOL>", "{",  "}",  "[",   "]",   "(",   ")",
-	"eq",    "ne",    "lt", "le", "gt",  "ge",  "",    "",
-	"add",   "sub",   "or", "not","",    "",    "",    "",
-	"mul",   "div",   "mod","and","ann", "lsl", "lsr", "",
-	"add set", "sub set", "or set",  "not set", "", "", "", "",
-	"mul set", "div set", "mod set", "and set", "ann set", "lsl set", "lsr set", "",
-	";",  ":", "deref", ",", "bool not", "bool and", "bool or",  "bool not",
-	"set", "inc", "dec",
-	"type", "func", "struct", "var", "enum",
-	"if", "else", "while",
-	"break", "continue", "return",
-	"for", "switch", "case",
-	"true", "false", "nil",
-	"<ID>", "<NUM>", "<STR>",
-	"<SPC>", "<INV>", "<DQT>", "<SQT>", "<MSC>",
-};
-
 u8 lextab[256] = {
 	tEOF, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
 	tINV, tSPC, tEOL, tSPC, tINV, tSPC, tINV, tINV,
@@ -965,7 +972,6 @@ token_t _next() {
 			if (nc == '-') { tok = tDEC; nc = scan(); }
 		} else if (tok == tAMP) {
 			if (nc == '&') { tok = tAND; nc = scan(); }
-			else if (nc == '~') { tok = tANDNOT; nc = scan(); }
 		} else if (tok == tPIPE) {
 			if (nc == '|') { tok = tOR; nc = scan(); }
 		} else if (tok == tGT) {
@@ -1074,39 +1080,39 @@ i32 ast_get_const_i32(Ast node) {
 			error("non-const symbol (%s) in constexpr\n", node->sym->name);
 		}
 		return node->sym->value;
-	} else if (node->kind == AST_BINOP) {
+	} else if (ast_kind_is_binop(node->kind) || (node->kind == AST_BOOL_AND) || (node->kind == AST_BOOL_OR)) { // BOOL_AND/BOOL_OR lie outside the AST_EQ..AST_LSR range
 		i32 left = ast_get_const_i32(node->c0);
 		i32 right = ast_get_const_i32(node->c1);
-		u32 op = node->ival;
-		if (op == tPLUS) {
+		u32 op = node->kind;
+		if (op == AST_ADD) {
 			return left + right;
-		} else if (op == tMINUS) {
+		} else if (op == AST_SUB) {
 			return left - right;
-		} else if (op == tSTAR) {
+		} else if (op == AST_MUL) {
 			return left * right;
-		} else if (op == tSLASH) {
+		} else if (op == AST_DIV) {
 			return left / right;
-		} else if (op == tPERCENT) {
+		} else if (op == AST_MOD) {
 			return left % right;
-		} else if (op == tAMP) {
+		} else if (op == AST_AND) {
 			return left & right;
-		} else if (op == tPIPE) {
+		} else if (op == AST_OR) {
 			return left | right;
+		} else if (op == AST_XOR) {
+			return left ^ right; // exclusive-or; previously duplicated the AST_OR arm
+		} else if (op == AST_BOOL_AND) {
+			return left && right;
+		} else if (op == AST_BOOL_OR) {
+			return left || right;
 		} else {
-			error("unsupported const BINOP %s\n", tnames[op]);
-		}
-	} else if (node->kind == AST_UNOP) {
-		i32 left = ast_get_const_i32(node->c0);
-		u32 op = node->ival;
-		if (op == tPLUS) {
-			return left;
-		} else if (op == tMINUS) {
-			return -left;
-		} else if (op == tBANG) {
-			return !left;
-		} else {
-			error("unsupported const UNOP %s\n", tnames[op]);
-		}
+			error("unsupported const op %s\n", ast_kind[op]);
+		}
+	} else if (node->kind == AST_NEG) {
+		return -ast_get_const_i32(node->c0);
+	} else if (node->kind == AST_NOT) {
+		return ~ast_get_const_i32(node->c0);
+	} else if (node->kind == AST_BOOL_NOT) {
+		return !ast_get_const_i32(node->c0);
 	} else {
 		error("non-const expr (%s)\n", ast_kind[node->kind]);
 	}
@@ -1152,7 +1158,7 @@ Ast ast_require_struct_type(Ast node) {
 	}
 	if ((node->type->kind == TYPE_POINTER) &&
 	    (node->type->base->kind == TYPE_RECORD)) {
-	    	return ast_make_deref(node);
+		return ast_make_deref(node);
 	}
 	error("expected a struct");
 	return nil;
@@ -1279,19 +1285,25 @@ Ast parse_unary_expr() {
 	if (op == tPLUS) {
 		next();
 		return parse_unary_expr();
-	} else if ((op == tMINUS) || (op == tBANG) || (op == tNOT) || (op == tAMP)) {
-		u32 op = ctx.tok;
+	} else if (op == tMINUS) {
 		next();
-		Ast node = ast_make_unop(op, parse_unary_expr());
-		if (op == tAMP) {
-			node->type = type_make_ptr(node->c0->type);
-			node->kind = AST_ADDROF;
-			node->ival = 0;
-		} else if (op == tBANG) {
-			ast_type_compat(node, ctx.type_bool);
-		} else {
-			ast_type_compat(node, ctx.type_i32);
-		}
+		Ast node = ast_make_unop(AST_NEG, parse_unary_expr());
+		ast_type_compat(node, ctx.type_i32);
+		return node;
+	} else if (op == tBANG) {
+		next();
+		Ast node = ast_make_unop(AST_BOOL_NOT, parse_unary_expr());
+		ast_type_compat(node, ctx.type_bool);
+		return node;
+	} else if (op == tNOT) {
+		next();
+		Ast node = ast_make_unop(AST_NOT, parse_unary_expr());
+		ast_type_compat(node, ctx.type_i32);
+		return node;
+	} else if (op == tAMP) {
+		next();
+		Ast node = ast_make_unop(AST_ADDROF, parse_unary_expr());
+		node->type = type_make_ptr(node->c0->type);
 		return node;
 	} else {
 		return parse_primary_expr();
@@ -1301,7 +1313,7 @@ Ast parse_unary_expr() {
 Ast parse_mul_expr() {
 	Ast node = parse_unary_expr();
 	while ((ctx.tok & tcMASK) == tcMULOP) {
-		u32 op = ctx.tok;
+		u32 op = (ctx.tok - tSTAR) + AST_MUL;
 		next();
 		node = ast_make_binop(op, node, parse_unary_expr());
 		ast_type_compat(node, ctx.type_i32);
@@ -1312,7 +1324,7 @@ Ast parse_mul_expr() {
 Ast parse_add_expr() {
 	Ast node = parse_mul_expr();
 	while ((ctx.tok & tcMASK) == tcADDOP) {
-		u32 op = ctx.tok;
+		u32 op = (ctx.tok - tPLUS) + AST_ADD;
 		next();
 		node = ast_make_binop(op, node, parse_mul_expr());
 		ast_type_compat(node, ctx.type_i32);
@@ -1323,7 +1335,7 @@ Ast parse_add_expr() {
 Ast parse_rel_expr() {
 	Ast node = parse_add_expr();
 	if ((ctx.tok & tcMASK) == tcRELOP) {
-		u32 op = ctx.tok;
+		u32 op = (ctx.tok - tEQ) + AST_EQ;
 		next();
 		node = ast_make_binop(op, node, parse_add_expr());
 		ast_type_compat(node, ctx.type_bool);
@@ -1332,12 +1344,11 @@ Ast parse_rel_expr() {
 }
 
 Ast parse_and_expr() {
-	// XXX needs to handle short-circuit codegen etc
 	Ast node = parse_rel_expr();
 	if (ctx.tok == tAND) {
 		while (ctx.tok == tAND) {
 			next();
-			node = ast_make_binop(tAND, node, parse_rel_expr());
+			node = ast_make_binop(AST_BOOL_AND, node, parse_rel_expr());
 			ast_type_compat(node, ctx.type_bool);
 		}
 	}
@@ -1349,7 +1360,7 @@ Ast parse_expr() {
 	if (ctx.tok == tOR) {
 		while (ctx.tok == tOR) {
 			next();
-			node = ast_make_binop(tOR, node, parse_and_expr());
+			node = ast_make_binop(AST_BOOL_OR, node, parse_and_expr());
 			ast_type_compat(node, ctx.type_bool);
 		}
 	}
@@ -1618,7 +1629,7 @@ Ast parse_local_var() {
 	if (ctx.tok == tASSIGN) {
 		next();
 		node = ast_make_simple(AST_EXPR, 0);
-		node->c0 = ast_make_binop(tASSIGN, ast_make_symbol(name, sym), parse_expr());
+		node->c0 = ast_make_binop(AST_ASSIGN, ast_make_symbol(name, sym), parse_expr());
 		node->c0->type = node->c0->c1->type;
 	}
 	require(tSEMI);
@@ -1670,7 +1681,7 @@ Ast parse_expr_statement() {
 	if (ctx.tok == tASSIGN) {
 		next();
 		right = parse_expr();
-		node = ast_make_binop(tASSIGN, left, right);
+		node = ast_make_binop(AST_ASSIGN, left, right);
 		node->type = node->c1->type;
 	} else if ((ctx.tok & tcMASK) == tcAEQOP) {
 		u32 op = ctx.tok; // - tADDEQ;
@@ -1687,15 +1698,15 @@ Ast parse_expr_statement() {
 	} else if ((ctx.tok == tINC) || (ctx.tok == tDEC)) {
 		u32 op;
 		if (ctx.tok == tINC) {
-			op = tPLUS;
+			op = AST_ADD;
 		} else {
-			op = tMINUS;
+			op = AST_SUB;
 		}
 		next();
 		right = ast_make_const(1, ctx.type_i32);
 		right = ast_make_binop(op, left, right);
 		right->type = ctx.type_i32;
-		node = ast_make_binop(tASSIGN, left, right);
+		node = ast_make_binop(AST_ASSIGN, left, right);
 		node->type = ctx.type_i32;
 		// TODO: check type?
 	} else {
@@ -2025,12 +2036,6 @@ int _ast_dump(FILE* fp, Ast node, u32 indent, bool dumplist, Ast mark) {
 			type_dump_compact(fp, node->type);
 		}
 		fprintf(fp, "\n");
-	} else if ((node->kind == AST_BINOP) || (node->kind == AST_UNOP)) {
-		fprintf(fp, "%s ", tnames[node->ival]);
-		if (node->type != nil) {
-			type_dump_compact(fp, node->type);
-		}
-		fprintf(fp, "\n");
 	} else if (node->kind == AST_CONST) {
 		fprintf(fp, "0x%x ", node->ival);
 		if (node->type != nil) {
@@ -2089,9 +2094,7 @@ void ast_dump(FILE* fp, Ast node, Ast mark) {
 void ast_dump_node(FILE* fp, Ast node, bool dump_c2) {
 	fprintf(fp, "\"%p\" [ label=<<TABLE BORDER=\"0\" CELLBORDER=\"1\" CELLSPACING=\"0\">\n", node);
 	fprintf(fp, "<TR><TD PORT=\"p0\" COLSPAN=\"3\">%s", ast_kind[node->kind]);
-	if ((node->kind == AST_BINOP) || (node->kind == AST_UNOP)) {
-		fprintf(fp, " %s", txnames[node->ival]);
-	} else if (node->kind == AST_CONST) {
+	if (node->kind == AST_CONST) {
 		fprintf(fp, " 0x%x", node->ival);
 	} else if (node->name != nil) {
 		fprintf(fp, " %s", node->name->text);