compiler

Unnamed Compiled Systems Language Project
git clone http://frotz.net/git/compiler.git
Log | Files | Refs

commit bfc333de1a5fa0062255bbe0e3bb1d348b33aed0
parent b80237b20cacb6453dd89e5b396bd43aa9425385
Author: Brian Swetland <swetland@frotz.net>
Date:   Fri, 10 Dec 2021 02:23:12 -0800

compiler2: disentangle lexer/parser/codegen a bit

- toss AST nodes BINOP and UNOP that stowed lexer tokens in ival
  in favor of distinct AST nodes for the various operations
- these nodes more closely track the operation names, avoid
  confusion between bitwise and binary ops, etc
- adjusted all places where BINOP/UNOP ival was used in favor
  of new distinct node types
- no more need for txnames table of printables for graph dumper

Diffstat:
Msrc/codegen-risc5-simple.c | 67+++++++++++++++++++++++++++++++------------------------------------
Msrc/compiler2.c | 213++++++++++++++++++++++++++++++++++++++++---------------------------------------
2 files changed, 139 insertions(+), 141 deletions(-)

diff --git a/src/codegen-risc5-simple.c b/src/codegen-risc5-simple.c @@ -149,7 +149,7 @@ void emit_bi(u32 op, u32 off) { u8 rel_op_to_cc_tab[6] = { EQ, NE, LT, LE, GT, GE }; u32 add_op_to_ins_tab[4] = { ADD, SUB, IOR, XOR }; -u32 mul_op_to_ins_tab[7] = { MUL, DIV, MOD, AND, ANN, LSL, ASR }; +u32 mul_op_to_ins_tab[6] = { MUL, DIV, MOD, AND, LSL, ASR }; // ------------------------------------------------------------------ @@ -400,7 +400,7 @@ u32 gen_relop(Ast node, u32 cc) { return res; } -u32 gen_logical_op(Ast node, u32 cc, u32 sc) { +u32 gen_short_circuit_op(Ast node, u32 cc, u32 sc) { u32 r = gen_expr(node->c0); emit_mov(R11, r); // set z flag put_reg(r); @@ -459,11 +459,12 @@ u32 gen_expr(Ast node) { err_ast = node; gen_src_xref(node); gen_trace("gen_expr()"); - if (node->kind == AST_CONST) { + u32 kind = node->kind; + if (kind == AST_CONST) { u32 r = get_reg_tmp(); emit_movi(r, node->ival); return r; - } else if (node->kind == AST_SYMBOL) { + } else if (kind == AST_SYMBOL) { u32 r = get_reg_tmp(); // XXX type checking here or before if (node->sym->kind == SYM_CONST) { @@ -475,42 +476,36 @@ u32 gen_expr(Ast node) { emit_mem(LDW, r, base, offset); } return r; - } else if (node->kind == AST_BINOP) { - u32 op = node->ival; - if (op == tASSIGN) { - return gen_assign(node->c0, node->c1); - } else if ((op & tcMASK) == tcRELOP) { - return gen_relop(node, rel_op_to_cc_tab[op - tEQ]); - } else if ((op & tcMASK) == tcADDOP) { - return gen_binop(node, add_op_to_ins_tab[op - tPLUS]); - } else if ((op & tcMASK) == tcMULOP) { - return gen_binop(node, mul_op_to_ins_tab[op - tSTAR]); - } else if (op == tOR) { - return gen_logical_op(node, NE, 1); - } else if (op == tAND) { - return gen_logical_op(node, EQ, 0); - } else { - error("gen_expr cannot handle binop %s\n", tnames[op]); - } - } else if (node->kind == AST_UNOP) { - u32 op = node->ival; + } else if (ast_kind_is_relop(kind)) { + return gen_relop(node, rel_op_to_cc_tab[kind - AST_EQ]); + } else if (ast_kind_is_addop(kind)) { + return gen_binop(node, add_op_to_ins_tab[kind - AST_ADD]); + } else if (ast_kind_is_mulop(kind)) { + return gen_binop(node, mul_op_to_ins_tab[kind - AST_MUL]); + } else if (kind == AST_BOOL_OR) { + return gen_short_circuit_op(node, NE, 1); + } else if (kind == AST_BOOL_AND) { + return gen_short_circuit_op(node, EQ, 0); + } else if (kind == AST_ASSIGN) { + return gen_assign(node->c0, node->c1); + } else if (kind == AST_NEG) { u32 r = gen_expr(node->c0); - if (op == tMINUS) { - emit_movi(R11, 0); - emit_op(SUB, r, R11, r); - } else if (op == tNOT) { - emit_opi(XOR, r, r, 0xffffffff); - } else if (op == tBANG) { - emit_opi(XOR, r, r, r); - } else { - error("gen_expr cannot handle unop %s\n", tnames[op]); - } + emit_movi(R11, 0); + emit_op(SUB, r, R11, r); return r; - } else if (node->kind == AST_CALL) { + } else if (kind == AST_NOT) { + u32 r = gen_expr(node->c0); + emit_opi(XOR, r, r, 0xffffffff); + return r; + } else if (kind == AST_BOOL_NOT) { + u32 r = gen_expr(node->c0); + emit_opi(XOR, r, r, r); + return r; + } else if (kind == AST_CALL) { return gen_call(node); - } else if (node->kind == AST_INDEX) { + } else if (kind == AST_INDEX) { return gen_array_read(node); - } else if (node->kind == AST_FIELD) { + } else if (kind == AST_FIELD) { return gen_struct_read(node); } else { error("gen_expr cannot handle %s\n", ast_kind[node->kind]); diff --git a/src/compiler2.c b/src/compiler2.c @@ -61,40 +61,66 @@ struct StringRec { }; enum { -// expression parts - AST_SYMBOL, - AST_CONST, - AST_STRING, - AST_BINOP, // c0=EXPR c1=EXPR - AST_UNOP, // c0=EXPR - AST_DEREF, // c0=EXPR type: pointer-to-... - AST_INDEX, // c0=EXPR type: array-of-... c1=EXPR index - AST_FIELD, // c0=EXPR type: struct c1=SYMBOL field - AST_ADDROF, // c0=EXPR type: lvalue +// top node + AST_PROGRAM, // c2=FUNC* +// program components (chained into a list by c2) + AST_FUNC, // c0=BLOCK // container of statements AST_BLOCK, // c0=STMT // statements (chained into a list by c2) AST_EXPR, // c0=EXPR - AST_CALL, // c0=NAME c2=EXPR* AST_WHILE, // c0=EXPR c1=BLOCK - AST_IF, // c0=IFELSE - AST_RETURN, // c0=EXPR AST_BREAK, AST_CONTINUE, + AST_RETURN, // c0=EXPR + AST_IF, // c0=IFELSE // sub-part of if AST_IFELSE, // c0=EXPR c1=BLOCKthen c2=BLOCKelse|IFELSE -// program components (chained into a list by c2) - AST_FUNC, // c0=BLOCK -// top node - AST_PROGRAM, // c2=(TYPEDEF | ENUMDEF | FUNC | GLOBAL)* +// expression parts + AST_SYMBOL, + AST_CONST, + AST_STRING, + AST_DEREF, // c0=EXPR type: pointer-to-... + AST_INDEX, // c0=EXPR type: array-of-... c1=EXPR index + AST_FIELD, // c0=EXPR type: struct c1=SYMBOL field + AST_ADDROF, // c0=EXPR type: lvalue + AST_CALL, // c0=NAME c2=EXPR* + AST_ASSIGN, + + // Rel Ops (maintain order matched w/ lexer) + AST_EQ, AST_NE, AST_LT, AST_LE, AST_GT, AST_GE, + // Add Ops (maintain order matched w/ lexer) + AST_ADD, AST_SUB, AST_OR, AST_XOR, + // Mul Ops (maintain order matched w/ lexer) + AST_MUL, AST_DIV, AST_MOD, AST_AND, AST_LSL, AST_LSR, + // uncategorized ops + AST_NOT, AST_BOOL_AND, AST_BOOL_OR, AST_BOOL_NOT, AST_NEG, + AST_KIND_COUNT }; -str ast_kind[AST_PROGRAM + 1] = { - "SYMBOL", "CONST", "STR", "BINOP", "UNOP", - "DEREF", "INDEX", "FIELD", "ADDROF", - "BLOCK", "EXPR", "CALL", "WHILE", "IF", - "RETURN", "BREAK", "CONTINUE", "IFELSE", - "FUNC", "PROGRAM", +bool ast_kind_is_relop(u32 kind) { + return (kind >= AST_EQ) && (kind <= AST_GE); +} +bool ast_kind_is_addop(u32 kind) { + return (kind >= AST_ADD) && (kind <= AST_XOR); +} +bool ast_kind_is_mulop(u32 kind) { + return (kind >= AST_MUL) && (kind <= AST_LSR); +} +bool ast_kind_is_binop(u32 kind) { + return (kind >= AST_EQ) && (kind <= AST_LSR); +} + +str ast_kind[AST_KIND_COUNT] = { + "PROGRAM", "FUNC", + "BLOCK", "EXPR", "WHILE", "BREAK", "CONTINUE", + "RETURN", "IF", "IFELSE", + "SYMBOL", "CONST", "STR", + "DEREF", "INDEX", "FIELD", "ADDROF", "CALL", "ASSIGN", + "EQ", "NE", "LT", "LE", "GT", "GE", + "ADD", "SUB", "OR", "XOR", + "MUL", "DIV", "MOD", "AND", "LSL", "LSR", + "NOT", "BOOL AND", "BOOL OR", "BOOL NOT", "NEG", }; struct AstRec { @@ -298,15 +324,15 @@ Ast ast_make(ast_t kind, u32 ival, String name, Symbol sym, Type type) { return a; } -Ast ast_make_binop(u32 op, Ast left, Ast right) { - Ast node = ast_make(AST_BINOP, op, nil, nil, nil); +Ast ast_make_binop(u32 kind, Ast left, Ast right) { + Ast node = ast_make(kind, 0, nil, nil, nil); node->c0 = left; node->c1 = right; return node; } -Ast ast_make_unop(u32 op, Ast child) { - Ast node = ast_make(AST_UNOP, op, nil, nil, nil); +Ast ast_make_unop(u32 kind, Ast child) { + Ast node = ast_make(kind, 0, nil, nil, nil); node->c0 = child; return node; } @@ -672,10 +698,10 @@ enum { // AddOps (do not reorder) tPLUS, tMINUS, tPIPE, tCARET, tx14, tx15, tx16, tx17, // MulOps (do not reorder) - tSTAR, tSLASH, tPERCENT, tAMP, tANDNOT, tLEFT, tRIGHT, tx1F, + tSTAR, tSLASH, tPERCENT, tAMP, tLEFT, tRIGHT, tx1E, tx1F, // AsnOps (do not reorder) tADDEQ, tSUBEQ, tOREQ, tXOREQ, tx24, tx25, tx26, tx27, - tMULEQ, tDIVEQ, tMODEQ, tANDEQ, tANNEQ, tLSEQ, tRSEQ, t2F, + tMULEQ, tDIVEQ, tMODEQ, tANDEQ, tLSEQ, tRSEQ, t2E, t2F, // Various, UnaryNot, LogicalOps, tSEMI, tCOLON, tDOT, tCOMMA, tNOT, tAND, tOR, tBANG, tASSIGN, tINC, tDEC, @@ -708,25 +734,6 @@ str tnames[] = { "<SPC>", "<INV>", "<DQT>", "<SQT>", "<MSC>", }; -// used by ast graph printer -str txnames[] = { - "<EOF>", "<EOL>", "{", "}", "[", "]", "(", ")", - "eq", "ne", "lt", "le", "gt", "ge", "", "", - "add", "sub", "or", "not","", "", "", "", - "mul", "div", "mod","and","ann", "lsl", "lsr", "", - "add set", "sub set", "or set", "not set", "", "", "", "", - "mul set", "div set", "mod set", "and set", "ann set", "lsl set", "lsr set", "", - ";", ":", "deref", ",", "bool not", "bool and", "bool or", "bool not", - "set", "inc", "dec", - "type", "func", "struct", "var", "enum", - "if", "else", "while", - "break", "continue", "return", - "for", "switch", "case", - "true", "false", "nil", - "<ID>", "<NUM>", "<STR>", - "<SPC>", "<INV>", "<DQT>", "<SQT>", "<MSC>", -}; - u8 lextab[256] = { tEOF, tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, tSPC, tEOL, tSPC, tINV, tSPC, tINV, tINV, @@ -965,7 +972,6 @@ token_t _next() { if (nc == '-') { tok = tDEC; nc = scan(); } } else if (tok == tAMP) { if (nc == '&') { tok = tAND; nc = scan(); } - else if (nc == '~') { tok = tANDNOT; nc = scan(); } } else if (tok == tPIPE) { if (nc == '|') { tok = tOR; nc = scan(); } } else if (tok == tGT) { @@ -1074,39 +1080,39 @@ i32 ast_get_const_i32(Ast node) { error("non-const symbol (%s) in constexpr\n", node->sym->name); } return node->sym->value; - } else if (node->kind == AST_BINOP) { + } else if (ast_kind_is_binop(node->kind)) { i32 left = ast_get_const_i32(node->c0); i32 right = ast_get_const_i32(node->c1); - u32 op = node->ival; - if (op == tPLUS) { + u32 op = node->kind; + if (op == AST_ADD) { return left + right; - } else if (op == tMINUS) { + } else if (op == AST_SUB) { return left - right; - } else if (op == tSTAR) { + } else if (op == AST_MUL) { return left * right; - } else if (op == tSLASH) { + } else if (op == AST_DIV) { return left / right; - } else if (op == tPERCENT) { + } else if (op == AST_MOD) { return left % right; - } else if (op == tAMP) { + } else if (op == AST_AND) { return left & right; - } else if (op == tPIPE) { + } else if (op == AST_OR) { return left | right; + } else if (op == AST_XOR) { + return left | right; + } else if (op == AST_BOOL_AND) { + return left && right; + } else if (op == AST_BOOL_OR) { + return left || right; } else { - error("unsupported const BINOP %s\n", tnames[op]); - } - } else if (node->kind == AST_UNOP) { - i32 left = ast_get_const_i32(node->c0); - u32 op = node->ival; - if (op == tPLUS) { - return left; - } else if (op == tMINUS) { - return -left; - } else if (op == tBANG) { - return !left; - } else { - error("unsupported const UNOP %s\n", tnames[op]); - } + error("unsupported const op %s\n", ast_kind[op]); + } + } else if (node->kind == AST_NEG) { + return -ast_get_const_i32(node->c0); + } else if (node->kind == AST_NOT) { + return ~ast_get_const_i32(node->c0); + } else if (node->kind == AST_BOOL_NOT) { + return !ast_get_const_i32(node->c0); } else { error("non-const expr (%s)\n", ast_kind[node->kind]); } @@ -1152,7 +1158,7 @@ Ast ast_require_struct_type(Ast node) { } if ((node->type->kind == TYPE_POINTER) && (node->type->base->kind == TYPE_RECORD)) { - return ast_make_deref(node); + return ast_make_deref(node); } error("expected a struct"); return nil; @@ -1279,19 +1285,25 @@ Ast parse_unary_expr() { if (op == tPLUS) { next(); return parse_unary_expr(); - } else if ((op == tMINUS) || (op == tBANG) || (op == tNOT) || (op == tAMP)) { - u32 op = ctx.tok; + } else if (op == tMINUS) { next(); - Ast node = ast_make_unop(op, parse_unary_expr()); - if (op == tAMP) { - node->type = type_make_ptr(node->c0->type); - node->kind = AST_ADDROF; - node->ival = 0; - } else if (op == tBANG) { - ast_type_compat(node, ctx.type_bool); - } else { - ast_type_compat(node, ctx.type_i32); - } + Ast node = ast_make_unop(AST_NEG, parse_unary_expr()); + ast_type_compat(node, ctx.type_i32); + return node; + } else if (op == tBANG) { + next(); + Ast node = ast_make_unop(AST_BOOL_NOT, parse_unary_expr()); + ast_type_compat(node, ctx.type_bool); + return node; + } else if (op == tNOT) { + next(); + Ast node = ast_make_unop(AST_NOT, parse_unary_expr()); + ast_type_compat(node, ctx.type_i32); + return node; + } else if (op == tAMP) { + next(); + Ast node = ast_make_unop(AST_ADDROF, parse_unary_expr()); + node->type = type_make_ptr(node->c0->type); return node; } else { return parse_primary_expr(); @@ -1301,7 +1313,7 @@ Ast parse_unary_expr() { Ast parse_mul_expr() { Ast node = parse_unary_expr(); while ((ctx.tok & tcMASK) == tcMULOP) { - u32 op = ctx.tok; + u32 op = (ctx.tok - tSTAR) + AST_MUL; next(); node = ast_make_binop(op, node, parse_unary_expr()); ast_type_compat(node, ctx.type_i32); @@ -1312,7 +1324,7 @@ Ast parse_mul_expr() { Ast parse_add_expr() { Ast node = parse_mul_expr(); while ((ctx.tok & tcMASK) == tcADDOP) { - u32 op = ctx.tok; + u32 op = (ctx.tok - tPLUS) + AST_ADD; next(); node = ast_make_binop(op, node, parse_mul_expr()); ast_type_compat(node, ctx.type_i32); @@ -1323,7 +1335,7 @@ Ast parse_add_expr() { Ast parse_rel_expr() { Ast node = parse_add_expr(); if ((ctx.tok & tcMASK) == tcRELOP) { - u32 op = ctx.tok; + u32 op = (ctx.tok - tEQ) + AST_EQ; next(); node = ast_make_binop(op, node, parse_add_expr()); ast_type_compat(node, ctx.type_bool); @@ -1332,12 +1344,11 @@ Ast parse_rel_expr() { } Ast parse_and_expr() { - // XXX needs to handle short-circuit codegen etc Ast node = parse_rel_expr(); if (ctx.tok == tAND) { while (ctx.tok == tAND) { next(); - node = ast_make_binop(tAND, node, parse_rel_expr()); + node = ast_make_binop(AST_BOOL_AND, node, parse_rel_expr()); ast_type_compat(node, ctx.type_bool); } } @@ -1349,7 +1360,7 @@ Ast parse_expr() { if (ctx.tok == tOR) { while (ctx.tok == tOR) { next(); - node = ast_make_binop(tOR, node, parse_and_expr()); + node = ast_make_binop(AST_BOOL_OR, node, parse_and_expr()); ast_type_compat(node, ctx.type_bool); } } @@ -1618,7 +1629,7 @@ Ast parse_local_var() { if (ctx.tok == tASSIGN) { next(); node = ast_make_simple(AST_EXPR, 0); - node->c0 = ast_make_binop(tASSIGN, ast_make_symbol(name, sym), parse_expr()); + node->c0 = ast_make_binop(AST_ASSIGN, ast_make_symbol(name, sym), parse_expr()); node->c0->type = node->c0->c1->type; } require(tSEMI); @@ -1670,7 +1681,7 @@ Ast parse_expr_statement() { if (ctx.tok == tASSIGN) { next(); right = parse_expr(); - node = ast_make_binop(tASSIGN, left, right); + node = ast_make_binop(AST_ASSIGN, left, right); node->type = node->c1->type; } else if ((ctx.tok & tcMASK) == tcAEQOP) { u32 op = ctx.tok; // - tADDEQ; @@ -1687,15 +1698,15 @@ Ast parse_expr_statement() { } else if ((ctx.tok == tINC) || (ctx.tok == tDEC)) { u32 op; if (ctx.tok == tINC) { - op = tPLUS; + op = AST_ADD; } else { - op = tMINUS; + op = AST_SUB; } next(); right = ast_make_const(1, ctx.type_i32); right = ast_make_binop(op, left, right); right->type = ctx.type_i32; - node = ast_make_binop(tASSIGN, left, right); + node = ast_make_binop(AST_ASSIGN, left, right); node->type = ctx.type_i32; // TODO: check type? } else { @@ -2025,12 +2036,6 @@ int _ast_dump(FILE* fp, Ast node, u32 indent, bool dumplist, Ast mark) { type_dump_compact(fp, node->type); } fprintf(fp, "\n"); - } else if ((node->kind == AST_BINOP) || (node->kind == AST_UNOP)) { - fprintf(fp, "%s ", tnames[node->ival]); - if (node->type != nil) { - type_dump_compact(fp, node->type); - } - fprintf(fp, "\n"); } else if (node->kind == AST_CONST) { fprintf(fp, "0x%x ", node->ival); if (node->type != nil) { @@ -2089,9 +2094,7 @@ void ast_dump(FILE* fp, Ast node, Ast mark) { void ast_dump_node(FILE* fp, Ast node, bool dump_c2) { fprintf(fp, "\"%p\" [ label=<<TABLE BORDER=\"0\" CELLBORDER=\"1\" CELLSPACING=\"0\">\n", node); fprintf(fp, "<TR><TD PORT=\"p0\" COLSPAN=\"3\">%s", ast_kind[node->kind]); - if ((node->kind == AST_BINOP) || (node->kind == AST_UNOP)) { - fprintf(fp, " %s", txnames[node->ival]); - } else if (node->kind == AST_CONST) { + if (node->kind == AST_CONST) { fprintf(fp, " 0x%x", node->ival); } else if (node->name != nil) { fprintf(fp, " %s", node->name->text);