compiler

Unnamed Compiled Systems Language Project
git clone http://frotz.net/git/compiler.git
Log | Files | Refs

commit b80237b20cacb6453dd89e5b396bd43aa9425385
parent 8125c998aab45c700e83c3e735a0625190c82f09
Author: Brian Swetland <swetland@frotz.net>
Date:   Thu,  9 Dec 2021 23:59:43 -0800

compiler2: all tests pass

- rig up type tracking across expressions
  (expr AST nodes should now all have a valid type)
- remove TYPEDEF, ENUMDEF, and GLOBAL AST nodes.
- types, enums, globals are managed by the symbol table(s) / scope / etc
- FIELD is now the access-a-struct's-field operation
- ADDROF is &'s explicit "give me an address" operation
- toss SYM_IS_REFERENCE hack in favor of type tracking
- tidy up type comparison
- start enforcing some type compatibility
- some more AST node type validation
- clean up gen_addr_expr()
- simplify gen_assign()
- implement struct field access

Diffstat:
Msrc/codegen-risc5-simple.c | 153+++++++++++++++++++++++++++-----------------------------------------------------
Msrc/compiler2.c | 212++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------
2 files changed, 201 insertions(+), 164 deletions(-)

diff --git a/src/codegen-risc5-simple.c b/src/codegen-risc5-simple.c @@ -249,86 +249,50 @@ void sym_get_loc(Symbol sym, u32* base, i32* offset) { } } -u32 gen_addr_expr(Ast expr, Type type) { - if (expr->kind == AST_SYMBOL) { +u32 gen_addr_expr(Ast node) { + if (node->kind == AST_DEREF) { + u32 r = gen_addr_expr(node->c0); + emit_mem(LDW, r, r, 0); + return r; + } else if (node->kind == AST_INDEX) { + u32 raddr = gen_addr_expr(node->c0); + u32 roff = gen_expr(node->c1); + // MUL BY SIZE + emit_op(ADD, raddr, raddr, roff); + put_reg(roff); + return raddr; + } else if (node->kind == AST_FIELD) { + u32 raddr = gen_addr_expr(node->c0); + u32 off = node->c1->sym->value; + // HANDLE non-word-sized + emit_opi(ADD, raddr, raddr, off); + return raddr; + } else if (node->kind == AST_SYMBOL) { u32 base; i32 offset; - sym_get_loc(expr->sym, &base, &offset); + sym_get_loc(node->sym, &base, &offset); u32 r = get_reg_tmp(); - if (expr->sym->flags & SYM_IS_REFERENCE) { - // reference variable, so its content is an address - // return that - emit_mem(LDW, r, base, offset); - } else { - // inline variable, so its base + offset is the - // address for its content - emit_opi(ADD, r, base, offset); - } + emit_opi(ADD, r, base, offset); return r; + } else if (node->kind == AST_ADDROF) { + return gen_addr_expr(node->c0); } else { - err_ast = expr; - error("gen_addr_expr cannot handle %s", ast_kind[expr->kind]); + err_ast = node; + error("gen_addr_expr cannot handle %s", ast_kind[node->kind]); } return 0; } -u32 gen_assign_expr(Ast expr, Symbol sym) { - if (sym->flags & SYM_IS_REFERENCE) { - return gen_addr_expr(expr, sym->type); - } else if (sym->type->kind == TYPE_POINTER) { - return gen_addr_expr(expr, sym->type->base); - } else { - return gen_expr(expr); - } -} - -#if 0 -u32 gen_assign_loc(Ast lhs) { - if (lhs->kind == AST_NAME) { - u32 base; - i32 offset; - sym_get_loc(lhs->sym, &base, &offset); - r = get_reg_tmp(); - if (lhs->sym->flags & SYM_IS_REFERNCE) { - gen_opi(ADD, r, base, offset); - - return r; - } else if (lhs->kind == AST_INDEX) { - u32 ar = gen_assign_loc(lhs->c0); - u32 r = gen_expr(lhs->c1); - - u32 lbase; - i32 loffset; - u32 kind = gen_assign_loc(lhs->c0, &lbase, &loffset); - - } else if (lhs->kind == AST_DEREF) { - } else { - err_ast = lhs; - error("invalid lhs expr %s", ast_kind[lhs->kind]); - } -} -#endif - u32 gen_assign(Ast lhs, Ast expr) { gen_trace("gen_assign()"); - if (lhs->kind == AST_SYMBOL) { - u32 base; - i32 offset; - sym_get_loc(lhs->sym, &base, &offset); - //XXX type compat - u32 r = gen_assign_expr(expr, lhs->sym); - emit_mem(STW, r, base, offset); - return r; - } else if (lhs->kind == AST_INDEX) { - error("wip"); - } else if (lhs->kind == AST_DEREF) { + u32 raddr = gen_addr_expr(lhs); + u32 rval = gen_expr(expr); - } else { - err_ast = lhs; - error("illegal on lhs (%s)", ast_kind[lhs->kind]); - } - return 0; + // WIDTH?! + emit_mem(STW, rval, raddr, 0); + put_reg(raddr); + return rval; } u32 reg_save(u32 base) { @@ -383,9 +347,9 @@ u32 gen_call(Ast node) { u32 n = 0; while (arg != nil) { u32 r; - if (param->flags & SYM_IS_REFERENCE) { + if (param->type->kind == TYPE_POINTER) { // XXX or ptr type? - r = gen_addr_expr(arg, param->type); + r = gen_addr_expr(arg); } else { r = gen_expr(arg); } @@ -411,10 +375,6 @@ u32 gen_call(Ast node) { return r; } -u32 gen_lexpr(Ast node) { - return 0; -} - u32 gen_binop(Ast node, u32 op) { gen_trace( "gen_binop()"); u32 left = gen_expr(node->c0); @@ -462,36 +422,8 @@ u32 gen_logical_op(Ast node, u32 cc, u32 sc) { return r; } -u32 gen_array_addr(Ast node) { - err_ast = node; - if (node->type->kind != TYPE_ARRAY) { - error("cannot deref non-array type"); - } - if (node->kind == AST_SYMBOL) { - u32 base; - i32 offset; - sym_get_loc(node->sym, &base, &offset); - u32 r = get_reg_tmp(); - if (node->sym->flags & SYM_IS_REFERENCE) { - // some symbols are tagged as by reference, - // in which case we load the address - emit_mem(LDW, r, base, offset); - } else { - // otherwise the array is inline, so we - // just fixup the offset - emit_opi(ADD, r, base, offset); - } - return r; - } else if (node->kind == AST_INDEX) { - error("not ready for [][]"); - } else { - error("cannot dereference this"); - } - return 0; -} - u32 gen_array_read(Ast node) { - u32 raddr = gen_array_addr(node->c0); + u32 raddr = gen_addr_expr(node->c0); u32 roff = gen_expr(node->c1); u32 sz = node->c0->type->base->size; @@ -508,6 +440,21 @@ u32 gen_array_read(Ast node) { return roff; } +u32 gen_struct_read(Ast node) { + u32 raddr = gen_addr_expr(node->c0); + u32 off = node->c1->sym->value; + u32 sz = node->c1->type->size; + if (sz == 1) { + emit_mem(LDB, raddr, raddr, off); + } else if (sz == 4) { + emit_mem(LDW, raddr, raddr, off); + } else { + err_ast = node; + error("unsupported field size"); + } + return raddr; +} + u32 gen_expr(Ast node) { err_ast = node; gen_src_xref(node); @@ -563,6 +510,8 @@ u32 gen_expr(Ast node) { return gen_call(node); } else if (node->kind == AST_INDEX) { return gen_array_read(node); + } else if (node->kind == AST_FIELD) { + return gen_struct_read(node); } else { error("gen_expr cannot handle %s\n", ast_kind[node->kind]); } diff --git a/src/compiler2.c b/src/compiler2.c @@ -67,8 +67,10 @@ enum { AST_STRING, AST_BINOP, // c0=EXPR c1=EXPR AST_UNOP, // c0=EXPR - AST_DEREF, - AST_INDEX, + AST_DEREF, // c0=EXPR type: pointer-to-... + AST_INDEX, // c0=EXPR type: array-of-... c1=EXPR index + AST_FIELD, // c0=EXPR type: struct c1=SYMBOL field + AST_ADDROF, // c0=EXPR type: lvalue // container of statements AST_BLOCK, // c0=STMT // statements (chained into a list by c2) @@ -81,22 +83,18 @@ enum { AST_CONTINUE, // sub-part of if AST_IFELSE, // c0=EXPR c1=BLOCKthen c2=BLOCKelse|IFELSE -// top node - AST_PROGRAM, // c2=(TYPEDEF | ENUMDEF | FUNC | GLOBAL)* // program components (chained into a list by c2) - AST_TYPEDEF, - AST_ENUMDEF, // c2=FIELD* AST_FUNC, // c0=BLOCK - AST_GLOBAL, // c0=EXPR - AST_FIELD, +// top node + AST_PROGRAM, // c2=(TYPEDEF | ENUMDEF | FUNC | GLOBAL)* }; -str ast_kind[AST_FIELD + 1] = { - "SYMBOL", "CONST", "STR", "BINOP", "UNOP", "DEREF", "INDEX", +str ast_kind[AST_PROGRAM + 1] = { + "SYMBOL", "CONST", "STR", "BINOP", "UNOP", + "DEREF", "INDEX", "FIELD", "ADDROF", "BLOCK", "EXPR", "CALL", "WHILE", "IF", "RETURN", "BREAK", "CONTINUE", "IFELSE", - "PROGRAM", "TYPEDEF", "ENUMDEF", "FUNCDEF", - "GLOBAL", "FIELD", + "FUNC", "PROGRAM", }; struct AstRec { @@ -162,7 +160,6 @@ enum { SYM_IS_DEFINED = 0x04, SYM_IS_BUILTIN = 0x08, SYM_IS_PLACED = 0x10, // exists at addr in sym->value - SYM_IS_REFERENCE = 0x20, // stored as pointer-to-type }; struct SymbolRec { @@ -322,7 +319,7 @@ Ast ast_make_const(u32 value, Type type) { return ast_make(AST_CONST, value, nil, nil, type); } -Ast ast_make_name(String name, Symbol sym) { +Ast ast_make_symbol(String name, Symbol sym) { return ast_make(AST_SYMBOL, 0, name, sym, sym->type); } @@ -374,6 +371,10 @@ Type type_make(type_t kind, Type base, u32 len, u32 size) { return t; } +Type type_make_ptr(Type type) { + return type_make(TYPE_POINTER, type, 4, 0); +} + void type_add(Type type, String name) { //fprintf(stderr,"type_add(%s, %s)\n",type_id_tab[type->kind],name->text); Symbol sym = symbol_make(SYM_TYPE, 0, name, type, 0); @@ -406,6 +407,9 @@ Type type_setup(const char* text, u32 tlen, u32 kind, u32 size) { } bool type_is_same(Type a, Type b) { + if (a == b) { + return true; + } if (a->kind != b->kind) { return false; } @@ -430,6 +434,24 @@ bool type_is_same(Type a, Type b) { return true; } +bool type_is_i32_shape(Type type) { + if ((type->kind == TYPE_I32) || + (type->kind == TYPE_BYTE) || + (type->kind == TYPE_BOOL)) { + return true; + } else { + return false; + } +} + +bool type_is_compatible(Type dst, Type src) { + if (type_is_i32_shape(dst) && type_is_i32_shape(src)) { + return true; + } + return type_is_same(dst, src); + // check if deref of src is same as dst? +} + void type_dump(FILE* fp, Type type, bool use_short_name) { if (use_short_name && (type->sym != nil)) { fprintf(fp, "%s", type->sym->name->text); @@ -1091,6 +1113,51 @@ i32 ast_get_const_i32(Ast node) { return 0; } +void ast_type_compat(Ast node, Type type) { + if (node->c0) { + if (!type_is_compatible(type, node->c0->type)) { + error("incompatible types"); + } + } + if (node->c1) { + if (!type_is_compatible(type, node->c1->type)) { + error("incompatible types"); + } + } + node->type = type; +} + +Ast ast_make_deref(Ast child) { + Ast node = ast_make_simple(AST_DEREF, 0); + node->c0 = child; + node->type = child->type->base; + return node; +} + +Ast ast_require_array_type(Ast node) { + if (node->type->kind == TYPE_ARRAY) { + return node; + } + if ((node->type->kind == TYPE_POINTER) && + (node->type->base->kind == TYPE_ARRAY)) { + return ast_make_deref(node); + } + error("expected an array"); + return nil; +} + +Ast ast_require_struct_type(Ast node) { + if (node->type->kind == TYPE_RECORD) { + return node; + } + if ((node->type->kind == TYPE_POINTER) && + (node->type->base->kind == TYPE_RECORD)) { + return ast_make_deref(node); + } + error("expected a struct"); + return nil; +} + String parse_name(const char* what) { if (ctx.tok != tIDN) { error("expected %s, found %s %u", what, tnames[ctx.tok], ctx.tok); @@ -1126,7 +1193,7 @@ Ast parse_operand() { if (sym == nil) { error("undefined identifier '%s'", ctx.ident->text); } - node = ast_make_name(ctx.ident, sym); + node = ast_make_symbol(ctx.ident, sym); } else { error("invalid expression"); } @@ -1134,15 +1201,29 @@ Ast parse_operand() { return node; } +Symbol type_find_field(Type type, String name) { + Symbol field = type->first; + while (field != nil) { + if (field->name == name) { + return field; + } + field = field->next; + } + error("struct has no such field '%s'", name->text); + return nil; +} + Ast parse_primary_expr() { Ast node = parse_operand(); while (true) { if (ctx.tok == tOPAREN) { next(); //VALIDATE as func or ptr-to-func - if ((node->kind != AST_SYMBOL) || - (node->sym->kind != SYM_FUNC)) { - error("cannot call non-function"); + if (node->kind != AST_SYMBOL) { + error("cannot call '%s'", ast_kind[node->kind]); + } + if (node->sym->kind != SYM_FUNC) { + error("cannot call non-function '%s'", node->sym->name->text); } u32 n = 0; @@ -1152,6 +1233,7 @@ Ast parse_primary_expr() { Ast last = call; while (ctx.tok != tCPAREN) { + // VALIDATE compatibility Ast param = parse_expr(); last->c2 = param; last = param; @@ -1165,18 +1247,23 @@ Ast parse_primary_expr() { node = call; } else if (ctx.tok == tDOT) { next(); + node = ast_require_struct_type(node); String name = parse_name("field name"); - // VALIDATE appropriate name for type - // VALIDATE type is ptr or struct - Ast dot = ast_make_simple(AST_DEREF, 0); + Ast dot = ast_make_simple(AST_FIELD, 0); dot->c0 = node; - dot->name = name; + dot->c1 = ast_make_simple(AST_SYMBOL, 0); + dot->c1->name = name; + dot->c1->sym = type_find_field(node->type, name); + dot->c1->type = dot->c1->sym->type; + dot->type = dot->c1->type; node = dot; } else if (ctx.tok == tOBRACK) { next(); + node = ast_require_array_type(node); Ast index = ast_make_simple(AST_INDEX, 0); index->c0 = node; index->c1 = parse_expr(); + index->type = node->type->base; node = index; require(tCBRACK); //VALIDATE lhs is array, rhs is appropriate numeric @@ -1192,10 +1279,20 @@ Ast parse_unary_expr() { if (op == tPLUS) { next(); return parse_unary_expr(); - } else if ((op == tMINUS) || (op == tBANG) || (op == tNOT) || (op== tAMP)) { + } else if ((op == tMINUS) || (op == tBANG) || (op == tNOT) || (op == tAMP)) { u32 op = ctx.tok; next(); - return ast_make_unop(op, parse_unary_expr()); + Ast node = ast_make_unop(op, parse_unary_expr()); + if (op == tAMP) { + node->type = type_make_ptr(node->c0->type); + node->kind = AST_ADDROF; + node->ival = 0; + } else if (op == tBANG) { + ast_type_compat(node, ctx.type_bool); + } else { + ast_type_compat(node, ctx.type_i32); + } + return node; } else { return parse_primary_expr(); } @@ -1207,6 +1304,7 @@ Ast parse_mul_expr() { u32 op = ctx.tok; next(); node = ast_make_binop(op, node, parse_unary_expr()); + ast_type_compat(node, ctx.type_i32); } return node; } @@ -1217,6 +1315,7 @@ Ast parse_add_expr() { u32 op = ctx.tok; next(); node = ast_make_binop(op, node, parse_mul_expr()); + ast_type_compat(node, ctx.type_i32); } return node; } @@ -1227,6 +1326,7 @@ Ast parse_rel_expr() { u32 op = ctx.tok; next(); node = ast_make_binop(op, node, parse_add_expr()); + ast_type_compat(node, ctx.type_bool); } return node; } @@ -1238,6 +1338,7 @@ Ast parse_and_expr() { while (ctx.tok == tAND) { next(); node = ast_make_binop(tAND, node, parse_rel_expr()); + ast_type_compat(node, ctx.type_bool); } } return node; @@ -1249,6 +1350,7 @@ Ast parse_expr() { while (ctx.tok == tOR) { next(); node = ast_make_binop(tOR, node, parse_and_expr()); + ast_type_compat(node, ctx.type_bool); } } return node; @@ -1516,14 +1618,15 @@ Ast parse_local_var() { if (ctx.tok == tASSIGN) { next(); node = ast_make_simple(AST_EXPR, 0); - node->c0 = ast_make_binop(tASSIGN, ast_make_name(name, sym), parse_expr()); + node->c0 = ast_make_binop(tASSIGN, ast_make_symbol(name, sym), parse_expr()); + node->c0->type = node->c0->c1->type; } require(tSEMI); return node; } -Ast parse_global_var() { +void parse_global_var() { String name = parse_name("variable name"); // TODO: allow type inference Type type = parse_type(false); @@ -1535,8 +1638,6 @@ Ast parse_global_var() { ctx.gp = ctx.gp + type->size; ctx.gp = (ctx.gp + 3) & (~3); - Ast node = ast_make_simple(AST_GLOBAL, 0); - if (ctx.tok == tASSIGN) { next(); u32* data = ctx.data + (sym->value >> 2); @@ -1552,16 +1653,12 @@ Ast parse_global_var() { } else { Ast expr = parse_expr(); i32 v = ast_get_const_i32(expr); - node->c0 = expr; + // DISCARD expr // VALIDATE compatible integer type ctx.data[sym->value >> 2] = v; } } require(tSEMI); - node->name = name; - node->type = type; - node->sym = sym; - return node; } Ast parse_expr_statement() { @@ -1574,16 +1671,19 @@ Ast parse_expr_statement() { next(); right = parse_expr(); node = ast_make_binop(tASSIGN, left, right); + node->type = node->c1->type; } else if ((ctx.tok & tcMASK) == tcAEQOP) { u32 op = ctx.tok; // - tADDEQ; next(); right = parse_expr(); node = ast_make_binop(op, left, right); + // TODO: type prop, convert x op= y to x = x op y } else if ((ctx.tok & tcMASK) == tcMEQOP) { u32 op = ctx.tok; // - tMULEQ; next(); right = parse_expr(); node = ast_make_binop(op, left, right); + // TODO: type prop, convert x op= y to x = x op y } else if ((ctx.tok == tINC) || (ctx.tok == tDEC)) { u32 op; if (ctx.tok == tINC) { @@ -1594,7 +1694,10 @@ Ast parse_expr_statement() { next(); right = ast_make_const(1, ctx.type_i32); right = ast_make_binop(op, left, right); + right->type = ctx.type_i32; node = ast_make_binop(tASSIGN, left, right); + node->type = ctx.type_i32; + // TODO: check type? } else { node = left; } @@ -1670,13 +1773,14 @@ Ast parse_function_body(Symbol fn) { Symbol parse_param(String fname, u32 n, Symbol first, Symbol last) { String pname = parse_name("parameter name"); Type ptype = parse_type(false); - Symbol param = symbol_make(SYM_PARAM, 0, pname, ptype, /* 4 + */ n * 4); // arrays and structs are always passed as reference parameters if ((ptype->kind == TYPE_ARRAY) || (ptype->kind == TYPE_RECORD)) { - param->flags |= SYM_IS_REFERENCE; + ptype = type_make_ptr(ptype); } + Symbol param = symbol_make(SYM_PARAM, 0, pname, ptype, /* 4 + */ n * 4); + Symbol sym = first; while (sym != nil) { if (sym->name == param->name) { @@ -1782,7 +1886,7 @@ Ast parse_function() { return nil; } -Ast parse_type_def() { +void parse_type_def() { String name = parse_name("type name"); Type type = parse_type(false); Type prev = type_find(name); @@ -1802,16 +1906,9 @@ Ast parse_type_def() { type = prev; } require(tSEMI); - Ast node = ast_make_simple(AST_TYPEDEF, 0); - node->name = name; - node->type = type; - return node; } -Ast parse_enum_def() { - Ast decl = ast_make_simple(AST_ENUMDEF, 0); - Ast last = decl; - +void parse_enum_def() { require(tOBRACE); u32 val = 0; while (ctx.tok != tCBRACE) { @@ -1824,47 +1921,42 @@ Ast parse_enum_def() { next(); Ast expr = parse_expr(); val = ast_get_const_i32(expr); - //XXX ensure const and set val + // typecheck? discard subtree } require(tCOMMA); - Ast node = ast_make(AST_FIELD, val, name, sym, ctx.type_i32); - last->c2 = node; - last = node; symbol_add_global(symbol_make(SYM_CONST, 0, name, ctx.type_i32, val)); val++; } require(tCBRACE); require(tSEMI); - return decl; } Ast parse_program() { Ast program = ast_make_simple(AST_PROGRAM, 0); Ast last = program; - Ast node = nil; next(); for (;;) { if (ctx.tok == tENUM) { next(); - node = parse_enum_def(); + parse_enum_def(); } else if (ctx.tok == tTYPE) { next(); - node = parse_type_def(); + parse_type_def(); } else if (ctx.tok == tFUNC) { next(); - node = parse_function(); + Ast node = parse_function(); + if (node != nil) { + last->c2 = node; + last = node; + } } else if (ctx.tok == tVAR) { next(); - node = parse_global_var(); + parse_global_var(); } else if (ctx.tok == tEOF) { return program; } else { expected("function, variable, or type definition"); } - if (node != nil) { - last->c2 = node; - last = node; - } } } @@ -1945,10 +2037,6 @@ int _ast_dump(FILE* fp, Ast node, u32 indent, bool dumplist, Ast mark) { type_dump_compact(fp, node->type); } fprintf(fp, "\n"); - } else if (node->kind == AST_TYPEDEF) { - fprintf(fp, "'%s' ", node->name->text); - type_dump_compact(fp, node->type); - fprintf(fp, "\n"); } else { if (node->name) { fprintf(fp,"'%s' ",node->name->text);