spl

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

commit 6eb5ce36eb2f82ce70a6b73b4c2644089f0a2d90
parent 49e90c695489ce2c08f5f11fd4d1586ef9c9a4ca
Author: Brian Swetland <swetland@frotz.net>
Date:   Thu, 19 Oct 2023 18:26:28 -0700

compiler: parse and dump AST

Diffstat:
Mcompiler/compiler.spl | 388++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------
1 file changed, 285 insertions(+), 103 deletions(-)

diff --git a/compiler/compiler.spl b/compiler/compiler.spl @@ -22,6 +22,14 @@ fn strcpyn(dst str, src str, len u32) { } } +fn strlen(s str) u32 { + var n u32 = 0; + while s[n] != 0 { + n++; + } + return n; +} + // ================================================================ // data types @@ -103,19 +111,20 @@ enum AstKind { AST_BREAK, AST_CONTINUE, AST_RETURN, // l=EXPR - AST_IF, // l=EXPR r=THENELSE + AST_IF, // l=CASE* // sub-part of if - AST_THENELSE, // l=BLOCK r=BLOCK|IF + AST_CASE, // l=EXPR|nil r=BLOCK // expressions AST_SYMBOL, - AST_CONST, - AST_STRING, + AST_CONST, // numeric constant + AST_STRING, // string constant AST_DEREF, // l=EXPR type: pointer-to-... - AST_INDEX, // l=EXPR type: array-of-... c1=EXPR index - AST_FIELD, // l=EXPR type: struct c1=SYMBOL field + AST_INDEX, // l=EXPR type: array-of-... r=EXPR index + AST_FIELD, // l=EXPR type: struct r=SYMBOL field AST_ADDROF, // l=EXPR type: lvalue AST_CALL, // l=NAME r=EXPR* - AST_ASSIGN, + AST_ASSIGN, // l=lhsEXPR r=rhsEXPR + AST_NEW, // l=TYPE // binary expressions // Rel Ops (maintain order matched w/ lexer) AST_EQ, AST_NE, AST_LT, AST_LE, AST_GT, AST_GE, @@ -131,9 +140,9 @@ enum AstKind { var ast_kind []str = { "PROGRAM", "FUNC", "BLOCK", "EXPR", "WHILE", "BREAK", "CONTINUE", - "RETURN", "IF", "THENELSE", + "RETURN", "IF", "CASE", "SYMBOL", "CONST", "STRING", - "DEREF", "INDEX", "FIELD", "ADDROF", "CALL", "ASSIGN", + "DEREF", "INDEX", "FIELD", "ADDROF", "CALL", "ASSIGN", "NEW", "EQ", "NE", "LT", "LE", "GT", "GE", "ADD", "SUB", "OR", "XOR", "MUL", "DIV", "MOD", "AND", "LSL", "LSR", @@ -161,7 +170,7 @@ fn ast_is_expr(kind AstKind) bool { } fn ast_is_stmt(kind AstKind) bool { - return (kind >= AST_EXPR) && (kind <= AST_THENELSE); + return (kind >= AST_EXPR) && (kind <= AST_CASE); } struct Ast { @@ -258,8 +267,8 @@ struct Context { typelist *Type, // all types scope *Scope, // top of Scope stack - cur_fn *Scope, // args of fn being parsed global *Scope, // the global scope + cur_fn *Symbol, // fn being parsed idn_if *String, // identifier strings idn_fn *String, @@ -282,6 +291,7 @@ struct Context { type_void *Type, type_bool *Type, type_str *Type, + type_nil *Type, type_u32 *Type, type_i32 *Type, type_u8 *Type, @@ -454,7 +464,12 @@ fn ast_make_const(value u32, type Type) Ast { } fn ast_make_symbol(name String, sym Symbol) Ast { - return ast_make(AST_SYMBOL, 0, name, sym, sym.type); + // TODO maybe handle nil sym differently? + var type Type = nil; + if sym != nil { + type = sym.type; + } + return ast_make(AST_SYMBOL, 0, name, sym, type); } fn ctx_init() { @@ -480,6 +495,7 @@ fn ctx_init() { ctx.type_void = type_make(string_make("void", 4), TYPE_VOID, nil, nil, 0); ctx.type_bool = type_make(string_make("bool", 4), TYPE_BOOL, nil, nil, 0); + ctx.type_nil = type_make(string_make("nil", 3), TYPE_NIL, nil, nil, 0); ctx.type_str = type_make(string_make("str", 3), TYPE_STR, nil, nil, 0); ctx.type_u32 = type_make(string_make("u32", 3), TYPE_U32, nil, nil, 0); ctx.type_i32 = type_make(string_make("i32", 3), TYPE_I32, nil, nil, 0); @@ -796,11 +812,11 @@ fn _next() Token { } } -fn token_printstr(fd i32) { +fn printstr(fd i32, s str) { var n u32 = 0; writec(fd, '"'); - while n < 256 { - var ch u32 = ctx.tmp[n]; + while true { + var ch u32 = s[n]; if ch == 0 { break; } else if (ch < ' ') || (ch > '~') { @@ -826,7 +842,7 @@ fn token_print(fd i32) { } else if ctx.tok == tEOL { writec(fd, '\n'); } else if ctx.tok == tSTR { - token_printstr(fd); + printstr(fd, ctx.tmp); } else { writes(fd, tnames[ctx.tok]); } @@ -865,153 +881,177 @@ fn parse_name(what str) String { return s; } -fn parse_ident() { - var name String = ctx.ident; +fn parse_symbol(what str) Ast { + var name String = parse_name(what); var sym Symbol = symbol_find(name); - next(); + return ast_make_symbol(name, sym); +} - if (sym == nil) && (ctx.tok != tOPAREN) { - error("undefined identifier '", @str name.text, "'"); +fn parse_ident() Ast { + var node Ast = parse_symbol("identifier"); + + if (node.sym == nil) && (ctx.tok != tOPAREN) { + error("undefined identifier '", @str node.name.text, "'"); } if ctx.tok == tOPAREN { // function call next(); + node = ast_make_unop(AST_CALL, node); + var last Ast = nil; + while ctx.tok != tCPAREN { if ctx.tok == tAT { // type annotation for varargs hack next(); parse_type(false); } - parse_expr(); + + var expr Ast = parse_expr(); + if last != nil { + last.next = expr; + } else { + node.right = expr; + } + last = expr; + if ctx.tok != tCPAREN { require(tCOMMA); } } next(); - } else { - if sym.kind == SYMBOL_DEF { - // constant - } else { - // variable - } } while true { if ctx.tok == tDOT { // field access next(); - parse_name("field name"); + node = ast_make_binop(AST_FIELD, node, parse_symbol("field name")); } else if ctx.tok == tOBRACK { // array access next(); - parse_expr(); + node = ast_make_binop(AST_INDEX, node, parse_expr()); require(tCBRACK); } else { - return; + return node; } } + + return nil; // unreachable } -fn parse_primary_expr() { +fn parse_primary_expr() Ast { + var node Ast; if ctx.tok == tNUM { - // number + node = ast_make_const(ctx.num, ctx.type_i32); + next(); } else if ctx.tok == tSTR { - // string + node = ast_make_simple(AST_STRING, 0); + node.name = string_make(ctx.tmp, strlen(ctx.tmp)); + next(); } else if ctx.tok == tTRUE { - // 1 + node = ast_make_const(1, ctx.type_bool); + next(); } else if ctx.tok == tFALSE { - // 0 + node = ast_make_const(0, ctx.type_bool); + next(); } else if ctx.tok == tNIL { - // 0 + node = ast_make_const(0, ctx.type_nil); + next(); } else if ctx.tok == tOPAREN { next(); - parse_expr(); + node = parse_expr(); require(tCPAREN); - return; } else if ctx.tok == tNEW { next(); require(tOPAREN); - parse_name("type name"); + node = ast_make_simple(AST_NEW, 0); + node.name = parse_name("type name"); require(tCPAREN); - return; } else if ctx.tok == tIDN { - parse_ident(); - return; + node = parse_ident(); } else { error("invalid expression"); } - next(); + return node; } -fn parse_unary_expr() { +fn parse_unary_expr() Ast { var op u32 = ctx.tok; if op == tPLUS { next(); - parse_unary_expr(); + return parse_unary_expr(); } else if op == tMINUS { next(); - parse_unary_expr(); + return ast_make_unop(AST_NEG, parse_unary_expr()); } else if op == tBANG { next(); - parse_unary_expr(); + return ast_make_unop(AST_BOOL_NOT, parse_unary_expr()); } else if op == tNOT { next(); - parse_unary_expr(); + return ast_make_unop(AST_NOT, parse_unary_expr()); } else if op == tAMP { error("dereference not supported"); - next(); - parse_unary_expr(); + //next(); + //parse_unary_expr(); } else { - parse_primary_expr(); + return parse_primary_expr(); } + return nil; // unreachable } -fn parse_mul_expr() { - parse_unary_expr(); +fn parse_mul_expr() Ast { + var node Ast = parse_unary_expr(); while ctx.tok & tcMASK == tcMULOP { + var op u32 = (ctx.tok - tSTAR) + AST_MUL; next(); - parse_unary_expr(); + node = ast_make_binop(op, node, parse_unary_expr()); } + return node; } -fn parse_add_expr() { - parse_mul_expr(); +fn parse_add_expr() Ast { + var node Ast = parse_mul_expr(); while ctx.tok & tcMASK == tcADDOP { + var op u32 = (ctx.tok - tPLUS) + AST_ADD; next(); - parse_mul_expr(); + node = ast_make_binop(op, node, parse_mul_expr()); } + return node; } -fn parse_rel_expr() { - parse_add_expr(); +fn parse_rel_expr() Ast { + var node Ast = parse_add_expr(); if ctx.tok & tcMASK == tcRELOP { + var op u32 = (ctx.tok - tEQ) + AST_EQ; next(); - parse_add_expr(); + node = ast_make_binop(op, node, parse_add_expr()); } + return node; } -fn parse_and_expr() { - parse_rel_expr(); +fn parse_and_expr() Ast { + var node Ast = parse_rel_expr(); if ctx.tok == tAND { while ctx.tok == tAND { next(); - parse_rel_expr(); + node = ast_make_binop(AST_BOOL_AND, node, parse_rel_expr()); } } + return node; } -fn parse_expr() { - parse_and_expr(); +fn parse_expr() Ast { + var node Ast = parse_and_expr(); if ctx.tok == tOR { while ctx.tok == tOR { next(); - parse_and_expr(); + node = ast_make_binop(AST_BOOL_OR, node, parse_and_expr()); } } + return node; } - fn parse_struct_type(name String) Type { var type Type = type_find(name); @@ -1104,71 +1144,85 @@ fn parse_type(fwd_ref_ok u32) Type { return nil; } -fn parse_while() { +fn parse_while() Ast { // while expr { block } - parse_expr(); + var expr Ast = parse_expr(); require(tOBRACE); scope_push(SCOPE_LOOP); - parse_block(); + var block Ast = parse_block(); scope_pop(); + return ast_make_binop(AST_WHILE, expr, block); } -fn parse_if() { +fn parse_if() Ast { // if expr { block } - parse_expr(); + + var expr Ast = parse_expr(); require(tOBRACE); scope_push(SCOPE_BLOCK); - parse_block(); + var block Ast = parse_block(); scope_pop(); + + var last Ast = ast_make_binop(AST_CASE, expr, block); + var stmt Ast = ast_make_unop(AST_IF, last); + while ctx.tok == tELSE { // ... else ... next(); if ctx.tok == tIF { // ... if expr { block } next(); - parse_expr(); + expr = parse_expr(); require(tOBRACE); scope_push(SCOPE_BLOCK); - parse_block(); + block = parse_block(); scope_pop(); + last.next = ast_make_binop(AST_CASE, expr, block); + last = last.next; } else { // ... { block } require(tOBRACE); scope_push(SCOPE_BLOCK); - parse_block(); + block = parse_block(); scope_pop(); + last.next = ast_make_binop(AST_CASE, nil, block); break; } } + return stmt; } -fn parse_return() { +fn parse_return() Ast { // TODO check for return required/type + var node Ast = ast_make_simple(AST_RETURN, 0); if ctx.tok == tSEMI { next(); } else { // error("return types do not match"); - parse_expr(); + node.left = parse_expr(); require(tSEMI); } + return node; } -fn parse_break() { +fn parse_break() Ast { // TODO: break-to-labeled-loop support var scope Scope = scope_find(SCOPE_LOOP); if scope == nil { error("break must be used from inside a looping construct"); } require(tSEMI); + return ast_make_simple(AST_BREAK, 0); } -fn parse_continue() { +fn parse_continue() Ast { // TODO: continue-to-labeled-loop support var scope Scope = scope_find(SCOPE_LOOP); if scope == nil { error("continue must be used from inside a looping construct"); } require(tSEMI); + return ast_make_simple(AST_CONTINUE, 0); } fn parse_struct_init(sym Symbol) { @@ -1215,6 +1269,8 @@ fn parse_array_init(sym Symbol) { } fn parse_var() { + // TODO: global vs local + // TODO: generate assignment/initialization var name String = parse_name("variable name"); var type Type = parse_type(false); var sym Symbol = symbol_make(name, type); @@ -1240,43 +1296,71 @@ fn parse_var() { require(tSEMI); } -fn parse_expr_statement() { - parse_expr(); +fn _parse_expr_statement() Ast { + var node Ast = parse_expr(); + var expr Ast = nil; + var op u32; + if ctx.tok == tASSIGN { + // basic assignment next(); - parse_expr(); + return ast_make_binop(AST_ASSIGN, node, parse_expr()); } else if (ctx.tok & tcMASK) == tcAEQOP { + // +=, etc + op = (ctx.tok - tADDEQ) + AST_ADD; next(); - parse_expr(); + expr = parse_expr(); } else if (ctx.tok & tcMASK) == tcMEQOP { + // *=, etc + op = (ctx.tok - tMULEQ) + AST_MUL; next(); - parse_expr(); - } else if (ctx.tok == tINC) || (ctx.tok == tDEC) { + expr = parse_expr(); + } else if (ctx.tok == tINC) { + op = AST_ADD; next(); + expr = ast_make_const(1, ctx.type_i32); + } else if (ctx.tok == tDEC) { + op = AST_SUB; + expr = ast_make_const(1, ctx.type_i32); + } else { + // simple expression + return node; } + + // TODO duplicate node instead of sharing it + expr = ast_make_binop(op, node, expr); + return ast_make_binop(AST_ASSIGN, node, expr); +} + +fn parse_expr_statement() Ast { + var stmt Ast = ast_make_unop(AST_EXPR, _parse_expr_statement()); require(tSEMI); + return stmt; } -fn parse_block() { +fn parse_block() Ast { + var block Ast = ast_make_simple(AST_BLOCK, 0); + var last Ast = nil; + var node Ast; while true { if ctx.tok == tCBRACE { next(); break; } else if ctx.tok == tRETURN { next(); - parse_return(); + node = parse_return(); } else if ctx.tok == tBREAK { next(); - parse_break(); + node = parse_break(); } else if ctx.tok == tCONTINUE { next(); - parse_continue(); + node = parse_continue(); } else if ctx.tok == tWHILE { next(); - parse_while(); + node = parse_while(); } else if ctx.tok == tIF { next(); - parse_if(); + node = parse_if(); } else if ctx.tok == tVAR { next(); parse_var(); @@ -1285,9 +1369,16 @@ fn parse_block() { // empty statement continue; } else { - parse_expr_statement(); + node = parse_expr_statement(); + } + if last == nil { + block.left = node; + } else { + last.next = node; } + last = node; } + return block; } fn parse_param(fname String) Symbol { @@ -1299,7 +1390,19 @@ fn parse_param(fname String) Symbol { return symbol_make(pname, ptype); } -fn parse_fn() { +fn parse_fn_body(sym Symbol) Ast { + ctx.cur_fn = sym; + require(tOBRACE); + scope_push(SCOPE_FUNC); // parameters, from fn + scope_push(SCOPE_BLOCK); + var node Ast = parse_block(); + scope_pop(); // stow in node? + scope_pop(); + ctx.cur_fn = nil; + return node; +} + +fn parse_fn() Ast { var fname String = parse_name("function name"); var rtype Type = ctx.type_void; @@ -1322,12 +1425,14 @@ fn parse_fn() { var sym Symbol = symbol_make_global(fname, rtype); sym.kind = SYMBOL_FN; - require(tOBRACE); - scope_push(SCOPE_BLOCK); - parse_block(); - scope_pop(); + var node Ast = ast_make_simple(AST_FUNC, 0); + node.name = fname; + node.sym = sym; + node.left = parse_fn_body(sym); scope_pop(); + + return node; } fn parse_enum_def() { @@ -1349,6 +1454,7 @@ fn parse_enum_def() { if ctx.tok == tASSIGN { next(); parse_expr(); + // TODO val <- expr } else { val++; } @@ -1358,7 +1464,10 @@ fn parse_enum_def() { require(tSEMI); } -fn parse_program() { +fn parse_program() Ast { + var program Ast = ast_make_simple(AST_PROGRAM, 0); + var last Ast = nil; + while true { if ctx.tok == tENUM { next(); @@ -1370,16 +1479,87 @@ fn parse_program() { require(tSEMI); } else if ctx.tok == tFN { next(); - parse_fn(); + var node Ast = parse_fn(); + if last == nil { + program.left = node; + } else { + last.next = node; + } + last = node; } else if ctx.tok == tVAR { next(); parse_var(); } else if ctx.tok == tEOF { - return; + break; } else { expected("function, variable, or type definition"); } } + return program; +} + +var indent u32 = 0; + +fn print_indent(fd i32) { + var n u32 = 0; + while n < indent { + writes(fd, " "); + n++; + } +} + +fn dump_ast_node(fd i32, node Ast) { + var kind AstKind = node.kind; + var child Ast = nil; + + print_indent(fd); + writes(fd, ast_kind[kind]); + writes(fd, " "); + indent++; + if kind == AST_CONST { + writex(fd, node.ival); + } else if kind == AST_FUNC { + writes(fd, node.name.text); + } else if kind == AST_STRING { + printstr(fd, node.name.text); + } else if kind == AST_SYMBOL { + writes(fd, node.name.text); + } + writes(fd, "\n"); + + if kind == AST_CALL { + dump_ast_node(fd, node.left); + child = node.right; + } else if kind == AST_CASE { + if node.left == nil { + print_indent(fd); + writes(fd, "ELSE\n"); + } else { + dump_ast_node(fd, node.left); + } + child = node.right; + } else if (kind == AST_PROGRAM) || (kind == AST_BLOCK) || (kind == AST_IF) { + child = node.left; + } + + if child != nil { + while child != nil { + dump_ast_node(fd, child); + child = child.next; + } + } else { + if node.left != nil { + dump_ast_node(fd, node.left); + } + if node.right != nil { + dump_ast_node(fd, node.right); + } + } + indent = indent - 1; +} + +fn dump_ast(node Ast) { + dump_ast_node(1, node); } fn start() i32 { @@ -1387,7 +1567,9 @@ fn start() i32 { scan(); next(); - parse_program(); + var ast Ast = parse_program(); + + dump_ast(ast); //while(next() != tEOF) { // token_print(1);