spl

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

commit ad7b5d853250e600a291b35eab85c2e20af508e3
parent e364f81a9c842e5f90fbf91da782716599839982
Author: Brian Swetland <swetland@frotz.net>
Date:   Fri, 20 Oct 2023 17:03:40 -0700

compiler1: split into multiple source files

Diffstat:
Dcompiler/compiler.spl | 1583-------------------------------------------------------------------------------
Acompiler/lexer.spl | 349+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Acompiler/main.spl | 75+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Acompiler/parser.spl | 651+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Acompiler/stdlib.spl | 29+++++++++++++++++++++++++++++
Acompiler/types.spl | 483+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 1587 insertions(+), 1583 deletions(-)

diff --git a/compiler/compiler.spl b/compiler/compiler.spl @@ -1,1583 +0,0 @@ -// Copyright 2023, Brian Swetland <swetland@frotz.net> -// Licensed under the Apache License, Version 2.0. - -// utility functions - -fn strneq(s1 str, s2 str, len u32) bool { - var n u32 = 0; - while n < len { - if s1[n] != s2[n] { - return false; - } - n++; - } - return true; -} - -fn strcpyn(dst str, src str, len u32) { - var n u32 = 0; - while n < len { - dst[n] = src[n]; - n++; - } -} - -fn strlen(s str) u32 { - var n u32 = 0; - while s[n] != 0 { - n++; - } - return n; -} - -// ================================================================ -// data types - -struct String { - next *String, - len u32, - text [256]u8, -}; - -enum SymbolKind { - SYMBOL_VAR, - SYMBOL_FLD, // struct field - SYMBOL_PTR, // struct *field - SYMBOL_DEF, // enum - SYMBOL_FN, -}; - -struct Symbol { - next *Symbol, - name *String, - type *Type, - kind SymbolKind, -}; - -enum ScopeKind { - SCOPE_GLOBAL, - SCOPE_FUNC, - SCOPE_BLOCK, - SCOPE_LOOP, - SCOPE_STRUCT, -}; - -struct Scope { - parent *Scope, - first *Symbol, - last *Symbol, - kind ScopeKind, -}; - -enum TypeKind { - TYPE_VOID, - TYPE_BOOL, - TYPE_U8, - TYPE_U32, - TYPE_I32, - TYPE_NIL, - TYPE_POINTER, - TYPE_ARRAY, - TYPE_SLICE, - TYPE_STR, - TYPE_STRUCT, - TYPE_FN, - TYPE_ENUM, - TYPE_UNDEFINED, -}; - -struct Type { - next *Type, - name *String, - of *Type, // for slice, array, ptr, fn (return type) - list *Symbol, // for struct (fields), fn (params) - kind TypeKind, - count u32, -}; - -// ================================================================ -// Abstract Syntax Tree - -enum AstKind { -// top node - AST_PROGRAM, // l=FUNC* -// program components (chained into a list by next) - AST_FUNC, // l=BLOCK -// container of statements - AST_BLOCK, // l=STMT* -// statements (chained into a list by next) - AST_EXPR, // l=EXPR - AST_WHILE, // l=EXPR r=BLOCK - AST_BREAK, - AST_CONTINUE, - AST_RETURN, // l=EXPR - AST_IF, // l=CASE* -// sub-parts of if - AST_CASE, // l=EXPR r=BLOCK - AST_ELSE, // l=BLOCK -// expressions - AST_SYMBOL, - AST_CONST, // numeric constant - AST_STRING, // string constant - AST_DEREF, // l=EXPR type: pointer-to-... - AST_INDEX, // l=EXPR type: array-of-... r=EXPR index - AST_FIELD, // l=EXPR type: struct r=SYMBOL field - AST_ADDROF, // l=EXPR type: lvalue - AST_CALL, // l=NAME r=EXPR* - AST_ASSIGN, // l=lhsEXPR r=rhsEXPR - AST_NEW, // l=TYPE -// binary expressions - // Rel Ops (maintain order matched w/ lexer) - AST_EQ, AST_NE, AST_LT, AST_LE, AST_GT, AST_GE, - // Add Ops (maintain order matched w/ lexer) - AST_ADD, AST_SUB, AST_OR, AST_XOR, - // Mul Ops (maintain order matched w/ lexer) - AST_MUL, AST_DIV, AST_MOD, AST_AND, AST_LSL, AST_LSR, - // uncategorized ops - AST_NOT, AST_BOOL_AND, AST_BOOL_OR, AST_BOOL_NOT, AST_NEG, - AST_KIND_COUNT, -}; - -var ast_kind []str = { - "PROGRAM", "FUNC", - "BLOCK", "EXPR", "WHILE", "BREAK", "CONTINUE", - "RETURN", "IF", "CASE", "ELSE", - "SYMBOL", "CONST", "STRING", - "DEREF", "INDEX", "FIELD", "ADDROF", "CALL", "ASSIGN", "NEW", - "EQ", "NE", "LT", "LE", "GT", "GE", - "ADD", "SUB", "OR", "XOR", - "MUL", "DIV", "MOD", "AND", "LSL", "LSR", - "NOT", "BOOL AND", "BOOL OR", "BOOL NOT", "NEG", -}; - -fn ast_is_relop(kind AstKind) bool { - return (kind >= AST_EQ) && (kind <= AST_GE); -} - -fn ast_is_addop(kind AstKind) bool { - return (kind >= AST_ADD) && (kind <= AST_XOR); -} - -fn ast_is_mulop(kind AstKind) bool { - return (kind >= AST_MUL) && (kind <= AST_LSR); -} - -fn ast_is_binop(kind AstKind) bool { - return (kind >= AST_EQ) && (kind <= AST_LSR); -} - -fn ast_is_expr(kind AstKind) bool { - return (kind >= AST_SYMBOL); -} - -fn ast_is_stmt(kind AstKind) bool { - return (kind >= AST_EXPR) && (kind <= AST_ELSE); -} - -struct Ast { - kind AstKind, - - left *Ast, - right *Ast, - next *Ast, // intrusive list - - ival u32, - name *String, - sym *Symbol, - type *Type, - - srcloc u32, // linenumber for now -}; - - -// ================================================================ -// lexical scanner tokens - -// token classes (tok & tcMASK) -enum TokenClass{ - tcRELOP = 0x08, tcADDOP = 0x10, tcMULOP = 0x18, - tcAEQOP = 0x20, tcMEQOP = 0x28, tcMASK = 0xF8, -}; - -enum Token { - // EndMarks, Braces, Brackets Parens - tEOF, tEOL, tOBRACE, tCBRACE, tOBRACK, tCBRACK, tOPAREN, tCPAREN, - // RelOps (do not reorder) - tEQ, tNE, tLT, tLE, tGT, tGE, tx0E, tx0F, - // AddOps (do not reorder) - tPLUS, tMINUS, tPIPE, tCARET, tx14, tx15, tx16, tx17, - // MulOps (do not reorder) - tSTAR, tSLASH, tPERCENT, tAMP, tLEFT, tRIGHT, tx1E, tx1F, - // AsnOps (do not reorder) - tADDEQ, tSUBEQ, tOREQ, tXOREQ, tx24, tx25, tx26, tx27, - tMULEQ, tDIVEQ, tMODEQ, tANDEQ, tLSEQ, tRSEQ, t2E, t2F, - // Various, UnaryNot, LogicalOps, - tSEMI, tCOLON, tDOT, tCOMMA, tNOT, tAND, tOR, tBANG, - tASSIGN, tINC, tDEC, - tAT, - // Keywords - tNEW, tFN, tSTRUCT, tVAR, tENUM, - tIF, tELSE, tWHILE, - tBREAK, tCONTINUE, tRETURN, - tFOR, tSWITCH, tCASE, - tTRUE, tFALSE, tNIL, - tIDN, tNUM, tSTR, - // used internal to the lexer but never returned - tSPC, tINV, tDQT, tSQT, tMSC, -}; - -var tnames []str = { - "<EOF>", "<EOL>", "{", "}", "[", "]", "(", ")", - "==", "!=", "<", "<=", ">", ">=", "", "", - "+", "-", "|", "^", "", "", "", "", - "*", "/", "%", "&", "<<", ">>", "", "", - "+=", "-=", "|=", "^=", "", "", "", "", - "*=", "/=", "%=", "&=", "<<=", ">>=", "", "", - ";", ":", ".", ",", "~", "&&", "||", "!", - "=", "++", "--", - "@", - "new", "fn", "struct", "var", "enum", - "if", "else", "while", - "break", "continue", "return", - "for", "switch", "case", - "true", "false", "nil", - "<ID>", "<NUM>", "<STR>", - "<SPC>", "<INV>", "<DQT>", "<SQT>", "<MSC>", -}; - - -// ================================================================ -// lexer / parser / compiler context - -struct Context { - filename str, // filename of active source - outname str, // base name for output files - - linenumber u32, // line number of most recent line - lineoffset u32, // position of start of most recent line - byteoffset u32, // position of the most recent character - flags u32, - cc u32, // scanner: next character - - tok Token, // most recent token - num u32, // for tNUM - tmp [256]u8, // for tIDN, tSTR - ident *String, // for tSTR - - stringlist *String, // intern table - typelist *Type, // all types - - scope *Scope, // top of Scope stack - global *Scope, // the global scope - cur_fn *Symbol, // fn being parsed - - idn_if *String, // identifier strings - idn_fn *String, - idn_for *String, - idn_var *String, - idn_nil *String, - idn_new *String, - idn_case *String, - idn_else *String, - idn_enum *String, - idn_true *String, - idn_break *String, - idn_while *String, - idn_false *String, - idn_switch *String, - idn_struct *String, - idn_return *String, - idn_continue *String, - - type_void *Type, - type_bool *Type, - type_str *Type, - type_nil *Type, - type_u32 *Type, - type_i32 *Type, - type_u8 *Type, -}; - -var ctx Context; - -fn string_make(text str, len u32) String { - var s String = ctx.stringlist; - while s != nil { - if (s.len == len) && strneq(text, s.text, len) { - return s; - } - s = s.next; - } - s = new(String); - s.len = len; - strcpyn(s.text, text, len + 1); - s.next = ctx.stringlist; - ctx.stringlist = s; - return s; -} - -fn scope_push(kind ScopeKind) Scope { - var scope Scope = new(Scope); - scope.first = nil; - scope.last = nil; - scope.parent = ctx.scope; - scope.kind = kind; - ctx.scope = scope; - return scope; -} - -// returns symbol list for the popped scope -fn scope_pop() Symbol { - var scope Scope = ctx.scope; - ctx.scope = scope.parent; - return scope.first; -} - -fn scope_find(kind ScopeKind) Scope { - var scope Scope = ctx.scope; - while scope != nil { - if scope.kind == kind { - return scope; - } - scope = scope.parent; - } - return nil; -} - -fn symbol_find_in(name String, scope Scope) Symbol { - var sym Symbol = scope.first; - while sym != nil { - if sym.name == name { - return sym; - } - sym = sym.next; - } - return nil; -} - -// find the first surrounding scope of a specified kind -fn symbol_find(name String) Symbol { - var scope Scope = ctx.scope; - while scope != nil { - var sym Symbol = symbol_find_in(name, scope); - if sym != nil { - return sym; - } - scope = scope.parent; - } - return nil; -} - -fn symbol_make_in_scope(name String, type Type, scope Scope) Symbol { - var sym Symbol = new(Symbol); - sym.name = name; - sym.type = type; - sym.next = nil; - sym.kind = SYMBOL_VAR; - if scope.first == nil { - scope.first = sym; - } else { - scope.last.next = sym; - } - scope.last = sym; - return sym; -} - -fn symbol_make_global(name String, type Type) Symbol { - return symbol_make_in_scope(name, type, ctx.global); -} - -fn symbol_make(name String, type Type) Symbol { - return symbol_make_in_scope(name, type, ctx.scope); -} - -fn type_make(name String, kind TypeKind, of Type, list Symbol, count u32) Type { - var type Type = new(Type); - type.name = name; - type.of = of; - type.list = list; - type.kind = kind; - type.count = count; - if name != nil { - type.next = ctx.typelist; - ctx.typelist = type; - } else { - type.next = nil; - } - return type; -} - -fn type_find(name String) Type { - var t Type = ctx.typelist; - while t != nil { - if t.name == name { - return t; - } - t = t.next; - } - return nil; -} - -fn type_find_field(type Type, name String) Symbol { - if type.kind != TYPE_STRUCT { - error("not a struct"); - } - var s Symbol = type.list; - while s != nil { - if s.name == name { - return s; - } - s = s.next; - } - error("struct has no such field '", @str name.text, "'"); - return nil; -} - -fn ast_make(kind AstKind, ival u32, name String, sym Symbol, type Type) Ast { - var node Ast = new(Ast); - node.kind = kind; - node.ival = ival; - node.name = name; - node.sym = sym; - node.type = type; - node.srcloc = ctx.linenumber; - return node; -} - -fn ast_make_lr(kind AstKind, left Ast, right Ast) Ast { - var node Ast = ast_make(kind, 0, nil, nil, nil); - node.left = left; - node.right = right; - return node; -} - -fn ast_make_l(kind AstKind, child Ast) Ast { - var node Ast = ast_make(kind, 0, nil, nil, nil); - node.left = child; - return node; -} - -fn ast_make_simple(kind AstKind, x u32) Ast { - return ast_make(kind, x, nil, nil, nil); -} - -fn ast_make_const(value u32, type Type) Ast { - return ast_make(AST_CONST, value, nil, nil, type); -} - -fn ast_make_symbol(name String, sym Symbol) Ast { - // TODO maybe handle nil sym differently? - var type Type = nil; - if sym != nil { - type = sym.type; - } - return ast_make(AST_SYMBOL, 0, name, sym, type); -} - -fn ctx_init() { - ctx = new(Context); - - ctx.idn_if = string_make("if", 2); - ctx.idn_fn = string_make("fn", 2); - ctx.idn_for = string_make("for", 3); - ctx.idn_var = string_make("var", 3); - ctx.idn_nil = string_make("nil", 3); - ctx.idn_new = string_make("new", 3); - ctx.idn_case = string_make("case", 4); - ctx.idn_else = string_make("else", 4); - ctx.idn_enum = string_make("enum", 4); - ctx.idn_true = string_make("true", 4); - ctx.idn_break = string_make("break", 5); - ctx.idn_while = string_make("while", 5); - ctx.idn_false = string_make("false", 5); - ctx.idn_switch = string_make("switch", 6); - ctx.idn_struct = string_make("struct", 6); - ctx.idn_return = string_make("return", 6); - ctx.idn_continue = string_make("continue", 8); - - ctx.type_void = type_make(string_make("void", 4), TYPE_VOID, nil, nil, 0); - ctx.type_bool = type_make(string_make("bool", 4), TYPE_BOOL, nil, nil, 0); - ctx.type_nil = type_make(string_make("nil", 3), TYPE_NIL, nil, nil, 0); - ctx.type_str = type_make(string_make("str", 3), TYPE_STR, nil, nil, 0); - ctx.type_u32 = type_make(string_make("u32", 3), TYPE_U32, nil, nil, 0); - ctx.type_i32 = type_make(string_make("i32", 3), TYPE_I32, nil, nil, 0); - ctx.type_u8 = type_make(string_make("u8", 2), TYPE_U8, nil, nil, 0); - - scope_push(SCOPE_GLOBAL); - ctx.global = ctx.scope; - - ctx.linenumber = 1; - ctx.filename = "<stdin>"; -} - -fn error_begin() i32 { - writes(2, "\n"); - writes(2, ctx.filename); - writes(2, ":"); - writei(2, ctx.linenumber); - writes(2, ": error: "); - return 2; -} - -fn error_end() { - writes(2, "\n"); - os_exit(1); -} - -// ================================================================ -// lexical scanner - -// currently unrecognized: # $ ? \ ` -var lextab [256]u8 = { - tEOF, tINV, tINV, tINV, tINV, tINV, tINV, tINV, - tINV, tSPC, tEOL, tSPC, tINV, tSPC, tINV, tINV, - tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, - tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, -// ! " # $ % & ' - tSPC, tBANG, tDQT, tMSC, tMSC, tPERCENT, tAMP, tSQT, -// ( ) * + , - . / - tOPAREN, tCPAREN, tSTAR, tPLUS, tCOMMA, tMINUS, tDOT, tSLASH, -// 0 1 2 3 4 5 6 7 - tNUM, tNUM, tNUM, tNUM, tNUM, tNUM, tNUM, tNUM, -// 8 9 : ; < = > ? - tNUM, tNUM, tCOLON, tSEMI, tLT, tASSIGN, tGT, tMSC, -// @ A B C D E F G - tAT, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, -// H I J K L M N O - tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, -// P Q R S T U V W - tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, -// X Y Z [ \ ] ^ _ - tIDN, tIDN, tIDN, tOBRACK, tMSC, tCBRACK, tCARET, tIDN, -// ` a b c d e f g - tMSC, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, -// h i j k l m n o - tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, -// p q r s t u v w - tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, -// x y z { | } ~ - tIDN, tIDN, tIDN, tOBRACE, tPIPE, tCBRACE, tNOT, tINV, - tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, - tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, - tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, - tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, - tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, - tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, - tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, - tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, - tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, - tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, - tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, - tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, - tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, - tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, - tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, - tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, -}; - -fn unhex(ch u32) i32 { - if (ch >= '0') && (ch <= '9') { - return ch - '0'; - } - if (ch >= 'a') && (ch <= 'f') { - return ch - 'a' + 10; - } - if (ch >= 'A') && (ch <= 'F') { - return ch - 'A' + 10; - } - return -1; -} - -fn scan() Token { - ctx.byteoffset++; - var ch i32 = readc(0); - if ch < 0 { - ctx.cc = 0; - } else { - ctx.cc = ch; - } - return ctx.cc; -} - -fn unescape(n u32) u32 { - if n == 'n' { - return 10; - } else if n == 'r' { - return 13; - } else if n == 't' { - return 9; - } else if n == '"' { - return '"'; - } else if n == '\'' { - return '\''; - } else if n == '\\' { - return '\\'; - } else if n == 'x' { - var x0 u32 = unhex(scan()); - var x1 u32 = unhex(scan()); - if (x0 < 0) || (x1 < 0) { - error("invalid hex escape"); - } - return (x0 << 4) | x1; - } else { - error("invalid escape ", n); - return 0; - } -} - -fn scan_string(cc u32, nc u32) Token { - var n u32 = 0; - while true { - if nc == '"' { - nc = scan(); - break; - } else if nc == 0 { - error("unterminated string"); - } else if nc == '\\' { - ctx.tmp[n] = unescape(scan()); - } else { - ctx.tmp[n] = nc; - } - nc = scan(); - n++; - if n == 255 { - error("constant string too large"); - } - } - ctx.tmp[n] = 0; - return tSTR; -} - -fn scan_keyword(len u32) Token { - ctx.tmp[len] = 0; - var idn String = string_make(ctx.tmp, len); - ctx.ident = idn; - - if len == 2 { - if idn == ctx.idn_if { return tIF; }; - if idn == ctx.idn_fn { return tFN; } - } else if len == 3 { - if idn == ctx.idn_for { return tFOR; } - if idn == ctx.idn_var { return tVAR; } - if idn == ctx.idn_nil { return tNIL; } - if idn == ctx.idn_new { return tNEW; } - } else if len == 4 { - if idn == ctx.idn_case { return tCASE; } - if idn == ctx.idn_else { return tELSE; } - if idn == ctx.idn_enum { return tENUM; } - if idn == ctx.idn_true { return tTRUE; } - } else if len == 5 { - if idn == ctx.idn_break { return tBREAK; } - if idn == ctx.idn_while { return tWHILE; } - if idn == ctx.idn_false { return tFALSE; } - } else if len == 6 { - if idn == ctx.idn_switch { return tSWITCH; } - if idn == ctx.idn_struct { return tSTRUCT; } - if idn == ctx.idn_return { return tRETURN; } - } else if len == 8 { - if idn == ctx.idn_continue { return tCONTINUE; } - } - return tIDN; -} - -fn scan_number(cc u32, nc u32) Token { - var n u32 = 1; - var val u32 = cc - '0'; - - if (cc == '0') && (nc == 'b') { // binary - nc = scan(); - while (nc == '0') || (nc == '1') { - val = (val << 1) | (nc - '0'); - nc = scan(); - n++; - if (n == 34) { - error("binary constant too large"); - } - } - } else if (cc == '0') && (nc == 'x') { // hex - nc = scan(); - while true { - var tmp i32 = unhex(nc); - if tmp == -1 { - break; - } - val = (val << 4) | tmp; - nc = scan(); - n++; - if n == 10 { - error("hex constant too large"); - } - } - } else { // decimal - while lextab[nc] == tNUM { - var tmp u32 = (val * 10) + (nc - '0'); - if tmp <= val { - error("decimal constant too large"); - } - val = tmp; - nc = scan(); - n++; - } - } - ctx.num = val; - return tNUM; -} - -fn scan_ident(cc u32, nc u32) Token { - ctx.tmp[0] = cc; - var n u32 = 1; - - while true { - var tok Token = lextab[nc]; - if (tok == tIDN) || (tok == tNUM) { - ctx.tmp[n] = nc; - n++; - if (n == 32) { error("identifier too large"); } - nc = scan(); - } else { - break; - } - } - return scan_keyword(n); -} - -fn _next() Token { - var nc u8 = ctx.cc; - while true { - var cc u8 = nc; - nc = scan(); - var tok Token = lextab[cc]; - if tok == tNUM { // 0..9 - return scan_number(cc, nc); - } else if tok == tIDN { // _ A..Z a..z - return scan_ident(cc, nc); - } else if tok == tDQT { // " - return scan_string(cc, nc); - } else if tok == tSQT { // ' - ctx.num = nc; - if nc == '\\' { - ctx.num = unescape(scan()); - } - nc = scan(); - if nc != '\'' { - error("unterminated character constant"); - } - nc = scan(); - return tNUM; - } else if tok == tPLUS { - if nc == '+' { tok = tINC; nc = scan(); } - } else if tok == tMINUS { - if nc == '-' { tok = tDEC; nc = scan(); } - } else if tok == tAMP { - if nc == '&' { tok = tAND; nc = scan(); } - } else if tok == tPIPE { - if nc == '|' { tok = tOR; nc = scan(); } - } else if tok == tGT { - if nc == '=' { tok = tGE; nc = scan(); } - else if nc == '>' { tok = tRIGHT; nc = scan(); } - } else if tok == tLT { - if nc == '=' { tok = tLE; nc = scan(); } - else if nc == '<' { tok = tLEFT; nc = scan(); } - } else if tok == tASSIGN { - if nc == '=' { tok = tEQ; nc = scan(); } - } else if tok == tBANG { - if nc == '=' { tok = tNE; nc = scan(); } - } else if tok == tSLASH { - if nc == '/' { - // comment -- consume until EOL or EOF - while (nc != '\n') && (nc != 0) { - nc = scan(); - } - continue; - } - } else if tok == tEOL { - ctx.linenumber++; - ctx.lineoffset = ctx.byteoffset; - //if ctx.flags & cfVisibleEOL { - // return tEOL; - //} - continue; - } else if tok == tSPC { - continue; - } else if (tok == tMSC) || (tok == tINV) { - error("unknown character 0x%02x", cc); - } - - // if we're an AddOp or MulOp, followed by an '=' - if ((tok & 0xF0) == 0x20) && (nc == '=') { - nc = scan(); - // transform us to a XEQ operation - tok = tok + 0x10; - } - - return tok; - } -} - -fn printstr(fd i32, s str) { - var n u32 = 0; - writec(fd, '"'); - while true { - var ch u32 = s[n]; - if ch == 0 { - break; - } else if (ch < ' ') || (ch > '~') { - writex(fd, ch); - } else if (ch == '"') || (ch == '\\') { - writec(fd, '\\'); - writec(fd, ch); - } else { - writec(fd, ch); - } - n++; - } - writec(fd, '"'); -} - -fn token_print(fd i32) { - if ctx.tok == tNUM { - writec(fd, '#'); - writex(fd, ctx.num); - } else if ctx.tok == tIDN { - writec(fd, '@'); - writes(fd, ctx.tmp); - } else if ctx.tok == tEOL { - writec(fd, '\n'); - } else if ctx.tok == tSTR { - printstr(fd, ctx.tmp); - } else { - writes(fd, tnames[ctx.tok]); - } - writec(fd, ' '); -} - -fn next() Token { - ctx.tok = _next(); - return ctx.tok; -} - -// ================================================================ -// parser - -fn expected(what str) { - error("expected ", what, ", found ", @str tnames[ctx.tok]); -} - -fn expect(tok Token) { - if ctx.tok != tok { - expected(tnames[tok]); - } -} - -fn require(tok Token) { - expect(tok); - next(); -} - -fn parse_name(what str) String { - if ctx.tok != tIDN { - expected(what); - } - var s String = ctx.ident; - next(); - return s; -} - -fn parse_symbol(what str) Ast { - var name String = parse_name(what); - var sym Symbol = symbol_find(name); - return ast_make_symbol(name, sym); -} - -fn parse_ident() Ast { - var node Ast = parse_symbol("identifier"); - - if (node.sym == nil) && (ctx.tok != tOPAREN) { - error("undefined identifier '", @str node.name.text, "'"); - } - - if ctx.tok == tOPAREN { - // function call - next(); - node = ast_make_l(AST_CALL, node); - var last Ast = nil; - - while ctx.tok != tCPAREN { - if ctx.tok == tAT { - // type annotation for varargs hack - next(); - parse_type(false); - } - - var expr Ast = parse_expr(); - if last != nil { - last.next = expr; - } else { - node.right = expr; - } - last = expr; - - if ctx.tok != tCPAREN { - require(tCOMMA); - } - } - next(); - } - - while true { - if ctx.tok == tDOT { - // field access - next(); - node = ast_make_lr(AST_FIELD, node, parse_symbol("field name")); - } else if ctx.tok == tOBRACK { - // array access - next(); - node = ast_make_lr(AST_INDEX, node, parse_expr()); - require(tCBRACK); - } else { - return node; - } - } - - return nil; // unreachable -} - -fn parse_primary_expr() Ast { - var node Ast; - if ctx.tok == tNUM { - node = ast_make_const(ctx.num, ctx.type_i32); - next(); - } else if ctx.tok == tSTR { - node = ast_make_simple(AST_STRING, 0); - node.name = string_make(ctx.tmp, strlen(ctx.tmp)); - next(); - } else if ctx.tok == tTRUE { - node = ast_make_const(1, ctx.type_bool); - next(); - } else if ctx.tok == tFALSE { - node = ast_make_const(0, ctx.type_bool); - next(); - } else if ctx.tok == tNIL { - node = ast_make_const(0, ctx.type_nil); - next(); - } else if ctx.tok == tOPAREN { - next(); - node = parse_expr(); - require(tCPAREN); - } else if ctx.tok == tNEW { - next(); - require(tOPAREN); - node = ast_make_simple(AST_NEW, 0); - node.name = parse_name("type name"); - require(tCPAREN); - } else if ctx.tok == tIDN { - node = parse_ident(); - } else { - error("invalid expression"); - } - return node; -} - -fn parse_unary_expr() Ast { - var op u32 = ctx.tok; - if op == tPLUS { - next(); - return parse_unary_expr(); - } else if op == tMINUS { - next(); - return ast_make_l(AST_NEG, parse_unary_expr()); - } else if op == tBANG { - next(); - return ast_make_l(AST_BOOL_NOT, parse_unary_expr()); - } else if op == tNOT { - next(); - return ast_make_l(AST_NOT, parse_unary_expr()); - } else if op == tAMP { - error("dereference not supported"); - //next(); - //parse_unary_expr(); - } else { - return parse_primary_expr(); - } - return nil; // unreachable -} - -fn parse_mul_expr() Ast { - var node Ast = parse_unary_expr(); - while ctx.tok & tcMASK == tcMULOP { - var op u32 = (ctx.tok - tSTAR) + AST_MUL; - next(); - node = ast_make_lr(op, node, parse_unary_expr()); - } - return node; -} - -fn parse_add_expr() Ast { - var node Ast = parse_mul_expr(); - while ctx.tok & tcMASK == tcADDOP { - var op u32 = (ctx.tok - tPLUS) + AST_ADD; - next(); - node = ast_make_lr(op, node, parse_mul_expr()); - } - return node; -} - -fn parse_rel_expr() Ast { - var node Ast = parse_add_expr(); - if ctx.tok & tcMASK == tcRELOP { - var op u32 = (ctx.tok - tEQ) + AST_EQ; - next(); - node = ast_make_lr(op, node, parse_add_expr()); - } - return node; -} - -fn parse_and_expr() Ast { - var node Ast = parse_rel_expr(); - if ctx.tok == tAND { - while ctx.tok == tAND { - next(); - node = ast_make_lr(AST_BOOL_AND, node, parse_rel_expr()); - } - } - return node; -} - -fn parse_expr() Ast { - var node Ast = parse_and_expr(); - if ctx.tok == tOR { - while ctx.tok == tOR { - next(); - node = ast_make_lr(AST_BOOL_OR, node, parse_and_expr()); - } - } - return node; -} - -fn parse_struct_type(name String) Type { - var type Type = type_find(name); - - if type == nil { - type = type_make(name, TYPE_STRUCT, nil, nil, 0); - } else { - if type.kind == TYPE_UNDEFINED { - // resolve forward ref - type.kind = TYPE_STRUCT; - } else { - error("cannot redefine struct '", @str name.text, "'"); - } - }; - scope_push(SCOPE_STRUCT); - require(tOBRACE); - while true { - if ctx.tok == tCBRACE { - next(); - break; - } - var fname String = parse_name("field name"); - var kind SymbolKind = SYMBOL_FLD; - if ctx.tok == tSTAR { - next(); - kind = SYMBOL_PTR; - } - var ftype Type = parse_type(true); - var sym Symbol = symbol_make(fname, ftype); - sym.kind = kind; - if ctx.tok != tCBRACE { - require(tCOMMA); - } - } - type.list = scope_pop(); - return type; -} - - -fn parse_array_type() Type { - var type Type; - var nelem u32 = 0; - if ctx.tok == tCBRACK { - // TODO: slices - next(); - type = type_make(nil, TYPE_ARRAY, parse_type(false), nil, 0); - } else { - if ctx.tok != tNUM { - error("array size must be numeric"); - } - nelem = ctx.num; - next(); - require(tCBRACK); - type = type_make(nil, TYPE_ARRAY, parse_type(false), nil, nelem); - } - // TODO: type.name? - return type; -} - -fn parse_type(fwd_ref_ok u32) Type { - if ctx.tok == tSTAR { // pointer-to - error("pointer types not supported"); - //next(); - //return type_make(nil, TYPE_POINTER, parse_type(true), nil, 0); - } else if ctx.tok == tOBRACK { // array-of - next(); - return parse_array_type(); - } else if ctx.tok == tFN { - error("func types not supported"); - //next(); - //return parse_func_type(); - } else if ctx.tok == tSTRUCT { - error ("anonymous struct types not supported"); - //next(); - //return parse_struct_type(nil); - } else if ctx.tok == tIDN { - var name String = ctx.ident; - next(); - var type Type = type_find(name); - if type == nil { - if fwd_ref_ok { - type = type_make(name, TYPE_UNDEFINED, nil, nil, 0); - } else { - error("undefined type '", @str name.text, "' not usable here"); - } - } - return type; - } else { - expected("type"); - } - return nil; -} - -fn parse_while() Ast { - // while expr { block } - var expr Ast = parse_expr(); - require(tOBRACE); - scope_push(SCOPE_LOOP); - var block Ast = parse_block(); - scope_pop(); - return ast_make_lr(AST_WHILE, expr, block); -} - -fn parse_if() Ast { - // if expr { block } - - var expr Ast = parse_expr(); - require(tOBRACE); - scope_push(SCOPE_BLOCK); - var block Ast = parse_block(); - scope_pop(); - - var last Ast = ast_make_lr(AST_CASE, expr, block); - var stmt Ast = ast_make_l(AST_IF, last); - - while ctx.tok == tELSE { - // ... else ... - next(); - if ctx.tok == tIF { - // ... if expr { block } - next(); - expr = parse_expr(); - require(tOBRACE); - scope_push(SCOPE_BLOCK); - block = parse_block(); - scope_pop(); - last.next = ast_make_lr(AST_CASE, expr, block); - last = last.next; - } else { - // ... { block } - require(tOBRACE); - scope_push(SCOPE_BLOCK); - block = parse_block(); - scope_pop(); - last.next = ast_make_l(AST_ELSE, block); - break; - } - } - return stmt; -} - -fn parse_return() Ast { - // TODO check for return required/type - var node Ast = ast_make_simple(AST_RETURN, 0); - if ctx.tok == tSEMI { - next(); - } else { - // error("return types do not match"); - node.left = parse_expr(); - require(tSEMI); - } - return node; -} - -fn parse_break() Ast { - // TODO: break-to-labeled-loop support - var scope Scope = scope_find(SCOPE_LOOP); - if scope == nil { - error("break must be used from inside a looping construct"); - } - require(tSEMI); - return ast_make_simple(AST_BREAK, 0); -} - -fn parse_continue() Ast { - // TODO: continue-to-labeled-loop support - var scope Scope = scope_find(SCOPE_LOOP); - if scope == nil { - error("continue must be used from inside a looping construct"); - } - require(tSEMI); - return ast_make_simple(AST_CONTINUE, 0); -} - -fn parse_struct_init(sym Symbol) { - while true { - if ctx.tok == tCBRACE { - next(); - break; - } - var name String = parse_name("field name"); - var field Symbol = sym.type.list; - while true { // TODO: field_find - if field == nil { - error("structure has no '", @str name.text, "' field"); - } - if field.name == name { - break; - } - field = field.next; - } - require(tCOLON); - if ctx.tok == tOBRACE { - next(); - parse_struct_init(field); - } else { - parse_expr(); - } - if ctx.tok != tCBRACE { - require(tCOMMA); - } - } -} - -fn parse_array_init(sym Symbol) { - while true { - if ctx.tok == tCBRACE { - next(); - break; - } - parse_expr(); - if ctx.tok != tCBRACE { - require(tCOMMA); - } - } -} - -fn parse_var() { - // TODO: global vs local - // TODO: generate assignment/initialization - var name String = parse_name("variable name"); - var type Type = parse_type(false); - var sym Symbol = symbol_make(name, type); - - if ctx.tok == tASSIGN { - next(); - if ctx.tok == tOBRACE { - next(); - if type.kind == TYPE_STRUCT { - parse_struct_init(sym); - } else if type.kind == TYPE_ARRAY { - parse_array_init(sym); - } else { - error("type ", @str type.name.text, - " cannot be initialized with {} expr"); - } - } else { - parse_expr(); - } - } else { - // default init - } - require(tSEMI); -} - -fn _parse_expr_statement() Ast { - var node Ast = parse_expr(); - var expr Ast = nil; - var op u32; - - if ctx.tok == tASSIGN { - // basic assignment - next(); - return ast_make_lr(AST_ASSIGN, node, parse_expr()); - } else if (ctx.tok & tcMASK) == tcAEQOP { - // +=, etc - op = (ctx.tok - tADDEQ) + AST_ADD; - next(); - expr = parse_expr(); - } else if (ctx.tok & tcMASK) == tcMEQOP { - // *=, etc - op = (ctx.tok - tMULEQ) + AST_MUL; - next(); - expr = parse_expr(); - } else if (ctx.tok == tINC) { - op = AST_ADD; - next(); - expr = ast_make_const(1, ctx.type_i32); - } else if (ctx.tok == tDEC) { - op = AST_SUB; - expr = ast_make_const(1, ctx.type_i32); - } else { - // simple expression - return node; - } - - // TODO duplicate node instead of sharing it - expr = ast_make_lr(op, node, expr); - return ast_make_lr(AST_ASSIGN, node, expr); -} - -fn parse_expr_statement() Ast { - var stmt Ast = ast_make_l(AST_EXPR, _parse_expr_statement()); - require(tSEMI); - return stmt; -} - -fn parse_block() Ast { - var block Ast = ast_make_simple(AST_BLOCK, 0); - var last Ast = nil; - var node Ast; - while true { - if ctx.tok == tCBRACE { - next(); - break; - } else if ctx.tok == tRETURN { - next(); - node = parse_return(); - } else if ctx.tok == tBREAK { - next(); - node = parse_break(); - } else if ctx.tok == tCONTINUE { - next(); - node = parse_continue(); - } else if ctx.tok == tWHILE { - next(); - node = parse_while(); - } else if ctx.tok == tIF { - next(); - node = parse_if(); - } else if ctx.tok == tVAR { - next(); - parse_var(); - } else if ctx.tok == tSEMI { - next(); - // empty statement - continue; - } else { - node = parse_expr_statement(); - } - if last == nil { - block.left = node; - } else { - last.next = node; - } - last = node; - } - return block; -} - -fn parse_param(fname String) Symbol { - var pname String = parse_name("parameter name"); - var ptype Type = parse_type(false); - if symbol_find(pname) != nil { - error("duplicate parameter name '", @str pname.text, "'"); - } - return symbol_make(pname, ptype); -} - -fn parse_fn_body(sym Symbol) Ast { - ctx.cur_fn = sym; - require(tOBRACE); - scope_push(SCOPE_FUNC); // parameters, from fn - scope_push(SCOPE_BLOCK); - var node Ast = parse_block(); - scope_pop(); // stow in node? - scope_pop(); - ctx.cur_fn = nil; - return node; -} - -fn parse_fn() Ast { - var fname String = parse_name("function name"); - var rtype Type = ctx.type_void; - - scope_push(SCOPE_FUNC); - - require(tOPAREN); - var n u32 = 0; - if ctx.tok != tCPAREN { - parse_param(fname); - while ctx.tok == tCOMMA { - next(); - parse_param(fname); - } - n++; - } - require(tCPAREN); - - if ctx.tok != tOBRACE { - rtype = parse_type(false); - } - - var sym Symbol = symbol_make_global(fname, rtype); - sym.kind = SYMBOL_FN; - sym.type = type_make(fname, TYPE_FN, rtype, nil, n); - - var node Ast = ast_make_simple(AST_FUNC, 0); - node.name = fname; - node.sym = sym; - node.right = parse_fn_body(sym); - - // save parameters - sym.type.list = scope_pop(); - - return node; -} - -fn parse_enum_def() { - if ctx.tok == tIDN { - var name String = parse_name("enum name"); - type_make(name, TYPE_ENUM, nil, nil, 0); - } - - require(tOBRACE); - var val u32 = 0; - while ctx.tok != tCBRACE { - var name String = parse_name("enum tag name"); - var sym Symbol = symbol_find(name); - if sym != nil { - error("cannot redfine '", @str name.text, "' as enum tag"); - } - sym = symbol_make_global(name, ctx.type_u32); - sym.kind = SYMBOL_DEF; - if ctx.tok == tASSIGN { - next(); - parse_expr(); - // TODO val <- expr - } else { - val++; - } - require(tCOMMA); - } - require(tCBRACE); - require(tSEMI); -} - -fn parse_program() Ast { - var program Ast = ast_make_simple(AST_PROGRAM, 0); - var last Ast = nil; - - while true { - if ctx.tok == tENUM { - next(); - parse_enum_def(); - } else if ctx.tok == tSTRUCT { - next(); - var name String = parse_name("struct name"); - parse_struct_type(name); - require(tSEMI); - } else if ctx.tok == tFN { - next(); - var node Ast = parse_fn(); - if last == nil { - program.left = node; - } else { - last.next = node; - } - last = node; - } else if ctx.tok == tVAR { - next(); - parse_var(); - } else if ctx.tok == tEOF { - break; - } else { - expected("function, variable, or type definition"); - } - } - return program; -} - -var indent u32 = 0; - -fn print_indent(fd i32) { - var n u32 = 0; - while n < indent { - writes(fd, " "); - n++; - } -} - -fn _dump_ast_node(fd i32, node Ast) { - var kind AstKind = node.kind; - var child Ast = nil; - - print_indent(fd); - writes(fd, ast_kind[kind]); - writes(fd, " "); - indent++; - if kind == AST_CONST { - writex(fd, node.ival); - } else if kind == AST_FUNC { - writes(fd, node.name.text); - writes(fd, "("); - var param Symbol = node.sym.type.list; - while param != nil { - writes(fd, param.name.text); - if param.next != nil { - writes(fd, ", "); - } - param = param.next; - } - writes(fd, ") "); - writes(fd, node.sym.type.of.name.text); - } else if kind == AST_STRING { - printstr(fd, node.name.text); - } else if kind == AST_SYMBOL { - writes(fd, node.name.text); - } - writes(fd, "\n"); - - dump_ast_node(fd, node.left); - dump_ast_node(fd, node.right); - indent = indent - 1; -} - -fn dump_ast_node(fd i32, node Ast) { - if node != nil { - while true { - _dump_ast_node(fd, node); - node = node.next; - if node == nil { - break; - } - } - } -} - -fn dump_ast(node Ast) { - dump_ast_node(1, node); -} - -fn start() i32 { - ctx_init(); - scan(); - - next(); - var ast Ast = parse_program(); - - dump_ast(ast); - - //while(next() != tEOF) { - // token_print(1); - //} - //writec(1, '\n'); - return 0; -} - diff --git a/compiler/lexer.spl b/compiler/lexer.spl @@ -0,0 +1,349 @@ +// Copyright 2023, Brian Swetland <swetland@frotz.net> +// Licensed under the Apache License, Version 2.0. + +fn error_begin() i32 { + writes(2, "\n"); + writes(2, ctx.filename); + writes(2, ":"); + writei(2, ctx.linenumber); + writes(2, ": error: "); + return 2; +} + +fn error_end() { + writes(2, "\n"); + os_exit(1); +} + +// ================================================================ +// lexical scanner + +// currently unrecognized: # $ ? \ ` +var lextab [256]u8 = { + tEOF, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tSPC, tEOL, tSPC, tINV, tSPC, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, +// ! " # $ % & ' + tSPC, tBANG, tDQT, tMSC, tMSC, tPERCENT, tAMP, tSQT, +// ( ) * + , - . / + tOPAREN, tCPAREN, tSTAR, tPLUS, tCOMMA, tMINUS, tDOT, tSLASH, +// 0 1 2 3 4 5 6 7 + tNUM, tNUM, tNUM, tNUM, tNUM, tNUM, tNUM, tNUM, +// 8 9 : ; < = > ? + tNUM, tNUM, tCOLON, tSEMI, tLT, tASSIGN, tGT, tMSC, +// @ A B C D E F G + tAT, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, +// H I J K L M N O + tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, +// P Q R S T U V W + tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, +// X Y Z [ \ ] ^ _ + tIDN, tIDN, tIDN, tOBRACK, tMSC, tCBRACK, tCARET, tIDN, +// ` a b c d e f g + tMSC, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, +// h i j k l m n o + tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, +// p q r s t u v w + tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, +// x y z { | } ~ + tIDN, tIDN, tIDN, tOBRACE, tPIPE, tCBRACE, tNOT, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, +}; + +fn unhex(ch u32) i32 { + if (ch >= '0') && (ch <= '9') { + return ch - '0'; + } + if (ch >= 'a') && (ch <= 'f') { + return ch - 'a' + 10; + } + if (ch >= 'A') && (ch <= 'F') { + return ch - 'A' + 10; + } + return -1; +} + +fn scan() Token { + ctx.byteoffset++; + var ch i32 = readc(0); + if ch < 0 { + ctx.cc = 0; + } else { + ctx.cc = ch; + } + return ctx.cc; +} + +fn unescape(n u32) u32 { + if n == 'n' { + return 10; + } else if n == 'r' { + return 13; + } else if n == 't' { + return 9; + } else if n == '"' { + return '"'; + } else if n == '\'' { + return '\''; + } else if n == '\\' { + return '\\'; + } else if n == 'x' { + var x0 u32 = unhex(scan()); + var x1 u32 = unhex(scan()); + if (x0 < 0) || (x1 < 0) { + error("invalid hex escape"); + } + return (x0 << 4) | x1; + } else { + error("invalid escape ", n); + return 0; + } +} + +fn scan_string(cc u32, nc u32) Token { + var n u32 = 0; + while true { + if nc == '"' { + nc = scan(); + break; + } else if nc == 0 { + error("unterminated string"); + } else if nc == '\\' { + ctx.tmp[n] = unescape(scan()); + } else { + ctx.tmp[n] = nc; + } + nc = scan(); + n++; + if n == 255 { + error("constant string too large"); + } + } + ctx.tmp[n] = 0; + return tSTR; +} + +fn scan_keyword(len u32) Token { + ctx.tmp[len] = 0; + var idn String = string_make(ctx.tmp, len); + ctx.ident = idn; + + if len == 2 { + if idn == ctx.idn_if { return tIF; }; + if idn == ctx.idn_fn { return tFN; } + } else if len == 3 { + if idn == ctx.idn_for { return tFOR; } + if idn == ctx.idn_var { return tVAR; } + if idn == ctx.idn_nil { return tNIL; } + if idn == ctx.idn_new { return tNEW; } + } else if len == 4 { + if idn == ctx.idn_case { return tCASE; } + if idn == ctx.idn_else { return tELSE; } + if idn == ctx.idn_enum { return tENUM; } + if idn == ctx.idn_true { return tTRUE; } + } else if len == 5 { + if idn == ctx.idn_break { return tBREAK; } + if idn == ctx.idn_while { return tWHILE; } + if idn == ctx.idn_false { return tFALSE; } + } else if len == 6 { + if idn == ctx.idn_switch { return tSWITCH; } + if idn == ctx.idn_struct { return tSTRUCT; } + if idn == ctx.idn_return { return tRETURN; } + } else if len == 8 { + if idn == ctx.idn_continue { return tCONTINUE; } + } + return tIDN; +} + +fn scan_number(cc u32, nc u32) Token { + var n u32 = 1; + var val u32 = cc - '0'; + + if (cc == '0') && (nc == 'b') { // binary + nc = scan(); + while (nc == '0') || (nc == '1') { + val = (val << 1) | (nc - '0'); + nc = scan(); + n++; + if (n == 34) { + error("binary constant too large"); + } + } + } else if (cc == '0') && (nc == 'x') { // hex + nc = scan(); + while true { + var tmp i32 = unhex(nc); + if tmp == -1 { + break; + } + val = (val << 4) | tmp; + nc = scan(); + n++; + if n == 10 { + error("hex constant too large"); + } + } + } else { // decimal + while lextab[nc] == tNUM { + var tmp u32 = (val * 10) + (nc - '0'); + if tmp <= val { + error("decimal constant too large"); + } + val = tmp; + nc = scan(); + n++; + } + } + ctx.num = val; + return tNUM; +} + +fn scan_ident(cc u32, nc u32) Token { + ctx.tmp[0] = cc; + var n u32 = 1; + + while true { + var tok Token = lextab[nc]; + if (tok == tIDN) || (tok == tNUM) { + ctx.tmp[n] = nc; + n++; + if (n == 32) { error("identifier too large"); } + nc = scan(); + } else { + break; + } + } + return scan_keyword(n); +} + +fn _next() Token { + var nc u8 = ctx.cc; + while true { + var cc u8 = nc; + nc = scan(); + var tok Token = lextab[cc]; + if tok == tNUM { // 0..9 + return scan_number(cc, nc); + } else if tok == tIDN { // _ A..Z a..z + return scan_ident(cc, nc); + } else if tok == tDQT { // " + return scan_string(cc, nc); + } else if tok == tSQT { // ' + ctx.num = nc; + if nc == '\\' { + ctx.num = unescape(scan()); + } + nc = scan(); + if nc != '\'' { + error("unterminated character constant"); + } + nc = scan(); + return tNUM; + } else if tok == tPLUS { + if nc == '+' { tok = tINC; nc = scan(); } + } else if tok == tMINUS { + if nc == '-' { tok = tDEC; nc = scan(); } + } else if tok == tAMP { + if nc == '&' { tok = tAND; nc = scan(); } + } else if tok == tPIPE { + if nc == '|' { tok = tOR; nc = scan(); } + } else if tok == tGT { + if nc == '=' { tok = tGE; nc = scan(); } + else if nc == '>' { tok = tRIGHT; nc = scan(); } + } else if tok == tLT { + if nc == '=' { tok = tLE; nc = scan(); } + else if nc == '<' { tok = tLEFT; nc = scan(); } + } else if tok == tASSIGN { + if nc == '=' { tok = tEQ; nc = scan(); } + } else if tok == tBANG { + if nc == '=' { tok = tNE; nc = scan(); } + } else if tok == tSLASH { + if nc == '/' { + // comment -- consume until EOL or EOF + while (nc != '\n') && (nc != 0) { + nc = scan(); + } + continue; + } + } else if tok == tEOL { + ctx.linenumber++; + ctx.lineoffset = ctx.byteoffset; + //if ctx.flags & cfVisibleEOL { + // return tEOL; + //} + continue; + } else if tok == tSPC { + continue; + } else if (tok == tMSC) || (tok == tINV) { + error("unknown character 0x%02x", cc); + } + + // if we're an AddOp or MulOp, followed by an '=' + if ((tok & 0xF0) == 0x20) && (nc == '=') { + nc = scan(); + // transform us to a XEQ operation + tok = tok + 0x10; + } + + return tok; + } +} + +fn printstr(fd i32, s str) { + var n u32 = 0; + writec(fd, '"'); + while true { + var ch u32 = s[n]; + if ch == 0 { + break; + } else if (ch < ' ') || (ch > '~') { + writex(fd, ch); + } else if (ch == '"') || (ch == '\\') { + writec(fd, '\\'); + writec(fd, ch); + } else { + writec(fd, ch); + } + n++; + } + writec(fd, '"'); +} + +fn token_print(fd i32) { + if ctx.tok == tNUM { + writec(fd, '#'); + writex(fd, ctx.num); + } else if ctx.tok == tIDN { + writec(fd, '@'); + writes(fd, ctx.tmp); + } else if ctx.tok == tEOL { + writec(fd, '\n'); + } else if ctx.tok == tSTR { + printstr(fd, ctx.tmp); + } else { + writes(fd, tnames[ctx.tok]); + } + writec(fd, ' '); +} + +fn next() Token { + ctx.tok = _next(); + return ctx.tok; +} + diff --git a/compiler/main.spl b/compiler/main.spl @@ -0,0 +1,75 @@ +// Copyright 2023, Brian Swetland <swetland@frotz.net> +// Licensed under the Apache License, Version 2.0. + +var indent u32 = 0; + +fn print_indent(fd i32) { + var n u32 = 0; + while n < indent { + writes(fd, " "); + n++; + } +} + +fn _dump_ast_node(fd i32, node Ast) { + var kind AstKind = node.kind; + + print_indent(fd); + writes(fd, ast_kind[kind]); + writes(fd, " "); + indent++; + if kind == AST_CONST { + writex(fd, node.ival); + } else if kind == AST_FUNC { + writes(fd, node.name.text); + writes(fd, "("); + var param Symbol = node.sym.type.list; + while param != nil { + writes(fd, param.name.text); + if param.next != nil { + writes(fd, ", "); + } + param = param.next; + } + writes(fd, ") "); + writes(fd, node.sym.type.of.name.text); + } else if kind == AST_STRING { + printstr(fd, node.name.text); + } else if kind == AST_SYMBOL { + writes(fd, node.name.text); + } + writes(fd, "\n"); + + dump_ast_node(fd, node.left); + dump_ast_node(fd, node.right); + indent = indent - 1; +} + +fn dump_ast_node(fd i32, node Ast) { + if node != nil { + while true { + _dump_ast_node(fd, node); + node = node.next; + if node == nil { + break; + } + } + } +} + +fn dump_ast(node Ast) { + dump_ast_node(1, node); +} + +fn start() i32 { + ctx_init(); + scan(); + + next(); + var ast Ast = parse_program(); + + dump_ast(ast); + + return 0; +} + diff --git a/compiler/parser.spl b/compiler/parser.spl @@ -0,0 +1,651 @@ +// Copyright 2023, Brian Swetland <swetland@frotz.net> +// Licensed under the Apache License, Version 2.0. + +// ================================================================ +// parser + +fn expected(what str) { + error("expected ", what, ", found ", @str tnames[ctx.tok]); +} + +fn expect(tok Token) { + if ctx.tok != tok { + expected(tnames[tok]); + } +} + +fn require(tok Token) { + expect(tok); + next(); +} + +fn parse_name(what str) String { + if ctx.tok != tIDN { + expected(what); + } + var s String = ctx.ident; + next(); + return s; +} + +fn parse_symbol(what str) Ast { + var name String = parse_name(what); + var sym Symbol = symbol_find(name); + return ast_make_symbol(name, sym); +} + +fn parse_ident() Ast { + var node Ast = parse_symbol("identifier"); + + if (node.sym == nil) && (ctx.tok != tOPAREN) { + error("undefined identifier '", @str node.name.text, "'"); + } + + if ctx.tok == tOPAREN { + // function call + next(); + node = ast_make_l(AST_CALL, node); + var last Ast = nil; + + while ctx.tok != tCPAREN { + if ctx.tok == tAT { + // type annotation for varargs hack + next(); + parse_type(false); + } + + var expr Ast = parse_expr(); + if last != nil { + last.next = expr; + } else { + node.right = expr; + } + last = expr; + + if ctx.tok != tCPAREN { + require(tCOMMA); + } + } + next(); + } + + while true { + if ctx.tok == tDOT { + // field access + next(); + node = ast_make_lr(AST_FIELD, node, parse_symbol("field name")); + } else if ctx.tok == tOBRACK { + // array access + next(); + node = ast_make_lr(AST_INDEX, node, parse_expr()); + require(tCBRACK); + } else { + return node; + } + } + + return nil; // unreachable +} + +fn parse_primary_expr() Ast { + var node Ast; + if ctx.tok == tNUM { + node = ast_make_const(ctx.num, ctx.type_i32); + next(); + } else if ctx.tok == tSTR { + node = ast_make_simple(AST_STRING, 0); + node.name = string_make(ctx.tmp, strlen(ctx.tmp)); + next(); + } else if ctx.tok == tTRUE { + node = ast_make_const(1, ctx.type_bool); + next(); + } else if ctx.tok == tFALSE { + node = ast_make_const(0, ctx.type_bool); + next(); + } else if ctx.tok == tNIL { + node = ast_make_const(0, ctx.type_nil); + next(); + } else if ctx.tok == tOPAREN { + next(); + node = parse_expr(); + require(tCPAREN); + } else if ctx.tok == tNEW { + next(); + require(tOPAREN); + node = ast_make_simple(AST_NEW, 0); + node.name = parse_name("type name"); + require(tCPAREN); + } else if ctx.tok == tIDN { + node = parse_ident(); + } else { + error("invalid expression"); + } + return node; +} + +fn parse_unary_expr() Ast { + var op u32 = ctx.tok; + if op == tPLUS { + next(); + return parse_unary_expr(); + } else if op == tMINUS { + next(); + return ast_make_l(AST_NEG, parse_unary_expr()); + } else if op == tBANG { + next(); + return ast_make_l(AST_BOOL_NOT, parse_unary_expr()); + } else if op == tNOT { + next(); + return ast_make_l(AST_NOT, parse_unary_expr()); + } else if op == tAMP { + error("dereference not supported"); + //next(); + //parse_unary_expr(); + } else { + return parse_primary_expr(); + } + return nil; // unreachable +} + +fn parse_mul_expr() Ast { + var node Ast = parse_unary_expr(); + while ctx.tok & tcMASK == tcMULOP { + var op u32 = (ctx.tok - tSTAR) + AST_MUL; + next(); + node = ast_make_lr(op, node, parse_unary_expr()); + } + return node; +} + +fn parse_add_expr() Ast { + var node Ast = parse_mul_expr(); + while ctx.tok & tcMASK == tcADDOP { + var op u32 = (ctx.tok - tPLUS) + AST_ADD; + next(); + node = ast_make_lr(op, node, parse_mul_expr()); + } + return node; +} + +fn parse_rel_expr() Ast { + var node Ast = parse_add_expr(); + if ctx.tok & tcMASK == tcRELOP { + var op u32 = (ctx.tok - tEQ) + AST_EQ; + next(); + node = ast_make_lr(op, node, parse_add_expr()); + } + return node; +} + +fn parse_and_expr() Ast { + var node Ast = parse_rel_expr(); + if ctx.tok == tAND { + while ctx.tok == tAND { + next(); + node = ast_make_lr(AST_BOOL_AND, node, parse_rel_expr()); + } + } + return node; +} + +fn parse_expr() Ast { + var node Ast = parse_and_expr(); + if ctx.tok == tOR { + while ctx.tok == tOR { + next(); + node = ast_make_lr(AST_BOOL_OR, node, parse_and_expr()); + } + } + return node; +} + +fn parse_struct_type(name String) Type { + var type Type = type_find(name); + + if type == nil { + type = type_make(name, TYPE_STRUCT, nil, nil, 0); + } else { + if type.kind == TYPE_UNDEFINED { + // resolve forward ref + type.kind = TYPE_STRUCT; + } else { + error("cannot redefine struct '", @str name.text, "'"); + } + }; + scope_push(SCOPE_STRUCT); + require(tOBRACE); + while true { + if ctx.tok == tCBRACE { + next(); + break; + } + var fname String = parse_name("field name"); + var kind SymbolKind = SYMBOL_FLD; + if ctx.tok == tSTAR { + next(); + kind = SYMBOL_PTR; + } + var ftype Type = parse_type(true); + var sym Symbol = symbol_make(fname, ftype); + sym.kind = kind; + if ctx.tok != tCBRACE { + require(tCOMMA); + } + } + type.list = scope_pop(); + return type; +} + + +fn parse_array_type() Type { + var type Type; + var nelem u32 = 0; + if ctx.tok == tCBRACK { + // TODO: slices + next(); + type = type_make(nil, TYPE_ARRAY, parse_type(false), nil, 0); + } else { + if ctx.tok != tNUM { + error("array size must be numeric"); + } + nelem = ctx.num; + next(); + require(tCBRACK); + type = type_make(nil, TYPE_ARRAY, parse_type(false), nil, nelem); + } + // TODO: type.name? + return type; +} + +fn parse_type(fwd_ref_ok u32) Type { + if ctx.tok == tSTAR { // pointer-to + error("pointer types not supported"); + //next(); + //return type_make(nil, TYPE_POINTER, parse_type(true), nil, 0); + } else if ctx.tok == tOBRACK { // array-of + next(); + return parse_array_type(); + } else if ctx.tok == tFN { + error("func types not supported"); + //next(); + //return parse_func_type(); + } else if ctx.tok == tSTRUCT { + error ("anonymous struct types not supported"); + //next(); + //return parse_struct_type(nil); + } else if ctx.tok == tIDN { + var name String = ctx.ident; + next(); + var type Type = type_find(name); + if type == nil { + if fwd_ref_ok { + type = type_make(name, TYPE_UNDEFINED, nil, nil, 0); + } else { + error("undefined type '", @str name.text, "' not usable here"); + } + } + return type; + } else { + expected("type"); + } + return nil; +} + +fn parse_while() Ast { + // while expr { block } + var expr Ast = parse_expr(); + require(tOBRACE); + scope_push(SCOPE_LOOP); + var block Ast = parse_block(); + scope_pop(); + return ast_make_lr(AST_WHILE, expr, block); +} + +fn parse_if() Ast { + // if expr { block } + + var expr Ast = parse_expr(); + require(tOBRACE); + scope_push(SCOPE_BLOCK); + var block Ast = parse_block(); + scope_pop(); + + var last Ast = ast_make_lr(AST_CASE, expr, block); + var stmt Ast = ast_make_l(AST_IF, last); + + while ctx.tok == tELSE { + // ... else ... + next(); + if ctx.tok == tIF { + // ... if expr { block } + next(); + expr = parse_expr(); + require(tOBRACE); + scope_push(SCOPE_BLOCK); + block = parse_block(); + scope_pop(); + last.next = ast_make_lr(AST_CASE, expr, block); + last = last.next; + } else { + // ... { block } + require(tOBRACE); + scope_push(SCOPE_BLOCK); + block = parse_block(); + scope_pop(); + last.next = ast_make_l(AST_ELSE, block); + break; + } + } + return stmt; +} + +fn parse_return() Ast { + // TODO check for return required/type + var node Ast = ast_make_simple(AST_RETURN, 0); + if ctx.tok == tSEMI { + next(); + } else { + // error("return types do not match"); + node.left = parse_expr(); + require(tSEMI); + } + return node; +} + +fn parse_break() Ast { + // TODO: break-to-labeled-loop support + var scope Scope = scope_find(SCOPE_LOOP); + if scope == nil { + error("break must be used from inside a looping construct"); + } + require(tSEMI); + return ast_make_simple(AST_BREAK, 0); +} + +fn parse_continue() Ast { + // TODO: continue-to-labeled-loop support + var scope Scope = scope_find(SCOPE_LOOP); + if scope == nil { + error("continue must be used from inside a looping construct"); + } + require(tSEMI); + return ast_make_simple(AST_CONTINUE, 0); +} + +fn parse_struct_init(sym Symbol) { + while true { + if ctx.tok == tCBRACE { + next(); + break; + } + var name String = parse_name("field name"); + var field Symbol = sym.type.list; + while true { // TODO: field_find + if field == nil { + error("structure has no '", @str name.text, "' field"); + } + if field.name == name { + break; + } + field = field.next; + } + require(tCOLON); + if ctx.tok == tOBRACE { + next(); + parse_struct_init(field); + } else { + parse_expr(); + } + if ctx.tok != tCBRACE { + require(tCOMMA); + } + } +} + +fn parse_array_init(sym Symbol) { + while true { + if ctx.tok == tCBRACE { + next(); + break; + } + parse_expr(); + if ctx.tok != tCBRACE { + require(tCOMMA); + } + } +} + +fn parse_var() { + // TODO: global vs local + // TODO: generate assignment/initialization + var name String = parse_name("variable name"); + var type Type = parse_type(false); + var sym Symbol = symbol_make(name, type); + + if ctx.tok == tASSIGN { + next(); + if ctx.tok == tOBRACE { + next(); + if type.kind == TYPE_STRUCT { + parse_struct_init(sym); + } else if type.kind == TYPE_ARRAY { + parse_array_init(sym); + } else { + error("type ", @str type.name.text, + " cannot be initialized with {} expr"); + } + } else { + parse_expr(); + } + } else { + // default init + } + require(tSEMI); +} + +fn _parse_expr_statement() Ast { + var node Ast = parse_expr(); + var expr Ast = nil; + var op u32; + + if ctx.tok == tASSIGN { + // basic assignment + next(); + return ast_make_lr(AST_ASSIGN, node, parse_expr()); + } else if (ctx.tok & tcMASK) == tcAEQOP { + // +=, etc + op = (ctx.tok - tADDEQ) + AST_ADD; + next(); + expr = parse_expr(); + } else if (ctx.tok & tcMASK) == tcMEQOP { + // *=, etc + op = (ctx.tok - tMULEQ) + AST_MUL; + next(); + expr = parse_expr(); + } else if (ctx.tok == tINC) { + op = AST_ADD; + next(); + expr = ast_make_const(1, ctx.type_i32); + } else if (ctx.tok == tDEC) { + op = AST_SUB; + expr = ast_make_const(1, ctx.type_i32); + } else { + // simple expression + return node; + } + + // TODO duplicate node instead of sharing it + expr = ast_make_lr(op, node, expr); + return ast_make_lr(AST_ASSIGN, node, expr); +} + +fn parse_expr_statement() Ast { + var stmt Ast = ast_make_l(AST_EXPR, _parse_expr_statement()); + require(tSEMI); + return stmt; +} + +fn parse_block() Ast { + var block Ast = ast_make_simple(AST_BLOCK, 0); + var last Ast = nil; + var node Ast; + while true { + if ctx.tok == tCBRACE { + next(); + break; + } else if ctx.tok == tRETURN { + next(); + node = parse_return(); + } else if ctx.tok == tBREAK { + next(); + node = parse_break(); + } else if ctx.tok == tCONTINUE { + next(); + node = parse_continue(); + } else if ctx.tok == tWHILE { + next(); + node = parse_while(); + } else if ctx.tok == tIF { + next(); + node = parse_if(); + } else if ctx.tok == tVAR { + next(); + parse_var(); + } else if ctx.tok == tSEMI { + next(); + // empty statement + continue; + } else { + node = parse_expr_statement(); + } + if last == nil { + block.left = node; + } else { + last.next = node; + } + last = node; + } + return block; +} + +fn parse_param(fname String) Symbol { + var pname String = parse_name("parameter name"); + var ptype Type = parse_type(false); + if symbol_find(pname) != nil { + error("duplicate parameter name '", @str pname.text, "'"); + } + return symbol_make(pname, ptype); +} + +fn parse_fn_body(sym Symbol) Ast { + ctx.cur_fn = sym; + require(tOBRACE); + scope_push(SCOPE_FUNC); // parameters, from fn + scope_push(SCOPE_BLOCK); + var node Ast = parse_block(); + scope_pop(); // stow in node? + scope_pop(); + ctx.cur_fn = nil; + return node; +} + +fn parse_fn() Ast { + var fname String = parse_name("function name"); + var rtype Type = ctx.type_void; + + scope_push(SCOPE_FUNC); + + require(tOPAREN); + var n u32 = 0; + if ctx.tok != tCPAREN { + parse_param(fname); + while ctx.tok == tCOMMA { + next(); + parse_param(fname); + } + n++; + } + require(tCPAREN); + + if ctx.tok != tOBRACE { + rtype = parse_type(false); + } + + var sym Symbol = symbol_make_global(fname, rtype); + sym.kind = SYMBOL_FN; + sym.type = type_make(fname, TYPE_FN, rtype, nil, n); + + var node Ast = ast_make_simple(AST_FUNC, 0); + node.name = fname; + node.sym = sym; + node.right = parse_fn_body(sym); + + // save parameters + sym.type.list = scope_pop(); + + return node; +} + +fn parse_enum_def() { + if ctx.tok == tIDN { + var name String = parse_name("enum name"); + type_make(name, TYPE_ENUM, nil, nil, 0); + } + + require(tOBRACE); + var val u32 = 0; + while ctx.tok != tCBRACE { + var name String = parse_name("enum tag name"); + var sym Symbol = symbol_find(name); + if sym != nil { + error("cannot redfine '", @str name.text, "' as enum tag"); + } + sym = symbol_make_global(name, ctx.type_u32); + sym.kind = SYMBOL_DEF; + if ctx.tok == tASSIGN { + next(); + parse_expr(); + // TODO val <- expr + } else { + val++; + } + require(tCOMMA); + } + require(tCBRACE); + require(tSEMI); +} + +fn parse_program() Ast { + var program Ast = ast_make_simple(AST_PROGRAM, 0); + var last Ast = nil; + + while true { + if ctx.tok == tENUM { + next(); + parse_enum_def(); + } else if ctx.tok == tSTRUCT { + next(); + var name String = parse_name("struct name"); + parse_struct_type(name); + require(tSEMI); + } else if ctx.tok == tFN { + next(); + var node Ast = parse_fn(); + if last == nil { + program.left = node; + } else { + last.next = node; + } + last = node; + } else if ctx.tok == tVAR { + next(); + parse_var(); + } else if ctx.tok == tEOF { + break; + } else { + expected("function, variable, or type definition"); + } + } + return program; +} + diff --git a/compiler/stdlib.spl b/compiler/stdlib.spl @@ -0,0 +1,29 @@ +// Copyright 2023, Brian Swetland <swetland@frotz.net> +// Licensed under the Apache License, Version 2.0. + +fn strneq(s1 str, s2 str, len u32) bool { + var n u32 = 0; + while n < len { + if s1[n] != s2[n] { + return false; + } + n++; + } + return true; +} + +fn strcpyn(dst str, src str, len u32) { + var n u32 = 0; + while n < len { + dst[n] = src[n]; + n++; + } +} + +fn strlen(s str) u32 { + var n u32 = 0; + while s[n] != 0 { + n++; + } + return n; +} diff --git a/compiler/types.spl b/compiler/types.spl @@ -0,0 +1,483 @@ +// Copyright 2023, Brian Swetland <swetland@frotz.net> +// Licensed under the Apache License, Version 2.0. + +// ================================================================ +// data types + +struct String { + next *String, + len u32, + text [256]u8, +}; + +enum SymbolKind { + SYMBOL_VAR, + SYMBOL_FLD, // struct field + SYMBOL_PTR, // struct *field + SYMBOL_DEF, // enum + SYMBOL_FN, +}; + +struct Symbol { + next *Symbol, + name *String, + type *Type, + kind SymbolKind, +}; + +enum ScopeKind { + SCOPE_GLOBAL, + SCOPE_FUNC, + SCOPE_BLOCK, + SCOPE_LOOP, + SCOPE_STRUCT, +}; + +struct Scope { + parent *Scope, + first *Symbol, + last *Symbol, + kind ScopeKind, +}; + +enum TypeKind { + TYPE_VOID, + TYPE_BOOL, + TYPE_U8, + TYPE_U32, + TYPE_I32, + TYPE_NIL, + TYPE_POINTER, + TYPE_ARRAY, + TYPE_SLICE, + TYPE_STR, + TYPE_STRUCT, + TYPE_FN, + TYPE_ENUM, + TYPE_UNDEFINED, +}; + +struct Type { + next *Type, + name *String, + of *Type, // for slice, array, ptr, fn (return type) + list *Symbol, // for struct (fields), fn (params) + kind TypeKind, + count u32, +}; + +// ================================================================ +// Abstract Syntax Tree + +enum AstKind { +// top node + AST_PROGRAM, // l=FUNC* +// program components (chained into a list by next) + AST_FUNC, // l=BLOCK +// container of statements + AST_BLOCK, // l=STMT* +// statements (chained into a list by next) + AST_EXPR, // l=EXPR + AST_WHILE, // l=EXPR r=BLOCK + AST_BREAK, + AST_CONTINUE, + AST_RETURN, // l=EXPR + AST_IF, // l=CASE* +// sub-parts of if + AST_CASE, // l=EXPR r=BLOCK + AST_ELSE, // l=BLOCK +// expressions + AST_SYMBOL, + AST_CONST, // numeric constant + AST_STRING, // string constant + AST_DEREF, // l=EXPR type: pointer-to-... + AST_INDEX, // l=EXPR type: array-of-... r=EXPR index + AST_FIELD, // l=EXPR type: struct r=SYMBOL field + AST_ADDROF, // l=EXPR type: lvalue + AST_CALL, // l=NAME r=EXPR* + AST_ASSIGN, // l=lhsEXPR r=rhsEXPR + AST_NEW, // l=TYPE +// binary expressions + // Rel Ops (maintain order matched w/ lexer) + AST_EQ, AST_NE, AST_LT, AST_LE, AST_GT, AST_GE, + // Add Ops (maintain order matched w/ lexer) + AST_ADD, AST_SUB, AST_OR, AST_XOR, + // Mul Ops (maintain order matched w/ lexer) + AST_MUL, AST_DIV, AST_MOD, AST_AND, AST_LSL, AST_LSR, + // uncategorized ops + AST_NOT, AST_BOOL_AND, AST_BOOL_OR, AST_BOOL_NOT, AST_NEG, + AST_KIND_COUNT, +}; + +var ast_kind []str = { + "PROGRAM", "FUNC", + "BLOCK", "EXPR", "WHILE", "BREAK", "CONTINUE", + "RETURN", "IF", "CASE", "ELSE", + "SYMBOL", "CONST", "STRING", + "DEREF", "INDEX", "FIELD", "ADDROF", "CALL", "ASSIGN", "NEW", + "EQ", "NE", "LT", "LE", "GT", "GE", + "ADD", "SUB", "OR", "XOR", + "MUL", "DIV", "MOD", "AND", "LSL", "LSR", + "NOT", "BOOL AND", "BOOL OR", "BOOL NOT", "NEG", +}; + +fn ast_is_relop(kind AstKind) bool { + return (kind >= AST_EQ) && (kind <= AST_GE); +} + +fn ast_is_addop(kind AstKind) bool { + return (kind >= AST_ADD) && (kind <= AST_XOR); +} + +fn ast_is_mulop(kind AstKind) bool { + return (kind >= AST_MUL) && (kind <= AST_LSR); +} + +fn ast_is_binop(kind AstKind) bool { + return (kind >= AST_EQ) && (kind <= AST_LSR); +} + +fn ast_is_expr(kind AstKind) bool { + return (kind >= AST_SYMBOL); +} + +fn ast_is_stmt(kind AstKind) bool { + return (kind >= AST_EXPR) && (kind <= AST_ELSE); +} + +struct Ast { + kind AstKind, + + left *Ast, + right *Ast, + next *Ast, // intrusive list + + ival u32, + name *String, + sym *Symbol, + type *Type, + + srcloc u32, // linenumber for now +}; + + +// ================================================================ +// lexical scanner tokens + +// token classes (tok & tcMASK) +enum TokenClass{ + tcRELOP = 0x08, tcADDOP = 0x10, tcMULOP = 0x18, + tcAEQOP = 0x20, tcMEQOP = 0x28, tcMASK = 0xF8, +}; + +enum Token { + // EndMarks, Braces, Brackets Parens + tEOF, tEOL, tOBRACE, tCBRACE, tOBRACK, tCBRACK, tOPAREN, tCPAREN, + // RelOps (do not reorder) + tEQ, tNE, tLT, tLE, tGT, tGE, tx0E, tx0F, + // AddOps (do not reorder) + tPLUS, tMINUS, tPIPE, tCARET, tx14, tx15, tx16, tx17, + // MulOps (do not reorder) + tSTAR, tSLASH, tPERCENT, tAMP, tLEFT, tRIGHT, tx1E, tx1F, + // AsnOps (do not reorder) + tADDEQ, tSUBEQ, tOREQ, tXOREQ, tx24, tx25, tx26, tx27, + tMULEQ, tDIVEQ, tMODEQ, tANDEQ, tLSEQ, tRSEQ, t2E, t2F, + // Various, UnaryNot, LogicalOps, + tSEMI, tCOLON, tDOT, tCOMMA, tNOT, tAND, tOR, tBANG, + tASSIGN, tINC, tDEC, + tAT, + // Keywords + tNEW, tFN, tSTRUCT, tVAR, tENUM, + tIF, tELSE, tWHILE, + tBREAK, tCONTINUE, tRETURN, + tFOR, tSWITCH, tCASE, + tTRUE, tFALSE, tNIL, + tIDN, tNUM, tSTR, + // used internal to the lexer but never returned + tSPC, tINV, tDQT, tSQT, tMSC, +}; + +var tnames []str = { + "<EOF>", "<EOL>", "{", "}", "[", "]", "(", ")", + "==", "!=", "<", "<=", ">", ">=", "", "", + "+", "-", "|", "^", "", "", "", "", + "*", "/", "%", "&", "<<", ">>", "", "", + "+=", "-=", "|=", "^=", "", "", "", "", + "*=", "/=", "%=", "&=", "<<=", ">>=", "", "", + ";", ":", ".", ",", "~", "&&", "||", "!", + "=", "++", "--", + "@", + "new", "fn", "struct", "var", "enum", + "if", "else", "while", + "break", "continue", "return", + "for", "switch", "case", + "true", "false", "nil", + "<ID>", "<NUM>", "<STR>", + "<SPC>", "<INV>", "<DQT>", "<SQT>", "<MSC>", +}; + + +// ================================================================ +// lexer / parser / compiler context + +struct Context { + filename str, // filename of active source + outname str, // base name for output files + + linenumber u32, // line number of most recent line + lineoffset u32, // position of start of most recent line + byteoffset u32, // position of the most recent character + flags u32, + cc u32, // scanner: next character + + tok Token, // most recent token + num u32, // for tNUM + tmp [256]u8, // for tIDN, tSTR + ident *String, // for tSTR + + stringlist *String, // intern table + typelist *Type, // all types + + scope *Scope, // top of Scope stack + global *Scope, // the global scope + cur_fn *Symbol, // fn being parsed + + idn_if *String, // identifier strings + idn_fn *String, + idn_for *String, + idn_var *String, + idn_nil *String, + idn_new *String, + idn_case *String, + idn_else *String, + idn_enum *String, + idn_true *String, + idn_break *String, + idn_while *String, + idn_false *String, + idn_switch *String, + idn_struct *String, + idn_return *String, + idn_continue *String, + + type_void *Type, + type_bool *Type, + type_str *Type, + type_nil *Type, + type_u32 *Type, + type_i32 *Type, + type_u8 *Type, +}; + +var ctx Context; + +fn string_make(text str, len u32) String { + var s String = ctx.stringlist; + while s != nil { + if (s.len == len) && strneq(text, s.text, len) { + return s; + } + s = s.next; + } + s = new(String); + s.len = len; + strcpyn(s.text, text, len + 1); + s.next = ctx.stringlist; + ctx.stringlist = s; + return s; +} + +fn scope_push(kind ScopeKind) Scope { + var scope Scope = new(Scope); + scope.first = nil; + scope.last = nil; + scope.parent = ctx.scope; + scope.kind = kind; + ctx.scope = scope; + return scope; +} + +// returns symbol list for the popped scope +fn scope_pop() Symbol { + var scope Scope = ctx.scope; + ctx.scope = scope.parent; + return scope.first; +} + +fn scope_find(kind ScopeKind) Scope { + var scope Scope = ctx.scope; + while scope != nil { + if scope.kind == kind { + return scope; + } + scope = scope.parent; + } + return nil; +} + +fn symbol_find_in(name String, scope Scope) Symbol { + var sym Symbol = scope.first; + while sym != nil { + if sym.name == name { + return sym; + } + sym = sym.next; + } + return nil; +} + +// find the first surrounding scope of a specified kind +fn symbol_find(name String) Symbol { + var scope Scope = ctx.scope; + while scope != nil { + var sym Symbol = symbol_find_in(name, scope); + if sym != nil { + return sym; + } + scope = scope.parent; + } + return nil; +} + +fn symbol_make_in_scope(name String, type Type, scope Scope) Symbol { + var sym Symbol = new(Symbol); + sym.name = name; + sym.type = type; + sym.next = nil; + sym.kind = SYMBOL_VAR; + if scope.first == nil { + scope.first = sym; + } else { + scope.last.next = sym; + } + scope.last = sym; + return sym; +} + +fn symbol_make_global(name String, type Type) Symbol { + return symbol_make_in_scope(name, type, ctx.global); +} + +fn symbol_make(name String, type Type) Symbol { + return symbol_make_in_scope(name, type, ctx.scope); +} + +fn type_make(name String, kind TypeKind, of Type, list Symbol, count u32) Type { + var type Type = new(Type); + type.name = name; + type.of = of; + type.list = list; + type.kind = kind; + type.count = count; + if name != nil { + type.next = ctx.typelist; + ctx.typelist = type; + } else { + type.next = nil; + } + return type; +} + +fn type_find(name String) Type { + var t Type = ctx.typelist; + while t != nil { + if t.name == name { + return t; + } + t = t.next; + } + return nil; +} + +fn type_find_field(type Type, name String) Symbol { + if type.kind != TYPE_STRUCT { + error("not a struct"); + } + var s Symbol = type.list; + while s != nil { + if s.name == name { + return s; + } + s = s.next; + } + error("struct has no such field '", @str name.text, "'"); + return nil; +} + +fn ast_make(kind AstKind, ival u32, name String, sym Symbol, type Type) Ast { + var node Ast = new(Ast); + node.kind = kind; + node.ival = ival; + node.name = name; + node.sym = sym; + node.type = type; + node.srcloc = ctx.linenumber; + return node; +} + +fn ast_make_lr(kind AstKind, left Ast, right Ast) Ast { + var node Ast = ast_make(kind, 0, nil, nil, nil); + node.left = left; + node.right = right; + return node; +} + +fn ast_make_l(kind AstKind, child Ast) Ast { + var node Ast = ast_make(kind, 0, nil, nil, nil); + node.left = child; + return node; +} + +fn ast_make_simple(kind AstKind, x u32) Ast { + return ast_make(kind, x, nil, nil, nil); +} + +fn ast_make_const(value u32, type Type) Ast { + return ast_make(AST_CONST, value, nil, nil, type); +} + +fn ast_make_symbol(name String, sym Symbol) Ast { + // TODO maybe handle nil sym differently? + var type Type = nil; + if sym != nil { + type = sym.type; + } + return ast_make(AST_SYMBOL, 0, name, sym, type); +} + +fn ctx_init() { + ctx = new(Context); + + ctx.idn_if = string_make("if", 2); + ctx.idn_fn = string_make("fn", 2); + ctx.idn_for = string_make("for", 3); + ctx.idn_var = string_make("var", 3); + ctx.idn_nil = string_make("nil", 3); + ctx.idn_new = string_make("new", 3); + ctx.idn_case = string_make("case", 4); + ctx.idn_else = string_make("else", 4); + ctx.idn_enum = string_make("enum", 4); + ctx.idn_true = string_make("true", 4); + ctx.idn_break = string_make("break", 5); + ctx.idn_while = string_make("while", 5); + ctx.idn_false = string_make("false", 5); + ctx.idn_switch = string_make("switch", 6); + ctx.idn_struct = string_make("struct", 6); + ctx.idn_return = string_make("return", 6); + ctx.idn_continue = string_make("continue", 8); + + ctx.type_void = type_make(string_make("void", 4), TYPE_VOID, nil, nil, 0); + ctx.type_bool = type_make(string_make("bool", 4), TYPE_BOOL, nil, nil, 0); + ctx.type_nil = type_make(string_make("nil", 3), TYPE_NIL, nil, nil, 0); + ctx.type_str = type_make(string_make("str", 3), TYPE_STR, nil, nil, 0); + ctx.type_u32 = type_make(string_make("u32", 3), TYPE_U32, nil, nil, 0); + ctx.type_i32 = type_make(string_make("i32", 3), TYPE_I32, nil, nil, 0); + ctx.type_u8 = type_make(string_make("u8", 2), TYPE_U8, nil, nil, 0); + + scope_push(SCOPE_GLOBAL); + ctx.global = ctx.scope; + + ctx.linenumber = 1; + ctx.filename = "<stdin>"; +} +