commit ad7b5d853250e600a291b35eab85c2e20af508e3
parent e364f81a9c842e5f90fbf91da782716599839982
Author: Brian Swetland <swetland@frotz.net>
Date: Fri, 20 Oct 2023 17:03:40 -0700
compiler1: split into multiple source files
Diffstat:
D | compiler/compiler.spl | | | 1583 | ------------------------------------------------------------------------------- |
A | compiler/lexer.spl | | | 349 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | compiler/main.spl | | | 75 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | compiler/parser.spl | | | 651 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | compiler/stdlib.spl | | | 29 | +++++++++++++++++++++++++++++ |
A | compiler/types.spl | | | 483 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
6 files changed, 1587 insertions(+), 1583 deletions(-)
diff --git a/compiler/compiler.spl b/compiler/compiler.spl
@@ -1,1583 +0,0 @@
-// Copyright 2023, Brian Swetland <swetland@frotz.net>
-// Licensed under the Apache License, Version 2.0.
-
-// utility functions
-
-fn strneq(s1 str, s2 str, len u32) bool {
- var n u32 = 0;
- while n < len {
- if s1[n] != s2[n] {
- return false;
- }
- n++;
- }
- return true;
-}
-
-fn strcpyn(dst str, src str, len u32) {
- var n u32 = 0;
- while n < len {
- dst[n] = src[n];
- n++;
- }
-}
-
-fn strlen(s str) u32 {
- var n u32 = 0;
- while s[n] != 0 {
- n++;
- }
- return n;
-}
-
-// ================================================================
-// data types
-
-struct String {
- next *String,
- len u32,
- text [256]u8,
-};
-
-enum SymbolKind {
- SYMBOL_VAR,
- SYMBOL_FLD, // struct field
- SYMBOL_PTR, // struct *field
- SYMBOL_DEF, // enum
- SYMBOL_FN,
-};
-
-struct Symbol {
- next *Symbol,
- name *String,
- type *Type,
- kind SymbolKind,
-};
-
-enum ScopeKind {
- SCOPE_GLOBAL,
- SCOPE_FUNC,
- SCOPE_BLOCK,
- SCOPE_LOOP,
- SCOPE_STRUCT,
-};
-
-struct Scope {
- parent *Scope,
- first *Symbol,
- last *Symbol,
- kind ScopeKind,
-};
-
-enum TypeKind {
- TYPE_VOID,
- TYPE_BOOL,
- TYPE_U8,
- TYPE_U32,
- TYPE_I32,
- TYPE_NIL,
- TYPE_POINTER,
- TYPE_ARRAY,
- TYPE_SLICE,
- TYPE_STR,
- TYPE_STRUCT,
- TYPE_FN,
- TYPE_ENUM,
- TYPE_UNDEFINED,
-};
-
-struct Type {
- next *Type,
- name *String,
- of *Type, // for slice, array, ptr, fn (return type)
- list *Symbol, // for struct (fields), fn (params)
- kind TypeKind,
- count u32,
-};
-
-// ================================================================
-// Abstract Syntax Tree
-
-enum AstKind {
-// top node
- AST_PROGRAM, // l=FUNC*
-// program components (chained into a list by next)
- AST_FUNC, // l=BLOCK
-// container of statements
- AST_BLOCK, // l=STMT*
-// statements (chained into a list by next)
- AST_EXPR, // l=EXPR
- AST_WHILE, // l=EXPR r=BLOCK
- AST_BREAK,
- AST_CONTINUE,
- AST_RETURN, // l=EXPR
- AST_IF, // l=CASE*
-// sub-parts of if
- AST_CASE, // l=EXPR r=BLOCK
- AST_ELSE, // l=BLOCK
-// expressions
- AST_SYMBOL,
- AST_CONST, // numeric constant
- AST_STRING, // string constant
- AST_DEREF, // l=EXPR type: pointer-to-...
- AST_INDEX, // l=EXPR type: array-of-... r=EXPR index
- AST_FIELD, // l=EXPR type: struct r=SYMBOL field
- AST_ADDROF, // l=EXPR type: lvalue
- AST_CALL, // l=NAME r=EXPR*
- AST_ASSIGN, // l=lhsEXPR r=rhsEXPR
- AST_NEW, // l=TYPE
-// binary expressions
- // Rel Ops (maintain order matched w/ lexer)
- AST_EQ, AST_NE, AST_LT, AST_LE, AST_GT, AST_GE,
- // Add Ops (maintain order matched w/ lexer)
- AST_ADD, AST_SUB, AST_OR, AST_XOR,
- // Mul Ops (maintain order matched w/ lexer)
- AST_MUL, AST_DIV, AST_MOD, AST_AND, AST_LSL, AST_LSR,
- // uncategorized ops
- AST_NOT, AST_BOOL_AND, AST_BOOL_OR, AST_BOOL_NOT, AST_NEG,
- AST_KIND_COUNT,
-};
-
-var ast_kind []str = {
- "PROGRAM", "FUNC",
- "BLOCK", "EXPR", "WHILE", "BREAK", "CONTINUE",
- "RETURN", "IF", "CASE", "ELSE",
- "SYMBOL", "CONST", "STRING",
- "DEREF", "INDEX", "FIELD", "ADDROF", "CALL", "ASSIGN", "NEW",
- "EQ", "NE", "LT", "LE", "GT", "GE",
- "ADD", "SUB", "OR", "XOR",
- "MUL", "DIV", "MOD", "AND", "LSL", "LSR",
- "NOT", "BOOL AND", "BOOL OR", "BOOL NOT", "NEG",
-};
-
-fn ast_is_relop(kind AstKind) bool {
- return (kind >= AST_EQ) && (kind <= AST_GE);
-}
-
-fn ast_is_addop(kind AstKind) bool {
- return (kind >= AST_ADD) && (kind <= AST_XOR);
-}
-
-fn ast_is_mulop(kind AstKind) bool {
- return (kind >= AST_MUL) && (kind <= AST_LSR);
-}
-
-fn ast_is_binop(kind AstKind) bool {
- return (kind >= AST_EQ) && (kind <= AST_LSR);
-}
-
-fn ast_is_expr(kind AstKind) bool {
- return (kind >= AST_SYMBOL);
-}
-
-fn ast_is_stmt(kind AstKind) bool {
- return (kind >= AST_EXPR) && (kind <= AST_ELSE);
-}
-
-struct Ast {
- kind AstKind,
-
- left *Ast,
- right *Ast,
- next *Ast, // intrusive list
-
- ival u32,
- name *String,
- sym *Symbol,
- type *Type,
-
- srcloc u32, // linenumber for now
-};
-
-
-// ================================================================
-// lexical scanner tokens
-
-// token classes (tok & tcMASK)
-enum TokenClass{
- tcRELOP = 0x08, tcADDOP = 0x10, tcMULOP = 0x18,
- tcAEQOP = 0x20, tcMEQOP = 0x28, tcMASK = 0xF8,
-};
-
-enum Token {
- // EndMarks, Braces, Brackets Parens
- tEOF, tEOL, tOBRACE, tCBRACE, tOBRACK, tCBRACK, tOPAREN, tCPAREN,
- // RelOps (do not reorder)
- tEQ, tNE, tLT, tLE, tGT, tGE, tx0E, tx0F,
- // AddOps (do not reorder)
- tPLUS, tMINUS, tPIPE, tCARET, tx14, tx15, tx16, tx17,
- // MulOps (do not reorder)
- tSTAR, tSLASH, tPERCENT, tAMP, tLEFT, tRIGHT, tx1E, tx1F,
- // AsnOps (do not reorder)
- tADDEQ, tSUBEQ, tOREQ, tXOREQ, tx24, tx25, tx26, tx27,
- tMULEQ, tDIVEQ, tMODEQ, tANDEQ, tLSEQ, tRSEQ, t2E, t2F,
- // Various, UnaryNot, LogicalOps,
- tSEMI, tCOLON, tDOT, tCOMMA, tNOT, tAND, tOR, tBANG,
- tASSIGN, tINC, tDEC,
- tAT,
- // Keywords
- tNEW, tFN, tSTRUCT, tVAR, tENUM,
- tIF, tELSE, tWHILE,
- tBREAK, tCONTINUE, tRETURN,
- tFOR, tSWITCH, tCASE,
- tTRUE, tFALSE, tNIL,
- tIDN, tNUM, tSTR,
- // used internal to the lexer but never returned
- tSPC, tINV, tDQT, tSQT, tMSC,
-};
-
-var tnames []str = {
- "<EOF>", "<EOL>", "{", "}", "[", "]", "(", ")",
- "==", "!=", "<", "<=", ">", ">=", "", "",
- "+", "-", "|", "^", "", "", "", "",
- "*", "/", "%", "&", "<<", ">>", "", "",
- "+=", "-=", "|=", "^=", "", "", "", "",
- "*=", "/=", "%=", "&=", "<<=", ">>=", "", "",
- ";", ":", ".", ",", "~", "&&", "||", "!",
- "=", "++", "--",
- "@",
- "new", "fn", "struct", "var", "enum",
- "if", "else", "while",
- "break", "continue", "return",
- "for", "switch", "case",
- "true", "false", "nil",
- "<ID>", "<NUM>", "<STR>",
- "<SPC>", "<INV>", "<DQT>", "<SQT>", "<MSC>",
-};
-
-
-// ================================================================
-// lexer / parser / compiler context
-
-struct Context {
- filename str, // filename of active source
- outname str, // base name for output files
-
- linenumber u32, // line number of most recent line
- lineoffset u32, // position of start of most recent line
- byteoffset u32, // position of the most recent character
- flags u32,
- cc u32, // scanner: next character
-
- tok Token, // most recent token
- num u32, // for tNUM
- tmp [256]u8, // for tIDN, tSTR
- ident *String, // for tSTR
-
- stringlist *String, // intern table
- typelist *Type, // all types
-
- scope *Scope, // top of Scope stack
- global *Scope, // the global scope
- cur_fn *Symbol, // fn being parsed
-
- idn_if *String, // identifier strings
- idn_fn *String,
- idn_for *String,
- idn_var *String,
- idn_nil *String,
- idn_new *String,
- idn_case *String,
- idn_else *String,
- idn_enum *String,
- idn_true *String,
- idn_break *String,
- idn_while *String,
- idn_false *String,
- idn_switch *String,
- idn_struct *String,
- idn_return *String,
- idn_continue *String,
-
- type_void *Type,
- type_bool *Type,
- type_str *Type,
- type_nil *Type,
- type_u32 *Type,
- type_i32 *Type,
- type_u8 *Type,
-};
-
-var ctx Context;
-
-fn string_make(text str, len u32) String {
- var s String = ctx.stringlist;
- while s != nil {
- if (s.len == len) && strneq(text, s.text, len) {
- return s;
- }
- s = s.next;
- }
- s = new(String);
- s.len = len;
- strcpyn(s.text, text, len + 1);
- s.next = ctx.stringlist;
- ctx.stringlist = s;
- return s;
-}
-
-fn scope_push(kind ScopeKind) Scope {
- var scope Scope = new(Scope);
- scope.first = nil;
- scope.last = nil;
- scope.parent = ctx.scope;
- scope.kind = kind;
- ctx.scope = scope;
- return scope;
-}
-
-// returns symbol list for the popped scope
-fn scope_pop() Symbol {
- var scope Scope = ctx.scope;
- ctx.scope = scope.parent;
- return scope.first;
-}
-
-fn scope_find(kind ScopeKind) Scope {
- var scope Scope = ctx.scope;
- while scope != nil {
- if scope.kind == kind {
- return scope;
- }
- scope = scope.parent;
- }
- return nil;
-}
-
-fn symbol_find_in(name String, scope Scope) Symbol {
- var sym Symbol = scope.first;
- while sym != nil {
- if sym.name == name {
- return sym;
- }
- sym = sym.next;
- }
- return nil;
-}
-
-// find the first surrounding scope of a specified kind
-fn symbol_find(name String) Symbol {
- var scope Scope = ctx.scope;
- while scope != nil {
- var sym Symbol = symbol_find_in(name, scope);
- if sym != nil {
- return sym;
- }
- scope = scope.parent;
- }
- return nil;
-}
-
-fn symbol_make_in_scope(name String, type Type, scope Scope) Symbol {
- var sym Symbol = new(Symbol);
- sym.name = name;
- sym.type = type;
- sym.next = nil;
- sym.kind = SYMBOL_VAR;
- if scope.first == nil {
- scope.first = sym;
- } else {
- scope.last.next = sym;
- }
- scope.last = sym;
- return sym;
-}
-
-fn symbol_make_global(name String, type Type) Symbol {
- return symbol_make_in_scope(name, type, ctx.global);
-}
-
-fn symbol_make(name String, type Type) Symbol {
- return symbol_make_in_scope(name, type, ctx.scope);
-}
-
-fn type_make(name String, kind TypeKind, of Type, list Symbol, count u32) Type {
- var type Type = new(Type);
- type.name = name;
- type.of = of;
- type.list = list;
- type.kind = kind;
- type.count = count;
- if name != nil {
- type.next = ctx.typelist;
- ctx.typelist = type;
- } else {
- type.next = nil;
- }
- return type;
-}
-
-fn type_find(name String) Type {
- var t Type = ctx.typelist;
- while t != nil {
- if t.name == name {
- return t;
- }
- t = t.next;
- }
- return nil;
-}
-
-fn type_find_field(type Type, name String) Symbol {
- if type.kind != TYPE_STRUCT {
- error("not a struct");
- }
- var s Symbol = type.list;
- while s != nil {
- if s.name == name {
- return s;
- }
- s = s.next;
- }
- error("struct has no such field '", @str name.text, "'");
- return nil;
-}
-
-fn ast_make(kind AstKind, ival u32, name String, sym Symbol, type Type) Ast {
- var node Ast = new(Ast);
- node.kind = kind;
- node.ival = ival;
- node.name = name;
- node.sym = sym;
- node.type = type;
- node.srcloc = ctx.linenumber;
- return node;
-}
-
-fn ast_make_lr(kind AstKind, left Ast, right Ast) Ast {
- var node Ast = ast_make(kind, 0, nil, nil, nil);
- node.left = left;
- node.right = right;
- return node;
-}
-
-fn ast_make_l(kind AstKind, child Ast) Ast {
- var node Ast = ast_make(kind, 0, nil, nil, nil);
- node.left = child;
- return node;
-}
-
-fn ast_make_simple(kind AstKind, x u32) Ast {
- return ast_make(kind, x, nil, nil, nil);
-}
-
-fn ast_make_const(value u32, type Type) Ast {
- return ast_make(AST_CONST, value, nil, nil, type);
-}
-
-fn ast_make_symbol(name String, sym Symbol) Ast {
- // TODO maybe handle nil sym differently?
- var type Type = nil;
- if sym != nil {
- type = sym.type;
- }
- return ast_make(AST_SYMBOL, 0, name, sym, type);
-}
-
-fn ctx_init() {
- ctx = new(Context);
-
- ctx.idn_if = string_make("if", 2);
- ctx.idn_fn = string_make("fn", 2);
- ctx.idn_for = string_make("for", 3);
- ctx.idn_var = string_make("var", 3);
- ctx.idn_nil = string_make("nil", 3);
- ctx.idn_new = string_make("new", 3);
- ctx.idn_case = string_make("case", 4);
- ctx.idn_else = string_make("else", 4);
- ctx.idn_enum = string_make("enum", 4);
- ctx.idn_true = string_make("true", 4);
- ctx.idn_break = string_make("break", 5);
- ctx.idn_while = string_make("while", 5);
- ctx.idn_false = string_make("false", 5);
- ctx.idn_switch = string_make("switch", 6);
- ctx.idn_struct = string_make("struct", 6);
- ctx.idn_return = string_make("return", 6);
- ctx.idn_continue = string_make("continue", 8);
-
- ctx.type_void = type_make(string_make("void", 4), TYPE_VOID, nil, nil, 0);
- ctx.type_bool = type_make(string_make("bool", 4), TYPE_BOOL, nil, nil, 0);
- ctx.type_nil = type_make(string_make("nil", 3), TYPE_NIL, nil, nil, 0);
- ctx.type_str = type_make(string_make("str", 3), TYPE_STR, nil, nil, 0);
- ctx.type_u32 = type_make(string_make("u32", 3), TYPE_U32, nil, nil, 0);
- ctx.type_i32 = type_make(string_make("i32", 3), TYPE_I32, nil, nil, 0);
- ctx.type_u8 = type_make(string_make("u8", 2), TYPE_U8, nil, nil, 0);
-
- scope_push(SCOPE_GLOBAL);
- ctx.global = ctx.scope;
-
- ctx.linenumber = 1;
- ctx.filename = "<stdin>";
-}
-
-fn error_begin() i32 {
- writes(2, "\n");
- writes(2, ctx.filename);
- writes(2, ":");
- writei(2, ctx.linenumber);
- writes(2, ": error: ");
- return 2;
-}
-
-fn error_end() {
- writes(2, "\n");
- os_exit(1);
-}
-
-// ================================================================
-// lexical scanner
-
-// currently unrecognized: # $ ? \ `
-var lextab [256]u8 = {
- tEOF, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
- tINV, tSPC, tEOL, tSPC, tINV, tSPC, tINV, tINV,
- tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
- tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
-// ! " # $ % & '
- tSPC, tBANG, tDQT, tMSC, tMSC, tPERCENT, tAMP, tSQT,
-// ( ) * + , - . /
- tOPAREN, tCPAREN, tSTAR, tPLUS, tCOMMA, tMINUS, tDOT, tSLASH,
-// 0 1 2 3 4 5 6 7
- tNUM, tNUM, tNUM, tNUM, tNUM, tNUM, tNUM, tNUM,
-// 8 9 : ; < = > ?
- tNUM, tNUM, tCOLON, tSEMI, tLT, tASSIGN, tGT, tMSC,
-// @ A B C D E F G
- tAT, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN,
-// H I J K L M N O
- tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN,
-// P Q R S T U V W
- tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN,
-// X Y Z [ \ ] ^ _
- tIDN, tIDN, tIDN, tOBRACK, tMSC, tCBRACK, tCARET, tIDN,
-// ` a b c d e f g
- tMSC, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN,
-// h i j k l m n o
- tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN,
-// p q r s t u v w
- tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN,
-// x y z { | } ~
- tIDN, tIDN, tIDN, tOBRACE, tPIPE, tCBRACE, tNOT, tINV,
- tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
- tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
- tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
- tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
- tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
- tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
- tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
- tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
- tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
- tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
- tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
- tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
- tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
- tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
- tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
- tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
-};
-
-fn unhex(ch u32) i32 {
- if (ch >= '0') && (ch <= '9') {
- return ch - '0';
- }
- if (ch >= 'a') && (ch <= 'f') {
- return ch - 'a' + 10;
- }
- if (ch >= 'A') && (ch <= 'F') {
- return ch - 'A' + 10;
- }
- return -1;
-}
-
-fn scan() Token {
- ctx.byteoffset++;
- var ch i32 = readc(0);
- if ch < 0 {
- ctx.cc = 0;
- } else {
- ctx.cc = ch;
- }
- return ctx.cc;
-}
-
-fn unescape(n u32) u32 {
- if n == 'n' {
- return 10;
- } else if n == 'r' {
- return 13;
- } else if n == 't' {
- return 9;
- } else if n == '"' {
- return '"';
- } else if n == '\'' {
- return '\'';
- } else if n == '\\' {
- return '\\';
- } else if n == 'x' {
- var x0 u32 = unhex(scan());
- var x1 u32 = unhex(scan());
- if (x0 < 0) || (x1 < 0) {
- error("invalid hex escape");
- }
- return (x0 << 4) | x1;
- } else {
- error("invalid escape ", n);
- return 0;
- }
-}
-
-fn scan_string(cc u32, nc u32) Token {
- var n u32 = 0;
- while true {
- if nc == '"' {
- nc = scan();
- break;
- } else if nc == 0 {
- error("unterminated string");
- } else if nc == '\\' {
- ctx.tmp[n] = unescape(scan());
- } else {
- ctx.tmp[n] = nc;
- }
- nc = scan();
- n++;
- if n == 255 {
- error("constant string too large");
- }
- }
- ctx.tmp[n] = 0;
- return tSTR;
-}
-
-fn scan_keyword(len u32) Token {
- ctx.tmp[len] = 0;
- var idn String = string_make(ctx.tmp, len);
- ctx.ident = idn;
-
- if len == 2 {
- if idn == ctx.idn_if { return tIF; };
- if idn == ctx.idn_fn { return tFN; }
- } else if len == 3 {
- if idn == ctx.idn_for { return tFOR; }
- if idn == ctx.idn_var { return tVAR; }
- if idn == ctx.idn_nil { return tNIL; }
- if idn == ctx.idn_new { return tNEW; }
- } else if len == 4 {
- if idn == ctx.idn_case { return tCASE; }
- if idn == ctx.idn_else { return tELSE; }
- if idn == ctx.idn_enum { return tENUM; }
- if idn == ctx.idn_true { return tTRUE; }
- } else if len == 5 {
- if idn == ctx.idn_break { return tBREAK; }
- if idn == ctx.idn_while { return tWHILE; }
- if idn == ctx.idn_false { return tFALSE; }
- } else if len == 6 {
- if idn == ctx.idn_switch { return tSWITCH; }
- if idn == ctx.idn_struct { return tSTRUCT; }
- if idn == ctx.idn_return { return tRETURN; }
- } else if len == 8 {
- if idn == ctx.idn_continue { return tCONTINUE; }
- }
- return tIDN;
-}
-
-fn scan_number(cc u32, nc u32) Token {
- var n u32 = 1;
- var val u32 = cc - '0';
-
- if (cc == '0') && (nc == 'b') { // binary
- nc = scan();
- while (nc == '0') || (nc == '1') {
- val = (val << 1) | (nc - '0');
- nc = scan();
- n++;
- if (n == 34) {
- error("binary constant too large");
- }
- }
- } else if (cc == '0') && (nc == 'x') { // hex
- nc = scan();
- while true {
- var tmp i32 = unhex(nc);
- if tmp == -1 {
- break;
- }
- val = (val << 4) | tmp;
- nc = scan();
- n++;
- if n == 10 {
- error("hex constant too large");
- }
- }
- } else { // decimal
- while lextab[nc] == tNUM {
- var tmp u32 = (val * 10) + (nc - '0');
- if tmp <= val {
- error("decimal constant too large");
- }
- val = tmp;
- nc = scan();
- n++;
- }
- }
- ctx.num = val;
- return tNUM;
-}
-
-fn scan_ident(cc u32, nc u32) Token {
- ctx.tmp[0] = cc;
- var n u32 = 1;
-
- while true {
- var tok Token = lextab[nc];
- if (tok == tIDN) || (tok == tNUM) {
- ctx.tmp[n] = nc;
- n++;
- if (n == 32) { error("identifier too large"); }
- nc = scan();
- } else {
- break;
- }
- }
- return scan_keyword(n);
-}
-
-fn _next() Token {
- var nc u8 = ctx.cc;
- while true {
- var cc u8 = nc;
- nc = scan();
- var tok Token = lextab[cc];
- if tok == tNUM { // 0..9
- return scan_number(cc, nc);
- } else if tok == tIDN { // _ A..Z a..z
- return scan_ident(cc, nc);
- } else if tok == tDQT { // "
- return scan_string(cc, nc);
- } else if tok == tSQT { // '
- ctx.num = nc;
- if nc == '\\' {
- ctx.num = unescape(scan());
- }
- nc = scan();
- if nc != '\'' {
- error("unterminated character constant");
- }
- nc = scan();
- return tNUM;
- } else if tok == tPLUS {
- if nc == '+' { tok = tINC; nc = scan(); }
- } else if tok == tMINUS {
- if nc == '-' { tok = tDEC; nc = scan(); }
- } else if tok == tAMP {
- if nc == '&' { tok = tAND; nc = scan(); }
- } else if tok == tPIPE {
- if nc == '|' { tok = tOR; nc = scan(); }
- } else if tok == tGT {
- if nc == '=' { tok = tGE; nc = scan(); }
- else if nc == '>' { tok = tRIGHT; nc = scan(); }
- } else if tok == tLT {
- if nc == '=' { tok = tLE; nc = scan(); }
- else if nc == '<' { tok = tLEFT; nc = scan(); }
- } else if tok == tASSIGN {
- if nc == '=' { tok = tEQ; nc = scan(); }
- } else if tok == tBANG {
- if nc == '=' { tok = tNE; nc = scan(); }
- } else if tok == tSLASH {
- if nc == '/' {
- // comment -- consume until EOL or EOF
- while (nc != '\n') && (nc != 0) {
- nc = scan();
- }
- continue;
- }
- } else if tok == tEOL {
- ctx.linenumber++;
- ctx.lineoffset = ctx.byteoffset;
- //if ctx.flags & cfVisibleEOL {
- // return tEOL;
- //}
- continue;
- } else if tok == tSPC {
- continue;
- } else if (tok == tMSC) || (tok == tINV) {
- error("unknown character 0x%02x", cc);
- }
-
- // if we're an AddOp or MulOp, followed by an '='
- if ((tok & 0xF0) == 0x20) && (nc == '=') {
- nc = scan();
- // transform us to a XEQ operation
- tok = tok + 0x10;
- }
-
- return tok;
- }
-}
-
-fn printstr(fd i32, s str) {
- var n u32 = 0;
- writec(fd, '"');
- while true {
- var ch u32 = s[n];
- if ch == 0 {
- break;
- } else if (ch < ' ') || (ch > '~') {
- writex(fd, ch);
- } else if (ch == '"') || (ch == '\\') {
- writec(fd, '\\');
- writec(fd, ch);
- } else {
- writec(fd, ch);
- }
- n++;
- }
- writec(fd, '"');
-}
-
-fn token_print(fd i32) {
- if ctx.tok == tNUM {
- writec(fd, '#');
- writex(fd, ctx.num);
- } else if ctx.tok == tIDN {
- writec(fd, '@');
- writes(fd, ctx.tmp);
- } else if ctx.tok == tEOL {
- writec(fd, '\n');
- } else if ctx.tok == tSTR {
- printstr(fd, ctx.tmp);
- } else {
- writes(fd, tnames[ctx.tok]);
- }
- writec(fd, ' ');
-}
-
-fn next() Token {
- ctx.tok = _next();
- return ctx.tok;
-}
-
-// ================================================================
-// parser
-
-fn expected(what str) {
- error("expected ", what, ", found ", @str tnames[ctx.tok]);
-}
-
-fn expect(tok Token) {
- if ctx.tok != tok {
- expected(tnames[tok]);
- }
-}
-
-fn require(tok Token) {
- expect(tok);
- next();
-}
-
-fn parse_name(what str) String {
- if ctx.tok != tIDN {
- expected(what);
- }
- var s String = ctx.ident;
- next();
- return s;
-}
-
-fn parse_symbol(what str) Ast {
- var name String = parse_name(what);
- var sym Symbol = symbol_find(name);
- return ast_make_symbol(name, sym);
-}
-
-fn parse_ident() Ast {
- var node Ast = parse_symbol("identifier");
-
- if (node.sym == nil) && (ctx.tok != tOPAREN) {
- error("undefined identifier '", @str node.name.text, "'");
- }
-
- if ctx.tok == tOPAREN {
- // function call
- next();
- node = ast_make_l(AST_CALL, node);
- var last Ast = nil;
-
- while ctx.tok != tCPAREN {
- if ctx.tok == tAT {
- // type annotation for varargs hack
- next();
- parse_type(false);
- }
-
- var expr Ast = parse_expr();
- if last != nil {
- last.next = expr;
- } else {
- node.right = expr;
- }
- last = expr;
-
- if ctx.tok != tCPAREN {
- require(tCOMMA);
- }
- }
- next();
- }
-
- while true {
- if ctx.tok == tDOT {
- // field access
- next();
- node = ast_make_lr(AST_FIELD, node, parse_symbol("field name"));
- } else if ctx.tok == tOBRACK {
- // array access
- next();
- node = ast_make_lr(AST_INDEX, node, parse_expr());
- require(tCBRACK);
- } else {
- return node;
- }
- }
-
- return nil; // unreachable
-}
-
-fn parse_primary_expr() Ast {
- var node Ast;
- if ctx.tok == tNUM {
- node = ast_make_const(ctx.num, ctx.type_i32);
- next();
- } else if ctx.tok == tSTR {
- node = ast_make_simple(AST_STRING, 0);
- node.name = string_make(ctx.tmp, strlen(ctx.tmp));
- next();
- } else if ctx.tok == tTRUE {
- node = ast_make_const(1, ctx.type_bool);
- next();
- } else if ctx.tok == tFALSE {
- node = ast_make_const(0, ctx.type_bool);
- next();
- } else if ctx.tok == tNIL {
- node = ast_make_const(0, ctx.type_nil);
- next();
- } else if ctx.tok == tOPAREN {
- next();
- node = parse_expr();
- require(tCPAREN);
- } else if ctx.tok == tNEW {
- next();
- require(tOPAREN);
- node = ast_make_simple(AST_NEW, 0);
- node.name = parse_name("type name");
- require(tCPAREN);
- } else if ctx.tok == tIDN {
- node = parse_ident();
- } else {
- error("invalid expression");
- }
- return node;
-}
-
-fn parse_unary_expr() Ast {
- var op u32 = ctx.tok;
- if op == tPLUS {
- next();
- return parse_unary_expr();
- } else if op == tMINUS {
- next();
- return ast_make_l(AST_NEG, parse_unary_expr());
- } else if op == tBANG {
- next();
- return ast_make_l(AST_BOOL_NOT, parse_unary_expr());
- } else if op == tNOT {
- next();
- return ast_make_l(AST_NOT, parse_unary_expr());
- } else if op == tAMP {
- error("dereference not supported");
- //next();
- //parse_unary_expr();
- } else {
- return parse_primary_expr();
- }
- return nil; // unreachable
-}
-
-fn parse_mul_expr() Ast {
- var node Ast = parse_unary_expr();
- while ctx.tok & tcMASK == tcMULOP {
- var op u32 = (ctx.tok - tSTAR) + AST_MUL;
- next();
- node = ast_make_lr(op, node, parse_unary_expr());
- }
- return node;
-}
-
-fn parse_add_expr() Ast {
- var node Ast = parse_mul_expr();
- while ctx.tok & tcMASK == tcADDOP {
- var op u32 = (ctx.tok - tPLUS) + AST_ADD;
- next();
- node = ast_make_lr(op, node, parse_mul_expr());
- }
- return node;
-}
-
-fn parse_rel_expr() Ast {
- var node Ast = parse_add_expr();
- if ctx.tok & tcMASK == tcRELOP {
- var op u32 = (ctx.tok - tEQ) + AST_EQ;
- next();
- node = ast_make_lr(op, node, parse_add_expr());
- }
- return node;
-}
-
-fn parse_and_expr() Ast {
- var node Ast = parse_rel_expr();
- if ctx.tok == tAND {
- while ctx.tok == tAND {
- next();
- node = ast_make_lr(AST_BOOL_AND, node, parse_rel_expr());
- }
- }
- return node;
-}
-
-fn parse_expr() Ast {
- var node Ast = parse_and_expr();
- if ctx.tok == tOR {
- while ctx.tok == tOR {
- next();
- node = ast_make_lr(AST_BOOL_OR, node, parse_and_expr());
- }
- }
- return node;
-}
-
-fn parse_struct_type(name String) Type {
- var type Type = type_find(name);
-
- if type == nil {
- type = type_make(name, TYPE_STRUCT, nil, nil, 0);
- } else {
- if type.kind == TYPE_UNDEFINED {
- // resolve forward ref
- type.kind = TYPE_STRUCT;
- } else {
- error("cannot redefine struct '", @str name.text, "'");
- }
- };
- scope_push(SCOPE_STRUCT);
- require(tOBRACE);
- while true {
- if ctx.tok == tCBRACE {
- next();
- break;
- }
- var fname String = parse_name("field name");
- var kind SymbolKind = SYMBOL_FLD;
- if ctx.tok == tSTAR {
- next();
- kind = SYMBOL_PTR;
- }
- var ftype Type = parse_type(true);
- var sym Symbol = symbol_make(fname, ftype);
- sym.kind = kind;
- if ctx.tok != tCBRACE {
- require(tCOMMA);
- }
- }
- type.list = scope_pop();
- return type;
-}
-
-
-fn parse_array_type() Type {
- var type Type;
- var nelem u32 = 0;
- if ctx.tok == tCBRACK {
- // TODO: slices
- next();
- type = type_make(nil, TYPE_ARRAY, parse_type(false), nil, 0);
- } else {
- if ctx.tok != tNUM {
- error("array size must be numeric");
- }
- nelem = ctx.num;
- next();
- require(tCBRACK);
- type = type_make(nil, TYPE_ARRAY, parse_type(false), nil, nelem);
- }
- // TODO: type.name?
- return type;
-}
-
-fn parse_type(fwd_ref_ok u32) Type {
- if ctx.tok == tSTAR { // pointer-to
- error("pointer types not supported");
- //next();
- //return type_make(nil, TYPE_POINTER, parse_type(true), nil, 0);
- } else if ctx.tok == tOBRACK { // array-of
- next();
- return parse_array_type();
- } else if ctx.tok == tFN {
- error("func types not supported");
- //next();
- //return parse_func_type();
- } else if ctx.tok == tSTRUCT {
- error ("anonymous struct types not supported");
- //next();
- //return parse_struct_type(nil);
- } else if ctx.tok == tIDN {
- var name String = ctx.ident;
- next();
- var type Type = type_find(name);
- if type == nil {
- if fwd_ref_ok {
- type = type_make(name, TYPE_UNDEFINED, nil, nil, 0);
- } else {
- error("undefined type '", @str name.text, "' not usable here");
- }
- }
- return type;
- } else {
- expected("type");
- }
- return nil;
-}
-
-fn parse_while() Ast {
- // while expr { block }
- var expr Ast = parse_expr();
- require(tOBRACE);
- scope_push(SCOPE_LOOP);
- var block Ast = parse_block();
- scope_pop();
- return ast_make_lr(AST_WHILE, expr, block);
-}
-
-fn parse_if() Ast {
- // if expr { block }
-
- var expr Ast = parse_expr();
- require(tOBRACE);
- scope_push(SCOPE_BLOCK);
- var block Ast = parse_block();
- scope_pop();
-
- var last Ast = ast_make_lr(AST_CASE, expr, block);
- var stmt Ast = ast_make_l(AST_IF, last);
-
- while ctx.tok == tELSE {
- // ... else ...
- next();
- if ctx.tok == tIF {
- // ... if expr { block }
- next();
- expr = parse_expr();
- require(tOBRACE);
- scope_push(SCOPE_BLOCK);
- block = parse_block();
- scope_pop();
- last.next = ast_make_lr(AST_CASE, expr, block);
- last = last.next;
- } else {
- // ... { block }
- require(tOBRACE);
- scope_push(SCOPE_BLOCK);
- block = parse_block();
- scope_pop();
- last.next = ast_make_l(AST_ELSE, block);
- break;
- }
- }
- return stmt;
-}
-
-fn parse_return() Ast {
- // TODO check for return required/type
- var node Ast = ast_make_simple(AST_RETURN, 0);
- if ctx.tok == tSEMI {
- next();
- } else {
- // error("return types do not match");
- node.left = parse_expr();
- require(tSEMI);
- }
- return node;
-}
-
-fn parse_break() Ast {
- // TODO: break-to-labeled-loop support
- var scope Scope = scope_find(SCOPE_LOOP);
- if scope == nil {
- error("break must be used from inside a looping construct");
- }
- require(tSEMI);
- return ast_make_simple(AST_BREAK, 0);
-}
-
-fn parse_continue() Ast {
- // TODO: continue-to-labeled-loop support
- var scope Scope = scope_find(SCOPE_LOOP);
- if scope == nil {
- error("continue must be used from inside a looping construct");
- }
- require(tSEMI);
- return ast_make_simple(AST_CONTINUE, 0);
-}
-
-fn parse_struct_init(sym Symbol) {
- while true {
- if ctx.tok == tCBRACE {
- next();
- break;
- }
- var name String = parse_name("field name");
- var field Symbol = sym.type.list;
- while true { // TODO: field_find
- if field == nil {
- error("structure has no '", @str name.text, "' field");
- }
- if field.name == name {
- break;
- }
- field = field.next;
- }
- require(tCOLON);
- if ctx.tok == tOBRACE {
- next();
- parse_struct_init(field);
- } else {
- parse_expr();
- }
- if ctx.tok != tCBRACE {
- require(tCOMMA);
- }
- }
-}
-
-fn parse_array_init(sym Symbol) {
- while true {
- if ctx.tok == tCBRACE {
- next();
- break;
- }
- parse_expr();
- if ctx.tok != tCBRACE {
- require(tCOMMA);
- }
- }
-}
-
-fn parse_var() {
- // TODO: global vs local
- // TODO: generate assignment/initialization
- var name String = parse_name("variable name");
- var type Type = parse_type(false);
- var sym Symbol = symbol_make(name, type);
-
- if ctx.tok == tASSIGN {
- next();
- if ctx.tok == tOBRACE {
- next();
- if type.kind == TYPE_STRUCT {
- parse_struct_init(sym);
- } else if type.kind == TYPE_ARRAY {
- parse_array_init(sym);
- } else {
- error("type ", @str type.name.text,
- " cannot be initialized with {} expr");
- }
- } else {
- parse_expr();
- }
- } else {
- // default init
- }
- require(tSEMI);
-}
-
-fn _parse_expr_statement() Ast {
- var node Ast = parse_expr();
- var expr Ast = nil;
- var op u32;
-
- if ctx.tok == tASSIGN {
- // basic assignment
- next();
- return ast_make_lr(AST_ASSIGN, node, parse_expr());
- } else if (ctx.tok & tcMASK) == tcAEQOP {
- // +=, etc
- op = (ctx.tok - tADDEQ) + AST_ADD;
- next();
- expr = parse_expr();
- } else if (ctx.tok & tcMASK) == tcMEQOP {
- // *=, etc
- op = (ctx.tok - tMULEQ) + AST_MUL;
- next();
- expr = parse_expr();
- } else if (ctx.tok == tINC) {
- op = AST_ADD;
- next();
- expr = ast_make_const(1, ctx.type_i32);
- } else if (ctx.tok == tDEC) {
- op = AST_SUB;
- expr = ast_make_const(1, ctx.type_i32);
- } else {
- // simple expression
- return node;
- }
-
- // TODO duplicate node instead of sharing it
- expr = ast_make_lr(op, node, expr);
- return ast_make_lr(AST_ASSIGN, node, expr);
-}
-
-fn parse_expr_statement() Ast {
- var stmt Ast = ast_make_l(AST_EXPR, _parse_expr_statement());
- require(tSEMI);
- return stmt;
-}
-
-fn parse_block() Ast {
- var block Ast = ast_make_simple(AST_BLOCK, 0);
- var last Ast = nil;
- var node Ast;
- while true {
- if ctx.tok == tCBRACE {
- next();
- break;
- } else if ctx.tok == tRETURN {
- next();
- node = parse_return();
- } else if ctx.tok == tBREAK {
- next();
- node = parse_break();
- } else if ctx.tok == tCONTINUE {
- next();
- node = parse_continue();
- } else if ctx.tok == tWHILE {
- next();
- node = parse_while();
- } else if ctx.tok == tIF {
- next();
- node = parse_if();
- } else if ctx.tok == tVAR {
- next();
- parse_var();
- } else if ctx.tok == tSEMI {
- next();
- // empty statement
- continue;
- } else {
- node = parse_expr_statement();
- }
- if last == nil {
- block.left = node;
- } else {
- last.next = node;
- }
- last = node;
- }
- return block;
-}
-
-fn parse_param(fname String) Symbol {
- var pname String = parse_name("parameter name");
- var ptype Type = parse_type(false);
- if symbol_find(pname) != nil {
- error("duplicate parameter name '", @str pname.text, "'");
- }
- return symbol_make(pname, ptype);
-}
-
-fn parse_fn_body(sym Symbol) Ast {
- ctx.cur_fn = sym;
- require(tOBRACE);
- scope_push(SCOPE_FUNC); // parameters, from fn
- scope_push(SCOPE_BLOCK);
- var node Ast = parse_block();
- scope_pop(); // stow in node?
- scope_pop();
- ctx.cur_fn = nil;
- return node;
-}
-
-fn parse_fn() Ast {
- var fname String = parse_name("function name");
- var rtype Type = ctx.type_void;
-
- scope_push(SCOPE_FUNC);
-
- require(tOPAREN);
- var n u32 = 0;
- if ctx.tok != tCPAREN {
- parse_param(fname);
- while ctx.tok == tCOMMA {
- next();
- parse_param(fname);
- }
- n++;
- }
- require(tCPAREN);
-
- if ctx.tok != tOBRACE {
- rtype = parse_type(false);
- }
-
- var sym Symbol = symbol_make_global(fname, rtype);
- sym.kind = SYMBOL_FN;
- sym.type = type_make(fname, TYPE_FN, rtype, nil, n);
-
- var node Ast = ast_make_simple(AST_FUNC, 0);
- node.name = fname;
- node.sym = sym;
- node.right = parse_fn_body(sym);
-
- // save parameters
- sym.type.list = scope_pop();
-
- return node;
-}
-
-fn parse_enum_def() {
- if ctx.tok == tIDN {
- var name String = parse_name("enum name");
- type_make(name, TYPE_ENUM, nil, nil, 0);
- }
-
- require(tOBRACE);
- var val u32 = 0;
- while ctx.tok != tCBRACE {
- var name String = parse_name("enum tag name");
- var sym Symbol = symbol_find(name);
- if sym != nil {
- error("cannot redfine '", @str name.text, "' as enum tag");
- }
- sym = symbol_make_global(name, ctx.type_u32);
- sym.kind = SYMBOL_DEF;
- if ctx.tok == tASSIGN {
- next();
- parse_expr();
- // TODO val <- expr
- } else {
- val++;
- }
- require(tCOMMA);
- }
- require(tCBRACE);
- require(tSEMI);
-}
-
-fn parse_program() Ast {
- var program Ast = ast_make_simple(AST_PROGRAM, 0);
- var last Ast = nil;
-
- while true {
- if ctx.tok == tENUM {
- next();
- parse_enum_def();
- } else if ctx.tok == tSTRUCT {
- next();
- var name String = parse_name("struct name");
- parse_struct_type(name);
- require(tSEMI);
- } else if ctx.tok == tFN {
- next();
- var node Ast = parse_fn();
- if last == nil {
- program.left = node;
- } else {
- last.next = node;
- }
- last = node;
- } else if ctx.tok == tVAR {
- next();
- parse_var();
- } else if ctx.tok == tEOF {
- break;
- } else {
- expected("function, variable, or type definition");
- }
- }
- return program;
-}
-
-var indent u32 = 0;
-
-fn print_indent(fd i32) {
- var n u32 = 0;
- while n < indent {
- writes(fd, " ");
- n++;
- }
-}
-
-fn _dump_ast_node(fd i32, node Ast) {
- var kind AstKind = node.kind;
- var child Ast = nil;
-
- print_indent(fd);
- writes(fd, ast_kind[kind]);
- writes(fd, " ");
- indent++;
- if kind == AST_CONST {
- writex(fd, node.ival);
- } else if kind == AST_FUNC {
- writes(fd, node.name.text);
- writes(fd, "(");
- var param Symbol = node.sym.type.list;
- while param != nil {
- writes(fd, param.name.text);
- if param.next != nil {
- writes(fd, ", ");
- }
- param = param.next;
- }
- writes(fd, ") ");
- writes(fd, node.sym.type.of.name.text);
- } else if kind == AST_STRING {
- printstr(fd, node.name.text);
- } else if kind == AST_SYMBOL {
- writes(fd, node.name.text);
- }
- writes(fd, "\n");
-
- dump_ast_node(fd, node.left);
- dump_ast_node(fd, node.right);
- indent = indent - 1;
-}
-
-fn dump_ast_node(fd i32, node Ast) {
- if node != nil {
- while true {
- _dump_ast_node(fd, node);
- node = node.next;
- if node == nil {
- break;
- }
- }
- }
-}
-
-fn dump_ast(node Ast) {
- dump_ast_node(1, node);
-}
-
-fn start() i32 {
- ctx_init();
- scan();
-
- next();
- var ast Ast = parse_program();
-
- dump_ast(ast);
-
- //while(next() != tEOF) {
- // token_print(1);
- //}
- //writec(1, '\n');
- return 0;
-}
-
diff --git a/compiler/lexer.spl b/compiler/lexer.spl
@@ -0,0 +1,349 @@
+// Copyright 2023, Brian Swetland <swetland@frotz.net>
+// Licensed under the Apache License, Version 2.0.
+
+fn error_begin() i32 {
+ writes(2, "\n");
+ writes(2, ctx.filename);
+ writes(2, ":");
+ writei(2, ctx.linenumber);
+ writes(2, ": error: ");
+ return 2;
+}
+
+fn error_end() {
+ writes(2, "\n");
+ os_exit(1);
+}
+
+// ================================================================
+// lexical scanner
+
+// currently unrecognized: # $ ? \ `
+var lextab [256]u8 = {
+ tEOF, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
+ tINV, tSPC, tEOL, tSPC, tINV, tSPC, tINV, tINV,
+ tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
+ tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
+// ! " # $ % & '
+ tSPC, tBANG, tDQT, tMSC, tMSC, tPERCENT, tAMP, tSQT,
+// ( ) * + , - . /
+ tOPAREN, tCPAREN, tSTAR, tPLUS, tCOMMA, tMINUS, tDOT, tSLASH,
+// 0 1 2 3 4 5 6 7
+ tNUM, tNUM, tNUM, tNUM, tNUM, tNUM, tNUM, tNUM,
+// 8 9 : ; < = > ?
+ tNUM, tNUM, tCOLON, tSEMI, tLT, tASSIGN, tGT, tMSC,
+// @ A B C D E F G
+ tAT, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN,
+// H I J K L M N O
+ tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN,
+// P Q R S T U V W
+ tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN,
+// X Y Z [ \ ] ^ _
+ tIDN, tIDN, tIDN, tOBRACK, tMSC, tCBRACK, tCARET, tIDN,
+// ` a b c d e f g
+ tMSC, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN,
+// h i j k l m n o
+ tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN,
+// p q r s t u v w
+ tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN,
+// x y z { | } ~
+ tIDN, tIDN, tIDN, tOBRACE, tPIPE, tCBRACE, tNOT, tINV,
+ tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
+ tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
+ tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
+ tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
+ tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
+ tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
+ tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
+ tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
+ tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
+ tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
+ tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
+ tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
+ tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
+ tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
+ tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
+ tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
+};
+
+fn unhex(ch u32) i32 {
+ if (ch >= '0') && (ch <= '9') {
+ return ch - '0';
+ }
+ if (ch >= 'a') && (ch <= 'f') {
+ return ch - 'a' + 10;
+ }
+ if (ch >= 'A') && (ch <= 'F') {
+ return ch - 'A' + 10;
+ }
+ return -1;
+}
+
+fn scan() Token {
+ ctx.byteoffset++;
+ var ch i32 = readc(0);
+ if ch < 0 {
+ ctx.cc = 0;
+ } else {
+ ctx.cc = ch;
+ }
+ return ctx.cc;
+}
+
+fn unescape(n u32) u32 {
+ if n == 'n' {
+ return 10;
+ } else if n == 'r' {
+ return 13;
+ } else if n == 't' {
+ return 9;
+ } else if n == '"' {
+ return '"';
+ } else if n == '\'' {
+ return '\'';
+ } else if n == '\\' {
+ return '\\';
+ } else if n == 'x' {
+ var x0 u32 = unhex(scan());
+ var x1 u32 = unhex(scan());
+ if (x0 < 0) || (x1 < 0) {
+ error("invalid hex escape");
+ }
+ return (x0 << 4) | x1;
+ } else {
+ error("invalid escape ", n);
+ return 0;
+ }
+}
+
+fn scan_string(cc u32, nc u32) Token {
+ var n u32 = 0;
+ while true {
+ if nc == '"' {
+ nc = scan();
+ break;
+ } else if nc == 0 {
+ error("unterminated string");
+ } else if nc == '\\' {
+ ctx.tmp[n] = unescape(scan());
+ } else {
+ ctx.tmp[n] = nc;
+ }
+ nc = scan();
+ n++;
+ if n == 255 {
+ error("constant string too large");
+ }
+ }
+ ctx.tmp[n] = 0;
+ return tSTR;
+}
+
+fn scan_keyword(len u32) Token {
+ ctx.tmp[len] = 0;
+ var idn String = string_make(ctx.tmp, len);
+ ctx.ident = idn;
+
+ if len == 2 {
+ if idn == ctx.idn_if { return tIF; };
+ if idn == ctx.idn_fn { return tFN; }
+ } else if len == 3 {
+ if idn == ctx.idn_for { return tFOR; }
+ if idn == ctx.idn_var { return tVAR; }
+ if idn == ctx.idn_nil { return tNIL; }
+ if idn == ctx.idn_new { return tNEW; }
+ } else if len == 4 {
+ if idn == ctx.idn_case { return tCASE; }
+ if idn == ctx.idn_else { return tELSE; }
+ if idn == ctx.idn_enum { return tENUM; }
+ if idn == ctx.idn_true { return tTRUE; }
+ } else if len == 5 {
+ if idn == ctx.idn_break { return tBREAK; }
+ if idn == ctx.idn_while { return tWHILE; }
+ if idn == ctx.idn_false { return tFALSE; }
+ } else if len == 6 {
+ if idn == ctx.idn_switch { return tSWITCH; }
+ if idn == ctx.idn_struct { return tSTRUCT; }
+ if idn == ctx.idn_return { return tRETURN; }
+ } else if len == 8 {
+ if idn == ctx.idn_continue { return tCONTINUE; }
+ }
+ return tIDN;
+}
+
+fn scan_number(cc u32, nc u32) Token {
+ var n u32 = 1;
+ var val u32 = cc - '0';
+
+ if (cc == '0') && (nc == 'b') { // binary
+ nc = scan();
+ while (nc == '0') || (nc == '1') {
+ val = (val << 1) | (nc - '0');
+ nc = scan();
+ n++;
+ if (n == 34) {
+ error("binary constant too large");
+ }
+ }
+ } else if (cc == '0') && (nc == 'x') { // hex
+ nc = scan();
+ while true {
+ var tmp i32 = unhex(nc);
+ if tmp == -1 {
+ break;
+ }
+ val = (val << 4) | tmp;
+ nc = scan();
+ n++;
+ if n == 10 {
+ error("hex constant too large");
+ }
+ }
+ } else { // decimal
+ while lextab[nc] == tNUM {
+ var tmp u32 = (val * 10) + (nc - '0');
+ if tmp <= val {
+ error("decimal constant too large");
+ }
+ val = tmp;
+ nc = scan();
+ n++;
+ }
+ }
+ ctx.num = val;
+ return tNUM;
+}
+
+fn scan_ident(cc u32, nc u32) Token {
+ ctx.tmp[0] = cc;
+ var n u32 = 1;
+
+ while true {
+ var tok Token = lextab[nc];
+ if (tok == tIDN) || (tok == tNUM) {
+ ctx.tmp[n] = nc;
+ n++;
+ if (n == 32) { error("identifier too large"); }
+ nc = scan();
+ } else {
+ break;
+ }
+ }
+ return scan_keyword(n);
+}
+
+fn _next() Token {
+ var nc u8 = ctx.cc;
+ while true {
+ var cc u8 = nc;
+ nc = scan();
+ var tok Token = lextab[cc];
+ if tok == tNUM { // 0..9
+ return scan_number(cc, nc);
+ } else if tok == tIDN { // _ A..Z a..z
+ return scan_ident(cc, nc);
+ } else if tok == tDQT { // "
+ return scan_string(cc, nc);
+ } else if tok == tSQT { // '
+ ctx.num = nc;
+ if nc == '\\' {
+ ctx.num = unescape(scan());
+ }
+ nc = scan();
+ if nc != '\'' {
+ error("unterminated character constant");
+ }
+ nc = scan();
+ return tNUM;
+ } else if tok == tPLUS {
+ if nc == '+' { tok = tINC; nc = scan(); }
+ } else if tok == tMINUS {
+ if nc == '-' { tok = tDEC; nc = scan(); }
+ } else if tok == tAMP {
+ if nc == '&' { tok = tAND; nc = scan(); }
+ } else if tok == tPIPE {
+ if nc == '|' { tok = tOR; nc = scan(); }
+ } else if tok == tGT {
+ if nc == '=' { tok = tGE; nc = scan(); }
+ else if nc == '>' { tok = tRIGHT; nc = scan(); }
+ } else if tok == tLT {
+ if nc == '=' { tok = tLE; nc = scan(); }
+ else if nc == '<' { tok = tLEFT; nc = scan(); }
+ } else if tok == tASSIGN {
+ if nc == '=' { tok = tEQ; nc = scan(); }
+ } else if tok == tBANG {
+ if nc == '=' { tok = tNE; nc = scan(); }
+ } else if tok == tSLASH {
+ if nc == '/' {
+ // comment -- consume until EOL or EOF
+ while (nc != '\n') && (nc != 0) {
+ nc = scan();
+ }
+ continue;
+ }
+ } else if tok == tEOL {
+ ctx.linenumber++;
+ ctx.lineoffset = ctx.byteoffset;
+ //if ctx.flags & cfVisibleEOL {
+ // return tEOL;
+ //}
+ continue;
+ } else if tok == tSPC {
+ continue;
+ } else if (tok == tMSC) || (tok == tINV) {
+ error("unknown character 0x%02x", cc);
+ }
+
+ // if we're an AddOp or MulOp, followed by an '='
+ if ((tok & 0xF0) == 0x20) && (nc == '=') {
+ nc = scan();
+ // transform us to a XEQ operation
+ tok = tok + 0x10;
+ }
+
+ return tok;
+ }
+}
+
+fn printstr(fd i32, s str) {
+ var n u32 = 0;
+ writec(fd, '"');
+ while true {
+ var ch u32 = s[n];
+ if ch == 0 {
+ break;
+ } else if (ch < ' ') || (ch > '~') {
+ writex(fd, ch);
+ } else if (ch == '"') || (ch == '\\') {
+ writec(fd, '\\');
+ writec(fd, ch);
+ } else {
+ writec(fd, ch);
+ }
+ n++;
+ }
+ writec(fd, '"');
+}
+
+fn token_print(fd i32) {
+ if ctx.tok == tNUM {
+ writec(fd, '#');
+ writex(fd, ctx.num);
+ } else if ctx.tok == tIDN {
+ writec(fd, '@');
+ writes(fd, ctx.tmp);
+ } else if ctx.tok == tEOL {
+ writec(fd, '\n');
+ } else if ctx.tok == tSTR {
+ printstr(fd, ctx.tmp);
+ } else {
+ writes(fd, tnames[ctx.tok]);
+ }
+ writec(fd, ' ');
+}
+
+fn next() Token {
+ ctx.tok = _next();
+ return ctx.tok;
+}
+
diff --git a/compiler/main.spl b/compiler/main.spl
@@ -0,0 +1,75 @@
+// Copyright 2023, Brian Swetland <swetland@frotz.net>
+// Licensed under the Apache License, Version 2.0.
+
+var indent u32 = 0;
+
+fn print_indent(fd i32) {
+ var n u32 = 0;
+ while n < indent {
+ writes(fd, " ");
+ n++;
+ }
+}
+
+fn _dump_ast_node(fd i32, node Ast) {
+ var kind AstKind = node.kind;
+
+ print_indent(fd);
+ writes(fd, ast_kind[kind]);
+ writes(fd, " ");
+ indent++;
+ if kind == AST_CONST {
+ writex(fd, node.ival);
+ } else if kind == AST_FUNC {
+ writes(fd, node.name.text);
+ writes(fd, "(");
+ var param Symbol = node.sym.type.list;
+ while param != nil {
+ writes(fd, param.name.text);
+ if param.next != nil {
+ writes(fd, ", ");
+ }
+ param = param.next;
+ }
+ writes(fd, ") ");
+ writes(fd, node.sym.type.of.name.text);
+ } else if kind == AST_STRING {
+ printstr(fd, node.name.text);
+ } else if kind == AST_SYMBOL {
+ writes(fd, node.name.text);
+ }
+ writes(fd, "\n");
+
+ dump_ast_node(fd, node.left);
+ dump_ast_node(fd, node.right);
+ indent = indent - 1;
+}
+
+fn dump_ast_node(fd i32, node Ast) {
+ if node != nil {
+ while true {
+ _dump_ast_node(fd, node);
+ node = node.next;
+ if node == nil {
+ break;
+ }
+ }
+ }
+}
+
+fn dump_ast(node Ast) {
+ dump_ast_node(1, node);
+}
+
+fn start() i32 {
+ ctx_init();
+ scan();
+
+ next();
+ var ast Ast = parse_program();
+
+ dump_ast(ast);
+
+ return 0;
+}
+
diff --git a/compiler/parser.spl b/compiler/parser.spl
@@ -0,0 +1,651 @@
+// Copyright 2023, Brian Swetland <swetland@frotz.net>
+// Licensed under the Apache License, Version 2.0.
+
+// ================================================================
+// parser
+
+fn expected(what str) {
+ error("expected ", what, ", found ", @str tnames[ctx.tok]);
+}
+
+fn expect(tok Token) {
+ if ctx.tok != tok {
+ expected(tnames[tok]);
+ }
+}
+
+fn require(tok Token) {
+ expect(tok);
+ next();
+}
+
+fn parse_name(what str) String {
+ if ctx.tok != tIDN {
+ expected(what);
+ }
+ var s String = ctx.ident;
+ next();
+ return s;
+}
+
+fn parse_symbol(what str) Ast {
+ var name String = parse_name(what);
+ var sym Symbol = symbol_find(name);
+ return ast_make_symbol(name, sym);
+}
+
+fn parse_ident() Ast {
+ var node Ast = parse_symbol("identifier");
+
+ if (node.sym == nil) && (ctx.tok != tOPAREN) {
+ error("undefined identifier '", @str node.name.text, "'");
+ }
+
+ if ctx.tok == tOPAREN {
+ // function call
+ next();
+ node = ast_make_l(AST_CALL, node);
+ var last Ast = nil;
+
+ while ctx.tok != tCPAREN {
+ if ctx.tok == tAT {
+ // type annotation for varargs hack
+ next();
+ parse_type(false);
+ }
+
+ var expr Ast = parse_expr();
+ if last != nil {
+ last.next = expr;
+ } else {
+ node.right = expr;
+ }
+ last = expr;
+
+ if ctx.tok != tCPAREN {
+ require(tCOMMA);
+ }
+ }
+ next();
+ }
+
+ while true {
+ if ctx.tok == tDOT {
+ // field access
+ next();
+ node = ast_make_lr(AST_FIELD, node, parse_symbol("field name"));
+ } else if ctx.tok == tOBRACK {
+ // array access
+ next();
+ node = ast_make_lr(AST_INDEX, node, parse_expr());
+ require(tCBRACK);
+ } else {
+ return node;
+ }
+ }
+
+ return nil; // unreachable
+}
+
+fn parse_primary_expr() Ast {
+ var node Ast;
+ if ctx.tok == tNUM {
+ node = ast_make_const(ctx.num, ctx.type_i32);
+ next();
+ } else if ctx.tok == tSTR {
+ node = ast_make_simple(AST_STRING, 0);
+ node.name = string_make(ctx.tmp, strlen(ctx.tmp));
+ next();
+ } else if ctx.tok == tTRUE {
+ node = ast_make_const(1, ctx.type_bool);
+ next();
+ } else if ctx.tok == tFALSE {
+ node = ast_make_const(0, ctx.type_bool);
+ next();
+ } else if ctx.tok == tNIL {
+ node = ast_make_const(0, ctx.type_nil);
+ next();
+ } else if ctx.tok == tOPAREN {
+ next();
+ node = parse_expr();
+ require(tCPAREN);
+ } else if ctx.tok == tNEW {
+ next();
+ require(tOPAREN);
+ node = ast_make_simple(AST_NEW, 0);
+ node.name = parse_name("type name");
+ require(tCPAREN);
+ } else if ctx.tok == tIDN {
+ node = parse_ident();
+ } else {
+ error("invalid expression");
+ }
+ return node;
+}
+
+fn parse_unary_expr() Ast {
+ var op u32 = ctx.tok;
+ if op == tPLUS {
+ next();
+ return parse_unary_expr();
+ } else if op == tMINUS {
+ next();
+ return ast_make_l(AST_NEG, parse_unary_expr());
+ } else if op == tBANG {
+ next();
+ return ast_make_l(AST_BOOL_NOT, parse_unary_expr());
+ } else if op == tNOT {
+ next();
+ return ast_make_l(AST_NOT, parse_unary_expr());
+ } else if op == tAMP {
+ error("dereference not supported");
+ //next();
+ //parse_unary_expr();
+ } else {
+ return parse_primary_expr();
+ }
+ return nil; // unreachable
+}
+
+fn parse_mul_expr() Ast {
+ var node Ast = parse_unary_expr();
+ while ctx.tok & tcMASK == tcMULOP {
+ var op u32 = (ctx.tok - tSTAR) + AST_MUL;
+ next();
+ node = ast_make_lr(op, node, parse_unary_expr());
+ }
+ return node;
+}
+
+fn parse_add_expr() Ast {
+ var node Ast = parse_mul_expr();
+ while ctx.tok & tcMASK == tcADDOP {
+ var op u32 = (ctx.tok - tPLUS) + AST_ADD;
+ next();
+ node = ast_make_lr(op, node, parse_mul_expr());
+ }
+ return node;
+}
+
+fn parse_rel_expr() Ast {
+ var node Ast = parse_add_expr();
+ if ctx.tok & tcMASK == tcRELOP {
+ var op u32 = (ctx.tok - tEQ) + AST_EQ;
+ next();
+ node = ast_make_lr(op, node, parse_add_expr());
+ }
+ return node;
+}
+
+fn parse_and_expr() Ast {
+ var node Ast = parse_rel_expr();
+ if ctx.tok == tAND {
+ while ctx.tok == tAND {
+ next();
+ node = ast_make_lr(AST_BOOL_AND, node, parse_rel_expr());
+ }
+ }
+ return node;
+}
+
+fn parse_expr() Ast {
+ var node Ast = parse_and_expr();
+ if ctx.tok == tOR {
+ while ctx.tok == tOR {
+ next();
+ node = ast_make_lr(AST_BOOL_OR, node, parse_and_expr());
+ }
+ }
+ return node;
+}
+
+fn parse_struct_type(name String) Type {
+ var type Type = type_find(name);
+
+ if type == nil {
+ type = type_make(name, TYPE_STRUCT, nil, nil, 0);
+ } else {
+ if type.kind == TYPE_UNDEFINED {
+ // resolve forward ref
+ type.kind = TYPE_STRUCT;
+ } else {
+ error("cannot redefine struct '", @str name.text, "'");
+ }
+ };
+ scope_push(SCOPE_STRUCT);
+ require(tOBRACE);
+ while true {
+ if ctx.tok == tCBRACE {
+ next();
+ break;
+ }
+ var fname String = parse_name("field name");
+ var kind SymbolKind = SYMBOL_FLD;
+ if ctx.tok == tSTAR {
+ next();
+ kind = SYMBOL_PTR;
+ }
+ var ftype Type = parse_type(true);
+ var sym Symbol = symbol_make(fname, ftype);
+ sym.kind = kind;
+ if ctx.tok != tCBRACE {
+ require(tCOMMA);
+ }
+ }
+ type.list = scope_pop();
+ return type;
+}
+
+
+fn parse_array_type() Type {
+ var type Type;
+ var nelem u32 = 0;
+ if ctx.tok == tCBRACK {
+ // TODO: slices
+ next();
+ type = type_make(nil, TYPE_ARRAY, parse_type(false), nil, 0);
+ } else {
+ if ctx.tok != tNUM {
+ error("array size must be numeric");
+ }
+ nelem = ctx.num;
+ next();
+ require(tCBRACK);
+ type = type_make(nil, TYPE_ARRAY, parse_type(false), nil, nelem);
+ }
+ // TODO: type.name?
+ return type;
+}
+
+fn parse_type(fwd_ref_ok u32) Type {
+ if ctx.tok == tSTAR { // pointer-to
+ error("pointer types not supported");
+ //next();
+ //return type_make(nil, TYPE_POINTER, parse_type(true), nil, 0);
+ } else if ctx.tok == tOBRACK { // array-of
+ next();
+ return parse_array_type();
+ } else if ctx.tok == tFN {
+ error("func types not supported");
+ //next();
+ //return parse_func_type();
+ } else if ctx.tok == tSTRUCT {
+ error ("anonymous struct types not supported");
+ //next();
+ //return parse_struct_type(nil);
+ } else if ctx.tok == tIDN {
+ var name String = ctx.ident;
+ next();
+ var type Type = type_find(name);
+ if type == nil {
+ if fwd_ref_ok {
+ type = type_make(name, TYPE_UNDEFINED, nil, nil, 0);
+ } else {
+ error("undefined type '", @str name.text, "' not usable here");
+ }
+ }
+ return type;
+ } else {
+ expected("type");
+ }
+ return nil;
+}
+
+fn parse_while() Ast {
+ // while expr { block }
+ var expr Ast = parse_expr();
+ require(tOBRACE);
+ scope_push(SCOPE_LOOP);
+ var block Ast = parse_block();
+ scope_pop();
+ return ast_make_lr(AST_WHILE, expr, block);
+}
+
+fn parse_if() Ast {
+ // if expr { block }
+
+ var expr Ast = parse_expr();
+ require(tOBRACE);
+ scope_push(SCOPE_BLOCK);
+ var block Ast = parse_block();
+ scope_pop();
+
+ var last Ast = ast_make_lr(AST_CASE, expr, block);
+ var stmt Ast = ast_make_l(AST_IF, last);
+
+ while ctx.tok == tELSE {
+ // ... else ...
+ next();
+ if ctx.tok == tIF {
+ // ... if expr { block }
+ next();
+ expr = parse_expr();
+ require(tOBRACE);
+ scope_push(SCOPE_BLOCK);
+ block = parse_block();
+ scope_pop();
+ last.next = ast_make_lr(AST_CASE, expr, block);
+ last = last.next;
+ } else {
+ // ... { block }
+ require(tOBRACE);
+ scope_push(SCOPE_BLOCK);
+ block = parse_block();
+ scope_pop();
+ last.next = ast_make_l(AST_ELSE, block);
+ break;
+ }
+ }
+ return stmt;
+}
+
+fn parse_return() Ast {
+ // TODO check for return required/type
+ var node Ast = ast_make_simple(AST_RETURN, 0);
+ if ctx.tok == tSEMI {
+ next();
+ } else {
+ // error("return types do not match");
+ node.left = parse_expr();
+ require(tSEMI);
+ }
+ return node;
+}
+
+fn parse_break() Ast {
+ // TODO: break-to-labeled-loop support
+ var scope Scope = scope_find(SCOPE_LOOP);
+ if scope == nil {
+ error("break must be used from inside a looping construct");
+ }
+ require(tSEMI);
+ return ast_make_simple(AST_BREAK, 0);
+}
+
+fn parse_continue() Ast {
+ // TODO: continue-to-labeled-loop support
+ var scope Scope = scope_find(SCOPE_LOOP);
+ if scope == nil {
+ error("continue must be used from inside a looping construct");
+ }
+ require(tSEMI);
+ return ast_make_simple(AST_CONTINUE, 0);
+}
+
+fn parse_struct_init(sym Symbol) {
+ while true {
+ if ctx.tok == tCBRACE {
+ next();
+ break;
+ }
+ var name String = parse_name("field name");
+ var field Symbol = sym.type.list;
+ while true { // TODO: field_find
+ if field == nil {
+ error("structure has no '", @str name.text, "' field");
+ }
+ if field.name == name {
+ break;
+ }
+ field = field.next;
+ }
+ require(tCOLON);
+ if ctx.tok == tOBRACE {
+ next();
+ parse_struct_init(field);
+ } else {
+ parse_expr();
+ }
+ if ctx.tok != tCBRACE {
+ require(tCOMMA);
+ }
+ }
+}
+
+fn parse_array_init(sym Symbol) {
+ while true {
+ if ctx.tok == tCBRACE {
+ next();
+ break;
+ }
+ parse_expr();
+ if ctx.tok != tCBRACE {
+ require(tCOMMA);
+ }
+ }
+}
+
+fn parse_var() {
+ // TODO: global vs local
+ // TODO: generate assignment/initialization
+ var name String = parse_name("variable name");
+ var type Type = parse_type(false);
+ var sym Symbol = symbol_make(name, type);
+
+ if ctx.tok == tASSIGN {
+ next();
+ if ctx.tok == tOBRACE {
+ next();
+ if type.kind == TYPE_STRUCT {
+ parse_struct_init(sym);
+ } else if type.kind == TYPE_ARRAY {
+ parse_array_init(sym);
+ } else {
+ error("type ", @str type.name.text,
+ " cannot be initialized with {} expr");
+ }
+ } else {
+ parse_expr();
+ }
+ } else {
+ // default init
+ }
+ require(tSEMI);
+}
+
+fn _parse_expr_statement() Ast {
+ var node Ast = parse_expr();
+ var expr Ast = nil;
+ var op u32;
+
+ if ctx.tok == tASSIGN {
+ // basic assignment
+ next();
+ return ast_make_lr(AST_ASSIGN, node, parse_expr());
+ } else if (ctx.tok & tcMASK) == tcAEQOP {
+ // +=, etc
+ op = (ctx.tok - tADDEQ) + AST_ADD;
+ next();
+ expr = parse_expr();
+ } else if (ctx.tok & tcMASK) == tcMEQOP {
+ // *=, etc
+ op = (ctx.tok - tMULEQ) + AST_MUL;
+ next();
+ expr = parse_expr();
+ } else if (ctx.tok == tINC) {
+ op = AST_ADD;
+ next();
+ expr = ast_make_const(1, ctx.type_i32);
+ } else if (ctx.tok == tDEC) {
+ op = AST_SUB;
+ expr = ast_make_const(1, ctx.type_i32);
+ } else {
+ // simple expression
+ return node;
+ }
+
+ // TODO duplicate node instead of sharing it
+ expr = ast_make_lr(op, node, expr);
+ return ast_make_lr(AST_ASSIGN, node, expr);
+}
+
+fn parse_expr_statement() Ast {
+ var stmt Ast = ast_make_l(AST_EXPR, _parse_expr_statement());
+ require(tSEMI);
+ return stmt;
+}
+
+fn parse_block() Ast {
+ var block Ast = ast_make_simple(AST_BLOCK, 0);
+ var last Ast = nil;
+ var node Ast;
+ while true {
+ if ctx.tok == tCBRACE {
+ next();
+ break;
+ } else if ctx.tok == tRETURN {
+ next();
+ node = parse_return();
+ } else if ctx.tok == tBREAK {
+ next();
+ node = parse_break();
+ } else if ctx.tok == tCONTINUE {
+ next();
+ node = parse_continue();
+ } else if ctx.tok == tWHILE {
+ next();
+ node = parse_while();
+ } else if ctx.tok == tIF {
+ next();
+ node = parse_if();
+ } else if ctx.tok == tVAR {
+ next();
+ parse_var();
+ } else if ctx.tok == tSEMI {
+ next();
+ // empty statement
+ continue;
+ } else {
+ node = parse_expr_statement();
+ }
+ if last == nil {
+ block.left = node;
+ } else {
+ last.next = node;
+ }
+ last = node;
+ }
+ return block;
+}
+
+fn parse_param(fname String) Symbol {
+ var pname String = parse_name("parameter name");
+ var ptype Type = parse_type(false);
+ if symbol_find(pname) != nil {
+ error("duplicate parameter name '", @str pname.text, "'");
+ }
+ return symbol_make(pname, ptype);
+}
+
+fn parse_fn_body(sym Symbol) Ast {
+ ctx.cur_fn = sym;
+ require(tOBRACE);
+ scope_push(SCOPE_FUNC); // parameters, from fn
+ scope_push(SCOPE_BLOCK);
+ var node Ast = parse_block();
+ scope_pop(); // stow in node?
+ scope_pop();
+ ctx.cur_fn = nil;
+ return node;
+}
+
+fn parse_fn() Ast {
+ var fname String = parse_name("function name");
+ var rtype Type = ctx.type_void;
+
+ scope_push(SCOPE_FUNC);
+
+ require(tOPAREN);
+ var n u32 = 0;
+ if ctx.tok != tCPAREN {
+ parse_param(fname);
+ while ctx.tok == tCOMMA {
+ next();
+ parse_param(fname);
+ }
+ n++;
+ }
+ require(tCPAREN);
+
+ if ctx.tok != tOBRACE {
+ rtype = parse_type(false);
+ }
+
+ var sym Symbol = symbol_make_global(fname, rtype);
+ sym.kind = SYMBOL_FN;
+ sym.type = type_make(fname, TYPE_FN, rtype, nil, n);
+
+ var node Ast = ast_make_simple(AST_FUNC, 0);
+ node.name = fname;
+ node.sym = sym;
+ node.right = parse_fn_body(sym);
+
+ // save parameters
+ sym.type.list = scope_pop();
+
+ return node;
+}
+
+fn parse_enum_def() {
+ if ctx.tok == tIDN {
+ var name String = parse_name("enum name");
+ type_make(name, TYPE_ENUM, nil, nil, 0);
+ }
+
+ require(tOBRACE);
+ var val u32 = 0;
+ while ctx.tok != tCBRACE {
+ var name String = parse_name("enum tag name");
+ var sym Symbol = symbol_find(name);
+ if sym != nil {
+ error("cannot redfine '", @str name.text, "' as enum tag");
+ }
+ sym = symbol_make_global(name, ctx.type_u32);
+ sym.kind = SYMBOL_DEF;
+ if ctx.tok == tASSIGN {
+ next();
+ parse_expr();
+ // TODO val <- expr
+ } else {
+ val++;
+ }
+ require(tCOMMA);
+ }
+ require(tCBRACE);
+ require(tSEMI);
+}
+
+fn parse_program() Ast {
+ var program Ast = ast_make_simple(AST_PROGRAM, 0);
+ var last Ast = nil;
+
+ while true {
+ if ctx.tok == tENUM {
+ next();
+ parse_enum_def();
+ } else if ctx.tok == tSTRUCT {
+ next();
+ var name String = parse_name("struct name");
+ parse_struct_type(name);
+ require(tSEMI);
+ } else if ctx.tok == tFN {
+ next();
+ var node Ast = parse_fn();
+ if last == nil {
+ program.left = node;
+ } else {
+ last.next = node;
+ }
+ last = node;
+ } else if ctx.tok == tVAR {
+ next();
+ parse_var();
+ } else if ctx.tok == tEOF {
+ break;
+ } else {
+ expected("function, variable, or type definition");
+ }
+ }
+ return program;
+}
+
diff --git a/compiler/stdlib.spl b/compiler/stdlib.spl
@@ -0,0 +1,29 @@
+// Copyright 2023, Brian Swetland <swetland@frotz.net>
+// Licensed under the Apache License, Version 2.0.
+
+fn strneq(s1 str, s2 str, len u32) bool {
+ var n u32 = 0;
+ while n < len {
+ if s1[n] != s2[n] {
+ return false;
+ }
+ n++;
+ }
+ return true;
+}
+
+fn strcpyn(dst str, src str, len u32) {
+ var n u32 = 0;
+ while n < len {
+ dst[n] = src[n];
+ n++;
+ }
+}
+
+fn strlen(s str) u32 {
+ var n u32 = 0;
+ while s[n] != 0 {
+ n++;
+ }
+ return n;
+}
diff --git a/compiler/types.spl b/compiler/types.spl
@@ -0,0 +1,483 @@
+// Copyright 2023, Brian Swetland <swetland@frotz.net>
+// Licensed under the Apache License, Version 2.0.
+
+// ================================================================
+// data types
+
+struct String {
+ next *String,
+ len u32,
+ text [256]u8,
+};
+
+enum SymbolKind {
+ SYMBOL_VAR,
+ SYMBOL_FLD, // struct field
+ SYMBOL_PTR, // struct *field
+ SYMBOL_DEF, // enum
+ SYMBOL_FN,
+};
+
+struct Symbol {
+ next *Symbol,
+ name *String,
+ type *Type,
+ kind SymbolKind,
+};
+
+enum ScopeKind {
+ SCOPE_GLOBAL,
+ SCOPE_FUNC,
+ SCOPE_BLOCK,
+ SCOPE_LOOP,
+ SCOPE_STRUCT,
+};
+
+struct Scope {
+ parent *Scope,
+ first *Symbol,
+ last *Symbol,
+ kind ScopeKind,
+};
+
+enum TypeKind {
+ TYPE_VOID,
+ TYPE_BOOL,
+ TYPE_U8,
+ TYPE_U32,
+ TYPE_I32,
+ TYPE_NIL,
+ TYPE_POINTER,
+ TYPE_ARRAY,
+ TYPE_SLICE,
+ TYPE_STR,
+ TYPE_STRUCT,
+ TYPE_FN,
+ TYPE_ENUM,
+ TYPE_UNDEFINED,
+};
+
+struct Type {
+ next *Type,
+ name *String,
+ of *Type, // for slice, array, ptr, fn (return type)
+ list *Symbol, // for struct (fields), fn (params)
+ kind TypeKind,
+ count u32,
+};
+
+// ================================================================
+// Abstract Syntax Tree
+
+enum AstKind {
+// top node
+ AST_PROGRAM, // l=FUNC*
+// program components (chained into a list by next)
+ AST_FUNC, // l=BLOCK
+// container of statements
+ AST_BLOCK, // l=STMT*
+// statements (chained into a list by next)
+ AST_EXPR, // l=EXPR
+ AST_WHILE, // l=EXPR r=BLOCK
+ AST_BREAK,
+ AST_CONTINUE,
+ AST_RETURN, // l=EXPR
+ AST_IF, // l=CASE*
+// sub-parts of if
+ AST_CASE, // l=EXPR r=BLOCK
+ AST_ELSE, // l=BLOCK
+// expressions
+ AST_SYMBOL,
+ AST_CONST, // numeric constant
+ AST_STRING, // string constant
+ AST_DEREF, // l=EXPR type: pointer-to-...
+ AST_INDEX, // l=EXPR type: array-of-... r=EXPR index
+ AST_FIELD, // l=EXPR type: struct r=SYMBOL field
+ AST_ADDROF, // l=EXPR type: lvalue
+ AST_CALL, // l=NAME r=EXPR*
+ AST_ASSIGN, // l=lhsEXPR r=rhsEXPR
+ AST_NEW, // l=TYPE
+// binary expressions
+ // Rel Ops (maintain order matched w/ lexer)
+ AST_EQ, AST_NE, AST_LT, AST_LE, AST_GT, AST_GE,
+ // Add Ops (maintain order matched w/ lexer)
+ AST_ADD, AST_SUB, AST_OR, AST_XOR,
+ // Mul Ops (maintain order matched w/ lexer)
+ AST_MUL, AST_DIV, AST_MOD, AST_AND, AST_LSL, AST_LSR,
+ // uncategorized ops
+ AST_NOT, AST_BOOL_AND, AST_BOOL_OR, AST_BOOL_NOT, AST_NEG,
+ AST_KIND_COUNT,
+};
+
+var ast_kind []str = {
+ "PROGRAM", "FUNC",
+ "BLOCK", "EXPR", "WHILE", "BREAK", "CONTINUE",
+ "RETURN", "IF", "CASE", "ELSE",
+ "SYMBOL", "CONST", "STRING",
+ "DEREF", "INDEX", "FIELD", "ADDROF", "CALL", "ASSIGN", "NEW",
+ "EQ", "NE", "LT", "LE", "GT", "GE",
+ "ADD", "SUB", "OR", "XOR",
+ "MUL", "DIV", "MOD", "AND", "LSL", "LSR",
+ "NOT", "BOOL AND", "BOOL OR", "BOOL NOT", "NEG",
+};
+
+fn ast_is_relop(kind AstKind) bool {
+ return (kind >= AST_EQ) && (kind <= AST_GE);
+}
+
+fn ast_is_addop(kind AstKind) bool {
+ return (kind >= AST_ADD) && (kind <= AST_XOR);
+}
+
+fn ast_is_mulop(kind AstKind) bool {
+ return (kind >= AST_MUL) && (kind <= AST_LSR);
+}
+
+fn ast_is_binop(kind AstKind) bool {
+ return (kind >= AST_EQ) && (kind <= AST_LSR);
+}
+
+fn ast_is_expr(kind AstKind) bool {
+ return (kind >= AST_SYMBOL);
+}
+
+fn ast_is_stmt(kind AstKind) bool {
+ return (kind >= AST_EXPR) && (kind <= AST_ELSE);
+}
+
+struct Ast {
+ kind AstKind,
+
+ left *Ast,
+ right *Ast,
+ next *Ast, // intrusive list
+
+ ival u32,
+ name *String,
+ sym *Symbol,
+ type *Type,
+
+ srcloc u32, // linenumber for now
+};
+
+
+// ================================================================
+// lexical scanner tokens
+
+// token classes (tok & tcMASK)
+enum TokenClass{
+ tcRELOP = 0x08, tcADDOP = 0x10, tcMULOP = 0x18,
+ tcAEQOP = 0x20, tcMEQOP = 0x28, tcMASK = 0xF8,
+};
+
+enum Token {
+ // EndMarks, Braces, Brackets Parens
+ tEOF, tEOL, tOBRACE, tCBRACE, tOBRACK, tCBRACK, tOPAREN, tCPAREN,
+ // RelOps (do not reorder)
+ tEQ, tNE, tLT, tLE, tGT, tGE, tx0E, tx0F,
+ // AddOps (do not reorder)
+ tPLUS, tMINUS, tPIPE, tCARET, tx14, tx15, tx16, tx17,
+ // MulOps (do not reorder)
+ tSTAR, tSLASH, tPERCENT, tAMP, tLEFT, tRIGHT, tx1E, tx1F,
+ // AsnOps (do not reorder)
+ tADDEQ, tSUBEQ, tOREQ, tXOREQ, tx24, tx25, tx26, tx27,
+ tMULEQ, tDIVEQ, tMODEQ, tANDEQ, tLSEQ, tRSEQ, t2E, t2F,
+ // Various, UnaryNot, LogicalOps,
+ tSEMI, tCOLON, tDOT, tCOMMA, tNOT, tAND, tOR, tBANG,
+ tASSIGN, tINC, tDEC,
+ tAT,
+ // Keywords
+ tNEW, tFN, tSTRUCT, tVAR, tENUM,
+ tIF, tELSE, tWHILE,
+ tBREAK, tCONTINUE, tRETURN,
+ tFOR, tSWITCH, tCASE,
+ tTRUE, tFALSE, tNIL,
+ tIDN, tNUM, tSTR,
+ // used internal to the lexer but never returned
+ tSPC, tINV, tDQT, tSQT, tMSC,
+};
+
+var tnames []str = {
+ "<EOF>", "<EOL>", "{", "}", "[", "]", "(", ")",
+ "==", "!=", "<", "<=", ">", ">=", "", "",
+ "+", "-", "|", "^", "", "", "", "",
+ "*", "/", "%", "&", "<<", ">>", "", "",
+ "+=", "-=", "|=", "^=", "", "", "", "",
+ "*=", "/=", "%=", "&=", "<<=", ">>=", "", "",
+ ";", ":", ".", ",", "~", "&&", "||", "!",
+ "=", "++", "--",
+ "@",
+ "new", "fn", "struct", "var", "enum",
+ "if", "else", "while",
+ "break", "continue", "return",
+ "for", "switch", "case",
+ "true", "false", "nil",
+ "<ID>", "<NUM>", "<STR>",
+ "<SPC>", "<INV>", "<DQT>", "<SQT>", "<MSC>",
+};
+
+
+// ================================================================
+// lexer / parser / compiler context
+
+struct Context {
+ filename str, // filename of active source
+ outname str, // base name for output files
+
+ linenumber u32, // line number of most recent line
+ lineoffset u32, // position of start of most recent line
+ byteoffset u32, // position of the most recent character
+ flags u32,
+ cc u32, // scanner: next character
+
+ tok Token, // most recent token
+ num u32, // for tNUM
+ tmp [256]u8, // for tIDN, tSTR
+ ident *String, // for tSTR
+
+ stringlist *String, // intern table
+ typelist *Type, // all types
+
+ scope *Scope, // top of Scope stack
+ global *Scope, // the global scope
+ cur_fn *Symbol, // fn being parsed
+
+ idn_if *String, // identifier strings
+ idn_fn *String,
+ idn_for *String,
+ idn_var *String,
+ idn_nil *String,
+ idn_new *String,
+ idn_case *String,
+ idn_else *String,
+ idn_enum *String,
+ idn_true *String,
+ idn_break *String,
+ idn_while *String,
+ idn_false *String,
+ idn_switch *String,
+ idn_struct *String,
+ idn_return *String,
+ idn_continue *String,
+
+ type_void *Type,
+ type_bool *Type,
+ type_str *Type,
+ type_nil *Type,
+ type_u32 *Type,
+ type_i32 *Type,
+ type_u8 *Type,
+};
+
+var ctx Context;
+
+fn string_make(text str, len u32) String {
+ var s String = ctx.stringlist;
+ while s != nil {
+ if (s.len == len) && strneq(text, s.text, len) {
+ return s;
+ }
+ s = s.next;
+ }
+ s = new(String);
+ s.len = len;
+ strcpyn(s.text, text, len + 1);
+ s.next = ctx.stringlist;
+ ctx.stringlist = s;
+ return s;
+}
+
+fn scope_push(kind ScopeKind) Scope {
+ var scope Scope = new(Scope);
+ scope.first = nil;
+ scope.last = nil;
+ scope.parent = ctx.scope;
+ scope.kind = kind;
+ ctx.scope = scope;
+ return scope;
+}
+
+// returns symbol list for the popped scope
+fn scope_pop() Symbol {
+ var scope Scope = ctx.scope;
+ ctx.scope = scope.parent;
+ return scope.first;
+}
+
+fn scope_find(kind ScopeKind) Scope {
+ var scope Scope = ctx.scope;
+ while scope != nil {
+ if scope.kind == kind {
+ return scope;
+ }
+ scope = scope.parent;
+ }
+ return nil;
+}
+
+fn symbol_find_in(name String, scope Scope) Symbol {
+ var sym Symbol = scope.first;
+ while sym != nil {
+ if sym.name == name {
+ return sym;
+ }
+ sym = sym.next;
+ }
+ return nil;
+}
+
+// find the first surrounding scope of a specified kind
+fn symbol_find(name String) Symbol {
+ var scope Scope = ctx.scope;
+ while scope != nil {
+ var sym Symbol = symbol_find_in(name, scope);
+ if sym != nil {
+ return sym;
+ }
+ scope = scope.parent;
+ }
+ return nil;
+}
+
+fn symbol_make_in_scope(name String, type Type, scope Scope) Symbol {
+ var sym Symbol = new(Symbol);
+ sym.name = name;
+ sym.type = type;
+ sym.next = nil;
+ sym.kind = SYMBOL_VAR;
+ if scope.first == nil {
+ scope.first = sym;
+ } else {
+ scope.last.next = sym;
+ }
+ scope.last = sym;
+ return sym;
+}
+
+fn symbol_make_global(name String, type Type) Symbol {
+ return symbol_make_in_scope(name, type, ctx.global);
+}
+
+fn symbol_make(name String, type Type) Symbol {
+ return symbol_make_in_scope(name, type, ctx.scope);
+}
+
+fn type_make(name String, kind TypeKind, of Type, list Symbol, count u32) Type {
+ var type Type = new(Type);
+ type.name = name;
+ type.of = of;
+ type.list = list;
+ type.kind = kind;
+ type.count = count;
+ if name != nil {
+ type.next = ctx.typelist;
+ ctx.typelist = type;
+ } else {
+ type.next = nil;
+ }
+ return type;
+}
+
+fn type_find(name String) Type {
+ var t Type = ctx.typelist;
+ while t != nil {
+ if t.name == name {
+ return t;
+ }
+ t = t.next;
+ }
+ return nil;
+}
+
+fn type_find_field(type Type, name String) Symbol {
+ if type.kind != TYPE_STRUCT {
+ error("not a struct");
+ }
+ var s Symbol = type.list;
+ while s != nil {
+ if s.name == name {
+ return s;
+ }
+ s = s.next;
+ }
+ error("struct has no such field '", @str name.text, "'");
+ return nil;
+}
+
+fn ast_make(kind AstKind, ival u32, name String, sym Symbol, type Type) Ast {
+ var node Ast = new(Ast);
+ node.kind = kind;
+ node.ival = ival;
+ node.name = name;
+ node.sym = sym;
+ node.type = type;
+ node.srcloc = ctx.linenumber;
+ return node;
+}
+
+fn ast_make_lr(kind AstKind, left Ast, right Ast) Ast {
+ var node Ast = ast_make(kind, 0, nil, nil, nil);
+ node.left = left;
+ node.right = right;
+ return node;
+}
+
+fn ast_make_l(kind AstKind, child Ast) Ast {
+ var node Ast = ast_make(kind, 0, nil, nil, nil);
+ node.left = child;
+ return node;
+}
+
+fn ast_make_simple(kind AstKind, x u32) Ast {
+ return ast_make(kind, x, nil, nil, nil);
+}
+
+fn ast_make_const(value u32, type Type) Ast {
+ return ast_make(AST_CONST, value, nil, nil, type);
+}
+
+fn ast_make_symbol(name String, sym Symbol) Ast {
+ // TODO maybe handle nil sym differently?
+ var type Type = nil;
+ if sym != nil {
+ type = sym.type;
+ }
+ return ast_make(AST_SYMBOL, 0, name, sym, type);
+}
+
+fn ctx_init() {
+ ctx = new(Context);
+
+ ctx.idn_if = string_make("if", 2);
+ ctx.idn_fn = string_make("fn", 2);
+ ctx.idn_for = string_make("for", 3);
+ ctx.idn_var = string_make("var", 3);
+ ctx.idn_nil = string_make("nil", 3);
+ ctx.idn_new = string_make("new", 3);
+ ctx.idn_case = string_make("case", 4);
+ ctx.idn_else = string_make("else", 4);
+ ctx.idn_enum = string_make("enum", 4);
+ ctx.idn_true = string_make("true", 4);
+ ctx.idn_break = string_make("break", 5);
+ ctx.idn_while = string_make("while", 5);
+ ctx.idn_false = string_make("false", 5);
+ ctx.idn_switch = string_make("switch", 6);
+ ctx.idn_struct = string_make("struct", 6);
+ ctx.idn_return = string_make("return", 6);
+ ctx.idn_continue = string_make("continue", 8);
+
+ ctx.type_void = type_make(string_make("void", 4), TYPE_VOID, nil, nil, 0);
+ ctx.type_bool = type_make(string_make("bool", 4), TYPE_BOOL, nil, nil, 0);
+ ctx.type_nil = type_make(string_make("nil", 3), TYPE_NIL, nil, nil, 0);
+ ctx.type_str = type_make(string_make("str", 3), TYPE_STR, nil, nil, 0);
+ ctx.type_u32 = type_make(string_make("u32", 3), TYPE_U32, nil, nil, 0);
+ ctx.type_i32 = type_make(string_make("i32", 3), TYPE_I32, nil, nil, 0);
+ ctx.type_u8 = type_make(string_make("u8", 2), TYPE_U8, nil, nil, 0);
+
+ scope_push(SCOPE_GLOBAL);
+ ctx.global = ctx.scope;
+
+ ctx.linenumber = 1;
+ ctx.filename = "<stdin>";
+}
+