compiler

Unnamed Compiled Systems Language Project
git clone http://frotz.net/git/compiler.git
Log | Files | Refs

compiler2.c (54119B)


      1 // Copyright 2020, Brian Swetland <swetland@frotz.net>
      2 // Licensed under the Apache License, Version 2.0.
      3 
      4 #if C
      5 #include <stdio.h>
      6 #include <stdlib.h>
      7 #include <stdarg.h>
      8 #include <stdint.h>
      9 #include <stdbool.h>
     10 #include <strings.h>
     11 #include <string.h>
     12 
     13 #include <fcntl.h>
     14 #include <unistd.h>
     15 #include <sys/stat.h>
     16 
     17 // builtin types
     18 #define nil 0
     19 typedef uint32_t u32;
     20 typedef int32_t i32;
     21 typedef uint8_t u8;
     22 
     23 // rewriter only handles single word types
     24 typedef uint32_t token_t;
     25 typedef uint32_t ast_t;
     26 typedef uint32_t type_t;
     27 typedef uint32_t symbol_t;
     28 typedef uint32_t scope_t;
     29 typedef char* str;
     30 typedef char** args;
     31 typedef uint32_t* u32ptr;
     32 
     33 void error(const char *fmt, ...);
     34 #endif
     35 
     36 // ------------------------------------------------------------------
     37 // structures
     38 
     39 typedef struct StringRec StringRec;
     40 typedef struct CtxRec CtxRec;
     41 typedef struct AstRec AstRec;
     42 typedef struct TypeRec TypeRec;
     43 typedef struct SymbolRec SymbolRec;
     44 typedef struct ScopeRec ScopeRec;
     45 typedef struct FixupRec FixupRec;
     46 
     47 typedef struct StringRec* String;
     48 typedef struct CtxRec* Ctx;
     49 typedef struct AstRec* Ast;
     50 typedef struct TypeRec* Type;
     51 typedef struct SymbolRec* Symbol;
     52 typedef struct ScopeRec* Scope;
     53 typedef struct FixupRec* Fixup;
     54 
     55 struct StringRec {
     56 	String next;
     57 	u32 len;
     58 	char text[0];
     59 };
     60 
     61 enum {
     62 // top node
     63 	AST_PROGRAM,  // c2=FUNC*
     64 // program components (chained into a list by c2)
     65 	AST_FUNC,     // c0=BLOCK
     66 // container of statements
     67 	AST_BLOCK,    // c0=STMT
     68 // statements (chained into a list by c2)
     69 	AST_EXPR,     // c0=EXPR
     70 	AST_WHILE,    // c0=EXPR c1=BLOCK
     71 	AST_BREAK,
     72 	AST_CONTINUE,
     73 	AST_RETURN,   // c0=EXPR
     74 	AST_IF,       // c0=IFELSE
     75 // sub-part of if
     76 	AST_IFELSE,   // c0=EXPR c1=BLOCKthen c2=BLOCKelse|IFELSE
     77 // expression parts
     78 	AST_SYMBOL,
     79 	AST_CONST,
     80 	AST_STRING,
     81 	AST_DEREF,    // c0=EXPR type: pointer-to-...
     82 	AST_INDEX,    // c0=EXPR type: array-of-...  c1=EXPR index
     83 	AST_FIELD,    // c0=EXPR type: struct        c1=SYMBOL field
     84 	AST_ADDROF,   // c0=EXPR type: lvalue
     85 	AST_CALL,     // c0=NAME c2=EXPR*
     86 	AST_ASSIGN,
     87 
     88 	// Rel Ops (maintain order matched w/ lexer)
     89 	AST_EQ, AST_NE, AST_LT, AST_LE, AST_GT, AST_GE,
     90 	// Add Ops (maintain order matched w/ lexer)
     91 	AST_ADD, AST_SUB, AST_OR, AST_XOR,
     92 	// Mul Ops (maintain order matched w/ lexer)
     93 	AST_MUL, AST_DIV, AST_MOD, AST_AND, AST_LSL, AST_LSR,
     94 	// uncategorized ops
     95 	AST_NOT, AST_BOOL_AND, AST_BOOL_OR, AST_BOOL_NOT, AST_NEG,
     96 	AST_KIND_COUNT
     97 };
     98 
     99 bool ast_kind_is_relop(u32 kind) {
    100 	return (kind >= AST_EQ) && (kind <= AST_GE);
    101 }
    102 bool ast_kind_is_addop(u32 kind) {
    103 	return (kind >= AST_ADD) && (kind <= AST_XOR);
    104 }
    105 bool ast_kind_is_mulop(u32 kind) {
    106 	return (kind >= AST_MUL) && (kind <= AST_LSR);
    107 }
    108 bool ast_kind_is_binop(u32 kind) {
    109 	return (kind >= AST_EQ) && (kind <= AST_LSR);
    110 }
    111 
    112 str ast_kind[AST_KIND_COUNT] = {
    113 	"PROGRAM", "FUNC",
    114 	"BLOCK", "EXPR", "WHILE", "BREAK", "CONTINUE",
    115 	"RETURN", "IF", "IFELSE",
    116 	"SYMBOL", "CONST", "STR",
    117 	"DEREF", "INDEX", "FIELD", "ADDROF", "CALL", "ASSIGN",
    118 	"EQ", "NE", "LT", "LE", "GT", "GE",
    119 	"ADD", "SUB", "OR", "XOR",
    120 	"MUL", "DIV", "MOD", "AND", "LSL", "LSR",
    121 	"NOT", "BOOL AND", "BOOL OR", "BOOL NOT", "NEG",
    122 };
    123 
    124 struct AstRec {
    125 	ast_t kind;
    126 	Ast c0; // left
    127 	Ast c1; // right
    128 	Ast c2; // third or listhead (chained on c2 fields)
    129 
    130 	u32 ival;
    131 	String name;
    132 	Symbol sym;
    133 	Type type;
    134 
    135 	u32 srcloc; // linenumber for now
    136 };
    137 
    138 enum {
    139 	TYPE_VOID,
    140 	TYPE_BOOL,
    141 	TYPE_BYTE,
    142 	TYPE_I32,
    143 	TYPE_NIL,
    144 	TYPE_POINTER,
    145 	TYPE_ARRAY,
    146 	TYPE_SLICE,
    147 	TYPE_RECORD,
    148 	TYPE_FUNC,
    149 	TYPE_ENUM,
    150 	TYPE_UNDEFINED,
    151 };
    152 
    153 str type_kind[TYPE_UNDEFINED + 1] = {
    154 	"void", "bool", "byte", "i32", "nil", "*", "[]", "[]",
    155 	"struct", "func", "enum", "undef"
    156 };
    157 
    158 struct TypeRec {
    159 	type_t kind;
    160 	Type base;      // pointer-to, func-return-type, array-of
    161 	Symbol sym;     // if not anonymous
    162 	Symbol first;   // list of params or fields
    163 	u32 len;        // array elem count, param count
    164 	u32 size;       // in bytes, local stack for funcs
    165 };
    166 
    167 enum {
    168 	SYM_CONST,
    169 	SYM_TYPE,
    170 	SYM_FUNC,
    171 	SYM_GLOBAL,
    172 	SYM_LOCAL,
    173 	SYM_PARAM,
    174 	SYM_FIELD,
    175 };
    176 
    177 str sym_kind[SYM_FIELD + 1] = {
    178 	"CONST", "TYPE", "FUNC", "GLOBAL", "LOCAL", "PARAM", "FIELD",
    179 };
    180 
    181 enum {
    182 	SYM_IS_READ_ONLY = 0x01,
    183 	SYM_IS_PUBLIC =    0x02,
    184 	SYM_IS_DEFINED =   0x04,
    185 	SYM_IS_BUILTIN =   0x08,
    186 	SYM_IS_PLACED =    0x10, // exists at addr in sym->value
    187 };
    188 
    189 struct SymbolRec {
    190 	symbol_t kind;
    191 	Symbol next;
    192 	u32 flags;
    193 	String name;
    194 	Type type;
    195 	Symbol first; // list of fields or params
    196 	u32 value; // SYM_CONST
    197 	Fixup fixups; // references from before placement
    198 };
    199 
    200 enum {
    201 	SCOPE_GLOBAL, SCOPE_FUNC, SCOPE_BLOCK, SCOPE_LOOP,
    202 };
    203 
    204 struct ScopeRec {
    205 	scope_t kind;
    206 	Scope next;
    207 	Symbol first;
    208 	u32 level;
    209 	u32 save_stack;
    210 	// Fixup?
    211 };
    212 
    213 
    214 struct FixupRec {
    215 	Fixup next;
    216 	u32 addr;
    217 };
    218 
    219 // ------------------------------------------------------------------
    220 // compiler global context
    221 
    222 struct CtxRec {
    223 	const char* filename;  // filename of active source
    224 	int fd;
    225 
    226 	u8 iobuffer[1024];     // scanner file io buffer
    227 	u32 ionext;
    228 	u32 iolast;
    229 
    230 	u32 linenumber;        // line number of most recent line
    231 	u32 lineoffset;        // position of start of most recent line
    232 	u32 byteoffset;        // position of the most recent character
    233 	u32 flags;
    234 	u32 cc;                // scanner: next character
    235 
    236 	token_t tok;           // most recent token
    237 	u32 num;               // used for tNUM
    238 	char tmp[256];         // used for tIDN, tSTR;
    239 	String ident;          // used for tIDN
    240 
    241 	String strtab;         // TODO: hashtable
    242 	Symbol typetab;        // TODO: hashtable
    243 	Scope scope;           // scope stack
    244 
    245 	Symbol fn;             // active function being parsed
    246 	u32 spill_stack;       // where to spill temp regs (TODO: dynamic)
    247 	u32 local_stack;       // total stack for all locals (params, vars, tmps)
    248 	u32 alloc_stack;       // where to allocate next var (grows/shrinks as
    249 	                       // we enter/exit scopes)
    250 	ScopeRec global;
    251 
    252 	String idn_if;
    253 	String idn_for;
    254 	String idn_var;
    255 	String idn_nil;
    256 	String idn_case;
    257 	String idn_func;
    258 	String idn_else;
    259 	String idn_enum;
    260 	String idn_true;
    261 	String idn_type;
    262 	String idn_break;
    263 	String idn_while;
    264 	String idn_false;
    265 	String idn_switch;
    266 	String idn_struct;
    267 	String idn_return;
    268 	String idn_continue;
    269 
    270 	Type type_void;
    271 	Type type_bool;
    272 	Type type_byte;
    273 	Type type_i32;
    274 	Type type_nil;
    275 	Type type_string;
    276 
    277 	u32 pc;              // next ins to emit
    278 	u32 gp;              // next global data to emit
    279 
    280 	u32 code[8192];
    281 	u32 data[8192];
    282 	u32 xref[8192];
    283 };
    284 
    285 CtxRec ctx;
    286 
    287 // ------------------------------------------------------------------
    288 
    289 String string_make(const char* text, u32 len) {
    290 	// OPT obviously this wants to be a hash table
    291 	String str = ctx.strtab;
    292 	while (str != nil) {
    293 		if ((str->len == len) && (memcmp(text, str->text, len) == 0)) {
    294 			return str;
    295 		}
    296 		str = str->next;
    297 	}
    298 
    299 	str = malloc(sizeof(StringRec) + len + 1);
    300 	str->len = len;
    301 	memcpy(str->text, text, len);
    302 	str->text[len] = 0;
    303 	str->next = ctx.strtab;
    304 	ctx.strtab = str;
    305 
    306 	return str;
    307 }
    308 
    309 // ================================================================
    310 
    311 Ast ast_make(ast_t kind, u32 ival, String name, Symbol sym, Type type) {
    312 	Ast a = malloc(sizeof(AstRec));
    313 	a->kind = kind;
    314 	a->c0 = nil;
    315 	a->c1 = nil;
    316 	a->c2 = nil;
    317 	a->ival = ival;
    318 	a->name = name;
    319 	a->sym = sym;
    320 	a->type = type;
    321 	a->srcloc = ctx.linenumber;
    322 	return a;
    323 }
    324 
    325 Ast ast_make_binop(u32 kind, Ast left, Ast right) {
    326 	Ast node = ast_make(kind, 0, nil, nil, nil);
    327 	node->c0 = left;
    328 	node->c1 = right;
    329 	return node;
    330 }
    331 
    332 Ast ast_make_unop(u32 kind, Ast child) {
    333 	Ast node = ast_make(kind, 0, nil, nil, nil);
    334 	node->c0 = child;
    335 	return node;
    336 }
    337 
    338 Ast ast_make_simple(ast_t kind, u32 x) {
    339 	return ast_make(kind, x, nil, nil, nil);
    340 }
    341 
    342 Ast ast_make_const(u32 value, Type type) {
    343 	return ast_make(AST_CONST, value, nil, nil, type);
    344 }
    345 
    346 Ast ast_make_symbol(String name, Symbol sym) {
    347 	return ast_make(AST_SYMBOL, 0, name, sym, sym->type);
    348 }
    349 
    350 // ================================================================
    351 
    352 Symbol symbol_make(symbol_t kind, u32 flags, String name, Type type, u32 value) {
    353 	Symbol s = malloc(sizeof(SymbolRec));
    354 	s->kind = kind;
    355 	s->flags = flags;
    356 	s->name = name;
    357 	s->type = type;
    358 	s->value = value;
    359 	s->first = nil;
    360 	s->fixups = nil;
    361 	return s;
    362 }
    363 
    364 void symbol_add_global(Symbol sym) {
    365 	sym->next = ctx.global.first;
    366 	ctx.global.first = sym;
    367 }
    368 
    369 // search the scope stack for a symbol
    370 Symbol symbol_find(String str) {
    371 	Scope scope = ctx.scope;
    372 	while (scope != nil) {
    373 		Symbol sym = scope->first;
    374 		while (sym != nil) {
    375 			if (sym->name == str) {
    376 				return sym;
    377 			}
    378 			sym = sym->next;
    379 		}
    380 		scope = scope->next;
    381 	}
    382 	return nil;
    383 }
    384 
    385 // ================================================================
    386 
    387 Type type_make(type_t kind, Type base, u32 len, u32 size) {
    388 	Type t = malloc(sizeof(TypeRec));
    389 	t->kind = kind;
    390 	t->base = base;
    391 	t->sym = nil;
    392 	t->first = nil;
    393 	t->len = len;
    394 	t->size = size;
    395 	return t;
    396 }
    397 
    398 Type type_make_ptr(Type type) {
    399 	return type_make(TYPE_POINTER, type, 4, 0);
    400 }
    401 
    402 void type_add(Type type, String name) {
    403 	//fprintf(stderr,"type_add(%s, %s)\n",type_id_tab[type->kind],name->text);
    404 	Symbol sym = symbol_make(SYM_TYPE, 0, name, type, 0);
    405 	if (type->sym == nil) {
    406 		// only set the the type's object if it is
    407 		// not already set (otherwise aliases will
    408 		// clobber the canonical type names -- yuck!)
    409 		type->sym = sym;
    410 	}
    411 	sym->next = ctx.typetab;
    412 	ctx.typetab = sym;
    413 }
    414 
    415 Type type_find(String name) {
    416 	Symbol sym = ctx.typetab;
    417 	while (sym != nil) {
    418 		if (sym->name == name) {
    419 			return sym->type;
    420 		}
    421 		sym = sym->next;
    422 	}
    423 	return nil;
    424 }
    425 
    426 Type type_setup(const char* text, u32 tlen, u32 kind, u32 size) {
    427 	String name = string_make(text, tlen);
    428 	Type type = type_make(kind, nil, 0, size);
    429 	type_add(type, name);
    430 	return type;
    431 }
    432 
    433 bool type_is_same(Type a, Type b) {
    434 	if (a == b) {
    435 		return true;
    436 	}
    437 	if (a->kind != b->kind) {
    438 		return false;
    439 	}
    440 	if (a->base != b->base) {
    441 		return false;
    442 	}
    443 	if (a->len != b->len) {
    444 		return false;
    445 	}
    446 	Symbol a1 = a->first;
    447 	Symbol b1 = b->first;
    448 	while ((a1 != nil) && (b1 != nil)) {
    449 		// check that parameters and fields match
    450 		if (!type_is_same(a1->type, b1->type)) {
    451 			return false;
    452 		}
    453 	}
    454 	if ((a1 != nil) || (b1 != nil)) {
    455 		// mismatched number of parameters or fields
    456 		return false;
    457 	}
    458 	return true;
    459 }
    460 
    461 bool type_is_i32_shape(Type type) {
    462 	if ((type->kind == TYPE_I32) ||
    463 	    (type->kind == TYPE_BYTE) ||
    464 	    (type->kind == TYPE_BOOL)) {
    465 		return true;
    466 	} else {
    467 		return false;
    468 	}
    469 }
    470 
    471 bool type_is_compatible(Type dst, Type src) {
    472 	if (type_is_i32_shape(dst) && type_is_i32_shape(src)) {
    473 		return true;
    474 	}
    475 	return type_is_same(dst, src);
    476 	// check if deref of src is same as dst?
    477 }
    478 
    479 void type_dump(FILE* fp, Type type, bool use_short_name) {
    480 	if (use_short_name && (type->sym != nil)) {
    481 		fprintf(fp, "%s", type->sym->name->text);
    482 	} else if (type->kind == TYPE_ARRAY) {
    483 		fprintf(fp, "[%u]", type->len);
    484 		type_dump(fp, type->base, true);
    485 	} else if (type->kind == TYPE_RECORD) {
    486 		fprintf(fp, "struct {\n");
    487 		Symbol field = type->first;
    488 		while (field != nil) {
    489 			fprintf(fp, "    %s ", field->name->text);
    490 			type_dump(fp, field->type, true);
    491 			fprintf(fp, ", // off=%u, sz=%u\n", field->value, field->type->size);
    492 			field = field->next;
    493 		}
    494 		fprintf(fp, "}");
    495 	} else {
    496 		fprintf(fp, "%s", type_kind[type->kind]);
    497 		if ((type->kind == TYPE_POINTER) || (type->kind == TYPE_SLICE)) {
    498 			type_dump(fp, type->base, true);
    499 		}
    500 	}
    501 }
    502 
    503 // ================================================================
    504 
    505 // push a new scope, adding list of symbols if provided
    506 Scope scope_push(scope_t kind, Symbol list) {
    507 	Scope scope = malloc(sizeof(ScopeRec));
    508 	scope->kind = kind;
    509 	scope->next = ctx.scope;
    510 	scope->first = list;
    511 	scope->level = ctx.scope->level + 1;
    512 	// save current stack offset (next local var)
    513 	scope->save_stack = ctx.alloc_stack;
    514 	ctx.scope = scope;
    515 	return scope;
    516 }
    517 
    518 void scope_add_symbol(Scope scope, Symbol sym) {
    519 	sym->next = scope->first;
    520 	scope->first = sym;
    521 }
    522 
    523 // pop a scope, return list of symbols
    524 Symbol scope_pop() {
    525 	if (ctx.scope->level == 0) {
    526 		error("cannot pop the global scope");
    527 	}
    528 
    529 	// restore prev stack offset (next local var)
    530 	ctx.alloc_stack = ctx.scope->save_stack;
    531 
    532 	Symbol list = ctx.scope->first;
    533 	// XXX dealloc
    534 	ctx.scope = ctx.scope->next;
    535 	return list;
    536 }
    537 
    538 // find the first surrounding scope of a specified kind
    539 Scope scope_find(scope_t scope_kind) {
    540 	Scope scope = ctx.scope;
    541 	while (scope != nil) {
    542 		if (scope->kind == scope_kind) {
    543 			return scope;
    544 		}
    545 		scope = scope->next;
    546 	}
    547 	return nil;
    548 }
    549 
    550 enum {
    551 	BI_EXIT,
    552 	BI_PRINT_HEX_32,
    553 	BI_PUT_C,
    554 };
    555 
    556 void builtin_make(const char* name, u32 id, Type p0, Type p1, Type rtn) {
    557 	String fname = string_make(name, strlen(name));
    558 	Type type = type_make(TYPE_FUNC, rtn, 0, 0);
    559 	type->sym = symbol_make(SYM_FUNC, SYM_IS_DEFINED | SYM_IS_BUILTIN, fname, type, id);
    560 
    561 	if (p0 != nil) {
    562 		Symbol param = symbol_make(SYM_PARAM, 0, string_make("a", 1), p0, 0);
    563 		type->sym->first = param;
    564 		type->first = param;
    565 		type->len = 1;
    566 		if (p1 != nil) {
    567 			param->next = symbol_make(SYM_PARAM, 0, string_make("b", 1), p1, 1);
    568 			type->len = 2;
    569 		}
    570 	}
    571 	symbol_add_global(type->sym);
    572 }
    573 
    574 void fixup_add_list(Fixup list, u32 addr) {
    575 	Fixup fixup = malloc(sizeof(FixupRec));
    576 	fixup->next = list->next;
    577 	fixup->addr = addr;
    578 	list->next = fixup;
    579 }
    580 
    581 void fixup_add_sym(Symbol sym, u32 addr) {
    582 	Fixup fixup = malloc(sizeof(FixupRec));
    583 	fixup->next = sym->fixups;
    584 	fixup->addr = addr;
    585 	sym->fixups = fixup;
    586 }
    587 
    588 // ================================================================
    589 
    590 enum {
    591 	cfVisibleEOL   = 1,
    592 	cfAbortOnError = 2,
    593 	cfTraceCodeGen = 3,
    594 };
    595 
    596 void ctx_init() {
    597 	memset(&ctx, 0, sizeof(ctx));
    598 
    599 	// pre-intern keywords
    600 	ctx.idn_if       = string_make("if", 2);
    601 	ctx.idn_for      = string_make("for", 3);
    602 	ctx.idn_var      = string_make("var", 3);
    603 	ctx.idn_nil      = string_make("nil", 3);
    604 	ctx.idn_case     = string_make("case", 4);
    605 	ctx.idn_func     = string_make("func", 4);
    606 	ctx.idn_else     = string_make("else", 4);
    607 	ctx.idn_enum     = string_make("enum", 4);
    608 	ctx.idn_true     = string_make("true", 4);
    609 	ctx.idn_type     = string_make("type", 4);
    610 	ctx.idn_break    = string_make("break", 5);
    611 	ctx.idn_while    = string_make("while", 5);
    612 	ctx.idn_false    = string_make("false", 5);
    613 	ctx.idn_switch   = string_make("switch", 6);
    614 	ctx.idn_struct   = string_make("struct", 6);
    615 	ctx.idn_return   = string_make("return", 6);
    616 	ctx.idn_continue = string_make("continue", 8);
    617 
    618 	// install built-in basic types
    619 	ctx.type_void    = type_setup("void", 4, TYPE_VOID, 0);
    620 	ctx.type_byte    = type_setup("byte", 4, TYPE_BYTE, 1);
    621 	ctx.type_bool    = type_setup("bool", 4, TYPE_BOOL, 1);
    622 	ctx.type_i32     = type_setup("i32",  3, TYPE_I32, 4);
    623 	ctx.type_nil     = type_setup("nil",  3, TYPE_NIL, 4);
    624 	ctx.type_string  = type_setup("str",  3, TYPE_SLICE, 8);
    625 	ctx.type_string->base = ctx.type_byte;
    626 
    627 	// magic builtin functions
    628 	builtin_make("_hexout_", BI_PRINT_HEX_32, ctx.type_i32, nil, ctx.type_void);
    629 	builtin_make("_putc_", BI_PUT_C, ctx.type_i32, nil, ctx.type_void);
    630 
    631 	ctx.scope = &(ctx.global);
    632 }
    633 
    634 void dump_file_line(const char* fn, u32 offset);
    635 void dump_error_ctxt();
    636 
    637 #if C
    638 void error(const char *fmt, ...)
    639 #else
    640 void error(const char *fmt)
    641 #endif
    642 {
    643 	va_list ap;
    644 
    645 	fprintf(stderr,"\n%s:%d: ", ctx.filename, ctx.linenumber);
    646 	va_start(ap, fmt);
    647 	vfprintf(stderr, fmt, ap);
    648 	va_end(ap);
    649 
    650 	if (ctx.linenumber > 0) {
    651 		// dump_file_line(ctx.filename, ctx.lineoffset);
    652 	}
    653 	fprintf(stderr, "\n");
    654 
    655 	dump_error_ctxt();
    656 
    657 	if (ctx.flags & cfAbortOnError) {
    658 		abort();
    659 	} else {
    660 		exit(1);
    661 	}
    662 }
    663 
    664 void ctx_open_source(const char* filename) {
    665 	ctx.filename = filename;
    666 	ctx.linenumber = 0;
    667 
    668 	if (ctx.fd >= 0) {
    669 		close(ctx.fd);
    670 	}
    671 	ctx.fd = open(filename, O_RDONLY);
    672 	if (ctx.fd < 0) {
    673 		error("cannot open file '%s'", filename);
    674 	}
    675 	ctx.ionext = 0;
    676 	ctx.iolast = 0;
    677 	ctx.linenumber = 1;
    678 	ctx.lineoffset = 0;
    679 	ctx.byteoffset = 0;
    680 }
    681 
    682 // ================================================================
    683 // lexical scanner
    684 
    685 // token classes (tok & tcMASK)
    686 enum {
    687 	tcRELOP = 0x08, tcADDOP = 0x10, tcMULOP = 0x18,
    688 	tcAEQOP = 0x20, tcMEQOP = 0x28, tcMASK = 0xF8,
    689 };
    690 
    691 enum {
    692 	// EndMarks, Braces, Brackets Parens
    693 	tEOF, tEOL, tOBRACE, tCBRACE, tOBRACK, tCBRACK, tOPAREN, tCPAREN,
    694 	// RelOps (do not reorder)
    695 	tEQ, tNE, tLT, tLE, tGT, tGE, tx0E, tx0F,
    696 	// AddOps (do not reorder)
    697 	tPLUS, tMINUS, tPIPE, tCARET, tx14, tx15, tx16, tx17,
    698 	// MulOps (do not reorder)
    699 	tSTAR, tSLASH, tPERCENT, tAMP, tLEFT, tRIGHT, tx1E, tx1F,
    700 	// AsnOps (do not reorder)
    701 	tADDEQ, tSUBEQ, tOREQ, tXOREQ, tx24, tx25, tx26, tx27,
    702 	tMULEQ, tDIVEQ, tMODEQ, tANDEQ, tLSEQ, tRSEQ, t2E, t2F,
    703 	// Various, UnaryNot, LogicalOps,
    704 	tSEMI, tCOLON, tDOT, tCOMMA, tNOT, tAND, tOR, tBANG,
    705 	tASSIGN, tINC, tDEC,
    706 	// Keywords
    707 	tTYPE, tFUNC, tSTRUCT, tVAR, tENUM,
    708 	tIF, tELSE, tWHILE,
    709 	tBREAK, tCONTINUE, tRETURN,
    710 	tFOR, tSWITCH, tCASE,
    711 	tTRUE, tFALSE, tNIL,
    712 	tIDN, tNUM, tSTR,
    713 	// used internal to the lexer but never returned
    714 	tSPC, tINV, tDQT, tSQT, tMSC,
    715 };
    716 
    717 str tnames[] = {
    718 	"<EOF>", "<EOL>", "{",  "}",  "[",   "]",   "(",   ")",
    719 	"==",    "!=",    "<",  "<=", ">",   ">=",  "",    "",
    720 	"+",     "-",     "|",  "^",  "",    "",    "",    "",
    721 	"*",     "/",     "%",  "&",  "&~",  "<<",  ">>",  "",
    722 	"+=",    "-=",    "|=", "^=", "",    "",    "",    "",
    723 	"*=",    "/=",    "%=", "&=", "&~=", "<<=", ">>=", "",
    724 	";",     ":",     ".",  ",",  "~",   "&&",  "||",  "!",
    725 	"=",     "++",    "--",
    726 	"type", "func", "struct", "var", "enum",
    727 	"if", "else", "while",
    728 	"break", "continue", "return",
    729 	"for", "switch", "case",
    730 	"true", "false", "nil",
    731 	"<ID>", "<NUM>", "<STR>",
    732 	"<SPC>", "<INV>", "<DQT>", "<SQT>", "<MSC>",
    733 };
    734 
    735 u8 lextab[256] = {
    736 	tEOF, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
    737 	tINV, tSPC, tEOL, tSPC, tINV, tSPC, tINV, tINV,
    738 	tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
    739 	tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
    740 	tSPC, tBANG, tDQT, tMSC, tMSC, tPERCENT, tAMP, tSQT,
    741 	tOPAREN, tCPAREN, tSTAR, tPLUS, tCOMMA, tMINUS, tDOT, tSLASH,
    742 	tNUM, tNUM, tNUM, tNUM, tNUM, tNUM, tNUM, tNUM,
    743 	tNUM, tNUM, tCOLON, tSEMI, tLT, tASSIGN, tGT, tMSC,
    744 	tMSC, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN,
    745 	tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN,
    746 	tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN,
    747 	tIDN, tIDN, tIDN, tOBRACK, tMSC, tCBRACK, tCARET, tIDN,
    748 	tMSC, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN,
    749 	tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN,
    750 	tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN,
    751 	tIDN, tIDN, tIDN, tOBRACE, tPIPE, tCBRACE, tNOT, tINV,
    752 	tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
    753 	tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
    754 	tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
    755 	tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
    756 	tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
    757 	tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
    758 	tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
    759 	tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
    760 	tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
    761 	tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
    762 	tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
    763 	tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
    764 	tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
    765 	tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
    766 	tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
    767 	tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
    768 };
    769 
    770 i32 unhex(u32 ch) {
    771 	if ((ch >= '0') && (ch <= '9')) {
    772 		return ch - '0';
    773 	}
    774 	if ((ch >= 'a') && (ch <= 'f')) {
    775 		return ch - 'a' + 10;
    776 	}
    777 	if ((ch >= 'A') && (ch <= 'F')) {
    778 		return ch - 'A' + 10;
    779 	}
    780 	return -1;
    781 }
    782 
    783 u32 scan() {
    784 	while (ctx.ionext == ctx.iolast) {
    785 		if (ctx.fd < 0) {
    786 			ctx.cc = 0;
    787 			return ctx.cc;
    788 		}
    789 		int r = read(ctx.fd, ctx.iobuffer, sizeof(ctx.iobuffer));
    790 		if (r <= 0) {
    791 			ctx.fd = -1;
    792 		} else {
    793 			ctx.iolast = r;
    794 			ctx.ionext = 0;
    795 		}
    796 	}
    797 	ctx.cc = ctx.iobuffer[ctx.ionext];
    798 	ctx.ionext++;
    799 	ctx.byteoffset++;
    800 	return ctx.cc;
    801 }
    802 
    803 u32 unescape(u32 n) {
    804 	if (n == 'n') {
    805 		return 10;
    806 	} else if (n == 'r') {
    807 		return 13;
    808 	} else if (n == 't') {
    809 		return 9;
    810 	} else if (n == '"') {
    811 		return '"';
    812 	} else if (n == '\'') {
    813 		return '\'';
    814 	} else if (n == '\\') {
    815 		return '\\';
    816 	} else if (n == 'x') {
    817 		int x0 = unhex(scan());
    818 		int x1 = unhex(scan());
    819 		if ((x0 < 0) || (x1 < 0)) {
    820 			error("invalid hex escape");
    821 		}
    822 		return (x0 << 4) | x1;
    823 	} else {
    824 		error("invalid escape 0x%02x", n);
    825 		return 0;
    826 	}
    827 }
    828 
    829 token_t scan_string(u32 cc, u32 nc) {
    830 	u32 n = 0;
    831 	while (true) {
    832 		if (nc == '"') {
    833 			break;
    834 		} else if (nc == 0) {
    835 			error("unterminated string");
    836 		} else if (nc == '\\') {
    837 			ctx.tmp[n] = unescape(scan());
    838 		} else {
    839 			ctx.tmp[n] = nc;
    840 		}
    841 		nc = scan();
    842 		n++;
    843 		if (n == 255) {
    844 			error("constant string too large");
    845 		}
    846 	}
    847 	ctx.tmp[n] = 0;
    848 	return tSTR;
    849 }
    850 
    851 token_t scan_keyword(u32 len) {
    852 	ctx.tmp[len] = 0;
    853 	String idn = string_make(ctx.tmp, len);
    854 	ctx.ident = idn;
    855 
    856 	if (len == 2) {
    857 		if (idn == ctx.idn_if) { return tIF; };
    858 	} else if (len == 3) {
    859 		if (idn == ctx.idn_for) { return tFOR; }
    860 		if (idn == ctx.idn_var) { return tVAR; }
    861 		if (idn == ctx.idn_nil) { return tNIL; }
    862 	} else if (len == 4) {
    863 		if (idn == ctx.idn_case) { return tCASE; }
    864 		if (idn == ctx.idn_func) { return tFUNC; }
    865 		if (idn == ctx.idn_else) { return tELSE; }
    866 		if (idn == ctx.idn_enum) { return tENUM; }
    867 		if (idn == ctx.idn_true) { return tTRUE; }
    868 		if (idn == ctx.idn_type) { return tTYPE; }
    869 	} else if (len == 5) {
    870 		if (idn == ctx.idn_break) { return tBREAK; }
    871 		if (idn == ctx.idn_while) { return tWHILE; }
    872 		if (idn == ctx.idn_false) { return tFALSE; }
    873 	} else if (len == 6) {
    874 		if (idn == ctx.idn_switch) { return tSWITCH; }
    875 		if (idn == ctx.idn_struct) { return tSTRUCT; }
    876 		if (idn == ctx.idn_return) { return tRETURN; }
    877 	} else if (len == 8) {
    878 		if (idn == ctx.idn_continue) { return tCONTINUE; }
    879 	}
    880 	return tIDN;
    881 }
    882 
    883 token_t scan_number(u32 cc, u32 nc) {
    884 	u32 n = 1;
    885 	u32 val = cc - '0';
    886 
    887 	if ((cc == '0') && (nc == 'b')) { // binary
    888 		nc = scan();
    889 		while ((nc == '0') || (nc == '1')) {
    890 			val = (val << 1) | (nc - '0');
    891 			nc = scan();
    892 			n++;
    893 			if (n == 34) {
    894 				error("binary constant too large");
    895 			}
    896 		}
    897 	} else if ((cc == '0') && (nc == 'x')) { // hex
    898 		nc = scan();
    899 		while (true) {
    900 			int tmp = unhex(nc);
    901 			if (tmp == -1) {
    902 				break;
    903 			}
    904 			val = (val << 4) | tmp;
    905 			nc = scan();
    906 			n++;
    907 			if (n == 10) {
    908 				error("hex constant too large");
    909 			}
    910 		}
    911 	} else { // decimal
    912 		while (lextab[nc] == tNUM) {
    913 			u32 tmp = (val * 10) + (nc - '0');
    914 			if (tmp <= val) {
    915 				error("decimal constant too large");
    916 			}
    917 			val = tmp;
    918 			nc = scan();
    919 			n++;
    920 		}
    921 	}
    922 	ctx.num = val;
    923 	return tNUM;
    924 }
    925 
    926 token_t scan_ident(u32 cc, u32 nc) {
    927 	ctx.tmp[0] = cc;
    928 	u32 n = 1;
    929 
    930 	while (true) {
    931 		u32 tok = lextab[nc];
    932 		if ((tok == tIDN) || (tok == tNUM)) {
    933 			ctx.tmp[n] = nc;
    934 			n++;
    935 			if (n == 32) { error("identifier too large"); }
    936 			nc = scan();
    937 		} else {
    938 			break;
    939 		}
    940 	}
    941 	return scan_keyword(n);
    942 }
    943 
    944 token_t _next() {
    945 	u8 nc = ctx.cc;
    946 	while (true) {
    947 		u8 cc = nc;
    948 		nc = scan();
    949 		u32 tok = lextab[cc];
    950 		if (tok == tNUM) { // 0..9
    951 			return scan_number(cc, nc);
    952 		} else if (tok == tIDN) { // _ A..Z a..z
    953 			return scan_ident(cc, nc);
    954 		} else if (tok == tDQT) { // "
    955 			return scan_string(cc, nc);
    956 		} else if (tok == tSQT) { // '
    957 			ctx.num = nc;
    958 			if (nc == '\\') {
    959 				ctx.num = unescape(scan());
    960 			}
    961 			nc = scan();
    962 			if (nc != '\'') {
    963 				error("unterminated character constant");
    964 			}
    965 			nc = scan();
    966 			return tNUM;
    967 		} else if (tok == tPLUS) {
    968 			if (nc == '+') { tok = tINC; nc = scan(); }
    969 		} else if (tok == tMINUS) {
    970 			if (nc == '-') { tok = tDEC; nc = scan(); }
    971 		} else if (tok == tAMP) {
    972 			if (nc == '&') { tok = tAND; nc = scan(); }
    973 		} else if (tok == tPIPE) {
    974 			if (nc == '|') { tok = tOR; nc = scan(); }
    975 		} else if (tok == tGT) {
    976 			if (nc == '=') { tok = tGE; nc = scan(); }
    977 			else if (nc == '>') { tok = tRIGHT; nc = scan(); }
    978 		} else if (tok == tLT) {
    979 			if (nc == '=') { tok = tLE; nc = scan(); }
    980 			else if (nc == '<') { tok = tLEFT; nc = scan(); }
    981 		} else if (tok == tASSIGN) {
    982 			if (nc == '=') { tok = tEQ; nc = scan(); }
    983 		} else if (tok == tBANG) {
    984 			if (nc == '=') { tok = tNE; nc = scan(); }
    985 		} else if (tok == tSLASH) {
    986 			if (nc == '/') {
    987 				// comment -- consume until EOL or EOF
    988 				while ((nc != '\n') && (nc != 0)) {
    989 					nc = scan();
    990 				}
    991 				continue;
    992 			}
    993 		} else if (tok == tEOL) {
    994 			ctx.linenumber++;
    995 			ctx.lineoffset = ctx.byteoffset;
    996 			//ctx.xref[ctx.pc / 4] = ctx.linenumber;
    997 			if (ctx.flags & cfVisibleEOL) {
    998 				return tEOL;
    999 			}
   1000 			continue;
   1001 		} else if (tok == tSPC) {
   1002 			continue;
   1003 		} else if ((tok == tMSC) || (tok == tINV)) {
   1004 			error("unknown character 0x%02x", cc);
   1005 		}
   1006 
   1007 		// if we're an AddOp or MulOp, followed by an '='
   1008 		if (((tok & 0xF0) == 0x20) && (nc == '=')) {
   1009 			nc = scan();
   1010 			// transform us to a XEQ operation
   1011 			tok = tok + 0x10;
   1012 		}
   1013 
   1014 		return tok;
   1015 	}
   1016 }
   1017 
   1018 token_t next() {
   1019 	return (ctx.tok = _next());
   1020 }
   1021 
   1022 void token_printstr(void) {
   1023 	u32 n = 0;
   1024 	printf("\"");
   1025 	while (n < 256) {
   1026 		u32 ch = ctx.tmp[n];
   1027 		if (ch == 0) {
   1028 			break;
   1029 		} else if ((ch < ' ') || (ch > '~')) {
   1030 			printf("\\x%02x", ch);
   1031 		} else if ((ch == '"') || (ch == '\\')) {
   1032 			printf("\\%c", ch);
   1033 		} else {
   1034 			printf("%c", ch);
   1035 		}
   1036 		n++;
   1037 	}
   1038 	printf("\"");
   1039 }
   1040 
   1041 void token_print(void) {
   1042 	if (ctx.tok == tNUM) {
   1043 		printf("#%u ", ctx.num);
   1044 	} else if (ctx.tok == tIDN) {
   1045 		printf("@%s ", ctx.tmp);
   1046 	} else if (ctx.tok == tEOL) {
   1047 		printf("\n");
   1048 	} else if (ctx.tok == tSTR) {
   1049 		token_printstr();
   1050 	} else {
   1051 		printf("%s ", tnames[ctx.tok]);
   1052 	}
   1053 }
   1054 
   1055 void expected(const char* what) {
   1056 	error("expected %s, found %s", what, tnames[ctx.tok]);
   1057 }
   1058 
   1059 void expect(token_t tok) {
   1060 	if (ctx.tok != tok) {
   1061 		error("expected %s, found %s", tnames[tok], tnames[ctx.tok]);
   1062 	}
   1063 }
   1064 
   1065 void require(token_t tok) {
   1066 	expect(tok);
   1067 	next();
   1068 }
   1069 
   1070 // ================================================================
   1071 
   1072 //TODO: handle overflow and div/mod-by-zero
   1073 i32 ast_get_const_i32(Ast node) {
   1074 	if (node->kind == AST_CONST) {
   1075 		return node->ival;
   1076 	} else if (node->kind == AST_SYMBOL) {
   1077 		if (node->sym->kind != SYM_CONST) {
   1078 			error("non-const symbol (%s) in constexpr\n", node->sym->name);
   1079 		}
   1080 		return node->sym->value;
   1081 	} else if (ast_kind_is_binop(node->kind)) {
   1082 		i32 left = ast_get_const_i32(node->c0);
   1083 		i32 right = ast_get_const_i32(node->c1);
   1084 		u32 op = node->kind;
   1085 		if (op == AST_ADD) {
   1086 			return left + right;
   1087 		} else if (op == AST_SUB) {
   1088 			return left - right;
   1089 		} else if (op == AST_MUL) {
   1090 			return left * right;
   1091 		} else if (op == AST_DIV) {
   1092 			return left / right;
   1093 		} else if (op == AST_MOD) {
   1094 			return left % right;
   1095 		} else if (op == AST_AND) {
   1096 			return left & right;
   1097 		} else if (op == AST_OR) {
   1098 			return left | right;
   1099 		} else if (op == AST_XOR) {
   1100 			return left | right;
   1101 		} else if (op == AST_BOOL_AND) {
   1102 			return left && right;
   1103 		} else if (op == AST_BOOL_OR) {
   1104 			return left || right;
   1105 		} else {
   1106 			error("unsupported const op %s\n", ast_kind[op]);
   1107 		}
   1108 	} else if (node->kind == AST_NEG) {
   1109 		return -ast_get_const_i32(node->c0);
   1110 	} else if (node->kind == AST_NOT) {
   1111 		return ~ast_get_const_i32(node->c0);
   1112 	} else if (node->kind == AST_BOOL_NOT) {
   1113 		return !ast_get_const_i32(node->c0);
   1114 	} else {
   1115 		error("non-const expr (%s)\n", ast_kind[node->kind]);
   1116 	}
   1117 	return 0;
   1118 }
   1119 
   1120 void ast_type_compat(Ast node, Type type) {
   1121 	if (node->c0) {
   1122 		if (!type_is_compatible(type, node->c0->type)) {
   1123 			error("incompatible types");
   1124 		}
   1125 	}
   1126 	if (node->c1) {
   1127 		if (!type_is_compatible(type, node->c1->type)) {
   1128 			error("incompatible types");
   1129 		}
   1130 	}
   1131 	node->type = type;
   1132 }
   1133 
   1134 Ast ast_make_deref(Ast child) {
   1135 	Ast node = ast_make_simple(AST_DEREF, 0);
   1136 	node->c0 = child;
   1137 	node->type = child->type->base;
   1138 	return node;
   1139 }
   1140 
   1141 Ast ast_require_array_type(Ast node) {
   1142 	if (node->type->kind == TYPE_ARRAY) {
   1143 		return node;
   1144 	}
   1145 	if ((node->type->kind == TYPE_POINTER) &&
   1146 	    (node->type->base->kind == TYPE_ARRAY)) {
   1147 		return ast_make_deref(node);
   1148 	}
   1149 	error("expected an array");
   1150 	return nil;
   1151 }
   1152 
   1153 Ast ast_require_struct_type(Ast node) {
   1154 	if (node->type->kind == TYPE_RECORD) {
   1155 		return node;
   1156 	}
   1157 	if ((node->type->kind == TYPE_POINTER) &&
   1158 	    (node->type->base->kind == TYPE_RECORD)) {
   1159 		return ast_make_deref(node);
   1160 	}
   1161 	error("expected a struct");
   1162 	return nil;
   1163 }
   1164 
   1165 String parse_name(const char* what) {
   1166 	if (ctx.tok != tIDN) {
   1167 		error("expected %s, found %s %u", what, tnames[ctx.tok], ctx.tok);
   1168 	}
   1169 	String str = ctx.ident;
   1170 	next();
   1171 	return str;
   1172 }
   1173 
   1174 Ast parse_expr();
   1175 
   1176 Ast parse_operand() {
   1177 	Ast node = nil;
   1178 	if (ctx.tok == tNUM) {
   1179 		node = ast_make_const(ctx.num, ctx.type_i32);
   1180 	} else if (ctx.tok == tSTR) {
   1181 		error("<TODO> string const");
   1182 	} else if (ctx.tok == tTRUE) {
   1183 		node = ast_make_const(1, ctx.type_bool);
   1184 	} else if (ctx.tok == tFALSE) {
   1185 		node = ast_make_const(0, ctx.type_bool);
   1186 	} else if (ctx.tok == tNIL) {
   1187 		node = ast_make_const(0, ctx.type_nil);
   1188 	} else if (ctx.tok == tOPAREN) {
   1189 		next();
   1190 		node = parse_expr();
   1191 		require(tCPAREN);
   1192 		return node;
   1193 	} else if (ctx.tok == tIDN) {
   1194 		// TODO: could happen in an AST pass to allow
   1195 		// for forward references
   1196 		Symbol sym = symbol_find(ctx.ident);
   1197 		if (sym == nil) {
   1198 			error("undefined identifier '%s'", ctx.ident->text);
   1199 		}
   1200 		node = ast_make_symbol(ctx.ident, sym);
   1201 	} else {
   1202 		error("invalid expression");
   1203 	}
   1204 	next();
   1205 	return node;
   1206 }
   1207 
   1208 Symbol type_find_field(Type type, String name) {
   1209 	Symbol field = type->first;
   1210 	while (field != nil) {
   1211 		if (field->name == name) {
   1212 			return field;
   1213 		}
   1214 		field = field->next;
   1215 	}
   1216 	error("struct has no such field '%s'", name->text);
   1217 	return nil;
   1218 }
   1219 
   1220 Ast parse_primary_expr() {
   1221 	Ast node = parse_operand();
   1222 	while (true) {
   1223 		if (ctx.tok == tOPAREN) {
   1224 			next();
   1225 			//VALIDATE as func or ptr-to-func
   1226 			if (node->kind != AST_SYMBOL) {
   1227 				error("cannot call '%s'", ast_kind[node->kind]);
   1228 			}
   1229 			if (node->sym->kind != SYM_FUNC) {
   1230 				error("cannot call non-function '%s'", node->sym->name->text);
   1231 			}
   1232 			u32 n = 0;
   1233 
   1234 			Ast call = ast_make_simple(AST_CALL, 0);
   1235 			call->c0 = node;
   1236 			call->type = node->sym->type->base;
   1237 			Ast last = call;
   1238 
   1239 			while (ctx.tok != tCPAREN) {
   1240 				// VALIDATE compatibility
   1241 				Ast param = parse_expr();
   1242 				last->c2 = param;
   1243 				last = param;
   1244 				if (ctx.tok != tCPAREN) {
   1245 					require(tCOMMA);
   1246 				}
   1247 				n++;
   1248 			}
   1249 			require(tCPAREN);
   1250 			call->ival = n;
   1251 			node = call;
   1252 		} else if (ctx.tok == tDOT) {
   1253 			next();
   1254 			node = ast_require_struct_type(node);
   1255 			String name = parse_name("field name");
   1256 			Ast dot = ast_make_simple(AST_FIELD, 0);
   1257 			dot->c0 = node;
   1258 			dot->c1 = ast_make_simple(AST_SYMBOL, 0);
   1259 			dot->c1->name = name;
   1260 			dot->c1->sym = type_find_field(node->type, name);
   1261 			dot->c1->type = dot->c1->sym->type;
   1262 			dot->type = dot->c1->type;
   1263 			node = dot;
   1264 		} else if (ctx.tok == tOBRACK) {
   1265 			next();
   1266 			node = ast_require_array_type(node);
   1267 			Ast index = ast_make_simple(AST_INDEX, 0);
   1268 			index->c0 = node;
   1269 			index->c1 = parse_expr();
   1270 			index->type = node->type->base;
   1271 			node = index;
   1272 			require(tCBRACK);
   1273 			//VALIDATE lhs is array, rhs is appropriate numeric
   1274 		} else {
   1275 			break;
   1276 		}
   1277 	}
   1278 	return node;
   1279 }
   1280 
   1281 Ast parse_unary_expr() {
   1282 	u32 op = ctx.tok;
   1283 	if (op == tPLUS) {
   1284 		next();
   1285 		return parse_unary_expr();
   1286 	} else if (op == tMINUS) {
   1287 		next();
   1288 		Ast node = ast_make_unop(AST_NEG, parse_unary_expr());
   1289 		ast_type_compat(node, ctx.type_i32);
   1290 		return node;
   1291 	} else if (op == tBANG) {
   1292 		next();
   1293 		Ast node = ast_make_unop(AST_BOOL_NOT, parse_unary_expr());
   1294 		ast_type_compat(node, ctx.type_bool);
   1295 		return node;
   1296 	} else if (op == tNOT) {
   1297 		next();
   1298 		Ast node = ast_make_unop(AST_NOT, parse_unary_expr());
   1299 		ast_type_compat(node, ctx.type_i32);
   1300 		return node;
   1301 	} else if (op == tAMP) {
   1302 		next();
   1303 		Ast node = ast_make_unop(AST_ADDROF, parse_unary_expr());
   1304 		node->type = type_make_ptr(node->c0->type);
   1305 		return node;
   1306 	} else {
   1307 		return parse_primary_expr();
   1308 	}
   1309 }
   1310 
   1311 Ast parse_mul_expr() {
   1312 	Ast node = parse_unary_expr();
   1313 	while ((ctx.tok & tcMASK) == tcMULOP) {
   1314 		u32 op = (ctx.tok - tSTAR) + AST_MUL;
   1315 		next();
   1316 		node = ast_make_binop(op, node, parse_unary_expr());
   1317 		ast_type_compat(node, ctx.type_i32);
   1318 	}
   1319 	return node;
   1320 }
   1321 
   1322 Ast parse_add_expr() {
   1323 	Ast node = parse_mul_expr();
   1324 	while ((ctx.tok & tcMASK) == tcADDOP) {
   1325 		u32 op = (ctx.tok - tPLUS) + AST_ADD;
   1326 		next();
   1327 		node = ast_make_binop(op, node, parse_mul_expr());
   1328 		ast_type_compat(node, ctx.type_i32);
   1329 	}
   1330 	return node;
   1331 }
   1332 
   1333 Ast parse_rel_expr() {
   1334 	Ast node = parse_add_expr();
   1335 	if ((ctx.tok & tcMASK) == tcRELOP) {
   1336 		u32 op = (ctx.tok - tEQ) + AST_EQ;
   1337 		next();
   1338 		node = ast_make_binop(op, node, parse_add_expr());
   1339 		ast_type_compat(node, ctx.type_bool);
   1340 	}
   1341 	return node;
   1342 }
   1343 
   1344 Ast parse_and_expr() {
   1345 	Ast node = parse_rel_expr();
   1346 	if (ctx.tok == tAND) {
   1347 		while (ctx.tok == tAND) {
   1348 			next();
   1349 			node = ast_make_binop(AST_BOOL_AND, node, parse_rel_expr());
   1350 			ast_type_compat(node, ctx.type_bool);
   1351 		}
   1352 	}
   1353 	return node;
   1354 }
   1355 
   1356 Ast parse_expr() {
   1357 	Ast node = parse_and_expr();
   1358 	if (ctx.tok == tOR) {
   1359 		while (ctx.tok == tOR) {
   1360 			next();
   1361 			node = ast_make_binop(AST_BOOL_OR, node, parse_and_expr());
   1362 			ast_type_compat(node, ctx.type_bool);
   1363 		}
   1364 	}
   1365 	return node;
   1366 }
   1367 
   1368 // fwd_ref_ok indicates that an undefined typename
   1369 // may be treated as a forward reference.  This is
   1370 // only used for pointers (because their size does
   1371 // not depend on their target).
   1372 Type parse_type(bool fwd_ref_ok);
   1373 
   1374 Type parse_struct_type() {
   1375 	Type rectype = type_make(TYPE_RECORD, nil, 0, 0);
   1376 	Symbol last = nil;
   1377 	require(tOBRACE);
   1378 	while (true) {
   1379 		if (ctx.tok == tCBRACE) {
   1380 			next();
   1381 			break;
   1382 		}
   1383 		String name = parse_name("field name");
   1384 		Type type = parse_type(false);
   1385 		Symbol field = symbol_make(SYM_FIELD, 0, name, type, rectype->size);
   1386 
   1387 		// TODO sub-word packing
   1388 		rectype->size += (type->size + 3) & (~3);
   1389 		rectype->len++;
   1390 
   1391 		// add field to record
   1392 		if (last == nil) {
   1393 			rectype->first = field;
   1394 		} else {
   1395 			last->next = field;
   1396 		}
   1397 		last = field;
   1398 
   1399 		if (ctx.tok != tCBRACE) {
   1400 			require(tCOMMA);
   1401 		}
   1402 	}
   1403 	return rectype;
   1404 }
   1405 
   1406 Type parse_array_type() {
   1407 	if (ctx.tok == tCBRACK) {
   1408 		next();
   1409 		return type_make(TYPE_SLICE, parse_type(false), 0, 8);
   1410 	} else {
   1411 		Ast expr = parse_expr();
   1412 		require(tCBRACK);
   1413 		i32 nelem = ast_get_const_i32(expr);
   1414 		if (nelem <= 0) {
   1415 			error("array size must be positive");
   1416 		}
   1417 		Type base = parse_type(false);
   1418 		u32 sz = nelem * base->size;
   1419 		if (sz < nelem) {
   1420 			error("array size overflow");
   1421 		}
   1422 		return type_make(TYPE_ARRAY, base, nelem, sz);
   1423 	}
   1424 }
   1425 
   1426 Type parse_func_type() {
   1427 	error("<TODO> func type");
   1428 	return nil;
   1429 }
   1430 
   1431 Type parse_type(bool fwd_ref_ok) {
   1432 	if (ctx.tok == tSTAR) { // pointer-to
   1433 		next();
   1434 		return type_make(TYPE_POINTER, parse_type(true), 0, 4);
   1435 	} else if (ctx.tok == tOBRACK) { // array-of
   1436 		next();
   1437 		return parse_array_type();
   1438 	} else if (ctx.tok == tFUNC) {
   1439 		next();
   1440 		return parse_func_type();
   1441 	} else if (ctx.tok == tSTRUCT) {
   1442 		next();
   1443 		return parse_struct_type();
   1444 	} else if (ctx.tok == tIDN) {
   1445 		String name = ctx.ident;
   1446 		next();
   1447 		Type type = type_find(name);
   1448 		if (type == nil) {
   1449 			if (fwd_ref_ok) {
   1450 				type = type_make(TYPE_UNDEFINED, nil, 0, 0);
   1451 				type_add(type, name);
   1452 			} else {
   1453 				error("undefined type '%s' not usable here", name->text);
   1454 			}
   1455 		}
   1456 		return type;
   1457 	} else {
   1458 		expected("type");
   1459 		return nil;
   1460 	}
   1461 }
   1462 
   1463 Ast parse_block();
   1464 
   1465 Ast parse_while() {
   1466 	Ast expr = parse_expr();
   1467 	require(tOBRACE);
   1468 	scope_push(SCOPE_LOOP, nil);
   1469 	Ast block = parse_block();
   1470 	block->sym = scope_pop();
   1471 	Ast node = ast_make_simple(AST_WHILE, 0);
   1472 	node->c0 = expr;
   1473 	node->c1 = block;
   1474 	return node;
   1475 }
   1476 
   1477 Ast parse_if() {
   1478 	// if expr { block }
   1479 	Ast node = ast_make_simple(AST_IF, 0);
   1480 	Ast ifnode = ast_make_simple(AST_IFELSE, 0);
   1481 	node->c0 = ifnode;
   1482 	ifnode->c0 = parse_expr();
   1483 	require(tOBRACE);
   1484 	scope_push(SCOPE_BLOCK, nil);
   1485 	ifnode->c1 = parse_block();
   1486 	ifnode->c1->sym = scope_pop();
   1487 	while (ctx.tok == tELSE) {
   1488 		next();
   1489 		// ... else ...
   1490 		if (ctx.tok == tIF) {
   1491 			// ... if expr { block }
   1492 			next();
   1493 			Ast ifelse = ast_make_simple(AST_IFELSE, 0);
   1494 			ifelse->c0 = parse_expr();
   1495 			require(tOBRACE);
   1496 			scope_push(SCOPE_BLOCK, nil);
   1497 			ifelse->c1 = parse_block();
   1498 			ifelse->c1->sym = scope_pop();
   1499 			ifnode->c2 = ifelse;
   1500 			ifnode = ifelse;
   1501 		} else {
   1502 			// ... { block }
   1503 			require(tOBRACE);
   1504 			scope_push(SCOPE_BLOCK, nil);
   1505 			ifnode->c2 = parse_block();
   1506 			ifnode->c2->sym = scope_pop();
   1507 			break;
   1508 		}
   1509 	}
   1510 	return node;
   1511 }
   1512 
   1513 Ast parse_return() {
   1514 	Ast node = ast_make_simple(AST_RETURN, 0);
   1515 	if (ctx.tok == tSEMI) {
   1516 		//if (ctx.fn->type->base != ctx.type_void) {
   1517 		//	error("function requires return type");
   1518 		//}
   1519 		next();
   1520 		//x.type = ctx.type_void;
   1521 	} else {
   1522 		node->c0 = parse_expr();
   1523 		//if (!compatible_type(ctx.fn->type->base, x.type, &x)) {
   1524 		//	error("return types do not match");
   1525 		//}
   1526 		require(tSEMI);
   1527 	}
   1528 	return node;
   1529 }
   1530 
   1531 Ast parse_break() {
   1532 	// XXX break-to-labeled-loop support
   1533 	Scope scope = scope_find(SCOPE_LOOP);
   1534 	if (scope == nil) {
   1535 		error("break must be used from inside a looping construct");
   1536 	}
   1537 	require(tSEMI);
   1538 	return ast_make_simple(AST_BREAK, 0);
   1539 }
   1540 
   1541 Ast parse_continue() {
   1542 	// XXX continue-to-labeled-loop support
   1543 	Scope scope = scope_find(SCOPE_LOOP);
   1544 	if (scope == nil) {
   1545 		error("continue must be used from inside a looping construct");
   1546 	}
   1547 	require(tSEMI);
   1548 	return ast_make_simple(AST_CONTINUE, 0);
   1549 }
   1550 
   1551 // unsafe write op
   1552 void STORE(u32 val, u32* ptr, u32 n, u32 sz) {
   1553 	if (sz == 4) {
   1554 		ptr[n >> 2] = val;
   1555 	} else if (sz == 1) {
   1556 		((u8*)ptr)[n] = val;
   1557 	}
   1558 }
   1559 
   1560 u32 parse_array_init(Symbol var, u32ptr data, u32 dmax, u32 sz) {
   1561 	memset(data, 0, dmax);
   1562 	u32 n = 0;
   1563 	while (true) {
   1564 		if (ctx.tok == tCBRACE) {
   1565 			next();
   1566 			break;
   1567 		}
   1568 		if (n >= dmax) {
   1569 			error("initializer too large");
   1570 		}
   1571 		Ast expr = parse_expr();
   1572 		i32 v = ast_get_const_i32(expr);
   1573 
   1574 		// VALIDATE type compat/fit
   1575 		STORE(v, data, n, sz);
   1576 		n += sz;
   1577 		if (ctx.tok != tCBRACE) {
   1578 			require(tCOMMA);
   1579 		}
   1580 	}
   1581 	return n;
   1582 }
   1583 
   1584 void parse_struct_init(Symbol var, u32ptr data) {
   1585 	memset(data, 0, var->type->size);
   1586 	while (true) {
   1587 		if (ctx.tok == tCBRACE) {
   1588 			next();
   1589 			break;
   1590 		}
   1591 		String name = parse_name("field name");
   1592 		Symbol field = var->type->first;
   1593 		while (true) {
   1594 			if (field == nil) {
   1595 				error("structure has no '%s' field", name->text);
   1596 			}
   1597 			if (field->name == name) {
   1598 				break;
   1599 			}
   1600 			field = field->next;
   1601 		}
   1602 		require(tCOLON);
   1603 		Ast expr = parse_expr();
   1604 		i32 v = ast_get_const_i32(expr);
   1605 		// VALIDATE type compat/fit
   1606 		STORE(v, data, field->value, 4);
   1607 		if (ctx.tok != tCBRACE) {
   1608 			require(tCOMMA);
   1609 		}
   1610 	}
   1611 }
   1612 
   1613 Ast parse_local_var() {
   1614 	String name = parse_name("variable name");
   1615 	// TODO: allow type inference
   1616 	Type type = parse_type(false);
   1617 
   1618 	Symbol sym = symbol_make(SYM_LOCAL, 0, name, type, ctx.alloc_stack);
   1619 	scope_add_symbol(ctx.scope, sym);
   1620 
   1621 	ctx.alloc_stack = ctx.alloc_stack + type->size;
   1622 	if (ctx.local_stack < ctx.alloc_stack) {
   1623 		ctx.local_stack = ctx.alloc_stack;
   1624 	}
   1625 
   1626 	Ast node = nil;
   1627 	if (ctx.tok == tASSIGN) {
   1628 		next();
   1629 		node = ast_make_simple(AST_EXPR, 0);
   1630 		node->c0 = ast_make_binop(AST_ASSIGN, ast_make_symbol(name, sym), parse_expr());
   1631 		node->c0->type = node->c0->c1->type;
   1632 	}
   1633 	require(tSEMI);
   1634 
   1635 	return node;
   1636 }
   1637 
   1638 void parse_global_var() {
   1639 	String name = parse_name("variable name");
   1640 	// TODO: allow type inference
   1641 	Type type = parse_type(false);
   1642 
   1643 	Symbol sym = symbol_make(SYM_GLOBAL, 0, name, type, ctx.gp);
   1644 	symbol_add_global(sym);
   1645 
   1646 	// advance global pointer and round to next workd
   1647 	ctx.gp = ctx.gp + type->size;
   1648 	ctx.gp = (ctx.gp + 3) & (~3);
   1649 
   1650 	if (ctx.tok == tASSIGN) {
   1651 		next();
   1652 		u32* data = ctx.data + (sym->value >> 2);
   1653 		if (ctx.tok == tOBRACE) {
   1654 			next();
   1655 			if (type->kind == TYPE_ARRAY) {
   1656 				parse_array_init(sym, data, type->size, type->base->size);
   1657 			} else if (type->kind == TYPE_RECORD) {
   1658 				parse_struct_init(sym, data);
   1659 			} else {
   1660 				error("cannot initialize this way");
   1661 			}
   1662 		} else {
   1663 			Ast expr = parse_expr();
   1664 			i32 v = ast_get_const_i32(expr);
   1665 			// DISCARD expr
   1666 			// VALIDATE compatible integer type
   1667 			ctx.data[sym->value >> 2] = v;
   1668 		}
   1669 	}
   1670 	require(tSEMI);
   1671 }
   1672 
   1673 Ast parse_expr_statement() {
   1674 	u32 srcloc = ctx.linenumber;
   1675 
   1676 	Ast node = nil;
   1677 	Ast left = parse_expr();
   1678 	Ast right = nil;
   1679 	if (ctx.tok == tASSIGN) {
   1680 		next();
   1681 		right = parse_expr();
   1682 		node = ast_make_binop(AST_ASSIGN, left, right);
   1683 		node->type = node->c1->type;
   1684 	} else if ((ctx.tok & tcMASK) == tcAEQOP) {
   1685 		u32 op = ctx.tok; // - tADDEQ;
   1686 		next();
   1687 		right = parse_expr();
   1688 		node = ast_make_binop(op, left, right);
   1689 		// TODO: type prop, convert x op= y  to x = x op y
   1690 	} else if ((ctx.tok & tcMASK) == tcMEQOP) {
   1691 		u32 op = ctx.tok; // - tMULEQ;
   1692 		next();
   1693 		right = parse_expr();
   1694 		node = ast_make_binop(op, left, right);
   1695 		// TODO: type prop, convert x op= y  to x = x op y
   1696 	} else if ((ctx.tok == tINC) || (ctx.tok == tDEC)) {
   1697 		u32 op;
   1698 		if (ctx.tok == tINC) {
   1699 			op = AST_ADD;
   1700 		} else {
   1701 			op = AST_SUB;
   1702 		}
   1703 		next();
   1704 		right = ast_make_const(1, ctx.type_i32);
   1705 		right = ast_make_binop(op, left, right);
   1706 		right->type = ctx.type_i32;
   1707 		node = ast_make_binop(AST_ASSIGN, left, right);
   1708 		node->type = ctx.type_i32;
   1709 		// TODO: check type?
   1710 	} else {
   1711 		node = left;
   1712 	}
   1713 	require(tSEMI);
   1714 
   1715 	// wrap in a statement node
   1716 	Ast stmt = ast_make_simple(AST_EXPR, 0);
   1717 	stmt->c0 = node;
   1718 	stmt->srcloc = srcloc;
   1719 
   1720 	return stmt;
   1721 }
   1722 
   1723 Ast parse_block() {
   1724 	Ast block = ast_make_simple(AST_BLOCK, 0);
   1725 	Ast last = block;
   1726 	Ast node;
   1727 	while (true) {
   1728 		if (ctx.tok == tCBRACE) {
   1729 			next();
   1730 			break;
   1731 		} else if (ctx.tok == tRETURN) {
   1732 			next();
   1733 			node = parse_return();
   1734 		} else if (ctx.tok == tBREAK) {
   1735 			next();
   1736 			node = parse_break();
   1737 		} else if (ctx.tok == tCONTINUE) {
   1738 			next();
   1739 			node = parse_continue();
   1740 		} else if (ctx.tok == tWHILE) {
   1741 			next();
   1742 			node = parse_while();
   1743 		} else if (ctx.tok == tIF) {
   1744 			next();
   1745 			node = parse_if();
   1746 		} else if (ctx.tok == tVAR) {
   1747 			next();
   1748 			node = parse_local_var();
   1749 		} else if (ctx.tok == tSEMI) {
   1750 			next();
   1751 			// empty statement
   1752 			continue;
   1753 		} else {
   1754 			node = parse_expr_statement();
   1755 		}
   1756 
   1757 		// append to block's list of statements
   1758 		// some of these don't always return a node (like local var)
   1759 		if (node != nil) {
   1760 			last->c2 = node;
   1761 			last = node;
   1762 		}
   1763 	}
   1764 	return block;
   1765 }
   1766 
   1767 Ast parse_function_body(Symbol fn) {
   1768 	Ast node;
   1769 	ctx.fn = fn;
   1770 	ctx.local_stack = 0;
   1771 	ctx.alloc_stack = 0;
   1772 	scope_push(SCOPE_FUNC, fn->first); // scope for parameters
   1773 	scope_push(SCOPE_BLOCK, nil);      // top scope for function body
   1774 	node = parse_block();
   1775 	node->sym = scope_pop();
   1776 	scope_pop();
   1777 	ctx.fn = nil;
   1778 	fn->type->size = ctx.local_stack;
   1779 	return node;
   1780 }
   1781 
   1782 Symbol parse_param(String fname, u32 n, Symbol first, Symbol last) {
   1783 	String pname = parse_name("parameter name");
   1784 	Type ptype = parse_type(false);
   1785 
   1786 	// arrays and structs are always passed as reference parameters
   1787 	if ((ptype->kind == TYPE_ARRAY) || (ptype->kind == TYPE_RECORD)) {
   1788 		ptype = type_make_ptr(ptype);
   1789 	}
   1790 
   1791 	Symbol param = symbol_make(SYM_PARAM, 0, pname, ptype, /* 4 + */ n * 4);
   1792 
   1793 	Symbol sym = first;
   1794 	while (sym != nil) {
   1795 		if (sym->name == param->name) {
   1796 			error("duplicate parameter name '%s'", fname->text);
   1797 		}
   1798 		sym = sym->next;
   1799 	}
   1800 
   1801 	if (last != nil) {
   1802 		last->next = param;
   1803 	}
   1804 	return param;
   1805 }
   1806 
   1807 Ast parse_function() {
   1808 	Symbol first = nil;
   1809 	Symbol last = nil;
   1810 	u32 n = 0;
   1811 	String fname = parse_name("function name");
   1812 	Type rettype = ctx.type_void;
   1813 
   1814 	require(tOPAREN);
   1815 
   1816 	// process parameters
   1817 	if (ctx.tok != tCPAREN) {
   1818 		first = parse_param(fname, n, nil, nil);
   1819 		last = first;
   1820 		n++;
   1821 		while (ctx.tok == tCOMMA) {
   1822 			next();
   1823 			last = parse_param(fname, n, first, last);
   1824 			n++;
   1825 		}
   1826 	}
   1827 	require(tCPAREN);
   1828 
   1829 	if ((ctx.tok != tSEMI) && (ctx.tok != tOBRACE)) {
   1830 		rettype = parse_type(false);
   1831 	}
   1832 
   1833 	int isdef = 0;
   1834 	if (ctx.tok == tSEMI) {
   1835 		// declaration
   1836 		next();
   1837 	} else if (ctx.tok == tOBRACE) {
   1838 		// definition
   1839 		next();
   1840 		isdef = 1;
   1841 	} else {
   1842 		expected("semi or open brace");
   1843 	}
   1844 
   1845 	// Look for an existing declaration or definintion of this function
   1846 	Symbol sym = symbol_find(fname);
   1847 	if (sym != nil) {
   1848 		// such a named identifier exists
   1849 		// check to see if we are in agreement with it
   1850 		if (sym->kind != SYM_FUNC) {
   1851 			error("redefining '%s' as function", fname->text);
   1852 		}
   1853 		if (isdef && (sym->flags & SYM_IS_DEFINED)) {
   1854 			error("redefined function '%s'", fname->text);
   1855 		}
   1856 		if (rettype != sym->type->base) {
   1857 			error("func '%s' return type differs from decl", fname->text);
   1858 		}
   1859 		if (sym->type->len != n) {
   1860 			error("func '%s' parameter count differs from decl", fname->text);
   1861 		}
   1862 		Symbol pa = first;
   1863 		Symbol pb = sym->type->first;
   1864 		u32 i = 1;
   1865 		while ((pa != nil) && (pb != nil)) {
   1866 			if (!type_is_same(pa->type, pb->type)) {
   1867 				error("func '%s' param %u differs from decl", fname->text, i);
   1868 			}
   1869 			pa = pa->next;
   1870 			pb = pb->next;
   1871 		}
   1872 	} else {
   1873 		// if there was no existing record of this function, create one now
   1874 		Type type = type_make(TYPE_FUNC, rettype, n, 0);
   1875 		sym = symbol_make(SYM_FUNC, 0, fname, type, 0);
   1876 		sym->first = first;
   1877 		type->first = first;
   1878 		type->sym = sym;
   1879 		type->len = n; // parameter count
   1880 		symbol_add_global(sym);
   1881 	}
   1882 
   1883 	// handle definition if it is one
   1884 	if (isdef) {
   1885 		Ast node = ast_make_simple(AST_FUNC, 0);
   1886 		node->name = fname;
   1887 		node->sym = sym;
   1888 
   1889 		// mark as defined and save entry address
   1890 		sym->flags |= SYM_IS_DEFINED;
   1891 		node->c0 = parse_function_body(sym);
   1892 		return node;
   1893 	}
   1894 
   1895 	return nil;
   1896 }
   1897 
   1898 void parse_type_def() {
   1899 	String name = parse_name("type name");
   1900 	Type type = parse_type(false);
   1901 	Type prev = type_find(name);
   1902 	if (prev == nil) {
   1903 		type_add(type, name);
   1904 	} else {
   1905 		if (prev->kind != TYPE_UNDEFINED) {
   1906 			error("cannot redefine type '%s'\n", name->text);
   1907 		}
   1908 		prev->kind = type->kind;
   1909 		prev->base = type->base;
   1910 		prev->first = type->first;
   1911 		prev->len = type->len;
   1912 		prev->size = type->size;
   1913 		prev->sym->type = type;
   1914 		// XXX discard type
   1915 		type = prev;
   1916 	}
   1917 	require(tSEMI);
   1918 }
   1919 
   1920 void parse_enum_def() {
   1921 	require(tOBRACE);
   1922 	u32 val = 0;
   1923 	while (ctx.tok != tCBRACE) {
   1924 		String name = parse_name("enum tag name");
   1925 		Symbol sym = symbol_find(name);
   1926 		if (sym != nil) {
   1927 			error("cannot redefine %s as enum tag\n", name->text);
   1928 		}
   1929 		if (ctx.tok == tASSIGN) {
   1930 			next();
   1931 			Ast expr = parse_expr();
   1932 			val = ast_get_const_i32(expr);
   1933 			// typecheck? discard subtree
   1934 		}
   1935 		require(tCOMMA);
   1936 		symbol_add_global(symbol_make(SYM_CONST, 0, name, ctx.type_i32, val));
   1937 		val++;
   1938 	}
   1939 	require(tCBRACE);
   1940 	require(tSEMI);
   1941 }
   1942 
   1943 Ast parse_program() {
   1944 	Ast program = ast_make_simple(AST_PROGRAM, 0);
   1945 	Ast last = program;
   1946 	next();
   1947 	for (;;) {
   1948 		if (ctx.tok == tENUM) {
   1949 			next();
   1950 			parse_enum_def();
   1951 		} else if (ctx.tok == tTYPE) {
   1952 			next();
   1953 			parse_type_def();
   1954 		} else if (ctx.tok == tFUNC) {
   1955 			next();
   1956 			Ast node = parse_function();
   1957 			if (node != nil) {
   1958 				last->c2 = node;
   1959 				last = node;
   1960 			}
   1961 		} else if (ctx.tok == tVAR) {
   1962 			next();
   1963 			parse_global_var();
   1964 		} else if (ctx.tok == tEOF) {
   1965 			return program;
   1966 		} else {
   1967 			expected("function, variable, or type definition");
   1968 		}
   1969 	}
   1970 }
   1971 
   1972 void type_dump_compact(FILE* fp, Type type) {
   1973 	if (type->kind == TYPE_FUNC) {
   1974 		fprintf(fp, "fn(");
   1975 		Symbol param = type->first;
   1976 		while (param != nil) {
   1977 			type_dump_compact(fp, param->type);
   1978 			if (param->next != nil) {
   1979 				fprintf(fp, ", ");
   1980 			}
   1981 			param = param->next;
   1982 		}
   1983 		fprintf(fp, ") -> ");
   1984 		type_dump_compact(fp, type->base);
   1985 	} else if (type->sym != nil) {
   1986 		fprintf(fp, "%s", type->sym->name->text);
   1987 	} else if (type->kind == TYPE_ARRAY) {
   1988 		fprintf(fp, "[%u]", type->len);
   1989 		type_dump_compact(fp, type->base);
   1990 	} else if (type->kind == TYPE_RECORD) {
   1991 		fprintf(fp, "{...}");
   1992 	} else {
   1993 		fprintf(fp, "%s", type_kind[type->kind]);
   1994 		if ((type->kind == TYPE_POINTER) || (type->kind == TYPE_SLICE)) {
   1995 			type_dump_compact(fp, type->base);
   1996 		}
   1997 	}
   1998 }
   1999 
   2000 void ast_dump_syms(FILE* fp, Symbol sym, str tag, u32 indent) {
   2001 	while (sym != nil) {
   2002 		u32 i = 0;
   2003 		while (i < indent) { fprintf(fp, "  "); i++; }
   2004 		fprintf(fp, "%s '%s' ", tag, sym->name->text);
   2005 		type_dump_compact(fp, sym->type);
   2006 		fprintf(fp, "\n");
   2007 		sym = sym->next;
   2008 	}
   2009 }
   2010 
   2011 void ast_dump_rtype(FILE* fp, Symbol sym, u32 indent) {
   2012 	u32 i = 0;
   2013 	while (i < indent) { fprintf(fp, "  "); i++; }
   2014 	fprintf(fp, "Returns ");
   2015 	type_dump_compact(fp, sym->type->base);
   2016 	fprintf(fp, "\n");
   2017 }
   2018 
   2019 int _ast_dump(FILE* fp, Ast node, u32 indent, bool dumplist, Ast mark) {
   2020 	u32 i = 0;
   2021 	i32 r = 0;
   2022 
   2023 	if (mark == node) {
   2024 		while (i < indent) { fprintf(fp, ">>"); i++; }
   2025 	} else {
   2026 		while (i < indent) { fprintf(fp, "  "); i++; }
   2027 	}
   2028 	indent = indent + 1;
   2029 
   2030 	fprintf(fp, "%s ", ast_kind[node->kind]);
   2031 	if (node->kind == AST_SYMBOL) {
   2032 		fprintf(fp, "'%s' ", node->name->text);
   2033 		if (node->type != nil) {
   2034 			type_dump_compact(fp, node->type);
   2035 		}
   2036 		fprintf(fp, "\n");
   2037 	} else if (node->kind == AST_CONST) {
   2038 		fprintf(fp, "0x%x ", node->ival);
   2039 		if (node->type != nil) {
   2040 			type_dump_compact(fp, node->type);
   2041 		}
   2042 		fprintf(fp, "\n");
   2043 	} else {
   2044 		if (node->name) {
   2045 			fprintf(fp,"'%s' ",node->name->text);
   2046 		}
   2047 		if (node->type) {
   2048 			type_dump_compact(fp, node->type);
   2049 			fprintf(fp, " ");
   2050 		}
   2051 		if (node->ival) {
   2052 			fprintf(fp,"%u", node->ival);
   2053 		}
   2054 		fprintf(fp, "\n");
   2055 	}
   2056 	if (node->kind == AST_FUNC) {
   2057 		ast_dump_syms(fp, node->sym->first, "Param", indent);
   2058 		ast_dump_rtype(fp, node->sym, indent);
   2059 	}
   2060 	if (node->kind == AST_BLOCK) {
   2061 		ast_dump_syms(fp, node->sym, "Local", indent);
   2062 	}
   2063 	if (mark == node) {
   2064 		return 1;
   2065 	}
   2066 	if (node->c0 != nil) {
   2067 		if (_ast_dump(fp, node->c0, indent, true, mark)) {
   2068 			return 1;
   2069 		}
   2070 	}
   2071 	if (node->c1 != nil) {
   2072 		if (_ast_dump(fp, node->c1, indent, true, mark)) {
   2073 			return 1;
   2074 		}
   2075 	}
   2076 	if (dumplist) {
   2077 		node = node->c2;
   2078 		while (node != nil) {
   2079 			if (_ast_dump(fp, node, indent, false, mark)) {
   2080 				return 1;
   2081 			}
   2082 			node = node->c2;
   2083 		}
   2084 	}
   2085 	return 0;
   2086 }
   2087 
   2088 void ast_dump(FILE* fp, Ast node, Ast mark) {
   2089 	_ast_dump(fp, node, 0, true, mark);
   2090 }
   2091 
   2092 void ast_dump_node(FILE* fp, Ast node, bool dump_c2) {
   2093 	fprintf(fp, "\"%p\" [ label=<<TABLE BORDER=\"0\" CELLBORDER=\"1\" CELLSPACING=\"0\">\n", node);
   2094 	fprintf(fp, "<TR><TD PORT=\"p0\" COLSPAN=\"3\">%s", ast_kind[node->kind]);
   2095 	if (node->kind == AST_CONST) {
   2096 		fprintf(fp, " 0x%x", node->ival);
   2097 	} else if (node->name != nil) {
   2098 		fprintf(fp, " %s", node->name->text);
   2099 	} else if (node->ival) {
   2100 		fprintf(fp, " %u", node->ival);
   2101 	}
   2102 	fprintf(fp, "</TD></TR>\n");
   2103 	if (node->type) {
   2104 		fprintf(fp, "<TR><TD COLSPAN=\"3\">");
   2105 		type_dump_compact(fp, node->type);
   2106 		fprintf(fp, "</TD></TR>");
   2107 	}
   2108 	fprintf(fp, "<TR><TD PORT=\"c0\"></TD>"
   2109 	        "<TD PORT=\"c1\"></TD>"
   2110 	        "<TD PORT=\"c2\"></TD></TR></TABLE>>; ];\n");
   2111 
   2112 	if (node->c0 != nil) {
   2113 		ast_dump_node(fp, node->c0, true);
   2114 		fprintf(fp, "\"%p\":c0 -> \"%p\" [ ];\n", node, node->c0);
   2115 	}
   2116 	if (node->c1 != nil) {
   2117 		ast_dump_node(fp, node->c1, true);
   2118 		fprintf(fp, "\"%p\":c1 -> \"%p\" [ ];\n", node, node->c1);
   2119 	}
   2120 	if ((node->c2 != nil) && dump_c2) {
   2121 		Ast list = node->c2;
   2122 		Ast prev = node;
   2123 		while (list != nil) {
   2124 			ast_dump_node(fp, list, false);
   2125 			fprintf(fp, "\"%p\":c2 -> \"%p\":p0 [ ];\n", prev, list);
   2126 			list = list->c2;
   2127 			prev = prev->c2;
   2128 		}
   2129 		fprintf(fp, "{ rank=same");
   2130 		if (node->c0 != nil) {
   2131 			fprintf(fp, " \"%p\"", node->c0);
   2132 		}
   2133 		if (node->c1 != nil) {
   2134 			fprintf(fp, " \"%p\"", node->c1);
   2135 		}
   2136 		list = node->c2;
   2137 		while (list != nil) {
   2138 			fprintf(fp, " \"%p\"", list);
   2139 			list = list->c2;
   2140 		}
   2141 		fprintf(fp," }\n");
   2142 	}
   2143 }
   2144 
   2145 void ast_dump_graph(FILE* fp, Ast node) {
   2146 	fprintf(fp,
   2147 "digraph g {\n"
   2148 "graph [ rankdir=TB; ];\n"
   2149 "node [ shape=plain; ];\n"
   2150 );
   2151 	ast_dump_node(fp, node, false);
   2152 	fprintf(fp, "}\n");
   2153 }
   2154 
   2155 void ast_dump_graphs(Ast node) {
   2156 	node = node->c2;
   2157 	while (node != nil) {
   2158 		if (node->kind == AST_FUNC) {
   2159 			char tmp[256];
   2160 			sprintf(tmp, "%s.ast.dot", node->sym->name->text);
   2161 			FILE* fp;
   2162 			if ((fp = fopen(tmp, "w")) != nil) {
   2163 				ast_dump_graph(fp, node);
   2164 				fclose(fp);
   2165 			}
   2166 		}
   2167 		node = node->c2;
   2168 	}
   2169 }
   2170 
   2171 #if IR
   2172 #include "codegen-ir.c"
   2173 #else
   2174 #include "codegen-risc5-simple.c"
   2175 #endif
   2176 
   2177 #if 0
   2178 void type_dump_all() {
   2179 	Symbol sym = ctx.typetab;
   2180 
   2181 	while (sym != nil) {
   2182 		fprintf(stderr, "Symbol %p '%s'\n", sym, sym->name->text);
   2183 		fprintf(stderr, "  Type %p %s len=%u sz=%u\n",
   2184 		        sym->type, type_id_tab[sym->type->kind],
   2185 		        sym->type->len, sym->type->size);
   2186 		fprintf(stderr, "       sym=%p first=%p\n",
   2187 		        sym->type->sym, sym->type->first);
   2188 		sym = sym->next;
   2189 	}
   2190 }
   2191 #endif
   2192 
   2193 // ================================================================
   2194 
   2195 i32 main(int argc, args argv) {
   2196 	str outname = "out.bin";
   2197 	str lstname = nil;
   2198 	str srcname = nil;
   2199 	str astname = nil;
   2200 	bool dump = false;
   2201 	bool scan_only = false;
   2202 	bool dump_graphs = false;
   2203 
   2204 	ctx_init();
   2205 	ctx.filename = "<commandline>";
   2206 
   2207 	while (argc > 1) {
   2208 		if (!strcmp(argv[1],"-o")) {
   2209 			if (argc < 2) {
   2210 				error("option -o requires argument");
   2211 			}
   2212 			outname = argv[2];
   2213 			argc--;
   2214 			argv++;
   2215 		} else if (!strcmp(argv[1], "-l")) {
   2216 			if (argc < 2) {
   2217 				error("option -l requires argument");
   2218 			}
   2219 			lstname = argv[2];
   2220 			argc--;
   2221 			argv++;
   2222 		} else if (!strcmp(argv[1], "-a")) {
   2223 			if (argc < 2) {
   2224 				error("option -a requires argument");
   2225 			}
   2226 			astname = argv[2];
   2227 			argc--;
   2228 			argv++;
   2229 		} else if (!strcmp(argv[1], "-p")) {
   2230 			dump = true;
   2231 		} else if (!strcmp(argv[1], "-g")) {
   2232 			dump_graphs = true;
   2233 		} else if (!strcmp(argv[1], "-v")) {
   2234 			ctx.flags |= cfTraceCodeGen;
   2235 		} else if (!strcmp(argv[1], "-s")) {
   2236 			scan_only = true;
   2237 		} else if (!strcmp(argv[1], "-A")) {
   2238 			ctx.flags |= cfAbortOnError;
   2239 		} else if (argv[1][0] == '-') {
   2240 			error("unknown option: %s", argv[1]);
   2241 		} else {
   2242 			if (srcname != nil) {
   2243 				error("multiple source files disallowed");
   2244 			} else {
   2245 				srcname = argv[1];
   2246 			}
   2247 		}
   2248 		argc--;
   2249 		argv++;
   2250 	}
   2251 
   2252 	if (srcname == nil) {
   2253 		printf(
   2254 "usage:    compiler [ <option> | <sourcefilename> ]*\n"
   2255 "\n"
   2256 "options:  -o <filename>    binary output (default 'out.bin')\n"
   2257 "          -l <filename>    listing output (default none)\n"
   2258 "          -a <filename>    dump AST tree\n"
   2259 "          -v               trace code generation\n"
   2260 "          -s               scan only\n"
   2261 "          -p               dump type context\n"
   2262 "          -A               abort on error\n");
   2263 		return 0;
   2264 	}
   2265 	ctx.filename = srcname;
   2266 
   2267 	ctx_open_source(srcname);
   2268 	ctx.linenumber = 1;
   2269 	ctx.lineoffset = 0;
   2270 	// prime the lexer
   2271 	scan();
   2272 
   2273 	if (scan_only) {
   2274 		ctx.flags |= 1;
   2275 		while (true) {
   2276 			next();
   2277 			token_print();
   2278 			if (ctx.tok == tEOF) {
   2279 				printf("\n");
   2280 				return 0;
   2281 			}
   2282 		}
   2283 	}
   2284 
   2285 	Ast a = parse_program();
   2286 
   2287 	if (astname != nil) {
   2288 		FILE *fp;
   2289 		if ((fp = fopen(astname, "w")) != nil) {
   2290 			ast_dump(fp, a, nil);
   2291 			fclose(fp);
   2292 		}
   2293 	}
   2294 
   2295 	if (dump_graphs) {
   2296 		ast_dump_graphs(a);
   2297 	}
   2298 
   2299 	gen_program(a);
   2300 
   2301 	binary_write(outname);
   2302 
   2303 	if (lstname != nil) {
   2304 		listing_write(lstname, ctx.filename);
   2305 	}
   2306 
   2307 	//type_dump_all();
   2308 
   2309 	return 0;
   2310 }