compiler

Unnamed Compiled Systems Language Project
git clone http://frotz.net/git/compiler.git
Log | Files | Refs

commit f6aa98fb22a36686f04b8f279c6f9bd5eeff855b
parent bd7b4a25f295fa9041078f9cb4a9865ac84bad19
Author: Brian Swetland <swetland@frotz.net>
Date:   Fri, 26 Nov 2021 13:33:27 -0800

compiler2: rewinding to explore a different approach

This is compiler.c trimmed back to just the lexical scanner,
string table, main(), and a couple helpers.  The parser
is present, #if 0'd out, and will be used as the template
for a new AST-generating parser.

The straightline, emit code directly from the parser approach
was working, but getting increasingly brittle.  Introducing
an AST should simplify a number of operations, and allow for
many things that are infeasible to do in a single pass.

Diffstat:
MMakefile | 8+++++++-
Asrc/compiler2.c | 1521+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 1528 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile @@ -1,5 +1,5 @@ -all: bin/compiler bin/fs bin/r5d bin/r5e bin/mkinstab out/test/summary.txt +all: bin/compiler bin/compiler2 bin/fs bin/r5d bin/r5e bin/mkinstab out/test/summary.txt clean: rm -rf bin out @@ -13,12 +13,18 @@ out/compiler.txt: src/compiler.c bin/preproc bin/preproc < src/compiler.c > out/compiler.txt CFLAGS := -Wall -O2 -g -Iexternal/oberon-risc-emu -Isrc -fno-builtin +CFLAGS += -Wno-unused-but-set-variable + CC := gcc bin/compiler: src/compiler.c src/risc5dis.c out/risc5ins.h @mkdir -p bin $(CC) -o $@ $(CFLAGS) -Wno-unused-result -DC src/compiler.c src/risc5dis.c +bin/compiler2: src/compiler2.c + @mkdir -p bin + $(CC) -o $@ $(CFLAGS) -Wno-unused-result -DC src/compiler2.c + bin/rewriter: src/rewriter.c @mkdir -p bin $(CC) -o $@ $(CFLAGS) src/rewriter.c diff --git a/src/compiler2.c b/src/compiler2.c @@ -0,0 +1,1521 @@ +// Copyright 2020, Brian Swetland <swetland@frotz.net> +// Licensed under the Apache License, Version 2.0. + +#if C +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include <stdint.h> +#include <stdbool.h> +#include <strings.h> +#include <string.h> + +#include <fcntl.h> +#include <unistd.h> +#include <sys/stat.h> + +//#include "risc5.h" + +// builtin types +#define nil 0 +typedef uint32_t u32; +typedef int32_t i32; +typedef uint8_t u8; + +// rewriter only handles single word types +typedef uint32_t token_t; +typedef char* str; +typedef char** args; +typedef uint32_t* u32ptr; +#endif + +// token classes (tok & tcMASK) +enum { + tcRELOP = 0x08, tcADDOP = 0x10, tcMULOP = 0x18, + tcAEQOP = 0x20, tcMEQOP = 0x28, tcMASK = 0xF8, +}; + +enum { + // EndMarks, Braces, Brackets Parens + tEOF, tEOL, tOBRACE, tCBRACE, tOBRACK, tCBRACK, tOPAREN, tCPAREN, + // RelOps (do not reorder) + tEQ, tNE, tLT, tLE, tGT, tGE, tx0E, tx0F, + // AddOps (do not reorder) + tPLUS, tMINUS, tPIPE, tCARET, tx14, tx15, tx16, tx17, + // MulOps (do not reorder) + tSTAR, tSLASH, tPERCENT, tAMP, tANDNOT, tLEFT, tRIGHT, tx1F, + // AsnOps (do not reorder) + tADDEQ, tSUBEQ, tOREQ, tXOREQ, tx24, tx25, tx26, tx27, + tMULEQ, tDIVEQ, tMODEQ, tANDEQ, tANNEQ, tLSEQ, tRSEQ, t2F, + // Various, UnaryNot, LogicalOps, + tSEMI, tCOLON, tDOT, tCOMMA, tNOT, tAND, tOR, tBANG, + tASSIGN, tINC, tDEC, + // Keywords + tTYPE, tFUNC, tSTRUCT, tVAR, tENUM, + tIF, tELSE, tWHILE, + tBREAK, tCONTINUE, tRETURN, + tFOR, tSWITCH, tCASE, + tTRUE, tFALSE, tNIL, + tIDN, tNUM, tSTR, + // used internal to the lexer but never returned + tSPC, tINV, tDQT, tSQT, tMSC, +}; + +str tnames[] = { + "<EOF>", "<EOL>", "{", "}", "[", "]", "(", ")", + "==", "!=", "<", "<=", ">", ">=", "", "", + "+", "-", "|", "^", "", "", "", "", + "*", "/", "%", "&", "&~", "<<", ">>", "", + "+=", "-=", "|=", "^=", "", "", "", "", + "*=", "/=", "%=", "&=", "&~=", "<<=", ">>=", "", + ";", ":", ".", ",", "~", "&&", "||", "!", + "=", "++", "--", + "type", "func", "struct", "var", "enum", + "if", "else", "while", + "break", "continue", "return", + "for", "switch", "case", + "true", "false", "nil", + "<ID>", "<NUM>", "<STR>", + "<SPC>", "<INV>", "<DQT>", "<SQT>", "<MSC>", +}; + +u8 lextab[256] = { + tEOF, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tSPC, tEOL, tSPC, tINV, tSPC, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tSPC, tBANG, tDQT, tMSC, tMSC, tPERCENT, tAMP, tSQT, + tOPAREN, tCPAREN, tSTAR, tPLUS, tCOMMA, tMINUS, tDOT, tSLASH, + tNUM, tNUM, tNUM, tNUM, tNUM, tNUM, tNUM, tNUM, + tNUM, tNUM, tCOLON, tSEMI, tLT, tASSIGN, tGT, tMSC, + tMSC, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, + tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, + tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, + tIDN, tIDN, tIDN, tOBRACK, tMSC, tCBRACK, tCARET, tIDN, + tMSC, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, + tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, + tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, + tIDN, tIDN, tIDN, tOBRACE, tPIPE, tCBRACE, tNOT, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, +}; + +typedef struct StringRec StringRec; +typedef struct CtxRec CtxRec; + +typedef struct StringRec* String; +typedef struct CtxRec* Ctx; + +struct StringRec { + String next; + u32 len; + char text[0]; +}; + +// ------------------------------------------------------------------ + +struct CtxRec { + const char* filename; // filename of active source + int fd; + + u8 iobuffer[1024]; // scanner file io buffer + u32 ionext; + u32 iolast; + + u32 linenumber; // line number of most recent line + u32 lineoffset; // position of start of most recent line + u32 byteoffset; // position of the most recent character + u32 flags; + u32 cc; // scanner: next character + + token_t tok; // most recent token + u32 num; // used for tNUM + char tmp[256]; // used for tIDN, tSTR; + String ident; // used for tIDN + + String strtab; // TODO: hashtable + + String idn_if; + String idn_for; + String idn_var; + String idn_nil; + String idn_case; + String idn_func; + String idn_else; + String idn_enum; + String idn_true; + String idn_type; + String idn_break; + String idn_while; + String idn_false; + String idn_switch; + String idn_struct; + String idn_return; + String idn_continue; +}; + +enum { + cfVisibleEOL = 1, + cfAbortOnError = 2, + cfTraceCodeGen = 3, +}; + +CtxRec ctx; + +String string_make(const char* text, u32 len) { + // OPT obviously this wants to be a hash table + String str = ctx.strtab; + while (str != nil) { + if ((str->len == len) && (memcmp(text, str->text, len) == 0)) { + return str; + } + str = str->next; + } + + str = malloc(sizeof(StringRec) + len + 1); + str->len = len; + memcpy(str->text, text, len); + str->text[len] = 0; + str->next = ctx.strtab; + ctx.strtab = str; + + return str; +} + +void ctx_init() { + memset(&ctx, 0, sizeof(ctx)); + + // pre-intern keywords + ctx.idn_if = string_make("if", 2); + ctx.idn_for = string_make("for", 3); + ctx.idn_var = string_make("var", 3); + ctx.idn_nil = string_make("nil", 3); + ctx.idn_case = string_make("case", 4); + ctx.idn_func = string_make("func", 4); + ctx.idn_else = string_make("else", 4); + ctx.idn_enum = string_make("enum", 4); + ctx.idn_true = string_make("true", 4); + ctx.idn_type = string_make("type", 4); + ctx.idn_break = string_make("break", 5); + ctx.idn_while = string_make("while", 5); + ctx.idn_false = string_make("false", 5); + ctx.idn_switch = string_make("switch", 6); + ctx.idn_struct = string_make("struct", 6); + ctx.idn_return = string_make("return", 6); + ctx.idn_continue = string_make("continue", 8); +} + +void dump_file_line(const char* fn, u32 offset); + +#if C +void error(const char *fmt, ...) { +#else +void error(const char *fmt) { +#endif + va_list ap; + + fprintf(stderr,"%s:%d: ", ctx.filename, ctx.linenumber); + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + if (ctx.linenumber > 0) { + // dump_file_line(ctx.filename, ctx.lineoffset); + } + fprintf(stderr, "\n"); + + if (ctx.flags & cfAbortOnError) { + abort(); + } else { + exit(1); + } +} + +void ctx_open_source(const char* filename) { + ctx.filename = filename; + ctx.linenumber = 0; + + if (ctx.fd >= 0) { + close(ctx.fd); + } + ctx.fd = open(filename, O_RDONLY); + if (ctx.fd < 0) { + error("cannot open file '%s'", filename); + } + ctx.ionext = 0; + ctx.iolast = 0; + ctx.linenumber = 1; + ctx.lineoffset = 0; + ctx.byteoffset = 0; +} + +i32 unhex(u32 ch) { + if ((ch >= '0') && (ch <= '9')) { + return ch - '0'; + } + if ((ch >= 'a') && (ch <= 'f')) { + return ch - 'a' + 10; + } + if ((ch >= 'A') && (ch <= 'F')) { + return ch - 'A' + 10; + } + return -1; +} + +u32 scan() { + while (ctx.ionext == ctx.iolast) { + if (ctx.fd < 0) { + ctx.cc = 0; + return ctx.cc; + } + int r = read(ctx.fd, ctx.iobuffer, sizeof(ctx.iobuffer)); + if (r <= 0) { + ctx.fd = -1; + } else { + ctx.iolast = r; + ctx.ionext = 0; + } + } + ctx.cc = ctx.iobuffer[ctx.ionext]; + ctx.ionext++; + ctx.byteoffset++; + return ctx.cc; +} + +u32 unescape(u32 n) { + if (n == 'n') { + return 10; + } else if (n == 'r') { + return 13; + } else if (n == 't') { + return 9; + } else if (n == '"') { + return '"'; + } else if (n == '\'') { + return '\''; + } else if (n == '\\') { + return '\\'; + } else if (n == 'x') { + int x0 = unhex(scan()); + int x1 = unhex(scan()); + if ((x0 < 0) || (x1 < 0)) { + error("invalid hex escape"); + } + return (x0 << 4) | x1; + } else { + error("invalid escape 0x%02x", n); + return 0; + } +} + +token_t scan_string(u32 cc, u32 nc) { + u32 n = 0; + while (true) { + if (nc == '"') { + break; + } else if (nc == 0) { + error("unterminated string"); + } else if (nc == '\\') { + ctx.tmp[n] = unescape(scan()); + } else { + ctx.tmp[n] = nc; + } + nc = scan(); + n++; + if (n == 255) { + error("constant string too large"); + } + } + ctx.tmp[n] = 0; + return tSTR; +} + +token_t scan_keyword(u32 len) { + ctx.tmp[len] = 0; + String idn = string_make(ctx.tmp, len); + ctx.ident = idn; + + if (len == 2) { + if (idn == ctx.idn_if) { return tIF; }; + } else if (len == 3) { + if (idn == ctx.idn_for) { return tFOR; } + if (idn == ctx.idn_var) { return tVAR; } + if (idn == ctx.idn_nil) { return tNIL; } + } else if (len == 4) { + if (idn == ctx.idn_case) { return tCASE; } + if (idn == ctx.idn_func) { return tFUNC; } + if (idn == ctx.idn_else) { return tELSE; } + if (idn == ctx.idn_enum) { return tENUM; } + if (idn == ctx.idn_true) { return tTRUE; } + if (idn == ctx.idn_type) { return tTYPE; } + } else if (len == 5) { + if (idn == ctx.idn_break) { return tBREAK; } + if (idn == ctx.idn_while) { return tWHILE; } + if (idn == ctx.idn_false) { return tFALSE; } + } else if (len == 6) { + if (idn == ctx.idn_switch) { return tSWITCH; } + if (idn == ctx.idn_struct) { return tSTRUCT; } + if (idn == ctx.idn_return) { return tRETURN; } + } else if (len == 8) { + if (idn == ctx.idn_continue) { return tCONTINUE; } + } + return tIDN; +} + +token_t scan_number(u32 cc, u32 nc) { + u32 n = 1; + u32 val = cc - '0'; + + if ((cc == '0') && (nc == 'b')) { // binary + nc = scan(); + while ((nc == '0') || (nc == '1')) { + val = (val << 1) | (nc - '0'); + nc = scan(); + n++; + if (n == 34) { + error("binary constant too large"); + } + } + } else if ((cc == '0') && (nc == 'x')) { // hex + nc = scan(); + while (true) { + int tmp = unhex(nc); + if (tmp == -1) { + break; + } + val = (val << 4) | tmp; + nc = scan(); + n++; + if (n == 10) { + error("hex constant too large"); + } + } + } else { // decimal + while (lextab[nc] == tNUM) { + u32 tmp = (val * 10) + (nc - '0'); + if (tmp <= val) { + error("decimal constant too large"); + } + val = tmp; + nc = scan(); + n++; + } + } + ctx.num = val; + return tNUM; +} + +token_t scan_ident(u32 cc, u32 nc) { + ctx.tmp[0] = cc; + u32 n = 1; + + while (true) { + u32 tok = lextab[nc]; + if ((tok == tIDN) || (tok == tNUM)) { + ctx.tmp[n] = nc; + n++; + if (n == 32) { error("identifier too large"); } + nc = scan(); + } else { + break; + } + } + return scan_keyword(n); +} + +token_t _next() { + u8 nc = ctx.cc; + while (true) { + u8 cc = nc; + nc = scan(); + u32 tok = lextab[cc]; + if (tok == tNUM) { // 0..9 + return scan_number(cc, nc); + } else if (tok == tIDN) { // _ A..Z a..z + return scan_ident(cc, nc); + } else if (tok == tDQT) { // " + return scan_string(cc, nc); + } else if (tok == tSQT) { // ' + ctx.num = nc; + if (nc == '\\') { + ctx.num = unescape(scan()); + } + nc = scan(); + if (nc != '\'') { + error("unterminated character constant"); + } + nc = scan(); + return tNUM; + } else if (tok == tPLUS) { + if (nc == '+') { tok = tINC; nc = scan(); } + } else if (tok == tMINUS) { + if (nc == '-') { tok = tDEC; nc = scan(); } + } else if (tok == tAMP) { + if (nc == '&') { tok = tAND; nc = scan(); } + else if (nc == '~') { tok = tANDNOT; nc = scan(); } + } else if (tok == tPIPE) { + if (nc == '|') { tok = tOR; nc = scan(); } + } else if (tok == tGT) { + if (nc == '=') { tok = tGE; nc = scan(); } + else if (nc == '>') { tok = tRIGHT; nc = scan(); } + } else if (tok == tLT) { + if (nc == '=') { tok = tLE; nc = scan(); } + else if (nc == '<') { tok = tLEFT; nc = scan(); } + } else if (tok == tASSIGN) { + if (nc == '=') { tok = tEQ; nc = scan(); } + } else if (tok == tBANG) { + if (nc == '=') { tok = tNE; nc = scan(); } + } else if (tok == tSLASH) { + if (nc == '/') { + // comment -- consume until EOL or EOF + while ((nc != '\n') && (nc != 0)) { + nc = scan(); + } + continue; + } + } else if (tok == tEOL) { + ctx.linenumber++; + ctx.lineoffset = ctx.byteoffset; + //ctx.xref[ctx.pc / 4] = ctx.linenumber; + if (ctx.flags & cfVisibleEOL) { + return tEOL; + } + continue; + } else if (tok == tSPC) { + continue; + } else if ((tok == tMSC) || (tok == tINV)) { + error("unknown character 0x%02x", cc); + } + + // if we're an AddOp or MulOp, followed by an '=' + if (((tok & 0xF0) == 0x20) && (nc == '=')) { + nc = scan(); + // transform us to a XEQ operation + tok = tok + 0x10; + } + + return tok; + } +} + +token_t next() { + return (ctx.tok = _next()); +} + +void token_printstr(void) { + u32 n = 0; + printf("\""); + while (n < 256) { + u32 ch = ctx.tmp[n]; + if (ch == 0) { + break; + } else if ((ch < ' ') || (ch > '~')) { + printf("\\x%02x", ch); + } else if ((ch == '"') || (ch == '\\')) { + printf("\\%c", ch); + } else { + printf("%c", ch); + } + n++; + } + printf("\""); +} + +void token_print(void) { + if (ctx.tok == tNUM) { + printf("#%u ", ctx.num); + } else if (ctx.tok == tIDN) { + printf("@%s ", ctx.tmp); + } else if (ctx.tok == tEOL) { + printf("\n"); + } else if (ctx.tok == tSTR) { + token_printstr(); + } else { + printf("%s ", tnames[ctx.tok]); + } +} + +void expected(const char* what) { + error("expected %s, found %s", what, tnames[ctx.tok]); +} + +void expect(token_t tok) { + if (ctx.tok != tok) { + error("expected %s, found %s", tnames[tok], tnames[ctx.tok]); + } +} + +void require(token_t tok) { + expect(tok); + next(); +} + +// ================================================================ + +#if 0 +void parse_expr(Item x); + +String parse_name(const char* what) { + if (ctx.tok != tIDN) { + error("expected %s, found %s %u", what, tnames[ctx.tok], ctx.tok); + } + String str = ctx.ident; + next(); + return str; +} + +void parse_operand(Item x) { + if (ctx.tok == tNUM) { + set_item(x, iConst, ctx.type_int32, 0, ctx.num, 0); + } else if (ctx.tok == tSTR) { + error("<TODO> string const"); + } else if (ctx.tok == tTRUE) { + set_item(x, iConst, ctx.type_bool, 0, 1, 0); + } else if (ctx.tok == tFALSE) { + set_item(x, iConst, ctx.type_bool, 0, 0, 0); + } else if (ctx.tok == tNIL) { + set_item(x, iConst, ctx.type_nil, 0, 0, 0); + } else if (ctx.tok == tOPAREN) { + next(); + parse_expr(x); + require(tCPAREN); + return; + } else if (ctx.tok == tIDN) { + String str = ctx.ident; + Object obj = find(str); + if (obj == nil) { + error("unknown identifier '%s'", str->text); + } + gen_item_from_obj(x, obj); + } else { + error("invalid expression"); + } + next(); +} + +void dereference(Item x, String name) { + if (x->kind != iRegInd) { + error("internal: cannot deref via item kind %u", x->kind); + } + + if (x->type->kind == tRecord) { + Object field = x->type->first; + while (field != nil) { + if (field->name == name) { + x->type = field->type; + x->a = x->a + field->value; + return; + } + field = field->next; + } + error("field '%s' does not exist", name->text); + } else { + error("internal: cannot deref non-structs"); + } +} + +void parse_primary_expr(Item x) { + parse_operand(x); + while (true) { + if (ctx.tok == tOPAREN) { + next(); + if (x->kind != iFunc) { + error("cannot call non-function"); + } + // TODO ptr-to-func + u32 n = 0; + Object param = x->type->first; + while (param != nil) { + if (ctx.tok == tCPAREN) { + error("too few parameters for %s()", x->type->obj->name->text); + } + if (n != 0) { + require(tCOMMA); + } + ItemRec y; + parse_expr(&y); + if (!compatible_type(param->type, y.type, &y)) { + error("incompatible type for parameter '%s'\n", param->name->text); + } + if (param->type->kind == tArray) { + gen_ref_param(n, &y); + } else { + gen_param(n, &y); + } + param = param->next; + n++; + } + require(tCPAREN); + gen_call(x); + } else if (ctx.tok == tDOT) { + next(); + String name = parse_name("field name"); + if (x->type->kind == tRecord) { + dereference(x, name); + } else if ((x->type->kind == tPointer) && + (x->type->base->kind == tRecord)) { + gen_deref_ptr(x); + dereference(x, name); + } else { + error("can only use '.' with struct or pointer-to-struct"); + } + } else if (ctx.tok == tOBRACK) { + next(); + ItemRec y; + parse_expr(&y); + require(tCBRACK); + if (x->type->kind != tArray) { + error("can only use [] with array"); + } + if (x->kind != iRegInd) { + error("internal: cannot index via item kind %u", x->kind); + } + if (y.kind == iConst) { + if (y.a >= x->type->len) { + error("array index out of range"); + } + x->a = x->a + y.a * x->type->base->size; + x->type = x->type->base; + } else { + gen_index(x, &y); + } + } else { + break; + } + } +} + +void parse_unary_expr(Item x) { + if (ctx.tok == tPLUS) { + next(); + parse_unary_expr(x); + } else if ((ctx.tok == tMINUS) || (ctx.tok == tBANG) || (ctx.tok == tNOT)) { + u32 op = ctx.tok; + next(); + parse_unary_expr(x); + if ((x->kind == iComp) && (op == tBANG)) { + x->r = invert_relop(x->r); + } else { + gen_unary_op(op, x); + } + } else if (ctx.tok == tAMP) { + next(); + parse_unary_expr(x); + gen_get_ptr(x); + } else { + parse_primary_expr(x); + } +} + +void parse_mul_expr(Item x) { + parse_unary_expr(x); + while ((ctx.tok & tcMASK) == tcMULOP) { + u32 mulop = ctx.tok - tSTAR; + next(); + ItemRec y; + parse_unary_expr(&y); + gen_mul_op(mulop, x, &y); + } +} + +void parse_add_expr(Item x) { + parse_mul_expr(x); + while ((ctx.tok & tcMASK) == tcADDOP) { + u32 addop = ctx.tok - tPLUS; + next(); + ItemRec y; + parse_mul_expr(&y); + gen_add_op(addop, x, &y); + } +} + +void parse_rel_expr(Item x) { + parse_add_expr(x); + if ((ctx.tok & tcMASK) == tcRELOP) { + u32 relop = ctx.tok - tEQ; + next(); + ItemRec y; + parse_add_expr(&y); + gen_rel_op(relop, x, &y); + } +} + +void parse_and_expr(Item x) { + parse_rel_expr(x); + if (ctx.tok == tAND) { + Scope outer = push_scope(sBlock, nil); + + // if !x goto nope + gen_branch_cond(x, false); + add_scope_fixup(outer); + + while (ctx.tok == tAND) { + next(); + ItemRec y; + + // if !y goto nope + parse_rel_expr(&y); + gen_branch_cond(&y, false); + add_scope_fixup(outer); + } + // res = true, goto done + gen_load_bool1(x, true); + u32 l0_true = gen_branch_fwd(); + + // nope: res = false + pop_scope(); + gen_load_bool2(x); + + // done: + fixup_branch_fwd(l0_true); + } +} + +void parse_expr(Item x) { + parse_and_expr(x); + if (ctx.tok == tOR) { + Scope outer = push_scope(sBlock, nil); + + // if x goto yup + gen_branch_cond(x, true); + add_scope_fixup(outer); + + while (ctx.tok == tOR) { + next(); + ItemRec y; + + // if y goto yup + parse_and_expr(&y); + gen_branch_cond(&y, true); + add_scope_fixup(outer); + } + // res = false, goto done + gen_load_bool1(x, false); + u32 l0_false = gen_branch_fwd(); + + // yup: res = true + pop_scope(); + gen_load_bool2(x); + + // done: + fixup_branch_fwd(l0_false); + } +} + +Type find_type(String name) { + Object obj = ctx.typetab; + while (obj != nil) { + if (obj->name == name) { + return obj->type; + } + obj = obj->next; + } + return nil; +} + +// fwd_ref_ok indicates that an undefined typename +// may be treated as a forward reference. This is +// only used for pointers (size their size does not +// depend on their target). +Type parse_type(bool fwd_ref_ok); + +Type parse_struct_type() { + Type rectype = make_type(tRecord, nil, nil, nil, 0, 0); + Object last = nil; + require(tOBRACE); + while (true) { + if (ctx.tok == tCBRACE) { + next(); + break; + } + String name = parse_name("field name"); + Type type = parse_type(false); + Object field = make_object(oField, name, type, nil, 0, rectype->size); + + // TODO sub-word packing + rectype->size += (type->size + 3) & (~3); + rectype->len++; + + // add field to record + if (last == nil) { + rectype->first = field; + } else { + last->next = field; + } + last = field; + + if (ctx.tok != tCBRACE) { + require(tCOMMA); + } + } + return rectype; +} + +Type parse_array_type() { + if (ctx.tok == tCBRACK) { + next(); + return make_type(tSlice, parse_type(false), nil, nil, 0, 8); + } else { + ItemRec x; + parse_expr(&x); + require(tCBRACK); + if ((x.kind != iConst) || (x.type != ctx.type_int32)) { + error("array size must be integer constant"); + } + //XXX check for >0 + Type base = parse_type(false); + u32 sz = x.a * base->size; + if (sz < x.a) { + error("array size overflow"); + } + return make_type(tArray, base, nil, nil, x.a, sz); + } +} + +Type parse_func_type() { + error("<TODO> func type"); + return nil; +} + +Type parse_type(bool fwd_ref_ok) { + if (ctx.tok == tSTAR) { // pointer-to + next(); + return make_type(tPointer, parse_type(true), nil, nil, 0, 4); + } else if (ctx.tok == tOBRACK) { // array-of + next(); + return parse_array_type(); + } else if (ctx.tok == tFUNC) { + next(); + return parse_func_type(); + } else if (ctx.tok == tSTRUCT) { + next(); + return parse_struct_type(); + } else if (ctx.tok == tIDN) { + String name = ctx.ident; + next(); + Type type = find_type(name); + if (type == nil) { + if (fwd_ref_ok) { + type = make_type(tUndefined, nil, nil, nil, 0, 0); + add_type(type, name); + } else { + error("undefined type '%s' not usable here", name->text); + } + } + return type; + } else { + expected("type"); + return nil; + } +} + +void parse_block(); + +void parse_while() { + ItemRec x; + u32 save = ctx.pc_continue; + ctx.pc_continue = ctx.pc; // for backward branch + + parse_expr(&x); + u32 l1_br_false = gen_branch_cond(&x, false); + + require(tOBRACE); + push_scope(sLoop, nil); + parse_block(); + gen_branch_back(ctx.pc_continue); + pop_scope(); + + fixup_branch_fwd(l1_br_false); + + ctx.pc_continue = save; +} + +void parse_if() { + Scope outer = push_scope(sBlock, nil); + + ItemRec x; + parse_expr(&x); + // branch over "if" code + u32 l0_br_false = gen_branch_cond(&x, false); + + // generate "if" code + require(tOBRACE); + push_scope(sBlock, nil); + parse_block(); + pop_scope(); + + // branch past "else" code + gen_branch_fwd(); + add_scope_fixup(outer); + + while (ctx.tok == tELSE) { + next(); + fixup_branch_fwd(l0_br_false); + if (ctx.tok == tIF) { + next(); + parse_expr(&x); + // branch over "if" code + l0_br_false = gen_branch_cond(&x, false); + + // generate "if else" code + require(tOBRACE); + push_scope(sBlock, nil); + parse_block(); + pop_scope(); + + // branch past "else" code + gen_branch_fwd(); + add_scope_fixup(outer); + } else { + // generate "else" code + require(tOBRACE); + push_scope(sBlock, nil); + parse_block(); + pop_scope(); + + // close outer scope + pop_scope(); + return; // no further fixups needed + } + } + fixup_branch_fwd(l0_br_false); + + // close outer scope + pop_scope(); +} + +void parse_return() { + ItemRec x; + if (ctx.tok == tSEMI) { + if (ctx.fn->type->base != ctx.type_void) { + error("function requires return type"); + } + next(); + x.type = ctx.type_void; + } else { + parse_expr(&x); + if (!compatible_type(ctx.fn->type->base, x.type, &x)) { + error("return types do not match"); + } + require(tSEMI); + } + gen_return(&x); +} + +void parse_break() { + // XXX break-to-labeled-loop support + gen_branch_fwd(); + Scope scope = find_scope(sLoop); + if (scope == nil) { + error("break must be used from inside a looping construct"); + } + add_scope_fixup(scope); + require(tSEMI); +} + +void parse_continue() { + // XXX continue-to-labeled-loop support + if (ctx.pc_continue == 0) { + error("continue must be used from inside a looping construct"); + } + gen_branch_back(ctx.pc_continue); + require(tSEMI); +} + +void STORE(u32 val, u32* ptr, u32 n, u32 sz) { + if (sz == 4) { + ptr[n >> 2] = val; + } else if (sz == 1) { + ((u8*)ptr)[n] = val; + } +} + +u32 parse_init_constexpr(Type type) { + if (type->size > 4) { + error("<TODO> larger init constexpr types"); + } + ItemRec x; + parse_expr(&x); + if (x.kind != iConst) { + error("non-constant initializer"); + } + return x.a; +} + +u32 parse_array_init(Object var, u32ptr data, u32 dmax, u32 sz) { + memset(data, 0, dmax); + u32 n = 0; + while (true) { + if (ctx.tok == tCBRACE) { + next(); + break; + } + if (n >= dmax) { + error("initializer too large"); + } + u32 v = parse_init_constexpr(var->type->base); + STORE(v, data, n, sz); + n += sz; + if (ctx.tok != tCBRACE) { + require(tCOMMA); + } + } + return n; +} + +void parse_struct_init(Object var, u32ptr data) { + memset(data, 0, var->type->size); + while (true) { + if (ctx.tok == tCBRACE) { + next(); + break; + } + String name = parse_name("field name"); + Object field = var->type->first; + while (true) { + if (field == nil) { + error("structure has no '%s' field", name->text); + } + if (field->name == name) { + break; + } + field = field->next; + } + require(tCOLON); + u32 v = parse_init_constexpr(field->type); + STORE(v, data, field->value, 4); + if (ctx.tok != tCBRACE) { + require(tCOMMA); + } + } +} + +void parse_local_var() { + String name = parse_name("variable name"); + // TODO: allow type inference + Type type = parse_type(false); + + Object lvar = make_var(oVar, name, type, 0, ctx.alloc_stack); + lvar->next = ctx.scope->first; + ctx.scope->first = lvar; + + ctx.alloc_stack = ctx.alloc_stack + type->size; + if (ctx.local_stack < ctx.alloc_stack) { + ctx.local_stack = ctx.alloc_stack; + } + + if (ctx.tok == tASSIGN) { + next(); + ItemRec x, y; + parse_expr(&x); + gen_item_from_obj(&y, lvar); + gen_store(&x, &y); + } + require(tSEMI); +} + +void parse_global_var() { + String name = parse_name("variable name"); + // TODO: allow type inference + Type type = parse_type(false); + + Object gvar = make_var(oGlobal, name, type, 0, ctx.alloc_global); + gvar->next = ctx.scope->first; + ctx.scope->first = gvar; + ctx.alloc_global = ctx.alloc_global + type->size; + ctx.alloc_global = (ctx.alloc_global + 3) & (~3); // round to word + + if (ctx.tok == tASSIGN) { + next(); + if (ctx.tok == tOBRACE) { + next(); + u32* data = ctx.data + (gvar->value >> 2); + if (type->kind == tArray) { + parse_array_init(gvar, data, type->size, type->base->size); + } else if (type->kind == tRecord) { + parse_struct_init(gvar, data); + } else { + error("cannot initialize this way"); + } + } else { + ItemRec x; + parse_expr(&x); + if (x.kind != iConst) { + error("non-constant global initializer"); + } + //TODO: check type/size compat + ctx.data[gvar->value >> 2] = x.a; + } + } + require(tSEMI); +} + +void parse_expr_statement() { + ItemRec x; + parse_expr(&x); + if (ctx.tok == tASSIGN) { + next(); + ItemRec y; + parse_expr(&y); + if (!compatible_type(x.type, y.type, &x)) { + error("incompatible type in assignment"); + } + gen_store(&y, &x); + } else if ((ctx.tok & tcMASK) == tcAEQOP) { + u32 op = ctx.tok - tADDEQ; + next(); + ItemRec y, z; + parse_expr(&y); + copy_item(&x, &z); + gen_add_op(op, &x, &y); + gen_store(&x, &z); + } else if ((ctx.tok & tcMASK) == tcMEQOP) { + u32 op = ctx.tok - tMULEQ; + next(); + ItemRec y, z; + parse_expr(&y); + copy_item(&x, &z); + gen_add_op(op, &x, &y); + gen_store(&x, &z); + } else if ((ctx.tok == tINC) || (ctx.tok == tDEC)) { + ItemRec y, z; + set_item(&y, iConst, ctx.type_int32, 0, 1, 0); + copy_item(&x, &z); + if (ctx.tok == tINC) { + gen_add_op(aADD, &x, &y); + } else { + gen_add_op(aSUB, &x, &y); + } + gen_store(&x, &z); + next(); + } else { + gen_discard(&x); + } + require(tSEMI); +} + +void parse_block() { + while (true) { + if (ctx.tok == tCBRACE) { + next(); + break; + } else if (ctx.tok == tRETURN) { + next(); + parse_return(); + } else if (ctx.tok == tBREAK) { + next(); + parse_break(); + } else if (ctx.tok == tCONTINUE) { + next(); + parse_continue(); + } else if (ctx.tok == tWHILE) { + next(); + parse_while(); + } else if (ctx.tok == tIF) { + next(); + parse_if(); + } else if (ctx.tok == tVAR) { + next(); + parse_local_var(); + } else if (ctx.tok == tSEMI) { + next(); + // empty statement + } else { + parse_expr_statement(); + } + } +} + +void parse_function_body(Object fn) { + ctx.fn = fn; + gen_prologue(fn); + push_scope(sFunc, fn->first); // scope for parameters + push_scope(sBlock, nil); // top scope for function body + parse_block(); + pop_scope(); + pop_scope(); + gen_epilogue(fn); +} + +Object parse_param(String fname, u32 n, Object first, Object last) { + if (n == FNMAXARGS) { + error("too many parameters (%d) for '%s'", FNMAXARGS, fname->text); + } + String pname = parse_name("parameter name"); + Type ptype = parse_type(false); + Object param = make_var(oParam, pname, ptype, 0, 4 + n * 4); + + Object obj = first; + while (obj != nil) { + if (obj->name == param->name) { + error("duplicate parameter name '%s'", fname->text); + } + obj = obj->next; + } + + if (last != nil) { + last->next = param; + } + return param; +} + +void make_builtin(const char* name, u32 id, Type p0, Type p1, Type rtn) { + String fname = string_make(name, strlen(name)); + Type type = make_type(tFunc, rtn, nil, nil, 0, 0); + type->obj = make_object(oFunc, fname, type, nil, ofBuiltin, id); + + if (p0 != nil) { + Object param = make_var(oParam, string_make("a", 1), p0, 0, 0); + type->obj->first = param; + type->first = param; + type->len = 1; + if (p1 != nil) { + param->next = make_var(oParam, string_make("b", 1), p1, 0, 1); + type->len = 2; + } + } + make_global(type->obj); +} + +void parse_function() { + Object first = nil; + Object last = nil; + u32 n = 0; + String fname = parse_name("function name"); + Type rettype = ctx.type_void; + + require(tOPAREN); + + // process parameters + if (ctx.tok != tCPAREN) { + first = parse_param(fname, n, nil, nil); + last = first; + n++; + while (ctx.tok == tCOMMA) { + next(); + last = parse_param(fname, n, first, last); + n++; + } + } + require(tCPAREN); + + if ((ctx.tok != tSEMI) && (ctx.tok != tOBRACE)) { + rettype = parse_type(false); + } + + int isdef = 0; + if (ctx.tok == tSEMI) { + // declaration + next(); + } else if (ctx.tok == tOBRACE) { + // definition + next(); + isdef = 1; + } else { + expected("semi or open brace"); + } + + // Look for an existing declaration or definintion of this function + Object obj = find(fname); + if (obj != nil) { + // such a named identifier exists + // check to see if we are in agreement with it + if (obj->kind != oFunc) { + error("redefining '%s' as function", fname->text); + } + if (isdef && (obj->flags & ofDefined)) { + error("redefined function '%s'", fname->text); + } + if (rettype != obj->type->base) { + error("func '%s' return type differs from decl", fname->text); + } + if (obj->type->len != n) { + error("func '%s' parameter count differs from decl", fname->text); + } + Object pa = first; + Object pb = obj->type->first; + u32 i = 1; + while ((pa != nil) && (pb != nil)) { + if (!same_type(pa->type, pb->type)) { + error("func '%s' param %u differs from decl", fname->text, i); + } + pa = pa->next; + pb = pb->next; + } + } else { + // if there was no existing record of this function, create one now + Type type = make_type(tFunc, rettype, nil, first, n, 0); + obj = make_object(oFunc, fname, type, first, 0, 0); + type->obj = obj; + make_global(obj); + } + + // handle definition if it is one + if (isdef) { + // patch any forward references + fixup_branches_fwd(obj->fixups); + + // mark as defined and save entry address + obj->flags |= ofDefined; + obj->value = ctx.pc; + parse_function_body(obj); + } +} + +void parse_type_def() { + String name = parse_name("type name"); + Type type = parse_type(false); + Type prev = find_type(name); + if (prev == nil) { + add_type(type, name); + } else { + if (prev->kind != tUndefined) { + error("cannot redefine type '%s'\n", name->text); + } + prev->kind = type->kind; + prev->base = type->base; + prev->first = type->first; + prev->len = type->len; + prev->size = type->size; + prev->obj->type = type; + // XXX discard type + } + require(tSEMI); +} + +void parse_enum_def() { + require(tOBRACE); + u32 val = 0; + while (ctx.tok != tCBRACE) { + String name = parse_name("enum tag name"); + Object obj = find(name); + if (obj != nil) { + error("cannot redefine %s as enum tag\n", name->text); + } + if (ctx.tok == tASSIGN) { + next(); + val = parse_init_constexpr(ctx.type_int32); + } + require(tCOMMA); + obj = make_var(oConst, name, ctx.type_int32, 0, val); + obj->next = ctx.scope->first; + ctx.scope->first = obj; + val++; + } + require(tCBRACE); + require(tSEMI); +} + +void parse_program() { + next(); + for (;;) { + if (ctx.tok == tFUNC) { + next(); + parse_function(); + } else if (ctx.tok == tTYPE) { + next(); + parse_type_def(); + } else if (ctx.tok == tVAR) { + next(); + parse_global_var(); + } else if (ctx.tok == tENUM) { + next(); + parse_enum_def(); + } else if (ctx.tok == tEOF) { + return; + } else { + expected("function, variable, or type definition"); + } + } +} +#endif + +// ================================================================ + +// ================================================================ + +i32 main(int argc, args argv) { + str outname = "out.bin"; + str lstname = nil; + str srcname = nil; + bool dump = false; + bool scan_only = false; + + ctx_init(); + ctx.filename = "<commandline>"; + + while (argc > 1) { + if (!strcmp(argv[1],"-o")) { + if (argc < 2) { + error("option -o requires argument"); + } + outname = argv[2]; + argc--; + argv++; + } else if (!strcmp(argv[1], "-l")) { + if (argc < 2) { + error("option -l requires argument"); + } + lstname = argv[2]; + argc--; + argv++; + } else if (!strcmp(argv[1], "-p")) { + dump = true; + } else if (!strcmp(argv[1], "-v")) { + ctx.flags |= cfTraceCodeGen; + } else if (!strcmp(argv[1], "-s")) { + scan_only = true; + } else if (!strcmp(argv[1], "-A")) { + ctx.flags |= cfAbortOnError; + } else if (argv[1][0] == '-') { + error("unknown option: %s", argv[1]); + } else { + if (srcname != nil) { + error("multiple source files disallowed"); + } else { + srcname = argv[1]; + } + } + argc--; + argv++; + } + + if (srcname == nil) { + error("no file specified"); + } + ctx.filename = srcname; + + ctx_open_source(srcname); + ctx.linenumber = 1; + ctx.lineoffset = 0; + // prime the lexer + scan(); + + if (scan_only) { + ctx.flags |= 1; + while (true) { + next(); + token_print(); + if (ctx.tok == tEOF) { + printf("\n"); + return 0; + } + } + } + + return 0; +}