commit 5833bfe8d8eff3d9a08f440f5b3905c78587f613
parent 823f7bf3b95488383ae412406fb57acac9a47955
Author: Brian Swetland <swetland@frotz.net>
Date: Mon, 2 Mar 2020 11:34:42 -0800
wip tlc compiler
Diffstat:
A | docs/tlc.bnf | | | 80 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | src/tlc.c | | | 651 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | test/hello.tl | | | 18 | ++++++++++++++++++ |
3 files changed, 749 insertions(+), 0 deletions(-)
diff --git a/docs/tlc.bnf b/docs/tlc.bnf
@@ -0,0 +1,80 @@
+
+program := { vardef | funcdef }
+
+
+vardef := "var" ident [ ":" type ] [ "=" <expr> ] ";"
+
+
+funcdef := "func" ident "(" [ paramdef { "," paramdef } ] ")" [ type ] ( ";" | block )
+
+paramdef := ident ":" type
+
+
+recorddef := "record" [ "(" qualident ")" ] "{" { fielddef } "}"
+
+fielddef := ident ":" type ","
+
+
+type := ident [ arraydef | "&"]
+
+arraydef := "[" [ integer { "," integer } ] "]"
+
+
+expr := simpleexpr [ relation simpleexpr ]
+
+relation := "==" | "!=" | "<" | "<=" | ">" | ">="
+
+simpleexpr := [ "+" | "-" ] term { addop term }
+
+addop := "+" | "-" | "||"
+
+term := factor { mulop factor }
+
+mulop := "*" | "/" | "%" | "&&"
+
+factor := integer | string | special | designator [ params ] | "(" expr ")" | "~" factor
+
+special := "nil" | "true" | "false"
+
+
+integer := digit {digit} | "0" "x" {hexdigit} | "0" "b" {bindigit}
+
+hexdigit := "A" .. "F" | "a" .. "f" | "0" .. "9"
+
+bindigit := "0" | "1"
+
+digit := "0" .. "9"
+
+
+designator := qualident { selector }
+
+selector := "." ident | "[" explist "]" | "->"
+
+explist := expr { "," expr }
+
+params := "(" explist ")"
+
+
+qualident := [ ident "." ] ident
+
+ident := ( letter | "_" ) { letter | digit | "_" }
+
+letter := "A" .. "Z" | "a" .. "z"
+
+statement := [ assignment | if | while | for | break | return ]
+
+assignment := designator "=" expr ";"
+
+if := "if" expr block { "else" "if" expr block } [ "else" block ]
+
+while := "while" expr block
+
+for := "for" [ expr ] ";" [ expr ] ";" [ expr ] block
+
+break := "break" ";"
+
+return := "return" expr ";"
+
+block := "{" { statement ";" } "}"
+
+
diff --git a/src/tlc.c b/src/tlc.c
@@ -0,0 +1,651 @@
+// Copyright 2020, Brian Swetland <swetland@frotz.net>
+// Licensed under the Apache License, Version 2.0.
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <strings.h>
+#include <string.h>
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/stat.h>
+
+#define FNMAXARGS 8
+
+typedef uint32_t u32;
+typedef int32_t i32;
+
+typedef enum { // token kinds; the order here must stay in step with tnames[] below
+ tEOF, tEOL,
+ tDOT, tCOMMA, tCOLON, tSEMI, tBANG, tOBRACK, tCBRACK,
+ tOPAREN, tCPAREN, tOBRACE, tCBRACE, tASSIGN,
+ tPLUS, tMINUS, tSTAR, tSLASH, tAMP, tPIPE, tCARET,
+ tAND, tOR, tEQ, tGT, tLT, tGE, tLE, tNE,
+ tINCR, tDECR,
+ tVAR, tSTRUCT, tFUNC, tRETURN, tIF, tELSE, // keywords (recognized in next_word)
+ tWHILE, tFOR, tBREAK, tSWITCH, tCASE,
+ tNAME, tNUMBER, tSTRING, // tokens whose text is left in ctx->tmp
+ NUMTOKENS, // number of token kinds; not a token itself
+} token_t;
+
+char *tnames[] = { // printable text for each token_t value, indexed by the token
+ "<EOF>", "<EOL>",
+ ".", ",", ":", ";", "!", "[", "]",
+ "(", ")","{", "}", "=",
+ "+", "-", "*", "/", "&", "|", "^",
+ "&&", "||", "==", ">", "<", ">=", "<=", "!=",
+ "++", "--",
+ "var", "struct", "func", "return", "if", "else",
+ "while", "for", "break", "switch", "case",
+ "<NAME>", "<NUMBER>", "<STRING>", // placeholders: the actual text lives in ctx->tmp
+};
+
+
+typedef struct StringRec* String; // handle types: each bare name is a POINTER to the matching *Rec struct
+typedef struct SymbolRec* Symbol;
+typedef struct ScopeRec* Scope;
+typedef struct TypeRec* Type;
+typedef struct FuncRec* Func;
+typedef struct CtxRec* Ctx;
+
+typedef struct StringRec StringRec; // record types: the structs themselves, usable without the struct keyword
+typedef struct SymbolRec SymbolRec;
+typedef struct ScopeRec ScopeRec;
+typedef struct TypeRec TypeRec;
+typedef struct FuncRec FuncRec;
+typedef struct CtxRec CtxRec;
+
+struct StringRec { // one interned string; entries are created and linked by mkstring
+ String next; // next entry in the singly linked string table (ctx->strtab)
+ u32 len; // number of bytes in text, not counting the terminating NUL
+ char text[0]; // bytes stored inline after the header; mkstring allocates len + 1 and NUL terminates
+};
+
+#define TF_INTEGER 0x01 // type flag bits passed to mktype
+#define TF_SIGNED 0x02
+#define TF_VOID 0x04
+
+struct TypeRec { // one entry in the type table (ctx->typetab)
+ Type next; // next entry in the singly linked type table
+ String name; // interned type name; parse_type matches on this pointer
+ u32 flags; // TF_* bits
+ u32 width; // 4 for i32/u32 and 0 for void in init_ctx; presumably a size in bytes -- TODO confirm
+};
+
+struct CtxRec { // all compiler state: source buffer, tokenizer position, and the lookup tables
+ const char* source; // entire source file
+ const char* sptr; // tokenizer source pointer
+ const char* line; // start of most recent line
+ const char* filename; // filename of active source
+ unsigned linenumber; // line number of most recent line
+
+ token_t tok; // most recent token
+ unsigned num; // value of the most recent tNUMBER token (next_word resets it to 0)
+ char tmp[256]; // NUL terminated text of the most recent tNAME, tNUMBER or tSTRING token
+
+ String strtab; // TODO: hashtable
+ Type typetab; // TODO: hashtable
+ Symbol symtab; // TODO: hashtable, globals
+ Scope scope; // not used by any code in this file yet; presumably the innermost active scope -- TODO confirm
+
+ Type type_void; // built-in types installed by init_ctx
+ Type type_i32;
+ Type type_u32;
+};
+
+struct ScopeRec { // a scope record; only ever initialized to empty (NULL/NULL) in this file so far
+ Scope next; // presumably the enclosing scope -- TODO confirm once scopes are used
+ Symbol first; // presumably the head of this scope's symbol list -- TODO confirm
+};
+
+#define SF_REGISTER 0x01 // symbol flag bits stored in SymbolRec.flags
+#define SF_FRAMEREL 0x02 // set on function parameters by parse_function
+#define SF_GLOBAL 0x04 // global variable
+#define SF_FUNC 0x08 // function
+#define SF_DEFINED 0x10 // defined, not just declared (function)
+
+struct SymbolRec { // a named entity: a global symbol (ctx->symtab) or a function parameter
+ Symbol next; // next symbol in the list this one is linked on
+ String name; // interned name; compared by pointer
+ Type type; // type of the symbol; parse_function stores NULL here for functions
+ u32 flags; // SF_* bits
+ i32 posn; // parse_function stores -4 * (index + 1) for parameters; presumably a frame offset -- TODO confirm
+ i32 regno; // always 0 in this file so far; presumably a register number for SF_REGISTER -- TODO confirm
+ Func func; // function details, set by parse_function for SF_FUNC symbols
+};
+
+struct FuncRec { // signature of a declared or defined function
+ ScopeRec scope; // initialized empty by parse_function; presumably the function's own scope -- TODO confirm
+ u32 pcount; // number of entries in param[]
+ Type type; // return type
+ SymbolRec param[0]; // parameter symbols stored inline after the header; parse_function allocates pcount of them
+};
+
+// Intern a string: return the unique String holding the len bytes at
+// text, creating and linking a new string table entry if none exists.
+//
+// ctx  - compiler context owning the string table (ctx->strtab)
+// text - bytes to intern; does not need to be NUL terminated
+// len  - number of bytes at text
+//
+// Equal byte sequences always yield the same pointer, so callers can
+// compare Strings by address. The stored copy is NUL terminated.
+// Prints a message and exits if memory cannot be allocated.
+String mkstring(Ctx ctx, const char* text, unsigned len) {
+    // linear search of the existing entries (TODO in CtxRec: hashtable)
+    for (String cur = ctx->strtab; cur != NULL; cur = cur->next) {
+        if ((cur->len == len) && (memcmp(text, cur->text, len) == 0)) {
+            return cur;
+        }
+    }
+
+    // header + text bytes + terminating NUL
+    String str = malloc(sizeof(StringRec) + len + 1);
+    if (str == NULL) {
+        fprintf(stderr, "out of memory\n");
+        exit(1);
+    }
+    str->len = len;
+    memcpy(str->text, text, len);
+    str->text[len] = 0;
+
+    // push on the front of the table
+    str->next = ctx->strtab;
+    ctx->strtab = str;
+
+    return str;
+}
+
+// Create a new type and push it on the front of the type table.
+//
+// ctx   - compiler context owning the type table (ctx->typetab)
+// text  - type name bytes (interned via mkstring)
+// len   - number of bytes at text
+// flags - TF_* bits describing the type
+// width - width recorded for the type
+//
+// Returns the new table entry. No check is made for an existing type
+// with the same name. Prints a message and exits if out of memory.
+Type mktype(Ctx ctx, const char* text, unsigned len, unsigned flags, unsigned width) {
+    String str = mkstring(ctx, text, len);
+    Type type = malloc(sizeof(TypeRec));
+    if (type == NULL) {
+        fprintf(stderr, "out of memory\n");
+        exit(1);
+    }
+    type->name = str;
+    // previously the flags argument was dropped and the field left uninitialized
+    type->flags = flags;
+    type->width = width;
+    type->next = ctx->typetab;
+    ctx->typetab = type;
+    return type;
+}
+
+// Reset a context to its empty state and install the built-in types.
+//
+// ctx - caller-provided CtxRec storage; every field is zeroed first, so
+//       all tables start empty and all pointers start NULL.
+void init_ctx(Ctx ctx) {
+    memset(ctx, 0, sizeof(CtxRec));
+
+    // install built-in plain types
+    ctx->type_void = mktype(ctx, "void", 4, TF_VOID, 0);
+    ctx->type_i32 = mktype(ctx, "i32", 3, TF_INTEGER | TF_SIGNED, 4);
+    // u32 is an integer that is NOT signed (was wrongly flagged TF_SIGNED)
+    ctx->type_u32 = mktype(ctx, "u32", 3, TF_INTEGER, 4);
+}
+
+void error(Ctx ctx, const char *fmt, ...) {
+ va_list ap;
+
+ unsigned len = 0;
+ const char *s = ctx->line;
+ while (len < 255) {
+ if ((*s < ' ') && (*s != 9)) break;
+ s++;
+ len++;
+ }
+
+ fprintf(stderr,"%s:%d: ", ctx->filename, ctx->linenumber);
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+
+ fprintf(stderr,"\n%.*s\n", len, ctx->line);
+ exit(1);
+}
+
+// Read an entire source file into memory and point the tokenizer at it.
+//
+// ctx      - context to receive the source; filename, source, sptr,
+//            line and linenumber are set
+// filename - path of the file to read; the pointer is kept in ctx
+//
+// The buffer is NUL terminated (the tokenizer relies on that to detect
+// end of input) and is never freed. Any failure is reported through
+// error(), which exits.
+void load(Ctx ctx, const char* filename) {
+    ctx->filename = filename;
+    ctx->linenumber = 0;
+    // no source text exists yet; give error() an empty line to echo
+    ctx->line = "";
+
+    int fd = open(filename, O_RDONLY);
+    if (fd < 0)
+        error(ctx, "cannot open file");
+
+    struct stat s;
+    if (fstat(fd, &s) < 0)
+        error(ctx, "cannot stat file");
+
+    size_t size = (size_t) s.st_size;
+    char* data = malloc(size + 1);
+    if (data == NULL)
+        error(ctx, "cannot allocate memory");
+
+    // read() may legitimately return fewer bytes than requested, so
+    // keep going until the whole file is in; 0 (file shrank) or a
+    // negative result is a failure
+    size_t got = 0;
+    while (got < size) {
+        ssize_t r = read(fd, data + got, size - got);
+        if (r <= 0)
+            error(ctx, "cannot read file");
+        got += (size_t) r;
+    }
+    close(fd);
+    data[size] = 0;
+
+    ctx->source = data;
+    ctx->sptr = data;
+    ctx->line = data;
+    ctx->linenumber = 1;
+}
+
+// Convert one hexadecimal digit character to its value.
+//
+// ch - character code to convert
+//
+// Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', or -1 for anything else.
+int unhex(unsigned ch) {
+    if ((ch >= '0') && (ch <= '9')) {
+        return (int) (ch - '0');
+    }
+    if ((ch >= 'a') && (ch <= 'f')) {
+        return (int) (ch - 'a' + 10);
+    }
+    if ((ch >= 'A') && (ch <= 'F')) {
+        return (int) (ch - 'A' + 10);
+    }
+    return -1;
+}
+
+// Tokenize a string constant into ctx->tmp.
+//
+// ctx - tokenizer state; tmp receives the decoded, NUL terminated text
+//       and sptr is moved past the closing quote
+// s   - points just past the opening double quote
+//
+// Escapes: \n \t \" \\ and \xHH (exactly two hex digits). At most 255
+// decoded bytes are accepted. An unterminated string, a bad escape or
+// an over-long string is reported through error(), which exits.
+//
+// Returns tSTRING.
+token_t next_string(Ctx ctx, const char* s) {
+    unsigned len = 0;
+    for (;;) {
+        // read bytes as unsigned so values >= 0x80 are not sign extended
+        unsigned ch = (unsigned char) *s++;
+        if (ch == 0) error(ctx, "unterminated string");
+        if (ch == '"') break;
+        if (ch == '\\') {
+            ch = (unsigned char) *s++;
+            switch (ch) {
+            // NUL, i.e. the input ended right after the backslash
+            // (this used to test for the digit '0' by mistake)
+            case 0: error(ctx, "unterminated string"); break;
+            case 'n': ch = 10; break;
+            case 't': ch = 9; break;
+            case '"': ch = '"'; break;
+            case '\\': ch = '\\'; break;
+            case 'x': {
+                int x0 = unhex((unsigned char) s[0]);
+                // only look at s[1] when s[0] was a hex digit, so input
+                // ending right after "\x" is never read past its NUL
+                int x1 = (x0 < 0) ? -1 : unhex((unsigned char) s[1]);
+                //TODO: if error() is ever non-fatal, this may leave
+                //sptr past end of input
+                if ((x0 < 0) || (x1 < 0)) error(ctx, "invalid hex escape");
+                ch = (unsigned) ((x0 << 4) | x1);
+                s += 2;
+                break;
+            }
+            default: error(ctx, "invalid string escape 0x%02x", ch);
+            }
+        }
+        // tmp holds 256 bytes: 255 of text plus the terminating NUL
+        if (len == 255) error(ctx, "string constant too long");
+        ctx->tmp[len++] = (char) ch;
+    }
+    ctx->tmp[len] = 0;
+    ctx->sptr = s;
+    return tSTRING;
+}
+
+// Finish a numeric token whose text has already been scanned.
+//
+// ctx - tokenizer state; tmp, num, sptr and tok are updated
+// n   - numeric value of the token
+// str - first character of the token text
+// len - length of the token text in bytes
+//
+// Copies the text into ctx->tmp, stores the value, advances the source
+// pointer by len and returns tNUMBER. Text longer than 255 bytes is
+// reported through error().
+token_t next_num(Ctx ctx, u32 n, const char* str, size_t len) {
+    // tmp must hold the text plus a terminating NUL
+    if (len >= sizeof(ctx->tmp)) {
+        error(ctx, "number too large");
+    }
+
+    memcpy(ctx->tmp, str, len);
+    ctx->tmp[len] = 0;
+
+    ctx->sptr += len;
+    ctx->num = n;
+    ctx->tok = tNUMBER;
+    return tNUMBER;
+}
+
+// Compare two length-delimited byte strings for equality.
+//
+// s1, l1 - first string and its length in bytes
+// s2, l2 - second string and its length in bytes
+//
+// Returns 1 when the lengths match and all bytes are equal, otherwise 0.
+int streq(const char* s1, unsigned l1, const char* s2, unsigned l2) {
+    if (l1 != l2) {
+        return 0;
+    }
+    return memcmp(s1, s2, l1) == 0;
+}
+
+// Finish an identifier-shaped token: classify it as a keyword or a name.
+//
+// ctx - tokenizer state; tmp, num, sptr and tok are updated
+// str - first character of the word
+// len - length of the word in bytes
+//
+// Copies the word into ctx->tmp, clears ctx->num, advances the source
+// pointer by len and returns the keyword token for a reserved word or
+// tNAME for anything else. Words longer than 255 bytes are reported
+// through error().
+token_t next_word(Ctx ctx, const char* str, size_t len) {
+    static const struct {
+        const char* text;
+        unsigned len;
+        token_t tok;
+    } keywords[] = {
+        { "if", 2, tIF },
+        { "for", 3, tFOR },
+        { "var", 3, tVAR },
+        { "case", 4, tCASE },
+        { "func", 4, tFUNC },
+        { "else", 4, tELSE },
+        { "break", 5, tBREAK },
+        { "while", 5, tWHILE },
+        { "switch", 6, tSWITCH },
+        { "struct", 6, tSTRUCT },
+        { "return", 6, tRETURN },
+    };
+
+    if (len > 255) error(ctx, "word too large");
+
+    memcpy(ctx->tmp, str, len);
+    ctx->tmp[len] = 0;
+    ctx->num = 0;
+    ctx->sptr += len;
+
+    for (unsigned i = 0; i < sizeof(keywords) / sizeof(keywords[0]); i++) {
+        if (streq(str, len, keywords[i].text, keywords[i].len)) {
+            ctx->tok = keywords[i].tok;
+            return ctx->tok;
+        }
+    }
+
+    ctx->tok = tNAME;
+    return tNAME;
+}
+
+#define TOKEN(t) { ctx->sptr++; return ctx->tok = t; } // consume one character and return token t; needs ctx in scope
+#define TOKEN2(t) { ctx->sptr+=2; return ctx->tok = t; } // same, for a two-character token
+
+token_t _next(Ctx ctx, int misc) { // scan one token from ctx->sptr; misc != 0 makes newlines come back as tEOL instead of being skipped
+ for (;;) { // each pass skips one piece of whitespace/comment or returns a token
+ const char* s = ctx->sptr;
+
+ switch (*s) {
+ case 0: // NUL terminator = end of input; sptr is not advanced, so tEOF repeats
+ return ctx->tok = tEOF;
+ case '\n':
+ ctx->linenumber++;
+ ctx->sptr++;
+ ctx->line = ctx->sptr; // remember where the new line starts, for error()
+ if (misc) return ctx->tok = tEOL;
+ continue;
+ case ' ':
+ case '\t':
+ case '\r':
+ ctx->sptr++;
+ continue;
+ case '0':
+ if (s[1] == 'x') { // hexadecimal: 0x followed by hex digits
+ u32 n = 0;
+ s++;
+ for (;;) {
+ s++;
+ int x = unhex(*s);
+ if (x < 0) return next_num(ctx, n, ctx->sptr, s - ctx->sptr); // first non-hex char ends the number; a bare 0x yields 0
+ n = (n << 4) | x;
+ }
+ }
+ if (s[1] == 'b') { // binary: 0b followed by 0/1 digits
+ u32 n = 0;
+ s += 2;
+ while ((*s == '1') || (*s == '0')) {
+ n = (n << 1) | (*s - '0');
+ s++;
+ }
+ return next_num(ctx, n, ctx->sptr, s - ctx->sptr);
+ }
+ case '1' ... '9': { // decimal; a '0' that is not 0x/0b deliberately falls through to here
+ u32 n = 0;
+ for (;;) {
+ switch (*s) {
+ case '0' ... '9':
+ n = (n * 10) + (*s - '0'); // u32 arithmetic: no overflow check is made
+ break;
+ default:
+ return next_num(ctx, n, ctx->sptr, s - ctx->sptr);
+ }
+ s++;
+ }
+ }
+ case 'a' ... 'z': // identifier or keyword: letter or '_' then letters, digits, '_'
+ case 'A' ... 'Z':
+ case '_':
+ for (;;) {
+ s++;
+ switch (*s) {
+ case '0' ... '9':
+ case 'a' ... 'z':
+ case 'A' ... 'Z':
+ case '_':
+ break;
+ default:
+ return next_word(ctx, ctx->sptr, s - ctx->sptr);
+ }
+ }
+ case '.': TOKEN(tDOT);
+ case ',': TOKEN(tCOMMA);
+ case ':': TOKEN(tCOLON);
+ case ';': TOKEN(tSEMI);
+ case '[': TOKEN(tOBRACK);
+ case ']': TOKEN(tCBRACK);
+ case '{': TOKEN(tOBRACE);
+ case '}': TOKEN(tCBRACE);
+ case '(': TOKEN(tOPAREN);
+ case ')': TOKEN(tCPAREN);
+ case '+': if (s[1] == '+') TOKEN2(tINCR) else TOKEN(tPLUS); // one character of lookahead picks the two-character form
+ case '-': if (s[1] == '-') TOKEN2(tDECR) else TOKEN(tMINUS);
+ case '*': TOKEN(tSTAR);
+ case '^': TOKEN(tCARET);
+ case '=': if (s[1] == '=') TOKEN2(tEQ) else TOKEN(tASSIGN);
+ case '&': if (s[1] == '&') TOKEN2(tAND) else TOKEN(tAMP);
+ case '|': if (s[1] == '|') TOKEN2(tOR) else TOKEN(tPIPE);
+ case '>': if (s[1] == '=') TOKEN2(tGE) else TOKEN(tGT);
+ case '<': if (s[1] == '=') TOKEN2(tLE) else TOKEN(tLT);
+ case '!': if (s[1] == '=') TOKEN2(tNE) else TOKEN(tBANG);
+ case '/':
+ if (s[1] == '/') { // line comment: skip up to, not including, the newline
+ while ((*s != '\n') && (*s != 0)) s++;
+ ctx->sptr = s;
+ continue;
+ } else {
+ TOKEN(tSLASH);
+ }
+ case '"': return next_string(ctx, ctx->sptr + 1); // next_string sets sptr itself; tok is set by the next() wrapper
+ default:
+ error(ctx, "unknown character '%c' (0x%02x)\n", // error() exits, so the loop does not spin on the bad character
+ ((*s > ' ') && (*s < 128)) ? *s : '.', *s);
+ }
+ }
+}
+
+// Advance to the next token and record it as the current one.
+//
+// ctx  - tokenizer state
+// misc - passed through to _next (non-zero reports newlines as tEOL)
+//
+// Returns the new current token, which is also stored in ctx->tok.
+token_t next(Ctx ctx, int misc) {
+    token_t tok = _next(ctx, misc);
+    ctx->tok = tok;
+    return tok;
+}
+
+// Print a NUL terminated string to stdout as a quoted string constant.
+//
+// s - string to print
+//
+// Tab and newline are written as \t and \n, any other character
+// outside ' '..'~' as \x followed by its code in hex, and the double
+// quote and backslash are written with a leading backslash.
+void printstr(const char* s) {
+    printf("\"");
+    for (;;) {
+        unsigned ch = *s++;
+        if (ch == 0) {
+            break;
+        }
+        if (ch == 9) {
+            printf("\\t");
+        } else if (ch == 10) {
+            printf("\\n");
+        } else if ((ch < ' ') || (ch > '~')) {
+            printf("\\x%02x", ch);
+        } else if (ch == '"') {
+            printf("\\\"");
+        } else if (ch == '\\') {
+            printf("\\\\");
+        } else {
+            printf("%c", ch);
+        }
+    }
+    printf("\"");
+}
+
+// Print the current token to stdout (used by the token dump in main).
+//
+// ctx - tokenizer state; tok selects the form, num and tmp supply the
+//       value for numbers, names and strings
+void print(Ctx ctx) {
+    token_t tok = ctx->tok;
+
+    if (tok == tNUMBER) {
+        printf("#%u ", ctx->num);
+    } else if (tok == tNAME) {
+        printf("@%s ", ctx->tmp);
+    } else if (tok == tEOL) {
+        printf("\n");
+    } else if (tok == tSTRING) {
+        printstr(ctx->tmp);
+    } else {
+        // everything else prints as its fixed spelling
+        printf("%s ", tnames[tok]);
+    }
+}
+
+// Report (fatally) that something other than the current token was needed.
+//
+// ctx  - tokenizer state; the current token is named in the message
+// what - description of what was expected
+void expected(Ctx ctx, const char* what) {
+    const char* found = tnames[ctx->tok];
+    error(ctx, "expected %s, found %s", what, found);
+}
+
+void expect(Ctx ctx, token_t tok) {
+ if (ctx->tok != tok) {
+ error(ctx, "expected %s, found %s", tnames[tok], tnames[ctx->tok]);
+ }
+}
+
+// Check that the current token is tok and consume it; report a fatal
+// error when a different token is current.
+//
+// ctx - tokenizer state
+// tok - the token that must be current
+void require(Ctx ctx, token_t tok) {
+    if (ctx->tok != tok) {
+        error(ctx, "expected %s, found %s", tnames[tok], tnames[ctx->tok]);
+    }
+    next(ctx, 0);
+}
+
+// Consume a name token and return it as an interned String.
+//
+// ctx  - parser state; the current token must be tNAME
+// what - description used in the error message when it is not
+//
+// Returns the interned name; the token after it becomes current.
+String parse_name(Ctx ctx, const char* what) {
+    if (ctx->tok != tNAME) {
+        expected(ctx, what);
+    }
+    // intern before advancing: next() overwrites ctx->tmp
+    unsigned len = strlen(ctx->tmp);
+    String name = mkstring(ctx, ctx->tmp, len);
+    next(ctx, 0);
+    return name;
+}
+
+// Consume a type name and look it up in the type table.
+//
+// ctx - parser state; the current token must be the type's name
+//
+// Returns the matching type. An unknown name is reported through
+// error(); the NULL return after it is only reached if error() returns.
+Type parse_type(Ctx ctx) {
+    String tname = parse_name(ctx, "type name");
+
+    // names are interned, so comparing pointers is enough
+    Type cur = ctx->typetab;
+    while (cur != NULL) {
+        if (cur->name == tname) {
+            return cur;
+        }
+        cur = cur->next;
+    }
+
+    error(ctx, "unknown type name '%s'", tname->text);
+    return NULL;
+}
+
+void parse_function_body(Ctx ctx) { // stub: function bodies are not parsed yet; parse_function calls this after consuming the open brace
+ error(ctx, "unsupported"); // fatal, so any function definition currently stops compilation
+}
+
+// Parse a function declaration or definition. The "func" keyword has
+// already been consumed by the caller.
+//
+// Accepted form:
+//   name "(" [ pname ptype { "," pname ptype } ] ")" [ rtype ] ( ";" | "{" body )
+//
+// The return type defaults to void. A function may be declared once and
+// defined once; a definition following a declaration must match it in
+// parameter count, parameter types and return type. New functions are
+// added to the front of ctx->symtab. All violations are reported
+// through error(), which exits.
+void parse_function(Ctx ctx) {
+    SymbolRec param[FNMAXARGS];
+    unsigned n = 0;
+    String fname = parse_name(ctx, "function name");
+    Type ftype = ctx->type_void;
+
+    // the used entries are copied wholesale into the FuncRec below, so
+    // make sure no field (e.g. func) is left indeterminate
+    memset(param, 0, sizeof(param));
+
+    require(ctx, tOPAREN);
+
+    // process parameters
+    if (ctx->tok != tCPAREN) {
+        for (;;) {
+            if (n == FNMAXARGS) {
+                error(ctx, "too many parameters (%d)", FNMAXARGS);
+            }
+
+            String name = parse_name(ctx, "parameter name");
+            Type type = parse_type(ctx);
+
+            // names are interned, so pointer comparison finds duplicates
+            for (unsigned i = 0; i < n; i++) {
+                if (param[i].name == name) {
+                    error(ctx, "duplicate parameter name '%s'", name->text);
+                }
+            }
+
+            param[n].name = name;
+            param[n].type = type;
+            param[n].flags = SF_FRAMEREL;
+            param[n].posn = -4 * (n + 1);
+            param[n].regno = 0;
+            param[n].next = NULL;
+            param[n].func = NULL;
+            n++;
+
+            if (ctx->tok != tCOMMA) {
+                break;
+            }
+            next(ctx, 0);
+        }
+    }
+
+    require(ctx, tCPAREN);
+
+    // anything other than ';' or '{' here must be the return type
+    if ((ctx->tok != tSEMI) && (ctx->tok != tOBRACE)) {
+        ftype = parse_type(ctx);
+    }
+
+    int isdef = 0;
+    if (ctx->tok == tSEMI) {
+        // declaration
+        next(ctx, 0);
+    } else if (ctx->tok == tOBRACE) {
+        // definition
+        next(ctx, 0);
+        isdef = 1;
+    } else {
+        expected(ctx, "semi or open brace");
+    }
+
+    // Look for an existing declaration or definition of this function
+    // and if it exists, ensure that we are in agreement with it
+    Symbol sym;
+    for (sym = ctx->symtab; sym != NULL; sym = sym->next) {
+        if (sym->name != fname) {
+            continue;
+        }
+        if (!(sym->flags & SF_FUNC)) {
+            error(ctx, "redefining variable as function '%s'", fname->text);
+        }
+        if (!isdef) {
+            error(ctx, "redeclared function '%s'", fname->text);
+        }
+        if (sym->flags & SF_DEFINED) {
+            error(ctx, "redefined function '%s'", fname->text);
+        }
+
+        // parameter names may differ; count and types may not
+        int bad = (n != sym->func->pcount) || (ftype != sym->func->type);
+        for (unsigned i = 0; !bad && (i < n); i++) {
+            if (sym->func->param[i].type != param[i].type) {
+                bad = 1;
+            }
+        }
+        if (bad) {
+            error(ctx, "function declaration/definition mismatch for '%s'", fname->text);
+        }
+        break;
+    }
+
+    // if there was no existing record of this function, create one now
+    if (sym == NULL) {
+        Func func = malloc(sizeof(FuncRec) + sizeof(SymbolRec) * n);
+        if (func == NULL) {
+            error(ctx, "cannot allocate memory");
+        }
+        func->scope.next = NULL;
+        func->scope.first = NULL;
+        func->type = ftype;
+        func->pcount = n;
+        memcpy(func->param, param, sizeof(SymbolRec) * n);
+
+        sym = malloc(sizeof(SymbolRec));
+        if (sym == NULL) {
+            error(ctx, "cannot allocate memory");
+        }
+        sym->name = fname;
+        sym->type = NULL;
+        sym->flags = SF_FUNC;
+        sym->posn = 0;
+        sym->regno = 0;
+        sym->func = func;
+
+        sym->next = ctx->symtab;
+        ctx->symtab = sym;
+    }
+
+    // handle definition if it is one
+    if (isdef) {
+        sym->flags |= SF_DEFINED;
+        parse_function_body(ctx);
+    }
+}
+
+void parse_global_var(Ctx ctx) { // stub: global variables are not parsed yet; parse_program calls this after consuming "var"
+ error(ctx, "unsupported"); // fatal, so any global variable currently stops compilation
+}
+
+// Parse a whole program: a sequence of function and global variable
+// definitions, ending at end of input.
+//
+// ctx - parser state with a source file loaded; the first token is
+//       fetched here
+//
+// Returns once tEOF is reached. Anything other than "func" or "var" at
+// the top level is reported through error(), which exits.
+void parse_program(Ctx ctx) {
+    next(ctx, 0);
+    for (;;) {
+        switch (ctx->tok) {
+        case tFUNC:
+            next(ctx, 0);
+            parse_function(ctx);
+            break;
+        case tVAR:
+            next(ctx, 0);
+            parse_global_var(ctx);
+            break;
+        case tEOF:
+            // must leave the function: a bare break only exits the
+            // switch and the loop would then spin on tEOF forever
+            return;
+        default:
+            expected(ctx, "func or var");
+        }
+    }
+}
+
+// Entry point: tlc <source file> [<output file>]
+//
+// Loads the source file and parses it. Returns 0 on success; every
+// failure goes through error(), which exits with status 1.
+int main(int argc, char **argv) {
+    const char *outname = "out.hex";
+
+    CtxRec ctx;
+    init_ctx(&ctx);
+
+    if (argc < 2) {
+        ctx.filename = "<commandline>";
+        // error() echoes the current source line; there is none yet, so
+        // give it an empty one instead of the NULL left by init_ctx
+        ctx.line = "";
+        error(&ctx, "no file specified");
+    }
+
+    ctx.filename = argv[1];
+    if (argc == 3)
+        outname = argv[2];
+    // nothing is written yet; the output name is kept for when code
+    // generation exists
+    (void) outname;
+
+    load(&ctx, argv[1]);
+    ctx.line = ctx.sptr;
+    ctx.linenumber = 1;
+
+#if 0
+    // debugging aid: dump the token stream instead of parsing
+    do {
+        next(&ctx, 1);
+        print(&ctx);
+    } while (ctx.tok != tEOF);
+    printf("\n");
+#else
+    parse_program(&ctx);
+#endif
+
+    return 0;
+}
diff --git a/test/hello.tl b/test/hello.tl
@@ -0,0 +1,18 @@
+
+func boring() ;
+
+func add(a i32, b i32) i32;
+
+func add(a i32, b i32) i32 {
+ return a + b;
+}
+
+func main(argc i32, argv string) {
+ var n i32 = 0;
+
+ while n < 100 {
+ n = n + 1;
+ }
+ "hello cruel\n world!";
+ return n - 1;
+}