commit 0abd0555d53c600b54f1731d44ceda56eebe4a48
parent cf4cc2d4398bcd919784036ae1b7c77298b9e684
Author: Brian Swetland <swetland@frotz.net>
Date: Tue, 10 Mar 2020 19:14:08 -0700
compiler: type system work
- rename mkstring() to make_string() for verb_noun() style
- same with setitem() becoming set_item()
- provide make_object() and make_type() helpers to declutter
object and type creation (and add_type(), setup_type())
- make_builtin(), parse_param(), and parse_function() all
get shorter as a result
- parse_type(), parse_array_type(), and parse_struct_type()
handle the bulk of parsing.
- todo: refactor parse_function() so the type parsing
code can be shared
- introduce "type" keyword and parse_type_def()
- we can now define complex types and type aliases!
- new -p (print context) option allows for dumping
state at the end of compilation
- dump the type table when -p is active
Diffstat:
M | src/compiler.c | | | 389 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------- |
1 file changed, 267 insertions(+), 122 deletions(-)
diff --git a/src/compiler.c b/src/compiler.c
@@ -53,6 +53,8 @@ typedef enum {
tTRUE = 0x030, tFALSE = 0x031, tNIL = 0x032,
// Idenitfiers, Numbers, Strings
tNAME = 0x033, tNUMBER = 0x034, tSTRING = 0x035,
+ // To be resorted later...
+ tTYPE = 0x036,
} token_t;
char *tnames[] = {
@@ -72,6 +74,7 @@ char *tnames[] = {
"break", "continue", "switch", "case",
"true", "false", "nil",
"<NAME>", "<NUMBER>", "<STRING>",
+ "type",
};
// encodings for ops in Items
@@ -127,12 +130,12 @@ enum {
struct ObjectRec {
u32 kind;
+ String name;
+ Type type;
+ Object first; // list of...
+ Object next; // link in list
u32 flags;
u32 value;
- Object next; // link in list
- Object first; // list of...
- Type type;
- String name;
Fixup fixups; // forward func refs
};
@@ -158,9 +161,9 @@ enum { // value
struct TypeRec {
u32 kind;
+ Type base; // Pointer-to, Func-return, or Array-elem
Object obj; // if we're non-anonymous
Object first; // list of Params or Fields
- Type base; // Pointer-to, Func-return, or Array-elem
u32 len; // of Array, num of Params
u32 size; // of Type in Memory
};
@@ -172,11 +175,17 @@ enum {
tBool,
tInt32,
tNil,
- tString,
tPointer,
tArray,
- rRecord,
+ tSlice,
+ tRecord,
tFunc,
+ tUndefined,
+};
+
+const char* type_id_tab[] = {
+ "void", "byte", "bool", "int32", "nil", "*", "[]", "[]",
+ "struct", "func", "undef",
};
// ------------------------------------------------------------------
@@ -291,7 +300,7 @@ void fixup_branches_fwd(Fixup list);
// address where we will emit the next instruction
void fixup_branch_fwd(u32 addr);
-String mkstring(const char* text, u32 len) {
+String make_string(const char* text, u32 len) {
String str = ctx.strtab;
while (str != nil) {
if ((str->len == len) && (memcmp(text, str->text, len) == 0)) {
@@ -310,29 +319,64 @@ String mkstring(const char* text, u32 len) {
return str;
}
-Type make_type(const char* text, u32 len, u32 kind, u32 size) {
- String str = mkstring(text, len);
- Type type = malloc(sizeof(TypeRec));
+Object make_object(u32 kind, String name, Type type,
+ Object first, u32 flags, u32 value) {
Object obj = malloc(sizeof(ObjectRec));
+ obj->kind = kind;
+ obj->name = name;
+ obj->type = type;
+ obj->first = first;
+ obj->next = nil;
+ obj->flags = flags;
+ obj->value = value;
+ obj->fixups = nil;
+ return obj;
+}
+Type make_type(u32 kind, Type base, Object obj,
+ Object first, u32 len, u32 size) {
+ Type type = malloc(sizeof(TypeRec));
type->kind = kind;
+ type->base = base;
type->obj = obj;
- type->first = nil;
- type->base = nil;
- type->len = 0;
+ type->first = first;
+ type->len = len;
type->size = size;
+ return type;
+}
- obj->kind = oType;
- obj->flags = 0;
- obj->value = 0;
- obj->next = nil;
- obj->first = nil;
- obj->type = type;
- obj->name = str;
+Object make_param(String name, Type type, u32 flags, u32 value) {
+ Object param = malloc(sizeof(ObjectRec));
+ param->kind = oParam;
+ param->name = name;
+ param->type = type;
+ param->first = nil;
+ param->next = nil;
+ param->flags = flags;
+ param->value = value;
+ param->fixups = nil;
+ return param;
+}
+
+void set_item(Item itm, u32 kind, Type type, u32 r, u32 a, u32 b) {
+ itm->kind = kind;
+ itm->flags = 0;
+ itm->type = type;
+ itm->r = r;
+ itm->a = a;
+ itm->b = b;
+}
- obj->next = ctx.typetab;
- ctx.typetab = obj;
+void add_type(Type type, String name) {
+ type->obj = make_object(oType, name, type, nil, 0, 0);
+ type->obj->next = ctx.typetab;
+ ctx.typetab = type->obj;
+}
+Type setup_type(const char* text, u32 tlen, u32 kind, u32 size) {
+ String name = make_string(text, tlen);
+ Type type = make_type(kind, nil, nil, nil, 0, size);
+ add_type(type, name);
return type;
}
@@ -346,12 +390,13 @@ void init_ctx() {
memset(&ctx, 0, sizeof(ctx));
// install built-in basic types
- ctx.type_void = make_type("void", 4, tVoid, 0);
- ctx.type_byte = make_type("byte", 4, tByte, 1);
- ctx.type_bool = make_type("bool", 4, tBool, 1);
- ctx.type_int32 = make_type("i32", 3, tInt32, 4);
- ctx.type_nil = make_type("nil", 3, tNil, 4);
- ctx.type_string = make_type("str", 3, tString, 8);
+ ctx.type_void = setup_type("void", 4, tVoid, 0);
+ ctx.type_byte = setup_type("byte", 4, tByte, 1);
+ ctx.type_bool = setup_type("bool", 4, tBool, 1);
+ ctx.type_int32 = setup_type("i32", 3, tInt32, 4);
+ ctx.type_nil = setup_type("nil", 3, tNil, 4);
+ ctx.type_string = setup_type("str", 3, tSlice, 8);
+ ctx.type_string->base = ctx.type_byte;
ctx.scope = &(ctx.global);
ctx.line = "";
@@ -511,6 +556,7 @@ token_t next_word(const char* str, size_t len) {
if (streq(str, len, "func", 4)) return ctx.tok = tFUNC;
if (streq(str, len, "else", 4)) return ctx.tok = tELSE;
if (streq(str, len, "true", 4)) return ctx.tok = tTRUE;
+ if (streq(str, len, "type", 4)) return ctx.tok = tTYPE;
break;
case 5:
if (streq(str, len, "break", 5)) return ctx.tok = tBREAK;
@@ -771,15 +817,6 @@ void add_object_fixup(Object obj) {
obj->fixups = fixup;
}
-void setitem(Item itm, u32 kind, Type type, u32 r, u32 a, u32 b) {
- itm->kind = kind;
- itm->flags = 0;
- itm->type = type;
- itm->r = r;
- itm->a = a;
- itm->b = b;
-}
-
u32 invert_relop(u32 op) {
if (op > 5) { abort(); }
return invert_relop_tab[op];
@@ -790,30 +827,30 @@ void parse_expr(Item x);
void parse_operand(Item x) {
if (ctx.tok == tNUMBER) {
- setitem(x, iConst, ctx.type_int32, 0, ctx.num, 0);
+ set_item(x, iConst, ctx.type_int32, 0, ctx.num, 0);
} else if (ctx.tok == tSTRING) {
error("unsupported string const");
} else if (ctx.tok == tTRUE) {
- setitem(x, iConst, ctx.type_bool, 0, 1, 0);
+ set_item(x, iConst, ctx.type_bool, 0, 1, 0);
} else if (ctx.tok == tFALSE) {
- setitem(x, iConst, ctx.type_bool, 0, 0, 0);
+ set_item(x, iConst, ctx.type_bool, 0, 0, 0);
} else if (ctx.tok == tNIL) {
- setitem(x, iConst, ctx.type_nil, 0, 0, 0);
+ set_item(x, iConst, ctx.type_nil, 0, 0, 0);
} else if (ctx.tok == tOPAREN) {
next();
parse_expr(x);
require(tCPAREN);
return;
} else if (ctx.tok == tNAME) {
- String str = mkstring(ctx.tmp, strlen(ctx.tmp));
+ String str = make_string(ctx.tmp, strlen(ctx.tmp));
Object obj = find(str);
if (obj == nil) {
error("unknown identifier '%s'", str->text);
}
if (obj->kind == oParam) {
- setitem(x, iParam, obj->type, 0, obj->value, 0);
+ set_item(x, iParam, obj->type, 0, obj->value, 0);
} else if (obj->kind == oFunc) {
- setitem(x, iFunc, obj->type, 0, 0, 0);
+ set_item(x, iFunc, obj->type, 0, 0, 0);
} else {
error("unsupported identifier");
}
@@ -932,24 +969,118 @@ String parse_name(const char* what) {
if (ctx.tok != tNAME) {
error("expected %s, found %s", what, tnames[ctx.tok & 0x7F]);
}
- String str = mkstring(ctx.tmp, strlen(ctx.tmp));
+ String str = make_string(ctx.tmp, strlen(ctx.tmp));
next();
return str;
}
-Type parse_type() {
- String tname = parse_name("type name");
+Type find_type(String name) {
Object obj = ctx.typetab;
while (obj != nil) {
- if (obj->name == tname) {
+ if (obj->name == name) {
return obj->type;
}
obj = obj->next;
}
- error("unknown type name '%s'", tname->text);
return nil;
}
+// fwd_ref_ok indicates that an undefined typename
+// may be treated as a forward reference. This is
+// only used for pointers (size their size does not
+// depend on their target).
+Type parse_type(bool fwd_ref_ok);
+
+Type parse_struct_type() {
+ Type rectype = make_type(tRecord, nil, nil, nil, 0, 0);
+ Object last = nil;
+ require(tOBRACE);
+ while (true) {
+ if (ctx.tok == tCBRACE) {
+ next();
+ break;
+ }
+ String name = parse_name("field name");
+ Type type = parse_type(false);
+ Object field = make_object(oField, name, type, nil, 0, rectype->size);
+
+ // TODO sub-word packing
+ rectype->size += (type->size + 3) & (~3);
+ rectype->len++;
+
+ // add field to record
+ if (last == nil) {
+ rectype->first = field;
+ } else {
+ last->next = field;
+ }
+ last = field;
+
+ if (ctx.tok != tCBRACE) {
+ require(tCOMMA);
+ }
+ }
+ return rectype;
+}
+
+Type parse_array_type() {
+ if (ctx.tok == tCBRACK) {
+ next();
+ return make_type(tSlice, parse_type(false), nil, nil, 0, 8);
+ } else {
+ ItemRec x;
+ parse_expr(&x);
+ require(tCBRACK);
+ if ((x.kind != iConst) || (x.type != ctx.type_int32)) {
+ error("array size must be integer constant");
+ }
+ //XXX check for >0
+ Type base = parse_type(false);
+ u32 sz = x.a * base->size;
+ if (sz < x.a) {
+ error("array size overflow");
+ }
+ return make_type(tArray, base, nil, nil, x.a, sz);
+ }
+}
+
+Type parse_func_type() {
+ error("func type unsupported");
+ return nil;
+}
+
+Type parse_type(bool fwd_ref_ok) {
+ if (ctx.tok == tSTAR) { // pointer-to
+ next();
+ return make_type(tPointer, parse_type(true), nil, nil, 0, 0);
+ } else if (ctx.tok == tOBRACK) { // array-of
+ next();
+ return parse_array_type();
+ } else if (ctx.tok == tFUNC) {
+ next();
+ return parse_func_type();
+ } else if (ctx.tok == tSTRUCT) {
+ next();
+ return parse_struct_type();
+ } else if (ctx.tok == tNAME) {
+ String name = make_string(ctx.tmp, strlen(ctx.tmp));
+ next();
+ Type type = find_type(name);
+ if (type == nil) {
+ if (fwd_ref_ok) {
+ type = make_type(tUndefined, nil, nil, nil, 0, 4);
+ add_type(type, name);
+ } else {
+ error("undefined type '%s' not usable here", name->text);
+ }
+ }
+ return type;
+ } else {
+ expected("type");
+ return nil;
+ }
+}
+
void parse_block();
void parse_while() {
@@ -1081,7 +1212,7 @@ void parse_block() {
gen_store(&y, &x);
} else if ((ctx.tok == tINC) || (ctx.tok == tDEC)) {
ItemRec y;
- setitem(&y, iConst, ctx.type_int32, 0, 1, 0);
+ set_item(&y, iConst, ctx.type_int32, 0, 1, 0);
next();
}
require(tSEMI);
@@ -1100,20 +1231,13 @@ void parse_function_body(Object fn) {
gen_epilogue(fn);
}
-
Object parse_param(String fname, u32 n, Object first, Object last) {
if (n == FNMAXARGS) {
error("too many parameters (%d) for '%s'", FNMAXARGS, fname->text);
}
- Object param = malloc(sizeof(ObjectRec));
- param->kind = oParam;
- param->flags = 0;
- param->value = n;
- param->next = nil;
- param->first = nil;
- param->name = parse_name("parameter name");
- param->type = parse_type();
- param->fixups = nil;
+ String pname = parse_name("parameter name");
+ Type ptype = parse_type(false);
+ Object param = make_param(pname, ptype, 0, n);
Object obj = first;
while (obj != nil) {
@@ -1130,53 +1254,21 @@ Object parse_param(String fname, u32 n, Object first, Object last) {
}
void make_builtin(const char* name, u32 id, Type p0, Type p1, Type rtn) {
- Type type = malloc(sizeof(TypeRec));
- Object obj = malloc(sizeof(ObjectRec));
-
- type->kind = tFunc;
- type->obj = obj;
- type->first = nil;
- type->base = rtn;
- type->len = 0;
- type->size = 0;
-
- obj->kind = oFunc;
- obj->flags = ofBuiltin;
- obj->value = id;
- obj->next = nil;
- obj->first = nil;
- obj->type = type;
- obj->name = mkstring(name, strlen(name));
- obj->fixups = nil;
+ String fname = make_string(name, strlen(name));
+ Type type = make_type(tFunc, rtn, nil, nil, 0, 0);
+ type->obj = make_object(oFunc, fname, type, nil, ofBuiltin, id);
if (p0 != nil) {
- Object param = malloc(sizeof(ObjectRec));
- obj->first = param;
+ Object param = make_param(make_string("a", 1), p0, 0, 0);
+ type->obj->first = param;
type->first = param;
- param->kind = oParam;
- param->flags = 0;
- param->value = 0;
- param->next = nil;
- param->first = nil;
- param->name = mkstring("a", 1);
- param->type = p0;
- param->fixups = nil;
type->len = 1;
if (p1 != nil) {
- param->next = malloc(sizeof(ObjectRec));
- param = param->next;
- param->kind = oParam;
- param->flags = 0;
- param->value = 1;
- param->next = nil;
- param->first = nil;
- param->name = mkstring("b", 1);
- param->type = p1;
- param->fixups = nil;
+ param->next = make_param(make_string("b", 1), p1, 0, 1);
type->len = 2;
}
}
- make_global(obj);
+ make_global(type->obj);
}
void parse_function() {
@@ -1184,7 +1276,7 @@ void parse_function() {
Object last = nil;
u32 n = 0;
String fname = parse_name("funcion name");
- Type ftype = ctx.type_void;
+ Type rettype = ctx.type_void;
require(tOPAREN);
@@ -1202,7 +1294,7 @@ void parse_function() {
require(tCPAREN);
if ((ctx.tok != tSEMI) && (ctx.tok != tOBRACE)) {
- ftype = parse_type();
+ rettype = parse_type(false);
}
int isdef = 0;
@@ -1228,7 +1320,7 @@ void parse_function() {
if (isdef && (obj->flags & ofDefined)) {
error("redefined function '%s'", fname->text);
}
- if (ftype != obj->type->base) {
+ if (rettype != obj->type->base) {
error("func '%s' return type differs from decl", fname->text);
}
if (obj->type->len != n) {
@@ -1246,25 +1338,9 @@ void parse_function() {
}
} else {
// if there was no existing record of this function, create one now
- Type type = malloc(sizeof(TypeRec));
- obj = malloc(sizeof(ObjectRec));
-
- type->kind = tFunc;
+ Type type = make_type(tFunc, rettype, nil, first, n, 0);
+ obj = make_object(oFunc, fname, type, first, 0, 0);
type->obj = obj;
- type->first = first;
- type->base = ftype;
- type->len = n;
- type->size = 0;
-
- obj->kind = oFunc;
- obj->flags = 0;
- obj->value = 0;
- obj->next = nil;
- obj->first = first;
- obj->type = type;
- obj->name = fname;
- obj->fixups = nil;
-
make_global(obj);
}
@@ -1280,6 +1356,27 @@ void parse_function() {
}
}
+void parse_type_def() {
+ String name = parse_name("type name");
+ Type type = parse_type(false);
+ Type prev = find_type(name);
+ if (prev == nil) {
+ add_type(type, name);
+ } else {
+ if (prev->kind != tUndefined) {
+ error("cannot redefine type '%s'\n", name->text);
+ }
+ prev->kind = type->kind;
+ prev->base = type->base;
+ prev->first = type->first;
+ prev->len = type->len;
+ prev->size = type->size;
+ prev->obj->type = type;
+ // XXX discard type
+ }
+ require(tSEMI);
+}
+
void parse_global_var() {
error("unsupported");
}
@@ -1292,6 +1389,10 @@ void parse_program() {
next();
parse_function();
break;
+ case tTYPE:
+ next();
+ parse_type_def();
+ break;
case tVAR:
next();
parse_global_var();
@@ -1305,6 +1406,7 @@ void parse_program() {
}
// ================================================================
+
u32 get_reg_tmp() {
u32 n = 8;
while (n < 12) {
@@ -1739,7 +1841,7 @@ void gen_start() {
}
void gen_end() {
- String str = mkstring("start", 5);
+ String str = make_string("start", 5);
Object obj = find(str);
while (obj != nil) {
if (obj->type->kind != tFunc) {
@@ -1817,10 +1919,48 @@ void gen_listing(const char* listfn, const char* srcfn) {
// ================================================================
+
+void dump_type(Type type, bool use_short_name) {
+ if (use_short_name && (type->obj != nil)) {
+ printf("%s", type->obj->name->text);
+ } else if (type->kind == tArray) {
+ printf("[%u]", type->len);
+ dump_type(type->base, true);
+ } else if (type->kind == tRecord) {
+ printf("struct {\n");
+ Object field = type->first;
+ while (field != nil) {
+ printf(" %s ", field->name->text);
+ dump_type(field->type, true);
+ printf(",\n");
+ field = field->next;
+ }
+ printf("}");
+ } else {
+ printf("%s", type_id_tab[type->kind]);
+ if ((type->kind == tPointer) || (type->kind == tSlice)) {
+ dump_type(type->base, true);
+ }
+ }
+}
+
+void dump_context() {
+ Object obj = ctx.typetab;
+ while (obj != nil) {
+ printf("type %s ", obj->name->text);
+ dump_type(obj->type, false);
+ printf(";\n");
+ obj = obj->next;
+ }
+}
+
+// ================================================================
+
int main(int argc, char **argv) {
const char *outname = "out.bin";
const char *lstname = nil;
const char *srcname = nil;
+ bool dump = false;
init_ctx();
ctx.filename = "<commandline>";
@@ -1840,6 +1980,8 @@ int main(int argc, char **argv) {
lstname = argv[2];
argc--;
argv++;
+ } else if (!strcmp(argv[1], "-p")) {
+ dump = true;
} else if (!strcmp(argv[1], "-A")) {
ctx.flags |= cfAbortOnError;
} else if (argv[1][0] == '-') {
@@ -1879,6 +2021,9 @@ int main(int argc, char **argv) {
if (lstname != nil) {
gen_listing(lstname, ctx.filename);
}
+ if (dump) {
+ dump_context();
+ }
#endif
return 0;