commit bf11df7b87b6877b0ed4d198b62fa339b136438e
parent 24d39cdff122eb375989894089d1fd21816b315c
Author: Brian Swetland <swetland@frotz.net>
Date: Tue, 17 Oct 2023 13:36:51 -0700
compiler: various structs, use named enums
Diffstat:
M | compiler/compiler.spl | | | 183 | +++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------- |
1 file changed, 122 insertions(+), 61 deletions(-)
diff --git a/compiler/compiler.spl b/compiler/compiler.spl
@@ -37,6 +37,118 @@ struct String {
text [256]u8,
};
+enum SymbolKind { // what a Symbol denotes; this is the type of Symbol.kind
+ SYMBOL_VAR, // presumably a variable (inferred from the name)
+ SYMBOL_FLD, // struct field
+ SYMBOL_PTR, // struct *field
+ SYMBOL_DEF, // enum
+ SYMBOL_FN, // presumably a function (inferred from the name)
+};
+
+struct Symbol { // a named entry carrying a type and a SymbolKind
+ next *Symbol, // link to another Symbol; presumably chains entries of a Scope or struct Type -- TODO confirm
+ name *String, // name of the symbol
+ type *Type, // type associated with the symbol
+ kind SymbolKind, // which sort of symbol this is
+};
+
+enum ScopeKind { // type of Scope.kind; member meanings below are inferred from their names
+ SCOPE_GLOBAL,
+ SCOPE_FUNC,
+ SCOPE_BLOCK,
+ SCOPE_LOOP, // NOTE(review): presumably distinguished so break/continue can find their loop -- confirm
+ SCOPE_STRUCT,
+};
+
+struct Scope { // a scope: a kind, two Symbol pointers, and a link to another Scope
+ parent *Scope, // presumably the enclosing scope -- TODO confirm
+ first *Symbol, // presumably the head of this scope's Symbol list (chained via Symbol.next)
+ last *Symbol, // presumably the tail of that list -- TODO confirm
+ kind ScopeKind, // which construct this scope belongs to
+};
+
+enum TypeKind { // type of Type.kind; selects which other Type fields are meaningful
+ TYPE_VOID,
+ TYPE_BOOL,
+ TYPE_U8,
+ TYPE_U32,
+ TYPE_NIL,
+ TYPE_POINTER, // uses Type.of (per the field comment on Type.of)
+ TYPE_ARRAY, // uses Type.of (per the field comment on Type.of)
+ TYPE_SLICE, // uses Type.of (per the field comment on Type.of)
+ TYPE_STR,
+ TYPE_STRUCT, // uses Type.fields (per the field comment on Type.fields)
+ TYPE_FUNC,
+ TYPE_ENUM,
+ TYPE_UNDEFINED, // NOTE(review): presumably a type referenced before its definition -- confirm
+};
+
+struct Type { // description of one type; kind says which of the fields below apply
+ next *Type, // link to another Type; list usage is not visible in this chunk
+ name *String, // type name
+ of *Type, // for slice, array, ptr
+ fields *Symbol, // for struct
+ kind TypeKind, // discriminator for this type
+ count u32, // presumably the element count for arrays -- TODO confirm
+};
+
+// ================================================================
+// lexical scanner tokens
+
+// token classes (tok & tcMASK)
+enum TokenClass{ // operator classes; each value equals the first Token of the matching 8-entry row (assuming Token numbering starts at 0)
+ tcRELOP = 0x08, tcADDOP = 0x10, tcMULOP = 0x18, // rows starting at tEQ, tPLUS, tSTAR
+ tcAEQOP = 0x20, tcMEQOP = 0x28, tcMASK = 0xF8, // rows starting at tADDEQ, tMULEQ; tcMASK clears the low 3 bits
+};
+
+enum Token { // token codes; operator rows hold 8 entries each so that (tok & tcMASK) yields the TokenClass
+ // EndMarks, Braces, Brackets, Parens
+ tEOF, tEOL, tOBRACE, tCBRACE, tOBRACK, tCBRACK, tOPAREN, tCPAREN,
+ // RelOps (do not reorder)
+ tEQ, tNE, tLT, tLE, tGT, tGE, tx0E, tx0F, // tx.. names are unused filler slots padding the row to 8
+ // AddOps (do not reorder)
+ tPLUS, tMINUS, tPIPE, tCARET, tx14, tx15, tx16, tx17,
+ // MulOps (do not reorder)
+ tSTAR, tSLASH, tPERCENT, tAMP, tLEFT, tRIGHT, tx1E, tx1F,
+ // AsnOps (do not reorder)
+ tADDEQ, tSUBEQ, tOREQ, tXOREQ, tx24, tx25, tx26, tx27,
+ tMULEQ, tDIVEQ, tMODEQ, tANDEQ, tLSEQ, tRSEQ, t2E, t2F, // NOTE(review): fillers named t2E/t2F, unlike the tx.. pattern used elsewhere
+ // Various, UnaryNot, LogicalOps,
+ tSEMI, tCOLON, tDOT, tCOMMA, tNOT, tAND, tOR, tBANG,
+ tASSIGN, tINC, tDEC,
+ // Keywords
+ tNEW, tFN, tSTRUCT, tVAR, tENUM,
+ tIF, tELSE, tWHILE,
+ tBREAK, tCONTINUE, tRETURN,
+ tFOR, tSWITCH, tCASE,
+ tTRUE, tFALSE, tNIL,
+ tIDN, tNUM, tSTR, // identifier, number, string literal (see "<ID>", "<NUM>", "<STR>" in tnames)
+ // used internal to the lexer but never returned
+ tSPC, tINV, tDQT, tSQT, tMSC,
+};
+
+var tnames []str = { // printable name for each Token, listed in the same order as the Token enum
+ "<EOF>", "<EOL>", "{", "}", "[", "]", "(", ")",
+ "==", "!=", "<", "<=", ">", ">=", "", "", // "" entries line up with the unused filler tokens
+ "+", "-", "|", "^", "", "", "", "",
+ "*", "/", "%", "&", "<<", ">>", "", "",
+ "+=", "-=", "|=", "^=", "", "", "", "",
+ "*=", "/=", "%=", "&=", "<<=", ">>=", "", "",
+ ";", ":", ".", ",", "~", "&&", "||", "!", // by position: tNOT is "~" and tBANG is "!"
+ "=", "++", "--",
+ "new", "fn", "struct", "var", "enum",
+ "if", "else", "while",
+ "break", "continue", "return",
+ "for", "switch", "case",
+ "true", "false", "nil",
+ "<ID>", "<NUM>", "<STR>",
+ "<SPC>", "<INV>", "<DQT>", "<SQT>", "<MSC>", // lexer-internal tokens
+};
+
+
+// ================================================================
+// lexer / parser / compiler context
+
struct Context {
stringlist *String, // intern table
@@ -46,7 +158,7 @@ struct Context {
flags u32,
cc u32, // scanner: next character
- tok u32, // most recent token
+ tok Token, // most recent token
num u32, // for tNUM
tmp [256]u8, // for tIDN, tSTR
ident *String, // for tSTR
@@ -113,56 +225,6 @@ fn ctx_init() {
// ================================================================
// lexical scanner
-// token classes (tok & tcMASK)
-enum {
- tcRELOP = 0x08, tcADDOP = 0x10, tcMULOP = 0x18,
- tcAEQOP = 0x20, tcMEQOP = 0x28, tcMASK = 0xF8,
-};
-
-enum {
- // EndMarks, Braces, Brackets Parens
- tEOF, tEOL, tOBRACE, tCBRACE, tOBRACK, tCBRACK, tOPAREN, tCPAREN,
- // RelOps (do not reorder)
- tEQ, tNE, tLT, tLE, tGT, tGE, tx0E, tx0F,
- // AddOps (do not reorder)
- tPLUS, tMINUS, tPIPE, tCARET, tx14, tx15, tx16, tx17,
- // MulOps (do not reorder)
- tSTAR, tSLASH, tPERCENT, tAMP, tLEFT, tRIGHT, tx1E, tx1F,
- // AsnOps (do not reorder)
- tADDEQ, tSUBEQ, tOREQ, tXOREQ, tx24, tx25, tx26, tx27,
- tMULEQ, tDIVEQ, tMODEQ, tANDEQ, tLSEQ, tRSEQ, t2E, t2F,
- // Various, UnaryNot, LogicalOps,
- tSEMI, tCOLON, tDOT, tCOMMA, tNOT, tAND, tOR, tBANG,
- tASSIGN, tINC, tDEC,
- // Keywords
- tNEW, tFN, tSTRUCT, tVAR, tENUM,
- tIF, tELSE, tWHILE,
- tBREAK, tCONTINUE, tRETURN,
- tFOR, tSWITCH, tCASE,
- tTRUE, tFALSE, tNIL,
- tIDN, tNUM, tSTR,
- // used internal to the lexer but never returned
- tSPC, tINV, tDQT, tSQT, tMSC,
-};
-
-var tnames []str = {
- "<EOF>", "<EOL>", "{", "}", "[", "]", "(", ")",
- "==", "!=", "<", "<=", ">", ">=", "", "",
- "+", "-", "|", "^", "", "", "", "",
- "*", "/", "%", "&", "<<", ">>", "", "",
- "+=", "-=", "|=", "^=", "", "", "", "",
- "*=", "/=", "%=", "&=", "<<=", ">>=", "", "",
- ";", ":", ".", ",", "~", "&&", "||", "!",
- "=", "++", "--",
- "new", "fn", "struct", "var", "enum",
- "if", "else", "while",
- "break", "continue", "return",
- "for", "switch", "case",
- "true", "false", "nil",
- "<ID>", "<NUM>", "<STR>",
- "<SPC>", "<INV>", "<DQT>", "<SQT>", "<MSC>",
-};
-
var lextab [256]u8 = {
tEOF, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
tINV, tSPC, tEOL, tSPC, tINV, tSPC, tINV, tINV,
@@ -211,7 +273,7 @@ fn unhex(ch u32) i32 {
return -1;
}
-fn scan() u32 {
+fn scan() Token {
var ch i32 = readc(0);
if (ch < 0) {
ctx.cc = 0;
@@ -247,7 +309,7 @@ fn unescape(n u32) u32 {
}
}
-fn scan_string(cc u32, nc u32) u32 {
+fn scan_string(cc u32, nc u32) Token {
var n u32 = 0;
while (true) {
if (nc == '"') {
@@ -270,7 +332,7 @@ fn scan_string(cc u32, nc u32) u32 {
return tSTR;
}
-fn scan_keyword(len u32) u32 {
+fn scan_keyword(len u32) Token {
ctx.tmp[len] = 0;
var idn String = string_make(ctx.tmp, len);
ctx.ident = idn;
@@ -302,7 +364,7 @@ fn scan_keyword(len u32) u32 {
return tIDN;
}
-fn scan_number(cc u32, nc u32) u32 {
+fn scan_number(cc u32, nc u32) Token {
var n u32 = 1;
var val u32 = cc - '0';
@@ -345,12 +407,12 @@ fn scan_number(cc u32, nc u32) u32 {
return tNUM;
}
-fn scan_ident(cc u32, nc u32) u32 {
+fn scan_ident(cc u32, nc u32) Token {
ctx.tmp[0] = cc;
var n u32 = 1;
while (true) {
- var tok u32 = lextab[nc];
+ var tok Token = lextab[nc];
if ((tok == tIDN) || (tok == tNUM)) {
ctx.tmp[n] = nc;
n++;
@@ -363,12 +425,12 @@ fn scan_ident(cc u32, nc u32) u32 {
return scan_keyword(n);
}
-fn _next() u32 {
+fn _next() Token {
var nc u8 = ctx.cc;
while (1) {
var cc u8 = nc;
nc = scan();
- var tok u32 = lextab[cc];
+ var tok Token = lextab[cc];
if (tok == tNUM) { // 0..9
return scan_number(cc, nc);
} else if (tok == tIDN) { // _ A..Z a..z
@@ -436,7 +498,6 @@ fn _next() u32 {
}
}
-
fn token_printstr(fd i32) {
var n u32 = 0;
writec(fd, '"');
@@ -474,7 +535,7 @@ fn token_print(fd i32) {
writec(fd, ' ');
}
-fn next() u32 {
+fn next() Token { // fetch a token via _next(), record it in ctx.tok, and return it
 ctx.tok = _next(); // ctx.tok is documented in Context as the most recent token
 return ctx.tok;
}