spl

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

commit bf11df7b87b6877b0ed4d198b62fa339b136438e
parent 24d39cdff122eb375989894089d1fd21816b315c
Author: Brian Swetland <swetland@frotz.net>
Date:   Tue, 17 Oct 2023 13:36:51 -0700

compiler: various structs, use named enums

Diffstat:
M compiler/compiler.spl | 183 +++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------
1 file changed, 122 insertions(+), 61 deletions(-)

diff --git a/compiler/compiler.spl b/compiler/compiler.spl @@ -37,6 +37,118 @@ struct String { text [256]u8, }; +enum SymbolKind { + SYMBOL_VAR, + SYMBOL_FLD, // struct field + SYMBOL_PTR, // struct *field + SYMBOL_DEF, // enum + SYMBOL_FN, +}; + +struct Symbol { + next *Symbol, + name *String, + type *Type, + kind SymbolKind, +}; + +enum ScopeKind { + SCOPE_GLOBAL, + SCOPE_FUNC, + SCOPE_BLOCK, + SCOPE_LOOP, + SCOPE_STRUCT, +}; + +struct Scope { + parent *Scope, + first *Symbol, + last *Symbol, + kind ScopeKind, +}; + +enum TypeKind { + TYPE_VOID, + TYPE_BOOL, + TYPE_U8, + TYPE_U32, + TYPE_NIL, + TYPE_POINTER, + TYPE_ARRAY, + TYPE_SLICE, + TYPE_STR, + TYPE_STRUCT, + TYPE_FUNC, + TYPE_ENUM, + TYPE_UNDEFINED, +}; + +struct Type { + next *Type, + name *String, + of *Type, // for slice, array, ptr + fields *Symbol, // for struct + kind TypeKind, + count u32, +}; + +// ================================================================ +// lexical scanner tokens + +// token classes (tok & tcMASK) +enum TokenClass{ + tcRELOP = 0x08, tcADDOP = 0x10, tcMULOP = 0x18, + tcAEQOP = 0x20, tcMEQOP = 0x28, tcMASK = 0xF8, +}; + +enum Token { + // EndMarks, Braces, Brackets Parens + tEOF, tEOL, tOBRACE, tCBRACE, tOBRACK, tCBRACK, tOPAREN, tCPAREN, + // RelOps (do not reorder) + tEQ, tNE, tLT, tLE, tGT, tGE, tx0E, tx0F, + // AddOps (do not reorder) + tPLUS, tMINUS, tPIPE, tCARET, tx14, tx15, tx16, tx17, + // MulOps (do not reorder) + tSTAR, tSLASH, tPERCENT, tAMP, tLEFT, tRIGHT, tx1E, tx1F, + // AsnOps (do not reorder) + tADDEQ, tSUBEQ, tOREQ, tXOREQ, tx24, tx25, tx26, tx27, + tMULEQ, tDIVEQ, tMODEQ, tANDEQ, tLSEQ, tRSEQ, t2E, t2F, + // Various, UnaryNot, LogicalOps, + tSEMI, tCOLON, tDOT, tCOMMA, tNOT, tAND, tOR, tBANG, + tASSIGN, tINC, tDEC, + // Keywords + tNEW, tFN, tSTRUCT, tVAR, tENUM, + tIF, tELSE, tWHILE, + tBREAK, tCONTINUE, tRETURN, + tFOR, tSWITCH, tCASE, + tTRUE, tFALSE, tNIL, + tIDN, tNUM, tSTR, + // used internal to the lexer but never returned + tSPC, tINV, tDQT, tSQT, 
tMSC, +}; + +var tnames []str = { + "<EOF>", "<EOL>", "{", "}", "[", "]", "(", ")", + "==", "!=", "<", "<=", ">", ">=", "", "", + "+", "-", "|", "^", "", "", "", "", + "*", "/", "%", "&", "<<", ">>", "", "", + "+=", "-=", "|=", "^=", "", "", "", "", + "*=", "/=", "%=", "&=", "<<=", ">>=", "", "", + ";", ":", ".", ",", "~", "&&", "||", "!", + "=", "++", "--", + "new", "fn", "struct", "var", "enum", + "if", "else", "while", + "break", "continue", "return", + "for", "switch", "case", + "true", "false", "nil", + "<ID>", "<NUM>", "<STR>", + "<SPC>", "<INV>", "<DQT>", "<SQT>", "<MSC>", +}; + + +// ================================================================ +// lexer / parser / compiler context + struct Context { stringlist *String, // intern table @@ -46,7 +158,7 @@ struct Context { flags u32, cc u32, // scanner: next character - tok u32, // most recent token + tok Token, // most recent token num u32, // for tNUM tmp [256]u8, // for tIDN, tSTR ident *String, // for tSTR @@ -113,56 +225,6 @@ fn ctx_init() { // ================================================================ // lexical scanner -// token classes (tok & tcMASK) -enum { - tcRELOP = 0x08, tcADDOP = 0x10, tcMULOP = 0x18, - tcAEQOP = 0x20, tcMEQOP = 0x28, tcMASK = 0xF8, -}; - -enum { - // EndMarks, Braces, Brackets Parens - tEOF, tEOL, tOBRACE, tCBRACE, tOBRACK, tCBRACK, tOPAREN, tCPAREN, - // RelOps (do not reorder) - tEQ, tNE, tLT, tLE, tGT, tGE, tx0E, tx0F, - // AddOps (do not reorder) - tPLUS, tMINUS, tPIPE, tCARET, tx14, tx15, tx16, tx17, - // MulOps (do not reorder) - tSTAR, tSLASH, tPERCENT, tAMP, tLEFT, tRIGHT, tx1E, tx1F, - // AsnOps (do not reorder) - tADDEQ, tSUBEQ, tOREQ, tXOREQ, tx24, tx25, tx26, tx27, - tMULEQ, tDIVEQ, tMODEQ, tANDEQ, tLSEQ, tRSEQ, t2E, t2F, - // Various, UnaryNot, LogicalOps, - tSEMI, tCOLON, tDOT, tCOMMA, tNOT, tAND, tOR, tBANG, - tASSIGN, tINC, tDEC, - // Keywords - tNEW, tFN, tSTRUCT, tVAR, tENUM, - tIF, tELSE, tWHILE, - tBREAK, tCONTINUE, tRETURN, - tFOR, tSWITCH, tCASE, 
- tTRUE, tFALSE, tNIL, - tIDN, tNUM, tSTR, - // used internal to the lexer but never returned - tSPC, tINV, tDQT, tSQT, tMSC, -}; - -var tnames []str = { - "<EOF>", "<EOL>", "{", "}", "[", "]", "(", ")", - "==", "!=", "<", "<=", ">", ">=", "", "", - "+", "-", "|", "^", "", "", "", "", - "*", "/", "%", "&", "<<", ">>", "", "", - "+=", "-=", "|=", "^=", "", "", "", "", - "*=", "/=", "%=", "&=", "<<=", ">>=", "", "", - ";", ":", ".", ",", "~", "&&", "||", "!", - "=", "++", "--", - "new", "fn", "struct", "var", "enum", - "if", "else", "while", - "break", "continue", "return", - "for", "switch", "case", - "true", "false", "nil", - "<ID>", "<NUM>", "<STR>", - "<SPC>", "<INV>", "<DQT>", "<SQT>", "<MSC>", -}; - var lextab [256]u8 = { tEOF, tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, tSPC, tEOL, tSPC, tINV, tSPC, tINV, tINV, @@ -211,7 +273,7 @@ fn unhex(ch u32) i32 { return -1; } -fn scan() u32 { +fn scan() Token { var ch i32 = readc(0); if (ch < 0) { ctx.cc = 0; @@ -247,7 +309,7 @@ fn unescape(n u32) u32 { } } -fn scan_string(cc u32, nc u32) u32 { +fn scan_string(cc u32, nc u32) Token { var n u32 = 0; while (true) { if (nc == '"') { @@ -270,7 +332,7 @@ fn scan_string(cc u32, nc u32) u32 { return tSTR; } -fn scan_keyword(len u32) u32 { +fn scan_keyword(len u32) Token { ctx.tmp[len] = 0; var idn String = string_make(ctx.tmp, len); ctx.ident = idn; @@ -302,7 +364,7 @@ fn scan_keyword(len u32) u32 { return tIDN; } -fn scan_number(cc u32, nc u32) u32 { +fn scan_number(cc u32, nc u32) Token { var n u32 = 1; var val u32 = cc - '0'; @@ -345,12 +407,12 @@ fn scan_number(cc u32, nc u32) u32 { return tNUM; } -fn scan_ident(cc u32, nc u32) u32 { +fn scan_ident(cc u32, nc u32) Token { ctx.tmp[0] = cc; var n u32 = 1; while (true) { - var tok u32 = lextab[nc]; + var tok Token = lextab[nc]; if ((tok == tIDN) || (tok == tNUM)) { ctx.tmp[n] = nc; n++; @@ -363,12 +425,12 @@ fn scan_ident(cc u32, nc u32) u32 { return scan_keyword(n); } -fn _next() u32 { +fn _next() Token { var nc u8 = 
ctx.cc; while (1) { var cc u8 = nc; nc = scan(); - var tok u32 = lextab[cc]; + var tok Token = lextab[cc]; if (tok == tNUM) { // 0..9 return scan_number(cc, nc); } else if (tok == tIDN) { // _ A..Z a..z @@ -436,7 +498,6 @@ fn _next() u32 { } } - fn token_printstr(fd i32) { var n u32 = 0; writec(fd, '"'); @@ -474,7 +535,7 @@ fn token_print(fd i32) { writec(fd, ' '); } -fn next() u32 { +fn next() Token { ctx.tok = _next(); return ctx.tok; }