spl

systems programming language
git clone http://frotz.net/git/spl.git
Log | Files | Refs | README | LICENSE

types.spl (11423B)


      1 // Copyright 2023, Brian Swetland <swetland@frotz.net>
      2 // Licensed under the Apache License, Version 2.0.
      3 
      4 // ================================================================
      5 // data types
      6 
      7 struct String {
      8 	next *String,
      9 	len u32,
     10 	text [256]u8,
     11 };
     12 
     13 enum SymbolKind {
     14 	SYMBOL_VAR,
     15 	SYMBOL_FLD, // struct field
     16 	SYMBOL_PTR, // struct *field
     17 	SYMBOL_DEF, // enum
     18 	SYMBOL_FN,
     19 };
     20 
     21 struct Symbol {
     22 	next *Symbol,
     23 	name *String,
     24 	type *Type,
     25 	kind SymbolKind,
     26 };
     27 
     28 enum ScopeKind {
     29 	SCOPE_GLOBAL,
     30 	SCOPE_FUNC,
     31 	SCOPE_BLOCK,
     32 	SCOPE_LOOP,
     33 	SCOPE_STRUCT,
     34 };
     35 
     36 struct Scope {
     37 	parent *Scope,
     38 	first *Symbol,
     39 	last *Symbol,
     40 	kind ScopeKind,
     41 };
     42 
     43 enum TypeKind {
     44 	TYPE_VOID,
     45 	TYPE_BOOL,
     46 	TYPE_U8,
     47 	TYPE_U32,
     48 	TYPE_I32,
     49 	TYPE_NIL,
     50 	TYPE_POINTER,
     51 	TYPE_ARRAY,
     52 	TYPE_SLICE,
     53 	TYPE_STR,
     54 	TYPE_STRUCT,
     55 	TYPE_FN,
     56 	TYPE_ENUM,
     57 	TYPE_UNDEFINED,
     58 };
     59 
     60 struct Type {
     61 	next *Type,
     62 	name *String,
     63 	of *Type,      // for slice, array, ptr, fn (return type)
     64 	list *Symbol,  // for struct (fields), fn (params)
     65 	kind TypeKind,
     66 	count u32,
     67 };
     68 
     69 // ================================================================
     70 // Abstract Syntax Tree
     71 
     72 enum AstKind {
     73 // top node
     74 	AST_PROGRAM,  // l=FUNC*
     75 // program components (chained into a list by next)
     76 	AST_FUNC,     // l=BLOCK
     77 // container of statements
     78 	AST_BLOCK,    // l=STMT*
     79 // statements (chained into a list by next)
     80 	AST_EXPR,     // l=EXPR
     81 	AST_WHILE,    // l=EXPR r=BLOCK
     82 	AST_BREAK,
     83 	AST_CONTINUE,
     84 	AST_RETURN,   // l=EXPR
     85 	AST_IF,       // l=CASE*
     86 // sub-parts of if
     87 	AST_CASE,     // l=EXPR r=BLOCK
     88 	AST_ELSE,     // l=BLOCK
     89 // expressions
     90 	AST_SYMBOL,
     91 	AST_CONST,    // numeric constant
     92 	AST_STRING,   // string constant
     93 	AST_DEREF,    // l=EXPR type: pointer-to-...
     94 	AST_INDEX,    // l=EXPR type: array-of-...  r=EXPR index
     95 	AST_FIELD,    // l=EXPR type: struct        r=SYMBOL field
     96 	AST_ADDROF,   // l=EXPR type: lvalue
     97 	AST_CALL,     // l=NAME r=EXPR*
     98 	AST_ASSIGN,   // l=lhsEXPR r=rhsEXPR
     99 	AST_NEW,      // l=TYPE
    100 // binary expressions
    101 	// Rel Ops (maintain order matched w/ lexer)
    102 	AST_EQ, AST_NE, AST_LT, AST_LE, AST_GT, AST_GE,
    103 	// Add Ops (maintain order matched w/ lexer)
    104 	AST_ADD, AST_SUB, AST_OR, AST_XOR,
    105 	// Mul Ops (maintain order matched w/ lexer)
    106 	AST_MUL, AST_DIV, AST_MOD, AST_AND, AST_LSL, AST_LSR,
    107 	// uncategorized ops
    108 	AST_NOT, AST_BOOL_AND, AST_BOOL_OR, AST_BOOL_NOT, AST_NEG,
    109 	AST_KIND_COUNT,
    110 };
    111 
    112 var ast_kind []str = {
    113 	"PROGRAM", "FUNC",
    114 	"BLOCK", "EXPR", "WHILE", "BREAK", "CONTINUE",
    115 	"RETURN", "IF", "CASE", "ELSE",
    116 	"SYMBOL", "CONST", "STRING",
    117 	"DEREF", "INDEX", "FIELD", "ADDROF", "CALL", "ASSIGN", "NEW",
    118 	"EQ", "NE", "LT", "LE", "GT", "GE",
    119 	"ADD", "SUB", "OR", "XOR",
    120 	"MUL", "DIV", "MOD", "AND", "LSL", "LSR",
    121 	"NOT", "BOOL AND", "BOOL OR", "BOOL NOT", "NEG",
    122 };
    123 
    124 fn ast_is_relop(kind AstKind) bool {
    125 	return (kind >= AST_EQ) && (kind <= AST_GE);
    126 }
    127 
    128 fn ast_is_addop(kind AstKind) bool {
    129 	return (kind >= AST_ADD) && (kind <= AST_XOR);
    130 }
    131 
    132 fn ast_is_mulop(kind AstKind) bool {
    133 	return (kind >= AST_MUL) && (kind <= AST_LSR);
    134 }
    135 
    136 fn ast_is_binop(kind AstKind) bool {
    137 	return (kind >= AST_EQ) && (kind <= AST_LSR);
    138 }
    139 
    140 fn ast_is_expr(kind AstKind) bool {
    141 	return (kind >= AST_SYMBOL);
    142 }
    143 
    144 fn ast_is_stmt(kind AstKind) bool {
    145 	return (kind >= AST_EXPR) && (kind <= AST_ELSE);
    146 }
    147 
    148 struct Ast {
    149 	kind AstKind,
    150 
    151 	left *Ast,
    152 	right *Ast,
    153 	next *Ast, // intrusive list
    154 
    155 	ival u32,
    156 	name *String,
    157 	sym *Symbol,
    158 	type *Type,
    159 
    160 	srcloc u32, // linenumber for now
    161 };
    162 
    163 
    164 // ================================================================
    165 // lexical scanner tokens
    166 
    167 // token classes (tok & tcMASK)
    168 enum TokenClass{
    169 	tcRELOP = 0x08, tcADDOP = 0x10, tcMULOP = 0x18,
    170 	tcAEQOP = 0x20, tcMEQOP = 0x28, tcMASK = 0xF8,
    171 };
    172 
    173 enum Token {
    174 	// EndMarks, Braces, Brackets Parens
    175 	tEOF, tEOL, tOBRACE, tCBRACE, tOBRACK, tCBRACK, tOPAREN, tCPAREN,
    176 	// RelOps (do not reorder)
    177 	tEQ, tNE, tLT, tLE, tGT, tGE, tx0E, tx0F,
    178 	// AddOps (do not reorder)
    179 	tPLUS, tMINUS, tPIPE, tCARET, tx14, tx15, tx16, tx17,
    180 	// MulOps (do not reorder)
    181 	tSTAR, tSLASH, tPERCENT, tAMP, tLEFT, tRIGHT, tx1E, tx1F,
    182 	// AsnOps (do not reorder)
    183 	tADDEQ, tSUBEQ, tOREQ, tXOREQ, tx24, tx25, tx26, tx27,
    184 	tMULEQ, tDIVEQ, tMODEQ, tANDEQ, tLSEQ, tRSEQ, t2E, t2F,
    185 	// Various, UnaryNot, LogicalOps,
    186 	tSEMI, tCOLON, tDOT, tCOMMA, tNOT, tAND, tOR, tBANG,
    187 	tASSIGN, tINC, tDEC,
    188 	tAT,
    189 	// Keywords
    190 	tNEW, tFN, tSTRUCT, tVAR, tENUM,
    191 	tIF, tELSE, tWHILE,
    192 	tBREAK, tCONTINUE, tRETURN,
    193 	tFOR, tSWITCH, tCASE,
    194 	tTRUE, tFALSE, tNIL,
    195 	tIDN, tNUM, tSTR,
    196 	// used internal to the lexer but never returned
    197 	tSPC, tINV, tDQT, tSQT, tMSC,
    198 };
    199 
    200 var tnames []str = {
    201 	"<EOF>", "<EOL>", "{",  "}",  "[",   "]",   "(",   ")",
    202 	"==",    "!=",    "<",  "<=", ">",   ">=",  "",    "",
    203 	"+",     "-",     "|",  "^",  "",    "",    "",    "",
    204 	"*",     "/",     "%",  "&",  "<<",  ">>",  "",    "",
    205 	"+=",    "-=",    "|=", "^=", "",    "",    "",    "",
    206 	"*=",    "/=",    "%=", "&=", "<<=", ">>=", "",    "",
    207 	";",     ":",     ".",  ",",  "~",   "&&",  "||",  "!",
    208 	"=",     "++",    "--",
    209 	"@",
    210 	"new", "fn", "struct", "var", "enum",
    211 	"if", "else", "while",
    212 	"break", "continue", "return",
    213 	"for", "switch", "case",
    214 	"true", "false", "nil",
    215 	"<ID>", "<NUM>", "<STR>",
    216 	"<SPC>", "<INV>", "<DQT>", "<SQT>", "<MSC>",
    217 };
    218 
    219 
    220 // ================================================================
    221 // lexer / parser / compiler context
    222 
    223 struct Context {
    224 	filename str,		// filename of active source
    225 	outname str,		// base name for output files
    226 
    227 	fd_in i32,		// current input file
    228 	linenumber u32,		// line number of most recent line
    229 	lineoffset u32,		// position of start of most recent line
    230 	byteoffset u32,		// position of the most recent character
    231 	flags u32,
    232 	cc u32,			// scanner: next character
    233 
    234 	tok Token,		// most recent token
    235 	num u32,		// for tNUM
    236 	tmp [256]u8,		// for tIDN, tSTR
    237 	ident *String,		// for tSTR
    238 
    239 	stringlist *String,	// intern table
    240 	typelist *Type,		// all types
    241 
    242 	scope *Scope,		// top of Scope stack
    243 	global *Scope,		// the global scope
    244 	cur_fn *Symbol,		// fn being parsed
    245 
    246 	program *Ast,
    247 	last *Ast,
    248 
    249 	idn_if *String,		// identifier strings
    250 	idn_fn *String,
    251 	idn_for *String,
    252 	idn_var *String,
    253 	idn_nil *String,
    254 	idn_new *String,
    255 	idn_case *String,
    256 	idn_else *String,
    257 	idn_enum *String,
    258 	idn_true *String,
    259 	idn_break *String,
    260 	idn_while *String,
    261 	idn_false *String,
    262 	idn_switch *String,
    263 	idn_struct *String,
    264 	idn_return *String,
    265 	idn_continue *String,
    266 
    267 	type_void *Type,
    268 	type_bool *Type,
    269 	type_str *Type,
    270 	type_nil *Type,
    271 	type_u32 *Type,
    272 	type_i32 *Type,
    273 	type_u8 *Type,
    274 };
    275 
    276 var ctx Context;
    277 
    278 fn string_make(text str, len u32) String {
    279 	var s String = ctx.stringlist;
    280 	while s != nil {
    281 		if (s.len == len) && strneq(text, s.text, len) {
    282 			return s;
    283 		}
    284 		s = s.next;
    285 	}
    286 	s = new(String);
    287 	s.len = len;
    288 	strcpyn(s.text, text, len + 1);
    289 	s.next = ctx.stringlist;
    290 	ctx.stringlist = s;
    291 	return s;
    292 }
    293 
    294 fn scope_push(kind ScopeKind) Scope {
    295 	var scope Scope = new(Scope);
    296 	scope.first = nil;
    297 	scope.last = nil;
    298 	scope.parent = ctx.scope;
    299 	scope.kind = kind;
    300 	ctx.scope = scope;
    301 	return scope;
    302 }
    303 
    304 // returns symbol list for the popped scope
    305 fn scope_pop() Symbol {
    306 	var scope Scope = ctx.scope;
    307 	ctx.scope = scope.parent;
    308 	return scope.first;
    309 }
    310 
    311 fn scope_find(kind ScopeKind) Scope {
    312 	var scope Scope = ctx.scope;
    313 	while scope != nil {
    314 		if scope.kind == kind {
    315 			return scope;
    316 		}
    317 		scope = scope.parent;
    318 	}
    319 	return nil;
    320 }
    321 
    322 fn symbol_find_in(name String, scope Scope) Symbol {
    323 	var sym Symbol = scope.first;
    324 	while sym != nil {
    325 		if sym.name == name {
    326 			return sym;
    327 		}
    328 		sym = sym.next;
    329 	}
    330 	return nil;
    331 }
    332 
    333 // find the first surrounding scope of a specified kind
    334 fn symbol_find(name String) Symbol {
    335 	var scope Scope = ctx.scope;
    336 	while scope != nil {
    337 		var sym Symbol = symbol_find_in(name, scope);
    338 		if sym != nil {
    339 			return sym;
    340 		}
    341 		scope = scope.parent;
    342 	}
    343 	return nil;
    344 }
    345 
    346 fn symbol_make_in_scope(name String, type Type, scope Scope) Symbol {
    347 	var sym Symbol = new(Symbol);
    348 	sym.name = name;
    349 	sym.type = type;
    350 	sym.next = nil;
    351 	sym.kind = SYMBOL_VAR;
    352 	if scope.first == nil {
    353 		scope.first = sym;
    354 	} else {
    355 		scope.last.next = sym;
    356 	}
    357 	scope.last = sym;
    358 	return sym;
    359 }
    360 
    361 fn symbol_make_global(name String, type Type) Symbol {
    362 	return symbol_make_in_scope(name, type, ctx.global);
    363 }
    364 
    365 fn symbol_make(name String, type Type) Symbol {
    366 	return symbol_make_in_scope(name, type, ctx.scope);
    367 }
    368 
    369 fn type_make(name String, kind TypeKind, of Type, list Symbol, count u32) Type {
    370 	var type Type = new(Type);
    371 	type.name = name;
    372 	type.of = of;
    373 	type.list = list;
    374 	type.kind = kind;
    375 	type.count = count;
    376 	if name != nil {
    377 		type.next = ctx.typelist;
    378 		ctx.typelist = type;
    379 	} else {
    380 		type.next = nil;
    381 	}
    382 	return type;
    383 }
    384 
    385 fn type_find(name String) Type {
    386 	var t Type = ctx.typelist;
    387 	while t != nil {
    388 		if t.name == name {
    389 			return t;
    390 		}
    391 		t = t.next;
    392 	}
    393 	return nil;
    394 }
    395 
    396 fn type_find_field(type Type, name String) Symbol {
    397 	if type.kind != TYPE_STRUCT {
    398 		error("not a struct");
    399 	}
    400 	var s Symbol = type.list;
    401 	while s != nil {
    402 		if s.name == name {
    403 			return s;
    404 		}
    405 		s = s.next;
    406 	}
    407 	error("struct has no such field '", @str name.text, "'");
    408 	return nil;
    409 }
    410 
    411 fn ast_make(kind AstKind, ival u32, name String, sym Symbol, type Type) Ast {
    412 	var node Ast = new(Ast);
    413 	node.kind = kind;
    414 	node.ival = ival;
    415 	node.name = name;
    416 	node.sym = sym;
    417 	node.type = type;
    418 	node.srcloc = ctx.linenumber;
    419 	return node;
    420 }
    421 
    422 fn ast_make_lr(kind AstKind, left Ast, right Ast) Ast {
    423 	var node Ast = ast_make(kind, 0, nil, nil, nil);
    424 	node.left = left;
    425 	node.right = right;
    426 	return node;
    427 }
    428 
    429 fn ast_make_l(kind AstKind, child Ast) Ast {
    430 	var node Ast = ast_make(kind, 0, nil, nil, nil);
    431 	node.left = child;
    432 	return node;
    433 }
    434 
    435 fn ast_make_simple(kind AstKind, x u32) Ast {
    436 	return ast_make(kind, x, nil, nil, nil);
    437 }
    438 
    439 fn ast_make_const(value u32, type Type) Ast {
    440 	return ast_make(AST_CONST, value, nil, nil, type);
    441 }
    442 
    443 fn ast_make_symbol(name String, sym Symbol) Ast {
    444 	// TODO maybe handle nil sym differently?
    445 	var type Type = nil;
    446 	if sym != nil {
    447 		type = sym.type;
    448 	}
    449 	return ast_make(AST_SYMBOL, 0, name, sym, type);
    450 }
    451 
    452 fn ctx_init() {
    453 	ctx = new(Context);
    454 
    455 	ctx.idn_if       = string_make("if", 2);
    456 	ctx.idn_fn       = string_make("fn", 2);
    457 	ctx.idn_for      = string_make("for", 3);
    458 	ctx.idn_var      = string_make("var", 3);
    459 	ctx.idn_nil      = string_make("nil", 3);
    460 	ctx.idn_new      = string_make("new", 3);
    461 	ctx.idn_case     = string_make("case", 4);
    462 	ctx.idn_else     = string_make("else", 4);
    463 	ctx.idn_enum     = string_make("enum", 4);
    464 	ctx.idn_true     = string_make("true", 4);
    465 	ctx.idn_break    = string_make("break", 5);
    466 	ctx.idn_while    = string_make("while", 5);
    467 	ctx.idn_false    = string_make("false", 5);
    468 	ctx.idn_switch   = string_make("switch", 6);
    469 	ctx.idn_struct   = string_make("struct", 6);
    470 	ctx.idn_return   = string_make("return", 6);
    471 	ctx.idn_continue = string_make("continue", 8);
    472 
    473 	ctx.type_void    = type_make(string_make("void", 4), TYPE_VOID, nil, nil, 0);
    474 	ctx.type_bool    = type_make(string_make("bool", 4), TYPE_BOOL, nil, nil, 0);
    475 	ctx.type_nil     = type_make(string_make("nil", 3),  TYPE_NIL,  nil, nil, 0);
    476 	ctx.type_str     = type_make(string_make("str", 3),  TYPE_STR,  nil, nil, 0);
    477 	ctx.type_u32     = type_make(string_make("u32", 3),  TYPE_U32,  nil, nil, 0);
    478 	ctx.type_i32     = type_make(string_make("i32", 3),  TYPE_I32,  nil, nil, 0);
    479 	ctx.type_u8      = type_make(string_make("u8", 2),   TYPE_U8,   nil, nil, 0);
    480 
    481 	scope_push(SCOPE_GLOBAL);
    482 	ctx.global = ctx.scope;
    483 
    484 	ctx.linenumber = 1;
    485 	ctx.filename = "<stdin>";
    486 }
    487