types.spl (11423B)
1 // Copyright 2023, Brian Swetland <swetland@frotz.net> 2 // Licensed under the Apache License, Version 2.0. 3 4 // ================================================================ 5 // data types 6 7 struct String { 8 next *String, 9 len u32, 10 text [256]u8, 11 }; 12 13 enum SymbolKind { 14 SYMBOL_VAR, 15 SYMBOL_FLD, // struct field 16 SYMBOL_PTR, // struct *field 17 SYMBOL_DEF, // enum 18 SYMBOL_FN, 19 }; 20 21 struct Symbol { 22 next *Symbol, 23 name *String, 24 type *Type, 25 kind SymbolKind, 26 }; 27 28 enum ScopeKind { 29 SCOPE_GLOBAL, 30 SCOPE_FUNC, 31 SCOPE_BLOCK, 32 SCOPE_LOOP, 33 SCOPE_STRUCT, 34 }; 35 36 struct Scope { 37 parent *Scope, 38 first *Symbol, 39 last *Symbol, 40 kind ScopeKind, 41 }; 42 43 enum TypeKind { 44 TYPE_VOID, 45 TYPE_BOOL, 46 TYPE_U8, 47 TYPE_U32, 48 TYPE_I32, 49 TYPE_NIL, 50 TYPE_POINTER, 51 TYPE_ARRAY, 52 TYPE_SLICE, 53 TYPE_STR, 54 TYPE_STRUCT, 55 TYPE_FN, 56 TYPE_ENUM, 57 TYPE_UNDEFINED, 58 }; 59 60 struct Type { 61 next *Type, 62 name *String, 63 of *Type, // for slice, array, ptr, fn (return type) 64 list *Symbol, // for struct (fields), fn (params) 65 kind TypeKind, 66 count u32, 67 }; 68 69 // ================================================================ 70 // Abstract Syntax Tree 71 72 enum AstKind { 73 // top node 74 AST_PROGRAM, // l=FUNC* 75 // program components (chained into a list by next) 76 AST_FUNC, // l=BLOCK 77 // container of statements 78 AST_BLOCK, // l=STMT* 79 // statements (chained into a list by next) 80 AST_EXPR, // l=EXPR 81 AST_WHILE, // l=EXPR r=BLOCK 82 AST_BREAK, 83 AST_CONTINUE, 84 AST_RETURN, // l=EXPR 85 AST_IF, // l=CASE* 86 // sub-parts of if 87 AST_CASE, // l=EXPR r=BLOCK 88 AST_ELSE, // l=BLOCK 89 // expressions 90 AST_SYMBOL, 91 AST_CONST, // numeric constant 92 AST_STRING, // string constant 93 AST_DEREF, // l=EXPR type: pointer-to-... 94 AST_INDEX, // l=EXPR type: array-of-... r=EXPR index 95 AST_FIELD, // l=EXPR type: struct r=SYMBOL field 96 AST_ADDROF, // l=EXPR type: lvalue 97 AST_CALL, // l=NAME r=EXPR* 98 AST_ASSIGN, // l=lhsEXPR r=rhsEXPR 99 AST_NEW, // l=TYPE 100 // binary expressions 101 // Rel Ops (maintain order matched w/ lexer) 102 AST_EQ, AST_NE, AST_LT, AST_LE, AST_GT, AST_GE, 103 // Add Ops (maintain order matched w/ lexer) 104 AST_ADD, AST_SUB, AST_OR, AST_XOR, 105 // Mul Ops (maintain order matched w/ lexer) 106 AST_MUL, AST_DIV, AST_MOD, AST_AND, AST_LSL, AST_LSR, 107 // uncategorized ops 108 AST_NOT, AST_BOOL_AND, AST_BOOL_OR, AST_BOOL_NOT, AST_NEG, 109 AST_KIND_COUNT, 110 }; 111 112 var ast_kind []str = { 113 "PROGRAM", "FUNC", 114 "BLOCK", "EXPR", "WHILE", "BREAK", "CONTINUE", 115 "RETURN", "IF", "CASE", "ELSE", 116 "SYMBOL", "CONST", "STRING", 117 "DEREF", "INDEX", "FIELD", "ADDROF", "CALL", "ASSIGN", "NEW", 118 "EQ", "NE", "LT", "LE", "GT", "GE", 119 "ADD", "SUB", "OR", "XOR", 120 "MUL", "DIV", "MOD", "AND", "LSL", "LSR", 121 "NOT", "BOOL AND", "BOOL OR", "BOOL NOT", "NEG", 122 }; 123 124 fn ast_is_relop(kind AstKind) bool { 125 return (kind >= AST_EQ) && (kind <= AST_GE); 126 } 127 128 fn ast_is_addop(kind AstKind) bool { 129 return (kind >= AST_ADD) && (kind <= AST_XOR); 130 } 131 132 fn ast_is_mulop(kind AstKind) bool { 133 return (kind >= AST_MUL) && (kind <= AST_LSR); 134 } 135 136 fn ast_is_binop(kind AstKind) bool { 137 return (kind >= AST_EQ) && (kind <= AST_LSR); 138 } 139 140 fn ast_is_expr(kind AstKind) bool { 141 return (kind >= AST_SYMBOL); 142 } 143 144 fn ast_is_stmt(kind AstKind) bool { 145 return (kind >= AST_EXPR) && (kind <= AST_ELSE); 146 } 147 148 struct Ast { 149 kind AstKind, 150 151 left *Ast, 152 right *Ast, 153 next *Ast, // intrusive list 154 155 ival u32, 156 name *String, 157 sym *Symbol, 158 type *Type, 159 160 srcloc u32, // linenumber for now 161 }; 162 163 164 // ================================================================ 165 // lexical scanner tokens 166 167 // token classes (tok & tcMASK) 168 enum TokenClass{ 169 tcRELOP = 0x08, tcADDOP = 0x10, tcMULOP = 0x18, 170 tcAEQOP = 0x20, tcMEQOP = 0x28, tcMASK = 0xF8, 171 }; 172 173 enum Token { 174 // EndMarks, Braces, Brackets Parens 175 tEOF, tEOL, tOBRACE, tCBRACE, tOBRACK, tCBRACK, tOPAREN, tCPAREN, 176 // RelOps (do not reorder) 177 tEQ, tNE, tLT, tLE, tGT, tGE, tx0E, tx0F, 178 // AddOps (do not reorder) 179 tPLUS, tMINUS, tPIPE, tCARET, tx14, tx15, tx16, tx17, 180 // MulOps (do not reorder) 181 tSTAR, tSLASH, tPERCENT, tAMP, tLEFT, tRIGHT, tx1E, tx1F, 182 // AsnOps (do not reorder) 183 tADDEQ, tSUBEQ, tOREQ, tXOREQ, tx24, tx25, tx26, tx27, 184 tMULEQ, tDIVEQ, tMODEQ, tANDEQ, tLSEQ, tRSEQ, t2E, t2F, 185 // Various, UnaryNot, LogicalOps, 186 tSEMI, tCOLON, tDOT, tCOMMA, tNOT, tAND, tOR, tBANG, 187 tASSIGN, tINC, tDEC, 188 tAT, 189 // Keywords 190 tNEW, tFN, tSTRUCT, tVAR, tENUM, 191 tIF, tELSE, tWHILE, 192 tBREAK, tCONTINUE, tRETURN, 193 tFOR, tSWITCH, tCASE, 194 tTRUE, tFALSE, tNIL, 195 tIDN, tNUM, tSTR, 196 // used internal to the lexer but never returned 197 tSPC, tINV, tDQT, tSQT, tMSC, 198 }; 199 200 var tnames []str = { 201 "<EOF>", "<EOL>", "{", "}", "[", "]", "(", ")", 202 "==", "!=", "<", "<=", ">", ">=", "", "", 203 "+", "-", "|", "^", "", "", "", "", 204 "*", "/", "%", "&", "<<", ">>", "", "", 205 "+=", "-=", "|=", "^=", "", "", "", "", 206 "*=", "/=", "%=", "&=", "<<=", ">>=", "", "", 207 ";", ":", ".", ",", "~", "&&", "||", "!", 208 "=", "++", "--", 209 "@", 210 "new", "fn", "struct", "var", "enum", 211 "if", "else", "while", 212 "break", "continue", "return", 213 "for", "switch", "case", 214 "true", "false", "nil", 215 "<ID>", "<NUM>", "<STR>", 216 "<SPC>", "<INV>", "<DQT>", "<SQT>", "<MSC>", 217 }; 218 219 220 // ================================================================ 221 // lexer / parser / compiler context 222 223 struct Context { 224 filename str, // filename of active source 225 outname str, // base name for output files 226 227 fd_in i32, // current input file 228 linenumber u32, // line number of most recent line 229 lineoffset u32, // position of start of most recent line 230 byteoffset u32, // position of the most recent character 231 flags u32, 232 cc u32, // scanner: next character 233 234 tok Token, // most recent token 235 num u32, // for tNUM 236 tmp [256]u8, // for tIDN, tSTR 237 ident *String, // for tSTR 238 239 stringlist *String, // intern table 240 typelist *Type, // all types 241 242 scope *Scope, // top of Scope stack 243 global *Scope, // the global scope 244 cur_fn *Symbol, // fn being parsed 245 246 program *Ast, 247 last *Ast, 248 249 idn_if *String, // identifier strings 250 idn_fn *String, 251 idn_for *String, 252 idn_var *String, 253 idn_nil *String, 254 idn_new *String, 255 idn_case *String, 256 idn_else *String, 257 idn_enum *String, 258 idn_true *String, 259 idn_break *String, 260 idn_while *String, 261 idn_false *String, 262 idn_switch *String, 263 idn_struct *String, 264 idn_return *String, 265 idn_continue *String, 266 267 type_void *Type, 268 type_bool *Type, 269 type_str *Type, 270 type_nil *Type, 271 type_u32 *Type, 272 type_i32 *Type, 273 type_u8 *Type, 274 }; 275 276 var ctx Context; 277 278 fn string_make(text str, len u32) String { 279 var s String = ctx.stringlist; 280 while s != nil { 281 if (s.len == len) && strneq(text, s.text, len) { 282 return s; 283 } 284 s = s.next; 285 } 286 s = new(String); 287 s.len = len; 288 strcpyn(s.text, text, len + 1); 289 s.next = ctx.stringlist; 290 ctx.stringlist = s; 291 return s; 292 } 293 294 fn scope_push(kind ScopeKind) Scope { 295 var scope Scope = new(Scope); 296 scope.first = nil; 297 scope.last = nil; 298 scope.parent = ctx.scope; 299 scope.kind = kind; 300 ctx.scope = scope; 301 return scope; 302 } 303 304 // returns symbol list for the popped scope 305 fn scope_pop() Symbol { 306 var scope Scope = ctx.scope; 307 ctx.scope = scope.parent; 308 return scope.first; 309 } 310 311 fn scope_find(kind ScopeKind) Scope { 312 var scope Scope = ctx.scope; 313 while scope != nil { 314 if scope.kind == kind { 315 return scope; 316 } 317 scope = scope.parent; 318 } 319 return nil; 320 } 321 322 fn symbol_find_in(name String, scope Scope) Symbol { 323 var sym Symbol = scope.first; 324 while sym != nil { 325 if sym.name == name { 326 return sym; 327 } 328 sym = sym.next; 329 } 330 return nil; 331 } 332 333 // find the first surrounding scope of a specified kind 334 fn symbol_find(name String) Symbol { 335 var scope Scope = ctx.scope; 336 while scope != nil { 337 var sym Symbol = symbol_find_in(name, scope); 338 if sym != nil { 339 return sym; 340 } 341 scope = scope.parent; 342 } 343 return nil; 344 } 345 346 fn symbol_make_in_scope(name String, type Type, scope Scope) Symbol { 347 var sym Symbol = new(Symbol); 348 sym.name = name; 349 sym.type = type; 350 sym.next = nil; 351 sym.kind = SYMBOL_VAR; 352 if scope.first == nil { 353 scope.first = sym; 354 } else { 355 scope.last.next = sym; 356 } 357 scope.last = sym; 358 return sym; 359 } 360 361 fn symbol_make_global(name String, type Type) Symbol { 362 return symbol_make_in_scope(name, type, ctx.global); 363 } 364 365 fn symbol_make(name String, type Type) Symbol { 366 return symbol_make_in_scope(name, type, ctx.scope); 367 } 368 369 fn type_make(name String, kind TypeKind, of Type, list Symbol, count u32) Type { 370 var type Type = new(Type); 371 type.name = name; 372 type.of = of; 373 type.list = list; 374 type.kind = kind; 375 type.count = count; 376 if name != nil { 377 type.next = ctx.typelist; 378 ctx.typelist = type; 379 } else { 380 type.next = nil; 381 } 382 return type; 383 } 384 385 fn type_find(name String) Type { 386 var t Type = ctx.typelist; 387 while t != nil { 388 if t.name == name { 389 return t; 390 } 391 t = t.next; 392 } 393 return nil; 394 } 395 396 fn type_find_field(type Type, name String) Symbol { 397 if type.kind != TYPE_STRUCT { 398 error("not a struct"); 399 } 400 var s Symbol = type.list; 401 while s != nil { 402 if s.name == name { 403 return s; 404 } 405 s = s.next; 406 } 407 error("struct has no such field '", @str name.text, "'"); 408 return nil; 409 } 410 411 fn ast_make(kind AstKind, ival u32, name String, sym Symbol, type Type) Ast { 412 var node Ast = new(Ast); 413 node.kind = kind; 414 node.ival = ival; 415 node.name = name; 416 node.sym = sym; 417 node.type = type; 418 node.srcloc = ctx.linenumber; 419 return node; 420 } 421 422 fn ast_make_lr(kind AstKind, left Ast, right Ast) Ast { 423 var node Ast = ast_make(kind, 0, nil, nil, nil); 424 node.left = left; 425 node.right = right; 426 return node; 427 } 428 429 fn ast_make_l(kind AstKind, child Ast) Ast { 430 var node Ast = ast_make(kind, 0, nil, nil, nil); 431 node.left = child; 432 return node; 433 } 434 435 fn ast_make_simple(kind AstKind, x u32) Ast { 436 return ast_make(kind, x, nil, nil, nil); 437 } 438 439 fn ast_make_const(value u32, type Type) Ast { 440 return ast_make(AST_CONST, value, nil, nil, type); 441 } 442 443 fn ast_make_symbol(name String, sym Symbol) Ast { 444 // TODO maybe handle nil sym differently? 445 var type Type = nil; 446 if sym != nil { 447 type = sym.type; 448 } 449 return ast_make(AST_SYMBOL, 0, name, sym, type); 450 } 451 452 fn ctx_init() { 453 ctx = new(Context); 454 455 ctx.idn_if = string_make("if", 2); 456 ctx.idn_fn = string_make("fn", 2); 457 ctx.idn_for = string_make("for", 3); 458 ctx.idn_var = string_make("var", 3); 459 ctx.idn_nil = string_make("nil", 3); 460 ctx.idn_new = string_make("new", 3); 461 ctx.idn_case = string_make("case", 4); 462 ctx.idn_else = string_make("else", 4); 463 ctx.idn_enum = string_make("enum", 4); 464 ctx.idn_true = string_make("true", 4); 465 ctx.idn_break = string_make("break", 5); 466 ctx.idn_while = string_make("while", 5); 467 ctx.idn_false = string_make("false", 5); 468 ctx.idn_switch = string_make("switch", 6); 469 ctx.idn_struct = string_make("struct", 6); 470 ctx.idn_return = string_make("return", 6); 471 ctx.idn_continue = string_make("continue", 8); 472 473 ctx.type_void = type_make(string_make("void", 4), TYPE_VOID, nil, nil, 0); 474 ctx.type_bool = type_make(string_make("bool", 4), TYPE_BOOL, nil, nil, 0); 475 ctx.type_nil = type_make(string_make("nil", 3), TYPE_NIL, nil, nil, 0); 476 ctx.type_str = type_make(string_make("str", 3), TYPE_STR, nil, nil, 0); 477 ctx.type_u32 = type_make(string_make("u32", 3), TYPE_U32, nil, nil, 0); 478 ctx.type_i32 = type_make(string_make("i32", 3), TYPE_I32, nil, nil, 0); 479 ctx.type_u8 = type_make(string_make("u8", 2), TYPE_U8, nil, nil, 0); 480 481 scope_push(SCOPE_GLOBAL); 482 ctx.global = ctx.scope; 483 484 ctx.linenumber = 1; 485 ctx.filename = "<stdin>"; 486 } 487