compiler2.c (54119B)
1 // Copyright 2020, Brian Swetland <swetland@frotz.net> 2 // Licensed under the Apache License, Version 2.0. 3 4 #if C 5 #include <stdio.h> 6 #include <stdlib.h> 7 #include <stdarg.h> 8 #include <stdint.h> 9 #include <stdbool.h> 10 #include <strings.h> 11 #include <string.h> 12 13 #include <fcntl.h> 14 #include <unistd.h> 15 #include <sys/stat.h> 16 17 // builtin types 18 #define nil 0 19 typedef uint32_t u32; 20 typedef int32_t i32; 21 typedef uint8_t u8; 22 23 // rewriter only handles single word types 24 typedef uint32_t token_t; 25 typedef uint32_t ast_t; 26 typedef uint32_t type_t; 27 typedef uint32_t symbol_t; 28 typedef uint32_t scope_t; 29 typedef char* str; 30 typedef char** args; 31 typedef uint32_t* u32ptr; 32 33 void error(const char *fmt, ...); 34 #endif 35 36 // ------------------------------------------------------------------ 37 // structures 38 39 typedef struct StringRec StringRec; 40 typedef struct CtxRec CtxRec; 41 typedef struct AstRec AstRec; 42 typedef struct TypeRec TypeRec; 43 typedef struct SymbolRec SymbolRec; 44 typedef struct ScopeRec ScopeRec; 45 typedef struct FixupRec FixupRec; 46 47 typedef struct StringRec* String; 48 typedef struct CtxRec* Ctx; 49 typedef struct AstRec* Ast; 50 typedef struct TypeRec* Type; 51 typedef struct SymbolRec* Symbol; 52 typedef struct ScopeRec* Scope; 53 typedef struct FixupRec* Fixup; 54 55 struct StringRec { 56 String next; 57 u32 len; 58 char text[0]; 59 }; 60 61 enum { 62 // top node 63 AST_PROGRAM, // c2=FUNC* 64 // program components (chained into a list by c2) 65 AST_FUNC, // c0=BLOCK 66 // container of statements 67 AST_BLOCK, // c0=STMT 68 // statements (chained into a list by c2) 69 AST_EXPR, // c0=EXPR 70 AST_WHILE, // c0=EXPR c1=BLOCK 71 AST_BREAK, 72 AST_CONTINUE, 73 AST_RETURN, // c0=EXPR 74 AST_IF, // c0=IFELSE 75 // sub-part of if 76 AST_IFELSE, // c0=EXPR c1=BLOCKthen c2=BLOCKelse|IFELSE 77 // expression parts 78 AST_SYMBOL, 79 AST_CONST, 80 AST_STRING, 81 AST_DEREF, // c0=EXPR type: pointer-to-... 82 AST_INDEX, // c0=EXPR type: array-of-... c1=EXPR index 83 AST_FIELD, // c0=EXPR type: struct c1=SYMBOL field 84 AST_ADDROF, // c0=EXPR type: lvalue 85 AST_CALL, // c0=NAME c2=EXPR* 86 AST_ASSIGN, 87 88 // Rel Ops (maintain order matched w/ lexer) 89 AST_EQ, AST_NE, AST_LT, AST_LE, AST_GT, AST_GE, 90 // Add Ops (maintain order matched w/ lexer) 91 AST_ADD, AST_SUB, AST_OR, AST_XOR, 92 // Mul Ops (maintain order matched w/ lexer) 93 AST_MUL, AST_DIV, AST_MOD, AST_AND, AST_LSL, AST_LSR, 94 // uncategorized ops 95 AST_NOT, AST_BOOL_AND, AST_BOOL_OR, AST_BOOL_NOT, AST_NEG, 96 AST_KIND_COUNT 97 }; 98 99 bool ast_kind_is_relop(u32 kind) { 100 return (kind >= AST_EQ) && (kind <= AST_GE); 101 } 102 bool ast_kind_is_addop(u32 kind) { 103 return (kind >= AST_ADD) && (kind <= AST_XOR); 104 } 105 bool ast_kind_is_mulop(u32 kind) { 106 return (kind >= AST_MUL) && (kind <= AST_LSR); 107 } 108 bool ast_kind_is_binop(u32 kind) { 109 return (kind >= AST_EQ) && (kind <= AST_LSR); 110 } 111 112 str ast_kind[AST_KIND_COUNT] = { 113 "PROGRAM", "FUNC", 114 "BLOCK", "EXPR", "WHILE", "BREAK", "CONTINUE", 115 "RETURN", "IF", "IFELSE", 116 "SYMBOL", "CONST", "STR", 117 "DEREF", "INDEX", "FIELD", "ADDROF", "CALL", "ASSIGN", 118 "EQ", "NE", "LT", "LE", "GT", "GE", 119 "ADD", "SUB", "OR", "XOR", 120 "MUL", "DIV", "MOD", "AND", "LSL", "LSR", 121 "NOT", "BOOL AND", "BOOL OR", "BOOL NOT", "NEG", 122 }; 123 124 struct AstRec { 125 ast_t kind; 126 Ast c0; // left 127 Ast c1; // right 128 Ast c2; // third or listhead (chained on c2 fields) 129 130 u32 ival; 131 String name; 132 Symbol sym; 133 Type type; 134 135 u32 srcloc; // linenumber for now 136 }; 137 138 enum { 139 TYPE_VOID, 140 TYPE_BOOL, 141 TYPE_BYTE, 142 TYPE_I32, 143 TYPE_NIL, 144 TYPE_POINTER, 145 TYPE_ARRAY, 146 TYPE_SLICE, 147 TYPE_RECORD, 148 TYPE_FUNC, 149 TYPE_ENUM, 150 TYPE_UNDEFINED, 151 }; 152 153 str type_kind[TYPE_UNDEFINED + 1] = { 154 "void", "bool", "byte", "i32", "nil", "*", "[]", "[]", 155 "struct", "func", "enum", "undef" 156 }; 157 158 struct TypeRec { 159 type_t kind; 160 Type base; // pointer-to, func-return-type, array-of 161 Symbol sym; // if not anonymous 162 Symbol first; // list of params or fields 163 u32 len; // array elem count, param count 164 u32 size; // in bytes, local stack for funcs 165 }; 166 167 enum { 168 SYM_CONST, 169 SYM_TYPE, 170 SYM_FUNC, 171 SYM_GLOBAL, 172 SYM_LOCAL, 173 SYM_PARAM, 174 SYM_FIELD, 175 }; 176 177 str sym_kind[SYM_FIELD + 1] = { 178 "CONST", "TYPE", "FUNC", "GLOBAL", "LOCAL", "PARAM", "FIELD", 179 }; 180 181 enum { 182 SYM_IS_READ_ONLY = 0x01, 183 SYM_IS_PUBLIC = 0x02, 184 SYM_IS_DEFINED = 0x04, 185 SYM_IS_BUILTIN = 0x08, 186 SYM_IS_PLACED = 0x10, // exists at addr in sym->value 187 }; 188 189 struct SymbolRec { 190 symbol_t kind; 191 Symbol next; 192 u32 flags; 193 String name; 194 Type type; 195 Symbol first; // list of fields or params 196 u32 value; // SYM_CONST 197 Fixup fixups; // references from before placement 198 }; 199 200 enum { 201 SCOPE_GLOBAL, SCOPE_FUNC, SCOPE_BLOCK, SCOPE_LOOP, 202 }; 203 204 struct ScopeRec { 205 scope_t kind; 206 Scope next; 207 Symbol first; 208 u32 level; 209 u32 save_stack; 210 // Fixup? 211 }; 212 213 214 struct FixupRec { 215 Fixup next; 216 u32 addr; 217 }; 218 219 // ------------------------------------------------------------------ 220 // compiler global context 221 222 struct CtxRec { 223 const char* filename; // filename of active source 224 int fd; 225 226 u8 iobuffer[1024]; // scanner file io buffer 227 u32 ionext; 228 u32 iolast; 229 230 u32 linenumber; // line number of most recent line 231 u32 lineoffset; // position of start of most recent line 232 u32 byteoffset; // position of the most recent character 233 u32 flags; 234 u32 cc; // scanner: next character 235 236 token_t tok; // most recent token 237 u32 num; // used for tNUM 238 char tmp[256]; // used for tIDN, tSTR; 239 String ident; // used for tIDN 240 241 String strtab; // TODO: hashtable 242 Symbol typetab; // TODO: hashtable 243 Scope scope; // scope stack 244 245 Symbol fn; // active function being parsed 246 u32 spill_stack; // where to spill temp regs (TODO: dynamic) 247 u32 local_stack; // total stack for all locals (params, vars, tmps) 248 u32 alloc_stack; // where to allocate next var (grows/shrinks as 249 // we enter/exit scopes) 250 ScopeRec global; 251 252 String idn_if; 253 String idn_for; 254 String idn_var; 255 String idn_nil; 256 String idn_case; 257 String idn_func; 258 String idn_else; 259 String idn_enum; 260 String idn_true; 261 String idn_type; 262 String idn_break; 263 String idn_while; 264 String idn_false; 265 String idn_switch; 266 String idn_struct; 267 String idn_return; 268 String idn_continue; 269 270 Type type_void; 271 Type type_bool; 272 Type type_byte; 273 Type type_i32; 274 Type type_nil; 275 Type type_string; 276 277 u32 pc; // next ins to emit 278 u32 gp; // next global data to emit 279 280 u32 code[8192]; 281 u32 data[8192]; 282 u32 xref[8192]; 283 }; 284 285 CtxRec ctx; 286 287 // ------------------------------------------------------------------ 288 289 String string_make(const char* text, u32 len) { 290 // OPT obviously this wants to be a hash table 291 String str = ctx.strtab; 292 while (str != nil) { 293 if ((str->len == len) && (memcmp(text, str->text, len) == 0)) { 294 return str; 295 } 296 str = str->next; 297 } 298 299 str = malloc(sizeof(StringRec) + len + 1); 300 str->len = len; 301 memcpy(str->text, text, len); 302 str->text[len] = 0; 303 str->next = ctx.strtab; 304 ctx.strtab = str; 305 306 return str; 307 } 308 309 // ================================================================ 310 311 Ast ast_make(ast_t kind, u32 ival, String name, Symbol sym, Type type) { 312 Ast a = malloc(sizeof(AstRec)); 313 a->kind = kind; 314 a->c0 = nil; 315 a->c1 = nil; 316 a->c2 = nil; 317 a->ival = ival; 318 a->name = name; 319 a->sym = sym; 320 a->type = type; 321 a->srcloc = ctx.linenumber; 322 return a; 323 } 324 325 Ast ast_make_binop(u32 kind, Ast left, Ast right) { 326 Ast node = ast_make(kind, 0, nil, nil, nil); 327 node->c0 = left; 328 node->c1 = right; 329 return node; 330 } 331 332 Ast ast_make_unop(u32 kind, Ast child) { 333 Ast node = ast_make(kind, 0, nil, nil, nil); 334 node->c0 = child; 335 return node; 336 } 337 338 Ast ast_make_simple(ast_t kind, u32 x) { 339 return ast_make(kind, x, nil, nil, nil); 340 } 341 342 Ast ast_make_const(u32 value, Type type) { 343 return ast_make(AST_CONST, value, nil, nil, type); 344 } 345 346 Ast ast_make_symbol(String name, Symbol sym) { 347 return ast_make(AST_SYMBOL, 0, name, sym, sym->type); 348 } 349 350 // ================================================================ 351 352 Symbol symbol_make(symbol_t kind, u32 flags, String name, Type type, u32 value) { 353 Symbol s = malloc(sizeof(SymbolRec)); 354 s->kind = kind; 355 s->flags = flags; 356 s->name = name; 357 s->type = type; 358 s->value = value; 359 s->first = nil; 360 s->fixups = nil; 361 return s; 362 } 363 364 void symbol_add_global(Symbol sym) { 365 sym->next = ctx.global.first; 366 ctx.global.first = sym; 367 } 368 369 // search the scope stack for a symbol 370 Symbol symbol_find(String str) { 371 Scope scope = ctx.scope; 372 while (scope != nil) { 373 Symbol sym = scope->first; 374 while (sym != nil) { 375 if (sym->name == str) { 376 return sym; 377 } 378 sym = sym->next; 379 } 380 scope = scope->next; 381 } 382 return nil; 383 } 384 385 // ================================================================ 386 387 Type type_make(type_t kind, Type base, u32 len, u32 size) { 388 Type t = malloc(sizeof(TypeRec)); 389 t->kind = kind; 390 t->base = base; 391 t->sym = nil; 392 t->first = nil; 393 t->len = len; 394 t->size = size; 395 return t; 396 } 397 398 Type type_make_ptr(Type type) { 399 return type_make(TYPE_POINTER, type, 4, 0); 400 } 401 402 void type_add(Type type, String name) { 403 //fprintf(stderr,"type_add(%s, %s)\n",type_id_tab[type->kind],name->text); 404 Symbol sym = symbol_make(SYM_TYPE, 0, name, type, 0); 405 if (type->sym == nil) { 406 // only set the the type's object if it is 407 // not already set (otherwise aliases will 408 // clobber the canonical type names -- yuck!) 409 type->sym = sym; 410 } 411 sym->next = ctx.typetab; 412 ctx.typetab = sym; 413 } 414 415 Type type_find(String name) { 416 Symbol sym = ctx.typetab; 417 while (sym != nil) { 418 if (sym->name == name) { 419 return sym->type; 420 } 421 sym = sym->next; 422 } 423 return nil; 424 } 425 426 Type type_setup(const char* text, u32 tlen, u32 kind, u32 size) { 427 String name = string_make(text, tlen); 428 Type type = type_make(kind, nil, 0, size); 429 type_add(type, name); 430 return type; 431 } 432 433 bool type_is_same(Type a, Type b) { 434 if (a == b) { 435 return true; 436 } 437 if (a->kind != b->kind) { 438 return false; 439 } 440 if (a->base != b->base) { 441 return false; 442 } 443 if (a->len != b->len) { 444 return false; 445 } 446 Symbol a1 = a->first; 447 Symbol b1 = b->first; 448 while ((a1 != nil) && (b1 != nil)) { 449 // check that parameters and fields match 450 if (!type_is_same(a1->type, b1->type)) { 451 return false; 452 } 453 } 454 if ((a1 != nil) || (b1 != nil)) { 455 // mismatched number of parameters or fields 456 return false; 457 } 458 return true; 459 } 460 461 bool type_is_i32_shape(Type type) { 462 if ((type->kind == TYPE_I32) || 463 (type->kind == TYPE_BYTE) || 464 (type->kind == TYPE_BOOL)) { 465 return true; 466 } else { 467 return false; 468 } 469 } 470 471 bool type_is_compatible(Type dst, Type src) { 472 if (type_is_i32_shape(dst) && type_is_i32_shape(src)) { 473 return true; 474 } 475 return type_is_same(dst, src); 476 // check if deref of src is same as dst? 477 } 478 479 void type_dump(FILE* fp, Type type, bool use_short_name) { 480 if (use_short_name && (type->sym != nil)) { 481 fprintf(fp, "%s", type->sym->name->text); 482 } else if (type->kind == TYPE_ARRAY) { 483 fprintf(fp, "[%u]", type->len); 484 type_dump(fp, type->base, true); 485 } else if (type->kind == TYPE_RECORD) { 486 fprintf(fp, "struct {\n"); 487 Symbol field = type->first; 488 while (field != nil) { 489 fprintf(fp, " %s ", field->name->text); 490 type_dump(fp, field->type, true); 491 fprintf(fp, ", // off=%u, sz=%u\n", field->value, field->type->size); 492 field = field->next; 493 } 494 fprintf(fp, "}"); 495 } else { 496 fprintf(fp, "%s", type_kind[type->kind]); 497 if ((type->kind == TYPE_POINTER) || (type->kind == TYPE_SLICE)) { 498 type_dump(fp, type->base, true); 499 } 500 } 501 } 502 503 // ================================================================ 504 505 // push a new scope, adding list of symbols if provided 506 Scope scope_push(scope_t kind, Symbol list) { 507 Scope scope = malloc(sizeof(ScopeRec)); 508 scope->kind = kind; 509 scope->next = ctx.scope; 510 scope->first = list; 511 scope->level = ctx.scope->level + 1; 512 // save current stack offset (next local var) 513 scope->save_stack = ctx.alloc_stack; 514 ctx.scope = scope; 515 return scope; 516 } 517 518 void scope_add_symbol(Scope scope, Symbol sym) { 519 sym->next = scope->first; 520 scope->first = sym; 521 } 522 523 // pop a scope, return list of symbols 524 Symbol scope_pop() { 525 if (ctx.scope->level == 0) { 526 error("cannot pop the global scope"); 527 } 528 529 // restore prev stack offset (next local var) 530 ctx.alloc_stack = ctx.scope->save_stack; 531 532 Symbol list = ctx.scope->first; 533 // XXX dealloc 534 ctx.scope = ctx.scope->next; 535 return list; 536 } 537 538 // find the first surrounding scope of a specified kind 539 Scope scope_find(scope_t scope_kind) { 540 Scope scope = ctx.scope; 541 while (scope != nil) { 542 if (scope->kind == scope_kind) { 543 return scope; 544 } 545 scope = scope->next; 546 } 547 return nil; 548 } 549 550 enum { 551 BI_EXIT, 552 BI_PRINT_HEX_32, 553 BI_PUT_C, 554 }; 555 556 void builtin_make(const char* name, u32 id, Type p0, Type p1, Type rtn) { 557 String fname = string_make(name, strlen(name)); 558 Type type = type_make(TYPE_FUNC, rtn, 0, 0); 559 type->sym = symbol_make(SYM_FUNC, SYM_IS_DEFINED | SYM_IS_BUILTIN, fname, type, id); 560 561 if (p0 != nil) { 562 Symbol param = symbol_make(SYM_PARAM, 0, string_make("a", 1), p0, 0); 563 type->sym->first = param; 564 type->first = param; 565 type->len = 1; 566 if (p1 != nil) { 567 param->next = symbol_make(SYM_PARAM, 0, string_make("b", 1), p1, 1); 568 type->len = 2; 569 } 570 } 571 symbol_add_global(type->sym); 572 } 573 574 void fixup_add_list(Fixup list, u32 addr) { 575 Fixup fixup = malloc(sizeof(FixupRec)); 576 fixup->next = list->next; 577 fixup->addr = addr; 578 list->next = fixup; 579 } 580 581 void fixup_add_sym(Symbol sym, u32 addr) { 582 Fixup fixup = malloc(sizeof(FixupRec)); 583 fixup->next = sym->fixups; 584 fixup->addr = addr; 585 sym->fixups = fixup; 586 } 587 588 // ================================================================ 589 590 enum { 591 cfVisibleEOL = 1, 592 cfAbortOnError = 2, 593 cfTraceCodeGen = 3, 594 }; 595 596 void ctx_init() { 597 memset(&ctx, 0, sizeof(ctx)); 598 599 // pre-intern keywords 600 ctx.idn_if = string_make("if", 2); 601 ctx.idn_for = string_make("for", 3); 602 ctx.idn_var = string_make("var", 3); 603 ctx.idn_nil = string_make("nil", 3); 604 ctx.idn_case = string_make("case", 4); 605 ctx.idn_func = string_make("func", 4); 606 ctx.idn_else = string_make("else", 4); 607 ctx.idn_enum = string_make("enum", 4); 608 ctx.idn_true = string_make("true", 4); 609 ctx.idn_type = string_make("type", 4); 610 ctx.idn_break = string_make("break", 5); 611 ctx.idn_while = string_make("while", 5); 612 ctx.idn_false = string_make("false", 5); 613 ctx.idn_switch = string_make("switch", 6); 614 ctx.idn_struct = string_make("struct", 6); 615 ctx.idn_return = string_make("return", 6); 616 ctx.idn_continue = string_make("continue", 8); 617 618 // install built-in basic types 619 ctx.type_void = type_setup("void", 4, TYPE_VOID, 0); 620 ctx.type_byte = type_setup("byte", 4, TYPE_BYTE, 1); 621 ctx.type_bool = type_setup("bool", 4, TYPE_BOOL, 1); 622 ctx.type_i32 = type_setup("i32", 3, TYPE_I32, 4); 623 ctx.type_nil = type_setup("nil", 3, TYPE_NIL, 4); 624 ctx.type_string = type_setup("str", 3, TYPE_SLICE, 8); 625 ctx.type_string->base = ctx.type_byte; 626 627 // magic builtin functions 628 builtin_make("_hexout_", BI_PRINT_HEX_32, ctx.type_i32, nil, ctx.type_void); 629 builtin_make("_putc_", BI_PUT_C, ctx.type_i32, nil, ctx.type_void); 630 631 ctx.scope = &(ctx.global); 632 } 633 634 void dump_file_line(const char* fn, u32 offset); 635 void dump_error_ctxt(); 636 637 #if C 638 void error(const char *fmt, ...) 639 #else 640 void error(const char *fmt) 641 #endif 642 { 643 va_list ap; 644 645 fprintf(stderr,"\n%s:%d: ", ctx.filename, ctx.linenumber); 646 va_start(ap, fmt); 647 vfprintf(stderr, fmt, ap); 648 va_end(ap); 649 650 if (ctx.linenumber > 0) { 651 // dump_file_line(ctx.filename, ctx.lineoffset); 652 } 653 fprintf(stderr, "\n"); 654 655 dump_error_ctxt(); 656 657 if (ctx.flags & cfAbortOnError) { 658 abort(); 659 } else { 660 exit(1); 661 } 662 } 663 664 void ctx_open_source(const char* filename) { 665 ctx.filename = filename; 666 ctx.linenumber = 0; 667 668 if (ctx.fd >= 0) { 669 close(ctx.fd); 670 } 671 ctx.fd = open(filename, O_RDONLY); 672 if (ctx.fd < 0) { 673 error("cannot open file '%s'", filename); 674 } 675 ctx.ionext = 0; 676 ctx.iolast = 0; 677 ctx.linenumber = 1; 678 ctx.lineoffset = 0; 679 ctx.byteoffset = 0; 680 } 681 682 // ================================================================ 683 // lexical scanner 684 685 // token classes (tok & tcMASK) 686 enum { 687 tcRELOP = 0x08, tcADDOP = 0x10, tcMULOP = 0x18, 688 tcAEQOP = 0x20, tcMEQOP = 0x28, tcMASK = 0xF8, 689 }; 690 691 enum { 692 // EndMarks, Braces, Brackets Parens 693 tEOF, tEOL, tOBRACE, tCBRACE, tOBRACK, tCBRACK, tOPAREN, tCPAREN, 694 // RelOps (do not reorder) 695 tEQ, tNE, tLT, tLE, tGT, tGE, tx0E, tx0F, 696 // AddOps (do not reorder) 697 tPLUS, tMINUS, tPIPE, tCARET, tx14, tx15, tx16, tx17, 698 // MulOps (do not reorder) 699 tSTAR, tSLASH, tPERCENT, tAMP, tLEFT, tRIGHT, tx1E, tx1F, 700 // AsnOps (do not reorder) 701 tADDEQ, tSUBEQ, tOREQ, tXOREQ, tx24, tx25, tx26, tx27, 702 tMULEQ, tDIVEQ, tMODEQ, tANDEQ, tLSEQ, tRSEQ, t2E, t2F, 703 // Various, UnaryNot, LogicalOps, 704 tSEMI, tCOLON, tDOT, tCOMMA, tNOT, tAND, tOR, tBANG, 705 tASSIGN, tINC, tDEC, 706 // Keywords 707 tTYPE, tFUNC, tSTRUCT, tVAR, tENUM, 708 tIF, tELSE, tWHILE, 709 tBREAK, tCONTINUE, tRETURN, 710 tFOR, tSWITCH, tCASE, 711 tTRUE, tFALSE, tNIL, 712 tIDN, tNUM, tSTR, 713 // used internal to the lexer but never returned 714 tSPC, tINV, tDQT, tSQT, tMSC, 715 }; 716 717 str tnames[] = { 718 "<EOF>", "<EOL>", "{", "}", "[", "]", "(", ")", 719 "==", "!=", "<", "<=", ">", ">=", "", "", 720 "+", "-", "|", "^", "", "", "", "", 721 "*", "/", "%", "&", "&~", "<<", ">>", "", 722 "+=", "-=", "|=", "^=", "", "", "", "", 723 "*=", "/=", "%=", "&=", "&~=", "<<=", ">>=", "", 724 ";", ":", ".", ",", "~", "&&", "||", "!", 725 "=", "++", "--", 726 "type", "func", "struct", "var", "enum", 727 "if", "else", "while", 728 "break", "continue", "return", 729 "for", "switch", "case", 730 "true", "false", "nil", 731 "<ID>", "<NUM>", "<STR>", 732 "<SPC>", "<INV>", "<DQT>", "<SQT>", "<MSC>", 733 }; 734 735 u8 lextab[256] = { 736 tEOF, tINV, tINV, tINV, tINV, tINV, tINV, tINV, 737 tINV, tSPC, tEOL, tSPC, tINV, tSPC, tINV, tINV, 738 tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, 739 tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, 740 tSPC, tBANG, tDQT, tMSC, tMSC, tPERCENT, tAMP, tSQT, 741 tOPAREN, tCPAREN, tSTAR, tPLUS, tCOMMA, tMINUS, tDOT, tSLASH, 742 tNUM, tNUM, tNUM, tNUM, tNUM, tNUM, tNUM, tNUM, 743 tNUM, tNUM, tCOLON, tSEMI, tLT, tASSIGN, tGT, tMSC, 744 tMSC, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, 745 tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, 746 tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, 747 tIDN, tIDN, tIDN, tOBRACK, tMSC, tCBRACK, tCARET, tIDN, 748 tMSC, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, 749 tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, 750 tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, 751 tIDN, tIDN, tIDN, tOBRACE, tPIPE, tCBRACE, tNOT, tINV, 752 tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, 753 tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, 754 tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, 755 tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, 756 tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, 757 tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, 758 tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, 759 tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, 760 tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, 761 tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, 762 tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, 763 tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, 764 tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, 765 tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, 766 tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, 767 tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, 768 }; 769 770 i32 unhex(u32 ch) { 771 if ((ch >= '0') && (ch <= '9')) { 772 return ch - '0'; 773 } 774 if ((ch >= 'a') && (ch <= 'f')) { 775 return ch - 'a' + 10; 776 } 777 if ((ch >= 'A') && (ch <= 'F')) { 778 return ch - 'A' + 10; 779 } 780 return -1; 781 } 782 783 u32 scan() { 784 while (ctx.ionext == ctx.iolast) { 785 if (ctx.fd < 0) { 786 ctx.cc = 0; 787 return ctx.cc; 788 } 789 int r = read(ctx.fd, ctx.iobuffer, sizeof(ctx.iobuffer)); 790 if (r <= 0) { 791 ctx.fd = -1; 792 } else { 793 ctx.iolast = r; 794 ctx.ionext = 0; 795 } 796 } 797 ctx.cc = ctx.iobuffer[ctx.ionext]; 798 ctx.ionext++; 799 ctx.byteoffset++; 800 return ctx.cc; 801 } 802 803 u32 unescape(u32 n) { 804 if (n == 'n') { 805 return 10; 806 } else if (n == 'r') { 807 return 13; 808 } else if (n == 't') { 809 return 9; 810 } else if (n == '"') { 811 return '"'; 812 } else if (n == '\'') { 813 return '\''; 814 } else if (n == '\\') { 815 return '\\'; 816 } else if (n == 'x') { 817 int x0 = unhex(scan()); 818 int x1 = unhex(scan()); 819 if ((x0 < 0) || (x1 < 0)) { 820 error("invalid hex escape"); 821 } 822 return (x0 << 4) | x1; 823 } else { 824 error("invalid escape 0x%02x", n); 825 return 0; 826 } 827 } 828 829 token_t scan_string(u32 cc, u32 nc) { 830 u32 n = 0; 831 while (true) { 832 if (nc == '"') { 833 break; 834 } else if (nc == 0) { 835 error("unterminated string"); 836 } else if (nc == '\\') { 837 ctx.tmp[n] = unescape(scan()); 838 } else { 839 ctx.tmp[n] = nc; 840 } 841 nc = scan(); 842 n++; 843 if (n == 255) { 844 error("constant string too large"); 845 } 846 } 847 ctx.tmp[n] = 0; 848 return tSTR; 849 } 850 851 token_t scan_keyword(u32 len) { 852 ctx.tmp[len] = 0; 853 String idn = string_make(ctx.tmp, len); 854 ctx.ident = idn; 855 856 if (len == 2) { 857 if (idn == ctx.idn_if) { return tIF; }; 858 } else if (len == 3) { 859 if (idn == ctx.idn_for) { return tFOR; } 860 if (idn == ctx.idn_var) { return tVAR; } 861 if (idn == ctx.idn_nil) { return tNIL; } 862 } else if (len == 4) { 863 if (idn == ctx.idn_case) { return tCASE; } 864 if (idn == ctx.idn_func) { return tFUNC; } 865 if (idn == ctx.idn_else) { return tELSE; } 866 if (idn == ctx.idn_enum) { return tENUM; } 867 if (idn == ctx.idn_true) { return tTRUE; } 868 if (idn == ctx.idn_type) { return tTYPE; } 869 } else if (len == 5) { 870 if (idn == ctx.idn_break) { return tBREAK; } 871 if (idn == ctx.idn_while) { return tWHILE; } 872 if (idn == ctx.idn_false) { return tFALSE; } 873 } else if (len == 6) { 874 if (idn == ctx.idn_switch) { return tSWITCH; } 875 if (idn == ctx.idn_struct) { return tSTRUCT; } 876 if (idn == ctx.idn_return) { return tRETURN; } 877 } else if (len == 8) { 878 if (idn == ctx.idn_continue) { return tCONTINUE; } 879 } 880 return tIDN; 881 } 882 883 token_t scan_number(u32 cc, u32 nc) { 884 u32 n = 1; 885 u32 val = cc - '0'; 886 887 if ((cc == '0') && (nc == 'b')) { // binary 888 nc = scan(); 889 while ((nc == '0') || (nc == '1')) { 890 val = (val << 1) | (nc - '0'); 891 nc = scan(); 892 n++; 893 if (n == 34) { 894 error("binary constant too large"); 895 } 896 } 897 } else if ((cc == '0') && (nc == 'x')) { // hex 898 nc = scan(); 899 while (true) { 900 int tmp = unhex(nc); 901 if (tmp == -1) { 902 break; 903 } 904 val = (val << 4) | tmp; 905 nc = scan(); 906 n++; 907 if (n == 10) { 908 error("hex constant too large"); 909 } 910 } 911 } else { // decimal 912 while (lextab[nc] == tNUM) { 913 u32 tmp = (val * 10) + (nc - '0'); 914 if (tmp <= val) { 915 error("decimal constant too large"); 916 } 917 val = tmp; 918 nc = scan(); 919 n++; 920 } 921 } 922 ctx.num = val; 923 return tNUM; 924 } 925 926 token_t scan_ident(u32 cc, u32 nc) { 927 ctx.tmp[0] = cc; 928 u32 n = 1; 929 930 while (true) { 931 u32 tok = lextab[nc]; 932 if ((tok == tIDN) || (tok == tNUM)) { 933 ctx.tmp[n] = nc; 934 n++; 935 if (n == 32) { error("identifier too large"); } 936 nc = scan(); 937 } else { 938 break; 939 } 940 } 941 return scan_keyword(n); 942 } 943 944 token_t _next() { 945 u8 nc = ctx.cc; 946 while (true) { 947 u8 cc = nc; 948 nc = scan(); 949 u32 tok = lextab[cc]; 950 if (tok == tNUM) { // 0..9 951 return scan_number(cc, nc); 952 } else if (tok == tIDN) { // _ A..Z a..z 953 return scan_ident(cc, nc); 954 } else if (tok == tDQT) { // " 955 return scan_string(cc, nc); 956 } else if (tok == tSQT) { // ' 957 ctx.num = nc; 958 if (nc == '\\') { 959 ctx.num = unescape(scan()); 960 } 961 nc = scan(); 962 if (nc != '\'') { 963 error("unterminated character constant"); 964 } 965 nc = scan(); 966 return tNUM; 967 } else if (tok == tPLUS) { 968 if (nc == '+') { tok = tINC; nc = scan(); } 969 } else if (tok == tMINUS) { 970 if (nc == '-') { tok = tDEC; nc = scan(); } 971 } else if (tok == tAMP) { 972 if (nc == '&') { tok = tAND; nc = scan(); } 973 } else if (tok == tPIPE) { 974 if (nc == '|') { tok = tOR; nc = scan(); } 975 } else if (tok == tGT) { 976 if (nc == '=') { tok = tGE; nc = scan(); } 977 else if (nc == '>') { tok = tRIGHT; nc = scan(); } 978 } else if (tok == tLT) { 979 if (nc == '=') { tok = tLE; nc = scan(); } 980 else if (nc == '<') { tok = tLEFT; nc = scan(); } 981 } else if (tok == tASSIGN) { 982 if (nc == '=') { tok = tEQ; nc = scan(); } 983 } else if (tok == tBANG) { 984 if (nc == '=') { tok = tNE; nc = scan(); } 985 } else if (tok == tSLASH) { 986 if (nc == '/') { 987 // comment -- consume until EOL or EOF 988 while ((nc != '\n') && (nc != 0)) { 989 nc = scan(); 990 } 991 continue; 992 } 993 } else if (tok == tEOL) { 994 ctx.linenumber++; 995 ctx.lineoffset = ctx.byteoffset; 996 //ctx.xref[ctx.pc / 4] = ctx.linenumber; 997 if (ctx.flags & cfVisibleEOL) { 998 return tEOL; 999 } 1000 continue; 1001 } else if (tok == tSPC) { 1002 continue; 1003 } else if ((tok == tMSC) || (tok == tINV)) { 1004 error("unknown character 0x%02x", cc); 1005 } 1006 1007 // if we're an AddOp or MulOp, followed by an '=' 1008 if (((tok & 0xF0) == 0x20) && (nc == '=')) { 1009 nc = scan(); 1010 // transform us to a XEQ operation 1011 tok = tok + 0x10; 1012 } 1013 1014 return tok; 1015 } 1016 } 1017 1018 token_t next() { 1019 return (ctx.tok = _next()); 1020 } 1021 1022 void token_printstr(void) { 1023 u32 n = 0; 1024 printf("\""); 1025 while (n < 256) { 1026 u32 ch = ctx.tmp[n]; 1027 if (ch == 0) { 1028 break; 1029 } else if ((ch < ' ') || (ch > '~')) { 1030 printf("\\x%02x", ch); 1031 } else if ((ch == '"') || (ch == '\\')) { 1032 printf("\\%c", ch); 1033 } else { 1034 printf("%c", ch); 1035 } 1036 n++; 1037 } 1038 printf("\""); 1039 } 1040 1041 void token_print(void) { 1042 if (ctx.tok == tNUM) { 1043 printf("#%u ", ctx.num); 1044 } else if (ctx.tok == tIDN) { 1045 printf("@%s ", ctx.tmp); 1046 } else if (ctx.tok == tEOL) { 1047 printf("\n"); 1048 } else if (ctx.tok == tSTR) { 1049 token_printstr(); 1050 } else { 1051 printf("%s ", tnames[ctx.tok]); 1052 } 1053 } 1054 1055 void expected(const char* what) { 1056 error("expected %s, found %s", what, tnames[ctx.tok]); 1057 } 1058 1059 void expect(token_t tok) { 1060 if (ctx.tok != tok) { 1061 error("expected %s, found %s", tnames[tok], tnames[ctx.tok]); 1062 } 1063 } 1064 1065 void require(token_t tok) { 1066 expect(tok); 1067 next(); 1068 } 1069 1070 // ================================================================ 1071 1072 //TODO: handle overflow and div/mod-by-zero 1073 i32 ast_get_const_i32(Ast node) { 1074 if (node->kind == AST_CONST) { 1075 return node->ival; 1076 } else if (node->kind == AST_SYMBOL) { 1077 if (node->sym->kind != SYM_CONST) { 1078 error("non-const symbol (%s) in constexpr\n", node->sym->name); 1079 } 1080 return node->sym->value; 1081 } else if (ast_kind_is_binop(node->kind)) { 1082 i32 left = ast_get_const_i32(node->c0); 1083 i32 right = ast_get_const_i32(node->c1); 1084 u32 op = node->kind; 1085 if (op == AST_ADD) { 1086 return left + right; 1087 } else if (op == AST_SUB) { 1088 return left - right; 1089 } else if (op == AST_MUL) { 1090 return left * right; 1091 } else if (op == AST_DIV) { 1092 return left / right; 1093 } else if (op == AST_MOD) { 1094 return left % right; 1095 } else if (op == AST_AND) { 1096 return left & right; 1097 } else if (op == AST_OR) { 1098 return left | right; 1099 } else if (op == AST_XOR) { 1100 return left | right; 1101 } else if (op == AST_BOOL_AND) { 1102 return left && right; 1103 } else if (op == AST_BOOL_OR) { 1104 return left || right; 1105 } else { 1106 error("unsupported const op %s\n", ast_kind[op]); 1107 } 1108 } else if (node->kind == AST_NEG) { 1109 return -ast_get_const_i32(node->c0); 1110 } else if (node->kind == AST_NOT) { 1111 return ~ast_get_const_i32(node->c0); 1112 } else if (node->kind == AST_BOOL_NOT) { 1113 return !ast_get_const_i32(node->c0); 1114 } else { 1115 error("non-const expr (%s)\n", ast_kind[node->kind]); 1116 } 1117 return 0; 1118 } 1119 1120 void ast_type_compat(Ast node, Type type) { 1121 if (node->c0) { 1122 if (!type_is_compatible(type, node->c0->type)) { 1123 error("incompatible types"); 1124 } 1125 } 1126 if (node->c1) { 1127 if (!type_is_compatible(type, node->c1->type)) { 1128 error("incompatible types"); 1129 } 1130 } 1131 node->type = type; 1132 } 1133 1134 Ast ast_make_deref(Ast child) { 1135 Ast node = ast_make_simple(AST_DEREF, 0); 1136 node->c0 = child; 1137 node->type = child->type->base; 1138 return node; 1139 } 1140 1141 Ast ast_require_array_type(Ast node) { 1142 if (node->type->kind == TYPE_ARRAY) { 1143 return node; 1144 } 1145 if ((node->type->kind == TYPE_POINTER) && 1146 (node->type->base->kind == TYPE_ARRAY)) { 1147 return ast_make_deref(node); 1148 } 1149 error("expected an array"); 1150 return nil; 1151 } 1152 1153 Ast ast_require_struct_type(Ast node) { 1154 if (node->type->kind == TYPE_RECORD) { 1155 return node; 1156 } 1157 if ((node->type->kind == TYPE_POINTER) && 1158 (node->type->base->kind == TYPE_RECORD)) { 1159 return ast_make_deref(node); 1160 } 1161 error("expected a struct"); 1162 return nil; 1163 } 1164 1165 String parse_name(const char* what) { 1166 if (ctx.tok != tIDN) { 1167 error("expected %s, found %s %u", what, tnames[ctx.tok], ctx.tok); 1168 } 1169 String str = ctx.ident; 1170 next(); 1171 return str; 1172 } 1173 1174 Ast parse_expr(); 1175 1176 Ast parse_operand() { 1177 Ast node = nil; 1178 if (ctx.tok == tNUM) { 1179 node = ast_make_const(ctx.num, ctx.type_i32); 1180 } else if (ctx.tok == tSTR) { 1181 error("<TODO> string const"); 1182 } else if (ctx.tok == tTRUE) { 1183 node = ast_make_const(1, ctx.type_bool); 1184 } else if (ctx.tok == tFALSE) { 1185 node = ast_make_const(0, ctx.type_bool); 1186 } else if (ctx.tok == tNIL) { 1187 node = ast_make_const(0, ctx.type_nil); 1188 } else if (ctx.tok == tOPAREN) { 1189 next(); 1190 node = parse_expr(); 1191 require(tCPAREN); 1192 return node; 1193 } else if (ctx.tok == tIDN) { 1194 // TODO: could happen in an AST pass to allow 1195 // for forward references 1196 Symbol sym = symbol_find(ctx.ident); 1197 if (sym == nil) { 1198 error("undefined identifier '%s'", ctx.ident->text); 1199 } 1200 node = ast_make_symbol(ctx.ident, sym); 1201 } else { 1202 error("invalid expression"); 1203 } 1204 next(); 1205 return node; 1206 } 1207 1208 Symbol type_find_field(Type type, String name) { 1209 Symbol field = type->first; 1210 while (field != nil) { 1211 if (field->name == name) { 1212 return field; 1213 } 1214 field = field->next; 1215 } 1216 error("struct has no such field '%s'", name->text); 1217 return nil; 1218 } 1219 1220 Ast parse_primary_expr() { 1221 Ast node = parse_operand(); 1222 while (true) { 1223 if (ctx.tok == tOPAREN) { 1224 next(); 1225 //VALIDATE as func or ptr-to-func 1226 if (node->kind != AST_SYMBOL) { 1227 error("cannot call '%s'", ast_kind[node->kind]); 1228 } 1229 if (node->sym->kind != SYM_FUNC) { 1230 error("cannot call non-function '%s'", node->sym->name->text); 1231 } 1232 u32 n = 0; 1233 1234 Ast call = ast_make_simple(AST_CALL, 0); 1235 call->c0 = node; 1236 call->type = node->sym->type->base; 1237 Ast last = call; 1238 1239 while (ctx.tok != tCPAREN) { 1240 // VALIDATE compatibility 1241 Ast param = parse_expr(); 1242 last->c2 = param; 1243 last = param; 1244 if (ctx.tok != tCPAREN) { 1245 require(tCOMMA); 1246 } 1247 n++; 1248 } 1249 require(tCPAREN); 1250 call->ival = n; 1251 node = call; 1252 } else if (ctx.tok == tDOT) { 1253 next(); 1254 node = ast_require_struct_type(node); 1255 String name = parse_name("field name"); 1256 Ast dot = ast_make_simple(AST_FIELD, 0); 1257 dot->c0 = node; 1258 dot->c1 = ast_make_simple(AST_SYMBOL, 0); 1259 dot->c1->name = name; 1260 dot->c1->sym = type_find_field(node->type, name); 1261 dot->c1->type = dot->c1->sym->type; 1262 dot->type = dot->c1->type; 1263 node = dot; 1264 } else if (ctx.tok == tOBRACK) { 1265 next(); 1266 node = ast_require_array_type(node); 1267 Ast index = ast_make_simple(AST_INDEX, 0); 1268 index->c0 = node; 1269 index->c1 = parse_expr(); 1270 index->type = node->type->base; 1271 node = index; 1272 require(tCBRACK); 1273 //VALIDATE lhs is array, rhs is appropriate numeric 1274 } else { 1275 break; 1276 } 1277 } 1278 return node; 1279 } 1280 1281 Ast parse_unary_expr() { 1282 u32 op = ctx.tok; 1283 if (op == tPLUS) { 1284 next(); 1285 return parse_unary_expr(); 1286 } else if (op == tMINUS) { 1287 next(); 1288 Ast node = ast_make_unop(AST_NEG, parse_unary_expr()); 1289 ast_type_compat(node, ctx.type_i32); 1290 return node; 1291 } else if (op == tBANG) { 1292 next(); 1293 Ast node = ast_make_unop(AST_BOOL_NOT, parse_unary_expr()); 1294 ast_type_compat(node, ctx.type_bool); 1295 return node; 1296 } else if (op == tNOT) { 1297 next(); 1298 Ast node = ast_make_unop(AST_NOT, parse_unary_expr()); 1299 ast_type_compat(node, ctx.type_i32); 1300 return node; 1301 } else if (op == tAMP) { 1302 next(); 1303 Ast node = ast_make_unop(AST_ADDROF, parse_unary_expr()); 1304 node->type = type_make_ptr(node->c0->type); 1305 return node; 1306 } else { 1307 return parse_primary_expr(); 1308 } 1309 } 1310 1311 Ast parse_mul_expr() { 1312 Ast node = parse_unary_expr(); 1313 while ((ctx.tok & tcMASK) == tcMULOP) { 1314 u32 op = (ctx.tok - tSTAR) + AST_MUL; 1315 next(); 1316 node = ast_make_binop(op, node, parse_unary_expr()); 1317 ast_type_compat(node, ctx.type_i32); 1318 } 1319 return node; 1320 } 1321 1322 Ast parse_add_expr() { 1323 Ast node = parse_mul_expr(); 1324 while ((ctx.tok & tcMASK) == tcADDOP) { 1325 u32 op = (ctx.tok - tPLUS) + AST_ADD; 1326 next(); 1327 node = ast_make_binop(op, node, parse_mul_expr()); 1328 ast_type_compat(node, ctx.type_i32); 1329 } 1330 return node; 1331 } 1332 1333 Ast parse_rel_expr() { 1334 Ast node = parse_add_expr(); 1335 if ((ctx.tok & tcMASK) == tcRELOP) { 1336 u32 op = (ctx.tok - tEQ) + AST_EQ; 1337 next(); 1338 node = ast_make_binop(op, node, parse_add_expr()); 1339 ast_type_compat(node, ctx.type_bool); 1340 } 1341 return node; 1342 } 1343 1344 Ast parse_and_expr() { 1345 Ast node = parse_rel_expr(); 1346 if (ctx.tok == tAND) { 1347 while (ctx.tok == tAND) { 1348 next(); 1349 node = ast_make_binop(AST_BOOL_AND, node, parse_rel_expr()); 1350 ast_type_compat(node, ctx.type_bool); 1351 } 1352 } 1353 return node; 1354 } 1355 1356 Ast parse_expr() { 1357 Ast node = parse_and_expr(); 1358 if (ctx.tok == tOR) { 1359 while (ctx.tok == tOR) { 1360 next(); 1361 node = ast_make_binop(AST_BOOL_OR, node, parse_and_expr()); 1362 ast_type_compat(node, ctx.type_bool); 1363 } 1364 } 1365 return node; 1366 } 1367 1368 // fwd_ref_ok indicates that an undefined typename 1369 // may be treated as a forward reference. This is 1370 // only used for pointers (because their size does 1371 // not depend on their target). 1372 Type parse_type(bool fwd_ref_ok); 1373 1374 Type parse_struct_type() { 1375 Type rectype = type_make(TYPE_RECORD, nil, 0, 0); 1376 Symbol last = nil; 1377 require(tOBRACE); 1378 while (true) { 1379 if (ctx.tok == tCBRACE) { 1380 next(); 1381 break; 1382 } 1383 String name = parse_name("field name"); 1384 Type type = parse_type(false); 1385 Symbol field = symbol_make(SYM_FIELD, 0, name, type, rectype->size); 1386 1387 // TODO sub-word packing 1388 rectype->size += (type->size + 3) & (~3); 1389 rectype->len++; 1390 1391 // add field to record 1392 if (last == nil) { 1393 rectype->first = field; 1394 } else { 1395 last->next = field; 1396 } 1397 last = field; 1398 1399 if (ctx.tok != tCBRACE) { 1400 require(tCOMMA); 1401 } 1402 } 1403 return rectype; 1404 } 1405 1406 Type parse_array_type() { 1407 if (ctx.tok == tCBRACK) { 1408 next(); 1409 return type_make(TYPE_SLICE, parse_type(false), 0, 8); 1410 } else { 1411 Ast expr = parse_expr(); 1412 require(tCBRACK); 1413 i32 nelem = ast_get_const_i32(expr); 1414 if (nelem <= 0) { 1415 error("array size must be positive"); 1416 } 1417 Type base = parse_type(false); 1418 u32 sz = nelem * base->size; 1419 if (sz < nelem) { 1420 error("array size overflow"); 1421 } 1422 return type_make(TYPE_ARRAY, base, nelem, sz); 1423 } 1424 } 1425 1426 Type parse_func_type() { 1427 error("<TODO> func type"); 1428 return nil; 1429 } 1430 1431 Type parse_type(bool fwd_ref_ok) { 1432 if (ctx.tok == tSTAR) { // pointer-to 1433 next(); 1434 return type_make(TYPE_POINTER, parse_type(true), 0, 4); 1435 } else if (ctx.tok == tOBRACK) { // array-of 1436 next(); 1437 return parse_array_type(); 1438 } else if (ctx.tok == tFUNC) { 1439 next(); 1440 return parse_func_type(); 1441 } else if (ctx.tok == tSTRUCT) { 1442 next(); 1443 return parse_struct_type(); 1444 } else if (ctx.tok == tIDN) { 1445 String name = ctx.ident; 1446 next(); 1447 Type type = type_find(name); 1448 if (type == nil) { 1449 if (fwd_ref_ok) { 1450 type = type_make(TYPE_UNDEFINED, nil, 0, 0); 1451 type_add(type, name); 1452 } else { 1453 error("undefined type '%s' not usable here", name->text); 1454 } 1455 } 1456 return type; 1457 } else { 1458 expected("type"); 1459 return nil; 1460 } 1461 } 1462 1463 Ast parse_block(); 1464 1465 Ast parse_while() { 1466 Ast expr = parse_expr(); 1467 require(tOBRACE); 1468 scope_push(SCOPE_LOOP, nil); 1469 Ast block = parse_block(); 1470 block->sym = scope_pop(); 1471 Ast node = ast_make_simple(AST_WHILE, 0); 1472 node->c0 = expr; 1473 node->c1 = block; 1474 return node; 1475 } 1476 1477 Ast parse_if() { 1478 // if expr { block } 1479 Ast node = ast_make_simple(AST_IF, 0); 1480 Ast ifnode = ast_make_simple(AST_IFELSE, 0); 1481 node->c0 = ifnode; 1482 ifnode->c0 = parse_expr(); 1483 require(tOBRACE); 1484 scope_push(SCOPE_BLOCK, nil); 1485 ifnode->c1 = parse_block(); 1486 ifnode->c1->sym = scope_pop(); 1487 while (ctx.tok == tELSE) { 1488 next(); 1489 // ... else ... 1490 if (ctx.tok == tIF) { 1491 // ... if expr { block } 1492 next(); 1493 Ast ifelse = ast_make_simple(AST_IFELSE, 0); 1494 ifelse->c0 = parse_expr(); 1495 require(tOBRACE); 1496 scope_push(SCOPE_BLOCK, nil); 1497 ifelse->c1 = parse_block(); 1498 ifelse->c1->sym = scope_pop(); 1499 ifnode->c2 = ifelse; 1500 ifnode = ifelse; 1501 } else { 1502 // ... { block } 1503 require(tOBRACE); 1504 scope_push(SCOPE_BLOCK, nil); 1505 ifnode->c2 = parse_block(); 1506 ifnode->c2->sym = scope_pop(); 1507 break; 1508 } 1509 } 1510 return node; 1511 } 1512 1513 Ast parse_return() { 1514 Ast node = ast_make_simple(AST_RETURN, 0); 1515 if (ctx.tok == tSEMI) { 1516 //if (ctx.fn->type->base != ctx.type_void) { 1517 // error("function requires return type"); 1518 //} 1519 next(); 1520 //x.type = ctx.type_void; 1521 } else { 1522 node->c0 = parse_expr(); 1523 //if (!compatible_type(ctx.fn->type->base, x.type, &x)) { 1524 // error("return types do not match"); 1525 //} 1526 require(tSEMI); 1527 } 1528 return node; 1529 } 1530 1531 Ast parse_break() { 1532 // XXX break-to-labeled-loop support 1533 Scope scope = scope_find(SCOPE_LOOP); 1534 if (scope == nil) { 1535 error("break must be used from inside a looping construct"); 1536 } 1537 require(tSEMI); 1538 return ast_make_simple(AST_BREAK, 0); 1539 } 1540 1541 Ast parse_continue() { 1542 // XXX continue-to-labeled-loop support 1543 Scope scope = scope_find(SCOPE_LOOP); 1544 if (scope == nil) { 1545 error("continue must be used from inside a looping construct"); 1546 } 1547 require(tSEMI); 1548 return ast_make_simple(AST_CONTINUE, 0); 1549 } 1550 1551 // unsafe write op 1552 void STORE(u32 val, u32* ptr, u32 n, u32 sz) { 1553 if (sz == 4) { 1554 ptr[n >> 2] = val; 1555 } else if (sz == 1) { 1556 ((u8*)ptr)[n] = val; 1557 } 1558 } 1559 1560 u32 parse_array_init(Symbol var, u32ptr data, u32 dmax, u32 sz) { 1561 memset(data, 0, dmax); 1562 u32 n = 0; 1563 while (true) { 1564 if (ctx.tok == tCBRACE) { 1565 next(); 1566 break; 1567 } 1568 if (n >= dmax) { 1569 error("initializer too large"); 1570 } 1571 Ast expr = parse_expr(); 1572 i32 v = ast_get_const_i32(expr); 1573 1574 // VALIDATE type compat/fit 1575 STORE(v, data, n, sz); 1576 n += sz; 1577 if (ctx.tok != tCBRACE) { 1578 require(tCOMMA); 1579 } 1580 } 1581 return n; 1582 } 1583 1584 void parse_struct_init(Symbol var, u32ptr data) { 1585 memset(data, 0, var->type->size); 1586 while (true) { 1587 if (ctx.tok == tCBRACE) { 1588 next(); 1589 break; 1590 } 1591 String name = parse_name("field name"); 1592 Symbol field = var->type->first; 1593 while (true) { 1594 if (field == nil) { 1595 error("structure has no '%s' field", name->text); 1596 } 1597 if (field->name == name) { 1598 break; 1599 } 1600 field = field->next; 1601 } 1602 require(tCOLON); 1603 Ast expr = parse_expr(); 1604 i32 v = ast_get_const_i32(expr); 1605 // VALIDATE type compat/fit 1606 STORE(v, data, field->value, 4); 1607 if (ctx.tok != tCBRACE) { 1608 require(tCOMMA); 1609 } 1610 } 1611 } 1612 1613 Ast parse_local_var() { 1614 String name = parse_name("variable name"); 1615 // TODO: allow type inference 1616 Type type = parse_type(false); 1617 1618 Symbol sym = symbol_make(SYM_LOCAL, 0, name, type, ctx.alloc_stack); 1619 scope_add_symbol(ctx.scope, sym); 1620 1621 ctx.alloc_stack = ctx.alloc_stack + type->size; 1622 if (ctx.local_stack < ctx.alloc_stack) { 1623 ctx.local_stack = ctx.alloc_stack; 1624 } 1625 1626 Ast node = nil; 1627 if (ctx.tok == tASSIGN) { 1628 next(); 1629 node = ast_make_simple(AST_EXPR, 0); 1630 node->c0 = ast_make_binop(AST_ASSIGN, ast_make_symbol(name, sym), parse_expr()); 1631 node->c0->type = node->c0->c1->type; 1632 } 1633 require(tSEMI); 1634 1635 return node; 1636 } 1637 1638 void parse_global_var() { 1639 String name = parse_name("variable name"); 1640 // TODO: allow type inference 1641 Type type = parse_type(false); 1642 1643 Symbol sym = symbol_make(SYM_GLOBAL, 0, name, type, ctx.gp); 1644 symbol_add_global(sym); 1645 1646 // advance global pointer and round to next workd 1647 ctx.gp = ctx.gp + type->size; 1648 ctx.gp = (ctx.gp + 3) & (~3); 1649 1650 if (ctx.tok == tASSIGN) { 1651 next(); 1652 u32* data = ctx.data + (sym->value >> 2); 1653 if (ctx.tok == tOBRACE) { 1654 next(); 1655 if (type->kind == TYPE_ARRAY) { 1656 parse_array_init(sym, data, type->size, type->base->size); 1657 } else if (type->kind == TYPE_RECORD) { 1658 parse_struct_init(sym, data); 1659 } else { 1660 error("cannot initialize this way"); 1661 } 1662 } else { 1663 Ast expr = parse_expr(); 1664 i32 v = ast_get_const_i32(expr); 1665 // DISCARD expr 1666 // VALIDATE compatible integer type 1667 ctx.data[sym->value >> 2] = v; 1668 } 1669 } 1670 require(tSEMI); 1671 } 1672 1673 Ast parse_expr_statement() { 1674 u32 srcloc = ctx.linenumber; 1675 1676 Ast node = nil; 1677 Ast left = parse_expr(); 1678 Ast right = nil; 1679 if (ctx.tok == tASSIGN) { 1680 next(); 1681 right = parse_expr(); 1682 node = ast_make_binop(AST_ASSIGN, left, right); 1683 node->type = node->c1->type; 1684 } else if ((ctx.tok & tcMASK) == tcAEQOP) { 1685 u32 op = ctx.tok; // - tADDEQ; 1686 next(); 1687 right = parse_expr(); 1688 node = ast_make_binop(op, left, right); 1689 // TODO: type prop, convert x op= y to x = x op y 1690 } else if ((ctx.tok & tcMASK) == tcMEQOP) { 1691 u32 op = ctx.tok; // - tMULEQ; 1692 next(); 1693 right = parse_expr(); 1694 node = ast_make_binop(op, left, right); 1695 // TODO: type prop, convert x op= y to x = x op y 1696 } else if ((ctx.tok == tINC) || (ctx.tok == tDEC)) { 1697 u32 op; 1698 if (ctx.tok == tINC) { 1699 op = AST_ADD; 1700 } else { 1701 op = AST_SUB; 1702 } 1703 next(); 1704 right = ast_make_const(1, ctx.type_i32); 1705 right = ast_make_binop(op, left, right); 1706 right->type = ctx.type_i32; 1707 node = ast_make_binop(AST_ASSIGN, left, right); 1708 node->type = ctx.type_i32; 1709 // TODO: check type? 1710 } else { 1711 node = left; 1712 } 1713 require(tSEMI); 1714 1715 // wrap in a statement node 1716 Ast stmt = ast_make_simple(AST_EXPR, 0); 1717 stmt->c0 = node; 1718 stmt->srcloc = srcloc; 1719 1720 return stmt; 1721 } 1722 1723 Ast parse_block() { 1724 Ast block = ast_make_simple(AST_BLOCK, 0); 1725 Ast last = block; 1726 Ast node; 1727 while (true) { 1728 if (ctx.tok == tCBRACE) { 1729 next(); 1730 break; 1731 } else if (ctx.tok == tRETURN) { 1732 next(); 1733 node = parse_return(); 1734 } else if (ctx.tok == tBREAK) { 1735 next(); 1736 node = parse_break(); 1737 } else if (ctx.tok == tCONTINUE) { 1738 next(); 1739 node = parse_continue(); 1740 } else if (ctx.tok == tWHILE) { 1741 next(); 1742 node = parse_while(); 1743 } else if (ctx.tok == tIF) { 1744 next(); 1745 node = parse_if(); 1746 } else if (ctx.tok == tVAR) { 1747 next(); 1748 node = parse_local_var(); 1749 } else if (ctx.tok == tSEMI) { 1750 next(); 1751 // empty statement 1752 continue; 1753 } else { 1754 node = parse_expr_statement(); 1755 } 1756 1757 // append to block's list of statements 1758 // some of these don't always return a node (like local var) 1759 if (node != nil) { 1760 last->c2 = node; 1761 last = node; 1762 } 1763 } 1764 return block; 1765 } 1766 1767 Ast parse_function_body(Symbol fn) { 1768 Ast node; 1769 ctx.fn = fn; 1770 ctx.local_stack = 0; 1771 ctx.alloc_stack = 0; 1772 scope_push(SCOPE_FUNC, fn->first); // scope for parameters 1773 scope_push(SCOPE_BLOCK, nil); // top scope for function body 1774 node = parse_block(); 1775 node->sym = scope_pop(); 1776 scope_pop(); 1777 ctx.fn = nil; 1778 fn->type->size = ctx.local_stack; 1779 return node; 1780 } 1781 1782 Symbol parse_param(String fname, u32 n, Symbol first, Symbol last) { 1783 String pname = parse_name("parameter name"); 1784 Type ptype = parse_type(false); 1785 1786 // arrays and structs are always passed as reference parameters 1787 if ((ptype->kind == TYPE_ARRAY) || (ptype->kind == TYPE_RECORD)) { 1788 ptype = type_make_ptr(ptype); 1789 } 1790 1791 Symbol param = symbol_make(SYM_PARAM, 0, pname, ptype, /* 4 + */ n * 4); 1792 1793 Symbol sym = first; 1794 while (sym != nil) { 1795 if (sym->name == param->name) { 1796 error("duplicate parameter name '%s'", fname->text); 1797 } 1798 sym = sym->next; 1799 } 1800 1801 if (last != nil) { 1802 last->next = param; 1803 } 1804 return param; 1805 } 1806 1807 Ast parse_function() { 1808 Symbol first = nil; 1809 Symbol last = nil; 1810 u32 n = 0; 1811 String fname = parse_name("function name"); 1812 Type rettype = ctx.type_void; 1813 1814 require(tOPAREN); 1815 1816 // process parameters 1817 if (ctx.tok != tCPAREN) { 1818 first = parse_param(fname, n, nil, nil); 1819 last = first; 1820 n++; 1821 while (ctx.tok == tCOMMA) { 1822 next(); 1823 last = parse_param(fname, n, first, last); 1824 n++; 1825 } 1826 } 1827 require(tCPAREN); 1828 1829 if ((ctx.tok != tSEMI) && (ctx.tok != tOBRACE)) { 1830 rettype = parse_type(false); 1831 } 1832 1833 int isdef = 0; 1834 if (ctx.tok == tSEMI) { 1835 // declaration 1836 next(); 1837 } else if (ctx.tok == tOBRACE) { 1838 // definition 1839 next(); 1840 isdef = 1; 1841 } else { 1842 expected("semi or open brace"); 1843 } 1844 1845 // Look for an existing declaration or definintion of this function 1846 Symbol sym = symbol_find(fname); 1847 if (sym != nil) { 1848 // such a named identifier exists 1849 // check to see if we are in agreement with it 1850 if (sym->kind != SYM_FUNC) { 1851 error("redefining '%s' as function", fname->text); 1852 } 1853 if (isdef && (sym->flags & SYM_IS_DEFINED)) { 1854 error("redefined function '%s'", fname->text); 1855 } 1856 if (rettype != sym->type->base) { 1857 error("func '%s' return type differs from decl", fname->text); 1858 } 1859 if (sym->type->len != n) { 1860 error("func '%s' parameter count differs from decl", fname->text); 1861 } 1862 Symbol pa = first; 1863 Symbol pb = sym->type->first; 1864 u32 i = 1; 1865 while ((pa != nil) && (pb != nil)) { 1866 if (!type_is_same(pa->type, pb->type)) { 1867 error("func '%s' param %u differs from decl", fname->text, i); 1868 } 1869 pa = pa->next; 1870 pb = pb->next; 1871 } 1872 } else { 1873 // if there was no existing record of this function, create one now 1874 Type type = type_make(TYPE_FUNC, rettype, n, 0); 1875 sym = symbol_make(SYM_FUNC, 0, fname, type, 0); 1876 sym->first = first; 1877 type->first = first; 1878 type->sym = sym; 1879 type->len = n; // parameter count 1880 symbol_add_global(sym); 1881 } 1882 1883 // handle definition if it is one 1884 if (isdef) { 1885 Ast node = ast_make_simple(AST_FUNC, 0); 1886 node->name = fname; 1887 node->sym = sym; 1888 1889 // mark as defined and save entry address 1890 sym->flags |= SYM_IS_DEFINED; 1891 node->c0 = parse_function_body(sym); 1892 return node; 1893 } 1894 1895 return nil; 1896 } 1897 1898 void parse_type_def() { 1899 String name = parse_name("type name"); 1900 Type type = parse_type(false); 1901 Type prev = type_find(name); 1902 if (prev == nil) { 1903 type_add(type, name); 1904 } else { 1905 if (prev->kind != TYPE_UNDEFINED) { 1906 error("cannot redefine type '%s'\n", name->text); 1907 } 1908 prev->kind = type->kind; 1909 prev->base = type->base; 1910 prev->first = type->first; 1911 prev->len = type->len; 1912 prev->size = type->size; 1913 prev->sym->type = type; 1914 // XXX discard type 1915 type = prev; 1916 } 1917 require(tSEMI); 1918 } 1919 1920 void parse_enum_def() { 1921 require(tOBRACE); 1922 u32 val = 0; 1923 while (ctx.tok != tCBRACE) { 1924 String name = parse_name("enum tag name"); 1925 Symbol sym = symbol_find(name); 1926 if (sym != nil) { 1927 error("cannot redefine %s as enum tag\n", name->text); 1928 } 1929 if (ctx.tok == tASSIGN) { 1930 next(); 1931 Ast expr = parse_expr(); 1932 val = ast_get_const_i32(expr); 1933 // typecheck? discard subtree 1934 } 1935 require(tCOMMA); 1936 symbol_add_global(symbol_make(SYM_CONST, 0, name, ctx.type_i32, val)); 1937 val++; 1938 } 1939 require(tCBRACE); 1940 require(tSEMI); 1941 } 1942 1943 Ast parse_program() { 1944 Ast program = ast_make_simple(AST_PROGRAM, 0); 1945 Ast last = program; 1946 next(); 1947 for (;;) { 1948 if (ctx.tok == tENUM) { 1949 next(); 1950 parse_enum_def(); 1951 } else if (ctx.tok == tTYPE) { 1952 next(); 1953 parse_type_def(); 1954 } else if (ctx.tok == tFUNC) { 1955 next(); 1956 Ast node = parse_function(); 1957 if (node != nil) { 1958 last->c2 = node; 1959 last = node; 1960 } 1961 } else if (ctx.tok == tVAR) { 1962 next(); 1963 parse_global_var(); 1964 } else if (ctx.tok == tEOF) { 1965 return program; 1966 } else { 1967 expected("function, variable, or type definition"); 1968 } 1969 } 1970 } 1971 1972 void type_dump_compact(FILE* fp, Type type) { 1973 if (type->kind == TYPE_FUNC) { 1974 fprintf(fp, "fn("); 1975 Symbol param = type->first; 1976 while (param != nil) { 1977 type_dump_compact(fp, param->type); 1978 if (param->next != nil) { 1979 fprintf(fp, ", "); 1980 } 1981 param = param->next; 1982 } 1983 fprintf(fp, ") -> "); 1984 type_dump_compact(fp, type->base); 1985 } else if (type->sym != nil) { 1986 fprintf(fp, "%s", type->sym->name->text); 1987 } else if (type->kind == TYPE_ARRAY) { 1988 fprintf(fp, "[%u]", type->len); 1989 type_dump_compact(fp, type->base); 1990 } else if (type->kind == TYPE_RECORD) { 1991 fprintf(fp, "{...}"); 1992 } else { 1993 fprintf(fp, "%s", type_kind[type->kind]); 1994 if ((type->kind == TYPE_POINTER) || (type->kind == TYPE_SLICE)) { 1995 type_dump_compact(fp, type->base); 1996 } 1997 } 1998 } 1999 2000 void ast_dump_syms(FILE* fp, Symbol sym, str tag, u32 indent) { 2001 while (sym != nil) { 2002 u32 i = 0; 2003 while (i < indent) { fprintf(fp, " "); i++; } 2004 fprintf(fp, "%s '%s' ", tag, sym->name->text); 2005 type_dump_compact(fp, sym->type); 2006 fprintf(fp, "\n"); 2007 sym = sym->next; 2008 } 2009 } 2010 2011 void ast_dump_rtype(FILE* fp, Symbol sym, u32 indent) { 2012 u32 i = 0; 2013 while (i < indent) { fprintf(fp, " "); i++; } 2014 fprintf(fp, "Returns "); 2015 type_dump_compact(fp, sym->type->base); 2016 fprintf(fp, "\n"); 2017 } 2018 2019 int _ast_dump(FILE* fp, Ast node, u32 indent, bool dumplist, Ast mark) { 2020 u32 i = 0; 2021 i32 r = 0; 2022 2023 if (mark == node) { 2024 while (i < indent) { fprintf(fp, ">>"); i++; } 2025 } else { 2026 while (i < indent) { fprintf(fp, " "); i++; } 2027 } 2028 indent = indent + 1; 2029 2030 fprintf(fp, "%s ", ast_kind[node->kind]); 2031 if (node->kind == AST_SYMBOL) { 2032 fprintf(fp, "'%s' ", node->name->text); 2033 if (node->type != nil) { 2034 type_dump_compact(fp, node->type); 2035 } 2036 fprintf(fp, "\n"); 2037 } else if (node->kind == AST_CONST) { 2038 fprintf(fp, "0x%x ", node->ival); 2039 if (node->type != nil) { 2040 type_dump_compact(fp, node->type); 2041 } 2042 fprintf(fp, "\n"); 2043 } else { 2044 if (node->name) { 2045 fprintf(fp,"'%s' ",node->name->text); 2046 } 2047 if (node->type) { 2048 type_dump_compact(fp, node->type); 2049 fprintf(fp, " "); 2050 } 2051 if (node->ival) { 2052 fprintf(fp,"%u", node->ival); 2053 } 2054 fprintf(fp, "\n"); 2055 } 2056 if (node->kind == AST_FUNC) { 2057 ast_dump_syms(fp, node->sym->first, "Param", indent); 2058 ast_dump_rtype(fp, node->sym, indent); 2059 } 2060 if (node->kind == AST_BLOCK) { 2061 ast_dump_syms(fp, node->sym, "Local", indent); 2062 } 2063 if (mark == node) { 2064 return 1; 2065 } 2066 if (node->c0 != nil) { 2067 if (_ast_dump(fp, node->c0, indent, true, mark)) { 2068 return 1; 2069 } 2070 } 2071 if (node->c1 != nil) { 2072 if (_ast_dump(fp, node->c1, indent, true, mark)) { 2073 return 1; 2074 } 2075 } 2076 if (dumplist) { 2077 node = node->c2; 2078 while (node != nil) { 2079 if (_ast_dump(fp, node, indent, false, mark)) { 2080 return 1; 2081 } 2082 node = node->c2; 2083 } 2084 } 2085 return 0; 2086 } 2087 2088 void ast_dump(FILE* fp, Ast node, Ast mark) { 2089 _ast_dump(fp, node, 0, true, mark); 2090 } 2091 2092 void ast_dump_node(FILE* fp, Ast node, bool dump_c2) { 2093 fprintf(fp, "\"%p\" [ label=<<TABLE BORDER=\"0\" CELLBORDER=\"1\" CELLSPACING=\"0\">\n", node); 2094 fprintf(fp, "<TR><TD PORT=\"p0\" COLSPAN=\"3\">%s", ast_kind[node->kind]); 2095 if (node->kind == AST_CONST) { 2096 fprintf(fp, " 0x%x", node->ival); 2097 } else if (node->name != nil) { 2098 fprintf(fp, " %s", node->name->text); 2099 } else if (node->ival) { 2100 fprintf(fp, " %u", node->ival); 2101 } 2102 fprintf(fp, "</TD></TR>\n"); 2103 if (node->type) { 2104 fprintf(fp, "<TR><TD COLSPAN=\"3\">"); 2105 type_dump_compact(fp, node->type); 2106 fprintf(fp, "</TD></TR>"); 2107 } 2108 fprintf(fp, "<TR><TD PORT=\"c0\"></TD>" 2109 "<TD PORT=\"c1\"></TD>" 2110 "<TD PORT=\"c2\"></TD></TR></TABLE>>; ];\n"); 2111 2112 if (node->c0 != nil) { 2113 ast_dump_node(fp, node->c0, true); 2114 fprintf(fp, "\"%p\":c0 -> \"%p\" [ ];\n", node, node->c0); 2115 } 2116 if (node->c1 != nil) { 2117 ast_dump_node(fp, node->c1, true); 2118 fprintf(fp, "\"%p\":c1 -> \"%p\" [ ];\n", node, node->c1); 2119 } 2120 if ((node->c2 != nil) && dump_c2) { 2121 Ast list = node->c2; 2122 Ast prev = node; 2123 while (list != nil) { 2124 ast_dump_node(fp, list, false); 2125 fprintf(fp, "\"%p\":c2 -> \"%p\":p0 [ ];\n", prev, list); 2126 list = list->c2; 2127 prev = prev->c2; 2128 } 2129 fprintf(fp, "{ rank=same"); 2130 if (node->c0 != nil) { 2131 fprintf(fp, " \"%p\"", node->c0); 2132 } 2133 if (node->c1 != nil) { 2134 fprintf(fp, " \"%p\"", node->c1); 2135 } 2136 list = node->c2; 2137 while (list != nil) { 2138 fprintf(fp, " \"%p\"", list); 2139 list = list->c2; 2140 } 2141 fprintf(fp," }\n"); 2142 } 2143 } 2144 2145 void ast_dump_graph(FILE* fp, Ast node) { 2146 fprintf(fp, 2147 "digraph g {\n" 2148 "graph [ rankdir=TB; ];\n" 2149 "node [ shape=plain; ];\n" 2150 ); 2151 ast_dump_node(fp, node, false); 2152 fprintf(fp, "}\n"); 2153 } 2154 2155 void ast_dump_graphs(Ast node) { 2156 node = node->c2; 2157 while (node != nil) { 2158 if (node->kind == AST_FUNC) { 2159 char tmp[256]; 2160 sprintf(tmp, "%s.ast.dot", node->sym->name->text); 2161 FILE* fp; 2162 if ((fp = fopen(tmp, "w")) != nil) { 2163 ast_dump_graph(fp, node); 2164 fclose(fp); 2165 } 2166 } 2167 node = node->c2; 2168 } 2169 } 2170 2171 #if IR 2172 #include "codegen-ir.c" 2173 #else 2174 #include "codegen-risc5-simple.c" 2175 #endif 2176 2177 #if 0 2178 void type_dump_all() { 2179 Symbol sym = ctx.typetab; 2180 2181 while (sym != nil) { 2182 fprintf(stderr, "Symbol %p '%s'\n", sym, sym->name->text); 2183 fprintf(stderr, " Type %p %s len=%u sz=%u\n", 2184 sym->type, type_id_tab[sym->type->kind], 2185 sym->type->len, sym->type->size); 2186 fprintf(stderr, " sym=%p first=%p\n", 2187 sym->type->sym, sym->type->first); 2188 sym = sym->next; 2189 } 2190 } 2191 #endif 2192 2193 // ================================================================ 2194 2195 i32 main(int argc, args argv) { 2196 str outname = "out.bin"; 2197 str lstname = nil; 2198 str srcname = nil; 2199 str astname = nil; 2200 bool dump = false; 2201 bool scan_only = false; 2202 bool dump_graphs = false; 2203 2204 ctx_init(); 2205 ctx.filename = "<commandline>"; 2206 2207 while (argc > 1) { 2208 if (!strcmp(argv[1],"-o")) { 2209 if (argc < 2) { 2210 error("option -o requires argument"); 2211 } 2212 outname = argv[2]; 2213 argc--; 2214 argv++; 2215 } else if (!strcmp(argv[1], "-l")) { 2216 if (argc < 2) { 2217 error("option -l requires argument"); 2218 } 2219 lstname = argv[2]; 2220 argc--; 2221 argv++; 2222 } else if (!strcmp(argv[1], "-a")) { 2223 if (argc < 2) { 2224 error("option -a requires argument"); 2225 } 2226 astname = argv[2]; 2227 argc--; 2228 argv++; 2229 } else if (!strcmp(argv[1], "-p")) { 2230 dump = true; 2231 } else if (!strcmp(argv[1], "-g")) { 2232 dump_graphs = true; 2233 } else if (!strcmp(argv[1], "-v")) { 2234 ctx.flags |= cfTraceCodeGen; 2235 } else if (!strcmp(argv[1], "-s")) { 2236 scan_only = true; 2237 } else if (!strcmp(argv[1], "-A")) { 2238 ctx.flags |= cfAbortOnError; 2239 } else if (argv[1][0] == '-') { 2240 error("unknown option: %s", argv[1]); 2241 } else { 2242 if (srcname != nil) { 2243 error("multiple source files disallowed"); 2244 } else { 2245 srcname = argv[1]; 2246 } 2247 } 2248 argc--; 2249 argv++; 2250 } 2251 2252 if (srcname == nil) { 2253 printf( 2254 "usage: compiler [ <option> | <sourcefilename> ]*\n" 2255 "\n" 2256 "options: -o <filename> binary output (default 'out.bin')\n" 2257 " -l <filename> listing output (default none)\n" 2258 " -a <filename> dump AST tree\n" 2259 " -v trace code generation\n" 2260 " -s scan only\n" 2261 " -p dump type context\n" 2262 " -A abort on error\n"); 2263 return 0; 2264 } 2265 ctx.filename = srcname; 2266 2267 ctx_open_source(srcname); 2268 ctx.linenumber = 1; 2269 ctx.lineoffset = 0; 2270 // prime the lexer 2271 scan(); 2272 2273 if (scan_only) { 2274 ctx.flags |= 1; 2275 while (true) { 2276 next(); 2277 token_print(); 2278 if (ctx.tok == tEOF) { 2279 printf("\n"); 2280 return 0; 2281 } 2282 } 2283 } 2284 2285 Ast a = parse_program(); 2286 2287 if (astname != nil) { 2288 FILE *fp; 2289 if ((fp = fopen(astname, "w")) != nil) { 2290 ast_dump(fp, a, nil); 2291 fclose(fp); 2292 } 2293 } 2294 2295 if (dump_graphs) { 2296 ast_dump_graphs(a); 2297 } 2298 2299 gen_program(a); 2300 2301 binary_write(outname); 2302 2303 if (lstname != nil) { 2304 listing_write(lstname, ctx.filename); 2305 } 2306 2307 //type_dump_all(); 2308 2309 return 0; 2310 }