compiler.c (66299B)
1 // Copyright 2020, Brian Swetland <swetland@frotz.net> 2 // Licensed under the Apache License, Version 2.0. 3 4 #if C 5 #include <stdio.h> 6 #include <stdlib.h> 7 #include <stdarg.h> 8 #include <stdint.h> 9 #include <stdbool.h> 10 #include <strings.h> 11 #include <string.h> 12 13 #include <fcntl.h> 14 #include <unistd.h> 15 #include <sys/stat.h> 16 17 #include "risc5.h" 18 19 // builtin types 20 #define nil 0 21 typedef uint32_t u32; 22 typedef int32_t i32; 23 typedef uint8_t u8; 24 25 // rewriter only handles single word types 26 typedef uint32_t token_t; 27 typedef char* str; 28 typedef char** args; 29 typedef uint32_t* u32ptr; 30 #endif 31 32 enum { FNMAXARGS = 8, }; 33 34 // token classes (tok & tcMASK) 35 enum { 36 tcRELOP = 0x08, tcADDOP = 0x10, tcMULOP = 0x18, 37 tcAEQOP = 0x20, tcMEQOP = 0x28, tcMASK = 0xF8, 38 }; 39 40 enum { 41 // EndMarks, Braces, Brackets Parens 42 tEOF, tEOL, tOBRACE, tCBRACE, tOBRACK, tCBRACK, tOPAREN, tCPAREN, 43 // RelOps (do not reorder) 44 tEQ, tNE, tLT, tLE, tGT, tGE, tx0E, tx0F, 45 // AddOps (do not reorder) 46 tPLUS, tMINUS, tPIPE, tCARET, tx14, tx15, tx16, tx17, 47 // MulOps (do not reorder) 48 tSTAR, tSLASH, tPERCENT, tAMP, tANDNOT, tLEFT, tRIGHT, tx1F, 49 // AsnOps (do not reorder) 50 tADDEQ, tSUBEQ, tOREQ, tXOREQ, tx24, tx25, tx26, tx27, 51 tMULEQ, tDIVEQ, tMODEQ, tANDEQ, tANNEQ, tLSEQ, tRSEQ, t2F, 52 // Various, UnaryNot, LogicalOps, 53 tSEMI, tCOLON, tDOT, tCOMMA, tNOT, tAND, tOR, tBANG, 54 tASSIGN, tINC, tDEC, 55 // Keywords 56 tTYPE, tFUNC, tSTRUCT, tVAR, tENUM, 57 tIF, tELSE, tWHILE, 58 tBREAK, tCONTINUE, tRETURN, 59 tFOR, tSWITCH, tCASE, 60 tTRUE, tFALSE, tNIL, 61 tIDN, tNUM, tSTR, 62 // used internal to the lexer but never returned 63 tSPC, tINV, tDQT, tSQT, tMSC, 64 }; 65 66 str tnames[] = { 67 "<EOF>", "<EOL>", "{", "}", "[", "]", "(", ")", 68 "==", "!=", "<", "<=", ">", ">=", "", "", 69 "+", "-", "|", "^", "", "", "", "", 70 "*", "/", "%", "&", "&~", "<<", ">>", "", 71 "+=", "-=", "|=", "^=", "", "", "", "", 72 "*=", "/=", "%=", "&=", "&~=", "<<=", ">>=", "", 73 ";", ":", ".", ",", "~", "&&", "||", "!", 74 "=", "++", "--", 75 "type", "func", "struct", "var", "enum", 76 "if", "else", "while", 77 "break", "continue", "return", 78 "for", "switch", "case", 79 "true", "false", "nil", 80 "<ID>", "<NUM>", "<STR>", 81 "<SPC>", "<INV>", "<DQT>", "<SQT>", "<MSC>", 82 }; 83 84 u8 lextab[256] = { 85 tEOF, tINV, tINV, tINV, tINV, tINV, tINV, tINV, 86 tINV, tSPC, tEOL, tSPC, tINV, tSPC, tINV, tINV, 87 tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, 88 tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, 89 tSPC, tBANG, tDQT, tMSC, tMSC, tPERCENT, tAMP, tSQT, 90 tOPAREN, tCPAREN, tSTAR, tPLUS, tCOMMA, tMINUS, tDOT, tSLASH, 91 tNUM, tNUM, tNUM, tNUM, tNUM, tNUM, tNUM, tNUM, 92 tNUM, tNUM, tCOLON, tSEMI, tLT, tASSIGN, tGT, tMSC, 93 tMSC, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, 94 tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, 95 tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, 96 tIDN, tIDN, tIDN, tOBRACK, tMSC, tCBRACK, tCARET, tIDN, 97 tMSC, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, 98 tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, 99 tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, 100 tIDN, tIDN, tIDN, tOBRACE, tPIPE, tCBRACE, tNOT, tINV, 101 tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, 102 tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, 103 tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, 104 tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, 105 tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, 106 tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, 107 tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, 108 tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, 109 tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, 110 tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, 111 tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, 112 tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, 113 tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, 114 tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, 115 tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, 116 tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, 117 }; 118 119 // encodings for ops in Items 120 enum { rEQ, rNE, rLT, rLE, rGT, rGE, }; // RelOps 121 enum { aADD, aSUB, aIOR, aXOR, }; // AddOps 122 enum { mMUL, mDIV, mMOD, mAND, mANN, mLSL, mLSR, }; // MulOps 123 124 u8 invert_relop_tab[6] = { rNE, rEQ, rGE, rGT, rLE, rLT, }; 125 126 typedef struct StringRec StringRec; 127 typedef struct ObjectRec ObjectRec; 128 typedef struct ScopeRec ScopeRec; 129 typedef struct FixupRec FixupRec; 130 typedef struct TypeRec TypeRec; 131 typedef struct ItemRec ItemRec; 132 typedef struct CtxRec CtxRec; 133 134 typedef struct StringRec* String; 135 typedef struct ObjectRec* Object; 136 typedef struct ScopeRec* Scope; 137 typedef struct FixupRec* Fixup; 138 typedef struct TypeRec* Type; 139 typedef struct ItemRec* Item; 140 typedef struct CtxRec* Ctx; 141 142 struct StringRec { 143 String next; 144 u32 len; 145 char text[0]; 146 }; 147 148 struct ScopeRec { 149 u32 kind; 150 Scope next; // next in scope stack 151 Object first; // first object in this scope 152 u32 level; // height in stack (0 == globals, ...) 153 u32 save_stack; // previous alloc_stack to restore on pop 154 Fixup fixups; 155 }; 156 157 struct FixupRec { 158 Fixup next; 159 u32 pc; 160 }; 161 162 // Scope Kind IDs 163 enum { 164 sBlock, 165 sFunc, 166 sLoop, 167 }; 168 169 // ------------------------------------------------------------------ 170 171 struct ObjectRec { 172 u32 kind; 173 String name; 174 Type type; 175 Object first; // list of... 176 Object next; // link in list 177 u32 flags; 178 u32 value; 179 Fixup fixups; // forward func refs 180 }; 181 182 // Object Kind IDs 183 enum { // value 184 oConst, // const value 185 oGlobal, // static base offset 186 oVar, // frame offset 187 oParam, // frame offset 188 oField, // record offset 189 oType, // type-desc-ptr 190 oFunc, // address 191 }; 192 193 // Object Flags 194 enum { 195 ofReadOnly = 1, 196 ofPublic = 2, 197 ofDefined = 4, 198 ofBuiltin = 8, // for builtin functions 199 }; 200 201 // ------------------------------------------------------------------ 202 203 struct TypeRec { 204 u32 kind; 205 Type base; // Pointer-to, Func-return, or Array-elem 206 Object obj; // if we're non-anonymous 207 Object first; // list of Params or Fields 208 u32 len; // of Array, num of Params 209 u32 size; // of Type in Memory 210 }; 211 212 // Type Kind IDs 213 enum { 214 tVoid, 215 tByte, 216 tBool, 217 tInt32, 218 tNil, 219 tPointer, 220 tArray, 221 tSlice, 222 tRecord, 223 tFunc, 224 tUndefined, 225 }; 226 227 str type_id_tab[] = { 228 "void", "byte", "bool", "int32", "nil", "*", "[]", "[]", 229 "struct", "func", "undef", 230 }; 231 void print_type(Type t); 232 233 // ------------------------------------------------------------------ 234 235 struct ItemRec { 236 u32 kind; 237 u32 flags; 238 Type type; 239 u32 r; 240 u32 a; 241 u32 b; 242 }; 243 244 // Item Kind IDs 245 enum { // r a b 246 iConst, // - value - 247 iReg, // regno 248 iRegInd, // regno offset 249 iComp, // relop regno-a regno-b 250 iFunc, 251 }; 252 253 str item_id_tab[] = { "Const", "Reg", "RegInd", "Comp", "Func" }; 254 void print_item(Item x); 255 256 257 // Item Flags 258 enum { 259 ifReadOnly = 1, 260 }; 261 262 // ------------------------------------------------------------------ 263 264 struct CtxRec { 265 const char* filename; // filename of active source 266 int fd; 267 268 u8 iobuffer[1024]; // scanner file io buffer 269 u32 ionext; 270 u32 iolast; 271 272 u32 linenumber; // line number of most recent line 273 u32 lineoffset; // position of start of most recent line 274 u32 byteoffset; // position of the most recent character 275 u32 flags; 276 u32 cc; // scanner: next character 277 278 token_t tok; // most recent token 279 u32 num; // used for tNUM 280 char tmp[256]; // used for tIDN, tSTR; 281 String ident; // used for tIDN 282 283 String strtab; // TODO: hashtable 284 Object typetab; // TODO: hashtable 285 Scope scope; // scope stack 286 ScopeRec global; 287 288 u32 alloc_global; // next available global offset 289 290 Object fn; // function being compiled if non-nil 291 u32 spill_stack; // where to spill temp regs (TODO: dynamic) 292 u32 local_stack; // total stack for all locals (params, vars, tmps) 293 u32 alloc_stack; // where to allocate next var (grows/shrinks as 294 // we enter/exit scopes) 295 296 Type type_void; 297 Type type_byte; 298 Type type_bool; 299 Type type_int32; 300 Type type_nil; 301 Type type_string; 302 303 String idn_if; 304 String idn_for; 305 String idn_var; 306 String idn_nil; 307 String idn_case; 308 String idn_func; 309 String idn_else; 310 String idn_enum; 311 String idn_true; 312 String idn_type; 313 String idn_break; 314 String idn_while; 315 String idn_false; 316 String idn_switch; 317 String idn_struct; 318 String idn_return; 319 String idn_continue; 320 321 u32 code[8192]; 322 u32 data[8192]; 323 u32 pc; 324 325 // if nonzero, target for continue 326 u32 pc_continue; 327 328 // The latest pc that a forward branch has been 329 // fixed up to. cannot safely move code at or 330 // before this address. 331 u32 last_target_pc; 332 333 u32 regbits; 334 335 u32 xref[8192]; 336 }; 337 338 enum { 339 cfVisibleEOL = 1, 340 cfAbortOnError = 2, 341 }; 342 343 CtxRec ctx; 344 345 bool TRACE_CODEGEN = false; 346 347 void gen_trace(const char* fn, Item x, Item y); 348 void gen_trace_n(const char* fn, u32 n, Item x, Item y); 349 void gen_trace_code(const char* msg, u32 pc); 350 351 // initialize x as appropriate for obj 352 // (which must be a local or global variable or function) 353 void gen_item_from_obj(Item x, Object obj); 354 355 // generate function prologue and epilogue 356 void gen_prologue(Object fn); 357 void gen_epilogue(Object fn); 358 359 // return from function, consumes x 360 void gen_return(Item x); 361 362 // generate a binary op, consumes y, transforms x 363 void gen_add_op(u32 op, Item x, Item y); 364 void gen_mul_op(u32 op, Item x, Item y); 365 void gen_rel_op(u32 op, Item x, Item y); 366 367 // generate unary op, transforms x 368 void gen_unary_op(u32 op, Item x); 369 370 // stores val into var, consuming both 371 void gen_store(Item val, Item var); 372 373 // replace array item x with element item (at idx) 374 void gen_index(Item x, Item idx); 375 376 // consume ptr item, transforming into thing-pointed-at 377 void gen_deref_ptr(Item x); 378 379 // consume a memory-based item, transforming into an address (ptr) 380 void gen_get_ptr(Item x); 381 382 // release any registers or other resources held by this item 383 void gen_discard(Item val); 384 385 // sets up call param #n, consuming val 386 void gen_param(u32 n, Item val); 387 void gen_ref_param(u32 n, Item val); 388 389 // call func, consumes parameters and func 390 void gen_call(Item func); 391 392 void gen_bool_from_comp(Item x); 393 394 // generate a forward conditional branch 395 // consumes x which must be Bool or Cond 396 // returns address to fixup 397 u32 gen_branch_cond(Item x, bool sense); 398 399 // used by && and || on opposing code flows 400 // bool1 version loads val, bool2 version loads !val 401 void gen_load_bool1(Item x, bool val); 402 void gen_load_bool2(Item x); 403 404 // generate a backward branch to addr 405 void gen_branch_back(u32 addr); 406 407 // generate an unconditional forward branch 408 // returns address for later fixup 409 u32 gen_branch_fwd(); 410 411 // patches provided list of branch fixups to branch to 412 // the address where we will emit the next instruction 413 void fixup_branches_fwd(Fixup list); 414 415 // patches a single branch at addr to branch to the 416 // address where we will emit the next instruction 417 void fixup_branch_fwd(u32 addr); 418 419 String make_string(const char* text, u32 len) { 420 // OPT obviously this wants to be a hash table 421 String str = ctx.strtab; 422 while (str != nil) { 423 if ((str->len == len) && (memcmp(text, str->text, len) == 0)) { 424 return str; 425 } 426 str = str->next; 427 } 428 429 str = malloc(sizeof(StringRec) + len + 1); 430 str->len = len; 431 memcpy(str->text, text, len); 432 str->text[len] = 0; 433 str->next = ctx.strtab; 434 ctx.strtab = str; 435 436 return str; 437 } 438 439 Object make_object(u32 kind, String name, Type type, 440 Object first, u32 flags, u32 value) { 441 Object obj = malloc(sizeof(ObjectRec)); 442 obj->kind = kind; 443 obj->name = name; 444 obj->type = type; 445 obj->first = first; 446 obj->next = nil; 447 obj->flags = flags; 448 obj->value = value; 449 obj->fixups = nil; 450 return obj; 451 } 452 453 Type make_type(u32 kind, Type base, Object obj, 454 Object first, u32 len, u32 size) { 455 Type type = malloc(sizeof(TypeRec)); 456 type->kind = kind; 457 type->base = base; 458 type->obj = obj; 459 type->first = first; 460 type->len = len; 461 type->size = size; 462 return type; 463 } 464 465 Object make_var(u32 kind, String name, Type type, u32 flags, u32 value) { 466 Object var = malloc(sizeof(ObjectRec)); 467 var->kind = kind; 468 var->name = name; 469 var->type = type; 470 var->first = nil; 471 var->next = nil; 472 var->flags = flags; 473 var->value = value; 474 var->fixups = nil; 475 return var; 476 } 477 478 void set_item(Item itm, u32 kind, Type type, u32 r, u32 a, u32 b) { 479 itm->kind = kind; 480 itm->flags = 0; 481 itm->type = type; 482 itm->r = r; 483 itm->a = a; 484 itm->b = b; 485 } 486 487 void copy_item(Item src, Item dst) { 488 dst->kind = src->kind; 489 dst->flags = src->flags; 490 dst->type = src->type; 491 dst->r = src->r; 492 dst->a = src->a; 493 dst->b = src->b; 494 } 495 496 void add_type(Type type, String name) { 497 Object obj = make_object(oType, name, type, nil, 0, 0); 498 if (type->obj == nil) { 499 // only set the the type's object if it is 500 // not already set (otherwise aliases will 501 // clobber the canonical type names -- yuck!) 502 type->obj = obj; 503 } 504 obj->next = ctx.typetab; 505 ctx.typetab = obj; 506 } 507 508 Type setup_type(const char* text, u32 tlen, u32 kind, u32 size) { 509 String name = make_string(text, tlen); 510 Type type = make_type(kind, nil, nil, nil, 0, size); 511 add_type(type, name); 512 return type; 513 } 514 515 enum { 516 biPrintHex32, 517 biPutC, 518 }; 519 520 void make_builtin(const char* name, u32 id, Type p0, Type p1, Type rtn); 521 522 void init_ctx() { 523 memset(&ctx, 0, sizeof(ctx)); 524 525 // install built-in basic types 526 ctx.type_void = setup_type("void", 4, tVoid, 0); 527 ctx.type_byte = setup_type("byte", 4, tByte, 1); 528 ctx.type_bool = setup_type("bool", 4, tBool, 1); 529 ctx.type_int32 = setup_type("i32", 3, tInt32, 4); 530 ctx.type_nil = setup_type("nil", 3, tNil, 4); 531 ctx.type_string = setup_type("str", 3, tSlice, 8); 532 ctx.type_string->base = ctx.type_byte; 533 534 ctx.scope = &(ctx.global); 535 536 make_builtin("_hexout_", biPrintHex32, ctx.type_int32, nil, ctx.type_void); 537 make_builtin("_putc_", biPutC, ctx.type_int32, nil, ctx.type_void); 538 539 // pre-intern keywords 540 ctx.idn_if = make_string("if", 2); 541 ctx.idn_for = make_string("for", 3); 542 ctx.idn_var = make_string("var", 3); 543 ctx.idn_nil = make_string("nil", 3); 544 ctx.idn_case = make_string("case", 4); 545 ctx.idn_func = make_string("func", 4); 546 ctx.idn_else = make_string("else", 4); 547 ctx.idn_enum = make_string("enum", 4); 548 ctx.idn_true = make_string("true", 4); 549 ctx.idn_type = make_string("type", 4); 550 ctx.idn_break = make_string("break", 5); 551 ctx.idn_while = make_string("while", 5); 552 ctx.idn_false = make_string("false", 5); 553 ctx.idn_switch = make_string("switch", 6); 554 ctx.idn_struct = make_string("struct", 6); 555 ctx.idn_return = make_string("return", 6); 556 ctx.idn_continue = make_string("continue", 8); 557 558 } 559 560 bool same_type(Type a, Type b) { 561 if (a->kind != b->kind) { 562 return false; 563 } 564 if (a->base != b->base) { 565 return false; 566 } 567 if (a->len != b->len) { 568 return false; 569 } 570 Object a1 = a->first; 571 Object b1 = b->first; 572 while ((a1 != nil) && (b1 != nil)) { 573 // check that parameters and fields match 574 if (!same_type(a1->type, b1->type)) { 575 return false; 576 } 577 } 578 if ((a1 != nil) || (b1 != nil)) { 579 // mismatched number of parameters or fields 580 return false; 581 } 582 return true; 583 } 584 585 // for assignments, etc 586 bool compatible_type(Type dst, Type src, Item x) { 587 if (x->kind == iComp) { 588 // collapse comparisons into bools for storage 589 gen_bool_from_comp(x); 590 src = ctx.type_bool; 591 } 592 if (dst->kind == tInt32) { 593 if ((src->kind == tByte) || (src->kind == tBool)) { 594 return true; 595 } 596 } 597 // TODO: should we care about int to byte truncation anywhere? 598 if (dst->kind == tByte) { 599 if ((src->kind == tInt32) || (src->kind == tBool)) { 600 return true; 601 } 602 } 603 return same_type(dst, src); 604 } 605 606 void dump_file_line(const char* fn, u32 offset); 607 608 #if C 609 void error(const char *fmt, ...) { 610 #else 611 void error(const char *fmt) { 612 #endif 613 va_list ap; 614 615 fprintf(stderr,"%s:%d: ", ctx.filename, ctx.linenumber); 616 va_start(ap, fmt); 617 vfprintf(stderr, fmt, ap); 618 va_end(ap); 619 620 if (ctx.linenumber > 0) { 621 dump_file_line(ctx.filename, ctx.lineoffset); 622 } 623 fprintf(stderr, "\n"); 624 625 if (ctx.flags & cfAbortOnError) { 626 abort(); 627 } else { 628 exit(1); 629 } 630 } 631 632 void load(const char* filename) { 633 ctx.filename = filename; 634 ctx.linenumber = 0; 635 636 if (ctx.fd >= 0) { 637 close(ctx.fd); 638 } 639 ctx.fd = open(filename, O_RDONLY); 640 if (ctx.fd < 0) { 641 error("cannot open file '%s'", filename); 642 } 643 ctx.ionext = 0; 644 ctx.iolast = 0; 645 ctx.linenumber = 1; 646 ctx.lineoffset = 0; 647 ctx.byteoffset = 0; 648 } 649 650 i32 unhex(u32 ch) { 651 if ((ch >= '0') && (ch <= '9')) { 652 return ch - '0'; 653 } 654 if ((ch >= 'a') && (ch <= 'f')) { 655 return ch - 'a' + 10; 656 } 657 if ((ch >= 'A') && (ch <= 'F')) { 658 return ch - 'A' + 10; 659 } 660 return -1; 661 } 662 663 u32 scan() { 664 while (ctx.ionext == ctx.iolast) { 665 if (ctx.fd < 0) { 666 ctx.cc = 0; 667 return ctx.cc; 668 } 669 int r = read(ctx.fd, ctx.iobuffer, sizeof(ctx.iobuffer)); 670 if (r <= 0) { 671 ctx.fd = -1; 672 } else { 673 ctx.iolast = r; 674 ctx.ionext = 0; 675 } 676 } 677 ctx.cc = ctx.iobuffer[ctx.ionext]; 678 ctx.ionext++; 679 ctx.byteoffset++; 680 return ctx.cc; 681 } 682 683 u32 unescape(u32 n) { 684 if (n == 'n') { 685 return 10; 686 } else if (n == 'r') { 687 return 13; 688 } else if (n == 't') { 689 return 9; 690 } else if (n == '"') { 691 return '"'; 692 } else if (n == '\'') { 693 return '\''; 694 } else if (n == '\\') { 695 return '\\'; 696 } else if (n == 'x') { 697 int x0 = unhex(scan()); 698 int x1 = unhex(scan()); 699 if ((x0 < 0) || (x1 < 0)) { 700 error("invalid hex escape"); 701 } 702 return (x0 << 4) | x1; 703 } else { 704 error("invalid escape 0x%02x", n); 705 return 0; 706 } 707 } 708 709 token_t scan_string(u32 cc, u32 nc) { 710 u32 n = 0; 711 while (true) { 712 if (nc == '"') { 713 break; 714 } else if (nc == 0) { 715 error("unterminated string"); 716 } else if (nc == '\\') { 717 ctx.tmp[n] = unescape(scan()); 718 } else { 719 ctx.tmp[n] = nc; 720 } 721 nc = scan(); 722 n++; 723 if (n == 255) { 724 error("constant string too large"); 725 } 726 } 727 ctx.tmp[n] = 0; 728 return tSTR; 729 } 730 731 token_t scan_keyword(u32 len) { 732 ctx.tmp[len] = 0; 733 String idn = make_string(ctx.tmp, len); 734 ctx.ident = idn; 735 736 if (len == 2) { 737 if (idn == ctx.idn_if) { return tIF; }; 738 } else if (len == 3) { 739 if (idn == ctx.idn_for) { return tFOR; } 740 if (idn == ctx.idn_var) { return tVAR; } 741 if (idn == ctx.idn_nil) { return tNIL; } 742 } else if (len == 4) { 743 if (idn == ctx.idn_case) { return tCASE; } 744 if (idn == ctx.idn_func) { return tFUNC; } 745 if (idn == ctx.idn_else) { return tELSE; } 746 if (idn == ctx.idn_enum) { return tENUM; } 747 if (idn == ctx.idn_true) { return tTRUE; } 748 if (idn == ctx.idn_type) { return tTYPE; } 749 } else if (len == 5) { 750 if (idn == ctx.idn_break) { return tBREAK; } 751 if (idn == ctx.idn_while) { return tWHILE; } 752 if (idn == ctx.idn_false) { return tFALSE; } 753 } else if (len == 6) { 754 if (idn == ctx.idn_switch) { return tSWITCH; } 755 if (idn == ctx.idn_struct) { return tSTRUCT; } 756 if (idn == ctx.idn_return) { return tRETURN; } 757 } else if (len == 8) { 758 if (idn == ctx.idn_continue) { return tCONTINUE; } 759 } 760 return tIDN; 761 } 762 763 token_t scan_number(u32 cc, u32 nc) { 764 u32 n = 1; 765 u32 val = cc - '0'; 766 767 if ((cc == '0') && (nc == 'b')) { // binary 768 nc = scan(); 769 while ((nc == '0') || (nc == '1')) { 770 val = (val << 1) | (nc - '0'); 771 nc = scan(); 772 n++; 773 if (n == 34) { 774 error("binary constant too large"); 775 } 776 } 777 } else if ((cc == '0') && (nc == 'x')) { // hex 778 nc = scan(); 779 while (true) { 780 int tmp = unhex(nc); 781 if (tmp == -1) { 782 break; 783 } 784 val = (val << 4) | tmp; 785 nc = scan(); 786 n++; 787 if (n == 10) { 788 error("hex constant too large"); 789 } 790 } 791 } else { // decimal 792 while (lextab[nc] == tNUM) { 793 u32 tmp = (val * 10) + (nc - '0'); 794 if (tmp <= val) { 795 error("decimal constant too large"); 796 } 797 val = tmp; 798 nc = scan(); 799 n++; 800 } 801 } 802 ctx.num = val; 803 return tNUM; 804 } 805 806 token_t scan_ident(u32 cc, u32 nc) { 807 ctx.tmp[0] = cc; 808 u32 n = 1; 809 810 while (true) { 811 u32 tok = lextab[nc]; 812 if ((tok == tIDN) || (tok == tNUM)) { 813 ctx.tmp[n] = nc; 814 n++; 815 if (n == 32) { error("identifier too large"); } 816 nc = scan(); 817 } else { 818 break; 819 } 820 } 821 return scan_keyword(n); 822 } 823 824 token_t _next() { 825 u8 nc = ctx.cc; 826 while (true) { 827 u8 cc = nc; 828 nc = scan(); 829 u32 tok = lextab[cc]; 830 if (tok == tNUM) { // 0..9 831 return scan_number(cc, nc); 832 } else if (tok == tIDN) { // _ A..Z a..z 833 return scan_ident(cc, nc); 834 } else if (tok == tDQT) { // " 835 return scan_string(cc, nc); 836 } else if (tok == tSQT) { // ' 837 ctx.num = nc; 838 if (nc == '\\') { 839 ctx.num = unescape(scan()); 840 } 841 nc = scan(); 842 if (nc != '\'') { 843 error("unterminated character constant"); 844 } 845 nc = scan(); 846 return tNUM; 847 } else if (tok == tPLUS) { 848 if (nc == '+') { tok = tINC; nc = scan(); } 849 } else if (tok == tMINUS) { 850 if (nc == '-') { tok = tDEC; nc = scan(); } 851 } else if (tok == tAMP) { 852 if (nc == '&') { tok = tAND; nc = scan(); } 853 else if (nc == '~') { tok = tANDNOT; nc = scan(); } 854 } else if (tok == tPIPE) { 855 if (nc == '|') { tok = tOR; nc = scan(); } 856 } else if (tok == tGT) { 857 if (nc == '=') { tok = tGE; nc = scan(); } 858 else if (nc == '>') { tok = tRIGHT; nc = scan(); } 859 } else if (tok == tLT) { 860 if (nc == '=') { tok = tLE; nc = scan(); } 861 else if (nc == '<') { tok = tLEFT; nc = scan(); } 862 } else if (tok == tASSIGN) { 863 if (nc == '=') { tok = tEQ; nc = scan(); } 864 } else if (tok == tBANG) { 865 if (nc == '=') { tok = tNE; nc = scan(); } 866 } else if (tok == tSLASH) { 867 if (nc == '/') { 868 // comment -- consume until EOL or EOF 869 while ((nc != '\n') && (nc != 0)) { 870 nc = scan(); 871 } 872 continue; 873 } 874 } else if (tok == tEOL) { 875 ctx.linenumber++; 876 ctx.lineoffset = ctx.byteoffset; 877 ctx.xref[ctx.pc / 4] = ctx.linenumber; 878 if (ctx.flags & cfVisibleEOL) { 879 return tEOL; 880 } 881 continue; 882 } else if (tok == tSPC) { 883 continue; 884 } else if ((tok == tMSC) || (tok == tINV)) { 885 error("unknown character 0x%02x", cc); 886 } 887 888 // if we're an AddOp or MulOp, followed by an '=' 889 if (((tok & 0xF0) == 0x20) && (nc == '=')) { 890 nc = scan(); 891 // transform us to a XEQ operation 892 tok = tok + 0x10; 893 } 894 895 return tok; 896 } 897 } 898 899 token_t next() { 900 return (ctx.tok = _next()); 901 } 902 903 void printstr() { 904 u32 n = 0; 905 printf("\""); 906 while (n < 256) { 907 u32 ch = ctx.tmp[n]; 908 if (ch == 0) { 909 break; 910 } else if ((ch < ' ') || (ch > '~')) { 911 printf("\\x%02x", ch); 912 } else if ((ch == '"') || (ch == '\\')) { 913 printf("\\%c", ch); 914 } else { 915 printf("%c", ch); 916 } 917 n++; 918 } 919 printf("\""); 920 } 921 922 void print() { 923 if (ctx.tok == tNUM) { 924 printf("#%u ", ctx.num); 925 } else if (ctx.tok == tIDN) { 926 printf("@%s ", ctx.tmp); 927 } else if (ctx.tok == tEOL) { 928 printf("\n"); 929 } else if (ctx.tok == tSTR) { 930 printstr(); 931 } else { 932 printf("%s ", tnames[ctx.tok]); 933 } 934 } 935 936 void expected(const char* what) { 937 error("expected %s, found %s", what, tnames[ctx.tok]); 938 } 939 940 void expect(token_t tok) { 941 if (ctx.tok != tok) { 942 error("expected %s, found %s", tnames[tok], tnames[ctx.tok]); 943 } 944 } 945 946 void require(token_t tok) { 947 expect(tok); 948 next(); 949 } 950 951 // Look for an identifier in the scope stack 952 Object find(String str) { 953 Scope scope = ctx.scope; 954 while (scope != nil) { 955 Object obj = scope->first; 956 while (obj != nil) { 957 if (obj->name == str) { 958 return obj; 959 } 960 obj = obj->next; 961 } 962 scope = scope->next; 963 } 964 return nil; 965 } 966 967 void make_global(Object obj) { 968 obj->next = ctx.global.first; 969 ctx.global.first = obj; 970 } 971 972 // push a scope on the scope stack 973 // if obj is non-nil, it is the first of a list of items in that scope 974 Scope push_scope(u32 kind, Object obj) { 975 Scope scope = malloc(sizeof(ScopeRec)); 976 scope->kind = kind; 977 scope->next = ctx.scope; 978 scope->first = obj; 979 scope->level = ctx.scope->level + 1; 980 scope->fixups = nil; 981 982 // save current stack offset (next local var) 983 scope->save_stack = ctx.alloc_stack; 984 985 ctx.scope = scope; 986 return scope; 987 // XXX lazy scopes 988 } 989 990 void pop_scope() { 991 if (ctx.scope->level == 0) { 992 error("cannot pop the global scope"); 993 } 994 995 // restore prev stack offset (next local var) 996 ctx.alloc_stack = ctx.scope->save_stack; 997 998 fixup_branches_fwd(ctx.scope->fixups); 999 // XXX delete? 1000 ctx.scope = ctx.scope->next; 1001 } 1002 1003 // find the first surrounding scope of a specified kind 1004 Scope find_scope(u32 scope_kind) { 1005 Scope scope = ctx.scope; 1006 while (scope != nil) { 1007 if (scope->kind == scope_kind) { 1008 return scope; 1009 } 1010 scope = scope->next; 1011 } 1012 return nil; 1013 } 1014 1015 // add a fixup for the last-emitted instruction 1016 // to the scope (used for return and break) 1017 void add_scope_fixup(Scope scope) { 1018 Fixup fixup = malloc(sizeof(FixupRec)); 1019 fixup->next = scope->fixups; 1020 fixup->pc = ctx.pc - 4; 1021 scope->fixups = fixup; 1022 } 1023 1024 // add a fixup for the last-emitted instruction 1025 // to the object (used for forward func refs) 1026 void add_object_fixup(Object obj) { 1027 Fixup fixup = malloc(sizeof(FixupRec)); 1028 fixup->next = obj->fixups; 1029 fixup->pc = ctx.pc - 4; 1030 obj->fixups = fixup; 1031 } 1032 1033 u32 invert_relop(u32 op) { 1034 if (op > 5) { abort(); } 1035 return invert_relop_tab[op]; 1036 } 1037 // ================================================================ 1038 1039 void parse_expr(Item x); 1040 1041 String parse_name(const char* what) { 1042 if (ctx.tok != tIDN) { 1043 error("expected %s, found %s %u", what, tnames[ctx.tok], ctx.tok); 1044 } 1045 String str = ctx.ident; 1046 next(); 1047 return str; 1048 } 1049 1050 void parse_operand(Item x) { 1051 if (ctx.tok == tNUM) { 1052 set_item(x, iConst, ctx.type_int32, 0, ctx.num, 0); 1053 } else if (ctx.tok == tSTR) { 1054 error("<TODO> string const"); 1055 } else if (ctx.tok == tTRUE) { 1056 set_item(x, iConst, ctx.type_bool, 0, 1, 0); 1057 } else if (ctx.tok == tFALSE) { 1058 set_item(x, iConst, ctx.type_bool, 0, 0, 0); 1059 } else if (ctx.tok == tNIL) { 1060 set_item(x, iConst, ctx.type_nil, 0, 0, 0); 1061 } else if (ctx.tok == tOPAREN) { 1062 next(); 1063 parse_expr(x); 1064 require(tCPAREN); 1065 return; 1066 } else if (ctx.tok == tIDN) { 1067 String str = ctx.ident; 1068 Object obj = find(str); 1069 if (obj == nil) { 1070 error("unknown identifier '%s'", str->text); 1071 } 1072 gen_item_from_obj(x, obj); 1073 } else { 1074 error("invalid expression"); 1075 } 1076 next(); 1077 } 1078 1079 void dereference(Item x, String name) { 1080 if (x->kind != iRegInd) { 1081 error("internal: cannot deref via item kind %u", x->kind); 1082 } 1083 1084 if (x->type->kind == tRecord) { 1085 Object field = x->type->first; 1086 while (field != nil) { 1087 if (field->name == name) { 1088 x->type = field->type; 1089 x->a = x->a + field->value; 1090 return; 1091 } 1092 field = field->next; 1093 } 1094 error("field '%s' does not exist", name->text); 1095 } else { 1096 error("internal: cannot deref non-structs"); 1097 } 1098 } 1099 1100 void parse_primary_expr(Item x) { 1101 parse_operand(x); 1102 while (true) { 1103 if (ctx.tok == tOPAREN) { 1104 next(); 1105 if (x->kind != iFunc) { 1106 error("cannot call non-function"); 1107 } 1108 // TODO ptr-to-func 1109 u32 n = 0; 1110 Object param = x->type->first; 1111 while (param != nil) { 1112 if (ctx.tok == tCPAREN) { 1113 error("too few parameters for %s()", x->type->obj->name->text); 1114 } 1115 if (n != 0) { 1116 require(tCOMMA); 1117 } 1118 ItemRec y; 1119 parse_expr(&y); 1120 if (!compatible_type(param->type, y.type, &y)) { 1121 error("incompatible type for parameter '%s'\n", param->name->text); 1122 } 1123 if (param->type->kind == tArray) { 1124 gen_ref_param(n, &y); 1125 } else { 1126 gen_param(n, &y); 1127 } 1128 param = param->next; 1129 n++; 1130 } 1131 require(tCPAREN); 1132 gen_call(x); 1133 } else if (ctx.tok == tDOT) { 1134 next(); 1135 String name = parse_name("field name"); 1136 if (x->type->kind == tRecord) { 1137 dereference(x, name); 1138 } else if ((x->type->kind == tPointer) && 1139 (x->type->base->kind == tRecord)) { 1140 gen_deref_ptr(x); 1141 dereference(x, name); 1142 } else { 1143 error("can only use '.' with struct or pointer-to-struct"); 1144 } 1145 } else if (ctx.tok == tOBRACK) { 1146 next(); 1147 ItemRec y; 1148 parse_expr(&y); 1149 require(tCBRACK); 1150 if (x->type->kind != tArray) { 1151 error("can only use [] with array"); 1152 } 1153 if (x->kind != iRegInd) { 1154 error("internal: cannot index via item kind %u", x->kind); 1155 } 1156 if (y.kind == iConst) { 1157 if (y.a >= x->type->len) { 1158 error("array index out of range"); 1159 } 1160 x->a = x->a + y.a * x->type->base->size; 1161 x->type = x->type->base; 1162 } else { 1163 gen_index(x, &y); 1164 } 1165 } else { 1166 break; 1167 } 1168 } 1169 } 1170 1171 void parse_unary_expr(Item x) { 1172 if (ctx.tok == tPLUS) { 1173 next(); 1174 parse_unary_expr(x); 1175 } else if ((ctx.tok == tMINUS) || (ctx.tok == tBANG) || (ctx.tok == tNOT)) { 1176 u32 op = ctx.tok; 1177 next(); 1178 parse_unary_expr(x); 1179 if ((x->kind == iComp) && (op == tBANG)) { 1180 x->r = invert_relop(x->r); 1181 } else { 1182 gen_unary_op(op, x); 1183 } 1184 } else if (ctx.tok == tAMP) { 1185 next(); 1186 parse_unary_expr(x); 1187 gen_get_ptr(x); 1188 } else { 1189 parse_primary_expr(x); 1190 } 1191 } 1192 1193 void parse_mul_expr(Item x) { 1194 parse_unary_expr(x); 1195 while ((ctx.tok & tcMASK) == tcMULOP) { 1196 u32 mulop = ctx.tok - tSTAR; 1197 next(); 1198 ItemRec y; 1199 parse_unary_expr(&y); 1200 gen_mul_op(mulop, x, &y); 1201 } 1202 } 1203 1204 void parse_add_expr(Item x) { 1205 parse_mul_expr(x); 1206 while ((ctx.tok & tcMASK) == tcADDOP) { 1207 u32 addop = ctx.tok - tPLUS; 1208 next(); 1209 ItemRec y; 1210 parse_mul_expr(&y); 1211 gen_add_op(addop, x, &y); 1212 } 1213 } 1214 1215 void parse_rel_expr(Item x) { 1216 parse_add_expr(x); 1217 if ((ctx.tok & tcMASK) == tcRELOP) { 1218 u32 relop = ctx.tok - tEQ; 1219 next(); 1220 ItemRec y; 1221 parse_add_expr(&y); 1222 gen_rel_op(relop, x, &y); 1223 } 1224 } 1225 1226 void parse_and_expr(Item x) { 1227 parse_rel_expr(x); 1228 if (ctx.tok == tAND) { 1229 Scope outer = push_scope(sBlock, nil); 1230 1231 // if !x goto nope 1232 gen_branch_cond(x, false); 1233 add_scope_fixup(outer); 1234 1235 while (ctx.tok == tAND) { 1236 next(); 1237 ItemRec y; 1238 1239 // if !y goto nope 1240 parse_rel_expr(&y); 1241 gen_branch_cond(&y, false); 1242 add_scope_fixup(outer); 1243 } 1244 // res = true, goto done 1245 gen_load_bool1(x, true); 1246 u32 l0_true = gen_branch_fwd(); 1247 1248 // nope: res = false 1249 pop_scope(); 1250 gen_load_bool2(x); 1251 1252 // done: 1253 fixup_branch_fwd(l0_true); 1254 } 1255 } 1256 1257 void parse_expr(Item x) { 1258 parse_and_expr(x); 1259 if (ctx.tok == tOR) { 1260 Scope outer = push_scope(sBlock, nil); 1261 1262 // if x goto yup 1263 gen_branch_cond(x, true); 1264 add_scope_fixup(outer); 1265 1266 while (ctx.tok == tOR) { 1267 next(); 1268 ItemRec y; 1269 1270 // if y goto yup 1271 parse_and_expr(&y); 1272 gen_branch_cond(&y, true); 1273 add_scope_fixup(outer); 1274 } 1275 // res = false, goto done 1276 gen_load_bool1(x, false); 1277 u32 l0_false = gen_branch_fwd(); 1278 1279 // yup: res = true 1280 pop_scope(); 1281 gen_load_bool2(x); 1282 1283 // done: 1284 fixup_branch_fwd(l0_false); 1285 } 1286 } 1287 1288 Type find_type(String name) { 1289 Object obj = ctx.typetab; 1290 while (obj != nil) { 1291 if (obj->name == name) { 1292 return obj->type; 1293 } 1294 obj = obj->next; 1295 } 1296 return nil; 1297 } 1298 1299 // fwd_ref_ok indicates that an undefined typename 1300 // may be treated as a forward reference. This is 1301 // only used for pointers (size their size does not 1302 // depend on their target). 1303 Type parse_type(bool fwd_ref_ok); 1304 1305 Type parse_struct_type() { 1306 Type rectype = make_type(tRecord, nil, nil, nil, 0, 0); 1307 Object last = nil; 1308 require(tOBRACE); 1309 while (true) { 1310 if (ctx.tok == tCBRACE) { 1311 next(); 1312 break; 1313 } 1314 String name = parse_name("field name"); 1315 Type type = parse_type(false); 1316 Object field = make_object(oField, name, type, nil, 0, rectype->size); 1317 1318 // TODO sub-word packing 1319 rectype->size += (type->size + 3) & (~3); 1320 rectype->len++; 1321 1322 // add field to record 1323 if (last == nil) { 1324 rectype->first = field; 1325 } else { 1326 last->next = field; 1327 } 1328 last = field; 1329 1330 if (ctx.tok != tCBRACE) { 1331 require(tCOMMA); 1332 } 1333 } 1334 return rectype; 1335 } 1336 1337 Type parse_array_type() { 1338 if (ctx.tok == tCBRACK) { 1339 next(); 1340 return make_type(tSlice, parse_type(false), nil, nil, 0, 8); 1341 } else { 1342 ItemRec x; 1343 parse_expr(&x); 1344 require(tCBRACK); 1345 if ((x.kind != iConst) || (x.type != ctx.type_int32)) { 1346 error("array size must be integer constant"); 1347 } 1348 //XXX check for >0 1349 Type base = parse_type(false); 1350 u32 sz = x.a * base->size; 1351 if (sz < x.a) { 1352 error("array size overflow"); 1353 } 1354 return make_type(tArray, base, nil, nil, x.a, sz); 1355 } 1356 } 1357 1358 Type parse_func_type() { 1359 error("<TODO> func type"); 1360 return nil; 1361 } 1362 1363 Type parse_type(bool fwd_ref_ok) { 1364 if (ctx.tok == tSTAR) { // pointer-to 1365 next(); 1366 return make_type(tPointer, parse_type(true), nil, nil, 0, 4); 1367 } else if (ctx.tok == tOBRACK) { // array-of 1368 next(); 1369 return parse_array_type(); 1370 } else if (ctx.tok == tFUNC) { 1371 next(); 1372 return parse_func_type(); 1373 } else if (ctx.tok == tSTRUCT) { 1374 next(); 1375 return parse_struct_type(); 1376 } else if (ctx.tok == tIDN) { 1377 String name = ctx.ident; 1378 next(); 1379 Type type = find_type(name); 1380 if (type == nil) { 1381 if (fwd_ref_ok) { 1382 type = make_type(tUndefined, nil, nil, nil, 0, 0); 1383 add_type(type, name); 1384 } else { 1385 error("undefined type '%s' not usable here", name->text); 1386 } 1387 } 1388 return type; 1389 } else { 1390 expected("type"); 1391 return nil; 1392 } 1393 } 1394 1395 void parse_block(); 1396 1397 void parse_while() { 1398 ItemRec x; 1399 u32 save = ctx.pc_continue; 1400 ctx.pc_continue = ctx.pc; // for backward branch 1401 1402 parse_expr(&x); 1403 u32 l1_br_false = gen_branch_cond(&x, false); 1404 1405 require(tOBRACE); 1406 push_scope(sLoop, nil); 1407 parse_block(); 1408 gen_branch_back(ctx.pc_continue); 1409 pop_scope(); 1410 1411 fixup_branch_fwd(l1_br_false); 1412 1413 ctx.pc_continue = save; 1414 } 1415 1416 void parse_if() { 1417 Scope outer = push_scope(sBlock, nil); 1418 1419 ItemRec x; 1420 parse_expr(&x); 1421 // branch over "if" code 1422 u32 l0_br_false = gen_branch_cond(&x, false); 1423 1424 // generate "if" code 1425 require(tOBRACE); 1426 push_scope(sBlock, nil); 1427 parse_block(); 1428 pop_scope(); 1429 1430 // branch past "else" code 1431 gen_branch_fwd(); 1432 add_scope_fixup(outer); 1433 1434 while (ctx.tok == tELSE) { 1435 next(); 1436 fixup_branch_fwd(l0_br_false); 1437 if (ctx.tok == tIF) { 1438 next(); 1439 parse_expr(&x); 1440 // branch over "if" code 1441 l0_br_false = gen_branch_cond(&x, false); 1442 1443 // generate "if else" code 1444 require(tOBRACE); 1445 push_scope(sBlock, nil); 1446 parse_block(); 1447 pop_scope(); 1448 1449 // branch past "else" code 1450 gen_branch_fwd(); 1451 add_scope_fixup(outer); 1452 } else { 1453 // generate "else" code 1454 require(tOBRACE); 1455 push_scope(sBlock, nil); 1456 parse_block(); 1457 pop_scope(); 1458 1459 // close outer scope 1460 pop_scope(); 1461 return; // no further fixups needed 1462 } 1463 } 1464 fixup_branch_fwd(l0_br_false); 1465 1466 // close outer scope 1467 pop_scope(); 1468 } 1469 1470 void parse_return() { 1471 ItemRec x; 1472 if (ctx.tok == tSEMI) { 1473 if (ctx.fn->type->base != ctx.type_void) { 1474 error("function requires return type"); 1475 } 1476 next(); 1477 x.type = ctx.type_void; 1478 } else { 1479 parse_expr(&x); 1480 if (!compatible_type(ctx.fn->type->base, x.type, &x)) { 1481 error("return types do not match"); 1482 } 1483 require(tSEMI); 1484 } 1485 gen_return(&x); 1486 } 1487 1488 void parse_break() { 1489 // XXX break-to-labeled-loop support 1490 gen_branch_fwd(); 1491 Scope scope = find_scope(sLoop); 1492 if (scope == nil) { 1493 error("break must be used from inside a looping construct"); 1494 } 1495 add_scope_fixup(scope); 1496 require(tSEMI); 1497 } 1498 1499 void parse_continue() { 1500 // XXX continue-to-labeled-loop support 1501 if (ctx.pc_continue == 0) { 1502 error("continue must be used from inside a looping construct"); 1503 } 1504 gen_branch_back(ctx.pc_continue); 1505 require(tSEMI); 1506 } 1507 1508 void STORE(u32 val, u32* ptr, u32 n, u32 sz) { 1509 if (sz == 4) { 1510 ptr[n >> 2] = val; 1511 } else if (sz == 1) { 1512 ((u8*)ptr)[n] = val; 1513 } 1514 } 1515 1516 u32 parse_init_constexpr(Type type) { 1517 if (type->size > 4) { 1518 error("<TODO> larger init constexpr types"); 1519 } 1520 ItemRec x; 1521 parse_expr(&x); 1522 if (x.kind != iConst) { 1523 error("non-constant initializer"); 1524 } 1525 return x.a; 1526 } 1527 1528 u32 parse_array_init(Object var, u32ptr data, u32 dmax, u32 sz) { 1529 memset(data, 0, dmax); 1530 u32 n = 0; 1531 while (true) { 1532 if (ctx.tok == tCBRACE) { 1533 next(); 1534 break; 1535 } 1536 if (n >= dmax) { 1537 error("initializer too large"); 1538 } 1539 u32 v = parse_init_constexpr(var->type->base); 1540 STORE(v, data, n, sz); 1541 n += sz; 1542 if (ctx.tok != tCBRACE) { 1543 require(tCOMMA); 1544 } 1545 } 1546 return n; 1547 } 1548 1549 void parse_struct_init(Object var, u32ptr data) { 1550 memset(data, 0, var->type->size); 1551 while (true) { 1552 if (ctx.tok == tCBRACE) { 1553 next(); 1554 break; 1555 } 1556 String name = parse_name("field name"); 1557 Object field = var->type->first; 1558 while (true) { 1559 if (field == nil) { 1560 error("structure has no '%s' field", name->text); 1561 } 1562 if (field->name == name) { 1563 break; 1564 } 1565 field = field->next; 1566 } 1567 require(tCOLON); 1568 u32 v = parse_init_constexpr(field->type); 1569 STORE(v, data, field->value, 4); 1570 if (ctx.tok != tCBRACE) { 1571 require(tCOMMA); 1572 } 1573 } 1574 } 1575 1576 void parse_local_var() { 1577 String name = parse_name("variable name"); 1578 // TODO: allow type inference 1579 Type type = parse_type(false); 1580 1581 Object lvar = make_var(oVar, name, type, 0, ctx.alloc_stack); 1582 lvar->next = ctx.scope->first; 1583 ctx.scope->first = lvar; 1584 1585 ctx.alloc_stack = ctx.alloc_stack + type->size; 1586 if (ctx.local_stack < ctx.alloc_stack) { 1587 ctx.local_stack = ctx.alloc_stack; 1588 } 1589 1590 if (ctx.tok == tASSIGN) { 1591 next(); 1592 ItemRec x, y; 1593 parse_expr(&x); 1594 gen_item_from_obj(&y, lvar); 1595 gen_store(&x, &y); 1596 } 1597 require(tSEMI); 1598 } 1599 1600 void parse_global_var() { 1601 String name = parse_name("variable name"); 1602 // TODO: allow type inference 1603 Type type = parse_type(false); 1604 1605 Object gvar = make_var(oGlobal, name, type, 0, ctx.alloc_global); 1606 gvar->next = ctx.scope->first; 1607 ctx.scope->first = gvar; 1608 ctx.alloc_global = ctx.alloc_global + type->size; 1609 ctx.alloc_global = (ctx.alloc_global + 3) & (~3); // round to word 1610 1611 if (ctx.tok == tASSIGN) { 1612 next(); 1613 if (ctx.tok == tOBRACE) { 1614 next(); 1615 u32* data = ctx.data + (gvar->value >> 2); 1616 if (type->kind == tArray) { 1617 parse_array_init(gvar, data, type->size, type->base->size); 1618 } else if (type->kind == tRecord) { 1619 parse_struct_init(gvar, data); 1620 } else { 1621 error("cannot initialize this way"); 1622 } 1623 } else { 1624 ItemRec x; 1625 parse_expr(&x); 1626 if (x.kind != iConst) { 1627 error("non-constant global initializer"); 1628 } 1629 //TODO: check type/size compat 1630 ctx.data[gvar->value >> 2] = x.a; 1631 } 1632 } 1633 require(tSEMI); 1634 } 1635 1636 void parse_expr_statement() { 1637 ItemRec x; 1638 parse_expr(&x); 1639 if (ctx.tok == tASSIGN) { 1640 next(); 1641 ItemRec y; 1642 parse_expr(&y); 1643 if (!compatible_type(x.type, y.type, &x)) { 1644 error("incompatible type in assignment"); 1645 } 1646 gen_store(&y, &x); 1647 } else if ((ctx.tok & tcMASK) == tcAEQOP) { 1648 u32 op = ctx.tok - tADDEQ; 1649 next(); 1650 ItemRec y, z; 1651 parse_expr(&y); 1652 copy_item(&x, &z); 1653 gen_add_op(op, &x, &y); 1654 gen_store(&x, &z); 1655 } else if ((ctx.tok & tcMASK) == tcMEQOP) { 1656 u32 op = ctx.tok - tMULEQ; 1657 next(); 1658 ItemRec y, z; 1659 parse_expr(&y); 1660 copy_item(&x, &z); 1661 gen_add_op(op, &x, &y); 1662 gen_store(&x, &z); 1663 } else if ((ctx.tok == tINC) || (ctx.tok == tDEC)) { 1664 ItemRec y, z; 1665 set_item(&y, iConst, ctx.type_int32, 0, 1, 0); 1666 copy_item(&x, &z); 1667 if (ctx.tok == tINC) { 1668 gen_add_op(aADD, &x, &y); 1669 } else { 1670 gen_add_op(aSUB, &x, &y); 1671 } 1672 gen_store(&x, &z); 1673 next(); 1674 } else { 1675 gen_discard(&x); 1676 } 1677 require(tSEMI); 1678 } 1679 1680 void parse_block() { 1681 while (true) { 1682 if (ctx.tok == tCBRACE) { 1683 next(); 1684 break; 1685 } else if (ctx.tok == tRETURN) { 1686 next(); 1687 parse_return(); 1688 } else if (ctx.tok == tBREAK) { 1689 next(); 1690 parse_break(); 1691 } else if (ctx.tok == tCONTINUE) { 1692 next(); 1693 parse_continue(); 1694 } else if (ctx.tok == tWHILE) { 1695 next(); 1696 parse_while(); 1697 } else if (ctx.tok == tIF) { 1698 next(); 1699 parse_if(); 1700 } else if (ctx.tok == tVAR) { 1701 next(); 1702 parse_local_var(); 1703 } else if (ctx.tok == tSEMI) { 1704 next(); 1705 // empty statement 1706 } else { 1707 parse_expr_statement(); 1708 } 1709 } 1710 } 1711 1712 void parse_function_body(Object fn) { 1713 ctx.fn = fn; 1714 gen_prologue(fn); 1715 push_scope(sFunc, fn->first); // scope for parameters 1716 push_scope(sBlock, nil); // top scope for function body 1717 parse_block(); 1718 pop_scope(); 1719 pop_scope(); 1720 gen_epilogue(fn); 1721 } 1722 1723 Object parse_param(String fname, u32 n, Object first, Object last) { 1724 if (n == FNMAXARGS) { 1725 error("too many parameters (%d) for '%s'", FNMAXARGS, fname->text); 1726 } 1727 String pname = parse_name("parameter name"); 1728 Type ptype = parse_type(false); 1729 Object param = make_var(oParam, pname, ptype, 0, 4 + n * 4); 1730 1731 Object obj = first; 1732 while (obj != nil) { 1733 if (obj->name == param->name) { 1734 error("duplicate parameter name '%s'", fname->text); 1735 } 1736 obj = obj->next; 1737 } 1738 1739 if (last != nil) { 1740 last->next = param; 1741 } 1742 return param; 1743 } 1744 1745 void make_builtin(const char* name, u32 id, Type p0, Type p1, Type rtn) { 1746 String fname = make_string(name, strlen(name)); 1747 Type type = make_type(tFunc, rtn, nil, nil, 0, 0); 1748 type->obj = make_object(oFunc, fname, type, nil, ofBuiltin, id); 1749 1750 if (p0 != nil) { 1751 Object param = make_var(oParam, make_string("a", 1), p0, 0, 0); 1752 type->obj->first = param; 1753 type->first = param; 1754 type->len = 1; 1755 if (p1 != nil) { 1756 param->next = make_var(oParam, make_string("b", 1), p1, 0, 1); 1757 type->len = 2; 1758 } 1759 } 1760 make_global(type->obj); 1761 } 1762 1763 void parse_function() { 1764 Object first = nil; 1765 Object last = nil; 1766 u32 n = 0; 1767 String fname = parse_name("function name"); 1768 Type rettype = ctx.type_void; 1769 1770 require(tOPAREN); 1771 1772 // process parameters 1773 if (ctx.tok != tCPAREN) { 1774 first = parse_param(fname, n, nil, nil); 1775 last = first; 1776 n++; 1777 while (ctx.tok == tCOMMA) { 1778 next(); 1779 last = parse_param(fname, n, first, last); 1780 n++; 1781 } 1782 } 1783 require(tCPAREN); 1784 1785 if ((ctx.tok != tSEMI) && (ctx.tok != tOBRACE)) { 1786 rettype = parse_type(false); 1787 } 1788 1789 int isdef = 0; 1790 if (ctx.tok == tSEMI) { 1791 // declaration 1792 next(); 1793 } else if (ctx.tok == tOBRACE) { 1794 // definition 1795 next(); 1796 isdef = 1; 1797 } else { 1798 expected("semi or open brace"); 1799 } 1800 1801 // Look for an existing declaration or definintion of this function 1802 Object obj = find(fname); 1803 if (obj != nil) { 1804 // such a named identifier exists 1805 // check to see if we are in agreement with it 1806 if (obj->kind != oFunc) { 1807 error("redefining '%s' as function", fname->text); 1808 } 1809 if (isdef && (obj->flags & ofDefined)) { 1810 error("redefined function '%s'", fname->text); 1811 } 1812 if (rettype != obj->type->base) { 1813 error("func '%s' return type differs from decl", fname->text); 1814 } 1815 if (obj->type->len != n) { 1816 error("func '%s' parameter count differs from decl", fname->text); 1817 } 1818 Object pa = first; 1819 Object pb = obj->type->first; 1820 u32 i = 1; 1821 while ((pa != nil) && (pb != nil)) { 1822 if (!same_type(pa->type, pb->type)) { 1823 error("func '%s' param %u differs from decl", fname->text, i); 1824 } 1825 pa = pa->next; 1826 pb = pb->next; 1827 } 1828 } else { 1829 // if there was no existing record of this function, create one now 1830 Type type = make_type(tFunc, rettype, nil, first, n, 0); 1831 obj = make_object(oFunc, fname, type, first, 0, 0); 1832 type->obj = obj; 1833 make_global(obj); 1834 } 1835 1836 // handle definition if it is one 1837 if (isdef) { 1838 // patch any forward references 1839 fixup_branches_fwd(obj->fixups); 1840 1841 // mark as defined and save entry address 1842 obj->flags |= ofDefined; 1843 obj->value = ctx.pc; 1844 parse_function_body(obj); 1845 } 1846 } 1847 1848 void parse_type_def() { 1849 String name = parse_name("type name"); 1850 Type type = parse_type(false); 1851 Type prev = find_type(name); 1852 if (prev == nil) { 1853 add_type(type, name); 1854 } else { 1855 if (prev->kind != tUndefined) { 1856 error("cannot redefine type '%s'\n", name->text); 1857 } 1858 prev->kind = type->kind; 1859 prev->base = type->base; 1860 prev->first = type->first; 1861 prev->len = type->len; 1862 prev->size = type->size; 1863 prev->obj->type = type; 1864 // XXX discard type 1865 } 1866 require(tSEMI); 1867 } 1868 1869 void parse_enum_def() { 1870 require(tOBRACE); 1871 u32 val = 0; 1872 while (ctx.tok != tCBRACE) { 1873 String name = parse_name("enum tag name"); 1874 Object obj = find(name); 1875 if (obj != nil) { 1876 error("cannot redefine %s as enum tag\n", name->text); 1877 } 1878 if (ctx.tok == tASSIGN) { 1879 next(); 1880 val = parse_init_constexpr(ctx.type_int32); 1881 } 1882 require(tCOMMA); 1883 obj = make_var(oConst, name, ctx.type_int32, 0, val); 1884 obj->next = ctx.scope->first; 1885 ctx.scope->first = obj; 1886 val++; 1887 } 1888 require(tCBRACE); 1889 require(tSEMI); 1890 } 1891 1892 void parse_program() { 1893 next(); 1894 for (;;) { 1895 if (ctx.tok == tFUNC) { 1896 next(); 1897 parse_function(); 1898 } else if (ctx.tok == tTYPE) { 1899 next(); 1900 parse_type_def(); 1901 } else if (ctx.tok == tVAR) { 1902 next(); 1903 parse_global_var(); 1904 } else if (ctx.tok == tENUM) { 1905 next(); 1906 parse_enum_def(); 1907 } else if (ctx.tok == tEOF) { 1908 return; 1909 } else { 1910 expected("function, variable, or type definition"); 1911 } 1912 } 1913 } 1914 1915 // ================================================================ 1916 1917 enum { 1918 tmp_reg_count = 4, 1919 tmp_reg_first = 8, 1920 tmp_reg_last = 11, 1921 }; 1922 1923 bool is_tmp_reg(u32 n) { 1924 return (n >= tmp_reg_first) && (n <= tmp_reg_last); 1925 } 1926 1927 u32 get_reg_tmp() { 1928 u32 n = tmp_reg_first; 1929 while (n <= tmp_reg_last) { 1930 if (!(ctx.regbits & (1 << n))) { 1931 ctx.regbits |= (1 << n); 1932 return n; 1933 } 1934 n++; 1935 } 1936 error("cannot allocate register"); 1937 return 0; 1938 } 1939 1940 void put_reg(u32 r) { 1941 if ((r < tmp_reg_first) || (r > tmp_reg_last)) { 1942 // currently we don't strictly track r0..r7 1943 // they are used for function calls and returns 1944 return; 1945 } 1946 if (!(ctx.regbits & (1 << r))) { 1947 error("freeing non-allocated register %u\n", r); 1948 } 1949 ctx.regbits = ctx.regbits & (~(1 << r)); 1950 } 1951 1952 void emit(u32 ins) { 1953 ctx.code[ctx.pc / 4] = ins; 1954 gen_trace_code("", ctx.pc); 1955 ctx.pc = ctx.pc + 4; 1956 } 1957 1958 enum { 1959 R0 = 0, R1 = 1, R2 = 2, R3 = 3, R4 = 4, R5 = 5, R6 = 6, R7 = 7, 1960 R8 = 9, R9 = 9, R10 = 10, R11 = 11, MT = 12, SB = 13, SP = 14, LR = 15, 1961 TMP = 16 1962 }; 1963 enum { 1964 MOV = 0x0000, LSL = 0x0001, ASR = 0x0002, ROR = 0x0003, 1965 AND = 0x0004, ANN = 0x0005, IOR = 0x0006, XOR = 0x0007, 1966 ADD = 0x0008, SUB = 0x0009, MUL = 0x000A, DIV = 0x000B, 1967 FAD = 0x000C, FSB = 0x000D, FML = 0x000E, FDV = 0x000F, 1968 ADC = 0x2008, SBC = 0x2009, UML = 0x200A, 1969 MHI = 0x2000, 1970 MOV_H = 0x2000, 1971 MOV_CC = 0x3000, 1972 MOD = 0x001B, // fake op for plumbing (DIV+MOV_H) 1973 }; 1974 1975 void emit_op(u32 op, u32 a, u32 b, u32 c) { 1976 emit((op << 16) | (a << 24) | (b << 20) | c); 1977 } 1978 void emit_opi(u32 op, u32 a, u32 b, u32 n) { 1979 emit(((0x4000 | op) << 16) | (a << 24) | (b << 20) | (n & 0xffff)); 1980 } 1981 1982 // mov (load immediate) using the appropriate one or two 1983 // instruction form for the immediate argument 1984 void emit_mov(u32 a, u32 n) { 1985 u32 m = n >> 16; 1986 if (m == 0) { 1987 emit_opi(MOV, a, 0, n); 1988 } else if (m == 0xFFFF) { 1989 emit_opi(MOV | 0x1000, a, 0, n); 1990 } else { 1991 emit_opi(MHI, a, 0, m); 1992 if ((n & 0xFFFF) != 0) { 1993 emit_opi(IOR, a, a, n); 1994 } 1995 } 1996 } 1997 1998 // immediate op, using a temporary register and register op if the 1999 // immediate argument does not fit the single instruction form 2000 void emit_opi_n(u32 op, u32 a, u32 b, u32 n) { 2001 u32 m = n >> 16; 2002 if (m == 0) { 2003 emit_opi(op, a, b, n); 2004 } else if (m == 0xFFFF) { 2005 emit_opi(op | 0x1000, a, b, n); 2006 } else { 2007 u32 t0 = get_reg_tmp(); 2008 emit_opi(MHI, t0, 0, m); 2009 if ((n & 0xFFFF) != 0) { 2010 emit_opi(IOR, t0, t0, n); 2011 } 2012 emit_op(op, a, b, t0); 2013 put_reg(t0); 2014 } 2015 } 2016 2017 enum { 2018 LDW = 8, LDB = 9, STW = 10, STB = 11 2019 }; 2020 void emit_mem(u32 op, u32 a, u32 b, u32 off) { 2021 emit((op << 28) | (a << 24) | (b << 20) | (off & 0xfffff)); 2022 } 2023 2024 enum { 2025 MI = 0, EQ = 1, CS = 2, VS = 3, LS = 4, LT = 5, LE = 6, AL = 7, 2026 PL = 8, NE = 9, CC = 10, VC = 11, HI = 12, GE = 13, GT = 14, NV = 15, 2027 L = 0x10, 2028 }; 2029 void emit_br(u32 op, u32 r) { 2030 emit(((0xC0 | op) << 24) | r); 2031 } 2032 void emit_bi(u32 op, u32 off) { 2033 emit(((0xE0 | op) << 24) | (off & 0xffffff)); 2034 } 2035 2036 u8 rel_op_to_cc_tab[6] = { EQ, NE, LT, LE, GT, GE }; 2037 u32 add_op_to_ins_tab[4] = { ADD, SUB, IOR, XOR }; 2038 u32 mul_op_to_ins_tab[7] = { MUL, DIV, MOD, AND, ANN, LSL, ASR }; 2039 2040 // ================================================================ 2041 2042 u32 rel_op_to_cc(u32 op) { 2043 if (op > 5) { abort(); } 2044 return rel_op_to_cc_tab[op]; 2045 } 2046 u32 add_op_to_ins(u32 op) { 2047 if (op > 3) { abort(); } 2048 return add_op_to_ins_tab[op]; 2049 } 2050 u32 mul_op_to_ins(u32 op) { 2051 if (op > 6) { abort(); }; 2052 return mul_op_to_ins_tab[op]; 2053 } 2054 2055 // parser does not know internal details like "which register is SP" 2056 // so the backend needs to initialize variable objects for it 2057 void gen_item_from_obj(Item x, Object obj) { 2058 if (obj->kind == oParam) { 2059 if (obj->type->kind == tArray) { 2060 // arrays are passed by reference through parameters 2061 // maybe this should be better represented? 2062 u32 r = get_reg_tmp(); 2063 emit_mem(LDW, r, SP, obj->value); 2064 set_item(x, iRegInd, obj->type, r, 0, 0); 2065 } else { 2066 set_item(x, iRegInd, obj->type, SP, obj->value, 0); 2067 } 2068 } else if (obj->kind == oVar) { 2069 set_item(x, iRegInd, obj->type, SP, obj->value, 0); 2070 } else if (obj->kind == oGlobal) { 2071 set_item(x, iRegInd, obj->type, SB, obj->value, 0); 2072 } else if (obj->kind == oFunc) { 2073 set_item(x, iFunc, obj->type, 0, obj->value, 0); 2074 } else if (obj->kind == oConst) { 2075 set_item(x, iConst, obj->type, 0, obj->value, 0); 2076 } else { 2077 error("unsupported identifier"); 2078 } 2079 gen_trace("item_from_obj<<<", x, nil); 2080 } 2081 2082 2083 // check to see if the last emitted instruction 2084 // loaded a particular register and if so, patch 2085 // it to load a different register 2086 bool patch_last_load(u32 oldr, u32 newr) { 2087 u32 ins = ctx.code[(ctx.pc - 4) / 4]; 2088 u32 n = ins >> 29; 2089 if ((n == 0b101) || (n == 0b110) || (n == 0b111)) { 2090 // store and branch instructions can be ignored 2091 return false; 2092 } 2093 if (((ins >> 24) & 15) != oldr) { 2094 return false; 2095 } 2096 ctx.code[(ctx.pc - 4) / 4] = (ins & 0xF0FFFFFF) | (newr << 24); 2097 gen_trace_code("patch_last_load()", (ctx.pc - 4)); 2098 return true; 2099 } 2100 2101 // load the value of an item into a specific register 2102 void gen_load_reg(Item x, u32 r) { 2103 gen_trace_n("load_reg", r, x, nil); 2104 if (x->kind == iReg) { 2105 if (x->r != r) { 2106 if (patch_last_load(x->r, r)) { 2107 put_reg(x->r); 2108 } else { 2109 emit_op(MOV, r, 0, x->r); 2110 put_reg(x->r); 2111 } 2112 } 2113 } else if (x->kind == iConst) { 2114 emit_mov(r, x->a); 2115 } else if (x->kind == iRegInd) { 2116 if (x->type->size == 4) { 2117 emit_mem(LDW, r, x->r, x->a); 2118 } else if (x->type->size == 1) { 2119 emit_mem(LDB, r, x->r, x->a); 2120 } else { 2121 error("cannot load a size %u item\n", x->type->size); 2122 } 2123 if (x->r != r) { 2124 put_reg(x->r); 2125 } 2126 } else { 2127 error("gen_load failed (kind %u)", x->kind); 2128 } 2129 x->kind = iReg; 2130 x->r = r; 2131 x->a = 0; 2132 x->b = 0; 2133 gen_trace("load_reg<<<", x, nil); 2134 } 2135 2136 void gen_discard(Item x) { 2137 gen_trace("discard", x, nil); 2138 if (x->kind == iReg) { 2139 put_reg(x->r); 2140 } 2141 } 2142 2143 // convert an item to value-in-register format 2144 // if it's not already in that format 2145 void gen_load(Item x) { 2146 if ((x->kind == iRegInd) && is_tmp_reg(x->r)) { 2147 gen_load_reg(x, x->r); 2148 } else if (x->kind != iReg) { 2149 gen_load_reg(x, get_reg_tmp()); 2150 } 2151 } 2152 2153 void gen_store(Item val, Item adr) { 2154 gen_trace("gen_store", val, adr); 2155 gen_load(val); 2156 if (adr->kind == iRegInd) { 2157 if (adr->type->size == 4) { 2158 emit_mem(STW, val->r, adr->r, adr->a); 2159 } else if (adr->type->size == 1) { 2160 emit_mem(STB, val->r, adr->r, adr->a); 2161 } else { 2162 error("cannot store a size %u item\n", adr->type->size); 2163 } 2164 put_reg(val->r); 2165 put_reg(adr->r); 2166 } else { 2167 error("gen_store: invalid target"); 2168 } 2169 } 2170 2171 void gen_address_reg(Item x, i32 r) { 2172 gen_trace_n("address_reg", r, x, nil); 2173 if (x->kind != iRegInd) { 2174 error("internal error, wrong kind"); 2175 } 2176 if (x->a > 0) { 2177 emit_opi_n(ADD, r, x->r, x->a); 2178 } else if(r != x->r) { 2179 emit_op(MOV, r, 0, x->r); 2180 } 2181 if (r != x->r) { 2182 put_reg(x->r); 2183 x->r = r; 2184 } 2185 } 2186 2187 // convert RegInd+off to RegInd+0 2188 // migrate to a tmpreg if not already 2189 void gen_address(Item x) { 2190 if (!is_tmp_reg(x->r)) { 2191 gen_address_reg(x, get_reg_tmp()); 2192 } else { 2193 gen_address_reg(x, x->r); 2194 } 2195 } 2196 2197 2198 void gen_index(Item x, Item idx) { 2199 gen_trace("index", x, idx); 2200 gen_address(x); 2201 u32 sz = x->type->base->size; 2202 gen_load(idx); 2203 // OPT: use shifts for powers of two 2204 if (sz > 1) { 2205 emit_opi_n(MUL, idx->r, idx->r, sz); 2206 } 2207 // TODO: range check 2208 emit_op(ADD, x->r, x->r, idx->r); 2209 put_reg(idx->r); 2210 x->type = x->type->base; 2211 gen_trace("index<<<", x, nil); 2212 } 2213 2214 void gen_deref_ptr(Item x) { 2215 gen_trace("deref_ptr", x, nil); 2216 gen_load(x); 2217 if (x->kind != iReg) { 2218 error("internal - ptr deref failed"); 2219 } 2220 x->type = x->type->base; 2221 x->kind = iRegInd; 2222 x->a = 0; 2223 gen_trace("deref_ptr<<<", x, nil); 2224 } 2225 2226 void gen_get_ptr(Item x) { 2227 gen_trace("get_ptr", x, nil); 2228 if (x->kind != iRegInd) { 2229 error("internal - get ptr failed"); 2230 } 2231 x->kind = iReg; 2232 if (x->a != 0) { 2233 emit_opi_n(ADD, x->r, x->r, x->a); 2234 x->a = 0; 2235 } 2236 // TODO: can we cache these or be sure to recycle them later? 2237 x->type = make_type(tPointer, x->type, nil, nil, 0, 4); 2238 gen_trace("get_ptr<<<", x, nil); 2239 } 2240 2241 void gen_load_bool1(Item x, bool val) { 2242 set_item(x, iReg, ctx.type_bool, get_reg_tmp(), val, 0); 2243 gen_trace("load_bool1", x, nil); 2244 emit_opi(MOV, x->r, 0, x->a); 2245 } 2246 2247 void gen_load_bool2(Item x) { 2248 gen_trace("load_bool2", x, nil); 2249 emit_opi(MOV, x->r, 0, !x->a); 2250 } 2251 2252 void gen_bool_from_comp(Item x) { 2253 gen_trace("bool_from_cond", x, nil); 2254 emit_op(SUB, x->a, x->a, x->b); 2255 put_reg(x->b); 2256 u32 l0_br_to_true = ctx.pc; 2257 emit_bi(rel_op_to_cc(x->r), 0); 2258 emit_opi(MOV, x->a, 0, 0); 2259 u32 l1_br_to_done = ctx.pc; 2260 emit_bi(AL, 0); 2261 fixup_branch_fwd(l0_br_to_true); 2262 emit_opi(MOV, x->a, 0, 1); 2263 fixup_branch_fwd(l1_br_to_done); 2264 x->kind = iReg; 2265 x->type = ctx.type_bool; 2266 x->r = x->a; 2267 x->a = 0; 2268 x->b = 0; 2269 } 2270 2271 u32 gen_branch_cond(Item x, bool sense) { 2272 gen_trace_n("branch_cond", sense, x, nil); 2273 u32 cc; 2274 if (x->kind == iComp) { 2275 if (sense == false) { 2276 x->r = invert_relop(x->r); 2277 } 2278 emit_op(SUB, x->a, x->a, x->b); 2279 put_reg(x->a); 2280 put_reg(x->b); 2281 cc = rel_op_to_cc(x->r); 2282 } else if (x->type == ctx.type_bool) { 2283 gen_load(x); 2284 emit_opi(SUB, x->r, x->r, 1); 2285 put_reg(x->r); 2286 if (sense) { 2287 cc = EQ; 2288 } else { 2289 cc = NE; 2290 } 2291 } else { 2292 error("conditional branch needs comparison or bool"); 2293 } 2294 emit_bi(cc, 0); 2295 return ctx.pc - 4; 2296 } 2297 2298 u32 gen_branch_fwd() { 2299 gen_trace("branch_fwd", nil, nil); 2300 emit_bi(AL, 0); 2301 return ctx.pc - 4; 2302 } 2303 2304 void gen_branch_back(u32 addr) { 2305 gen_trace_n("branch_back", addr, nil, nil); 2306 emit_bi(AL, (addr - ctx.pc - 4) >> 2); 2307 } 2308 2309 void gen_return(Item x) { 2310 gen_trace("return", x, nil); 2311 if (x->type != ctx.type_void) { 2312 gen_load_reg(x, R0); 2313 } 2314 emit_bi(AL, 0); 2315 add_scope_fixup(find_scope(sFunc)); 2316 } 2317 2318 void gen_ref_param(u32 n, Item val) { 2319 gen_trace_n("ref_param", n, val, nil); 2320 if (n > 7) { 2321 error("gen_ref_param - too many parameters"); 2322 } 2323 gen_address_reg(val, n); 2324 } 2325 2326 void gen_param(u32 n, Item val) { 2327 gen_trace_n("param", n, val, nil); 2328 if (n > 7) { 2329 error("gen_param - too many parameters"); 2330 } 2331 gen_load_reg(val, n); 2332 } 2333 2334 void gen_builtin(u32 id) { 2335 gen_trace_n("builtin", id, nil, nil); 2336 if (id == biPrintHex32) { 2337 emit_mov(1, 0xFFFF0000); // MOV R1, IOBASE 2338 emit_mem(STW, 0, 1, 0x104); // SW R0, [R1, 0x104] 2339 } else if (id == biPutC) { 2340 emit_mov(1, 0xFFFF0000); // MOV R1, IOBASE 2341 emit_mem(STW, 0, 1, 0x108); // SW R0, [R1, 0x108] 2342 } else { 2343 error("unknown builtin function"); 2344 } 2345 } 2346 2347 void gen_save_regs() { 2348 gen_trace("save_regs", nil, nil); 2349 u32 n = tmp_reg_first; 2350 u32 off = ctx.spill_stack; 2351 while (n <= tmp_reg_last) { 2352 if (ctx.regbits & (1 << n)) { 2353 emit_mem(STW, n, SP, off); 2354 off += 4; 2355 } 2356 n++; 2357 } 2358 } 2359 2360 void gen_restore_regs() { 2361 gen_trace("restore_regs", nil, nil); 2362 u32 n = tmp_reg_first; 2363 u32 off = ctx.spill_stack; 2364 while (n <= tmp_reg_last) { 2365 if (ctx.regbits & (1 << n)) { 2366 emit_mem(LDW, n, SP, off); 2367 off += 4; 2368 } 2369 n++; 2370 } 2371 } 2372 2373 void gen_call(Item x) { 2374 gen_trace("call", x, nil); 2375 gen_save_regs(); 2376 if (x->type->obj->flags & ofBuiltin) { 2377 // OPT: not all builtins will require save/restore regs 2378 gen_builtin(x->type->obj->value); 2379 } else if (x->type->obj->flags & ofDefined) { 2380 u32 fnpc = x->type->obj->value; 2381 emit_bi(AL|L, (fnpc - ctx.pc - 4) >> 2); 2382 } else { 2383 emit_bi(AL|L, 0); 2384 add_object_fixup(x->type->obj); 2385 } 2386 gen_restore_regs(); 2387 2388 // item becomes the return value 2389 x->type = x->type->base; 2390 if (x->type == ctx.type_void) { 2391 x->kind = iConst; 2392 } else { 2393 x->kind = iReg; 2394 x->r = R0; 2395 // OPT if we tracked r0 usage we could 2396 // avoid moving from R0 to a tmp reg here 2397 gen_load_reg(x, get_reg_tmp()); 2398 } 2399 x->a = 0; 2400 x->b = 0; 2401 } 2402 2403 void gen_add_op(u32 op, Item x, Item y) { 2404 gen_trace_n("add_op", op, x, y); 2405 op = add_op_to_ins(op); 2406 if ((x->kind == iConst) && (y->kind == iConst)) { 2407 // XC = XC op YC 2408 if (op == ADD) { 2409 x->a = x->a + y->a; 2410 } else if (op == SUB) { 2411 x->a = x->a - y->a; 2412 } else if (op == IOR) { 2413 x->a = x->a | y->a; 2414 } else if (op == XOR) { 2415 x->a = x->a ^ y->a; 2416 } 2417 } else if (y->kind == iConst) { 2418 // XR = XR op YC 2419 gen_load(x); 2420 // for all of these, rhs==0 is no-op 2421 if (y->a != 0) { 2422 emit_opi_n(op, x->r, x->r, y->a); 2423 } 2424 } else { 2425 // XR = XR op YR 2426 gen_load(x); 2427 gen_load(y); 2428 emit_op(op, x->r, x->r, y->r); 2429 put_reg(y->r); 2430 } 2431 } 2432 2433 void gen_mul_op(u32 op, Item x, Item y) { 2434 gen_trace_n("mul_op", op, x, y); 2435 op = mul_op_to_ins(op); 2436 if ((x->kind == iConst) && (y->kind == iConst)) { 2437 // XC = XC op YC 2438 if (op == MUL) { 2439 x->a = x->a * y->a; 2440 } else if (op == DIV) { 2441 x->a = x->a / y->a; 2442 } else if (op == MOD) { 2443 x->a = x->a % y->a; 2444 } else if (op == LSL) { 2445 x->a = x->a << y->a; 2446 } else if (op == ASR) { 2447 x->a = x->a >> y->a; 2448 } else if (op == AND) { 2449 x->a = x->a & y->a; 2450 } else if (op == ANN) { 2451 x->a = x->a & (~y->a); 2452 } 2453 } else if (y->kind == iConst) { 2454 // XR = XR op YC 2455 gen_load(x); 2456 if (((op == DIV) || (op == MOD)) && (y->a == 0)) { 2457 error("divide by zero"); 2458 } else if ((op == MUL) && (y->a == 1)) { 2459 return; // x * 1 = x 2460 } else if (((op == LSL) || (op == ASR)) && (y->a == 0)) { 2461 return; // shift-by-zero 2462 } 2463 if (op == MOD) { 2464 emit_opi_n(DIV, x->r, x->r, y->a); 2465 emit_op(MOV_H, x->r, 0, 0); 2466 } else { 2467 emit_opi_n(op, x->r, x->r, y->a); 2468 } 2469 } else { 2470 // TODO runtime div-by-zero check 2471 // XR = XR op YR 2472 gen_load(x); 2473 gen_load(y); 2474 if (op == MOD) { 2475 emit_op(DIV, x->r, x->r, y->r); 2476 emit_op(MOV_H, x->r, 0, 0); 2477 } else { 2478 emit_op(op, x->r, x->r, y->r); 2479 } 2480 put_reg(y->r); 2481 } 2482 } 2483 2484 void gen_rel_op(u32 op, Item x, Item y) { 2485 gen_trace_n("rel_op", op, x, y); 2486 gen_load(x); 2487 gen_load(y); 2488 2489 // fuse x and y items into the new Comparison Item x 2490 x->kind = iComp; 2491 x->type = ctx.type_bool; 2492 x->a = x->r; 2493 x->b = y->r; 2494 x->r = op; 2495 } 2496 2497 void gen_unary_op(u32 op, Item x) { 2498 gen_trace_n("unary_op", op, x, nil); 2499 if (x->kind == iConst) { 2500 if (op == tMINUS) { 2501 x->a = - x->a; 2502 } else if (op == tBANG) { 2503 x->a = ! x->a; 2504 } else if (op == tNOT) { 2505 x->a = ~ x->a; 2506 } 2507 return; 2508 } 2509 gen_load(x); 2510 if (op == tBANG) { 2511 // ERROR? r, r, 1 instead? 2512 emit_opi_n(XOR, x->r, x->r, 1); 2513 } else if (op == tNOT) { 2514 emit_opi_n(XOR, x->r, x->r, 0xFFFFFFFF); 2515 } else if (op == tMINUS) { 2516 u32 t0 = get_reg_tmp(); 2517 emit_mov(t0, 0); 2518 emit_op(SUB, x->r, t0, x->r); 2519 put_reg(t0); 2520 } 2521 } 2522 2523 // patch branch instruction at addr to 2524 // branch to current pc 2525 void fixup_branch_fwd(u32 addr) { 2526 if ((addr == ctx.pc - 4) && (ctx.last_target_pc < ctx.pc)) { 2527 // if the branch to be patched is the 2528 // instruction just previously emitted, we 2529 // can simply erase it 2530 // 2531 // provided it's safe to do so (no branches are 2532 // already arriving here...) 2533 ctx.pc -= 4; 2534 } else { 2535 u32 off = (ctx.pc - addr - 4) >> 2; 2536 u32 ins = ctx.code[addr >> 2] & 0xFF000000; 2537 ctx.code[addr >> 2] = ins | (off & 0x00FFFFFF); 2538 gen_trace_code("fixup_branch_fwd()", addr); 2539 } 2540 2541 // note that we have forward branches to this pc 2542 // (to prevent optimizations from moving code 2543 // behind them) 2544 ctx.last_target_pc = ctx.pc; 2545 } 2546 2547 void fixup_branches_fwd(Fixup fixup) { 2548 while (fixup != nil) { 2549 fixup_branch_fwd(fixup->pc); 2550 fixup = fixup->next; 2551 } 2552 } 2553 2554 void gen_prologue(Object fn) { 2555 gen_trace("prologue", nil, nil); 2556 fn->value = ctx.pc; 2557 emit_opi(SUB, SP, SP, 4 + fn->type->len * 4); 2558 emit_mem(STW, LR, SP, 0); 2559 2560 Object param = fn->first; 2561 u32 off = 4; 2562 u32 r = 0; 2563 while (param != nil) { 2564 emit_mem(STW, r, SP, off); 2565 r++; 2566 off += 4; 2567 param = param->next; 2568 } 2569 // set aside space to spill temporary regs 2570 // OPT: would be nice to only reserve space we need 2571 ctx.spill_stack = off; 2572 ctx.local_stack = off + 4 * tmp_reg_count; 2573 ctx.alloc_stack = ctx.local_stack; 2574 } 2575 2576 void gen_epilogue(Object fn) { 2577 gen_trace("epilogue", nil, nil); 2578 if (ctx.regbits) { 2579 u32 n = tmp_reg_first; 2580 while (n <= tmp_reg_last) { 2581 if (ctx.regbits & (1 << n)) { 2582 fprintf(stderr, "R%u ", n); 2583 } 2584 n++; 2585 } 2586 fprintf(stderr,"\n"); 2587 error("internal - registers reserved in epilogue"); 2588 } 2589 if (ctx.local_stack > 0xFFFF) { 2590 error("using too much stack"); 2591 } 2592 2593 // patch prologue 2594 u32 ins = ctx.code[fn->value / 4]; 2595 ctx.code[fn->value / 4] = (ins & 0xFFFF0000) | ctx.local_stack; 2596 gen_trace_code("patch_prologue()", fn->value); 2597 2598 // emit epilogue 2599 emit_mem(LDW, LR, SP, 0); 2600 emit_opi(ADD, SP, SP, ctx.local_stack); 2601 emit_br(AL, LR); 2602 } 2603 2604 2605 void gen_start() { 2606 gen_trace("start", nil, nil); 2607 emit_mov(SB, 0); // placeholder SB load 2608 emit_bi(AL, 0); // placeholder branch to init 2609 } 2610 2611 void gen_end() { 2612 gen_trace("end", nil, nil); 2613 String str = make_string("start", 5); 2614 Object obj = find(str); 2615 while (obj != nil) { 2616 if (obj->type->kind != tFunc) { 2617 error("'start' is not a function\n"); 2618 } 2619 if (obj->first != nil) { 2620 error("'start' must have no parameters\n"); 2621 } 2622 // patch static base load to after the last instruction 2623 ctx.code[0] |= ctx.pc; 2624 // patch branch-to-start 2625 ctx.code[1] |= (obj->value - 8) >> 2; 2626 // TODO: copy ro globals after code 2627 // TODO: SB should neg-index into ro, pos-index into rw 2628 return; 2629 } 2630 error("no 'start' function\n"); 2631 } 2632 2633 void gen_write(const char* outname) { 2634 int fd = open(outname, O_CREAT | O_TRUNC | O_WRONLY, 0644); 2635 if (fd < 0) { 2636 error("cannot open '%s' to write", outname); 2637 } 2638 u32 n = 0; 2639 while (n < ctx.pc) { 2640 if (write(fd, ctx.code + (n/4), sizeof(u32)) != sizeof(u32)) { 2641 error("error writing '%s'", outname); 2642 } 2643 n += 4; 2644 } 2645 n = 0; 2646 while (n < ctx.alloc_global) { 2647 if (write(fd, ctx.data + (n/4), sizeof(u32)) != sizeof(u32)) { 2648 error("error writing '%s'", outname); 2649 } 2650 n += 4; 2651 } 2652 close(fd); 2653 } 2654 2655 void gen_listing(const char* listfn, const char* srcfn) { 2656 FILE* fin = fopen(srcfn, "r"); 2657 if (fin == NULL) { 2658 error("cannot re-read '%s'\n", srcfn); 2659 } 2660 FILE* fout = fopen(listfn, "w"); 2661 if (fout == NULL) { 2662 error("cannot write '%s'\n", listfn); 2663 } 2664 u32 n = 0; 2665 u32 line = 1; 2666 char buf[1024]; 2667 while (n < ctx.pc) { 2668 u32 ins = ctx.code[n/4]; 2669 if ((line < ctx.xref[n/4]) && fin) { 2670 fprintf(fout, "\n"); 2671 while (line < ctx.xref[n/4]) { 2672 if (fgets(buf, sizeof(buf), fin) == nil) { 2673 fin = nil; 2674 break; 2675 } 2676 u32 i = 0; 2677 while (buf[i] != 0) { 2678 if (buf[i] > ' ') { 2679 fprintf(fout,"%s", buf); 2680 break; 2681 } 2682 i++; 2683 } 2684 line++; 2685 } 2686 fprintf(fout, "\n"); 2687 } 2688 risc5dis(n, ins, buf); 2689 fprintf(fout, "%08x: %08x %s\n", n, ins, buf); 2690 n += 4; 2691 } 2692 n = 0; 2693 while (n < ctx.alloc_global) { 2694 fprintf(fout, "%08x: %08x\n", ctx.pc + n, ctx.data[n >> 2]); 2695 n += 4; 2696 } 2697 fclose(fout); 2698 if (fin) { 2699 fclose(fin); 2700 } 2701 } 2702 2703 // ================================================================ 2704 2705 2706 void dump_file_line(const char* fn, u32 offset) { 2707 int fd = open(fn, O_RDONLY); 2708 if (fd < 0) { 2709 return; 2710 } 2711 if (lseek(fd, offset, SEEK_SET) != offset) { 2712 close(fd); 2713 return; 2714 } 2715 char line[256]; 2716 int r = read(fd, line, 255); 2717 if (r > 0) { 2718 line[r] = 0; 2719 int n = 0; 2720 while (n < r) { 2721 if (line[n] == '\n') { 2722 line[n] = 0; 2723 break; 2724 } 2725 n++; 2726 } 2727 fprintf(stderr, "\n%s", line); 2728 } 2729 close(fd); 2730 } 2731 2732 void dump_type(Type type, bool use_short_name) { 2733 if (use_short_name && (type->obj != nil)) { 2734 printf("%s", type->obj->name->text); 2735 } else if (type->kind == tArray) { 2736 printf("[%u]", type->len); 2737 dump_type(type->base, true); 2738 } else if (type->kind == tRecord) { 2739 printf("struct {\n"); 2740 Object field = type->first; 2741 while (field != nil) { 2742 printf(" %s ", field->name->text); 2743 dump_type(field->type, true); 2744 printf(", // off=%u, sz=%u\n", field->value, field->type->size); 2745 field = field->next; 2746 } 2747 printf("}"); 2748 } else { 2749 printf("%s", type_id_tab[type->kind]); 2750 if ((type->kind == tPointer) || (type->kind == tSlice)) { 2751 dump_type(type->base, true); 2752 } 2753 } 2754 } 2755 2756 void dump_context() { 2757 Object obj = ctx.typetab; 2758 while (obj != nil) { 2759 printf("type %s ", obj->name->text); 2760 dump_type(obj->type, false); 2761 printf("; // sz=%u\n", obj->type->size); 2762 obj = obj->next; 2763 } 2764 } 2765 2766 void print_type(Type type) { 2767 if (type->kind == tArray) { 2768 fprintf(stderr, "[%u]", type->len); 2769 print_type(type->base); 2770 } else if (type->kind == tPointer) { 2771 fprintf(stderr, "*"); 2772 print_type(type->base); 2773 } else if (type->kind == tRecord) { 2774 if (type->obj != nil) { 2775 fprintf(stderr, "{}%s", type->obj->name->text); 2776 } else { 2777 fprintf(stderr, "{}\n"); 2778 } 2779 } else { 2780 fprintf(stderr, "%s", type_id_tab[type->kind]); 2781 } 2782 } 2783 2784 void print_item(Item x) { 2785 fprintf(stderr, "<%s: r=%d,a=%d,b=%d,t=", 2786 item_id_tab[x->kind], x->r, x->a, x->b); 2787 print_type(x->type); 2788 fprintf(stderr, ">"); 2789 } 2790 2791 void gen_trace(const char* fn, Item x, Item y) { 2792 if (TRACE_CODEGEN) { 2793 fprintf(stderr, "gen_%-17s", fn); 2794 if (x != nil) { 2795 print_item(x); 2796 } 2797 if (y != nil) { 2798 fprintf(stderr, ", "); 2799 print_item(y); 2800 } 2801 fprintf(stderr, "\n"); 2802 } 2803 } 2804 void gen_trace_n(const char* fn, u32 n, Item x, Item y) { 2805 if (TRACE_CODEGEN) { 2806 fprintf(stderr, "gen_%-17s0x%x, ", fn, n); 2807 if (x != nil) { 2808 print_item(x); 2809 } 2810 if (y != nil) { 2811 fprintf(stderr, ", "); 2812 print_item(y); 2813 } 2814 fprintf(stderr, "\n"); 2815 } 2816 } 2817 void gen_trace_code(const char* msg, u32 pc) { 2818 if (TRACE_CODEGEN) { 2819 u32 ins = ctx.code[pc >> 2]; 2820 char buf[64]; 2821 risc5dis(pc, ins, buf); 2822 fprintf(stderr, "gen_code '%08x: %08x %s' %s\n", pc, ins, buf, msg); 2823 } 2824 } 2825 // ================================================================ 2826 2827 i32 main(int argc, args argv) { 2828 str outname = "out.bin"; 2829 str lstname = nil; 2830 str srcname = nil; 2831 bool dump = false; 2832 bool scan_only = false; 2833 2834 init_ctx(); 2835 ctx.filename = "<commandline>"; 2836 2837 while (argc > 1) { 2838 if (!strcmp(argv[1],"-o")) { 2839 if (argc < 2) { 2840 error("option -o requires argument"); 2841 } 2842 outname = argv[2]; 2843 argc--; 2844 argv++; 2845 } else if (!strcmp(argv[1], "-l")) { 2846 if (argc < 2) { 2847 error("option -l requires argument"); 2848 } 2849 lstname = argv[2]; 2850 argc--; 2851 argv++; 2852 } else if (!strcmp(argv[1], "-p")) { 2853 dump = true; 2854 } else if (!strcmp(argv[1], "-v")) { 2855 TRACE_CODEGEN = true; 2856 } else if (!strcmp(argv[1], "-s")) { 2857 scan_only = true; 2858 } else if (!strcmp(argv[1], "-A")) { 2859 ctx.flags |= cfAbortOnError; 2860 } else if (argv[1][0] == '-') { 2861 error("unknown option: %s", argv[1]); 2862 } else { 2863 if (srcname != nil) { 2864 error("multiple source files disallowed"); 2865 } else { 2866 srcname = argv[1]; 2867 } 2868 } 2869 argc--; 2870 argv++; 2871 } 2872 2873 if (srcname == nil) { 2874 printf( 2875 "usage: compiler [ <option> | <sourcefilename> ]*\n" 2876 "\n" 2877 "options: -o <filename> binary output (default 'out.bin')\n" 2878 " -l <filename> listing output (default none)\n" 2879 " -v trace code generation\n" 2880 " -s scan only\n" 2881 " -p dump type context\n" 2882 " -A abort on error\n"); 2883 return 0; 2884 } 2885 ctx.filename = srcname; 2886 2887 load(srcname); 2888 ctx.linenumber = 1; 2889 ctx.lineoffset = 0; 2890 // prime the lexer 2891 scan(); 2892 2893 if (scan_only) { 2894 ctx.flags |= 1; 2895 while (true) { 2896 next(); 2897 print(); 2898 if (ctx.tok == tEOF) { 2899 return 0; 2900 } 2901 } 2902 } 2903 2904 gen_start(); 2905 parse_program(); 2906 gen_end(); 2907 gen_write(outname); 2908 if (lstname != nil) { 2909 gen_listing(lstname, ctx.filename); 2910 } 2911 if (dump) { 2912 dump_context(); 2913 } 2914 2915 return 0; 2916 }