parser.spl (13506B)
1 // Copyright 2023, Brian Swetland <swetland@frotz.net> 2 // Licensed under the Apache License, Version 2.0. 3 4 // ================================================================ 5 // parser 6 7 fn expected(what str) { 8 error("expected ", what, ", found ", @str tnames[ctx.tok]); 9 } 10 11 fn expect(tok Token) { 12 if ctx.tok != tok { 13 expected(tnames[tok]); 14 } 15 } 16 17 fn require(tok Token) { 18 expect(tok); 19 next(); 20 } 21 22 fn parse_name(what str) String { 23 if ctx.tok != tIDN { 24 expected(what); 25 } 26 var s String = ctx.ident; 27 next(); 28 return s; 29 } 30 31 fn parse_symbol(what str) Ast { 32 var name String = parse_name(what); 33 var sym Symbol = symbol_find(name); 34 return ast_make_symbol(name, sym); 35 } 36 37 fn parse_ident() Ast { 38 var node Ast = parse_symbol("identifier"); 39 40 if (node.sym == nil) && (ctx.tok != tOPAREN) { 41 error("undefined identifier '", @str node.name.text, "'"); 42 } 43 44 if ctx.tok == tOPAREN { 45 // function call 46 next(); 47 node = ast_make_l(AST_CALL, node); 48 var last Ast = nil; 49 50 while ctx.tok != tCPAREN { 51 if ctx.tok == tAT { 52 // type annotation for varargs hack 53 next(); 54 parse_type(false); 55 } 56 57 var expr Ast = parse_expr(); 58 if last != nil { 59 last.next = expr; 60 } else { 61 node.right = expr; 62 } 63 last = expr; 64 65 if ctx.tok != tCPAREN { 66 require(tCOMMA); 67 } 68 } 69 next(); 70 } 71 72 while true { 73 if ctx.tok == tDOT { 74 // field access 75 next(); 76 node = ast_make_lr(AST_FIELD, node, parse_symbol("field name")); 77 } else if ctx.tok == tOBRACK { 78 // array access 79 next(); 80 node = ast_make_lr(AST_INDEX, node, parse_expr()); 81 require(tCBRACK); 82 } else { 83 return node; 84 } 85 } 86 87 return nil; // unreachable 88 } 89 90 fn parse_primary_expr() Ast { 91 var node Ast; 92 if ctx.tok == tNUM { 93 node = ast_make_const(ctx.num, ctx.type_i32); 94 next(); 95 } else if ctx.tok == tSTR { 96 node = ast_make_simple(AST_STRING, 0); 97 node.name = string_make(ctx.tmp, strlen(ctx.tmp)); 98 next(); 99 } else if ctx.tok == tTRUE { 100 node = ast_make_const(1, ctx.type_bool); 101 next(); 102 } else if ctx.tok == tFALSE { 103 node = ast_make_const(0, ctx.type_bool); 104 next(); 105 } else if ctx.tok == tNIL { 106 node = ast_make_const(0, ctx.type_nil); 107 next(); 108 } else if ctx.tok == tOPAREN { 109 next(); 110 node = parse_expr(); 111 require(tCPAREN); 112 } else if ctx.tok == tNEW { 113 next(); 114 require(tOPAREN); 115 node = ast_make_simple(AST_NEW, 0); 116 node.name = parse_name("type name"); 117 require(tCPAREN); 118 } else if ctx.tok == tIDN { 119 node = parse_ident(); 120 } else { 121 error("invalid expression"); 122 } 123 return node; 124 } 125 126 fn parse_unary_expr() Ast { 127 var op u32 = ctx.tok; 128 if op == tPLUS { 129 next(); 130 return parse_unary_expr(); 131 } else if op == tMINUS { 132 next(); 133 return ast_make_l(AST_NEG, parse_unary_expr()); 134 } else if op == tBANG { 135 next(); 136 return ast_make_l(AST_BOOL_NOT, parse_unary_expr()); 137 } else if op == tNOT { 138 next(); 139 return ast_make_l(AST_NOT, parse_unary_expr()); 140 } else if op == tAMP { 141 error("dereference not supported"); 142 //next(); 143 //parse_unary_expr(); 144 } else { 145 return parse_primary_expr(); 146 } 147 return nil; // unreachable 148 } 149 150 fn parse_mul_expr() Ast { 151 var node Ast = parse_unary_expr(); 152 while ctx.tok & tcMASK == tcMULOP { 153 var op u32 = (ctx.tok - tSTAR) + AST_MUL; 154 next(); 155 node = ast_make_lr(op, node, parse_unary_expr()); 156 } 157 return node; 158 } 159 160 fn parse_add_expr() Ast { 161 var node Ast = parse_mul_expr(); 162 while ctx.tok & tcMASK == tcADDOP { 163 var op u32 = (ctx.tok - tPLUS) + AST_ADD; 164 next(); 165 node = ast_make_lr(op, node, parse_mul_expr()); 166 } 167 return node; 168 } 169 170 fn parse_rel_expr() Ast { 171 var node Ast = parse_add_expr(); 172 if ctx.tok & tcMASK == tcRELOP { 173 var op u32 = (ctx.tok - tEQ) + AST_EQ; 174 next(); 175 node = ast_make_lr(op, node, parse_add_expr()); 176 } 177 return node; 178 } 179 180 fn parse_and_expr() Ast { 181 var node Ast = parse_rel_expr(); 182 if ctx.tok == tAND { 183 while ctx.tok == tAND { 184 next(); 185 node = ast_make_lr(AST_BOOL_AND, node, parse_rel_expr()); 186 } 187 } 188 return node; 189 } 190 191 fn parse_expr() Ast { 192 var node Ast = parse_and_expr(); 193 if ctx.tok == tOR { 194 while ctx.tok == tOR { 195 next(); 196 node = ast_make_lr(AST_BOOL_OR, node, parse_and_expr()); 197 } 198 } 199 return node; 200 } 201 202 fn parse_struct_type(name String) Type { 203 var type Type = type_find(name); 204 205 if type == nil { 206 type = type_make(name, TYPE_STRUCT, nil, nil, 0); 207 } else { 208 if type.kind == TYPE_UNDEFINED { 209 // resolve forward ref 210 type.kind = TYPE_STRUCT; 211 } else { 212 error("cannot redefine struct '", @str name.text, "'"); 213 } 214 }; 215 scope_push(SCOPE_STRUCT); 216 require(tOBRACE); 217 while true { 218 if ctx.tok == tCBRACE { 219 next(); 220 break; 221 } 222 var fname String = parse_name("field name"); 223 var kind SymbolKind = SYMBOL_FLD; 224 if ctx.tok == tSTAR { 225 next(); 226 kind = SYMBOL_PTR; 227 } 228 var ftype Type = parse_type(true); 229 var sym Symbol = symbol_make(fname, ftype); 230 sym.kind = kind; 231 if ctx.tok != tCBRACE { 232 require(tCOMMA); 233 } 234 } 235 type.list = scope_pop(); 236 return type; 237 } 238 239 240 fn parse_array_type() Type { 241 var type Type; 242 var nelem u32 = 0; 243 if ctx.tok == tCBRACK { 244 // TODO: slices 245 next(); 246 type = type_make(nil, TYPE_ARRAY, parse_type(false), nil, 0); 247 } else { 248 if ctx.tok != tNUM { 249 error("array size must be numeric"); 250 } 251 nelem = ctx.num; 252 next(); 253 require(tCBRACK); 254 type = type_make(nil, TYPE_ARRAY, parse_type(false), nil, nelem); 255 } 256 // TODO: type.name? 257 return type; 258 } 259 260 fn parse_type(fwd_ref_ok u32) Type { 261 if ctx.tok == tSTAR { // pointer-to 262 error("pointer types not supported"); 263 //next(); 264 //return type_make(nil, TYPE_POINTER, parse_type(true), nil, 0); 265 } else if ctx.tok == tOBRACK { // array-of 266 next(); 267 return parse_array_type(); 268 } else if ctx.tok == tFN { 269 error("func types not supported"); 270 //next(); 271 //return parse_func_type(); 272 } else if ctx.tok == tSTRUCT { 273 error ("anonymous struct types not supported"); 274 //next(); 275 //return parse_struct_type(nil); 276 } else if ctx.tok == tIDN { 277 var name String = ctx.ident; 278 next(); 279 var type Type = type_find(name); 280 if type == nil { 281 if fwd_ref_ok { 282 type = type_make(name, TYPE_UNDEFINED, nil, nil, 0); 283 } else { 284 error("undefined type '", @str name.text, "' not usable here"); 285 } 286 } 287 return type; 288 } else { 289 expected("type"); 290 } 291 return nil; 292 } 293 294 fn parse_while() Ast { 295 // while expr { block } 296 var expr Ast = parse_expr(); 297 require(tOBRACE); 298 scope_push(SCOPE_LOOP); 299 var block Ast = parse_block(); 300 scope_pop(); 301 return ast_make_lr(AST_WHILE, expr, block); 302 } 303 304 fn parse_if() Ast { 305 // if expr { block } 306 307 var expr Ast = parse_expr(); 308 require(tOBRACE); 309 scope_push(SCOPE_BLOCK); 310 var block Ast = parse_block(); 311 scope_pop(); 312 313 var last Ast = ast_make_lr(AST_CASE, expr, block); 314 var stmt Ast = ast_make_l(AST_IF, last); 315 316 while ctx.tok == tELSE { 317 // ... else ... 318 next(); 319 if ctx.tok == tIF { 320 // ... if expr { block } 321 next(); 322 expr = parse_expr(); 323 require(tOBRACE); 324 scope_push(SCOPE_BLOCK); 325 block = parse_block(); 326 scope_pop(); 327 last.next = ast_make_lr(AST_CASE, expr, block); 328 last = last.next; 329 } else { 330 // ... { block } 331 require(tOBRACE); 332 scope_push(SCOPE_BLOCK); 333 block = parse_block(); 334 scope_pop(); 335 last.next = ast_make_l(AST_ELSE, block); 336 break; 337 } 338 } 339 return stmt; 340 } 341 342 fn parse_return() Ast { 343 // TODO check for return required/type 344 var node Ast = ast_make_simple(AST_RETURN, 0); 345 if ctx.tok == tSEMI { 346 next(); 347 } else { 348 // error("return types do not match"); 349 node.left = parse_expr(); 350 require(tSEMI); 351 } 352 return node; 353 } 354 355 fn parse_break() Ast { 356 // TODO: break-to-labeled-loop support 357 var scope Scope = scope_find(SCOPE_LOOP); 358 if scope == nil { 359 error("break must be used from inside a looping construct"); 360 } 361 require(tSEMI); 362 return ast_make_simple(AST_BREAK, 0); 363 } 364 365 fn parse_continue() Ast { 366 // TODO: continue-to-labeled-loop support 367 var scope Scope = scope_find(SCOPE_LOOP); 368 if scope == nil { 369 error("continue must be used from inside a looping construct"); 370 } 371 require(tSEMI); 372 return ast_make_simple(AST_CONTINUE, 0); 373 } 374 375 fn parse_struct_init(sym Symbol) { 376 while true { 377 if ctx.tok == tCBRACE { 378 next(); 379 break; 380 } 381 var name String = parse_name("field name"); 382 var field Symbol = sym.type.list; 383 while true { // TODO: field_find 384 if field == nil { 385 error("structure has no '", @str name.text, "' field"); 386 } 387 if field.name == name { 388 break; 389 } 390 field = field.next; 391 } 392 require(tCOLON); 393 if ctx.tok == tOBRACE { 394 next(); 395 parse_struct_init(field); 396 } else { 397 parse_expr(); 398 } 399 if ctx.tok != tCBRACE { 400 require(tCOMMA); 401 } 402 } 403 } 404 405 fn parse_array_init(sym Symbol) { 406 while true { 407 if ctx.tok == tCBRACE { 408 next(); 409 break; 410 } 411 parse_expr(); 412 if ctx.tok != tCBRACE { 413 require(tCOMMA); 414 } 415 } 416 } 417 418 fn parse_var() { 419 // TODO: global vs local 420 // TODO: generate assignment/initialization 421 var name String = parse_name("variable name"); 422 var type Type = parse_type(false); 423 var sym Symbol = symbol_make(name, type); 424 425 if ctx.tok == tASSIGN { 426 next(); 427 if ctx.tok == tOBRACE { 428 next(); 429 if type.kind == TYPE_STRUCT { 430 parse_struct_init(sym); 431 } else if type.kind == TYPE_ARRAY { 432 parse_array_init(sym); 433 } else { 434 error("type ", @str type.name.text, 435 " cannot be initialized with {} expr"); 436 } 437 } else { 438 parse_expr(); 439 } 440 } else { 441 // default init 442 } 443 require(tSEMI); 444 } 445 446 fn _parse_expr_statement() Ast { 447 var node Ast = parse_expr(); 448 var expr Ast = nil; 449 var op u32; 450 451 if ctx.tok == tASSIGN { 452 // basic assignment 453 next(); 454 return ast_make_lr(AST_ASSIGN, node, parse_expr()); 455 } else if (ctx.tok & tcMASK) == tcAEQOP { 456 // +=, etc 457 op = (ctx.tok - tADDEQ) + AST_ADD; 458 next(); 459 expr = parse_expr(); 460 } else if (ctx.tok & tcMASK) == tcMEQOP { 461 // *=, etc 462 op = (ctx.tok - tMULEQ) + AST_MUL; 463 next(); 464 expr = parse_expr(); 465 } else if (ctx.tok == tINC) { 466 op = AST_ADD; 467 next(); 468 expr = ast_make_const(1, ctx.type_i32); 469 } else if (ctx.tok == tDEC) { 470 op = AST_SUB; 471 expr = ast_make_const(1, ctx.type_i32); 472 } else { 473 // simple expression 474 return node; 475 } 476 477 // TODO duplicate node instead of sharing it 478 expr = ast_make_lr(op, node, expr); 479 return ast_make_lr(AST_ASSIGN, node, expr); 480 } 481 482 fn parse_expr_statement() Ast { 483 var stmt Ast = ast_make_l(AST_EXPR, _parse_expr_statement()); 484 require(tSEMI); 485 return stmt; 486 } 487 488 fn parse_block() Ast { 489 var block Ast = ast_make_simple(AST_BLOCK, 0); 490 var last Ast = nil; 491 var node Ast; 492 while true { 493 if ctx.tok == tCBRACE { 494 next(); 495 break; 496 } else if ctx.tok == tRETURN { 497 next(); 498 node = parse_return(); 499 } else if ctx.tok == tBREAK { 500 next(); 501 node = parse_break(); 502 } else if ctx.tok == tCONTINUE { 503 next(); 504 node = parse_continue(); 505 } else if ctx.tok == tWHILE { 506 next(); 507 node = parse_while(); 508 } else if ctx.tok == tIF { 509 next(); 510 node = parse_if(); 511 } else if ctx.tok == tVAR { 512 next(); 513 parse_var(); 514 } else if ctx.tok == tSEMI { 515 next(); 516 // empty statement 517 continue; 518 } else { 519 node = parse_expr_statement(); 520 } 521 if last == nil { 522 block.left = node; 523 } else { 524 last.next = node; 525 } 526 last = node; 527 } 528 return block; 529 } 530 531 fn parse_param(fname String) Symbol { 532 var pname String = parse_name("parameter name"); 533 var ptype Type = parse_type(false); 534 if symbol_find(pname) != nil { 535 error("duplicate parameter name '", @str pname.text, "'"); 536 } 537 return symbol_make(pname, ptype); 538 } 539 540 fn parse_fn_body(sym Symbol) Ast { 541 ctx.cur_fn = sym; 542 require(tOBRACE); 543 scope_push(SCOPE_FUNC); // parameters, from fn 544 scope_push(SCOPE_BLOCK); 545 var node Ast = parse_block(); 546 scope_pop(); // stow in node? 547 scope_pop(); 548 ctx.cur_fn = nil; 549 return node; 550 } 551 552 fn parse_fn() Ast { 553 var fname String = parse_name("function name"); 554 var rtype Type = ctx.type_void; 555 556 scope_push(SCOPE_FUNC); 557 558 require(tOPAREN); 559 var n u32 = 0; 560 if ctx.tok != tCPAREN { 561 parse_param(fname); 562 while ctx.tok == tCOMMA { 563 next(); 564 parse_param(fname); 565 } 566 n++; 567 } 568 require(tCPAREN); 569 570 if ctx.tok != tOBRACE { 571 rtype = parse_type(false); 572 } 573 574 var sym Symbol = symbol_make_global(fname, rtype); 575 sym.kind = SYMBOL_FN; 576 sym.type = type_make(fname, TYPE_FN, rtype, nil, n); 577 578 var node Ast = ast_make_simple(AST_FUNC, 0); 579 node.name = fname; 580 node.sym = sym; 581 node.right = parse_fn_body(sym); 582 583 // save parameters 584 sym.type.list = scope_pop(); 585 586 return node; 587 } 588 589 fn parse_enum_def() { 590 if ctx.tok == tIDN { 591 var name String = parse_name("enum name"); 592 type_make(name, TYPE_ENUM, nil, nil, 0); 593 } 594 595 require(tOBRACE); 596 var val u32 = 0; 597 while ctx.tok != tCBRACE { 598 var name String = parse_name("enum tag name"); 599 var sym Symbol = symbol_find(name); 600 if sym != nil { 601 error("cannot redfine '", @str name.text, "' as enum tag"); 602 } 603 sym = symbol_make_global(name, ctx.type_u32); 604 sym.kind = SYMBOL_DEF; 605 if ctx.tok == tASSIGN { 606 next(); 607 parse_expr(); 608 // TODO val <- expr 609 } else { 610 val++; 611 } 612 require(tCOMMA); 613 } 614 require(tCBRACE); 615 require(tSEMI); 616 } 617 618 fn parse_init() { 619 ctx.program = ast_make_simple(AST_PROGRAM, 0); 620 ctx.last = nil; 621 } 622 623 fn parse_program() Ast { 624 while true { 625 if ctx.tok == tENUM { 626 next(); 627 parse_enum_def(); 628 } else if ctx.tok == tSTRUCT { 629 next(); 630 var name String = parse_name("struct name"); 631 parse_struct_type(name); 632 require(tSEMI); 633 } else if ctx.tok == tFN { 634 next(); 635 var node Ast = parse_fn(); 636 if ctx.last == nil { 637 ctx.program.left = node; 638 } else { 639 ctx.last.next = node; 640 } 641 ctx.last = node; 642 } else if ctx.tok == tVAR { 643 next(); 644 parse_var(); 645 } else if ctx.tok == tEOF { 646 break; 647 } else { 648 expected("function, variable, or type definition"); 649 } 650 } 651 return ctx.program; 652 } 653