spl

systems programming language
git clone http://frotz.net/git/spl.git
Log | Files | Refs | README | LICENSE

parser.spl (13506B)


      1 // Copyright 2023, Brian Swetland <swetland@frotz.net>
      2 // Licensed under the Apache License, Version 2.0.
      3 
      4 // ================================================================
      5 // parser
      6 
      7 fn expected(what str) {
      8 	error("expected ", what, ", found ", @str tnames[ctx.tok]);
      9 }
     10 
     11 fn expect(tok Token) {
     12 	if ctx.tok != tok {
     13 		expected(tnames[tok]);
     14 	}
     15 }
     16 
     17 fn require(tok Token) {
     18 	expect(tok);
     19 	next();
     20 }
     21 
     22 fn parse_name(what str) String {
     23 	if ctx.tok != tIDN {
     24 		expected(what);
     25 	}
     26 	var s String = ctx.ident;
     27 	next();
     28 	return s;
     29 }
     30 
     31 fn parse_symbol(what str) Ast {
     32 	var name String = parse_name(what);
     33 	var sym Symbol = symbol_find(name);
     34 	return ast_make_symbol(name, sym);
     35 }
     36 
     37 fn parse_ident() Ast {
     38 	var node Ast = parse_symbol("identifier");
     39 
     40 	if (node.sym == nil) && (ctx.tok != tOPAREN) {
     41 		error("undefined identifier '", @str node.name.text, "'");
     42 	}
     43 
     44 	if ctx.tok == tOPAREN {
     45 		// function call
     46 		next();
     47 		node = ast_make_l(AST_CALL, node);
     48 		var last Ast = nil;
     49 
     50 		while ctx.tok != tCPAREN {
     51 			if ctx.tok == tAT {
     52 				// type annotation for varargs hack
     53 				next();
     54 				parse_type(false);
     55 			}
     56 
     57 			var expr Ast = parse_expr();
     58 			if last != nil {
     59 				last.next = expr;
     60 			} else {
     61 				node.right = expr;
     62 			}
     63 			last = expr;
     64 
     65 			if ctx.tok != tCPAREN {
     66 				require(tCOMMA);
     67 			}
     68 		}
     69 		next();
     70 	}
     71 
     72 	while true {
     73 		if ctx.tok == tDOT {
     74 			// field access
     75 			next();
     76 			node = ast_make_lr(AST_FIELD, node, parse_symbol("field name"));
     77 		} else if ctx.tok == tOBRACK {
     78 			// array access
     79 			next();
     80 			node = ast_make_lr(AST_INDEX, node, parse_expr());
     81 			require(tCBRACK);
     82 		} else {
     83 			return node;
     84 		}
     85 	}
     86 
     87 	return nil; // unreachable
     88 }
     89 
     90 fn parse_primary_expr() Ast {
     91 	var node Ast;
     92 	if ctx.tok == tNUM {
     93 		node = ast_make_const(ctx.num, ctx.type_i32);
     94 		next();
     95 	} else if ctx.tok == tSTR {
     96 		node = ast_make_simple(AST_STRING, 0);
     97 		node.name = string_make(ctx.tmp, strlen(ctx.tmp));
     98 		next();
     99 	} else if ctx.tok == tTRUE {
    100 		node = ast_make_const(1, ctx.type_bool);
    101 		next();
    102 	} else if ctx.tok == tFALSE {
    103 		node = ast_make_const(0, ctx.type_bool);
    104 		next();
    105 	} else if ctx.tok == tNIL {
    106 		node = ast_make_const(0, ctx.type_nil);
    107 		next();
    108 	} else if ctx.tok == tOPAREN {
    109 		next();
    110 		node = parse_expr();
    111 		require(tCPAREN);
    112 	} else if ctx.tok == tNEW {
    113 		next();
    114 		require(tOPAREN);
    115 		node = ast_make_simple(AST_NEW, 0);
    116 		node.name = parse_name("type name");
    117 		require(tCPAREN);
    118 	} else if ctx.tok == tIDN {
    119 		node = parse_ident();
    120 	} else {
    121 		error("invalid expression");
    122 	}
    123 	return node;
    124 }
    125 
    126 fn parse_unary_expr() Ast {
    127 	var op u32 = ctx.tok;
    128 	if op == tPLUS {
    129 		next();
    130 		return parse_unary_expr();
    131 	} else if op == tMINUS {
    132 		next();
    133 		return ast_make_l(AST_NEG, parse_unary_expr());
    134 	} else if op == tBANG {
    135 		next();
    136 		return ast_make_l(AST_BOOL_NOT, parse_unary_expr());
    137 	} else if op == tNOT {
    138 		next();
    139 		return ast_make_l(AST_NOT, parse_unary_expr());
    140 	} else if op == tAMP {
    141 		error("dereference not supported");
    142 		//next();
    143 		//parse_unary_expr();
    144 	} else {
    145 		return parse_primary_expr();
    146 	}
    147 	return nil; // unreachable
    148 }
    149 
    150 fn parse_mul_expr() Ast {
    151 	var node Ast = parse_unary_expr();
    152 	while ctx.tok & tcMASK == tcMULOP {
    153 		var op u32 = (ctx.tok - tSTAR) + AST_MUL;
    154 		next();
    155 		node = ast_make_lr(op, node, parse_unary_expr());
    156 	}
    157 	return node;
    158 }
    159 
    160 fn parse_add_expr() Ast {
    161 	var node Ast = parse_mul_expr();
    162 	while ctx.tok & tcMASK == tcADDOP {
    163 		var op u32 = (ctx.tok - tPLUS) + AST_ADD;
    164 		next();
    165 		node = ast_make_lr(op, node, parse_mul_expr());
    166 	}
    167 	return node;
    168 }
    169 
    170 fn parse_rel_expr() Ast {
    171 	var node Ast = parse_add_expr();
    172 	if ctx.tok & tcMASK == tcRELOP {
    173 		var op u32 = (ctx.tok - tEQ) + AST_EQ;
    174 		next();
    175 		node = ast_make_lr(op, node, parse_add_expr());
    176 	}
    177 	return node;
    178 }
    179 
    180 fn parse_and_expr() Ast {
    181 	var node Ast = parse_rel_expr();
    182 	if ctx.tok == tAND {
    183 		while ctx.tok == tAND {
    184 			next();
    185 			node = ast_make_lr(AST_BOOL_AND, node, parse_rel_expr());
    186 		}
    187 	}
    188 	return node;
    189 }
    190 
    191 fn parse_expr() Ast {
    192 	var node Ast = parse_and_expr();
    193 	if ctx.tok == tOR {
    194 		while ctx.tok == tOR {
    195 			next();
    196 			node = ast_make_lr(AST_BOOL_OR, node, parse_and_expr());
    197 		}
    198 	}
    199 	return node;
    200 }
    201 
    202 fn parse_struct_type(name String) Type {
    203 	var type Type = type_find(name);
    204 
    205 	if type == nil {
    206 		type = type_make(name, TYPE_STRUCT, nil, nil, 0);
    207 	} else {
    208 		if type.kind == TYPE_UNDEFINED {
    209 			// resolve forward ref
    210 			type.kind = TYPE_STRUCT;
    211 		} else {
    212 			error("cannot redefine struct '", @str name.text, "'");
    213 		}
    214 	};
    215 	scope_push(SCOPE_STRUCT);
    216 	require(tOBRACE);
    217 	while true {
    218 		if ctx.tok == tCBRACE {
    219 			next();
    220 			break;
    221 		}
    222 		var fname String = parse_name("field name");
    223 		var kind SymbolKind = SYMBOL_FLD;
    224 		if ctx.tok == tSTAR {
    225 			next();
    226 			kind = SYMBOL_PTR;
    227 		}
    228 		var ftype Type = parse_type(true);
    229 		var sym Symbol = symbol_make(fname, ftype);
    230 		sym.kind = kind;
    231 		if ctx.tok != tCBRACE {
    232 			require(tCOMMA);
    233 		}
    234 	}
    235 	type.list = scope_pop();
    236 	return type;
    237 }
    238 
    239 
    240 fn parse_array_type() Type {
    241 	var type Type;
    242 	var nelem u32 = 0;
    243 	if ctx.tok == tCBRACK {
    244 		// TODO: slices
    245 		next();
    246 		type = type_make(nil, TYPE_ARRAY, parse_type(false), nil, 0);
    247 	} else {
    248 		if ctx.tok != tNUM {
    249 			error("array size must be numeric");
    250 		}
    251 		nelem = ctx.num;
    252 		next();
    253 		require(tCBRACK);
    254 		type = type_make(nil, TYPE_ARRAY, parse_type(false), nil, nelem);
    255 	}
    256 	// TODO: type.name?
    257 	return type;
    258 }
    259 
    260 fn parse_type(fwd_ref_ok u32) Type {
    261 	if ctx.tok == tSTAR { // pointer-to
    262 		error("pointer types not supported");
    263 		//next();
    264 		//return type_make(nil, TYPE_POINTER, parse_type(true), nil, 0);
    265 	} else if ctx.tok == tOBRACK { // array-of
    266 		next();
    267 		return parse_array_type();
    268 	} else if ctx.tok == tFN {
    269 		error("func types not supported");
    270 		//next();
    271 		//return parse_func_type();
    272 	} else if ctx.tok == tSTRUCT {
    273 		error ("anonymous struct types not supported");
    274 		//next();
    275 		//return parse_struct_type(nil);
    276 	} else if ctx.tok == tIDN {
    277 		var name String = ctx.ident;
    278 		next();
    279 		var type Type = type_find(name);
    280 		if type == nil {
    281 			if fwd_ref_ok {
    282 				type = type_make(name, TYPE_UNDEFINED, nil, nil, 0);
    283 			} else {
    284 				error("undefined type '", @str name.text, "' not usable here");
    285 			}
    286 		}
    287 		return type;
    288 	} else {
    289 		expected("type");
    290 	}
    291 	return nil;
    292 }
    293 
    294 fn parse_while() Ast {
    295 	// while expr { block }
    296 	var expr Ast = parse_expr();
    297 	require(tOBRACE);
    298 	scope_push(SCOPE_LOOP);
    299 	var block Ast = parse_block();
    300 	scope_pop();
    301 	return ast_make_lr(AST_WHILE, expr, block);
    302 }
    303 
    304 fn parse_if() Ast {
    305 	// if expr { block }
    306 
    307 	var expr Ast = parse_expr();
    308 	require(tOBRACE);
    309 	scope_push(SCOPE_BLOCK);
    310 	var block Ast = parse_block();
    311 	scope_pop();
    312 
    313 	var last Ast = ast_make_lr(AST_CASE, expr, block);
    314 	var stmt Ast = ast_make_l(AST_IF, last);
    315 
    316 	while ctx.tok == tELSE {
    317 		// ... else ...
    318 		next();
    319 		if ctx.tok == tIF {
    320 			// ... if expr { block }
    321 			next();
    322 			expr = parse_expr();
    323 			require(tOBRACE);
    324 			scope_push(SCOPE_BLOCK);
    325 			block = parse_block();
    326 			scope_pop();
    327 			last.next = ast_make_lr(AST_CASE, expr, block);
    328 			last = last.next;
    329 		} else {
    330 			// ... { block }
    331 			require(tOBRACE);
    332 			scope_push(SCOPE_BLOCK);
    333 			block = parse_block();
    334 			scope_pop();
    335 			last.next = ast_make_l(AST_ELSE, block);
    336 			break;
    337 		}
    338 	}
    339 	return stmt;
    340 }
    341 
    342 fn parse_return() Ast {
    343 	// TODO check for return required/type
    344 	var node Ast = ast_make_simple(AST_RETURN, 0);
    345 	if ctx.tok == tSEMI {
    346 		next();
    347 	} else {
    348 		//	error("return types do not match");
    349 		node.left = parse_expr();
    350 		require(tSEMI);
    351 	}
    352 	return node;
    353 }
    354 
    355 fn parse_break() Ast {
    356 	// TODO: break-to-labeled-loop support
    357 	var scope Scope = scope_find(SCOPE_LOOP);
    358 	if scope == nil {
    359 		error("break must be used from inside a looping construct");
    360 	}
    361 	require(tSEMI);
    362 	return ast_make_simple(AST_BREAK, 0);
    363 }
    364 
    365 fn parse_continue() Ast {
    366 	// TODO: continue-to-labeled-loop support
    367 	var scope Scope = scope_find(SCOPE_LOOP);
    368 	if scope == nil {
    369 		error("continue must be used from inside a looping construct");
    370 	}
    371 	require(tSEMI);
    372 	return ast_make_simple(AST_CONTINUE, 0);
    373 }
    374 
    375 fn parse_struct_init(sym Symbol) {
    376 	while true {
    377 		if ctx.tok == tCBRACE {
    378 			next();
    379 			break;
    380 		}
    381 		var name String = parse_name("field name");
    382 		var field Symbol = sym.type.list;
    383 		while true { // TODO: field_find
    384 			if field == nil {
    385 				error("structure has no '", @str name.text, "' field");
    386 			}
    387 			if field.name == name {
    388 				break;
    389 			}
    390 			field = field.next;
    391 		}
    392 		require(tCOLON);
    393 		if ctx.tok == tOBRACE {
    394 			next();
    395 			parse_struct_init(field);
    396 		} else {
    397 			parse_expr();
    398 		}
    399 		if ctx.tok != tCBRACE {
    400 			require(tCOMMA);
    401 		}
    402 	}
    403 }
    404 
    405 fn parse_array_init(sym Symbol) {
    406 	while true {
    407 		if ctx.tok == tCBRACE {
    408 			next();
    409 			break;
    410 		}
    411 		parse_expr();
    412 		if ctx.tok != tCBRACE {
    413 			require(tCOMMA);
    414 		}
    415 	}
    416 }
    417 
    418 fn parse_var() {
    419 	// TODO: global vs local
    420 	// TODO: generate assignment/initialization
    421 	var name String = parse_name("variable name");
    422 	var type Type = parse_type(false);
    423 	var sym Symbol = symbol_make(name, type);
    424 
    425 	if ctx.tok == tASSIGN {
    426 		next();
    427 		if ctx.tok == tOBRACE {
    428 			next();
    429 			if type.kind == TYPE_STRUCT {
    430 				parse_struct_init(sym);
    431 			} else if type.kind == TYPE_ARRAY {
    432 				parse_array_init(sym);
    433 			} else {
    434 				error("type ", @str type.name.text,
    435 					" cannot be initialized with {} expr");
    436 			}
    437 		} else {
    438 			parse_expr();
    439 		}
    440 	} else {
    441 		// default init
    442 	}
    443 	require(tSEMI);
    444 }
    445 
    446 fn _parse_expr_statement() Ast {
    447 	var node Ast = parse_expr();
    448 	var expr Ast = nil;
    449 	var op u32;
    450 
    451 	if ctx.tok == tASSIGN {
    452 		// basic assignment
    453 		next();
    454 		return ast_make_lr(AST_ASSIGN, node, parse_expr());
    455 	} else if (ctx.tok & tcMASK) == tcAEQOP {
    456 		// +=, etc
    457 		op = (ctx.tok - tADDEQ) + AST_ADD;
    458 		next();
    459 		expr = parse_expr();
    460 	} else if (ctx.tok & tcMASK) == tcMEQOP {
    461 		// *=, etc
    462 		op = (ctx.tok - tMULEQ) + AST_MUL;
    463 		next();
    464 		expr = parse_expr();
    465 	} else if (ctx.tok == tINC) {
    466 		op = AST_ADD;
    467 		next();
    468 		expr = ast_make_const(1, ctx.type_i32);
    469 	} else if (ctx.tok == tDEC) {
    470 		op = AST_SUB;
    471 		expr = ast_make_const(1, ctx.type_i32);
    472 	} else {
    473 		// simple expression
    474 		return node;
    475 	}
    476 
    477 	// TODO duplicate node instead of sharing it
    478 	expr = ast_make_lr(op, node, expr);
    479 	return ast_make_lr(AST_ASSIGN, node, expr);
    480 }
    481 
    482 fn parse_expr_statement() Ast {
    483 	var stmt Ast = ast_make_l(AST_EXPR, _parse_expr_statement());
    484 	require(tSEMI);
    485 	return stmt;
    486 }
    487 
    488 fn parse_block() Ast {
    489 	var block Ast = ast_make_simple(AST_BLOCK, 0);
    490 	var last Ast = nil;
    491 	var node Ast;
    492 	while true {
    493 		if ctx.tok == tCBRACE {
    494 			next();
    495 			break;
    496 		} else if ctx.tok == tRETURN {
    497 			next();
    498 			node = parse_return();
    499 		} else if ctx.tok == tBREAK {
    500 			next();
    501 			node = parse_break();
    502 		} else if ctx.tok == tCONTINUE {
    503 			next();
    504 			node = parse_continue();
    505 		} else if ctx.tok == tWHILE {
    506 			next();
    507 			node = parse_while();
    508 		} else if ctx.tok == tIF {
    509 			next();
    510 			node = parse_if();
    511 		} else if ctx.tok == tVAR {
    512 			next();
    513 			parse_var();
    514 		} else if ctx.tok == tSEMI {
    515 			next();
    516 			// empty statement
    517 			continue;
    518 		} else {
    519 			node = parse_expr_statement();
    520 		}
    521 		if last == nil {
    522 			block.left = node;
    523 		} else {
    524 			last.next = node;
    525 		}
    526 		last = node;
    527 	}
    528 	return block;
    529 }
    530 
    531 fn parse_param(fname String) Symbol {
    532 	var pname String = parse_name("parameter name");
    533 	var ptype Type = parse_type(false);
    534 	if symbol_find(pname) != nil {
    535 		error("duplicate parameter name '", @str pname.text, "'");
    536 	}
    537 	return symbol_make(pname, ptype);
    538 }
    539 
    540 fn parse_fn_body(sym Symbol) Ast {
    541 	ctx.cur_fn = sym;
    542 	require(tOBRACE);
    543 	scope_push(SCOPE_FUNC); // parameters, from fn
    544 	scope_push(SCOPE_BLOCK);
    545 	var node Ast = parse_block();
    546 	scope_pop(); // stow in node?
    547 	scope_pop();
    548 	ctx.cur_fn = nil;
    549 	return node;
    550 }
    551 
    552 fn parse_fn() Ast {
    553 	var fname String = parse_name("function name");
    554 	var rtype Type = ctx.type_void;
    555 
    556 	scope_push(SCOPE_FUNC);
    557 
    558 	require(tOPAREN);
    559 	var n u32 = 0;
    560 	if ctx.tok != tCPAREN {
    561 		parse_param(fname);
    562 		while ctx.tok == tCOMMA {
    563 			next();
    564 			parse_param(fname);
    565 		}
    566 		n++;
    567 	}
    568 	require(tCPAREN);
    569 
    570 	if ctx.tok != tOBRACE {
    571 		rtype = parse_type(false);
    572 	}
    573 
    574 	var sym Symbol = symbol_make_global(fname, rtype);
    575 	sym.kind = SYMBOL_FN;
    576 	sym.type = type_make(fname, TYPE_FN, rtype, nil, n);
    577 
    578 	var node Ast = ast_make_simple(AST_FUNC, 0);
    579 	node.name = fname;
    580 	node.sym = sym;
    581 	node.right = parse_fn_body(sym);
    582 
    583 	// save parameters
    584 	sym.type.list = scope_pop();
    585 
    586 	return node;
    587 }
    588 
    589 fn parse_enum_def() {
    590 	if ctx.tok == tIDN {
    591 		var name String = parse_name("enum name");
    592 		type_make(name, TYPE_ENUM, nil, nil, 0);
    593 	}
    594 
    595 	require(tOBRACE);
    596 	var val u32 = 0;
    597 	while ctx.tok != tCBRACE {
    598 		var name String = parse_name("enum tag name");
    599 		var sym Symbol = symbol_find(name);
    600 		if sym != nil {
    601 			error("cannot redfine '", @str name.text, "' as enum tag");
    602 		}
    603 		sym = symbol_make_global(name, ctx.type_u32);
    604 		sym.kind = SYMBOL_DEF;
    605 		if ctx.tok == tASSIGN {
    606 			next();
    607 			parse_expr();
    608 			// TODO val <- expr
    609 		} else {
    610 			val++;
    611 		}
    612 		require(tCOMMA);
    613 	}
    614 	require(tCBRACE);
    615 	require(tSEMI);
    616 }
    617 
    618 fn parse_init() {
    619 	ctx.program = ast_make_simple(AST_PROGRAM, 0);
    620 	ctx.last = nil;
    621 }
    622 
    623 fn parse_program() Ast {
    624 	while true {
    625 		if ctx.tok == tENUM {
    626 			next();
    627 			parse_enum_def();
    628 		} else if ctx.tok == tSTRUCT {
    629 			next();
    630 			var name String = parse_name("struct name");
    631 			parse_struct_type(name);
    632 			require(tSEMI);
    633 		} else if ctx.tok == tFN {
    634 			next();
    635 			var node Ast = parse_fn();
    636 			if ctx.last == nil {
    637 				ctx.program.left = node;
    638 			} else {
    639 				ctx.last.next = node;
    640 			}
    641 			ctx.last = node;
    642 		} else if ctx.tok == tVAR {
    643 			next();
    644 			parse_var();
    645 		} else if ctx.tok == tEOF {
    646 			break;
    647 		} else {
    648 			expected("function, variable, or type definition");
    649 		}
    650 	}
    651 	return ctx.program;
    652 }
    653