rewriter.c - compiler - Unnamed Compiled Systems Language Project

rewriter.c (17644B)
      1 // Copyright 2020, Brian Swetland <swetland@frotz.net>
      2 // Licensed under the Apache License, Version 2.0.
      3 
      4 #include <stdio.h>
      5 #include <stdlib.h>
      6 #include <stdarg.h>
      7 #include <stdint.h>
      8 #include <stdbool.h>
      9 #include <strings.h>
     10 #include <string.h>
     11 
     12 #include <fcntl.h>
     13 #include <unistd.h>
     14 #include <sys/stat.h>
     15 
// Null pointer constant; this file uses nil wherever C code would
// normally write NULL.
#define nil 0

// Reports a fatal error and terminates the process (defined below).
void error(const char *fmt, ...);

// Short aliases for the fixed-width integer types from <stdint.h>.
typedef uint32_t u32;
typedef int32_t i32;
typedef uint8_t u8;

// NOTE(review): not referenced anywhere in the visible code; presumably
// a limit on function argument counts -- confirm before relying on it.
enum { FNMAXARGS = 8, };
     25 
// token classes (tok & tcMASK)
// Each operator class owns a run of eight consecutive token_t values, so
// clearing the low three bits of a token (tok & tcMASK) yields its class.
enum {
	tcRELOP = 0x08, tcADDOP = 0x10, tcMULOP = 0x18,
	tcAEQOP = 0x20, tcMEQOP = 0x28, tcMASK = 0xF8,
};
     31 
// Token identifiers produced by the lexer.
//
// The first seven rows hold exactly eight entries each so that the
// operator groups start on multiples of eight and line up with the tc*
// class constants.  The tx.. / t2F names are placeholders that only pad a
// row out to eight entries.  Adding 0x10 to an AddOp or MulOp gives the
// matching assignment operator (tPLUS -> tADDEQ, tSTAR -> tMULEQ, ...).
// The order must stay in sync with tnames[].
typedef enum {
	// EndMarks, Braces, Brackets Parens
	tEOF, tEOL, tOBRACE, tCBRACE, tOBRACK, tCBRACK, tOPAREN, tCPAREN,
	// RelOps (do not reorder)
	tEQ, tNE, tLT, tLE, tGT, tGE, tx0E, tx0F,
	// AddOps (do not reorder)
	tPLUS, tMINUS, tPIPE, tCARET, tx14, tx15, tx16, tx17,
	// MulOps (do not reorder)
	tSTAR, tSLASH, tPERCENT, tAMP, tANDNOT, tLEFT, tRIGHT, tx1F,
	// AsnOps (do not reorder)
	tADDEQ, tSUBEQ, tOREQ, tXOREQ, tx24, tx25, tx26, tx27,
	tMULEQ, tDIVEQ, tMODEQ, tANDEQ, tANNEQ, tLSEQ, tRSEQ, t2F,
	// Various, UnaryNot, LogicalOps,
	tSEMI, tCOLON, tDOT, tCOMMA, tNOT, tAND, tOR, tBANG,
	tASSIGN, tINC, tDEC, tHASH, tARROW,
	// Keywords
	tTYPEDEF, tSTRUCT, tVAR, tENUM,
	tIF, tELSE, tWHILE,
	tBREAK, tCONTINUE, tRETURN,
	tFOR, tSWITCH, tCASE,
	tTRUE, tFALSE, tNIL,
	// Identifiers, literals and type names
	tIDN, tNUM, tSTR, tTYPE,
	// used internal to the lexer but never returned
	tSPC, tINV, tDQT, tSQT, tMSC, tTAB
} token_t;
     57 
// Printable spelling of every token, indexed by token_t value.  The
// entries must stay in exactly the same order as the token_t enum;
// placeholder tokens map to the empty string.
char *tnames[] = {
	"<EOF>", "<EOL>", "{",  "}",  "[",   "]",   "(",   ")",
	"==",    "!=",    "<",  "<=", ">",   ">=",  "",    "",
	"+",     "-",     "|",  "^",  "",    "",    "",    "",
	"*",     "/",     "%",  "&",  "&~",  "<<",  ">>",  "",
	"+=",    "-=",    "|=", "^=", "",    "",    "",    "",
	"*=",    "/=",    "%=", "&=", "&~=", "<<=", ">>=", "",
	";",     ":",     ".",  ",",  "~",   "&&",  "||",  "!",
	"=",     "++",    "--", "#", "->",
	"typedef", "struct", "var", "enum",
	"if", "else", "while",
	"break", "continue", "return",
	"for", "switch", "case",
	"true", "false", "nil",
	"<ID>", "<NUM>", "<STR>", "<TYPE>",
	"<SPC>", "<INV>", "<DQT>", "<SQT>", "<MSC>", "<TAB>"
};
     75 
// Maps every possible input byte to the token (or lexer-internal class)
// that a token starting with that byte belongs to.  Eight entries per row.
u8 lextab[256] = {
	// 0x00-0x1F: control characters.  NUL is end of input, TAB is tTAB,
	// LF is tEOL, VT and CR count as spaces, everything else is invalid.
	tEOF, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
	tINV, tTAB, tEOL, tSPC, tINV, tSPC, tINV, tINV,
	tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
	tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
	// 0x20-0x3F: space, punctuation, digits
	tSPC, tBANG, tDQT, tHASH, tMSC, tPERCENT, tAMP, tSQT,
	tOPAREN, tCPAREN, tSTAR, tPLUS, tCOMMA, tMINUS, tDOT, tSLASH,
	tNUM, tNUM, tNUM, tNUM, tNUM, tNUM, tNUM, tNUM,
	tNUM, tNUM, tCOLON, tSEMI, tLT, tASSIGN, tGT, tMSC,
	// 0x40-0x5F: '@', upper-case letters, brackets, '^', '_'
	tMSC, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN,
	tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN,
	tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN,
	tIDN, tIDN, tIDN, tOBRACK, tMSC, tCBRACK, tCARET, tIDN,
	// 0x60-0x7F: '`', lower-case letters, braces, '|', '~', DEL
	tMSC, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN,
	tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN,
	tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN,
	tIDN, tIDN, tIDN, tOBRACE, tPIPE, tCBRACE, tNOT, tINV,
	// 0x80-0xFF: non-ASCII bytes are all invalid
	tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
	tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
	tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
	tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
	tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
	tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
	tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
	tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
	tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
	tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
	tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
	tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
	tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
	tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
	tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
	tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV,
};
    110 
    111 typedef struct StringRec* String;
    112 typedef struct StringRec StringRec;
    113 
    114 
    115 struct StringRec {
    116 	String next;
    117 	u32 len;
    118 	u32 kind;
    119 	char text[0];
    120 };
    121 
// String kind constants.
// NOTE(review): none of these are referenced by the visible code, which
// stores token_t values (tIDN, tTYPE, keyword tokens) in StringRec.kind
// instead -- confirm whether they are still needed.
#define KindNone 0
#define KindType 1
#define KindKeyword 2
    125 
    126 // ------------------------------------------------------------------
    127 
// All scanner and parser state.  There is exactly one instance, the
// global ctx below, which every function in this file reads and updates.
struct CtxRec {
	const char* filename;  // filename of active source
	int fd;                // descriptor of active source; scan() sets it to -1 at end of input

	u8 iobuffer[1024];     // scanner file io buffer
	u32 ionext;            // index of the next unread byte in iobuffer
	u32 iolast;            // number of valid bytes in iobuffer

	u32 linenumber;        // line number of most recent line
	u32 lineoffset;        // position of start of most recent line
	u32 byteoffset;        // position of the most recent character
	u32 flags;             // main() sets bit 0 in scan-only mode; not read in the visible code
	u32 cc;                // scanner: next character

	token_t tok;           // most recent token
	u32 num;               // used for tNUM (including character constants)
	char tmp[256];         // used for tIDN, tSTR;
	String ident;          // used for tIDN

	String strtab;         // TODO: hashtable
};

struct CtxRec ctx;
    151 
    152 String make_string(const char* text, u32 len, u32 kind) {
    153 	// OPT obviously this wants to be a hash table
    154 	String str = ctx.strtab;
    155 	while (str != nil) {
    156 		if ((str->len == len) && (memcmp(text, str->text, len) == 0)) {
    157 			if ((str->kind != kind) && (kind != tIDN)) {
    158 				error("string '%s' already kind %u\n", str->text, str->kind);
    159 			}
    160 			return str;
    161 		}
    162 		str = str->next;
    163 	}
    164 
    165 	str = malloc(sizeof(StringRec) + len + 1);
    166 	str->len = len;
    167 	str->kind = kind;
    168 	memcpy(str->text, text, len);
    169 	str->text[len] = 0;
    170 	str->next = ctx.strtab;
    171 	ctx.strtab = str;
    172 
    173 	return str;
    174 }
    175 
    176 void make_keyword(const char* text, u32 tok) {
    177 	make_string(text, strlen(text), tok);
    178 }
    179 
    180 void make_type(const char* text) {
    181 	make_string(text, strlen(text), tTYPE);
    182 }
    183 
    184 int is_type(String str) {
    185 	return str->kind == 0x1000;
    186 }
    187 
    188 void init_ctx() {
    189 	memset(&ctx, 0, sizeof(ctx));
    190 
    191 	make_type("u8");
    192 	make_type("u32");
    193 	make_type("i32");
    194 	make_type("void");
    195 	make_type("str");
    196 	make_type("strptr");
    197 	make_type("bool");
    198 	make_type("token_t");
    199 
    200 	// pre-intern keywords
    201 	make_keyword("if", tIF);
    202 	//make_keyword("for", tFOR);
    203 	make_keyword("nil", tNIL);
    204 	make_keyword("else", tELSE);
    205 	make_keyword("enum", tENUM);
    206 	make_keyword("true", tTRUE);
    207 	make_keyword("false", tFALSE);
    208 	make_keyword("typedef", tTYPEDEF);
    209 	make_keyword("break", tBREAK);
    210 	make_keyword("while", tWHILE);
    211 	make_keyword("struct", tSTRUCT);
    212 	make_keyword("return", tRETURN);
    213 	make_keyword("continue", tCONTINUE);
    214 }
    215 
    216 void dump_file_line(const char* fn, u32 offset) {
    217 	int fd = open(fn, O_RDONLY);
    218 	if (fd < 0) {
    219 		return;
    220 	}
    221 	if (lseek(fd, offset, SEEK_SET) != offset) {
    222 		close(fd);
    223 		return;
    224 	}
    225 	char line[256];
    226 	int r = read(fd, line, 255);
    227 	if (r > 0) {
    228 		line[r] = 0;
    229 		int n = 0;
    230 		while (n < r) {
    231 			if (line[n] == '\n') {
    232 				line[n] = 0;
    233 				break;
    234 			}
    235 			n++;
    236 		}
    237 		fprintf(stderr, "\n%s", line);
    238 	}
    239 	close(fd);
    240 }
    241 
    242 void error(const char *fmt, ...) {
    243 	va_list ap;
    244 
    245 	fprintf(stderr,"\n\n%s:%d: ", ctx.filename, ctx.linenumber);
    246 	va_start(ap, fmt);
    247 	vfprintf(stderr, fmt, ap);
    248 	va_end(ap);
    249 	if (ctx.linenumber > 0) {
    250 		dump_file_line(ctx.filename, ctx.lineoffset);
    251 	}
    252 	fprintf(stderr, "\n\n");
    253 	exit(1);
    254 }
    255 
    256 void load(const char* filename) {
    257 	ctx.filename = filename;
    258 	ctx.linenumber = 0;
    259 
    260 	if (ctx.fd >= 0) {
    261 		close(ctx.fd);
    262 	}
    263 	ctx.fd = open(filename, O_RDONLY);
    264 	if (ctx.fd < 0) {
    265 		error("cannot open file '%s'", filename);
    266 	}
    267 	ctx.ionext = 0;
    268 	ctx.iolast = 0;
    269 	ctx.linenumber = 1;
    270 	ctx.lineoffset = 0;
    271 	ctx.byteoffset = 0;
    272 }
    273 
    274 int unhex(u32 ch) {
    275 	if ((ch >= '0') && (ch <= '9')) {
    276 		return ch - '0';
    277 	}
    278 	if ((ch >= 'a') && (ch <= 'f')) {
    279 		return ch - 'a' + 10;
    280 	}
    281 	if ((ch >= 'A') && (ch <= 'F')) {
    282 		return ch - 'A' + 10;
    283 	}
    284 	return -1;
    285 }
    286 
    287 u32 scan() {
    288 	while (ctx.ionext == ctx.iolast) {
    289 		if (ctx.fd < 0) {
    290 			ctx.cc = 0;
    291 			return ctx.cc;
    292 		}
    293 		int r = read(ctx.fd, ctx.iobuffer, sizeof(ctx.iobuffer));
    294 		if (r <= 0) {
    295 			ctx.fd = -1;
    296 		} else {
    297 			ctx.iolast = r;
    298 			ctx.ionext = 0;
    299 		}
    300 	}
    301 	ctx.cc = ctx.iobuffer[ctx.ionext];
    302 	ctx.ionext++;
    303 	ctx.byteoffset++;
    304 	return ctx.cc;
    305 }
    306 
    307 u32 unescape(u32 n) {
    308 	if (n == 'n') {
    309 		return 10;
    310 	} else if (n == 'r') {
    311 		return 13;
    312 	} else if (n == 't') {
    313 		return 9;
    314 	} else if (n == '"') {
    315 		return '"';
    316 	} else if (n == '\'') {
    317 		return '\'';
    318 	} else if (n == '\\') {
    319 		return '\\';
    320 	} else if (n == 'x') {
    321 		int x0 = unhex(scan());
    322 		int x1 = unhex(scan());
    323 		if ((x0 < 0) || (x1 < 0)) {
    324 			error("invalid hex escape");
    325 		}
    326 		return (x0 << 4) | x1;
    327 	} else {
    328 		error("invalid escape 0x%02x", n);
    329 		return 0;
    330 	}
    331 }
    332 
    333 token_t scan_string(u32 cc, u32 nc) {
    334 	u32 n = 0;
    335 	while (true) {
    336 		if (nc == '"') {
    337 			nc = scan();
    338 			break;
    339 		} else if (nc == 0) {
    340 			error("unterminated string");
    341 		} else if (nc == '\\') {
    342 			ctx.tmp[n] = unescape(scan());
    343 		} else {
    344 			ctx.tmp[n] = nc;
    345 		}
    346 		nc = scan();
    347 		n++;
    348 		if (n == 255) {
    349 			ctx.tmp[n] = 0;
    350 			error("constant string too large '%s'", ctx.tmp);
    351 		}
    352 	}
    353 	ctx.tmp[n] = 0;
    354 	return tSTR;
    355 }
    356 
    357 token_t scan_keyword(u32 len) {
    358 	ctx.tmp[len] = 0;
    359 	String idn = make_string(ctx.tmp, len, tIDN);
    360 	ctx.ident = idn;
    361 
    362 	return idn->kind;
    363 }
    364 
    365 token_t scan_number(u32 cc, u32 nc) {
    366 	u32 n = 1;
    367 	u32 val = cc - '0';
    368 
    369 	if ((cc == '0') && (nc == 'b')) { // binary
    370 		nc = scan();
    371 		while ((nc == '0') || (nc == '1')) {
    372 			val = (val << 1) | (nc - '0');
    373 			nc = scan();
    374 			n++;
    375 			if (n == 34) {
    376 				error("binary constant too large");
    377 			}
    378 		}
    379 	} else if ((cc == '0') && (nc == 'x')) { // hex
    380 		nc = scan();
    381 		while (true) {
    382 			int tmp = unhex(nc);
    383 			if (tmp == -1) {
    384 				break;
    385 			}
    386 			val = (val << 4) | tmp;
    387 			nc = scan();
    388 			n++;
    389 			if (n == 10) {
    390 				error("hex constant too large");
    391 			}
    392 		}
    393 	} else { // decimal
    394 		while (lextab[nc] == tNUM) {
    395 			u32 tmp = (val * 10) + (nc - '0');
    396 			if (tmp <= val) {
    397 				error("decimal constant too large");
    398 			}
    399 			val = tmp;
    400 			nc = scan();
    401 			n++;
    402 		}
    403 	}
    404 	ctx.num = val;
    405 	return tNUM;
    406 }
    407 
    408 token_t scan_ident(u32 cc, u32 nc) {
    409 	ctx.tmp[0] = cc;
    410 	u32 n = 1;
    411 
    412 	while (true) {
    413 		u32 tok = lextab[nc];
    414 		if ((tok == tIDN) || (tok == tNUM)) {
    415 			ctx.tmp[n] = nc;
    416 			n++;
    417 			if (n == 32) { error("identifier too large"); }
    418 			nc = scan();
    419 		} else {
    420 			break;
    421 		}
    422 	}
    423 	return scan_keyword(n);
    424 }
    425 
    426 token_t _next(int ws) {
    427 	u8 nc = ctx.cc;
    428 	while (true) {
    429 		u8 cc = nc;
    430 		nc = scan();
    431 		u32 tok = lextab[cc];
    432 		if (tok == tNUM) { // 0..9
    433 			return scan_number(cc, nc);
    434 		} else if (tok == tIDN) { // _ A..Z a..z
    435 			return scan_ident(cc, nc);
    436 		} else if (tok == tDQT) { // "
    437 			return scan_string(cc, nc);
    438 		} else if (tok == tSQT) { // '
    439 			ctx.num = nc;
    440 			if (nc == '\\') {
    441 				ctx.num = unescape(scan());
    442 			}
    443 			nc = scan();
    444 			if (nc != '\'') {
    445 				error("unterminated character constant");
    446 			}
    447 			nc = scan();
    448 			return tNUM;
    449 		} else if (tok == tPLUS) {
    450 			if (nc == '+') { tok = tINC; nc = scan(); }
    451 		} else if (tok == tMINUS) {
    452 			if (nc == '-') { tok = tDEC; nc = scan(); }
    453 			else if (nc == '>') { tok = tARROW; nc = scan(); }
    454 		} else if (tok == tAMP) {
    455 			if (nc == '&') { tok = tAND; nc = scan(); }
    456 			else if (nc == '~') { tok = tANDNOT; nc = scan(); }
    457 		} else if (tok == tPIPE) {
    458 			if (nc == '|') { tok = tOR; nc = scan(); }
    459 		} else if (tok == tGT) {
    460 			if (nc == '=') { tok = tGE; nc = scan(); }
    461 			else if (nc == '>') { tok = tRIGHT; nc = scan(); }
    462 		} else if (tok == tLT) {
    463 			if (nc == '=') { tok = tLE; nc = scan(); }
    464 			else if (nc == '<') { tok = tLEFT; nc = scan(); }
    465 		} else if (tok == tASSIGN) {
    466 			if (nc == '=') { tok = tEQ; nc = scan(); }
    467 		} else if (tok == tBANG) {
    468 			if (nc == '=') { tok = tNE; nc = scan(); }
    469 		} else if (tok == tSLASH) {
    470 			if (nc == '/') {
    471 				if (ws) printf("/");
    472 				// comment -- consume until EOL or EOF
    473 				while ((nc != '\n') && (nc != 0)) {
    474 					if (ws) printf("%c", nc);
    475 					nc = scan();
    476 				}
    477 				continue;
    478 			}
    479 		} else if (tok == tHASH) {
    480 			while ((nc != '\n') && (nc != 0)) {
    481 				nc = scan();
    482 			}
    483 			continue;
    484 		} else if (tok == tEOL) {
    485 			ctx.linenumber++;
    486 			ctx.lineoffset = ctx.byteoffset;
    487 			//ctx.xref[ctx.pc / 4] = ctx.linenumber;
    488 			//if (ctx.flags & cfVisibleEOL) {
    489 			//	return tEOL;
    490 			//}
    491 			if (ws) printf("\n");
    492 			continue;
    493 		} else if (tok == tSPC) {
    494 			if (ws) printf(" ");
    495 			continue;
    496 		} else if (tok == tTAB) {
    497 			if (ws) printf("\t");
    498 			continue;
    499 		} else if ((tok == tMSC) || (tok == tINV)) {
    500 			error("unknown character 0x%02x", cc);
    501 		}
    502 
    503 		// if we're an AddOp or MulOp, followed by an '='
    504 		if (((tok & 0xF0) == 0x20) && (nc == '=')) {
    505 			nc = scan();
    506 			// transform us to a XEQ operation
    507 			tok = tok + 0x10;
    508 		}
    509 
    510 		return tok;
    511 	}
    512 }
    513 
    514 token_t next() {
    515 	return (ctx.tok = _next(1));
    516 }
    517 
    518 token_t nextq() {
    519 	return (ctx.tok = _next(0));
    520 }
    521 
    522 void printstr() {
    523 	u32 n = 0;
    524 	printf("\"");
    525 	while (n < 256) {
    526 		u32 ch = ctx.tmp[n];
    527 		if (ch == 0) {
    528 			break;
    529 		} else if ((ch < ' ') || (ch > '~')) {
    530 			printf("\\x%02x", ch);
    531 		} else if ((ch == '"') || (ch == '\\')) {
    532 			printf("\\%c", ch);
    533 		} else {
    534 			printf("%c", ch);
    535 		}
    536 		n++;
    537 	}
    538 	printf("\"");
    539 }
    540 
    541 void print() {
    542 	if (ctx.tok == tNUM) {
    543 		printf("%u ", ctx.num);
    544 	} else if (ctx.tok == tIDN) {
    545 		printf("@%s ", ctx.tmp);
    546 	} else if (ctx.tok == tTYPE) {
    547 		printf("@@%s ", ctx.tmp);
    548 	} else if (ctx.tok == tEOL) {
    549 		printf("\n");
    550 	} else if (ctx.tok == tSTR) {
    551 		printstr();
    552 	} else {
    553 		printf("%s ", tnames[ctx.tok]);
    554 	}
    555 }
    556 
    557 void emit() {
    558 	if (ctx.tok == tNUM) {
    559 		printf("%u", ctx.num);
    560 	} else if (ctx.tok == tIDN) {
    561 		printf("%s", ctx.tmp);
    562 	} else if (ctx.tok == tTYPE) {
    563 		printf("%s", ctx.tmp);
    564 	} else if (ctx.tok == tSTR) {
    565 		printstr();
    566 	} else {
    567 		printf("%s", tnames[ctx.tok]);
    568 	}
    569 }
    570 
    571 void expected(const char* what) {
    572 	error("expected %s, found %s", what, tnames[ctx.tok]);
    573 }
    574 
    575 void expect(token_t tok) {
    576 	if (ctx.tok != tok) {
    577 		error("expected %s, found %s", tnames[tok], tnames[ctx.tok]);
    578 	}
    579 }
    580 
    581 void require(token_t tok) {
    582 	expect(tok);
    583 	emit();
    584 	next();
    585 }
    586 
    587 void requireq(token_t tok) {
    588 	expect(tok);
    589 	nextq();
    590 }
    591 
    592 void parse_enum() {
    593 	printf("enum ");
    594 	require(tOBRACE);
    595 	while (ctx.tok != tCBRACE) {
    596 		emit();
    597 		next();
    598 	}
    599 	require(tCBRACE);
    600 	require(tSEMI);
    601 }
    602 
    603 void parse_expr() {
    604 	while (ctx.tok != tCPAREN) {
    605 		if (ctx.tok == tOPAREN) {
    606 			printf("(");
    607 			next();
    608 			parse_expr();
    609 		} else {
    610 			emit();
    611 			next();
    612 		}
    613 	}
    614 	require(tCPAREN);
    615 	printf(")");
    616 }
    617 
    618 void parse_block() {
    619 	unsigned start = 1;
    620 	while (ctx.tok != tCBRACE) {
    621 		if (start) {
    622 			start = 0;
    623 		}
    624 		if (ctx.tok == tOPAREN) {
    625 			printf("(");
    626 			next();
    627 			parse_expr();
    628 		} else if (ctx.tok == tOBRACE) {
    629 			printf("{");
    630 			next();
    631 			parse_block();
    632 			start = 1;
    633 		} else if (ctx.tok == tSEMI) {
    634 			printf(";");
    635 			next();
    636 			start = 1;
    637 		} else {
    638 			emit();
    639 			next();
    640 		}
    641 	}
    642 	require(tCBRACE);
    643 }
    644 		
    645 void parse_func(String type, String name) {
    646 	printf("func %s(", name->text);
    647 	while (ctx.tok != tCPAREN) {
    648 		String pt = ctx.ident;
    649 		nextq();
    650 		String pn = ctx.ident;
    651 		nextq();
    652 		printf("%s %s", pn->text, pt->text);
    653 		if (ctx.tok == tCOMMA) {
    654 			printf(",");
    655 			next();
    656 		}
    657 	}
    658 	require(tCPAREN);
    659 	if (ctx.tok == tSEMI) {
    660 		printf(" %s;", type->text);
    661 		next();
    662 		return;
    663 	}
    664 	printf("%s ", type->text);
    665 	require(tOBRACE);
    666 	parse_block();
    667 }
    668 
    669 void parse_array(String type, String name) {
    670 	u32 n = ctx.num;
    671 	if (ctx.tok == tCBRACK) {
    672 		next();
    673 		printf("var %s []%s", name->text, type->text);
    674 	} else {
    675 		requireq(tNUM);
    676 		requireq(tCBRACK);
    677 		printf("var %s [%u]%s", name->text, n, type->text);
    678 	}
    679 	if (ctx.tok == tSEMI) {
    680 		printf(";");
    681 		next();
    682 	} else if (ctx.tok == tASSIGN) {
    683 		printf(" =");
    684 		next();
    685 		require(tOBRACE);
    686 		while (ctx.tok != tCBRACE) {
    687 			emit();
    688 			next();
    689 		}
    690 		require(tCBRACE);
    691 		require(tSEMI);
    692 	} else {
    693 		error("LOST");
    694 	}
    695 }
    696 
    697 void parse_program() {
    698 	next();
    699 
    700 	for (;;) {
    701 		if (ctx.tok == tENUM) {
    702 			nextq();
    703 			parse_enum();
    704 		} else if (ctx.tok == tTYPE) {
    705 			String type = ctx.ident;
    706 			requireq(tTYPE);
    707 			String ident = ctx.ident;
    708 			requireq(tIDN);
    709 			if (ctx.tok == tOBRACK) { // array
    710 				next();
    711 				parse_array(type, ident);
    712 			} else if(ctx.tok == tOPAREN) { // func
    713 				next();
    714 				parse_func(type, ident);
    715 			} else { // global var
    716 				printf("var %s %s", ident->text, type->text);
    717 				while (ctx.tok != tSEMI) {
    718 					emit();
    719 					next();
    720 				}
    721 				require(tSEMI);
    722 			}
    723 		} else if (ctx.tok == tTYPEDEF) {
    724 			nextq();
    725 			requireq(tSTRUCT);
    726 			String t1 = ctx.ident;
    727 			nextq();
    728 			if (ctx.tok == tSTAR) {
    729 				nextq();
    730 				String t2 = ctx.ident;
    731 				next();
    732 				t2->kind = tTYPE;
    733 				printf("type %s *%s", t2->text, t1->text);
    734 			} else {
    735 				next();
    736 				t1->kind = tTYPE;
    737 				printf("type %s", t1->text);
    738 			}
    739 			require(tSEMI);
    740 		} else if (ctx.tok == tSTRUCT) {
    741 			nextq();
    742 			String n = ctx.ident;
    743 			nextq();
    744 			n->kind = tTYPE;
    745 			printf("type %s struct ", n->text);
    746 			require(tOBRACE);
    747 			while (ctx.tok != tCBRACE) {
    748 				emit();
    749 				next();
    750 			}
    751 			require(tCBRACE);
    752 			require(tSEMI);
    753 		} else if (ctx.tok == tEOF) {
    754 			return;
    755 		} else {
    756 			expected("top level entity");
    757 		}
    758 	}
    759 }
    760 
    761 // ================================================================
    762 
    763 int main(int argc, char **argv) {
    764 	const char *outname = "out.c";
    765 	const char *srcname = nil;
    766 	bool dump = false;
    767 	bool scan_only = false;
    768 
    769 	init_ctx();
    770 	ctx.filename = "<commandline>";
    771 
    772 	while (argc > 1) {
    773 		if (!strcmp(argv[1],"-o")) {
    774 			if (argc < 2) {
    775 				error("option -o requires argument");
    776 			}
    777 			outname = argv[2];
    778 			argc--;
    779 			argv++;
    780 		} else if (!strcmp(argv[1], "-p")) {
    781 			dump = true;
    782 		} else if (!strcmp(argv[1], "-s")) {
    783 			scan_only = true;
    784 		} else if (argv[1][0] == '-') {
    785 			error("unknown option: %s", argv[1]);
    786 		} else {
    787 			if (srcname != nil) {
    788 				error("multiple source files disallowed");
    789 			} else {
    790 				srcname = argv[1];
    791 			}
    792 		}
    793 		argc--;
    794 		argv++;
    795 	}
    796 
    797 	if (srcname == nil) {
    798 		error("no file specified");
    799 	}
    800 	ctx.filename = srcname;
    801 
    802 	load(srcname);
    803 	ctx.linenumber = 1;
    804 	ctx.lineoffset = 0;
    805 	// prime the lexer
    806 	scan();
    807 
    808 	if (scan_only) {
    809 		ctx.flags |= 1;
    810 		while (true) {
    811 			next();
    812 			print();
    813 			if (ctx.tok == tEOF) {
    814 				return 0;
    815 			}
    816 		}
    817 	}
    818 
    819 	parse_program();
    820 
    821 	return 0;
    822 }
	compiler Unnamed Compiled Systems Language Project
	git clone http://frotz.net/git/compiler.git
	Log \| Files \| Refs