dcpu16

Virtual Machine and Assembler for Notch's DCPU-16 Architecture
git clone http://frotz.net/git/dcpu16.git
Log | Files | Refs | README

assembler.c (11943B)


      1 /*
      2  * Copyright (c) 2012, Brian Swetland
      3  * All rights reserved.
      4  *
      5  * Redistribution and use in source and binary forms, with or without 
      6  * modification, are permitted provided that the following conditions are met:
      7  *
      8  *   Redistributions of source code must retain the above copyright notice, 
      9  *   this list of conditions and the following disclaimer.
     10  *
     11  *   Redistributions in binary form must reproduce the above copyright 
     12  *   notice, this list of conditions and the following disclaimer in the 
     13  *   documentation and/or other materials provided with the distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
     16  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
     17  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
     18  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
     19  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
     20  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
     21  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
     22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
     23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
     24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
     25  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26  *
     27  */
     28 
     29 /* A DCPU-16 Assembler */
     30 
     31 /* DCPU-16 Spec is Copyright 2012 Mojang */
     32 /* http://0x10c.com/doc/dcpu-16.txt */
     33 
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdarg.h>
#include <string.h>
#include <strings.h>
#include <ctype.h>
#include <getopt.h>
     41 
     42 typedef uint8_t u8;
     43 typedef uint16_t u16;
     44 typedef uint32_t u32;
     45 
     46 #define countof(a) (sizeof(a) / sizeof((a)[0]))
     47 
     48 extern u16 *disassemble(u16 *pc, char *out);
     49 
     50 static u16 image[65536] = { 0, };
     51 static u8 note[65536] = { 0, };
     52 static u16 PC = 0;
     53 static FILE *fin;
     54 static const char *filename = "";
     55 static int linenumber = 0;
     56 
     57 static char linebuffer[128] = { 0, };
     58 static char *lineptr = linebuffer;
     59 static int token;
     60 static char tstring[128];
     61 static u16 tnumber;
     62 
     63 enum outformat {
     64 	OUTFORMAT_PRETTY,
     65 	OUTFORMAT_HEX,
     66 	OUTFORMAT_BINARY,
     67 };
     68 
     69 void die(const char *fmt, ...) {
     70 	va_list ap;
     71 	fprintf(stderr,"%s:%d: ", filename, linenumber);
     72 	va_start(ap, fmt);
     73 	vfprintf(stderr, fmt, ap);
     74 	va_end(ap);
     75 	fprintf(stderr,"\n");
     76 	if (linebuffer[0])
     77 		fprintf(stderr,"%s:%d: >> %s <<\n", filename, linenumber, linebuffer);
     78 	exit(1);
     79 }
     80 
     81 struct fixup {
     82 	struct fixup *next;
     83 	struct label *label;
     84 	u16 pc;
     85 };
     86 
     87 struct label {
     88 	struct label *next;
     89 	u16 pc;
     90 	u16 defined;
     91 	char name[1];
     92 };
     93 
     94 struct label *labels = 0;
     95 struct fixup *fixups = 0;
     96 
     97 struct label *mklabel(const char *name, u16 pc, u16 def) {
     98 	struct label *l;
     99 	for (l = labels; l; l = l->next) {
    100 		if (!strcasecmp(name, l->name)) {
    101 			if (def) {
    102 				if (l->defined)
    103 					die("cannot redefine label: %s", name);
    104 				l->defined = def;
    105 				l->pc = pc;
    106 			}
    107 			return l;
    108 		}
    109 	}
    110 	l = malloc(sizeof(*l) + strlen(name));
    111 	l->defined = def;
    112 	l->pc = pc;
    113 	strcpy(l->name, name);
    114 	l->next = labels;
    115 	labels = l;
    116 	return l;
    117 }
    118 
    119 void use_label(const char *name, u16 pc) {
    120 	struct label *l = mklabel(name, 0, 0);
    121 	if (l->defined) {
    122 		image[pc] = l->pc;
    123 	} else {
    124 		struct fixup *f = malloc(sizeof(*f));
    125 		f->next = fixups;
    126 		f->pc = pc;
    127 		f->label = l;
    128 		fixups = f;
    129 	}	
    130 }
    131 
    132 void set_label(const char *name, u16 pc) {
    133 	mklabel(name, pc, 1);
    134 }
    135 
    136 void resolve_fixups(void) {
    137 	struct fixup *f;
    138 	for (f = fixups; f; f = f->next) {
    139 		if (f->label->defined) {
    140 			image[f->pc] = f->label->pc;
    141 		} else {
    142 			die("undefined reference to '%s' at 0x%04x", f->label->name, f->pc);
    143 		}
    144 	}
    145 }
    146 
    147 enum tokens {
    148 	tA, tB, tC, tX, tY, tZ, tI, tJ,
    149 	tR0, tR1, tR2, tR3, tR4, tR5, tR6, tR7,
    150 	tSET, tADD, tSUB, tMUL, tDIV, tMOD, tSHL,
    151 	tSHR, tAND, tBOR, tXOR, tIFE, tIFN, tIFG, tIFB,
    152 	tJSR,
    153 	tPOP, tPEEK, tPUSH, tSP, tPC, tO,
    154 	tJMP, tMOV, tNOP,
    155 	tDATA, tDAT, tDW, tWORD,
    156 	tCOMMA, tOBRACK, tCBRACK, tCOLON, tPLUS,
    157 	tSTRING, tQSTRING, tNUMBER, tEOF,
    158 };
    159 static const char *tnames[] = {
    160 	"A", "B", "C", "X", "Y", "Z", "I", "J",
    161 	"R0", "R1", "R2", "R3", "R4", "R5", "R6", "R7",
    162 	"SET", "ADD", "SUB", "MUL", "DIV", "MOD", "SHL",
    163 	"SHR", "AND", "BOR", "XOR", "IFE", "IFN", "IFG", "IFB",
    164 	"JSR",
    165 	"POP", "PEEK", "PUSH", "SP", "PC", "O",
    166 	"JMP", "MOV", "NOP",
    167 	"DATA", "DAT", "DW", "WORD",
    168 	",", "[", "]", ":", "+",
    169 	"<STRING>", "<QUOTED-STRING>", "<NUMBER>", "<EOF>",
    170 };
    171 #define LASTKEYWORD	tWORD
    172 
    173 int _next(void) {
    174 	char c, *x;
    175 nextline:
    176 	if (!*lineptr) {
    177 		if (feof(fin)) return tEOF;
    178 		if (fgets(linebuffer, 128, fin) == 0) return tEOF;
    179 		lineptr = linebuffer;
    180 		linenumber++;
    181 	}
    182 	while (*lineptr <= ' ') {
    183 		if (*lineptr == 0) goto nextline;
    184 		lineptr++;
    185 	}
    186 	switch ((c = *lineptr++)) {
    187 	case ',': return tCOMMA;
    188 	case '+': return tPLUS;
    189 	case '[': return tOBRACK;
    190 	case ']': return tCBRACK;
    191 	case ':': return tCOLON;
    192 	case '/': case ';': case '#': *lineptr = 0; goto nextline;
    193 	case '"':
    194 		x = tstring;
    195 		for (;;) {
    196 			switch((c = *lineptr++)) {
    197 			case 0:
    198 				die("unterminated string");
    199 			case '"':
    200 				*x = 0;
    201 				return tQSTRING;
    202 			case '\\':
    203 				switch((c = *lineptr++)) {
    204 				case 'n': *x++ = '\n'; break;
    205 				case 't': *x++ = '\t'; break;
    206 				case 'r': *x++ = '\r'; break;
    207 				default:
    208 					*x++ = c; break;
    209 				}
    210 				break;
    211 			default:
    212 				*x++ = c;
    213 			}
    214 		}
    215 	default:
    216 		if (isdigit(c) || ((c == '-') && isdigit(*lineptr))) {
    217 			tnumber = strtoul(lineptr-1, &lineptr, 0);
    218 			return tNUMBER;
    219 		}
    220 		if (isalpha(c) || c == '_') {
    221 			int n;
    222 			x = tstring;
    223 			lineptr--;
    224 			while (isalnum(*lineptr) || *lineptr == '_')
    225 				*x++ = tolower(*lineptr++);
    226 			*x = 0;
    227 			for (n = 0; n <= LASTKEYWORD; n++)
    228 				if (!strcasecmp(tnames[n], tstring))
    229 					return n;
    230 			return tSTRING;
    231 		}
    232 		if ((c == '\'') && (*(lineptr+1) == '\'')) {
    233 			tnumber = (u16) (*lineptr++);
    234 			lineptr++;
    235 			return tNUMBER;
    236 		}
    237 		die("illegal character '%c'", c);
    238 		return tEOF;
    239 	}
    240 }
    241 
    242 int next(void) {
    243 	token = _next();
    244 
    245 	//fprintf(stderr,"%3d %s\n", token, tnames[token]);
    246 	return token;
    247 }
    248 
    249 void expect(int t) {
    250 	if (next() != t)
    251 		die("expecting %s, found %s", tnames[t], tnames[token]);
    252 }
    253 
    254 void assemble_imm_or_label(void) {
    255 	do {
    256 		next();
    257 		if (token == tNUMBER) {
    258 			note[PC] = 'd';
    259 			image[PC++] = tnumber;
    260 		} else if (token == tSTRING) {
    261 			note[PC] = 'd';
    262 			image[PC] = 0;
    263 			use_label(tstring, PC++);
    264 		} else if (token == tQSTRING) {
    265 			char *x = tstring;
    266 			while (*x) {
    267 				note[PC] = 'd';
    268 				image[PC++] = *x++;
    269 			}
    270 		} else {
    271 			die("expected number or label");
    272 		}
    273 		next();
    274 	} while (token == tCOMMA);
    275 }
    276 
    277 int assemble_operand(void) {
    278 	u16 n;
    279 
    280 	next();
    281 	switch (token) {
    282 	case tA: case tB: case tC: case tX:
    283 	case tY: case tZ: case tI: case tJ:
    284 		return token & 7;
    285 	case tR0: case tR1: case tR2: case tR3:
    286 	case tR4: case tR5: case tR6: case tR7:
    287 		return (token - 8) & 7;
    288 	case tPOP: return 0x18;
    289 	case tPEEK: return 0x19;
    290 	case tPUSH: return 0x1a;
    291 	case tSP: return 0x1b;
    292 	case tPC: return 0x1c;
    293 	case tO: return 0x1d;
    294 	case tNUMBER:
    295 		if (tnumber < 0x20)
    296 			return tnumber + 0x20;
    297 		image[PC++] = tnumber;
    298 		return 0x1f;
    299 	case tSTRING:
    300 		image[PC] = 0;
    301 		use_label(tstring, PC++);
    302 		return 0x1f;
    303 	default:
    304 		if (token != tOBRACK)
    305 			die("expected [");
    306 	}
    307 
    308 	/* we must have seen a '[' */
    309 	next();
    310 	switch (token) {
    311 	case tA: case tB: case tC: case tX:
    312 	case tY: case tZ: case tI: case tJ:
    313 		n = token & 7;
    314 		next();
    315 		if (token == tCBRACK)
    316 			return 0x08 | n;
    317 		if ((token != tCOMMA) && (token != tPLUS))
    318 			die("expected , or +");
    319 		next();
    320 		if (token == tSTRING) {
    321 			use_label(tstring, PC++);
    322 		} else if (token == tNUMBER) {
    323 			image[PC++] = tnumber;
    324 		} else {
    325 			die("expected immediate value");
    326 		}
    327 		expect(tCBRACK);
    328 		return 0x10 | n;
    329 	case tSTRING:
    330 		use_label(tstring, PC++);
    331 	case tNUMBER:
    332 		if (token == tNUMBER)
    333 			image[PC++] = tnumber;
    334 
    335 		next();
    336 		if (token == tCBRACK) {
    337 			return 0x1e;
    338 		} else if ((token == tCOMMA) || (token == tPLUS)) {
    339 			next();
    340 			if ((token >= tA) && (token <= tJ)) {
    341 				n = 0x10 | (token & 7);
    342 			} else {
    343 				die("invalid register");
    344 			}
    345 			expect(tCBRACK);
    346 			return n;
    347 		} else {
    348 			die("invalid operand");
    349 		}
    350 	default:
    351 		die("invalid operand");
    352 	}
    353 	return 0;
    354 }
    355 
    356 void assemble_binop(void) {
    357 	u16 pc = PC++;
    358 	int a, b;
    359 	int op = token;
    360 
    361 	/* alias for push x, pop x */
    362 	if (token == tPUSH) {
    363 		op = tSET;
    364 		a = 0x1a; // push
    365 		b = assemble_operand();
    366 	} else if (token == tPOP) {
    367 		op = tSET;
    368 		a = assemble_operand();
    369 		b = 0x18; // pop
    370 	} else if (token == tNOP) {
    371 		// SET 0,0
    372 		op = tSET;
    373 		a = 0x20;
    374 		b = 0x20;
    375 	} else {
    376 		a = assemble_operand();
    377 		expect(tCOMMA);
    378 		b = assemble_operand();
    379 	}
    380 
    381 	/* token to opcode */
    382 	op -= (tSET - 1);
    383 	image[pc] = op | (a << 4) | (b << 10);
    384 }
    385 
    386 void assemble_jump(void) {
    387 	u16 pc = PC++;
    388 	image[pc] = 0x01c1 | (assemble_operand() << 10);
    389 }
    390 
    391 void assemble(const char *fn) {
    392 	u16 pc, n;
    393 	fin = fopen(fn, "r");
    394 	filename = fn;
    395 	linenumber = 0;
    396 	if (!fin) die("cannot read file");
    397 
    398 	for (;;) {
    399 		next();
    400 again:
    401 		switch (token) {
    402 		case tEOF:
    403 			goto done;
    404 		case tSTRING:
    405 			expect(tCOLON);
    406 			set_label(tstring, PC);
    407 			continue;
    408 		case tCOLON:
    409 			expect(tSTRING);
    410 			set_label(tstring, PC);
    411 			continue;
    412 		case tWORD: case tDAT: case tDATA: case tDW:
    413 			assemble_imm_or_label();
    414 			goto again;
    415 		case tJMP: // alias for SET PC, ...
    416 			assemble_jump();
    417 			continue;
    418 		case tMOV: // alias for SET
    419 			token = tSET;
    420 		case tSET: case tADD: case tSUB: case tMUL:
    421 		case tDIV: case tMOD: case tSHL: case tSHR:
    422 		case tAND: case tBOR: case tXOR: case tIFE:
    423 		case tIFN: case tIFG: case tIFB:
    424 		case tPUSH: case tPOP: case tNOP:
    425 			assemble_binop();
    426 			continue;
    427 		case tJSR:
    428 			pc = PC++;
    429 			n = assemble_operand();
    430 			image[pc] = (n << 10) | 0x0010;
    431 			continue;
    432 		default:
    433 			die("unexpected: %s", tnames[token]);
    434 		}
    435 	}
    436 done:
    437 	fclose(fin);
    438 }
    439 
    440 void emit(const char *fn, enum outformat format) {
    441 	FILE *fp;
    442 	u16 *pc = image;
    443 	u16 *end = image + PC;
    444 	u16 *dis = pc;
    445 	filename = fn;
    446 	linenumber = 0;
    447 
    448 	if (!strcmp(fn, "-")) {
    449 		fp = stdout;
    450 	} else {
    451 		fp = fopen(fn, "w");
    452 	}
    453 	if (!fp) die("cannot write file");
    454 
    455 	while (pc < end) {
    456 		if (format == OUTFORMAT_PRETTY) {
    457 			if (note[pc-image] == 'd') {
    458 				fprintf(fp, "%04x\n", *pc);
    459 				dis = pc + 1;
    460 			} else if (pc == dis) {
    461 				char out[128];
    462 				dis = disassemble(pc, out);
    463 				fprintf(fp, "%04x\t%04x:\t%s\n", *pc, (unsigned)(pc-image), out);
    464 			} else {
    465 				fprintf(fp, "%04x\n", *pc);
    466 			}
    467 		} else if (format == OUTFORMAT_HEX) {
    468 			fprintf(fp, "%04x\n", *pc);
    469 		} else if (format == OUTFORMAT_BINARY) {
    470 			/* XXX handle host endian */
    471 			fwrite(pc, sizeof(*pc), 1, fp);
    472 		}
    473 		pc++;
    474 	}
    475 	if (fp != stdout)
    476 		fclose(fp);
    477 }
    478 
    479 static void usage(int argc, char **argv)
    480 {
    481 	fprintf(stderr, "usage: %s [-o output] [-O output_format] <input file(s)>\n", argv[0]);
    482 	fprintf(stderr, "\toutput_format can be one of: pretty, hex, binary\n");
    483 }
    484 
    485 int main(int argc, char **argv) {
    486 	const char *outfn = "out.hex";
    487 	enum outformat oformat = OUTFORMAT_PRETTY;
    488 
    489 	for (;;) {
    490 		int c;
    491 		int option_index = 0;
    492 
    493 		static struct option long_options[] = {
    494 			{"help", 0, 0, 'h'},
    495 			{"output", 1, 0, 'o'},
    496 			{"outformat", 1, 0, 'O'},
    497 			{0, 0, 0, 0},
    498 		};
    499 
    500 		c = getopt_long(argc, argv, "ho:O:", long_options, &option_index);
    501 		if (c == -1)
    502 			break;
    503 
    504 		switch (c) {
    505 			case 'h':
    506 				usage(argc, argv);
    507 				return 0;
    508 			case 'o':
    509 				outfn = optarg;
    510 				break;
    511 			case 'O':
    512 				if (!strcasecmp(optarg, "binary")) {
    513 					oformat = OUTFORMAT_BINARY;
    514 				} else if (!strcasecmp(optarg, "hex")) {
    515 					oformat = OUTFORMAT_HEX;
    516 				} else if (!strcasecmp(optarg, "pretty")) {
    517 					oformat = OUTFORMAT_PRETTY;
    518 				} else {
    519 					usage(argc, argv);
    520 					return 1;
    521 				}
    522 				break;
    523 			default:
    524 				usage(argc, argv);
    525 				return 1;
    526 		}
    527 	}
    528 
    529 	if (argc - optind < 1) {
    530 		usage(argc, argv);
    531 		return 1;
    532 	}
    533 
    534 	argc -= optind;
    535 	argv += optind;
    536 
    537 	while (argc >= 1) {
    538 		assemble(argv[0]);
    539 		argv++;
    540 		argc--;
    541 	}
    542 
    543 	if (PC != 0) {
    544 		linebuffer[0] = 0;
    545 		resolve_fixups();
    546 		emit(outfn, oformat);
    547 	}
    548 	return 0;
    549 }
    550