assembler.c (11943B)
/*
 * Copyright (c) 2012, Brian Swetland
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

/* A DCPU-16 Assembler */

/* DCPU-16 Spec is Copyright 2012 Mojang */
/* http://0x10c.com/doc/dcpu-16.txt */

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdarg.h>
#include <string.h>
#include <strings.h>	/* strcasecmp() is declared here by POSIX */
#include <ctype.h>
#include <getopt.h>

typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;

#define countof(a) (sizeof(a) / sizeof((a)[0]))

/* Defined in another translation unit. emit() relies on the return value
 * being the address of the word following the decoded instruction. */
extern u16 *disassemble(u16 *pc, char *out);

/* Assembled output: one 16-bit word per address. */
static u16 image[65536] = { 0, };
/* Per-word annotation; 'd' marks words emitted by a data directive. */
static u8 note[65536] = { 0, };
/* Address of the next word to be written into image[]. */
static u16 PC = 0;

/* Input state used by the tokenizer and by die() for diagnostics. */
static FILE *fin;
static const char *filename = "";
static int linenumber = 0;

static char linebuffer[128] = { 0, };
static char *lineptr = linebuffer;

/* Most recent token and its payload (identifier/string text or number). */
static int token;
static char tstring[128];
static u16 tnumber;

enum outformat {
	OUTFORMAT_PRETTY,
	OUTFORMAT_HEX,
	OUTFORMAT_BINARY,
};

/*
 * Print "<file>:<line>: <message>" to stderr, echo the current source line
 * when one is buffered, and terminate the process with exit status 1.
 * This function does not return; callers depend on that.
 */
void die(const char *fmt, ...) {
	va_list ap;

	fprintf(stderr,"%s:%d: ", filename, linenumber);
	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);
	fprintf(stderr,"\n");
	if (linebuffer[0])
		fprintf(stderr,"%s:%d: >> %s <<\n", filename, linenumber, linebuffer);
	exit(1);
}

/* A forward reference: image[pc] must receive label->pc once it is known. */
struct fixup {
	struct fixup *next;
	struct label *label;
	u16 pc;
};

/* A symbol; 'defined' is nonzero once its address 'pc' has been set. */
struct label {
	struct label *next;
	u16 pc;
	u16 defined;
	char name[];	/* NUL-terminated, allocated together with the struct */
};

struct label *labels = 0;
struct fixup *fixups = 0;

/*
 * Find the label called 'name' (case-insensitively), creating it if needed.
 *
 * When 'def' is nonzero the label is being defined at address 'pc';
 * defining an already-defined label is a fatal error. When 'def' is zero
 * an existing label is returned untouched.
 *
 * Returns the label record. Never returns NULL: allocation failure is fatal.
 */
struct label *mklabel(const char *name, u16 pc, u16 def) {
	struct label *l;
	size_t len;

	for (l = labels; l; l = l->next) {
		if (!strcasecmp(name, l->name)) {
			if (def) {
				if (l->defined)
					die("cannot redefine label: %s", name);
				l->defined = def;
				l->pc = pc;
			}
			return l;
		}
	}

	len = strlen(name);
	l = malloc(sizeof(*l) + len + 1);
	if (!l)
		die("out of memory");
	l->defined = def;
	l->pc = pc;
	memcpy(l->name, name, len + 1);
	l->next = labels;
	labels = l;
	return l;
}

/*
 * Record that image[pc] refers to label 'name'. If the label is already
 * defined its address is stored immediately; otherwise a fixup is queued
 * for resolve_fixups().
 */
void use_label(const char *name, u16 pc) {
	struct label *l = mklabel(name, 0, 0);

	if (l->defined) {
		image[pc] = l->pc;
	} else {
		struct fixup *f = malloc(sizeof(*f));
		if (!f)
			die("out of memory");
		f->next = fixups;
		f->pc = pc;
		f->label = l;
		fixups = f;
	}
}

/* Define label 'name' at address 'pc'; redefinition is fatal. */
void set_label(const char *name, u16 pc) {
	mklabel(name, pc, 1);
}

/*
 * Patch every queued forward reference with its label's address.
 * A reference to a label that was never defined is fatal.
 */
void resolve_fixups(void) {
	struct fixup *f;

	for (f = fixups; f; f = f->next) {
		if (f->label->defined) {
			image[f->pc] = f->label->pc;
		} else {
			die("undefined reference to '%s' at 0x%04x", f->label->name, f->pc);
		}
	}
}

/*
 * Token codes. Ordering matters:
 *  - tA..tJ are 0..7 and tR0..tR7 are 8..15, so the register number is
 *    recovered by masking with 7;
 *  - tSET..tIFB are consecutive, so assemble_binop() derives the opcode as
 *    (token - tSET + 1);
 *  - everything up to LASTKEYWORD is matched by name in _next().
 */
enum tokens {
	tA, tB, tC, tX, tY, tZ, tI, tJ,
	tR0, tR1, tR2, tR3, tR4, tR5, tR6, tR7,
	tSET, tADD, tSUB, tMUL, tDIV, tMOD, tSHL,
	tSHR, tAND, tBOR, tXOR, tIFE, tIFN, tIFG, tIFB,
	tJSR,
	tPOP, tPEEK, tPUSH, tSP, tPC, tO,
	tJMP, tMOV, tNOP,
	tDATA, tDAT, tDW, tWORD,
	tCOMMA, tOBRACK, tCBRACK, tCOLON, tPLUS,
	tSTRING, tQSTRING, tNUMBER, tEOF,
};
/* Printable name of each token, indexed by token code. */
static const char *tnames[] = {
	"A", "B", "C", "X", "Y", "Z", "I", "J",
	"R0", "R1", "R2", "R3", "R4", "R5", "R6", "R7",
	"SET", "ADD", "SUB", "MUL", "DIV", "MOD", "SHL",
	"SHR", "AND", "BOR", "XOR", "IFE", "IFN", "IFG", "IFB",
	"JSR",
	"POP", "PEEK", "PUSH", "SP", "PC", "O",
	"JMP", "MOV", "NOP",
	"DATA", "DAT", "DW", "WORD",
	",", "[", "]", ":", "+",
	"<STRING>", "<QUOTED-STRING>", "<NUMBER>", "<EOF>",
};
#define LASTKEYWORD tWORD

/* Compile-time check that tnames[] has exactly one entry per token code. */
typedef char tnames_matches_tokens[(countof(tnames) == tEOF + 1) ? 1 : -1];

/*
 * Scan and return the next token from the current input file.
 *
 * Side effects: refills linebuffer (bumping linenumber) as lines are
 * consumed; stores identifier text (lower-cased) or quoted-string contents
 * in tstring; stores numeric and character-literal values in tnumber.
 * Returns tEOF at end of input. Malformed input is fatal via die().
 */
int _next(void) {
	char c, *x;

nextline:
	if (!*lineptr) {
		if (feof(fin))
			return tEOF;
		if (fgets(linebuffer, sizeof(linebuffer), fin) == 0)
			return tEOF;
		lineptr = linebuffer;
		linenumber++;
	}

	/* skip blanks and control characters; NUL means the line is used up */
	while (*lineptr <= ' ') {
		if (*lineptr == 0)
			goto nextline;
		lineptr++;
	}

	switch ((c = *lineptr++)) {
	case ',': return tCOMMA;
	case '+': return tPLUS;
	case '[': return tOBRACK;
	case ']': return tCBRACK;
	case ':': return tCOLON;
	case '/': case ';': case '#':
		/* comment: discard the remainder of the buffered line */
		*lineptr = 0;
		goto nextline;
	case '"':
		x = tstring;
		for (;;) {
			switch ((c = *lineptr++)) {
			case 0:
				die("unterminated string");
				/* not reached: die() exits */
			case '"':
				*x = 0;
				return tQSTRING;
			case '\\':
				c = *lineptr++;
				/* A backslash right before the terminator must not
				 * swallow it, or scanning runs off the end of the line. */
				if (c == 0)
					die("unterminated string");
				switch (c) {
				case 'n': *x++ = '\n'; break;
				case 't': *x++ = '\t'; break;
				case 'r': *x++ = '\r'; break;
				default:
					*x++ = c; break;
				}
				break;
			default:
				*x++ = c;
			}
		}
	default:
		/* <ctype.h> functions require an unsigned char value (or EOF) */
		if (isdigit((unsigned char)c) ||
		    ((c == '-') && isdigit((unsigned char)*lineptr))) {
			/* base 0: accepts decimal, 0x-prefixed hex and 0-prefixed
			 * octal; the result is truncated to 16 bits */
			tnumber = strtoul(lineptr-1, &lineptr, 0);
			return tNUMBER;
		}
		if (isalpha((unsigned char)c) || c == '_') {
			int n;
			x = tstring;
			lineptr--;
			while (isalnum((unsigned char)*lineptr) || *lineptr == '_')
				*x++ = tolower((unsigned char)*lineptr++);
			*x = 0;
			for (n = 0; n <= LASTKEYWORD; n++)
				if (!strcasecmp(tnames[n], tstring))
					return n;
			return tSTRING;
		}
		/* 'x' character literal; make sure a character is actually
		 * present before peeking at the closing quote */
		if ((c == '\'') && (*lineptr != 0) && (lineptr[1] == '\'')) {
			tnumber = (u16)(unsigned char)*lineptr;
			lineptr += 2;
			return tNUMBER;
		}
		die("illegal character '%c'", c);
		return tEOF;
	}
}

/* Advance to the next token, store it in the global 'token', and return it. */
int next(void) {
	token = _next();

	//fprintf(stderr,"%3d %s\n", token, tnames[token]);
	return token;
}

/* Consume the next token and die unless it is 't'. */
void expect(int t) {
	if (next() != t)
		die("expecting %s, found %s", tnames[t], tnames[token]);
}

/*
 * Assemble the operand list of a data directive: a comma-separated list of
 * numbers, label names and quoted strings. Each number or label becomes one
 * word; each character of a quoted string becomes one word. Every emitted
 * word is tagged 'd' in note[].
 *
 * On return 'token' holds the first token after the list (it has already
 * been read), which is why the caller re-dispatches instead of calling
 * next() again.
 */
void assemble_imm_or_label(void) {
	do {
		next();
		if (token == tNUMBER) {
			note[PC] = 'd';
			image[PC++] = tnumber;
		} else if (token == tSTRING) {
			note[PC] = 'd';
			image[PC] = 0;
			use_label(tstring, PC++);
		} else if (token == tQSTRING) {
			char *x = tstring;
			while (*x) {
				note[PC] = 'd';
				/* go through unsigned char so bytes >= 0x80 are not
				 * sign-extended into the 16-bit word */
				image[PC++] = (unsigned char)*x++;
			}
		} else {
			die("expected number or label");
		}
		next();
	} while (token == tCOMMA);
}

/*
 * Parse one instruction operand and return its 6-bit operand code. Operands
 * that need an extra word (literals >= 0x20, labels, [addr], [addr+reg])
 * append that word at image[PC] and advance PC.
 *
 * Codes produced:
 *   0x00-0x07  register                0x08-0x0f  [register]
 *   0x10-0x17  [word + register]       0x18/0x19/0x1a  POP/PEEK/PUSH
 *   0x1b SP    0x1c PC    0x1d O       0x1e  [word]
 *   0x1f  word literal or label        0x20-0x3f  literal 0..31 inline
 */
int assemble_operand(void) {
	u16 n;

	next();
	switch (token) {
	case tA: case tB: case tC: case tX:
	case tY: case tZ: case tI: case tJ:
		return token & 7;
	case tR0: case tR1: case tR2: case tR3:
	case tR4: case tR5: case tR6: case tR7:
		return (token - 8) & 7;
	case tPOP: return 0x18;
	case tPEEK: return 0x19;
	case tPUSH: return 0x1a;
	case tSP: return 0x1b;
	case tPC: return 0x1c;
	case tO: return 0x1d;
	case tNUMBER:
		if (tnumber < 0x20)
			return tnumber + 0x20;
		image[PC++] = tnumber;
		return 0x1f;
	case tSTRING:
		image[PC] = 0;
		use_label(tstring, PC++);
		return 0x1f;
	default:
		if (token != tOBRACK)
			die("expected [");
	}

	/* we must have seen a '[' */
	next();
	switch (token) {
	case tA: case tB: case tC: case tX:
	case tY: case tZ: case tI: case tJ:
		n = token & 7;
		next();
		if (token == tCBRACK)
			return 0x08 | n;
		if ((token != tCOMMA) && (token != tPLUS))
			die("expected , or +");
		next();
		if (token == tSTRING) {
			use_label(tstring, PC++);
		} else if (token == tNUMBER) {
			image[PC++] = tnumber;
		} else {
			die("expected immediate value");
		}
		expect(tCBRACK);
		return 0x10 | n;
	case tSTRING:
		use_label(tstring, PC++);
		/* fallthrough: the remainder is shared with the numeric form */
	case tNUMBER:
		if (token == tNUMBER)
			image[PC++] = tnumber;

		next();
		if (token == tCBRACK) {
			return 0x1e;
		} else if ((token == tCOMMA) || (token == tPLUS)) {
			next();
			if ((token >= tA) && (token <= tJ)) {
				n = 0x10 | (token & 7);
			} else {
				die("invalid register");
			}
			expect(tCBRACK);
			return n;
		} else {
			die("invalid operand");
		}
		/* not reached: every path above returns or dies */
	default:
		die("invalid operand");
	}
	return 0;
}

/*
 * Assemble a two-operand instruction whose mnemonic is in 'token', plus the
 * aliases PUSH x (SET PUSH, x), POP x (SET x, POP) and NOP (SET 0, 0).
 * The instruction word is laid out as: opcode | a << 4 | b << 10.
 */
void assemble_binop(void) {
	u16 pc = PC++;	/* reserve the instruction word before any operand words */
	int a, b;
	int op = token;

	/* alias for push x, pop x */
	if (token == tPUSH) {
		op = tSET;
		a = 0x1a; // push
		b = assemble_operand();
	} else if (token == tPOP) {
		op = tSET;
		a = assemble_operand();
		b = 0x18; // pop
	} else if (token == tNOP) {
		// SET 0,0
		op = tSET;
		a = 0x20;
		b = 0x20;
	} else {
		a = assemble_operand();
		expect(tCOMMA);
		b = assemble_operand();
	}

	/* token to opcode */
	op -= (tSET - 1);
	image[pc] = op | (a << 4) | (b << 10);
}

/* Assemble "JMP x" as SET PC, x: 0x01c1 is opcode 1 with a = 0x1c (PC). */
void assemble_jump(void) {
	u16 pc = PC++;
	image[pc] = 0x01c1 | (assemble_operand() << 10);
}

/*
 * Assemble source file 'fn', appending its output to image[] at the current
 * PC. Labels persist across calls, so several files assemble into one image.
 * Any error (including failure to open the file) is fatal.
 */
void assemble(const char *fn) {
	u16 pc, n;

	fin = fopen(fn, "r");
	filename = fn;
	linenumber = 0;
	/* start with a clean tokenizer so diagnostics for this file never echo
	 * a stale line left over from the previous one */
	linebuffer[0] = 0;
	lineptr = linebuffer;
	if (!fin) die("cannot read file");

	for (;;) {
		next();
again:
		switch (token) {
		case tEOF:
			goto done;
		case tSTRING:	/* "name:" */
			expect(tCOLON);
			set_label(tstring, PC);
			continue;
		case tCOLON:	/* ":name" */
			expect(tSTRING);
			set_label(tstring, PC);
			continue;
		case tWORD: case tDAT: case tDATA: case tDW:
			assemble_imm_or_label();
			/* the token following the data list is already loaded */
			goto again;
		case tJMP: // alias for SET PC, ...
			assemble_jump();
			continue;
		case tMOV: // alias for SET
			token = tSET;
			/* fallthrough */
		case tSET: case tADD: case tSUB: case tMUL:
		case tDIV: case tMOD: case tSHL: case tSHR:
		case tAND: case tBOR: case tXOR: case tIFE:
		case tIFN: case tIFG: case tIFB:
		case tPUSH: case tPOP: case tNOP:
			assemble_binop();
			continue;
		case tJSR:
			pc = PC++;
			n = assemble_operand();
			image[pc] = (n << 10) | 0x0010;
			continue;
		default:
			die("unexpected: %s", tnames[token]);
		}
	}
done:
	fclose(fin);
}

/*
 * Write image[0..PC) to file 'fn' ("-" selects stdout) in 'format':
 *   PRETTY  one hex word per line; the first word of each instruction is
 *           followed by its address and disassembly
 *   HEX     one four-digit hex word per line
 *   BINARY  raw 16-bit words in host byte order
 * Failure to open, write or close the output is fatal.
 */
void emit(const char *fn, enum outformat format) {
	FILE *fp;
	u16 *pc = image;
	u16 *end = image + PC;
	u16 *dis = pc;	/* address of the next word that starts an instruction */

	filename = fn;
	linenumber = 0;

	if (!strcmp(fn, "-")) {
		fp = stdout;
	} else {
		/* binary output must not go through text-mode translation */
		fp = fopen(fn, (format == OUTFORMAT_BINARY) ? "wb" : "w");
	}
	if (!fp) die("cannot write file");

	while (pc < end) {
		if (format == OUTFORMAT_PRETTY) {
			if (note[pc-image] == 'd') {
				/* data word: print raw and resume decoding after it */
				fprintf(fp, "%04x\n", *pc);
				dis = pc + 1;
			} else if (pc == dis) {
				char out[128];
				dis = disassemble(pc, out);
				fprintf(fp, "%04x\t%04x:\t%s\n", *pc, (unsigned)(pc-image), out);
			} else {
				/* extra operand word of the instruction printed above */
				fprintf(fp, "%04x\n", *pc);
			}
		} else if (format == OUTFORMAT_HEX) {
			fprintf(fp, "%04x\n", *pc);
		} else if (format == OUTFORMAT_BINARY) {
			/* XXX handle host endian */
			if (fwrite(pc, sizeof(*pc), 1, fp) != 1)
				die("error writing file");
		}
		pc++;
	}

	if (fflush(fp) != 0 || ferror(fp))
		die("error writing file");
	if (fp != stdout && fclose(fp) != 0)
		die("error writing file");
}

/* Print command-line help to stderr. */
static void usage(int argc, char **argv)
{
	(void)argc;
	fprintf(stderr, "usage: %s [-o output] [-O output_format] <input file(s)>\n", argv[0]);
	fprintf(stderr, "\toutput_format can be one of: pretty, hex, binary\n");
}

/*
 * Parse options, assemble every input file in order into a single image,
 * resolve forward references and write the result (default: "out.hex" in
 * pretty format). Nothing is written when no words were assembled.
 */
int main(int argc, char **argv) {
	const char *outfn = "out.hex";
	enum outformat oformat = OUTFORMAT_PRETTY;

	for (;;) {
		int c;
		int option_index = 0;

		static struct option long_options[] = {
			{"help", 0, 0, 'h'},
			{"output", 1, 0, 'o'},
			{"outformat", 1, 0, 'O'},
			{0, 0, 0, 0},
		};

		c = getopt_long(argc, argv, "ho:O:", long_options, &option_index);
		if (c == -1)
			break;

		switch (c) {
		case 'h':
			usage(argc, argv);
			return 0;
		case 'o':
			outfn = optarg;
			break;
		case 'O':
			if (!strcasecmp(optarg, "binary")) {
				oformat = OUTFORMAT_BINARY;
			} else if (!strcasecmp(optarg, "hex")) {
				oformat = OUTFORMAT_HEX;
			} else if (!strcasecmp(optarg, "pretty")) {
				oformat = OUTFORMAT_PRETTY;
			} else {
				usage(argc, argv);
				return 1;
			}
			break;
		default:
			usage(argc, argv);
			return 1;
		}
	}

	if (argc - optind < 1) {
		usage(argc, argv);
		return 1;
	}

	argc -= optind;
	argv += optind;

	while (argc >= 1) {
		assemble(argv[0]);
		argv++;
		argc--;
	}

	if (PC != 0) {
		/* no source line is current any more; keep die() from echoing one */
		linebuffer[0] = 0;
		resolve_fixups();
		emit(outfn, oformat);
	}
	return 0;
}