a16v5.c (14846B)
// Copyright 2015, Brian Swetland <swetland@frotz.net>
// Licensed under the Apache License, Version 2.0.

#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <ctype.h>
#include <strings.h>
#include <string.h>

typedef unsigned u32;
typedef unsigned short u16;

// Source position used to prefix diagnostics. linenumber is advanced once
// per call to tokenize(); linestring holds the raw text of the current line
// (empty when no source line applies).
static unsigned linenumber = 0;
static char linestring[256];
static char *filename;

FILE *ofp = 0;

// Print a printf-style message to stderr prefixed with "file:line: ",
// echo the current source line if one is recorded, and exit(1).
// Never returns.
void die(const char *fmt, ...) {
	va_list ap;
	// filename is still NULL if we fail before the command line is parsed
	const char *fn = filename ? filename : "<none>";

	fprintf(stderr, "%s:%u: ", fn, linenumber);
	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);
	fprintf(stderr, "\n");
	if (linestring[0])
		fprintf(stderr, "%s:%u: >> %s <<\n", fn, linenumber, linestring);
	exit(1);
}

// malloc() that dies instead of returning NULL.
static void *xmalloc(size_t size) {
	void *p = malloc(size);
	if (!p) die("out of memory");
	return p;
}

// Heap copy of a NUL-terminated string (ISO C replacement for strdup()).
static char *xstrdup(const char *s) {
	size_t len = strlen(s) + 1;
	char *copy = xmalloc(len);
	memcpy(copy, s, len);
	return copy;
}

// various fields
#define _OP(n) (((n) & 7) << 0)
#define _C(n) (((n) & 7) << 3)
#define _A(n) (((n) & 7) << 6)
#define _B(n) (((n) & 7) << 9)
#define _F(n) (((n) & 15) << 12)
#define _I7(n) (((n) & 0x7F) << 9)

// instr: siiiiii-jj------
// imm    sjjiiiiii
static inline unsigned _I9(unsigned n) {
	return ((n & 0x3F) << 9) | (n & 0xC0) | ((n & 0x100) << 7);
}

// instr: siiiiiijjj------
// imm    sjjjiiiiii
static inline unsigned _I10(unsigned n) {
	return ((n & 0x3F) << 9) | (n & 0x1C0) | ((n & 0x200) << 6);
}

// instr: siiiiiijjjkk----
// imm    skkjjjiiiiii
static inline unsigned _I12(unsigned n) {
	return ((n & 0x3F) << 9) | (n & 0x1C0) | ((n & 0x600) >> 5) | ((n & 0x800) << 4);
}

// Scatter a 6-bit unsigned value: low three bits to instruction bits 6-8,
// high three bits to instruction bits 12-14.
// NOTE(review): the low part used to be masked with 3, which dropped bit 2
// of the value (so e.g. 4 encoded the same as 0). Placing it at bit 8 is
// inferred from the other field layouts -- confirm against the ISA.
static inline unsigned _U6(unsigned n) {
	return ((n & 7) << 6) | ((n & 0x38) << 9);
}

// True if n fits in 6 unsigned bits (0..63).
int is_unsigned6(unsigned n) {
	return ((n & ~0x3Fu) == 0);
}

// The is_signedN() predicates are true when n, viewed as a 32-bit two's
// complement value, fits in N signed bits: bits N-1 and up must be all
// zeros or all ones.
int is_signed7(unsigned n) {
	n &= 0xFFFFFFC0;
	return ((n == 0) || (n == 0xFFFFFFC0));
}
int is_signed9(unsigned n) {
	n &= 0xFFFFFF00;
	return ((n == 0) || (n == 0xFFFFFF00));
}
int is_signed10(unsigned n) {
	n &= 0xFFFFFE00;
	return ((n == 0) || (n == 0xFFFFFE00));
}
int is_signed12(unsigned n) {
	n &= 0xFFFFF800;
	return ((n == 0) || (n == 0xFFFFF800));
}

// Output image and the index of the next word to be emitted.
u16 rom[65535];
u16 PC = 0;

// Kinds of label reference that may need patching once the label is known.
#define TYPE_PCREL_S9 1
#define TYPE_PCREL_S12 2
#define TYPE_ABS_U16 3

// A reference to a not-yet-defined label: which word to patch and how.
struct fixup {
	struct fixup *next;
	unsigned pc;
	unsigned type;
};

struct label {
	struct label *next;
	struct fixup *fixups;   // pending references (only while !defined)
	const char *name;
	unsigned pc;
	unsigned defined;
};

struct label *labels;
struct fixup *fixups;

// Patch the word at rom[addr] so that it refers to btarget.
// PC-relative types OR the displacement (btarget - addr - 1) into the
// existing instruction word; TYPE_ABS_U16 overwrites the word with the
// target address. Dies if the displacement does not fit or the type is
// unknown. name is used only for the diagnostic.
void fixup_branch(const char *name, int addr, int btarget, int type) {
	unsigned n;

	switch(type) {
	case TYPE_PCREL_S9:
		n = btarget - addr - 1;
		if (!is_signed9(n)) break;
		rom[addr] |= _I9(n);
		return;
	case TYPE_PCREL_S12:
		n = btarget - addr - 1;
		if (!is_signed12(n)) break;
		rom[addr] |= _I12(n);
		return;
	case TYPE_ABS_U16:
		rom[addr] = btarget;
		return;
	default:
		die("unknown branch type %d\n",type);
	}
	die("label '%s' at %08x is out of range of %08x\n", name,
	    (unsigned) btarget, (unsigned) addr);
}

// Look up a label by name (case-insensitive); returns 0 if not present.
static struct label *findlabel(const char *name) {
	struct label *l;
	for (l = labels; l; l = l->next)
		if (!strcasecmp(l->name, name))
			return l;
	return 0;
}

// Create an undefined label with no fixups and push it onto the list.
static struct label *newlabel(const char *name) {
	struct label *l = xmalloc(sizeof(*l));
	l->name = xstrdup(name);
	l->pc = 0;
	l->fixups = 0;
	l->defined = 0;
	l->next = labels;
	labels = l;
	return l;
}

// Define label name at address pc and resolve every reference that was
// recorded before the definition. Dies if the label is already defined.
void setlabel(const char *name, unsigned pc) {
	struct label *l = findlabel(name);
	struct fixup *f, *next;

	if (!l)
		l = newlabel(name);
	else if (l->defined)
		die("cannot redefine '%s'", name);

	l->pc = pc;
	l->defined = 1;
	// once defined, uselabel() patches directly, so the list is done with
	for (f = l->fixups; f; f = next) {
		next = f->next;
		fixup_branch(name, f->pc, l->pc, f->type);
		free(f);
	}
	l->fixups = 0;
}

// Return the name of the first label in the list whose pc matches, or 0.
const char *getlabel(unsigned pc) {
	struct label *l;
	for (l = labels; l; l = l->next)
		if (l->pc == pc)
			return l->name;
	return 0;
}

// Record that the word at pc refers to label name with the given fixup
// type. If the label is already defined the word is patched immediately;
// otherwise a fixup is queued (creating the label entry if needed).
void uselabel(const char *name, unsigned pc, unsigned type) {
	struct label *l = findlabel(name);
	struct fixup *f;

	if (l && l->defined) {
		fixup_branch(name, pc, l->pc, type);
		return;
	}
	if (!l)
		l = newlabel(name);

	f = xmalloc(sizeof(*f));
	f->pc = pc;
	f->type = type;
	f->next = l->fixups;
	l->fixups = f;
}

// Die if any label was referenced but never defined.
void checklabels(void) {
	struct label *l;
	for (l = labels; l; l = l->next) {
		if (!l->defined) {
			die("undefined label '%s'", l->name);
		}
	}
}

void disassemble(char *buf, unsigned pc, unsigned instr);

// Append one word to the image at PC and advance PC.
// Dies instead of writing past the end of rom[] (PC is 16 bits wide, so
// without this check it could index one past the array and then wrap).
void emit(unsigned instr) {
	if (PC >= sizeof(rom) / sizeof(rom[0]))
		die("program too large");
	rom[PC++] = instr;
}

// Write the image to fn as text: one 4-digit hex word per line followed by
// a comment holding the address, the disassembly, and the label (if any)
// located at that address. Dies if the file cannot be created or written.
void save(const char *fn) {
	const char *name;
	unsigned n;
	char dis[128];

	FILE *fp = fopen(fn, "w");
	if (!fp) die("cannot write to '%s'", fn);
	for (n = 0; n < PC; n++) {
		disassemble(dis, n, rom[n]);
		name = getlabel(n);
		if (name) {
			fprintf(fp, "%04x // %04x: %-25s <- %s\n", rom[n], n, dis, name);
		} else {
			fprintf(fp, "%04x // %04x: %s\n", rom[n], n, dis);
		}
	}
	// buffered write errors may only surface here
	if (ferror(fp)) {
		fclose(fp);
		die("error writing '%s'", fn);
	}
	if (fclose(fp)) die("error writing '%s'", fn);
}

// Capacity of the per-line token arrays (including the trailing tEOL).
#define MAXTOKEN 32

// Token kinds. The order matters:
//  - tAND..tMHI are subtracted from FIRST_ALU_OP to form the ALU function
//  - tR0..tLR form the register range
//  - everything after tNUMBER is a keyword matched against tnames[]
enum tokens {
	tEOL,
	tCOMMA, tCOLON, tOBRACK, tCBRACK, tDOT, tHASH, tSTRING, tNUMBER,
	tAND, tORR, tXOR, tNOT, tADD, tSUB, rSUB = tSUB /* old misspelling */, tSLT, tSLU,
	tSHL, tSHR, tROL, tROR, tMUL, tDUP, tSWP, tMHI,
	tLW, tSW, tLC, tSC, tB, tBL, tBZ, tBNZ,
	tMOV, tSGE, tSGU, tSNE, tNOP, tHALT,
	tR0, tR1, tR2, tR3, tR4, tR5, tR6, tR7,
	tSP, tLR,
	tEQU, tWORD, tASCII, tASCIIZ,
	NUMTOKENS,
};

// Printable name of each token, indexed by enum tokens. For keywords this
// is also the spelling recognised in source text.
// NOTE(review): tASCII is spelled "STRING" here, so the directive is
// written STRING rather than ASCII -- confirm this is intended.
char *tnames[] = {
	"<EOL>",
	",", ":", "[", "]", ".", "#", "<STRING>", "<NUMBER>",
	"AND", "ORR", "XOR", "NOT", "ADD", "SUB", "SLT", "SLU",
	"SHL", "SHR", "ROL", "ROR", "MUL", "DUP", "SWP", "MHI",
	"LW", "SW", "LC", "SC", "B", "BL", "BZ", "BNZ",
	"MOV", "SGE", "SGU", "SNE", "NOP", "HALT",
	"R0", "R1", "R2", "R3", "R4", "R5", "R6", "R7",
	"SP", "LR",
	"EQU", "WORD", "STRING", "ASCIIZ"
};

#define FIRST_ALU_OP tAND
#define LAST_ALU_OP tMHI 254 #define FIRST_REGISTER tR0 255 #define LAST_REGISTER tLR 256 257 int is_reg(unsigned tok) { 258 return ((tok >= FIRST_REGISTER) && (tok <= LAST_REGISTER)); 259 } 260 261 int is_alu_op(unsigned tok) { 262 return ((tok >= FIRST_ALU_OP) && (tok <= LAST_ALU_OP)); 263 } 264 265 unsigned to_func(unsigned tok) { 266 return tok - FIRST_ALU_OP; 267 } 268 269 unsigned to_reg(unsigned tok) { 270 if (tok == tLR) return 7; 271 if (tok == tSP) return 6; 272 return tok - FIRST_REGISTER; 273 } 274 275 int is_stopchar(unsigned x) { 276 switch (x) { 277 case 0: 278 case ' ': 279 case '\t': 280 case '\r': 281 case '\n': 282 case ',': 283 case ':': 284 case '[': 285 case ']': 286 case '.': 287 case '"': 288 case '#': 289 return 1; 290 default: 291 return 0; 292 } 293 } 294 int is_eoschar(unsigned x) { 295 switch (x) { 296 case 0: 297 case '\t': 298 case '\r': 299 case '"': 300 return 1; 301 default: 302 return 0; 303 } 304 } 305 306 int tokenize(char *line, unsigned *tok, unsigned *num, char **str) { 307 char *s; 308 int count = 0; 309 unsigned x, n, neg; 310 linenumber++; 311 312 for (;;) { 313 x = *line; 314 again: 315 if (count == 31) die("line too complex"); 316 317 switch (x) { 318 case 0: 319 goto alldone; 320 case ' ': 321 case '\t': 322 case '\r': 323 case '\n': 324 line++; 325 continue; 326 case '/': 327 if (line[1] == '/') 328 goto alldone; 329 case ';': 330 goto alldone; 331 case ',': 332 str[count] = ","; 333 num[count] = 0; 334 tok[count++] = tCOMMA; 335 line++; 336 continue; 337 case ':': 338 str[count] = ":"; 339 num[count] = 0; 340 tok[count++] = tCOLON; 341 line++; 342 continue; 343 case '[': 344 str[count] = "["; 345 num[count] = 0; 346 tok[count++] = tOBRACK; 347 line++; 348 continue; 349 case ']': 350 str[count] = "]"; 351 num[count] = 0; 352 tok[count++] = tCBRACK; 353 line++; 354 continue; 355 case '.': 356 str[count] = "."; 357 num[count] = 0; 358 tok[count++] = tDOT; 359 line++; 360 continue; 361 case '#': 362 str[count] = "#"; 363 
num[count] = 0; 364 tok[count++] = tHASH; 365 line++; 366 continue; 367 case '"': 368 str[count] = ++line; 369 num[count] = 0; 370 tok[count++] = tSTRING; 371 while (!is_eoschar(*line)) line++; 372 if (*line != '"') 373 die("unterminated string"); 374 *line++ = 0; 375 continue; 376 } 377 378 s = line++; 379 while (!is_stopchar(*line)) line++; 380 381 /* save the stopchar */ 382 x = *line; 383 *line = 0; 384 385 neg = (s[0] == '-'); 386 if (neg && isdigit(s[1])) s++; 387 388 str[count] = s; 389 if (isdigit(s[0])) { 390 num[count] = strtoul(s, 0, 0); 391 if(neg) num[count] = -num[count]; 392 tok[count++] = tNUMBER; 393 goto again; 394 } 395 if (isalpha(s[0])) { 396 num[count] = 0; 397 for (n = tNUMBER + 1; n < NUMTOKENS; n++) { 398 if (!strcasecmp(s, tnames[n])) { 399 str[count] = tnames[n]; 400 tok[count++] = n; 401 goto again; 402 } 403 } 404 405 while (*s) { 406 if (!isalnum(*s) && (*s != '_')) 407 die("invalid character '%c' in identifier", *s); 408 s++; 409 } 410 tok[count++] = tSTRING; 411 goto again; 412 } 413 die("invalid character '%c'", s[0]); 414 } 415 416 alldone: 417 str[count] = ""; 418 num[count] = 0; 419 tok[count++] = tEOL; 420 return count; 421 } 422 423 void expect(unsigned expected, unsigned got) { 424 if (expected != got) 425 die("expected %s, got %s", tnames[expected], tnames[got]); 426 } 427 428 void expect_register(unsigned got) { 429 if (!is_reg(got)) 430 die("expected register, got %s", tnames[got]); 431 } 432 433 #define REG(n) (tnames[FIRST_REGISTER + (n)]) 434 435 #define OP_ALU_RC_RA_RB 0x0000 436 #define OP_ADD_RC_RA_S7 0x0001 437 #define OP_MOV_RC_S10 0x0002 438 #define OP_LW_RC_RA_S7 0x0003 439 #define OP_BNZ_RC_S9 0x0004 440 #define OP_BZ_RC_S9 0x0104 441 #define OP_SW_RC_RA_S7 0x0005 442 #define OP_B_S12 0x0006 443 #define OP_BL_S12 0x000E 444 #define OP_B_RA 0x0007 445 #define OP_BL_RA 0x000F 446 #define OP_NOP 0x0207 447 #define OP_LC_RC_U6 0x0807 448 #define OP_SC_RC_U6 0x0A07 449 #define OP_SHL_RC_RA_1 0x0C07 450 #define 
OP_SHR_RC_RA_1 0x1C07 451 #define OP_ROL_RC_RA_1 0x2C07 452 #define OP_ROR_RC_RA_1 0x3C07 453 #define OP_MHI_RC_RA_S7 0x8007 454 455 #define ALU_AND 0 456 #define ALU_ORR 1 457 #define ALU_XOR 2 458 #define ALU_NOT 3 459 #define ALU_ADD 4 460 #define ALU_SUB 5 461 #define ALU_SLT 6 462 #define ALU_SLU 7 463 #define ALU_SHL 8 464 #define ALU_SHR 9 465 #define ALU_ROL 10 466 #define ALU_ROR 11 467 #define ALU_MUL 12 468 #define ALU_DUP 13 469 #define ALU_SWP 14 470 #define ALU_MHI 15 471 472 #define T0 tok[0] 473 #define T1 tok[1] 474 #define T2 tok[2] 475 #define T3 tok[3] 476 #define T4 tok[4] 477 #define T5 tok[5] 478 #define T6 tok[6] 479 #define T7 tok[7] 480 481 void assemble_line(int n, unsigned *tok, unsigned *num, char **str) { 482 unsigned instr = 0; 483 unsigned tmp; 484 if (T0 == tSTRING) { 485 if (T1 == tCOLON) { 486 setlabel(str[0], PC); 487 tok += 2; 488 num += 2; 489 str += 2; 490 n -= 2; 491 } else { 492 die("unexpected identifier '%s'", str[0]); 493 } 494 } 495 496 switch(T0) { 497 case tEOL: 498 /* blank lines are fine */ 499 return; 500 case tNOP: 501 emit(OP_NOP); 502 return; 503 case tNOT: 504 expect_register(T1); 505 expect(tCOMMA, T2); 506 expect_register(T3); 507 emit(OP_ALU_RC_RA_RB | _F(ALU_NOT) | _C(to_reg(T1)) | _A(to_reg(T3))); 508 return; 509 case tMOV: 510 expect_register(T1); 511 expect(tCOMMA, T2); 512 if (is_reg(T3)) { 513 emit(OP_ALU_RC_RA_RB | _F(ALU_AND) | _C(to_reg(T1)) | _A(to_reg(T3)) | _B(to_reg(T3))); 514 return; 515 } 516 expect(tNUMBER, T3); 517 emit(OP_MOV_RC_S10 | _C(to_reg(T1)) | _I10(num[3])); 518 if (!is_signed10(num[3])) { 519 // load high bits if needed 520 emit(OP_MHI_RC_RA_S7 | _C(to_reg(T1)) | _A(to_reg(T1)) | _I7(num[3] >> 10)); 521 } 522 return; 523 case tMHI: 524 expect_register(T1); 525 expect(tCOMMA, T2); 526 if (tok[3] == tNUMBER) { 527 if (num[3] & 0xFFC0) { 528 die("constant out of range for MHI"); 529 } 530 emit(OP_MHI_RC_RA_S7 | _C(to_reg(T1)) | _A(to_reg(T1)) | _I7(num[3])); 531 return; 532 } 533 // 
will be handled by general ALU path 534 break; 535 case tSHL: 536 case tSHR: 537 case tROL: 538 case tROR: 539 switch (T0) { 540 case tSHL: instr = OP_SHL_RC_RA_1; break; 541 case tSHR: instr = OP_SHR_RC_RA_1; break; 542 case tROL: instr = OP_ROL_RC_RA_1; break; 543 case tROR: instr = OP_ROR_RC_RA_1; break; 544 } 545 expect_register(T1); 546 expect(tCOMMA, T2); 547 expect_register(T3); 548 expect(tCOMMA, T4); 549 expect(tNUMBER, T5); 550 if (num[5] == 4) { 551 instr |= 0x200; 552 } else if(num[5] != 1) { 553 die("shift/rotate immediate not 1 or 4"); 554 } 555 emit(instr | _C(to_reg(T1)) | _A(to_reg(T3))); 556 return; 557 case tLW: 558 case tSW: 559 instr = (T0 == tLW ? OP_LW_RC_RA_S7 : OP_SW_RC_RA_S7); 560 expect_register(T1); 561 expect(tCOMMA, T2); 562 expect(tOBRACK, T3); 563 expect_register(T4); 564 if (T5 == tCOMMA) { 565 expect(tNUMBER, T6); 566 expect(tCBRACK, T7); 567 tmp = num[6]; 568 } else { 569 expect(tCBRACK, T5); 570 tmp = 0; 571 } 572 if (!is_signed7(tmp)) die("index too large"); 573 emit(instr | _C(to_reg(T1)) | _A(to_reg(T4)) | _I7(tmp)); 574 return; 575 case tLC: 576 case tSC: 577 instr = (T0 == tLC ? OP_LC_RC_U6 : OP_SC_RC_U6); 578 expect_register(T1); 579 expect(tCOMMA, T2); 580 expect(tNUMBER, T3); 581 if (!is_unsigned6(num[3])) die("invalid control register"); 582 emit(instr | _C(to_reg(T1)) | _U6(num[3])); 583 return; 584 case tB: 585 case tBL: 586 if (is_reg(T1)) { 587 instr = (T0 == tB) ? OP_B_RA : OP_BL_RA; 588 emit(instr | _A(to_reg(T1))); 589 } else { 590 instr = (T0 == tB) ? OP_B_S12 : OP_BL_S12; 591 if (T1 == tSTRING) { 592 emit(instr); 593 uselabel(str[1], PC - 1, TYPE_PCREL_S12); 594 } else if (T1 == tDOT) { 595 emit(instr | _I12(-1)); 596 } else { 597 die("expected register or address"); 598 } 599 } 600 return; 601 case tBZ: 602 case tBNZ: 603 instr = (T0 == tBZ) ? 
OP_BZ_RC_S9 : OP_BNZ_RC_S9; 604 expect_register(T1); 605 expect(tCOMMA, T2); 606 if (T3 == tSTRING) { 607 emit(instr | _C(to_reg(T1))); 608 uselabel(str[3], PC - 1, TYPE_PCREL_S9); 609 } else if (T3 == tDOT) { 610 emit(instr | _C(to_reg(T1)) | _I9(-1)); 611 } else { 612 die("expected register or address"); 613 } 614 return; 615 case tHALT: 616 emit(0xFFFF); //TODO 617 return; 618 case tWORD: 619 tmp = 1; 620 for (;;) { 621 if (tok[tmp] == tSTRING) { 622 emit(0); 623 uselabel(str[tmp++], PC - 1, TYPE_ABS_U16); 624 } else { 625 expect(tNUMBER, tok[tmp]); 626 emit(num[tmp++]); 627 } 628 if (tok[tmp] != tCOMMA) 629 break; 630 tmp++; 631 } 632 return; 633 case tASCII: 634 case tASCIIZ: { 635 unsigned n = 0, c = 0; 636 const unsigned char *s = (void*) str[1]; 637 expect(tSTRING, tok[1]); 638 while (*s) { 639 n |= ((*s) << (c++ * 8)); 640 if (c == 2) { 641 emit(n); 642 n = 0; 643 c = 0; 644 } 645 s++; 646 } 647 emit(n); 648 return; 649 } 650 } 651 if (is_alu_op(T0)) { 652 expect_register(T1); 653 expect(T2, tCOMMA); 654 expect_register(T3); 655 expect(T4, tCOMMA); 656 if ((tok[5] == tNUMBER) && (T0 == tADD)) { 657 if (!is_signed7(num[5])) { 658 die("add immediate must be +/-128"); 659 } 660 emit(OP_ADD_RC_RA_S7 | _C(to_reg(T1)) | _A(to_reg(T3)) | _I7(num[5])); 661 return; 662 } 663 expect_register(T5); 664 emit(OP_ALU_RC_RA_RB | _C(to_reg(T1)) | _A(to_reg(T3)) | _B(to_reg(T5)) | _F(to_func(T0))); 665 return; 666 } 667 668 die("HUH"); 669 } 670 671 void assemble(const char *fn) { 672 FILE *fp; 673 char line[256]; 674 int n; 675 676 unsigned tok[MAXTOKEN]; 677 unsigned num[MAXTOKEN]; 678 char *str[MAXTOKEN]; 679 char *s; 680 681 fp = fopen(fn, "r"); 682 if (!fp) die("cannot open '%s'", fn); 683 684 while (fgets(line, sizeof(line)-1, fp)) { 685 strcpy(linestring, line); 686 s = linestring; 687 while (*s) { 688 if ((*s == '\r') || (*s == '\n')) *s = 0; 689 else s++; 690 } 691 n = tokenize(line, tok, num, str); 692 #if DEBUG 693 { 694 int i 695 printf("%04d: (%02d) ", 
linenumber, n); 696 for (i = 0; i < n; i++) 697 printf("%s ", tnames[tok[i]]); 698 printf("\n"); 699 } 700 #endif 701 assemble_line(n, tok, num, str); 702 } 703 } 704 705 int main(int argc, char **argv) { 706 const char *outname = "out.hex"; 707 filename = argv[1]; 708 709 if (argc < 2) 710 die("no file specified"); 711 if (argc == 3) 712 outname = argv[2]; 713 714 assemble(filename); 715 linestring[0] = 0; 716 checklabels(); 717 save(outname); 718 719 return 0; 720 }