a32.c (15677B)
// Copyright 2009-2012, Brian Swetland. Use at your own risk.

#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <ctype.h>
#include <strings.h>
#include <string.h>

/* Input position and text of the current line; used by die() for context. */
static unsigned linenumber = 0;
static char linestring[256];
/* Name printed in diagnostics; main() replaces it with the input file name. */
static const char *filename = "a32";

FILE *ofp = 0;

/*
 * Report a fatal error and exit with status 1.
 *
 * The printf-style message is prefixed with "file:line: " and terminated
 * with a newline (callers must not add their own).  If a source line is
 * recorded in linestring it is echoed on a second line.
 */
void die(const char *fmt, ...) {
	va_list ap;
	fprintf(stderr, "%s:%u: ", filename, linenumber);
	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);
	fprintf(stderr, "\n");
	if (linestring[0])
		fprintf(stderr, "%s:%u: >> %s <<\n", filename, linenumber, linestring);
	exit(1);
}

/* malloc() that reports failure through die() instead of returning NULL. */
static void *xmalloc(size_t size) {
	void *p = malloc(size);
	if (!p)
		die("out of memory");
	return p;
}

/* Heap-allocated copy of a NUL-terminated string; dies on allocation failure. */
static char *xstrdup(const char *s) {
	size_t len = strlen(s) + 1;
	char *copy = xmalloc(len);
	memcpy(copy, s, len);
	return copy;
}

/*
 * Return 1 if n, interpreted as a 32-bit two's complement value, fits in a
 * signed 16-bit field (-32768 .. 32767), otherwise 0.
 */
int is_signed_16(unsigned n) {
	if (n <= 0x7FFF)
		return 1;
	if (n >= 0xFFFF8000)
		return 1;
	return 0;
}

/* Output image: one 32-bit word per entry; PC is the index of the next word. */
#define ROMSIZE 65535u
unsigned rom[ROMSIZE];
unsigned PC = 0;

#define TYPE_BRANCH_16	1
#define TYPE_LW_PC_16	2

/* A reference to a not-yet-defined label that must be patched later. */
struct fixup {
	struct fixup *next;
	unsigned pc;	/* word index of the instruction to patch */
	unsigned type;	/* TYPE_BRANCH_16 or TYPE_LW_PC_16 */
};

struct label {
	struct label *next;
	struct fixup *fixups;	/* pending references, used while !defined */
	const char *name;
	unsigned pc;		/* word index the label refers to */
	unsigned defined;
};

struct label *labels;
struct fixup *fixups;

/*
 * Patch the low 16 bits of the instruction at word index addr so that it
 * refers to word index btarget.
 *
 * TYPE_BRANCH_16 stores a word offset relative to the following instruction;
 * TYPE_LW_PC_16 stores a byte offset relative to two words past addr.
 * Dies if the offset does not fit in a signed 16-bit field.
 */
void fixup_branch(const char *name, int addr, int btarget, int type) {
	unsigned n = 0;

	switch (type) {
	case TYPE_BRANCH_16:
		n = btarget - addr - 1;
		break;
	case TYPE_LW_PC_16:
		n = 4 * (btarget - addr - 2);
		break;
	default:
		die("unknown branch type %d", type);
	}

	if (!is_signed_16(n)) {
		die("label '%s' at %08x is out of range of %08x",
			name, (unsigned) btarget, (unsigned) addr);
	}
	rom[addr] = (rom[addr] & 0xFFFF0000) | (n & 0xFFFF);
}

/* Find a label by case-insensitive name; returns NULL if it is unknown. */
static struct label *findlabel(const char *name) {
	struct label *l;
	for (l = labels; l; l = l->next)
		if (!strcasecmp(l->name, name))
			return l;
	return 0;
}

/* Allocate a label with no pending fixups and push it on the global list. */
static struct label *newlabel(const char *name, unsigned pc, unsigned defined) {
	struct label *l = xmalloc(sizeof(*l));
	l->name = xstrdup(name);
	l->pc = pc;
	l->fixups = 0;
	l->defined = defined;
	l->next = labels;
	labels = l;
	return l;
}

/*
 * Define label `name` at word index pc and resolve every reference that was
 * recorded before the definition.  Dies if the label is already defined.
 */
void setlabel(const char *name, unsigned pc) {
	struct label *l = findlabel(name);
	struct fixup *f;

	if (!l) {
		newlabel(name, pc, 1);
		return;
	}
	if (l->defined)
		die("cannot redefine '%s'", name);
	l->pc = pc;
	l->defined = 1;
	for (f = l->fixups; f; f = f->next)
		fixup_branch(name, f->pc, l->pc, f->type);
}

/* Return the name of a label located at word index pc, or NULL if none. */
const char *getlabel(unsigned pc) {
	struct label *l;
	for (l = labels; l; l = l->next)
		if (l->pc == pc)
			return l->name;
	return 0;
}

/*
 * Record that the instruction at word index pc refers to label `name`.
 * If the label is already defined the instruction is patched immediately;
 * otherwise a fixup is queued (creating an undefined label if needed).
 */
void uselabel(const char *name, unsigned pc, unsigned type) {
	struct label *l = findlabel(name);
	struct fixup *f;

	if (l && l->defined) {
		fixup_branch(name, pc, l->pc, type);
		return;
	}
	if (!l)
		l = newlabel(name, 0, 0);

	f = xmalloc(sizeof(*f));
	f->pc = pc;
	f->type = type;
	f->next = l->fixups;
	l->fixups = f;
}

/* Die if any label was referenced but never defined. */
void checklabels(void) {
	struct label *l;
	for (l = labels; l; l = l->next) {
		if (!l->defined) {
			die("undefined label '%s'", l->name);
		}
	}
}

void disassemble(char *buf, unsigned pc, unsigned instr);

/* Append one word to the output image; dies when the image is full. */
void emit(unsigned instr) {
	if (PC >= ROMSIZE)
		die("program too large (limit is %u words)", ROMSIZE);
	rom[PC++] = instr;
}

/*
 * Write the image to fn as text: one hex word per line followed by a
 * comment holding the byte address, the disassembly and, when a label
 * points at that word, the label name.
 */
void save(const char *fn) {
	const char *name;
	unsigned n;
	char dis[128];
	FILE *fp = fopen(fn, "w");
	if (!fp) die("cannot write to '%s'", fn);
	for (n = 0; n < PC; n++) {
		disassemble(dis, n * 4, rom[n]);
		name = getlabel(n);
		if (name) {
			fprintf(fp, "%08x // %04x: %-25s <- %s\n", rom[n], n*4, dis, name);
		} else {
			fprintf(fp, "%08x // %04x: %s\n", rom[n], n*4, dis);
		}
	}
	/* buffered write errors only surface when the stream is flushed */
	if (fclose(fp))
		die("error writing '%s'", fn);
}

#define MAXTOKEN 32

/* Token codes.  The order must match tnames[] below entry for entry. */
enum tokens {
	tEOL,
	tCOMMA, tCOLON, tOBRACK, tCBRACK, tDOT,
	tSTRING,
	tNUMBER,
	tORR, tAND, tADD, tSUB, tSHL, tSHR, tXOR, tTBS,
	tBIS, tBIC, tSLT, tSGT, tMLO, tMHI, tASR, tMUL,
	tB, tBL, tBZ, tBNZ, tBLZ, tBLNZ, tLW, tSW,
	tR0, tR1, tR2, tR3, tR4, tR5, tR6, tR7,
	tR8, tR9, tR10, tR11, tR12, tSP, tLR, tZR, tPC,
	tNOP, tSNE, tNOT, tMOV,
	tEQU, tWORD, tASCII, tASCIIZ,
	NUMTOKENS,
};

/* Printable name / keyword spelling of every token, indexed by token code. */
char *tnames[] = {
	"<EOL>",
	",", ":", "[", "]", ".",
	"<STRING>",
	"<NUMBER>",
	"ORR", "AND", "ADD", "SUB", "SHL", "SHR", "XOR", "TBS",
	"BIS", "BIC", "SLT", "SGT", "MLO", "MHI", "ASR", "MUL",
	"B", "BL", "BZ", "BNZ", "BLZ", "BLNZ", "LW", "SW",
	"R0", "R1", "R2", "R3", "R4", "R5", "R6", "R7",
	"R8", "R9", "R10", "R11", "R12", "SP", "LR", "ZR", "PC",
	"NOP", "SNE", "NOT", "MOV",
	"EQU", "WORD", "ASCII", "ASCIIZ"
};

#define FIRST_ALU_OP	tORR
#define LAST_ALU_OP	tMUL
#define FIRST_REGISTER	tR0
#define LAST_REGISTER	tZR

/* True for the sixteen register tokens R0..R12, SP, LR, ZR (PC is excluded). */
int is_register(unsigned tok) {
	return ((tok >= FIRST_REGISTER) && (tok <= LAST_REGISTER));
}

/* True for the sixteen ALU mnemonics ORR..MUL. */
int is_alu_op(int token) {
	return ((token >= FIRST_ALU_OP) && (token <= LAST_ALU_OP));
}

/* Convert a register token to its register number (0..15); dies otherwise. */
unsigned to_register(unsigned tok) {
	if (!is_register(tok)) die("not a register (%s)", tnames[tok]);
	return tok - FIRST_REGISTER;
}

/* True if x ends a bare word (number, keyword or identifier). */
int is_stopchar(unsigned x) {
	switch (x) {
	case 0:
	case ' ':
	case '\t':
	case '\r':
	case '\n':
	case ',':
	case ':':
	case '[':
	case ']':
	case '.':
	case '"':
		return 1;
	default:
		return 0;
	}
}

/*
 * True if x ends the scan of a quoted string.  Note that a tab or carriage
 * return stops the scan as well as the closing quote, in which case the
 * caller reports the string as unterminated.
 */
int is_eoschar(unsigned x) {
	switch (x) {
	case 0:
	case '\t':
	case '\r':
	case '"':
		return 1;
	default:
		return 0;
	}
}

/*
 * Map a bare word to its keyword token (case-insensitive), or return 0 if it
 * is not a keyword.  "STRING" is accepted as an alias for the ASCII directive
 * because earlier versions of the keyword table spelled it that way.
 */
static unsigned lookup_keyword(const char *s) {
	unsigned n;
	for (n = tNUMBER + 1; n < NUMTOKENS; n++) {
		if (!strcasecmp(s, tnames[n]))
			return n;
	}
	if (!strcasecmp(s, "STRING"))
		return tASCII;
	return 0;
}

/*
 * Split one source line into tokens.
 *
 * line is modified in place (token text is NUL-terminated inside it).  For
 * each token i, tok[i] receives the token code, num[i] the value of a
 * number (0 otherwise) and str[i] the token text.  The list always ends
 * with a tEOL token.  A "//" comment ends the line.  The arrays must hold
 * MAXTOKEN entries.  Increments the global line number.
 *
 * Returns the number of tokens stored, including the trailing tEOL.
 */
int tokenize(char *line, unsigned *tok, unsigned *num, char **str) {
	char *s;
	char *end;
	unsigned long value;
	int count = 0;
	unsigned x, n, neg;
	linenumber++;

	for (;;) {
		x = *line;
	again:
		/* keep the last slot free for the tEOL terminator */
		if (count == MAXTOKEN - 1) die("line too complex");

		switch (x) {
		case 0:
			goto alldone;
		case ' ':
		case '\t':
		case '\r':
		case '\n':
			line++;
			continue;
		case '/':
			if (line[1] == '/')
				goto alldone;
			/* a lone '/' is not a token; reject it below */
			break;
		case ',':
			str[count] = ",";
			num[count] = 0;
			tok[count++] = tCOMMA;
			line++;
			continue;
		case ':':
			str[count] = ":";
			num[count] = 0;
			tok[count++] = tCOLON;
			line++;
			continue;
		case '[':
			str[count] = "[";
			num[count] = 0;
			tok[count++] = tOBRACK;
			line++;
			continue;
		case ']':
			str[count] = "]";
			num[count] = 0;
			tok[count++] = tCBRACK;
			line++;
			continue;
		case '.':
			str[count] = ".";
			num[count] = 0;
			tok[count++] = tDOT;
			line++;
			continue;
		case '"':
			str[count] = ++line;
			num[count] = 0;
			tok[count++] = tSTRING;
			while (!is_eoschar(*line)) line++;
			if (*line != '"')
				die("unterminated string");
			*line++ = 0;
			continue;
		}

		s = line++;
		while (!is_stopchar(*line)) line++;

		/* save the stopchar; it is processed at 'again' once the word is done */
		x = *line;
		*line = 0;

		neg = (s[0] == '-');
		if (neg && isdigit((unsigned char) s[1])) s++;

		str[count] = s;
		if (isdigit((unsigned char) s[0])) {
			value = strtoul(s, &end, 0);
			if (*end)
				die("invalid number '%s'", s);
			if (value > 0xFFFFFFFFUL)
				die("number '%s' does not fit in 32 bits", s);
			num[count] = (unsigned) value;
			if (neg) num[count] = -num[count];
			tok[count++] = tNUMBER;
			goto again;
		}
		if (isalpha((unsigned char) s[0])) {
			num[count] = 0;
			n = lookup_keyword(s);
			if (n) {
				str[count] = tnames[n];
				tok[count++] = n;
				goto again;
			}

			while (*s) {
				if (!isalnum((unsigned char) *s) && (*s != '_'))
					die("invalid character '%c' in identifier", *s);
				s++;
			}
			tok[count++] = tSTRING;
			goto again;
		}
		die("invalid character '%c'", s[0]);
	}

alldone:
	str[count] = "";
	num[count] = 0;
	tok[count++] = tEOL;
	return count;
}


/* Die unless token `got` is exactly token `expected`. */
void expect(unsigned expected, unsigned got) {
	if (expected != got)
		die("expected %s, got %s", tnames[expected], tnames[got]);
}

/* Die unless token `got` is a register. */
void expect_register(unsigned got) {
	if (!is_register(got))
		die("expected register, got %s", tnames[got]);
}

/* Name of register number n (0..15). */
#define REG(n) (tnames[FIRST_REGISTER + (n)])

int match(unsigned n, unsigned mask, unsigned value)
{
	return (n & mask) == value;
}

/*
 * The append helpers write at buf and return a pointer just past what they
 * wrote.  append() does not NUL-terminate; the numeric variants do.
 */
char *append(char *buf, const char *s)
{
	while (*s)
		*buf++ = *s++;
	return buf;
}
char *append_u32(char *buf, unsigned n) {
	sprintf(buf,"%08x",n);
	return buf + strlen(buf);
}
char *append_u16(char *buf, unsigned n) {
	sprintf(buf,"%04x",n&0xFFFF);
	return buf + strlen(buf);
}
char *append_s16(char *buf, short n) {
	sprintf(buf,"%d",n);
	return buf + strlen(buf);
}

/*
 * Format instruction `instr` located at byte address pc into buf using the
 * template fmt.  Template text is copied verbatim except for '@' escapes:
 *
 *   @A @B @D  register named by bits 23:20 / 19:16 / 15:12
 *   @F        ALU mnemonic selected by bits 27:24
 *   @u        low 16 bits as four hex digits
 *   @s        low 16 bits as a signed decimal
 *   @r        branch target: pc + 4 + 4 * signed low 16 bits
 *   @l        pc-relative load address: pc + 8 + signed low 16 bits
 *
 * Unknown escapes are skipped.  buf must be large enough for the result.
 */
void printinst(char *buf, unsigned pc, unsigned instr, const char *fmt) {
	unsigned fn = (instr >> 24) & 0xF;
	unsigned a = (instr >> 20) & 0xF;
	unsigned b = (instr >> 16) & 0xF;
	unsigned d = (instr >> 12) & 0xF;
	unsigned i16 = instr & 0xFFFF;
	int s16 = ((short) i16) * 4;

	while (*fmt) {
		if (*fmt != '@') {
			*buf++ = *fmt++;
			continue;
		}
		switch (*++fmt) {
		case 'A':
			buf = append(buf,REG(a));
			break;
		case 'B':
			buf = append(buf,REG(b));
			break;
		case 'D':
			buf = append(buf,REG(d));
			break;
		case 'F': /* alu function */
			buf = append(buf,tnames[FIRST_ALU_OP + fn]);
			break;
		case 'u':
			buf = append_u16(buf,i16);
			break;
		case 's':
			buf = append_s16(buf,(short)i16);
			break;
		case 'r':
			buf = append(buf,"0x");
			buf = append_u32(buf,(pc + 4 + s16));
			break;
		case 'l':
			buf = append(buf,"0x");
			buf = append_u32(buf,(pc + 8 + ((short)i16)));
			break;
		case 0:
			goto done;
		}
		fmt++;
	}
done:
	*buf = 0;
}

/*
 * Disassembly patterns, tried in order; the first entry for which
 * (instr & mask) == value wins.  The final entry matches everything.
 */
struct {
	unsigned mask;
	unsigned value;
	const char *fmt;
} decode[] = {
	{ 0xFFFFFFFF, 0x00000000, "NOP", },
	{ 0xFFFFFFFF, 0xEEEEEEEE, "NOP", },
	{ 0xFFFFFFFF, 0xFFFFFFFF, "HALT", },
	{ 0xFFF00000, 0x10F00000, "MOV @B, #@s", }, // ORR Rd, Rz, #I
	{ 0xFFF00000, 0x1CF00000, "MLO @B, #0x@u", }, // MLO Rd, Rz, #I
	{ 0xFFF00000, 0x1DF00000, "MOV @B, #0x@u0000", }, // MHI Rd, Rz, #I
	{ 0xFF000000, 0x1C000000, "MLO @B, @A, #0x@u", }, // MLO Rd, Ra, #I
	{ 0xF0000000, 0x00000000, "@F @D, @A, @B", },
	{ 0xF0000000, 0x10000000, "@F @B, @A, @s", },
	{ 0xFF00FFFF, 0x22000000, "LW @B, [@A]", },
	{ 0xFF000000, 0x22000000, "LW @B, [@A, #@s]", },
//	{ 0xFF000000, 0x82000000, "LW @B, [PC, #@s]", },
	{ 0xFF000000, 0x82000000, "LW @B, [@l]", },
	{ 0xFF00FFFF, 0x32000000, "SW @B, [@A]", },
	{ 0xFF000000, 0x32000000, "SW @B, [@A, #@s]", },
	{ 0xFFFF0000, 0x4FFF0000, "B @r", },
	{ 0xFFFF0000, 0x4FFE0000, "BL @r", },
	{ 0xFF0F0000, 0x4F0F0000, "BZ @A, @r", },
	{ 0xFF0F0000, 0x4F0E0000, "BLZ @A, @r", },
	{ 0xFFF0F000, 0x6FF0F000, "B @B", },
	{ 0xFFF0F000, 0x6FF0E000, "BL @B", },
	{ 0xFF00F000, 0x6F00F000, "BZ @A, @B", },
	{ 0xFF00F000, 0x6F00E000, "BLZ @A, @B", },
	{ 0xFF0F0000, 0x5F0F0000, "BNZ @A, @r", },
	{ 0xFF0F0000, 0x5F0E0000, "BLNZ @A, @r", },
	{ 0xFF00F000, 0x7F00F000, "BNZ @A, @B", },
	{ 0xFF00F000, 0x7F00E000, "BLNZ @A, @B", },
	{ 0x00000000, 0x00000000, "UNDEFINED", },
};

/* Write the textual form of instr (at byte address pc) into buf. */
void disassemble(char *buf, unsigned pc, unsigned instr) {
	int n = 0;
	/* terminates because the last decode[] entry matches any instruction */
	for (n = 0 ;; n++) {
		if ((instr & decode[n].mask) == decode[n].value) {
			printinst(buf, pc, instr, decode[n].fmt);
			return;
		}
	}
}

/* Place a value into the A, B, D or 16-bit immediate field of an instruction. */
#define TO_A(n)		(((n) & 0xF) << 20)
#define TO_B(n)		(((n) & 0xF) << 16)
#define TO_D(n)		(((n) & 0xF) << 12)
#define TO_I16(n)	((n) & 0xFFFF)

/*
 * Assemble one tokenized line (as produced by tokenize()) and emit the
 * resulting word(s).  An optional leading "name:" defines a label at the
 * current PC.  Dies on any syntax error.
 */
void assemble_line(int n, unsigned *tok, unsigned *num, char **str) {
	unsigned instr = 0;
	unsigned tmp, tmp2;

	if (tok[0] == tSTRING) {
		if (tok[1] == tCOLON) {
			setlabel(str[0],PC);
			tok+=2;
			num+=2;
			str+=2;
			n-=2;
		} else {
			die("unexpected identifier '%s'", str[0]);
		}
	}

	switch(tok[0]) {
	case tEOL:
		/* blank lines are fine */
		return;
	case tNOP:
		emit(0xEEEEEEEE);
		return;
	case tMOV:
		expect_register(tok[1]);
		expect(tCOMMA,tok[2]);
		if (is_register(tok[3])) {
			/* MOV A,B -> ORR A, B, B */
			tmp = to_register(tok[3]);
			emit(TO_D(to_register(tok[1])) | TO_A(tmp) | TO_B(tmp));
			return;
		}
		expect(tNUMBER,tok[3]);
		if (num[3] == 0xFFFF) {
			/* special case, need to use MLO */
			emit(0x1CF0FFFF | TO_B(to_register(tok[1])));
			return;
		}
		tmp = num[3] & 0xFFFF8000;
		if ((tmp == 0) || (tmp == 0xFFFF8000)) {
			/* otherwise, sign extending MOV instruction will work */
			emit(0x10F00000 | TO_B(to_register(tok[1])) | TO_I16(num[3]));
			return;
		}
		/* MHI Rd, Rz, #I instruction to set the high bits */
		emit(0x1DF00000 | TO_B(to_register(tok[1])) | (TO_I16(num[3] >> 16)));
		if (num[3] & 0xFFFF) {
			/* MLO Rd, Rd, #I - in the low bits if present */
			emit(0x1C000000 | TO_A(to_register(tok[1])) | TO_B(to_register(tok[1])) | TO_I16(num[3]));
		}
		return;
	case tMHI:
		expect_register(tok[1]);
		expect(tCOMMA,tok[2]);
		expect(tNUMBER,tok[3]);
		emit(0x1D000000 | TO_B(to_register(tok[1])) | TO_I16(num[3]));
		return;
	case tB:
	case tBL:
		/* the B/D field selects 15 for a plain branch, 14 for branch-and-link */
		if (tok[0] == tB) {
			tmp = 15;
		} else {
			tmp = 14;
		}
		if (is_register(tok[1])) {
			emit(0x6FF00000 | TO_D(tmp) | TO_B(to_register(tok[1])));
		} else if (tok[1] == tSTRING) {
			emit(0x4FF00000 | TO_B(tmp));
			uselabel(str[1], PC - 1, TYPE_BRANCH_16);
		} else if (tok[1] == tDOT) {
			/* '.' is this instruction itself: offset -1 from the next word */
			emit(0x4FF00000 | TO_B(tmp) | TO_I16(-1));
		} else {
			die("expected branch target, got %s", tnames[tok[1]]);
		}
		return;
	case tBNZ:
	case tBZ:
	case tBLNZ:
	case tBLZ:
		tmp = 15;
		switch (tok[0]) {
		case tBZ:   instr = 0x4F000000; tmp = 15; break;
		case tBNZ:  instr = 0x5F000000; tmp = 15; break;
		case tBLZ:  instr = 0x4F000000; tmp = 14; break;
		case tBLNZ: instr = 0x5F000000; tmp = 14; break;
		}
		expect_register(tok[1]);
		expect(tCOMMA,tok[2]);
		instr |= TO_A(to_register(tok[1]));
		if (is_register(tok[3])) {
			emit(instr | 0x20000000 | TO_D(tmp) | TO_B(to_register(tok[3])));
		} else if (tok[3] == tSTRING) {
			emit(instr | TO_B(tmp));
			uselabel(str[3], PC - 1, TYPE_BRANCH_16);
		} else if (tok[3] == tDOT) {
			/* same encoding as the label form, with a fixed offset of -1 */
			emit(instr | TO_B(tmp) | TO_I16(-1));
		} else {
			die("expected branch target, got %s", tnames[tok[3]]);
		}
		return;
	case tLW:
	case tSW:
		if (tok[0] == tLW) {
			instr = 0x22000000;
		} else {
			instr = 0x32000000;
		}
		expect_register(tok[1]);
		expect(tCOMMA,tok[2]);
		if (tok[3] == tSTRING) {
			/* LW Rd, label -> pc-relative load, offset filled in by fixup */
			if (tok[0] == tSW)
				die("cannot store pc relative");
			emit(0x82F00000 | TO_B(to_register(tok[1])));
			uselabel(str[3], PC - 1, TYPE_LW_PC_16);
			return;
		}
		expect(tOBRACK,tok[3]);
		if (tok[4] == tPC) {
			if (tok[0] == tSW)
				die("cannot store pc relative");
			tmp2 = 0xF;
			instr = 0x82000000;
		} else {
			expect_register(tok[4]);
			tmp2 = to_register(tok[4]);
		}
		if (tok[5] == tCOMMA) {
			expect(tNUMBER, tok[6]);
			expect(tCBRACK, tok[7]);
			tmp = num[6];
		} else {
			expect(tCBRACK, tok[5]);
			tmp = 0;
		}
		if (!is_signed_16(tmp)) die("index too large");
		instr |= TO_B(to_register(tok[1])) | TO_A(tmp2) | TO_I16(tmp);
		emit(instr);
		return;
	case tWORD:
		/* WORD n [, n ...] emits each number as one raw word */
		tmp = 1;
		for (;;) {
			expect(tNUMBER, tok[tmp]);
			emit(num[tmp++]);
			if (tok[tmp] != tCOMMA)
				break;
			tmp++;
		}
		return;
	case tASCII:
	case tASCIIZ: {
		/*
		 * Pack the string four bytes per word, first byte in the low
		 * bits.  A final word is always emitted, so the data ends with
		 * at least one zero byte; both directives currently produce
		 * the same output.
		 */
		unsigned word = 0, nbytes = 0;
		const unsigned char *s;
		expect(tSTRING, tok[1]);
		s = (const unsigned char *) str[1];
		while (*s) {
			/* shift as unsigned: a byte >= 0x80 moved into bits 31:24 would overflow int */
			word |= ((unsigned) *s) << (nbytes++ * 8);
			if (nbytes == 4) {
				emit(word);
				word = 0;
				nbytes = 0;
			}
			s++;
		}
		emit(word);
		return;
	}
	}
	if (is_alu_op(tok[0])) {
		expect_register(tok[1]);
		expect(tCOMMA,tok[2]);
		expect_register(tok[3]);
		expect(tCOMMA,tok[4]);

		instr = ((tok[0] - FIRST_ALU_OP) << 24) | TO_A(to_register(tok[3]));

		if (is_register(tok[5])) {
			emit(instr | TO_B(to_register(tok[5])) | TO_D(to_register(tok[1])));
		} else if (tok[5] == tNUMBER) {
			/* accept 0..65535 as well as negative values that fit in 16 bits */
			if ((num[5] > 65535) && ((num[5] & 0xFFFF8000) != 0xFFFF8000)) die("immediate too large");
			emit(instr | 0x10000000 | TO_B(to_register(tok[1])) | TO_I16(num[5]));
		} else {
			die("expected register or #, got %s", tnames[tok[5]]);
		}
		return;
	}

	die("HUH");

}

/* Read source file fn line by line and assemble it into rom[]. */
void assemble(const char *fn)
{
	FILE *fp;
	char line[256];
	int n;

	unsigned tok[MAXTOKEN];
	unsigned num[MAXTOKEN];
	char *str[MAXTOKEN];
	char *s;

	fp = fopen(fn, "r");
	if (!fp) die("cannot open '%s'", fn);

	while (fgets(line, sizeof(line), fp)) {
		/* keep an unmodified copy for error messages (same size as line) */
		strcpy(linestring, line);
		s = linestring;
		while (*s) {
			if ((*s == '\r') || (*s == '\n')) *s = 0;
			else s++;
		}
		/* a full buffer without a newline means the line was cut short */
		if (!strchr(line, '\n') && !feof(fp)) {
			linenumber++;
			die("line too long");
		}
		n = tokenize(line, tok, num, str);
#if DEBUG
		{
			int i;
			printf("%04u: (%02d) ", linenumber, n);
			for (i = 0; i < n; i++)
				printf("%s ", tnames[tok[i]]);
			printf("\n");
		}
#endif
		assemble_line(n, tok, num, str);
	}
	fclose(fp);
}


/* usage: a32 <input> [<output>]   (output defaults to "out.hex") */
int main(int argc, char **argv)
{
	const char *outname = "out.hex";

	if (argc < 2)
		die("no file specified");
	filename = argv[1];
	if (argc == 3)
		outname = argv[2];

	assemble(filename);
	/* the last source line is no longer relevant to later diagnostics */
	linestring[0] = 0;
	checklabels();
	save(outname);

	return 0;
}