cpu32

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs

commit d140293442b0a02edddafd6f5c77c101292d374e
parent c56a07483d3776294346b62851a9ec0ea65f6c5b
Author: Brian Swetland <swetland@frotz.net>
Date:   Sun, 12 Feb 2012 00:57:57 -0800

cpu32: pipelined

- three stages: fetch/decode/execute | memory | writeback
- no protection against hazards between register reads and writes (yet)
- todo: stall in the presence of hazards, etc
- now uses syncsram instead of async

Diffstat:
M Makefile | 2 +-
M a32.c | 49 ++++++++++++++++++++++++++++---------------------
M de0nano/de0nano.v | 2 +-
M isa.txt | 8 +++++++-
M testbench.sav | 56 ++++++++++++++++++++++++++++++++++++++------------------
M verilog/alu.v | 2 +-
M verilog/control.v | 29 ++++++++++++++++-------------
M verilog/cpu32.v | 163 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------
A verilog/syncram.v | 22 ++++++++++++++++++++++
M verilog/testbench.v | 6 +++---
10 files changed, 258 insertions(+), 81 deletions(-)

diff --git a/Makefile b/Makefile @@ -1,7 +1,7 @@ # Copyright 2012, Brian Swetland. Use at your own risk. SRC := verilog/testbench.v -SRC += verilog/ram.v verilog/rom.v verilog/control.v +SRC += verilog/ram.v verilog/syncram.v verilog/rom.v verilog/control.v SRC += verilog/cpu32.v verilog/alu.v verilog/regfile.v SRC += verilog/uart.v SRC += verilog/library.v diff --git a/a32.c b/a32.c @@ -435,6 +435,7 @@ struct { const char *fmt; } decode[] = { { 0xFFFFFFFF, 0x00000000, "NOP", }, + { 0xFFFFFFFF, 0xEEEEEEEE, "NOP", }, { 0xFFFFFFFF, 0xFFFFFFFF, "HALT", }, { 0xFFF00000, 0x10F00000, "MOV @B, #@s", }, // ORR Rd, Rz, #I { 0xFFF00000, 0x1CF00000, "MLO @B, #0x@u", }, // MLO Rd, Rz, #I @@ -446,18 +447,18 @@ struct { { 0xFF000000, 0x22000000, "LW @B, [@A, #@s]", }, { 0xFF00FFFF, 0x32000000, "SW @B, [@A]", }, { 0xFF000000, 0x32000000, "SW @B, [@A, #@s]", }, - { 0xFFFF0000, 0x40FF0000, "B @r", }, - { 0xFFFF0000, 0x40FE0000, "BL @r", }, - { 0xFF0F0000, 0x400F0000, "BZ @A, @r", }, - { 0xFF0F0000, 0x400E0000, "BLZ @A, @r", }, - { 0xFFF0F000, 0x50F0F000, "B @B", }, - { 0xFFF0F000, 0x50F0E000, "BL @B", }, - { 0xFF00F000, 0x5000F000, "BZ @A, @B", }, - { 0xFF00F000, 0x5000E000, "BLZ @A, @B", }, - { 0xFF0F0000, 0x480F0000, "BNZ @A, @r", }, - { 0xFF0F0000, 0x480E0000, "BLNZ @A, @r", }, - { 0xFF00F000, 0x5800F000, "BNZ @A, @B", }, - { 0xFF00F000, 0x5800E000, "BLNZ @A, @B", }, + { 0xFFFF0000, 0x4FFF0000, "B @r", }, + { 0xFFFF0000, 0x4FFE0000, "BL @r", }, + { 0xFF0F0000, 0x4F0F0000, "BZ @A, @r", }, + { 0xFF0F0000, 0x4F0E0000, "BLZ @A, @r", }, + { 0xFFF0F000, 0x6FF0F000, "B @B", }, + { 0xFFF0F000, 0x6FF0E000, "BL @B", }, + { 0xFF00F000, 0x6F00F000, "BZ @A, @B", }, + { 0xFF00F000, 0x6F00E000, "BLZ @A, @B", }, + { 0xFF0F0000, 0x5F0F0000, "BNZ @A, @r", }, + { 0xFF0F0000, 0x5F0E0000, "BLNZ @A, @r", }, + { 0xFF00F000, 0x7F00F000, "BNZ @A, @B", }, + { 0xFF00F000, 0x7F00E000, "BLNZ @A, @B", }, { 0x00000000, 0x00000000, "UNDEFINED", }, }; @@ -497,11 +498,17 @@ void assemble_line(int n, unsigned 
*tok, unsigned *num, char **str) { /* blank lines are fine */ return; case tNOP: - emit(0x00000000); + emit(0xEEEEEEEE); return; case tMOV: expect_register(tok[1]); expect(tCOMMA,tok[2]); + if (is_register(tok[3])) { + /* MOV A,B -> ORR A, B, B */ + tmp = to_register(tok[3]); + emit(TO_D(to_register(tok[1])) | TO_A(tmp) | TO_B(tmp)); + return; + } expect(tNUMBER,tok[3]); if (num[3] == 0xFFFF) { /* special case, need to use MLO */ @@ -535,13 +542,13 @@ void assemble_line(int n, unsigned *tok, unsigned *num, char **str) { tmp = 14; } if (is_register(tok[1])) { - emit(0x50F00000 | TO_D(tmp) | TO_B(to_register(tok[1]))); + emit(0x6FF00000 | TO_D(tmp) | TO_B(to_register(tok[1]))); } else if (tok[1] == tSTRING) { - emit(0x40F00000 | TO_B(tmp)); + emit(0x4FF00000 | TO_B(tmp)); uselabel(str[1], PC - 1, 16); } else if ((tok[1] == tNUMBER) || (tok[1] == tDOT)) { if (!is_signed_16(num[1])) die("branch target out of range"); - emit(0x40F00000 | TO_B(tmp) | TO_I16(num[1])); + emit(0x4FF00000 | TO_B(tmp) | TO_I16(num[1])); } else { die("expected branch target, got %s", tnames[tok[1]]); } @@ -551,16 +558,16 @@ void assemble_line(int n, unsigned *tok, unsigned *num, char **str) { case tBLNZ: case tBLZ: switch (tok[0]) { - case tBZ: instr = 0x40000000; tmp = 15; break; - case tBNZ: instr = 0x48000000; tmp = 15; break; - case tBLZ: instr = 0x40000000; tmp = 14; break; - case tBLNZ: instr = 0x48000000; tmp = 14; break; + case tBZ: instr = 0x4F000000; tmp = 15; break; + case tBNZ: instr = 0x5F000000; tmp = 15; break; + case tBLZ: instr = 0x4F000000; tmp = 14; break; + case tBLNZ: instr = 0x5F000000; tmp = 14; break; } expect_register(tok[1]); expect(tCOMMA,tok[2]); instr |= TO_A(to_register(tok[1])); if (is_register(tok[3])) { - emit(instr | 0x10000000 | TO_D(tmp) | TO_B(to_register(tok[3]))); + emit(instr | 0x20000000 | TO_D(tmp) | TO_B(to_register(tok[3]))); } else if (tok[3] == tSTRING) { emit(instr | TO_B(tmp)); uselabel(str[3], PC - 1, 16); diff --git a/de0nano/de0nano.v 
b/de0nano/de0nano.v @@ -45,7 +45,7 @@ cpu32 cpu( .d_data_r(cpurdata), .d_data_w(ramwdata), .d_addr(ramaddr), - .d_we(ramwe) + .d_data_we(ramwe) ); // ugly hack for now diff --git a/isa.txt b/isa.txt @@ -21,6 +21,11 @@ Core Instruction Set 50 BLZ Rd, Ra, Rb if (Ra == 0) { Rd = PC + 4, PC = Rb } 58 BLNZ Rd, Ra, Rb if (Ra != 0) { Rd = PC + 4, PC = Rb } +4F BLZ Rd, Ra, rel if (Ra == 0) { Rd = PC + 4, PC += I } +5F BLNZ Rd, Ra, rel if (Ra != 0) { Rd = PC + 4, PC += I } +6F BLZ Rd, Ra, Rb if (Ra == 0) { Rd = PC + 4, PC = Rb } +7F BLNZ Rd, Ra, Rb if (Ra != 0) { Rd = PC + 4, PC = Rb } + Extended Instruction Set (tbd) ------------------------ @@ -50,7 +55,7 @@ XB SGT Rd, Ra, Rb Rd = Ra > Rb XC MLO Rd, Ra, Rb Rd = (Ra & 0xFFFF0000) | (Rb & 0xFFFF) XD MHI Rd, Ra, Rb Rd = (Ra & 0xFFFF) | (Rb << 16) XE ASR Rd, Ra, Rb Rd = (Ra >>> Rb[0:4]) -XF MUL Rd, Ra, Rb Rd = Ra * Rb +XF NOP Rd, Ra, Rb Rd = Ra Pseudo Instructions ------------------- @@ -58,6 +63,7 @@ Pseudo Instructions MOV Rd, Rb OR Rd, R0, Rb SNE Rd, Ra, Rb SUB Rd, Ra, Rb NOT Rd, Ra XOR Rd, Ra, #-1 +NOP BLNZ Rz, Rz, 0xFFFF (0x5FFFFFFF) Registers --------- diff --git a/testbench.sav b/testbench.sav @@ -1,37 +1,57 @@ [timestart] 0 -[size] 1248 600 +[size] 1260 725 [pos] -1 -1 -*-6.007232 37 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 +*-5.861536 147 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 [treeopen] testbench. [treeopen] testbench.cpu. 
@28 -testbench.clk +testbench.cpu.clk +@800200 +-F/D/E @22 testbench.cpu.pc[31:0] testbench.cpu.ir[31:0] @28 -testbench.cpu.ctl_ram_rd +testbench.cpu.ctl_ram_we testbench.cpu.ctl_regs_we -testbench.cpu.ctl_branch +testbench.cpu.ctl_wdata_ram +@22 +testbench.cpu.result[31:0] +@29 +testbench.cpu.hazard_rrw +@1000200 +-F/D/E +@800200 +-MEM +@22 +testbench.cpu.mem.regs_wsel[3:0] +@28 +testbench.cpu.mem.regs_we +testbench.cpu.mem.mem_we +testbench.cpu.mem.wdata_ram +@1000200 +-MEM @800200 --ALU +-WB +@28 +testbench.cpu.mem.in_wdata_ram +@22 +testbench.cpu.wb.data[31:0] +@28 +testbench.cpu.wb.we @22 -testbench.cpu.alu.left[31:0] -testbench.cpu.alu.right[31:0] -testbench.cpu.alu.out[31:0] +testbench.cpu.wb.wsel[3:0] @1000200 --ALU +-WB @800200 --MEMORY +-RAM @22 -testbench.cpu.d_addr[31:0] -testbench.cpu.d_data_r[31:0] -testbench.cpu.d_data_w[31:0] +testbench.ram.addr[7:0] +testbench.ram.rdata[31:0] +testbench.ram.wdata[31:0] @28 -testbench.cpu.d_we +testbench.ram.we @1000200 --MEMORY -@23 -testbench.cpu.REGS.wdata[31:0] +-RAM [pattern_trace] 1 [pattern_trace] 0 diff --git a/verilog/alu.v b/verilog/alu.v @@ -31,6 +31,6 @@ always @ (*) 4'b1100: out <= { left[31:16], right[15:0] }; 4'b1101: out <= { right[15:0], left[31:16] }; 4'b1110: out <= (left >>> right[4:0]); - 4'b1111: out <= (left * right); + 4'b1111: out <= left; endcase endmodule diff --git a/verilog/control.v b/verilog/control.v @@ -11,7 +11,7 @@ module control ( output ctl_regs_we, // 1=write to reg file output ctl_ram_we, // 1=write to ram output ctl_alu_altdest, // 0=alu.daddr=opd, 1=alu.daddr=opb - output [1:0] ctl_wdata_src, // 00=alu,01=ram,10=pc+4,11=0 + output ctl_wdata_ram, // 0=alu, 1=ram output ctl_branch_ind, // 0=relative branch, 1=indirect branch output ctl_branch_taken // 0=pc=pc+4, 1=pc=branch_to @@ -20,26 +20,29 @@ module control ( wire ctl_branch_op; wire ctl_branch_nz; -reg [7:0] control; +reg [6:0] control; always @ (*) case (opcode) - 4'b0000: control = 8'b00100000; // ALU Rd, Ra, Rb - 
4'b0001: control = 8'b01101000; // ALU Rd, Ra, #I - 4'b0010: control = 8'b01101001; // LW Rd, [Ra, #I] - 4'b0011: control = 8'b01010000; // SW Rd, [Ra, #I] - 4'b0100: control = 8'b10101110; // B rel16 - 4'b0101: control = 8'b10100110; // B Rb - default: control = 8'b00000000; + 4'b0000: control = 7'b0010000; // ALU Rd, Ra, Rb + 4'b0001: control = 7'b0110100; // ALU Rd, Ra, #I + 4'b0010: control = 7'b0110101; // LW Rd, [Ra, #I] + 4'b0011: control = 7'b0101000; // SW Rd, [Ra, #I] + 4'b0100: control = 7'b1010110; // BLZ rel16 + 4'b0101: control = 7'b1010110; // BLNZ rel16 + 4'b0110: control = 7'b1010010; // BLZ Rb + 4'b0111: control = 7'b1010010; // BLNZ Rb + 4'b1110: control = 7'b1100000; // NOP + default: control = 7'b0000000; endcase assign { ctl_alu_pc, ctl_alu_imm, ctl_regs_we, ctl_ram_we, - ctl_alu_altdest, ctl_branch_op, ctl_wdata_src - } = control[7:0]; + ctl_alu_altdest, ctl_branch_op, ctl_wdata_ram + } = control; -assign ctl_branch_nz = opfunc[3]; -assign ctl_branch_ind = opcode[0]; +assign ctl_branch_nz = opcode[0]; +assign ctl_branch_ind = opcode[1]; assign ctl_branch_taken = (ctl_branch_op & (ctl_adata_zero != ctl_branch_nz)); endmodule diff --git a/verilog/cpu32.v b/verilog/cpu32.v @@ -12,7 +12,7 @@ module cpu32 ( output [31:0] d_addr, output [31:0] d_data_w, input [31:0] d_data_r, - output d_we + output d_data_we ); wire [31:0] ir, pc; @@ -23,7 +23,6 @@ wire [3:0] opcode, opfunc, opsela, opselb, opseld; wire [15:0] opimm16; wire [31:0] adata, bdata, wdata, result; -wire [3:0] alu_wsel; assign opcode = ir[31:28]; assign opfunc = ir[27:24]; @@ -40,7 +39,7 @@ wire ctl_alu_imm; wire ctl_regs_we; wire ctl_ram_we; wire ctl_alu_altdest; -wire [1:0] ctl_wdata_src; +wire ctl_wdata_ram; wire ctl_branch_ind; wire ctl_branch_taken; @@ -54,7 +53,7 @@ control control( .ctl_regs_we(ctl_regs_we), .ctl_ram_we(ctl_ram_we), .ctl_alu_altdest(ctl_alu_altdest), - .ctl_wdata_src(ctl_wdata_src), + .ctl_wdata_ram(ctl_wdata_ram), .ctl_branch_ind(ctl_branch_ind), 
.ctl_branch_taken(ctl_branch_taken) @@ -71,23 +70,27 @@ register #(32) PC ( .dout(pc) ); +/* these arrive from writeback */ +wire [31:0] regs_wdata; +wire [3:0] regs_wsel; +wire regs_we; + regfile REGS ( .reset(reset), .clk(clk), - .we(ctl_regs_we), - .wsel(alu_wsel), .wdata(wdata), + .we(regs_we), + .wsel(regs_wsel), .wdata(regs_wdata), .asel(opsela), .adata(adata), .bsel(opselb), .bdata(bdata) ); -mux4 #(32) mux_wdata_src( - .sel(ctl_wdata_src), - .in0(result), - .in1(d_data_r), - .in2(pc_plus_4), - .in3(32'b0), - .out(wdata) - ); +// attempt to identify hazards +wire hazard_rrw; +assign hazard_rrw = (((regs_wsel == opsela) | (regs_wsel == opselb)) & regs_we); + +assign i_addr = pc; +assign ir = i_data; +//assign ir = (hazard_rrw ? 32'hEEEEEEEE : i_data); assign pc_plus_4 = (pc + 32'h4); @@ -106,9 +109,6 @@ mux2 #(32) mux_pc_source( .out(next_pc) ); -assign i_addr = pc; -assign ir = i_data; - wire [31:0] ainput; wire [31:0] binput; @@ -126,11 +126,13 @@ mux2 #(32) mux_alu_right( .out(binput) ); +wire [3:0] ctl_wsel; + mux2 #(4) alu_wsel_mux( .sel(ctl_alu_altdest), .in0(opseld), .in1(opselb), - .out(alu_wsel) + .out(ctl_wsel) ); alu alu( @@ -140,9 +142,126 @@ alu alu( .out(result) ); -// SW operation always writes Rb (aka Rd) -assign d_addr = result; -assign d_data_w = bdata; -assign d_we = ctl_ram_we; +wire [31:0] mem_data; +wire [3:0] mem_wsel; +wire mem_we; + +memory mem( + .clk(clk), + + .in_alu_data(result), + .in_reg_data(bdata), + + .in_mem_we(ctl_ram_we), + .in_regs_we(ctl_regs_we), + .in_regs_wsel(ctl_wsel), + .in_wdata_ram(ctl_wdata_ram), + + .out_data(mem_data), + .out_wsel(mem_wsel), + .out_we(mem_we), + + .d_addr(d_addr), + .d_data_r(d_data_r), + .d_data_w(d_data_w), + .d_data_we(d_data_we) + ); + +writeback wb( + .clk(clk), + + .in_data(mem_data), + .in_wsel(mem_wsel), + .in_we(mem_we), + + .out_we(regs_we), + .out_wsel(regs_wsel), + .out_data(regs_wdata) + ); endmodule + + +module memory( + input clk, + + /* interface to sync sram */ + output [31:0] 
d_addr, + input [31:0] d_data_r, + output [31:0] d_data_w, + output d_data_we, + + /* interface to processor core */ + input [31:0] in_alu_data, + input [31:0] in_reg_data, + + input in_mem_we, + input in_regs_we, + input [3:0] in_regs_wsel, + input in_wdata_ram, + + output [31:0] out_data, + output [3:0] out_wsel, + output out_we + ); + + reg [31:0] alu_data; + reg [31:0] reg_data; + reg mem_we; + reg regs_we; + reg [3:0] regs_wsel; + reg wdata_ram; + + always @(posedge clk) begin + alu_data <= in_alu_data; + reg_data <= in_reg_data; + mem_we <= in_mem_we; + regs_we <= in_regs_we; + regs_wsel <= in_regs_wsel; + wdata_ram <= in_wdata_ram; + end + + assign d_addr = in_alu_data; + assign d_data_w = in_reg_data; + assign d_data_we = in_mem_we; + + mux2 #(32) mux_data( + .sel(wdata_ram), + .in0(alu_data), + .in1(d_data_r), + .out(out_data) + ); + + assign out_wsel = regs_wsel; + assign out_we = regs_we; +endmodule + +module writeback( + input clk, + + input [31:0] in_data, + input [3:0] in_wsel, + input in_we, + + output out_we, + output [3:0] out_wsel, + output [31:0] out_data + ); + + reg [31:0] data; + reg [3:0] wsel; + reg we; + + always @(posedge clk) begin + data <= in_data; + wsel <= in_wsel; + we <= in_we; + end + + assign out_we = we; + assign out_wsel = wsel; + assign out_data = data; +endmodule + + + diff --git a/verilog/syncram.v b/verilog/syncram.v @@ -0,0 +1,22 @@ +// RAM - Does not instantiate optimally on Altera FPGAs +// +// Copyright 2009, Brian Swetland. Use at your own risk. 
+ +`timescale 1ns/1ns + +module syncram #(parameter DWIDTH=16, parameter AWIDTH=3) ( + input clk, input we, + input [AWIDTH-1:0] addr, + input [DWIDTH-1:0] wdata, + output reg [DWIDTH-1:0] rdata + ); + +reg [DWIDTH-1:0] R[0:2**AWIDTH-1]; + +always @ (posedge clk) begin + if (we) + R[addr] <= wdata; + rdata <= R[addr]; +end + +endmodule diff --git a/verilog/testbench.v b/verilog/testbench.v @@ -31,7 +31,7 @@ cpu32 cpu( // .d_data_r({24'b0,urdata}), .d_data_w(ramwdata), .d_addr(ramaddr), - .d_we(ramwe) + .d_data_we(ramwe) ); rom #(32,8) rom( @@ -39,7 +39,7 @@ rom #(32,8) rom( .data(romdata) ); -ram #(32,8) ram( +syncram #(32,8) ram( .clk(clk), .addr(ramaddr[9:2]), .rdata(ramrdata), @@ -72,7 +72,7 @@ initial begin $dumpvars(0,testbench); end -initial #10000 $finish; +initial #1000 $finish; always @(posedge clk) begin