gateware

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README

commit 59efd750e9acc57dbbe70fb1e8bbbbcab987bdbb
parent 5bef5d98cf39d15a54fde93298eb14ef40eb18fb
Author: Brian Swetland <swetland@frotz.net>
Date:   Sat,  1 Dec 2018 16:32:46 -0800

cpu16: switch over to isa v5, add/update tests, tidy up

It's working in verilator and on ICE40UP5K.

Issues/Todo:
- there's a single-cycle load-use hazard: the instruction immediately
  after lw rX, [...] still sees stale data in rX.
- none of the control registers do anything
- need some more exhaustive instruction set tests

Diffstat:
M Makefile | 10 +++++-----
M hdl/cpu16.sv | 705 +++++++++++++++++++++++++++++++++++++++++--------------------------------------
A hdl/cpu16_alu.sv | 32 ++++++++++++++++++++++++++++++++
A hdl/cpu16_regs.sv | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M hdl/testbench.sv | 2 +-
M tests/000-mov-imm.s | 1 +
M tests/001-mov-imm-bits.s | 1 +
M tests/002-alu-ops-1.s | 4 ++--
D tests/004-alu-ops-ext.s | 21 ---------------------
A tests/005-shift-ops.s | 20 ++++++++++++++++++++
M tests/020-mem-write-offsets.s | 21 +++++++++++++--------
A tests/030-cond-branch.s | 41 +++++++++++++++++++++++++++++++++++++++++
12 files changed, 557 insertions(+), 380 deletions(-)

diff --git a/Makefile b/Makefile @@ -1,6 +1,6 @@ #CPU_SRCS := hdl/cpu/cpu.v hdl/cpu/alu.v hdl/cpu/regfile.v -CPU_SRCS := hdl/cpu16.sv +CPU_SRCS := hdl/cpu16.sv hdl/cpu16_regs.sv hdl/cpu16_alu.sv VGA_SRCS := hdl/vga/vga40x30x2.v hdl/vga/vga.v hdl/vga/videoram.v hdl/vga/chardata.v @@ -82,13 +82,13 @@ out/test16.hex: src/test16.s out/a16 out/d16 #out/test.hex: test.hex # cp test.hex out/test.hex -out/a16: src/a16v4.c src/d16v4.c +out/a16: src/a16v5.c src/d16v5.c @mkdir -p out - gcc -g -Wall -O1 -o out/a16 src/a16v4.c src/d16v4.c + gcc -g -Wall -O1 -o out/a16 src/a16v5.c src/d16v5.c -out/d16: src/d16v4.c +out/d16: src/d16v5.c @mkdir -p out - gcc -g -Wall -O1 -o out/d16 -DSTANDALONE=1 src/d16v4.c + gcc -g -Wall -O1 -o out/d16 -DSTANDALONE=1 src/d16v5.c out/icetool: src/icetool.c src/ftdi.c src/ftdi.h @mkdir -p out diff --git a/hdl/cpu16.sv b/hdl/cpu16.sv @@ -1,4 +1,4 @@ -// Copyright 2015, Brian Swetland <swetland@frotz.net> +// Copyright 2018, Brian Swetland <swetland@frotz.net> // Licensed under the Apache License, Version 2.0. 
`timescale 1ns / 1ps @@ -18,419 +18,438 @@ module cpu16( input dat_rd_rdy, input dat_wr_rdy, -`ifdef CPU16_WITH_TRACE - output [63:0]trace, -`endif - input reset ); -//localparam INS_NOP = 16'h0001; -localparam INS_NOP = 16'hAAA1; +// Control Signal Constants -// ---- FETCH ---- -reg [15:0]pc_next; -reg [15:0]ir_next; -reg ir_valid_next; +localparam SEL_ALU_OP_ADD = 2'b00; +localparam SEL_ALU_OP_MHI = 2'b01; +localparam SEL_ALU_OP_FUNC = 2'b10; +localparam SEL_ALU_OP_SHFT = 2'b11; -reg [15:0]pc = 16'd0; -reg [15:0]ir = 16'd0; -reg [15:0]ir_link = 16'b0; -reg ir_valid = 1'b0; +localparam SEL_XDATA_ADATA = 1'b0; +localparam SEL_XDATA_PC = 1'b1; -assign ins_rd_addr = pc_next; -assign ins_rd_req = 1'b1; +localparam SEL_YDATA_BDATA = 1'b0; +localparam SEL_YDATA_IMM = 1'b1; + +localparam SEL_REG_B_IR_B = 1'b0; +localparam SEL_REG_B_IR_C = 1'b1; + +localparam SEL_REG_W_IR_C = 1'b0; +localparam SEL_REG_W_R7 = 1'b1; -wire [15:0]pc_plus_1 = pc + 16'h0001; +localparam SEL_BR_ALU = 1'b0; +localparam SEL_BR_BDATA = 1'b1; -reg do_use_ra; -reg do_use_rb; -// if writeback is writing the register file and execute wants to -// stall to let writeback win -wire stall_writeback = (ex_do_wreg_alu | ex_do_wr_link) & wb_wreg; +wire ex_do_branch; +reg [15:0]ex_branch_tgt; -//TODO better name: this is a write register conflict -wire stall_write = - ex_do_wreg_alu & - ((do_use_ra & (ir_asel == ex_wsel)) | - (do_use_rb & (ir_bsel == ex_wsel))); +wire de_pause; -wire stall_branch = - (ir_valid & (do_branch_imm | do_branch_reg | do_branch_cond)) | - ex_do_branch_imm | ex_do_branch_reg | ex_do_branch_cond; +// Instruction Fetch (if) +reg [15:0]if_pc = 16'd0; -wire stall = (ir_valid & stall_write) | stall_branch | stall_writeback; +assign ins_rd_addr = if_pc_next; +assign ins_rd_req = 1'b1; + +wire [15:0]if_pc_plus_1 = if_pc + 16'h0001; -wire do_load_ir = ins_rd_rdy & (~stall); +wire [15:0]if_de_ir = ins_rd_data; +wire if_de_ir_valid = ins_rd_rdy & (~ex_do_branch); +reg [15:0]if_pc_next; 
always_comb begin if (reset) begin - pc_next = 16'h0000; + if_pc_next = 16'd0; end else if (ex_do_branch) begin - pc_next = ex_do_branch_reg ? ex_adata : ex_branch_tgt; - end else if (do_load_ir) begin - pc_next = pc_plus_1; + if_pc_next = ex_branch_tgt; + end else if (if_de_ir_valid & (~de_pause)) begin + if_pc_next = if_pc_plus_1; end else begin - pc_next = pc; + if_pc_next = if_pc; end - ir_next = ins_rd_data; +end + +always_ff @(posedge clk) begin + if_pc <= if_pc_next; +end + +// Instruction Decode (de) + +reg [15:0]de_ir = 16'd0; +reg [15:0]de_pc_plus_1 = 16'd0; +reg de_ir_valid = 1'b0; + +// Immediate Forms +// si7 siiiiiixxxxxxxxx -> ssssssssssiiiiii +// si9 siiiiiixjjxxxxxx -> ssssssssjjiiiiii +// si10 siiiiiijjjxxxxxx -> sssssssjjjiiiiii +// si12 siiiiiijjjkkxxxx -> ssssskkjjjiiiiii + +wire de_ir_imm_s = de_ir[15]; +wire [5:0]de_ir_imm_i = de_ir[14:9]; +wire [2:0]de_ir_imm_j = de_ir[8:6]; +wire [1:0]de_ir_imm_k = de_ir[5:4]; +wire [3:0]de_ir_sel_f = de_ir[15:12]; +wire [2:0]de_ir_sel_b = de_ir[11:9]; +wire [2:0]de_ir_sel_a = de_ir[8:6]; +wire [2:0]de_ir_sel_c = de_ir[5:3]; +wire [2:0]de_ir_opcode = de_ir[2:0]; +wire [5:0]de_ir_imm_u = { de_ir[14:12], de_ir[8:6] }; + +reg [15:0]de_ir_imm; +always_comb begin + casez (de_ir_opcode) + 3'b??1: de_ir_imm = { {10{de_ir_imm_s}}, de_ir_imm_i }; // si7 + 3'b?00: de_ir_imm = { {8{de_ir_imm_s}}, de_ir_imm_j[1:0], de_ir_imm_i }; // si9 + 3'b010: de_ir_imm = { {7{de_ir_imm_s}}, de_ir_imm_j, de_ir_imm_i }; // si10 + 3'b110: de_ir_imm = { {5{de_ir_imm_s}}, de_ir_imm_k, de_ir_imm_j, de_ir_imm_i }; // si12 + endcase +end - // for non-branch stalls we don't want to invalidate IR, - // just hold off from updating it - ir_valid_next = (stall_write | stall_writeback) ? 
ir_valid : do_load_ir; +reg [1:0]de_sel_alu_op; // choose alu op SEL_ALU_OP_* +reg de_sel_xdata; // choose alu x input SEL_XDATA_* +reg de_sel_ydata; // choose alu y input SEL_YDATA_* +reg de_sel_reg_b; // choose reg b addr SEL_REG_B_* +reg de_sel_reg_w; // choose reg w addr SEL_REG_W_* +reg de_sel_br; // choose branch tgt SEL_BR_* +reg de_do_zero_xdata; // force alu x input to 16'h0 +reg de_do_cond_zero; // branch condition (0=NZ, 1=Z) + +reg de_do_wr_reg; // write alu result to register +reg de_do_wr_link; // write PC+1 to R7 +reg de_do_rd_mem; // read memory during ex +reg de_do_wr_mem; // write memory during ex +reg de_do_uncon_branch; // execute unconditional branch +reg de_do_cond_branch; // execute conditional branch + +reg de_using_reg_a; +reg de_using_reg_b; + +reg [3:0]de_alu_op; +reg [2:0]de_regs_bsel; +reg [2:0]de_regs_wsel; +reg [2:0]de_regs_asel; +always_comb begin + case (de_sel_alu_op) + SEL_ALU_OP_ADD: de_alu_op = 4'b0100; + SEL_ALU_OP_MHI: de_alu_op = 4'b1111; + SEL_ALU_OP_FUNC: de_alu_op = de_ir_sel_f; + SEL_ALU_OP_SHFT: de_alu_op = {2'b10,de_ir_sel_f[1:0]}; + endcase + case (de_sel_reg_b) + SEL_REG_B_IR_B: de_regs_bsel = de_ir_sel_b; + SEL_REG_B_IR_C: de_regs_bsel = de_ir_sel_c; + endcase + case (de_sel_reg_w) + SEL_REG_W_IR_C: de_regs_wsel = de_ir_sel_c; + SEL_REG_W_R7: de_regs_wsel = 3'd7; + endcase + de_regs_asel = de_ir_sel_a; end always_ff @(posedge clk) begin - pc <= pc_next; - if (do_load_ir) - ir <= ir_next; - ir_link <= pc_plus_1; - ir_valid <= ir_valid_next; + if (~de_pause) begin + if (if_de_ir_valid) begin + de_ir <= if_de_ir; + de_pc_plus_1 <= if_pc_plus_1; + end + de_ir_valid <= if_de_ir_valid; + end end -// ---- DECODE ---- -reg [11:0]de_ext = 12'b0; -reg de_ext_rdy = 1'b0; - -// s6 alu-reg-imm load/store -// s7 pc-rel-cond-branch -// s9 mov-imm -// s11 pc-rel-branch -// s12 ext - -// fields decoded from instruction -wire [15:0]ir_imm_s6_raw = { {11 {ir[15]}}, ir[14:10] }; -wire [15:0]ir_imm_s7 = { {10 {ir[15]}}, ir[6], ir[14:10] 
}; -wire [15:0]ir_imm_s9_raw = { {8 {ir[15]}}, ir[9:7], ir[14:10] }; -wire [15:0]ir_imm_s11 = { {6 {ir[15]}}, ir[8:4], ir[14:10] }; -wire [15:0]ir_imm_s12 = { {5 {ir[15]}}, ir[9:4], ir[14:10] }; -wire [2:0]ir_csel = ir[6:4]; -wire [2:0]ir_asel = ir[9:7]; -wire [2:0]ir_bsel = ir[12:10]; -wire [2:0]ir_alu_op = ir[3] ? ir[2:0] : ir[15:13]; -wire [3:0]ir_opcode = ir[3:0]; - -reg [11:0]ir_ext_imm = 12'b0; -reg ir_ext_rdy = 1'b0; - -wire [15:0]ir_imm_s6 = { (ir_ext_rdy ? ir_ext_imm : ir_imm_s6_raw[15:4]), ir_imm_s6_raw[3:0] }; -wire [15:0]ir_imm_s9 = { (ir_ext_rdy ? ir_ext_imm : ir_imm_s9_raw[15:4]), ir_imm_s9_raw[3:0] }; - -// control signals -reg do_wreg_alu; // write from alu -reg do_adata_zero; // pass 0 to alu.xdata (instead of adata) -reg do_bdata_imm; // pass imm to alu.ydata (instead of bdata) -reg do_wr_link; // write PC + 1 to LR -reg do_branch_imm; // branch to imm -reg do_branch_reg; // branch to reg + imm -reg do_branch_cond; // branch if condition met -reg do_branch_zero; // condition zero(1) or notzero(0) -reg do_use_imm9_or_imm6; -reg do_mem_read; -reg do_mem_write; -reg do_set_ext; +wire de_hzd_reg_a; +wire de_hzd_reg_b; always_comb begin - do_use_ra = 1'b0; - do_use_rb = 1'b0; - do_wreg_alu = 1'b0; - do_adata_zero = 1'b0; - do_bdata_imm = 1'b0; - do_wr_link = 1'b0; - do_branch_imm = 1'b0; - do_branch_reg = 1'b0; - do_branch_cond = 1'b0; - do_branch_zero = 1'b0; - do_use_imm9_or_imm6 = 1'b0; - do_mem_read = 1'b0; - do_mem_write = 1'b0; - do_set_ext = 1'b0; - - casez (ir_opcode) - 4'b0000: begin // alu Rc, Ra, Rb - do_use_ra = 1'b1; - do_use_rb = 1'b1; - do_wreg_alu = 1'b1; + de_sel_alu_op = SEL_ALU_OP_ADD; + de_sel_xdata = SEL_XDATA_ADATA; + de_sel_ydata = SEL_YDATA_BDATA; + de_sel_reg_b = SEL_REG_B_IR_B; + de_sel_reg_w = SEL_REG_W_IR_C; + de_sel_br = SEL_BR_ALU; + de_do_zero_xdata = 1'b0; + de_do_cond_zero = 1'b0; + de_do_wr_reg = 1'b0; + de_do_wr_link = 1'b0; + de_do_rd_mem = 1'b0; + de_do_wr_mem = 1'b0; + de_do_uncon_branch = 1'b0; + de_do_cond_branch 
= 1'b0; + de_using_reg_a = 1'b0; + de_using_reg_b = 1'b0; + case (de_ir_opcode) + 3'b000: begin // ALU Rc, Ra, Rb + de_sel_alu_op = SEL_ALU_OP_FUNC; + de_sel_reg_b = SEL_REG_B_IR_B; + de_sel_xdata = SEL_XDATA_ADATA; + de_sel_ydata = SEL_YDATA_BDATA; + de_sel_reg_w = SEL_REG_W_IR_C; + de_do_wr_reg = 1'b1; + de_using_reg_a = 1'b1; + de_using_reg_b = 1'b1; end - 4'b0001: begin // expansion (nop) + 3'b001: begin // ADD Rc, Ra, si7 + de_sel_alu_op = SEL_ALU_OP_ADD; + de_sel_xdata = SEL_XDATA_ADATA; + de_sel_ydata = SEL_YDATA_IMM; + de_sel_reg_w = SEL_REG_W_IR_C; + de_do_wr_reg = 1'b1; + de_using_reg_a = 1'b1; + de_using_reg_b = 1'b1; end - 4'b0010: begin // ext si12 - do_set_ext = 1'b1; + 3'b010: begin // MOV Rc, si10 + de_sel_alu_op = SEL_ALU_OP_ADD; + de_sel_ydata = SEL_YDATA_IMM; + de_sel_reg_w = SEL_REG_W_IR_C; + de_sel_reg_b = SEL_REG_B_IR_C; + de_do_zero_xdata = 1'b1; + de_do_wr_reg = 1'b1; end - 4'b0011: begin // mov Rc, si9 - do_wreg_alu = 1'b1; - do_adata_zero = 1'b1; - do_bdata_imm = 1'b1; - do_use_imm9_or_imm6 = 1'b1; + 3'b011: begin // LW Rc, [Ra, si7] + de_sel_alu_op = SEL_ALU_OP_ADD; + de_sel_xdata = SEL_XDATA_ADATA; + de_sel_ydata = SEL_YDATA_IMM; + de_sel_reg_w = SEL_REG_W_IR_C; + de_do_rd_mem = 1'b1; + de_using_reg_a = 1'b1; end - 4'b0100: begin // lw Rc, [Ra, si6] - do_use_ra = 1'b1; - do_mem_read = 1'b1; - do_use_imm9_or_imm6 = 1'b0; + 3'b100: begin // BZ/BNZ Rc, si9 + de_sel_alu_op = SEL_ALU_OP_ADD; + de_sel_xdata = SEL_XDATA_PC; + de_sel_ydata = SEL_YDATA_IMM; + de_sel_reg_b = SEL_REG_B_IR_C; + de_sel_br = SEL_BR_ALU; + de_do_cond_branch = 1'b1; + de_do_cond_zero = de_ir[8]; + de_using_reg_b = 1'b1; end - 4'b0101: begin // sw Rc, [Ra, si6] - do_use_ra = 1'b1; - do_use_rb = 1'b1; - do_mem_write = 1'b1; - do_use_imm9_or_imm6 = 1'b0; + 3'b101: begin // SW Rc, [Ra, si7] + de_sel_alu_op = SEL_ALU_OP_ADD; + de_sel_xdata = SEL_XDATA_ADATA; + de_sel_ydata = SEL_YDATA_IMM; + de_sel_reg_b = SEL_REG_B_IR_C; + de_do_wr_mem = 1'b1; + de_using_reg_a = 1'b1; + 
de_using_reg_b = 1'b1; end - 4'b0110: begin // b imm12 / bl imm12 - do_branch_imm = 1'b1; - do_wr_link = ir[9]; + 3'b110: begin // B/BL si12 + de_sel_alu_op = SEL_ALU_OP_ADD; + de_sel_xdata = SEL_XDATA_PC; + de_sel_ydata = SEL_YDATA_IMM; + de_sel_br = SEL_BR_ALU; + de_do_wr_link = de_ir[3]; + de_do_uncon_branch = 1'b1; end - 4'b0111: begin // BZ/BNZ/B/BL - if (ir[5]) begin // b Ra / bl Ra - do_branch_reg = 1'b1; - do_wr_link = ir[4]; - end else begin // bz imm7 / bnz imm7 - do_branch_cond = 1'b1; - do_branch_zero = ~ir[4]; + 3'b111: begin + if (de_ir[15]) begin // MHI Rc, Ra, si7 + de_sel_alu_op = SEL_ALU_OP_MHI; + de_sel_xdata = SEL_XDATA_ADATA; + de_sel_ydata = SEL_YDATA_IMM; + de_sel_reg_w = SEL_REG_W_IR_C; + de_do_wr_reg = 1'b1; + de_using_reg_a = 1'b1; + end else case (de_ir[11:9]) + 3'b000: begin // B/BL Ra + de_sel_br = SEL_BR_BDATA; + de_do_wr_link = de_ir[3]; + de_do_uncon_branch = 1'b1; + de_using_reg_a = 1'b1; end - end - 4'b1???: begin // alu Rc, Ra, si6 - do_use_ra = 1'b1; - do_wreg_alu = 1'b1; - do_bdata_imm = 1'b1; - do_use_imm9_or_imm6 = 1'b0; + 3'b001: begin // NOP + end + 3'b010: begin // RSV0 (NOP) + //TODO: FAULT + end + 3'b011: begin // RSV1 (NOP) + //TODO: FAULT + end + 3'b100: begin // LC Rc, u6 + //TODO: CTRL REGS + end + 3'b101: begin // SC Rc, u6 + //TODO: CTRL REGS + end + 3'b110: begin // SHIFT Rc, Ra, 1 + // imm7 bit0 chooses shift-by-1 + de_sel_alu_op = SEL_ALU_OP_SHFT; + de_sel_xdata = SEL_XDATA_ADATA; + de_sel_ydata = SEL_YDATA_IMM; + de_sel_reg_w = SEL_REG_W_IR_C; + de_do_wr_reg = 1'b1; + de_using_reg_a = 1'b1; + end + 3'b111: begin // SHIFT Rc, Ra, 4 + // imm7 bit0 chooses shift-by-4 + de_sel_alu_op = SEL_ALU_OP_SHFT; + de_sel_xdata = SEL_XDATA_ADATA; + de_sel_ydata = SEL_YDATA_IMM; + de_sel_reg_w = SEL_REG_W_IR_C; + de_do_wr_reg = 1'b1; + de_using_reg_a = 1'b1; + end + endcase end endcase end -always_ff @(posedge clk) begin - if (ir_valid) - ir_ext_rdy <= do_set_ext; - if (ir_valid & do_set_ext) - ir_ext_imm <= ir_imm_s12[11:0]; 
-end +wire [15:0]regs_ex_adata; +wire [15:0]regs_ex_bdata; -wire [15:0]ex_adata; -wire [15:0]ex_bdata; -wire [15:0]ex_alu_rdata; +reg wb_regs_do_wr_reg = 1'b0; +reg wb_regs_do_wr_dat = 1'b0; +reg [2:0]wb_regs_wsel = 3'b0; +reg [15:0]wb_regs_wdata = 16'd0; -reg [2:0]wb_wsel = 3'b0; -reg wb_wreg = 1'b0; +wire [15:0]regs_wdata = wb_regs_do_wr_dat ? dat_rd_data : wb_regs_wdata; -regs16 regs( +cpu16_regs regs( .clk(clk), - .asel(ir_asel), - .bsel(do_mem_write ? ir_csel : ir_bsel), - .wsel(wb_wreg ? wb_wsel : ex_wsel), - .wreg(wb_wreg | ex_do_wreg_alu | ex_do_wr_link), - .adata(ex_adata), - .bdata(ex_bdata), - .wdata(wb_wreg ? dat_rd_data : (ex_do_wr_link ? ex_link : ex_alu_rdata)) + .asel(de_regs_asel), + .adata(regs_ex_adata), + .bsel(de_regs_bsel), + .bdata(regs_ex_bdata), + .wreg(wb_regs_do_wr_reg | wb_regs_do_wr_dat), + .wsel(wb_regs_wsel), + .wdata(regs_wdata) ); -// ---- EXECUTE ---- +// Execute (ex) + +reg [15:0]ex_adata; +reg [15:0]ex_bdata; +reg [15:0]alu_ex_rdata; + +reg [15:0]ex_pc_plus_1 = 16'd0; +reg [15:0]ex_imm = 16'd0; + +reg [3:0]ex_alu_op = 4'd0; +reg [2:0]ex_regs_wsel = 3'd0; +reg ex_sel_xdata = 1'b0; +reg ex_sel_ydata = 1'b0; + +reg ex_sel_br = 1'b0; +reg ex_do_zero_xdata = 1'b0; +reg ex_do_cond_zero = 1'b0; -reg [15:0]ex_branch_tgt = 16'b0; -reg [15:0]ex_link = 16'b0; -reg [2:0]ex_alu_op = 3'b0; -reg [2:0]ex_wsel = 3'b0; -reg ex_do_wreg_alu = 1'b0; -reg ex_do_adata_zero = 1'b0; -reg ex_do_bdata_imm = 1'b0; +reg ex_do_wr_reg = 1'b0; reg ex_do_wr_link = 1'b0; -reg ex_do_branch_imm = 1'b0; -reg ex_do_branch_reg = 1'b0; -reg ex_do_branch_cond = 1'b0; -reg ex_do_branch_zero = 1'b0; -reg ex_do_mem_read = 1'b0; -reg ex_do_mem_write = 1'b0; - -reg [15:0]ex_imm = 16'b0; - -wire ex_adata_zero = (ex_adata == 16'b0); -wire ex_do_branch = ex_do_branch_reg | ex_do_branch_imm | (ex_do_branch_cond & (ex_do_branch_zero == ex_adata_zero)); - -`ifdef CPU16_WITH_TRACE -assign trace = { - pc, - ir, - - ir_valid, - ir_ext_rdy, - ex_alu_op, - ex_wsel, - - ex_do_wreg_alu, 
- 1'b0, - ex_do_adata_zero, - ex_do_bdata_imm, - ex_do_branch_imm, - ex_do_mem_read, - ex_do_mem_write, - ex_do_wr_link, - - ex_imm - }; -`endif +reg ex_do_rd_mem = 1'b0; +reg ex_do_wr_mem = 1'b0; +reg ex_do_uncon_branch = 1'b0; +reg ex_do_cond_branch = 1'b0; + +reg ex_valid = 1'b0; always_ff @(posedge clk) begin - if (ir_valid && (~(stall_write | stall_writeback))) begin - ex_branch_tgt <= pc + (do_branch_imm ? ir_imm_s11 : ir_imm_s7); - ex_link <= ir_link; - // for mem-read or mem-write we use the ALU for Ra + imm7 - ex_alu_op <= (do_adata_zero | do_mem_read | do_mem_write) ? 3'b0 : ir_alu_op; - ex_wsel <= do_wr_link ? 3'd7 : ir_csel; - ex_do_wreg_alu <= do_wreg_alu; - ex_do_adata_zero <= do_adata_zero; - ex_do_bdata_imm <= do_bdata_imm; - ex_do_wr_link <= do_wr_link; - ex_do_branch_imm <= do_branch_imm; - ex_do_branch_reg <= do_branch_reg; - ex_do_branch_cond <= do_branch_cond; - ex_do_branch_zero <= do_branch_zero; - ex_do_mem_read <= do_mem_read; - ex_do_mem_write <= do_mem_write; - ex_imm <= (do_mem_read | do_mem_write) ? ir_imm_s6 : (do_use_imm9_or_imm6 ? 
ir_imm_s9 : ir_imm_s6); - end else if (~stall_writeback) begin - // if invalid, parameters are unchanged but actions - // must all be disabled - // TODO: better names to differentiate - ex_do_wreg_alu <= 1'b0; + ex_pc_plus_1 <= de_pc_plus_1; + ex_imm <= de_ir_imm; + ex_alu_op <= de_alu_op; + ex_regs_wsel <= de_regs_wsel; + ex_sel_xdata <= de_sel_xdata; + ex_sel_ydata <= de_sel_ydata; + ex_sel_br <= de_sel_br; + ex_do_cond_zero <= de_do_cond_zero; + ex_valid <= de_ir_valid; + if ((~de_ir_valid) | de_pause | ex_do_branch) begin + ex_do_zero_xdata <= 1'b0; + ex_do_wr_reg <= 1'b0; ex_do_wr_link <= 1'b0; - ex_do_branch_imm <= 1'b0; - ex_do_branch_reg <= 1'b0; - ex_do_branch_cond <= 1'b0; - ex_do_mem_read <= 1'b0; - ex_do_mem_write <= 1'b0; + ex_do_rd_mem <= 1'b0; + ex_do_wr_mem <= 1'b0; + ex_do_uncon_branch <= 1'b0; + ex_do_cond_branch <= 1'b0; + end else begin + ex_do_zero_xdata <= de_do_zero_xdata; + ex_do_wr_reg <= de_do_wr_reg; + ex_do_wr_link <= de_do_wr_link; + ex_do_rd_mem <= de_do_rd_mem; + ex_do_wr_mem <= de_do_wr_mem; + ex_do_uncon_branch <= de_do_uncon_branch; + ex_do_cond_branch <= de_do_cond_branch; end end +wire ex_is_cond_zero = (regs_ex_bdata == 16'd0); -alu16 alu( - .op(ex_alu_op), - .xdata(ex_do_adata_zero ? 16'b0 : ex_adata), - .ydata((ex_do_mem_read | ex_do_mem_write | ex_do_bdata_imm) ? 
ex_imm : ex_bdata), - .rdata(ex_alu_rdata) - ); +assign ex_do_branch = ex_do_uncon_branch | (ex_do_cond_branch & (ex_do_cond_zero == ex_is_cond_zero)); -assign dat_rw_addr = ex_alu_rdata; -assign dat_wr_data = ex_bdata; -assign dat_rd_req = ex_do_mem_read; -assign dat_wr_req = ex_do_mem_write; +reg [15:0]alu_x; +reg [15:0]alu_y; -always_ff @(posedge clk) begin - wb_wreg <= ex_do_mem_read; - wb_wsel <= ex_wsel; +always_comb begin + if (ex_do_zero_xdata) begin + alu_x = 16'd0; + end else begin + case (ex_sel_xdata) + SEL_XDATA_ADATA: alu_x = regs_ex_adata; + SEL_XDATA_PC: alu_x = ex_pc_plus_1; + endcase + end + case (ex_sel_ydata) + SEL_YDATA_BDATA: alu_y = regs_ex_bdata; + SEL_YDATA_IMM: alu_y = ex_imm; + endcase + case (ex_sel_br) + SEL_BR_ALU: ex_branch_tgt = alu_ex_rdata; + SEL_BR_BDATA: ex_branch_tgt = regs_ex_bdata; + endcase end -// ---- SIMULATION DEBUG ASSIST ---- +cpu16_alu alu( + .op(ex_alu_op), + .x(alu_x), + .y(alu_y), + .r(alu_ex_rdata) + ); -`ifdef verilator -reg [15:0]dbg_addr = 16'd0; -wire [47:0]ir_dbg_dis; -reg [47:0]ex_dbg_dis = 48'd0; +wire [15:0]ex_regs_wdata = ex_do_wr_link ? ex_pc_plus_1 : alu_ex_rdata; + +assign dat_rw_addr = alu_ex_rdata; +assign dat_wr_data = regs_ex_bdata; +assign dat_rd_req = ex_do_rd_mem; +assign dat_wr_req = ex_do_wr_mem; -assign ir_dbg_dis = { ir, 3'b0, ir_ext_rdy, ir_ext_imm, dbg_addr }; +// Write Back (wb) always_ff @(posedge clk) begin - dbg_addr <= pc; - ex_dbg_dis <= ir_dbg_dis; + wb_regs_wsel <= ex_do_wr_link ? 
3'd7 : ex_regs_wsel; + wb_regs_wdata <= ex_regs_wdata; + if (!ex_valid) begin + wb_regs_do_wr_reg <= 1'b0; + wb_regs_do_wr_dat <= 1'b0; + end else begin + wb_regs_do_wr_reg <= ex_do_wr_reg | (ex_do_wr_link & ex_do_branch) | ex_do_rd_mem; + wb_regs_do_wr_dat <= ex_do_rd_mem; + end + end -`endif -endmodule +assign de_hzd_reg_a = de_using_reg_a & ( + (ex_do_wr_reg & (de_regs_asel == ex_regs_wsel)) | + (wb_regs_do_wr_reg & (de_regs_asel == wb_regs_wsel))); +assign de_hzd_reg_b = de_using_reg_b & ( + (ex_do_wr_reg & (de_regs_bsel == ex_regs_wsel)) | + (wb_regs_do_wr_reg & (de_regs_bsel == wb_regs_wsel))); +assign de_pause = de_hzd_reg_a | de_hzd_reg_b; -module regs16( - input clk, - input [2:0]asel, - input [2:0]bsel, - input [2:0]wsel, - input wreg, - input [15:0]wdata, - output [15:0]adata, - output [15:0]bdata - ); +// ---- SIMULATION DEBUG ASSIST ---- `ifdef verilator -reg [15:0]rmem[0:7]; -reg [15:0]areg; -reg [15:0]breg; +reg [15:0]dbg_addr = 16'd0; +wire [31:0]ir_dbg_dis; +reg [31:0]ex_dbg_dis = 32'd0; + +assign ir_dbg_dis = { de_ir, dbg_addr }; always_ff @(posedge clk) begin - if (wreg) - rmem[wsel] <= wdata; - areg <= rmem[asel]; - breg <= rmem[bsel]; + dbg_addr <= if_pc; + ex_dbg_dis <= ir_dbg_dis; end - -assign adata = areg; -assign bdata = breg; -`else -`ifdef YOSYS -SB_RAM40_4K #( - .READ_MODE(0), - .WRITE_MODE(0) - ) -`else -SB_RAM256x16 -`endif - bank_a ( - .WADDR(wsel), - .RADDR(asel), - .MASK(16'b0), - .WDATA(wdata), - .RDATA(adata), - .WE(1'b1), - .WCLKE(wreg), - .WCLK(clk), - .RE(1'b1), - .RCLKE(1'b1), - .RCLK(clk) - ); - -`ifdef YOSYS -SB_RAM40_4K #( - .READ_MODE(0), - .WRITE_MODE(0) - ) -`else -SB_RAM256x16 `endif - bank_b ( - .WADDR(wsel), - .RADDR(bsel), - .MASK(16'b0), - .WDATA(wdata), - .RDATA(bdata), - .WE(1'b1), - .WCLKE(wreg), - .WCLK(clk), - .RE(1'b1), - .RCLKE(1'b1), - .RCLK(clk) - ); -`endif - -endmodule - - -module alu16( - input [2:0]op, - input [15:0]xdata, - input [15:0]ydata, - output [15:0]rdata - ); - -reg [15:0]r; - -always_comb 
begin - case (op) - 3'b000: r = xdata + ydata; - 3'b001: r = xdata - ydata; - 3'b010: r = xdata & ydata; - 3'b011: r = xdata | ydata; - 3'b100: r = xdata ^ ydata; - 3'b101: r = { {15 {1'b0}}, xdata < ydata }; - 3'b110: r = { {15 {1'b0}}, xdata >= ydata }; - 3'b111: r = xdata * ydata; - endcase -end - -assign rdata = r; endmodule diff --git a/hdl/cpu16_alu.sv b/hdl/cpu16_alu.sv @@ -0,0 +1,32 @@ +// Copyright 2018, Brian Swetland <swetland@frotz.net> +// Licensed under the Apache License, Version 2.0. + +module cpu16_alu( + input reg [3:0]op, + input reg [15:0]x, + input reg [15:0]y, + output reg [15:0]r + ); + +always_comb begin + case (op) + 4'b0000: r = x & y; + 4'b0001: r = x | y; + 4'b0010: r = x ^ y; + 4'b0011: r = ~x; + 4'b0100: r = x + y; + 4'b0101: r = x - y; + 4'b0110: r = { 15'd0, $signed(x) < $signed(y) }; + 4'b0111: r = { 15'd0, x < y }; + 4'b1000: r = y[0] ? {x[11:0], 4'b0} : {x[14:0], 1'b0}; // SHL 4 or 1 + 4'b1001: r = y[0] ? {4'b0, x[15:4]} : {1'b0, x[15:1]}; // SHR 4 or 1 + 4'b1010: r = y[0] ? {x[11:0], x[15:12]} : {x[14:0], x[15]}; // ROL 4 or 1 + 4'b1011: r = y[0] ? {x[3:0], x[15:4]} : {x[0], x[15:1]}; // ROR 4 or 1 + 4'b1100: r = x * y; + 4'b1101: r = { x[7:0], y[7:0] }; + 4'b1110: r = { x[7:0], y[15:8] }; + 4'b1111: r = { y[5:0], x[9:0] }; + endcase +end + +endmodule diff --git a/hdl/cpu16_regs.sv b/hdl/cpu16_regs.sv @@ -0,0 +1,79 @@ +// Copyright 2018, Brian Swetland <swetland@frotz.net> +// Licensed under the Apache License, Version 2.0. 
+ +module cpu16_regs( + input clk, + input [2:0]asel, + input [2:0]bsel, + input [2:0]wsel, + input wreg, + input [15:0]wdata, + output [15:0]adata, + output [15:0]bdata + ); + +`ifdef verilator +reg [15:0]rmem[0:7]; +reg [15:0]areg; +reg [15:0]breg; + +always_ff @(negedge clk) begin + if (wreg) + rmem[wsel] <= wdata; +end +always_ff @(posedge clk) begin + areg <= rmem[asel]; + breg <= rmem[bsel]; +end + +assign adata = areg; +assign bdata = breg; +`else +`ifdef YOSYS +SB_RAM40_4K #( + .READ_MODE(0), + .WRITE_MODE(0) + ) +`else +SB_RAM256x16 +`endif + bank_a ( + .WADDR(wsel), + .RADDR(asel), + .MASK(16'b0), + .WDATA(wdata), + .RDATA(adata), + .WE(1'b1), + .WCLKE(wreg), + .WCLK(clk), + .RE(1'b1), + .RCLKE(1'b1), + .RCLK(clk) + ); + +`ifdef YOSYS +SB_RAM40_4K #( + .READ_MODE(0), + .WRITE_MODE(0) + ) +`else +SB_RAM256x16 +`endif + bank_b ( + .WADDR(wsel), + .RADDR(bsel), + .MASK(16'b0), + .WDATA(wdata), + .RDATA(bdata), + .WE(1'b1), + .WCLKE(wreg), + .WCLK(clk), + .RE(1'b1), + .RCLKE(1'b1), + .RCLK(clk) + ); +`endif + +endmodule + + diff --git a/hdl/testbench.sv b/hdl/testbench.sv @@ -17,7 +17,7 @@ always @(posedge clk) begin // burp <= (count >= 16'd0010) && (count <= 16'd0012) ? 
1'b1 : 1'b0; if (count == 16'd0005) reset <= 1'b0; if (count == 16'd1000) $finish; - if (cpu.ir == 16'hFFFF) begin + if (cpu.de_ir == 16'hFFFF) begin for ( integer i = 0; i < 8; i++ ) begin $display(":REG R%0d %8X", i, cpu.regs.rmem[i]); end diff --git a/tests/000-mov-imm.s b/tests/000-mov-imm.s @@ -8,6 +8,7 @@ mov r6, 0x70 mov r7, 0x80 nop +nop halt ;R0 0010 diff --git a/tests/001-mov-imm-bits.s b/tests/001-mov-imm-bits.s @@ -7,6 +7,7 @@ mov r5, 0x20 mov r6, 0x40 mov r7, -256 nop +nop halt ;R0 0001 diff --git a/tests/002-alu-ops-1.s b/tests/002-alu-ops-1.s @@ -5,8 +5,8 @@ mov r3, 0x44 add r4, r1, r2 sub r5, r3, r1 orr r6, r1, r3 -sge r7, r3, r2 - +slt r7, r2, r3 +nop nop halt diff --git a/tests/004-alu-ops-ext.s b/tests/004-alu-ops-ext.s @@ -1,21 +0,0 @@ -mov r0, 0 -mov r1, 0x7777 -add r2, r0, 0x1234 -and r3, r1, 0xF1F1 -sub r4, r1, 0x1111 -slt r5, r1, 0x8000 -sge r6, r1, 0x8000 -sge r7, r1, 0x7777 - -nop -halt - -;R0 0000 -;R1 7777 -;R2 1234 -;R3 7171 -;R4 6666 -;R5 0001 -;R6 0000 -;R7 0001 - diff --git a/tests/005-shift-ops.s b/tests/005-shift-ops.s @@ -0,0 +1,20 @@ +mov r0, 0x4321 +shr r1, r0, 4 +shr r2, r1, 4 +shr r3, r2, 4 +shr r4, r0, 1 +shr r5, r4, 1 +shr r6, r5, 1 +shr r7, r6, 1 +nop +halt + +;R0 4321 +;R1 0432 +;R2 0043 +;R3 0004 +;R4 2190 +;R5 10c8 +;R6 0864 +;R7 0432 + diff --git a/tests/020-mem-write-offsets.s b/tests/020-mem-write-offsets.s @@ -1,6 +1,7 @@ -mov r0, 0 +mov r0, 0x80 mov r1, 0xff nop +nop sw r1, [r0, 0x00] sw r1, [r0, 0x01] sw r1, [r0, 0x02] @@ -10,11 +11,15 @@ sw r1, [r0, 0x10] sw r0, [r1, -1] sw r1, [r1, -15] -;0000 00ff -;0001 00ff -;0002 00ff -;0004 00ff -;0008 00ff -;0010 00ff -;00fe 0000 +nop +nop +halt + +;0080 00ff +;0081 00ff +;0082 00ff +;0084 00ff +;0088 00ff +;0090 00ff +;00fe 0080 ;00f0 00ff diff --git a/tests/030-cond-branch.s b/tests/030-cond-branch.s @@ -0,0 +1,41 @@ +mov r4, 0x44 +mov r5, 0x55 +mov r6, 0x66 + +mov r0, 0 +mov r2, 10 +mov r1, 0x80 + +again: +sw r0, [r1] +add r0, r0, 1 +slt r3, r0, r2 +bnz r3, again +bz r3, 
done +mov r5, 0xAA +mov r6, 0xBB +halt + +done: +mov r7, 0x77 +nop +halt + +;0080 0000 +;0080 0001 +;0080 0002 +;0080 0003 +;0080 0004 +;0080 0005 +;0080 0006 +;0080 0007 +;0080 0008 +;0080 0009 +;R0 000a +;R1 0080 +;R2 000a +;R3 0000 +;R4 0044 +;R5 0055 +;R6 0066 +;R7 0077