gateware

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README

commit 8caf74ca1c7a04da4e0d0225ec6e313e89d68ddb
parent ede20da576e18b5d89e9144eecec397d23a972e6
Author: Brian Swetland <swetland@frotz.net>
Date:   Sat, 24 Nov 2018 22:46:22 -0800

cpu16: cpu16v4 work in progress

- fetch / decode / execute stages working
- memory write working
- 3 register alu ops work
- 2 register + immediate alu ops work
- wide unconditional branches work (with 2 delay slots)
- extended immediate opcode works

TODO
- memory read
- conditional branches
- register branches
- branch-with-link
- handle data hazards

Diffstat:
M Makefile | 19 ++++++++++---------
A hdl/cpu16.sv | 299 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M hdl/ice40.v | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------
M hdl/simram.sv | 11 ++++++++++-
M hdl/testbench.sv | 98 +++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------
5 files changed, 441 insertions(+), 70 deletions(-)

diff --git a/Makefile b/Makefile @@ -1,5 +1,6 @@ -CPU_SRCS := hdl/cpu/cpu.v hdl/cpu/alu.v hdl/cpu/regfile.v +#CPU_SRCS := hdl/cpu/cpu.v hdl/cpu/alu.v hdl/cpu/regfile.v +CPU_SRCS := hdl/cpu16.sv VGA_SRCS := hdl/vga/vga40x30x2.v hdl/vga/vga.v hdl/vga/videoram.v hdl/vga/chardata.v @@ -72,22 +73,22 @@ out/ice40.asc: out/ice40.blif $(ARACHNEPNR) -d 5k -p sg48 -o out/ice40.asc -p hdl/ice40up.pcf out/ice40.blif 2>&1 | tee out/ice40.pnr.log endif -run: out/cpu/Vtestbench out/test.hex - ./out/cpu/Vtestbench -trace out/trace.vcd -dump out/memory.bin -load out/test.hex +run: out/cpu/Vtestbench out/test16.hex + ./out/cpu/Vtestbench -trace out/trace.vcd -dump out/memory.bin -load out/test16.hex -out/test.hex: src/test.s out/a16 out/d16 - out/a16 src/test.s out/test.hex +out/test16.hex: src/test16.s out/a16 out/d16 + out/a16 src/test16.s out/test16.hex #out/test.hex: test.hex # cp test.hex out/test.hex -out/a16: src/a16.c src/d16.c +out/a16: src/a16v4.c src/d16v4.c @mkdir -p out - gcc -g -Wall -O1 -o out/a16 src/a16.c src/d16.c + gcc -g -Wall -O1 -o out/a16 src/a16v4.c src/d16v4.c -out/d16: src/d16.c +out/d16: src/d16v4.c @mkdir -p out - gcc -g -Wall -O1 -o out/d16 -DSTANDALONE=1 src/d16.c + gcc -g -Wall -O1 -o out/d16 -DSTANDALONE=1 src/d16v4.c out/icetool: src/icetool.c src/ftdi.c src/ftdi.h @mkdir -p out diff --git a/hdl/cpu16.sv b/hdl/cpu16.sv @@ -0,0 +1,299 @@ +// Copyright 2015, Brian Swetland <swetland@frotz.net> +// Licensed under the Apache License, Version 2.0. 
+ +`timescale 1ns / 1ps + +module cpu16( + input clk, + output [15:0]ins_rd_addr, + input [15:0]ins_rd_data, + output ins_rd_req, + input ins_rd_rdy, + + output [15:0]dat_rw_addr, + output [15:0]dat_wr_data, + input [15:0]dat_rd_data, + output dat_rd_req, + output dat_wr_req, + input dat_rd_rdy, + input dat_wr_rdy, + + input reset + ); + +localparam INS_NOP = 16'h0001; + +// ---- FETCH ---- +reg [15:0]pc_next; +reg [15:0]ir_next; +reg ir_valid_next; + +reg [15:0]pc = 16'd0; +reg [15:0]ir = 16'd0; +reg ir_valid = 1'b0; + +assign ins_rd_addr = pc_next; +assign ins_rd_req = 1'b1; + +always_comb begin + if (reset) begin + pc_next = 16'h0000; + end else if (ex_do_branch_imm) begin + pc_next = ex_branch_tgt; + end else if (ins_rd_rdy) begin + pc_next = pc + 16'h0001; + end else begin + pc_next = pc; + end + ir_next = ins_rd_rdy ? ins_rd_data : INS_NOP; + ir_valid_next = ins_rd_rdy; +end + +always_ff @(posedge clk) begin + pc <= pc_next; + ir <= ir_next; + ir_valid <= ir_valid_next; +end + +// ---- DECODE ---- +reg [11:0]de_ext = 12'b0; +reg de_ext_rdy = 1'b0; + +// s6 alu-reg-imm load/store +// s7 pc-rel-cond-branch +// s9 mov-imm +// s11 pc-rel-branch +// s12 ext + +// fields decoded from instruction +wire [15:0]ir_imm_s6_raw = { {11 {ir[15]}}, ir[14:10] }; +wire [15:0]ir_imm_s7 = { {10 {ir[15]}}, ir[6], ir[14:10] }; +wire [15:0]ir_imm_s9_raw = { {8 {ir[15]}}, ir[9:7], ir[14:10] }; +wire [15:0]ir_imm_s11 = { {6 {ir[15]}}, ir[8:4], ir[14:10] }; +wire [15:0]ir_imm_s12 = { {5 {ir[15]}}, ir[9:4], ir[14:10] }; +wire [2:0]ir_csel = ir[6:4]; +wire [2:0]ir_asel = ir[9:7]; +wire [2:0]ir_bsel = ir[12:10]; +wire [2:0]ir_alu_op = ir[3] ? ir[2:0] : ir[15:13]; +wire [3:0]ir_opcode = ir[3:0]; + +reg [11:0]ir_ext_imm = 12'b0; +reg ir_ext_rdy = 1'b0; + +wire [15:0]ir_imm_s6 = { (ir_ext_rdy ? ir_ext_imm : ir_imm_s6_raw[15:4]), ir_imm_s6_raw[3:0] }; +wire [15:0]ir_imm_s9 = { (ir_ext_rdy ? 
ir_ext_imm : ir_imm_s9_raw[15:4]), ir_imm_s9_raw[3:0] }; + +// control signals +reg do_wreg_alu; // write from alu +reg do_wreg_mem; // write from memory +reg do_adata_zero; // pass 0 to alu.xdata (instead of adata) +reg do_bdata_imm; // pass imm to alu.ydata (instead of bdata) +reg do_wr_link; // write PC + 1 to LR +reg do_branch_imm; // branch to imm +reg do_branch_reg; // branch to reg + imm +reg do_branch_cond; // branch if condition met +reg do_branch_zero; // condition zero(1) or notzero(0) +reg do_use_imm9_or_imm6; +reg do_mem_read; +reg do_mem_write; +reg do_set_ext; + +always_comb begin + do_wreg_alu = 1'b0; + do_wreg_mem = 1'b0; + do_adata_zero = 1'b0; + do_bdata_imm = 1'b0; + do_wr_link = 1'b0; + do_branch_imm = 1'b0; + do_branch_reg = 1'b0; + do_branch_cond = 1'b0; + do_branch_zero = 1'b0; + do_use_imm9_or_imm6 = 1'b0; + do_mem_read = 1'b0; + do_mem_write = 1'b0; + do_set_ext = 1'b0; + + casez (ir_opcode) + 4'b0000: begin // alu Rc, Ra, Rb + do_wreg_alu = 1'b1; + end + 4'b0001: begin // expansion (nop) + end + 4'b0010: begin // ext si12 + do_set_ext = 1'b1; + end + 4'b0011: begin // mov Rc, si9 + do_wreg_alu = 1'b1; + do_adata_zero = 1'b1; + do_bdata_imm = 1'b1; + do_use_imm9_or_imm6 = 1'b1; + end + 4'b0100: begin // lw Rc, [Ra, si6] + do_mem_read = 1'b1; + do_use_imm9_or_imm6 = 1'b0; + end + 4'b0101: begin // sw Rc, [Ra, si6] + do_mem_write = 1'b1; + do_use_imm9_or_imm6 = 1'b0; + end + 4'b0110: begin // b imm12 / bl imm12 + do_branch_imm = 1'b1; + do_wr_link = ir[9]; + end + 4'b0111: begin // BZ/BNZ/B/BL + if (ir[5]) begin // b Ra / bl Ra + do_branch_reg = 1'b1; + do_wr_link = ir[4]; + end else begin // bz imm7 / bnz imm7 + do_branch_cond = 1'b1; + do_branch_zero = ~ir[4]; + end + end + 4'b1???: begin // alu Rc, Ra, si6 + do_wreg_alu = 1'b1; + do_bdata_imm = 1'b1; + do_use_imm9_or_imm6 = 1'b0; + end + endcase +end + +always_ff @(posedge clk) begin + if (ir_valid) + ir_ext_rdy <= do_set_ext; + if (ir_valid & do_set_ext) + ir_ext_imm <= ir_imm_s12[11:0]; 
+end + +regs16 regs( + .clk(clk), + .asel(ir_asel), + .bsel(do_mem_write ? ir_csel : ir_bsel), + .wsel(ex_wsel), + .wreg(ex_do_wreg_alu), + .adata(ex_adata), + .bdata(ex_bdata), + .wdata(ex_alu_rdata) + ); + +// ---- EXECUTE ---- + +wire [15:0]ex_adata; +wire [15:0]ex_bdata; + +wire [15:0]ex_alu_rdata; + +reg [15:0]ex_branch_tgt = 16'b0; +reg [2:0]ex_alu_op = 3'b0; +reg [2:0]ex_wsel = 3'b0; +reg ex_do_wreg_alu = 1'b0; +reg ex_do_wreg_mem = 1'b0; +reg ex_do_adata_zero = 1'b0; +reg ex_do_bdata_imm = 1'b0; +reg ex_do_wr_link = 1'b0; +reg ex_do_branch_imm = 1'b0; +reg ex_do_branch_reg = 1'b0; +reg ex_do_branch_cond = 1'b0; +reg ex_do_branch_zero = 1'b0; +reg ex_do_mem_read = 1'b0; +reg ex_do_mem_write = 1'b0; + +reg [15:0]ex_imm = 16'b0; + +always_ff @(posedge clk) begin + // for mem-read or mem-write we use the ALU for Ra + imm7 + ex_alu_op <= (do_adata_zero | do_mem_read | do_mem_write) ? 3'b0 : ir_alu_op; + ex_wsel <= do_wr_link ? 3'd7 : ir_csel; + ex_branch_tgt <= pc + (do_branch_imm ? ir_imm_s11 : ir_imm_s7); + ex_do_wreg_alu <= do_wreg_alu; + ex_do_wreg_mem <= do_wreg_mem; + ex_do_adata_zero <= do_adata_zero; + ex_do_bdata_imm <= do_bdata_imm; + ex_do_wr_link <= do_wr_link; + ex_do_branch_imm <= do_branch_imm; + ex_do_branch_reg <= do_branch_reg; + ex_do_branch_cond <= do_branch_cond; + ex_do_branch_zero <= do_branch_zero; + ex_do_mem_read = do_mem_read; + ex_do_mem_write = do_mem_write; + ex_imm <= (do_mem_read | do_mem_write) ? ir_imm_s7 : (do_use_imm9_or_imm6 ? ir_imm_s9 : ir_imm_s6); +end + + +alu16 alu( + .op(ex_alu_op), + .xdata(ex_do_adata_zero ? 16'b0 : ex_adata), + .ydata((ex_do_mem_read | ex_do_mem_write | ex_do_bdata_imm) ? 
ex_imm : ex_bdata), + .rdata(ex_alu_rdata) + ); + +assign dat_rw_addr = ex_alu_rdata; +assign dat_wr_data = ex_bdata; +assign dat_rd_req = ex_do_mem_read; +assign dat_wr_req = ex_do_mem_write; + +// ---- SIMULATION DEBUG ASSIST ---- + +`ifdef verilator +reg [15:0]dbg_addr = 16'd0; +wire [47:0]ir_dbg_dis; +reg [47:0]ex_dbg_dis = 48'd0; + +assign ir_dbg_dis = { ir, 3'b0, ir_ext_rdy, ir_ext_imm, dbg_addr }; + +always_ff @(posedge clk) begin + dbg_addr <= pc; + ex_dbg_dis <= ir_dbg_dis; +end +`endif + +endmodule + +module regs16( + input clk, + input [2:0]asel, + input [2:0]bsel, + input [2:0]wsel, + input wreg, + input [15:0]wdata, + output [15:0]adata, + output [15:0]bdata + ); + +reg [15:0]rmem[0:7]; +reg [15:0]areg; +reg [15:0]breg; + +always_ff @(posedge clk) begin + if (wreg) + rmem[wsel] <= wdata; + areg <= rmem[asel]; + breg <= rmem[bsel]; +end + +assign adata = areg; +assign bdata = breg; + +endmodule + + +module alu16( + input [2:0]op, + input [15:0]xdata, + input [15:0]ydata, + output [15:0]rdata + ); + +always_comb begin + case (op) + 3'b000: rdata = xdata + ydata; + 3'b001: rdata = xdata - ydata; + 3'b010: rdata = xdata & ydata; + 3'b011: rdata = xdata | ydata; + 3'b100: rdata = xdata ^ ydata; + 3'b101: rdata = { {15 {1'b0}}, xdata < ydata }; + 3'b110: rdata = { {15 {1'b0}}, xdata >= ydata }; + 3'b111: rdata = xdata * ydata; + endcase +end +endmodule + diff --git a/hdl/ice40.v b/hdl/ice40.v @@ -31,26 +31,54 @@ pll_12_25 pll0( wire sys_clk = clk12m; -wire [15:0]cpu_waddr /* synthesis syn_keep=1 */; -wire [15:0]cpu_wdata /* synthesis syn_keep=1 */; -wire cpu_we /* synthesis syn_keep=1 */; -wire [15:0]cpu_raddr /* synthesis syn_keep=1 */; -wire [15:0]cpu_rdata /* synthesis syn_keep=1 */; -wire cpu_re /* synthesis syn_keep=1 */; - reg cpu_reset = 1'b0; -cpu #( - .RWIDTH(16), - .SWIDTH(4) - )cpu0( +// cpu memory interface +wire [15:0]ins_rd_addr; +wire [15:0]ins_rd_data; +wire ins_rd_req; + +wire [15:0]dat_rw_addr; +wire [15:0]dat_rd_data; +wire dat_rd_req; 
+wire [15:0]dat_wr_data; +wire dat_wr_req; + +// fake arbitration that never denies a request +reg ins_rd_rdy = 1'b0; +reg dat_rd_rdy = 1'b0; +reg dat_wr_rdy = 1'b0; + +always_ff @(posedge sys_clk) begin + if (cpu_reset) begin + ins_rd_rdy <= 1'b0; + dat_rd_rdy <= 1'b0; + dat_wr_rdy <= 1'b0; + end else begin + ins_rd_rdy <= ins_rd_req; + dat_rd_rdy <= dat_rd_req; + dat_wr_rdy <= dat_wr_req; + end +end + +// until arbitration works +assign dat_rd_data = 16'hEEEE; + +cpu16 cpu( .clk(sys_clk), - .mem_waddr_o(cpu_waddr), - .mem_wdata_o(cpu_wdata), - .mem_wr_o(cpu_we), - .mem_raddr_o(cpu_raddr), - .mem_rdata_i(cpu_rdata), - .mem_rd_o(cpu_re), + .ins_rd_addr(ins_rd_addr), + .ins_rd_data(ins_rd_data), + .ins_rd_req(ins_rd_req), + .ins_rd_rdy(ins_rd_rdy), + + .dat_rw_addr(dat_rw_addr), + .dat_wr_data(dat_wr_data), + .dat_rd_data(dat_rd_data), + .dat_rd_req(dat_rd_req), + .dat_rd_rdy(dat_rd_rdy), + .dat_wr_req(dat_wr_req), + .dat_wr_rdy(dat_wr_rdy), + .reset(cpu_reset) ) /* synthesis syn_keep=1 */; @@ -70,9 +98,9 @@ spi_debug_ifc sdi( ); // debug interface has priority over cpu writes -wire we = dbg_we | cpu_we; -wire [15:0]waddr = dbg_we ? dbg_waddr : cpu_waddr; -wire [15:0]wdata = dbg_we ? dbg_wdata : cpu_wdata; +wire we = dbg_we | dat_wr_req; +wire [15:0]waddr = dbg_we ? dbg_waddr : dat_rw_addr; +wire [15:0]wdata = dbg_we ? dbg_wdata : dat_wr_data; wire cs_sram = (waddr[15:12] == 4'h0); wire cs_vram = (waddr[15:12] == 4'h8); @@ -86,24 +114,24 @@ end //assign out1 = cpu_reset; //assign out2 = cpu_raddr[0]; -assign out1 = cpu_we; +assign out1 = dat_wr_req; assign out2 = dbg_we; -wire cs0r = ~cpu_raddr[8]; -wire cs1r = cpu_raddr[8]; +wire cs0r = ~ins_rd_addr[8]; +wire cs1r = ins_rd_addr[8]; wire cs0w = ~waddr[8]; wire cs1w = waddr[8]; wire [15:0]rdata0; wire [15:0]rdata1; -assign cpu_rdata = cs0r ? rdata0 : rdata1; +assign ins_rd_data = cs0r ? 
rdata0 : rdata1; sram ram0( .clk(sys_clk), - .raddr(cpu_raddr), + .raddr(ins_rd_addr), .rdata(rdata0), - .re(cpu_re & cs0r & cs_sram), + .re(ins_rd_req & cs0r & cs_sram), .waddr(waddr), .wdata(wdata), .we(we & cs0w & cs_sram) @@ -111,9 +139,9 @@ sram ram0( sram ram1( .clk(sys_clk), - .raddr(cpu_raddr), + .raddr(ins_rd_addr), .rdata(rdata1), - .re(cpu_re & cs1r & cs_sram), + .re(ins_rd_req & cs1r & cs_sram), .waddr(waddr), .wdata(wdata), .we(we & cs1w & cs_sram) diff --git a/hdl/simram.sv b/hdl/simram.sv @@ -17,15 +17,24 @@ module simram( ); wire [31:0]rawdata; + wire [31:0]junk; - always @(posedge clk) begin + // hack: this should be posedge but if we do that + // then the dpi_mem_write() happens too early + always @(negedge clk) begin if (we) begin $display(":WRI %08x %08x", waddr, wdata); dpi_mem_write({16'd0, waddr}, {16'd0, wdata}); end + end + always @(posedge clk) begin if (re) begin dpi_mem_read({16'd0, raddr}, rawdata); rdata <= rawdata[15:0]; + end else begin + //junk = $random(); + //rdata <= junk[15:0]; + rdata <= 16'hEEEE; end end endmodule diff --git a/hdl/testbench.sv b/hdl/testbench.sv @@ -1,4 +1,4 @@ -// Copyright 2015, Brian Swetland <swetland@frotz.net> +// Copyright 2018, Brian Swetland <swetland@frotz.net> // Licensed under the Apache License, Version 2.0. `timescale 1ns / 1ps @@ -8,52 +8,86 @@ module testbench( ); reg [15:0]count = 16'd0; +reg reset = 1'b0; + +reg burp = 1'b0; always @(posedge clk) begin count <= count + 16'd1; + burp <= (count >= 16'd0010) && (count <= 16'd0012) ? 
1'b1 : 1'b0; + if (count == 16'd0005) reset <= 1'b0; if (count == 16'd1000) $finish; - if (cpu0.ir == 16'hFFFF) begin - for ( integer i = 0; i < 16; i++ ) begin - $display(":REG R%0d %8X", i, cpu0.regs.R[i]); + if (cpu.ir == 16'hFFFF) begin + for ( integer i = 0; i < 8; i++ ) begin + $display(":REG R%0d %8X", i, cpu.regs.rmem[i]); end $display(":END"); $finish; end end -wire [15:0]wdata; -wire [15:0]waddr; -wire [15:0]raddr; -wire [15:0]rdata; -wire wr; -wire rd; +wire [15:0]ins_rd_addr; +wire [15:0]ins_rd_data; +wire ins_rd_req; + +wire [15:0]dat_rw_addr; +wire [15:0]dat_rd_data; +wire dat_rd_req; +wire [15:0]dat_wr_data; +wire dat_wr_req; + +reg ins_rd_rdy = 1'b0; +reg dat_rd_rdy = 1'b0; +reg dat_wr_rdy = 1'b0; + +always_ff @(posedge clk) begin + if (reset) begin + ins_rd_rdy <= 1'b0; + dat_rd_rdy <= 1'b0; + dat_wr_rdy <= 1'b0; + end else begin + ins_rd_rdy <= ins_rd_req; + dat_rd_rdy <= dat_rd_req; + dat_wr_rdy <= dat_wr_req; + end +end -simram dram( +simram ins_ram( .clk(clk), - .waddr(waddr), - .wdata(wdata), - .we(wr), - .raddr(raddr), - .rdata(rdata), - .re(rd) + .waddr(16'd0), + .wdata(16'd0), + .we(1'd0), + .raddr(ins_rd_addr), + .rdata(ins_rd_data), + .re(1'd1) ); -cpu -`ifdef BIGCPU - #( - .RWIDTH(32), - .SWIDTH(5) - ) -`endif - cpu0( +simram dat_ram( .clk(clk), - .mem_raddr_o(raddr), - .mem_rdata_i(rdata), - .mem_waddr_o(waddr), - .mem_wdata_o(wdata), - .mem_wr_o(wr), - .mem_rd_o(rd), - .reset(1'b0) + .waddr(dat_rw_addr), + .wdata(dat_wr_data), + .we(dat_wr_req), + .raddr(dat_rw_addr), + .rdata(dat_rd_data), + .re(dat_rd_req) + ); + +cpu16 cpu( + .clk(clk), + .ins_rd_addr(ins_rd_addr), + .ins_rd_data(burp ? 16'hEEEE : ins_rd_data), + .ins_rd_req(ins_rd_req), + .ins_rd_rdy(ins_rd_rdy & ~burp), + + .dat_rw_addr(dat_rw_addr), + .dat_wr_data(dat_wr_data), + .dat_rd_data(dat_rd_data), + .dat_rd_req(dat_rd_req), + .dat_rd_rdy(dat_rd_rdy), + .dat_wr_req(dat_wr_req), + .dat_wr_rdy(dat_wr_rdy), + + .reset(reset) ); endmodule