commit 8caf74ca1c7a04da4e0d0225ec6e313e89d68ddb
parent ede20da576e18b5d89e9144eecec397d23a972e6
Author: Brian Swetland <swetland@frotz.net>
Date: Sat, 24 Nov 2018 22:46:22 -0800
cpu16: cpu16v4 work in progress
- fetch / decode / execute stages working
- memory write working
- 3 register alu ops work
- 2 register + immediate alu ops work
- wide unconditional branches work (with 2 delay slots)
- extended immediate opcode works
TODO
- memory read
- conditional branches
- register branches
- branch-with-link
- handle data hazards
Diffstat:
M | Makefile | | | 19 | ++++++++++--------- |
A | hdl/cpu16.sv | | | 299 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
M | hdl/ice40.v | | | 84 | +++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------- |
M | hdl/simram.sv | | | 11 | ++++++++++- |
M | hdl/testbench.sv | | | 98 | +++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------- |
5 files changed, 441 insertions(+), 70 deletions(-)
diff --git a/Makefile b/Makefile
@@ -1,5 +1,6 @@
-CPU_SRCS := hdl/cpu/cpu.v hdl/cpu/alu.v hdl/cpu/regfile.v
+#CPU_SRCS := hdl/cpu/cpu.v hdl/cpu/alu.v hdl/cpu/regfile.v
+CPU_SRCS := hdl/cpu16.sv
VGA_SRCS := hdl/vga/vga40x30x2.v hdl/vga/vga.v hdl/vga/videoram.v hdl/vga/chardata.v
@@ -72,22 +73,22 @@ out/ice40.asc: out/ice40.blif
$(ARACHNEPNR) -d 5k -p sg48 -o out/ice40.asc -p hdl/ice40up.pcf out/ice40.blif 2>&1 | tee out/ice40.pnr.log
endif
-run: out/cpu/Vtestbench out/test.hex
- ./out/cpu/Vtestbench -trace out/trace.vcd -dump out/memory.bin -load out/test.hex
+run: out/cpu/Vtestbench out/test16.hex
+ ./out/cpu/Vtestbench -trace out/trace.vcd -dump out/memory.bin -load out/test16.hex
-out/test.hex: src/test.s out/a16 out/d16
- out/a16 src/test.s out/test.hex
+out/test16.hex: src/test16.s out/a16 out/d16
+ out/a16 src/test16.s out/test16.hex
#out/test.hex: test.hex
# cp test.hex out/test.hex
-out/a16: src/a16.c src/d16.c
+out/a16: src/a16v4.c src/d16v4.c
@mkdir -p out
- gcc -g -Wall -O1 -o out/a16 src/a16.c src/d16.c
+ gcc -g -Wall -O1 -o out/a16 src/a16v4.c src/d16v4.c
-out/d16: src/d16.c
+out/d16: src/d16v4.c
@mkdir -p out
- gcc -g -Wall -O1 -o out/d16 -DSTANDALONE=1 src/d16.c
+ gcc -g -Wall -O1 -o out/d16 -DSTANDALONE=1 src/d16v4.c
out/icetool: src/icetool.c src/ftdi.c src/ftdi.h
@mkdir -p out
diff --git a/hdl/cpu16.sv b/hdl/cpu16.sv
@@ -0,0 +1,299 @@
+// Copyright 2015, Brian Swetland <swetland@frotz.net>
+// Licensed under the Apache License, Version 2.0.
+
+`timescale 1ns / 1ps
+
+// cpu16: 16-bit CPU core organised as fetch, decode and execute stages.
+//
+// Instruction port: ins_rd_addr / ins_rd_req are driven by the core,
+//   ins_rd_data / ins_rd_rdy are sampled by the fetch stage.
+// Data port: dat_rw_addr, dat_wr_data, dat_rd_req and dat_wr_req are
+//   driven from the execute stage.  dat_rd_data, dat_rd_rdy and
+//   dat_wr_rdy are accepted but not referenced by any logic in this
+//   module yet.
+// reset: while high, forces the next fetch address to 0.
+module cpu16(
+    input clk,
+    output [15:0]ins_rd_addr,
+    input [15:0]ins_rd_data,
+    output ins_rd_req,
+    input ins_rd_rdy,
+
+    output [15:0]dat_rw_addr,
+    output [15:0]dat_wr_data,
+    input [15:0]dat_rd_data,
+    output dat_rd_req,
+    output dat_wr_req,
+    input dat_rd_rdy,
+    input dat_wr_rdy,
+
+    input reset
+    );
+
+// instruction substituted into ir whenever the fetch did not complete
+localparam INS_NOP = 16'h0001;
+
+// ---- EXECUTE STAGE STATE ----
+// Declared ahead of the fetch / decode logic and the register file
+// instance that read these signals, so that tools which require
+// declaration before use accept the module.
+
+wire [15:0]ex_adata;      // register file read port A (registered in regs16)
+wire [15:0]ex_bdata;      // register file read port B (registered in regs16)
+
+wire [15:0]ex_alu_rdata;  // alu result
+
+reg [15:0]ex_branch_tgt = 16'b0;
+reg [2:0]ex_alu_op = 3'b0;
+reg [2:0]ex_wsel = 3'b0;
+reg ex_do_wreg_alu = 1'b0;
+reg ex_do_wreg_mem = 1'b0;
+reg ex_do_adata_zero = 1'b0;
+reg ex_do_bdata_imm = 1'b0;
+reg ex_do_wr_link = 1'b0;
+reg ex_do_branch_imm = 1'b0;
+reg ex_do_branch_reg = 1'b0;
+reg ex_do_branch_cond = 1'b0;
+reg ex_do_branch_zero = 1'b0;
+reg ex_do_mem_read = 1'b0;
+reg ex_do_mem_write = 1'b0;
+
+reg [15:0]ex_imm = 16'b0;
+
+// ---- FETCH ----
+reg [15:0]pc_next;
+reg [15:0]ir_next;
+reg ir_valid_next;
+
+reg [15:0]pc = 16'd0;
+reg [15:0]ir = 16'd0;
+reg ir_valid = 1'b0;
+
+// the address presented to instruction memory is the *next* pc, and a
+// fetch is requested on every cycle
+assign ins_rd_addr = pc_next;
+assign ins_rd_req = 1'b1;
+
+always_comb begin
+    // priority: reset, then a branch redirect from the execute stage,
+    // then sequential advance (only when the fetch completed)
+    if (reset) begin
+        pc_next = 16'h0000;
+    end else if (ex_do_branch_imm) begin
+        pc_next = ex_branch_tgt;
+    end else if (ins_rd_rdy) begin
+        pc_next = pc + 16'h0001;
+    end else begin
+        pc_next = pc;
+    end
+    // a fetch that is not ready puts a NOP into ir and marks it invalid
+    ir_next = ins_rd_rdy ? ins_rd_data : INS_NOP;
+    ir_valid_next = ins_rd_rdy;
+end
+
+always_ff @(posedge clk) begin
+    pc <= pc_next;
+    ir <= ir_next;
+    ir_valid <= ir_valid_next;
+end
+
+// ---- DECODE ----
+// NOTE: de_ext / de_ext_rdy are not referenced anywhere else in this module
+reg [11:0]de_ext = 12'b0;
+reg de_ext_rdy = 1'b0;
+
+// s6 alu-reg-imm load/store
+// s7 pc-rel-cond-branch
+// s9 mov-imm
+// s11 pc-rel-branch
+// s12 ext
+
+// fields decoded from instruction
+// (every immediate form is sign-extended from ir[15])
+wire [15:0]ir_imm_s6_raw = { {11 {ir[15]}}, ir[14:10] };
+wire [15:0]ir_imm_s7 = { {10 {ir[15]}}, ir[6], ir[14:10] };
+wire [15:0]ir_imm_s9_raw = { {8 {ir[15]}}, ir[9:7], ir[14:10] };
+wire [15:0]ir_imm_s11 = { {6 {ir[15]}}, ir[8:4], ir[14:10] };
+wire [15:0]ir_imm_s12 = { {5 {ir[15]}}, ir[9:4], ir[14:10] };
+wire [2:0]ir_csel = ir[6:4];
+wire [2:0]ir_asel = ir[9:7];
+wire [2:0]ir_bsel = ir[12:10];
+// alu op comes from ir[2:0] for the reg+imm forms (ir[3] set) and from
+// ir[15:13] otherwise
+wire [2:0]ir_alu_op = ir[3] ? ir[2:0] : ir[15:13];
+wire [3:0]ir_opcode = ir[3:0];
+
+// state captured from a preceding ext instruction
+reg [11:0]ir_ext_imm = 12'b0;
+reg ir_ext_rdy = 1'b0;
+
+// when an ext prefix is pending its 12 bits replace the sign-extension
+// bits [15:4]; the low 4 bits always come from the instruction itself
+wire [15:0]ir_imm_s6 = { (ir_ext_rdy ? ir_ext_imm : ir_imm_s6_raw[15:4]), ir_imm_s6_raw[3:0] };
+wire [15:0]ir_imm_s9 = { (ir_ext_rdy ? ir_ext_imm : ir_imm_s9_raw[15:4]), ir_imm_s9_raw[3:0] };
+
+// control signals
+reg do_wreg_alu; // write from alu
+reg do_wreg_mem; // write from memory
+reg do_adata_zero; // pass 0 to alu.xdata (instead of adata)
+reg do_bdata_imm; // pass imm to alu.ydata (instead of bdata)
+reg do_wr_link; // write PC + 1 to LR
+reg do_branch_imm; // branch to imm
+reg do_branch_reg; // branch to reg + imm
+reg do_branch_cond; // branch if condition met
+reg do_branch_zero; // condition zero(1) or notzero(0)
+reg do_use_imm9_or_imm6;
+reg do_mem_read;
+reg do_mem_write;
+reg do_set_ext;
+
+always_comb begin
+    // every control defaults to inactive; each opcode enables what it needs
+    do_wreg_alu = 1'b0;
+    do_wreg_mem = 1'b0;
+    do_adata_zero = 1'b0;
+    do_bdata_imm = 1'b0;
+    do_wr_link = 1'b0;
+    do_branch_imm = 1'b0;
+    do_branch_reg = 1'b0;
+    do_branch_cond = 1'b0;
+    do_branch_zero = 1'b0;
+    do_use_imm9_or_imm6 = 1'b0;
+    do_mem_read = 1'b0;
+    do_mem_write = 1'b0;
+    do_set_ext = 1'b0;
+
+    casez (ir_opcode)
+    4'b0000: begin // alu Rc, Ra, Rb
+        do_wreg_alu = 1'b1;
+    end
+    4'b0001: begin // expansion (nop)
+    end
+    4'b0010: begin // ext si12
+        do_set_ext = 1'b1;
+    end
+    4'b0011: begin // mov Rc, si9
+        do_wreg_alu = 1'b1;
+        do_adata_zero = 1'b1;
+        do_bdata_imm = 1'b1;
+        do_use_imm9_or_imm6 = 1'b1;
+    end
+    4'b0100: begin // lw Rc, [Ra, si6]
+        do_mem_read = 1'b1;
+        do_use_imm9_or_imm6 = 1'b0;
+    end
+    4'b0101: begin // sw Rc, [Ra, si6]
+        do_mem_write = 1'b1;
+        do_use_imm9_or_imm6 = 1'b0;
+    end
+    4'b0110: begin // b imm12 / bl imm12
+        do_branch_imm = 1'b1;
+        do_wr_link = ir[9];
+    end
+    4'b0111: begin // BZ/BNZ/B/BL
+        if (ir[5]) begin // b Ra / bl Ra
+            do_branch_reg = 1'b1;
+            do_wr_link = ir[4];
+        end else begin // bz imm7 / bnz imm7
+            do_branch_cond = 1'b1;
+            do_branch_zero = ~ir[4];
+        end
+    end
+    4'b1???: begin // alu Rc, Ra, si6
+        do_wreg_alu = 1'b1;
+        do_bdata_imm = 1'b1;
+        do_use_imm9_or_imm6 = 1'b0;
+    end
+    endcase
+end
+
+// ext prefix tracking: the ready flag follows do_set_ext of each valid
+// instruction, so it is set by ext and cleared by the next valid
+// instruction that is not an ext
+always_ff @(posedge clk) begin
+    if (ir_valid)
+        ir_ext_rdy <= do_set_ext;
+    if (ir_valid & do_set_ext)
+        ir_ext_imm <= ir_imm_s12[11:0];
+end
+
+// register file: for a store, read port B fetches Rc (the data to be
+// written to memory) instead of Rb
+regs16 regs(
+    .clk(clk),
+    .asel(ir_asel),
+    .bsel(do_mem_write ? ir_csel : ir_bsel),
+    .wsel(ex_wsel),
+    .wreg(ex_do_wreg_alu),
+    .adata(ex_adata),
+    .bdata(ex_bdata),
+    .wdata(ex_alu_rdata)
+    );
+
+// ---- EXECUTE ----
+
+always_ff @(posedge clk) begin
+    // for mem-read or mem-write we use the ALU (add) for Ra + si6
+    ex_alu_op <= (do_adata_zero | do_mem_read | do_mem_write) ? 3'b0 : ir_alu_op;
+    ex_wsel <= do_wr_link ? 3'd7 : ir_csel;
+    ex_branch_tgt <= pc + (do_branch_imm ? ir_imm_s11 : ir_imm_s7);
+    ex_do_wreg_alu <= do_wreg_alu;
+    ex_do_wreg_mem <= do_wreg_mem;
+    ex_do_adata_zero <= do_adata_zero;
+    ex_do_bdata_imm <= do_bdata_imm;
+    ex_do_wr_link <= do_wr_link;
+    ex_do_branch_imm <= do_branch_imm;
+    ex_do_branch_reg <= do_branch_reg;
+    ex_do_branch_cond <= do_branch_cond;
+    ex_do_branch_zero <= do_branch_zero;
+    // non-blocking like every other pipeline register, so the memory
+    // request strobes change together with the address / data they qualify
+    ex_do_mem_read <= do_mem_read;
+    ex_do_mem_write <= do_mem_write;
+    // lw / sw leave do_use_imm9_or_imm6 clear and therefore take the si6
+    // form.  The s7 form must not be used for them: its extra bit is
+    // ir[6], which is the top bit of the Rc selector for these opcodes.
+    ex_imm <= do_use_imm9_or_imm6 ? ir_imm_s9 : ir_imm_s6;
+end
+
+
+alu16 alu(
+    .op(ex_alu_op),
+    .xdata(ex_do_adata_zero ? 16'b0 : ex_adata),
+    .ydata((ex_do_mem_read | ex_do_mem_write | ex_do_bdata_imm) ? ex_imm : ex_bdata),
+    .rdata(ex_alu_rdata)
+    );
+
+// data port: address is the alu result, write data is read port B
+assign dat_rw_addr = ex_alu_rdata;
+assign dat_wr_data = ex_bdata;
+assign dat_rd_req = ex_do_mem_read;
+assign dat_wr_req = ex_do_mem_write;
+
+// ---- SIMULATION DEBUG ASSIST ----
+
+`ifdef verilator
+reg [15:0]dbg_addr = 16'd0;
+wire [47:0]ir_dbg_dis;
+reg [47:0]ex_dbg_dis = 48'd0;
+
+// packs the instruction word, the ext-prefix state and a one-cycle
+// delayed copy of pc into a single 48-bit value
+assign ir_dbg_dis = { ir, 3'b0, ir_ext_rdy, ir_ext_imm, dbg_addr };
+
+always_ff @(posedge clk) begin
+    dbg_addr <= pc;
+    ex_dbg_dis <= ir_dbg_dis;
+end
+`endif
+
+endmodule
+
+// regs16: 8 x 16-bit register file with one write port and two
+// registered read ports.
+//
+//   asel / bsel : register index for read port A / B
+//   wsel, wreg  : register index and enable for the write port
+//   wdata       : value stored into rmem[wsel] on a clock edge with wreg high
+//   adata/bdata : contents of the selected registers, available one
+//                 clock edge after asel / bsel are presented
+module regs16(
+    input clk,
+    input [2:0]asel,
+    input [2:0]bsel,
+    input [2:0]wsel,
+    input wreg,
+    input [15:0]wdata,
+    output [15:0]adata,
+    output [15:0]bdata
+    );
+
+// storage array (the testbench dumps it through a hierarchical reference)
+reg [15:0]rmem[0:7];
+
+// output registers for the two read ports
+reg [15:0]areg;
+reg [15:0]breg;
+
+// read ports: sample the selected entries on every clock edge
+always_ff @(posedge clk) begin
+    areg <= rmem[asel];
+    breg <= rmem[bsel];
+end
+
+// write port
+always_ff @(posedge clk) begin
+    if (wreg) begin
+        rmem[wsel] <= wdata;
+    end
+end
+
+assign adata = areg;
+assign bdata = breg;
+
+endmodule
+
+
+// alu16: combinational 16-bit ALU.
+//
+//   op    : operation select (see the case items below)
+//   xdata : left operand
+//   ydata : right operand
+//   rdata : result; the two compare ops yield 0 or 1 in bit 0
+//
+// The operands are declared without `signed`, so the compares are
+// unsigned.  The multiply result is truncated to the low 16 bits.
+module alu16(
+    input [2:0]op,
+    input [15:0]xdata,
+    input [15:0]ydata,
+    // declared as a variable: it is assigned procedurally in always_comb
+    output reg [15:0]rdata
+    );
+
+always_comb begin
+    case (op)
+    3'b000: rdata = xdata + ydata;                     // add
+    3'b001: rdata = xdata - ydata;                     // subtract
+    3'b010: rdata = xdata & ydata;                     // bitwise and
+    3'b011: rdata = xdata | ydata;                     // bitwise or
+    3'b100: rdata = xdata ^ ydata;                     // bitwise xor
+    3'b101: rdata = { {15 {1'b0}}, xdata < ydata };    // set if less than
+    3'b110: rdata = { {15 {1'b0}}, xdata >= ydata };   // set if greater or equal
+    3'b111: rdata = xdata * ydata;                     // multiply (low 16 bits)
+    endcase
+end
+endmodule
+
diff --git a/hdl/ice40.v b/hdl/ice40.v
@@ -31,26 +31,54 @@ pll_12_25 pll0(
wire sys_clk = clk12m;
-wire [15:0]cpu_waddr /* synthesis syn_keep=1 */;
-wire [15:0]cpu_wdata /* synthesis syn_keep=1 */;
-wire cpu_we /* synthesis syn_keep=1 */;
-wire [15:0]cpu_raddr /* synthesis syn_keep=1 */;
-wire [15:0]cpu_rdata /* synthesis syn_keep=1 */;
-wire cpu_re /* synthesis syn_keep=1 */;
-
reg cpu_reset = 1'b0;
-cpu #(
- .RWIDTH(16),
- .SWIDTH(4)
- )cpu0(
+// cpu memory interface
+wire [15:0]ins_rd_addr;
+wire [15:0]ins_rd_data;
+wire ins_rd_req;
+
+wire [15:0]dat_rw_addr;
+wire [15:0]dat_rd_data;
+wire dat_rd_req;
+wire [15:0]dat_wr_data;
+wire dat_wr_req;
+
+// fake arbitration that never denies a request
+reg ins_rd_rdy = 1'b0;
+reg dat_rd_rdy = 1'b0;
+reg dat_wr_rdy = 1'b0;
+
+always_ff @(posedge sys_clk) begin
+ if (cpu_reset) begin
+ ins_rd_rdy <= 1'b0;
+ dat_rd_rdy <= 1'b0;
+ dat_wr_rdy <= 1'b0;
+ end else begin
+ ins_rd_rdy <= ins_rd_req;
+ dat_rd_rdy <= dat_rd_req;
+ dat_wr_rdy <= dat_wr_req;
+ end
+end
+
+// until arbitration works
+assign dat_rd_data = 16'hEEEE;
+
+cpu16 cpu(
.clk(sys_clk),
- .mem_waddr_o(cpu_waddr),
- .mem_wdata_o(cpu_wdata),
- .mem_wr_o(cpu_we),
- .mem_raddr_o(cpu_raddr),
- .mem_rdata_i(cpu_rdata),
- .mem_rd_o(cpu_re),
+ .ins_rd_addr(ins_rd_addr),
+ .ins_rd_data(ins_rd_data),
+ .ins_rd_req(ins_rd_req),
+ .ins_rd_rdy(ins_rd_rdy),
+
+ .dat_rw_addr(dat_rw_addr),
+ .dat_wr_data(dat_wr_data),
+ .dat_rd_data(dat_rd_data),
+ .dat_rd_req(dat_rd_req),
+ .dat_rd_rdy(dat_rd_rdy),
+ .dat_wr_req(dat_wr_req),
+ .dat_wr_rdy(dat_wr_rdy),
+
.reset(cpu_reset)
) /* synthesis syn_keep=1 */;
@@ -70,9 +98,9 @@ spi_debug_ifc sdi(
);
// debug interface has priority over cpu writes
-wire we = dbg_we | cpu_we;
-wire [15:0]waddr = dbg_we ? dbg_waddr : cpu_waddr;
-wire [15:0]wdata = dbg_we ? dbg_wdata : cpu_wdata;
+wire we = dbg_we | dat_wr_req;
+wire [15:0]waddr = dbg_we ? dbg_waddr : dat_rw_addr;
+wire [15:0]wdata = dbg_we ? dbg_wdata : dat_wr_data;
wire cs_sram = (waddr[15:12] == 4'h0);
wire cs_vram = (waddr[15:12] == 4'h8);
@@ -86,24 +114,24 @@ end
//assign out1 = cpu_reset;
//assign out2 = cpu_raddr[0];
-assign out1 = cpu_we;
+assign out1 = dat_wr_req;
assign out2 = dbg_we;
-wire cs0r = ~cpu_raddr[8];
-wire cs1r = cpu_raddr[8];
+wire cs0r = ~ins_rd_addr[8];
+wire cs1r = ins_rd_addr[8];
wire cs0w = ~waddr[8];
wire cs1w = waddr[8];
wire [15:0]rdata0;
wire [15:0]rdata1;
-assign cpu_rdata = cs0r ? rdata0 : rdata1;
+assign ins_rd_data = cs0r ? rdata0 : rdata1;
sram ram0(
.clk(sys_clk),
- .raddr(cpu_raddr),
+ .raddr(ins_rd_addr),
.rdata(rdata0),
- .re(cpu_re & cs0r & cs_sram),
+ .re(ins_rd_req & cs0r & cs_sram),
.waddr(waddr),
.wdata(wdata),
.we(we & cs0w & cs_sram)
@@ -111,9 +139,9 @@ sram ram0(
sram ram1(
.clk(sys_clk),
- .raddr(cpu_raddr),
+ .raddr(ins_rd_addr),
.rdata(rdata1),
- .re(cpu_re & cs1r & cs_sram),
+ .re(ins_rd_req & cs1r & cs_sram),
.waddr(waddr),
.wdata(wdata),
.we(we & cs1w & cs_sram)
diff --git a/hdl/simram.sv b/hdl/simram.sv
@@ -17,15 +17,24 @@ module simram(
);
wire [31:0]rawdata;
+ wire [31:0]junk;
- always @(posedge clk) begin
+ // hack: this should be posedge but if we do that
+ // then the dpi_mem_write() happens too early
+ always @(negedge clk) begin
if (we) begin
$display(":WRI %08x %08x", waddr, wdata);
dpi_mem_write({16'd0, waddr}, {16'd0, wdata});
end
+ end
+ always @(posedge clk) begin
if (re) begin
dpi_mem_read({16'd0, raddr}, rawdata);
rdata <= rawdata[15:0];
+ end else begin
+ //junk = $random();
+ //rdata <= junk[15:0];
+ rdata <= 16'hEEEE;
end
end
endmodule
diff --git a/hdl/testbench.sv b/hdl/testbench.sv
@@ -1,4 +1,4 @@
-// Copyright 2015, Brian Swetland <swetland@frotz.net>
+// Copyright 2018, Brian Swetland <swetland@frotz.net>
// Licensed under the Apache License, Version 2.0.
`timescale 1ns / 1ps
@@ -8,52 +8,86 @@ module testbench(
);
reg [15:0]count = 16'd0;
+reg reset = 1'b0;
+
+reg burp = 1'b0;
always @(posedge clk) begin
count <= count + 16'd1;
+ burp <= (count >= 16'd0010) && (count <= 16'd0012) ? 1'b1 : 1'b0;
+ if (count == 16'd0005) reset <= 1'b0;
if (count == 16'd1000) $finish;
- if (cpu0.ir == 16'hFFFF) begin
- for ( integer i = 0; i < 16; i++ ) begin
- $display(":REG R%0d %8X", i, cpu0.regs.R[i]);
+ if (cpu.ir == 16'hFFFF) begin
+ for ( integer i = 0; i < 8; i++ ) begin
+ $display(":REG R%0d %8X", i, cpu.regs.rmem[i]);
end
$display(":END");
$finish;
end
end
-wire [15:0]wdata;
-wire [15:0]waddr;
-wire [15:0]raddr;
-wire [15:0]rdata;
-wire wr;
-wire rd;
+wire [15:0]ins_rd_addr;
+wire [15:0]ins_rd_data;
+wire ins_rd_req;
+
+wire [15:0]dat_rw_addr;
+wire [15:0]dat_rd_data;
+wire dat_rd_req;
+wire [15:0]dat_wr_data;
+wire dat_wr_req;
+
+reg ins_rd_rdy = 1'b0;
+reg dat_rd_rdy = 1'b0;
+reg dat_wr_rdy = 1'b0;
+
+always_ff @(posedge clk) begin
+ if (reset) begin
+ ins_rd_rdy <= 1'b0;
+ dat_rd_rdy <= 1'b0;
+ dat_wr_rdy <= 1'b0;
+ end else begin
+ ins_rd_rdy <= ins_rd_req;
+ dat_rd_rdy <= dat_rd_req;
+ dat_wr_rdy <= dat_wr_req;
+ end
+end
-simram dram(
+simram ins_ram(
.clk(clk),
- .waddr(waddr),
- .wdata(wdata),
- .we(wr),
- .raddr(raddr),
- .rdata(rdata),
- .re(rd)
+ .waddr(16'd0),
+ .wdata(16'd0),
+ .we(1'd0),
+ .raddr(ins_rd_addr),
+ .rdata(ins_rd_data),
+ .re(1'd1)
);
-cpu
-`ifdef BIGCPU
- #(
- .RWIDTH(32),
- .SWIDTH(5)
- )
-`endif
- cpu0(
+simram dat_ram(
.clk(clk),
- .mem_raddr_o(raddr),
- .mem_rdata_i(rdata),
- .mem_waddr_o(waddr),
- .mem_wdata_o(wdata),
- .mem_wr_o(wr),
- .mem_rd_o(rd),
- .reset(1'b0)
+ .waddr(dat_rw_addr),
+ .wdata(dat_wr_data),
+ .we(dat_wr_req),
+ .raddr(dat_rw_addr),
+ .rdata(dat_rd_data),
+ .re(dat_rd_req)
+ );
+
+cpu16 cpu(
+ .clk(clk),
+ .ins_rd_addr(ins_rd_addr),
+ .ins_rd_data(burp ? 16'hEEEE : ins_rd_data),
+ .ins_rd_req(ins_rd_req),
+ .ins_rd_rdy(ins_rd_rdy & ~burp),
+
+ .dat_rw_addr(dat_rw_addr),
+ .dat_wr_data(dat_wr_data),
+ .dat_rd_data(dat_rd_data),
+ .dat_rd_req(dat_rd_req),
+ .dat_rd_rdy(dat_rd_rdy),
+ .dat_wr_req(dat_wr_req),
+ .dat_wr_rdy(dat_wr_rdy),
+
+ .reset(reset)
);
endmodule