From 1f503b2d8018f758bb37f5789b1becc9092d16af Mon Sep 17 00:00:00 2001 From: Byron Lathi Date: Thu, 21 Sep 2023 20:35:52 -0700 Subject: [PATCH] update sim environment --- hw/efinix_fpga/simulation/Makefile | 23 +- .../super6502_sdram_controller_define.vh | 4 +- .../simulation/interrupt_controller_tb.sv | 76 - .../simulation/spi_controller_tb.sv | 102 -- .../simulation/{ => src}/generic_sdr.v | 2 +- .../simulation/{ => src}/sim_top.sv | 2 - .../simulation/src/verilog-6502/ALU.v | 108 ++ .../simulation/src/verilog-6502/README.md | 69 + .../simulation/src/verilog-6502/cpu.v | 1220 ++++++++++++++ .../simulation/src/verilog-6502/cpu_65c02.v | 1418 +++++++++++++++++ hw/efinix_fpga/simulation/timer_tb.sv | 75 - hw/efinix_fpga/simulation/verilog-6502 | 1 - hw/efinix_fpga/src/sdram_adapter.sv | 5 +- 13 files changed, 2831 insertions(+), 274 deletions(-) rename hw/efinix_fpga/simulation/{ => include}/super6502_sdram_controller_define.vh (90%) delete mode 100644 hw/efinix_fpga/simulation/interrupt_controller_tb.sv delete mode 100644 hw/efinix_fpga/simulation/spi_controller_tb.sv rename hw/efinix_fpga/simulation/{ => src}/generic_sdr.v (97%) rename hw/efinix_fpga/simulation/{ => src}/sim_top.sv (68%) create mode 100755 hw/efinix_fpga/simulation/src/verilog-6502/ALU.v create mode 100644 hw/efinix_fpga/simulation/src/verilog-6502/README.md create mode 100644 hw/efinix_fpga/simulation/src/verilog-6502/cpu.v create mode 100644 hw/efinix_fpga/simulation/src/verilog-6502/cpu_65c02.v delete mode 100644 hw/efinix_fpga/simulation/timer_tb.sv delete mode 160000 hw/efinix_fpga/simulation/verilog-6502 diff --git a/hw/efinix_fpga/simulation/Makefile b/hw/efinix_fpga/simulation/Makefile index c3b0f2e..bd842ad 100644 --- a/hw/efinix_fpga/simulation/Makefile +++ b/hw/efinix_fpga/simulation/Makefile @@ -1,20 +1,17 @@ -TARGETS= timer interrupt_controller spi_controller -TB=$(patsubst %, %_tb.sv, $(TARGETS)) +SRCS=$(shell find src/ -type f -name "*.*v") +SRCS+=$(shell find ../ip/ -type f -name "*.*v" -not \( -name "*tmpl*" \)) +SRCS+=$(shell find ../src/ -type f -name "*.*v") -all: $(TARGETS) +INC=$(shell find include/ -type f) -timer: timer_tb.sv - iverilog -g2005-sv -s sim -o $@ $@_tb.sv ../$@.sv +#TODO implement something like sources.list -spi_controller: spi_controller_tb.sv ../spi_controller.sv - iverilog -g2005-sv -s sim -o $@ $@_tb.sv ../$@.sv +TOP_MODULE=sim_top +TARGET=sim_top -interrupt_controller: interrupt_controller_tb.sv - iverilog -g2005-sv -s sim -o $@ $@_tb.sv ../$@.sv +all: + iverilog -g2005-sv -s $(TOP_MODULE) -o $(TARGET) $(INC) $(SRCS) .PHONY: clean - clean: - rm -f $(TARGETS) - rm -f *.vcd - rm -f *.vvp \ No newline at end of file + rm -rf $(TARGET) \ No newline at end of file diff --git a/hw/efinix_fpga/simulation/super6502_sdram_controller_define.vh b/hw/efinix_fpga/simulation/include/super6502_sdram_controller_define.vh similarity index 90% rename from hw/efinix_fpga/simulation/super6502_sdram_controller_define.vh rename to hw/efinix_fpga/simulation/include/super6502_sdram_controller_define.vh index a36208b..077f158 100644 --- a/hw/efinix_fpga/simulation/super6502_sdram_controller_define.vh +++ b/hw/efinix_fpga/simulation/include/super6502_sdram_controller_define.vh @@ -5,10 +5,10 @@ localparam tIORT_u = 2; localparam BL = 1; localparam DDIO_TYPE = "SOFT"; localparam DQ_WIDTH = 8; -localparam DQ_GROUP = 4; +localparam DQ_GROUP = 2; localparam BA_WIDTH = 2; localparam ROW_WIDTH = 13; -localparam COL_WIDTH = 10; +localparam COL_WIDTH = 9; localparam tPWRUP = 200000; localparam tRAS = 44; localparam tRC = 66; diff --git a/hw/efinix_fpga/simulation/interrupt_controller_tb.sv b/hw/efinix_fpga/simulation/interrupt_controller_tb.sv deleted file mode 100644 index 7cd41e8..0000000 --- a/hw/efinix_fpga/simulation/interrupt_controller_tb.sv +++ /dev/null @@ -1,76 +0,0 @@ -module sim(); - -timeunit 10ns; -timeprecision 1ns; - -logic clk; -logic reset; -logic [2:0] addr; -logic [7:0] i_data; -logic [7:0] o_data; -logic cs; -logic rwb; - -logic irqb_master; -logic irqb0, irqb1, irqb2, irqb3, irqb4, irqb5, irqb6, irqb7; - -interrupt_controller dut( - .*); - -always #100 clk = clk === 1'b0; - -task write_reg(input logic [2:0] _addr, input logic [7:0] _data); - @(negedge clk); - cs <= '1; - addr <= _addr; - rwb <= '0; - i_data <= '1; - @(posedge clk); - i_data <= _data; - @(negedge clk); - cs <= '0; - rwb <= '1; -endtask - -task read_reg(input logic [2:0] _addr, output logic [7:0] _data); - @(negedge clk); - cs <= '1; - addr <= _addr; - rwb <= '1; - i_data <= '1; - @(posedge clk); - _data <= o_data; - @(negedge clk); - cs <= '0; - rwb <= '1; -endtask - -initial -begin - $dumpfile("interrupt_controller.vcd"); - $dumpvars(0,sim); -end - -initial begin - reset <= '1; - irqb0 <= '1; - irqb1 <= '1; - irqb2 <= '1; - irqb3 <= '1; - irqb4 <= '1; - irqb5 <= '1; - irqb6 <= '1; - irqb7 <= '1; - repeat(5) @(posedge clk); - reset <= '0; - - repeat(5) @(posedge clk); - - irqb0 <= '0; - - repeat(5) @(posedge clk); - - $finish(); -end - -endmodule diff --git a/hw/efinix_fpga/simulation/spi_controller_tb.sv b/hw/efinix_fpga/simulation/spi_controller_tb.sv deleted file mode 100644 index ad20da4..0000000 --- a/hw/efinix_fpga/simulation/spi_controller_tb.sv +++ /dev/null @@ -1,102 +0,0 @@ -module sim(); - -timeunit 10ns; -timeprecision 1ns; - -logic clk_50; - -logic i_clk; -logic i_rst; - -logic i_cs; -logic i_rwb; -logic [1:0] i_addr; -logic [7:0] i_data; -logic [7:0] o_data; - -logic o_spi_cs; -logic o_spi_clk; -logic o_spi_mosi; -logic i_spi_miso; - -spi_controller dut(.*); - -always #1 clk_50 = clk_50 === 1'b0; -always #100 i_clk = i_clk === 1'b0; - -task write_reg(input logic [2:0] _addr, input logic [7:0] _data); - @(negedge i_clk); - i_cs <= '1; - i_addr <= _addr; - i_rwb <= '0; - i_data <= '1; - @(posedge i_clk); - i_data <= _data; - @(negedge i_clk); - i_cs <= '0; - i_rwb <= '1; -endtask - -task read_reg(input logic [2:0] _addr, output logic [7:0] _data); - @(negedge i_clk); - i_cs <= '1; - i_addr <= _addr; - i_rwb <= '1; - i_data <= '1; - @(posedge i_clk); - _data <= o_data; - @(negedge i_clk); - i_cs <= '0; - i_rwb <= '1; -endtask - -initial -begin - $dumpfile("spi_controller.vcd"); - $dumpvars(0,sim); -end - -logic [7:0] data; - -initial begin - i_rst <= '1; - repeat(5) @(posedge i_clk); - i_cs <= '0; - i_rwb <= '1; - i_addr <= '0; - i_rst <= '0; - - repeat(5) @(posedge i_clk); - - write_reg(3, 1); - write_reg(2, 8'hFF); - data = (1 << 7); - while(data & (1 << 7)) begin - read_reg(3, data); - end - write_reg(3, 0); - read_reg(1, data); - assert(data == 8'h55); - - repeat(50) @(posedge i_clk); - - $finish(); -end - - -logic [7:0] _spi_device_data; - -initial begin - _spi_device_data <= 8'h55; -end - -always @(edge o_spi_clk) begin - if (o_spi_cs == '0) begin - if (o_spi_clk == '1) - i_spi_miso <= _spi_device_data[7]; - if (o_spi_clk == '0) - _spi_device_data <= _spi_device_data << 1; - end -end - -endmodule diff --git a/hw/efinix_fpga/simulation/generic_sdr.v b/hw/efinix_fpga/simulation/src/generic_sdr.v similarity index 97% rename from hw/efinix_fpga/simulation/generic_sdr.v rename to hw/efinix_fpga/simulation/src/generic_sdr.v index 912d519..06eaa99 100644 --- a/hw/efinix_fpga/simulation/generic_sdr.v +++ b/hw/efinix_fpga/simulation/src/generic_sdr.v @@ -49,7 +49,7 @@ module generic_sdr (Dq, Addr, Ba, Clk, Cke, Cs_n, Ras_n, Cas_n, We_n, Dqm); -`include "super6502_sdram_controller_define.vh" +`include "include/super6502_sdram_controller_define.vh" parameter tCK = 1000/fCK_MHz; // tCK ns Nominal Clock Cycle Time `ifdef CLK_200 diff --git a/hw/efinix_fpga/simulation/sim_top.sv b/hw/efinix_fpga/simulation/src/sim_top.sv similarity index 68% rename from hw/efinix_fpga/simulation/sim_top.sv rename to hw/efinix_fpga/simulation/src/sim_top.sv index 81ceefc..c20fa0f 100644 --- a/hw/efinix_fpga/simulation/sim_top.sv +++ b/hw/efinix_fpga/simulation/src/sim_top.sv @@ -6,7 +6,5 @@ cpu_65c02 u_cpu(); //TODO: also this super6502 u_dut(); -//TODO: decide what to do here -memory u_mem(); endmodule \ No newline at end of file diff --git a/hw/efinix_fpga/simulation/src/verilog-6502/ALU.v b/hw/efinix_fpga/simulation/src/verilog-6502/ALU.v new file mode 100755 index 0000000..8d05fc0 --- /dev/null +++ b/hw/efinix_fpga/simulation/src/verilog-6502/ALU.v @@ -0,0 +1,108 @@ +/* + * ALU. + * + * AI and BI are 8 bit inputs. Result in OUT. + * CI is Carry In. + * CO is Carry Out. + * + * op[3:0] is defined as follows: + * + * 0011 AI + BI + * 0111 AI - BI + * 1011 AI + AI + * 1100 AI | BI + * 1101 AI & BI + * 1110 AI ^ BI + * 1111 AI + * + */ + +module ALU( clk, op, right, AI, BI, CI, CO, BCD, OUT, V, Z, N, HC, RDY ); + input clk; + input right; + input [3:0] op; // operation + input [7:0] AI; + input [7:0] BI; + input CI; + input BCD; // BCD style carry + output [7:0] OUT; + output CO; + output V; + output Z; + output N; + output HC; + input RDY; + +reg [7:0] OUT; +reg CO; +wire V; +wire Z; +reg N; +reg HC; + +reg AI7; +reg BI7; +reg [8:0] temp_logic; +reg [7:0] temp_BI; +reg [4:0] temp_l; +reg [4:0] temp_h; +wire [8:0] temp = { temp_h, temp_l[3:0] }; +wire adder_CI = (right | (op[3:2] == 2'b11)) ? 0 : CI; + +// calculate the logic operations. The 'case' can be done in 1 LUT per +// bit. The 'right' shift is a simple mux that can be implemented by +// F5MUX. +always @* begin + case( op[1:0] ) + 2'b00: temp_logic = AI | BI; + 2'b01: temp_logic = AI & BI; + 2'b10: temp_logic = AI ^ BI; + 2'b11: temp_logic = AI; + endcase + + if( right ) + temp_logic = { AI[0], CI, AI[7:1] }; +end + +// Add logic result to BI input. This only makes sense when logic = AI. +// This stage can be done in 1 LUT per bit, using carry chain logic. +always @* begin + case( op[3:2] ) + 2'b00: temp_BI = BI; // A+B + 2'b01: temp_BI = ~BI; // A-B + 2'b10: temp_BI = temp_logic; // A+A + 2'b11: temp_BI = 0; // A+0 + endcase +end + +// HC9 is the half carry bit when doing BCD add +wire HC9 = BCD & (temp_l[3:1] >= 3'd5); + +// CO9 is the carry-out bit when doing BCD add +wire CO9 = BCD & (temp_h[3:1] >= 3'd5); + +// combined half carry bit +wire temp_HC = temp_l[4] | HC9; + +// perform the addition as 2 separate nibble, so we get +// access to the half carry flag +always @* begin + temp_l = temp_logic[3:0] + temp_BI[3:0] + adder_CI; + temp_h = temp_logic[8:4] + temp_BI[7:4] + temp_HC; +end + +// calculate the flags +always @(posedge clk) + if( RDY ) begin + AI7 <= AI[7]; + BI7 <= temp_BI[7]; + OUT <= temp[7:0]; + CO <= temp[8] | CO9; + N <= temp[7]; + HC <= temp_HC; + end + +assign V = AI7 ^ BI7 ^ CO ^ N; +assign Z = ~|OUT; + +endmodule diff --git a/hw/efinix_fpga/simulation/src/verilog-6502/README.md b/hw/efinix_fpga/simulation/src/verilog-6502/README.md new file mode 100644 index 0000000..42e6ad6 --- /dev/null +++ b/hw/efinix_fpga/simulation/src/verilog-6502/README.md @@ -0,0 +1,69 @@ +======================================================== +A Verilog HDL version of the old MOS 6502 and 65C02 CPUs +======================================================== + +Original 6502 core by Arlet Ottens + +65C02 extensions by David Banks and Ed Spittles + +========== +6502 Core +========== + +Arlet's original 6502 core (cpu.v) is unchanged. + +Note: the 6502/65C02 cores assumes a synchronous memory. This means +that valid data (DI) is expected on the cycle *after* valid +address. This allows direct connection to (Xilinx) block RAMs. When +using asynchronous memory, I suggest registering the address/control +lines for glitchless output signals. + +[Also check out my new 65C02 project](https://github.com/Arlet/verilog-65c02) + +Have fun. + +========== +65C02 Core +========== + +A second core (cpu_65c02.v) has been added, based on Arlet's 6502 +core, with additional 65C02 instructions and addressing modes: +- PHX, PHY, PLX, PLY +- BRA +- INC A, DEC A +- (zp) addressing mode +- STZ +- BIT zpx, absx, imm +- TSB/TRB +- JMP (,X) +- NOPs (optional) +- 65C02 BCD N/Z flags (optional, disabled) + +The Rockwell/WDC specific instructions (RMB/SMB/BBR/BBS/WAI/STP) are +not currently implemented + +The 65C02 core passes the Dormann 6502 test suite, and also passes the +Dormann 65C02 test suite if the optional support for NOPs and 65C02 +BCD flags is enabled. + +It has been tested as a BBC Micro "Matchbox" 65C02 Co Processor, in a +XC6SLX9-2 FPGA, running at 80MHz using 64KB of internel block RAM. It +just meets timing at 80MHz in this environment. It successfully runs +BBC Basic IV and Tube Elite. + +============ +Known Issues +============ + +The Matchbox Co Processor needed one wait state (via RDY) to be added +to each ROM access (only needed early in the boot process, as +eventually everything runs from RAM). The DIHOLD logic did not work +correctly with a single wait state, and so has been commented out. + +I now believe the correct fix is actually just: + +always @(posedge clk ) + if( RDY ) + DIHOLD <= DI; + +assign DIMUX = ~RDY ? DIHOLD : DI; diff --git a/hw/efinix_fpga/simulation/src/verilog-6502/cpu.v b/hw/efinix_fpga/simulation/src/verilog-6502/cpu.v new file mode 100644 index 0000000..ed8da62 --- /dev/null +++ b/hw/efinix_fpga/simulation/src/verilog-6502/cpu.v @@ -0,0 +1,1220 @@ +/* + * verilog model of 6502 CPU. + * + * (C) Arlet Ottens, + * + * Feel free to use this code in any project (commercial or not), as long as you + * keep this message, and the copyright notice. This code is provided "as is", + * without any warranties of any kind. + * + */ + +/* + * Note that not all 6502 interface signals are supported (yet). The goal + * is to create an Acorn Atom model, and the Atom didn't use all signals on + * the main board. + * + * The data bus is implemented as separate read/write buses. Combine them + * on the output pads if external memory is required. + */ + +module cpu( clk, reset, AB, DI, DO, WE, IRQ, NMI, RDY ); + +input clk; // CPU clock +input reset; // reset signal +output reg [15:0] AB; // address bus +input [7:0] DI; // data in, read bus +output [7:0] DO; // data out, write bus +output WE; // write enable +input IRQ; // interrupt request +input NMI; // non-maskable interrupt request +input RDY; // Ready signal. Pauses CPU when RDY=0 + +/* + * internal signals + */ + +reg [15:0] PC; // Program Counter +reg [7:0] ABL; // Address Bus Register LSB +reg [7:0] ABH; // Address Bus Register MSB +wire [7:0] ADD; // Adder Hold Register (registered in ALU) + +reg [7:0] DIHOLD; // Hold for Data In +reg DIHOLD_valid; // +wire [7:0] DIMUX; // + +reg [7:0] IRHOLD; // Hold for Instruction register +reg IRHOLD_valid; // Valid instruction in IRHOLD + +reg [7:0] AXYS[3:0]; // A, X, Y and S register file + +reg C = 0; // carry flag (init at zero to avoid X's in ALU sim) +reg Z = 0; // zero flag +reg I = 0; // interrupt flag +reg D = 0; // decimal flag +reg V = 0; // overflow flag +reg N = 0; // negative flag +wire AZ; // ALU Zero flag +wire AV; // ALU overflow flag +wire AN; // ALU negative flag +wire HC; // ALU half carry + +reg [7:0] AI; // ALU Input A +reg [7:0] BI; // ALU Input B +wire [7:0] DI; // Data In +wire [7:0] IR; // Instruction register +reg [7:0] DO; // Data Out +reg WE; // Write Enable +reg CI; // Carry In +wire CO; // Carry Out +wire [7:0] PCH = PC[15:8]; +wire [7:0] PCL = PC[7:0]; + +reg NMI_edge = 0; // captured NMI edge + +reg [1:0] regsel; // Select A, X, Y or S register +wire [7:0] regfile = AXYS[regsel]; // Selected register output + +parameter + SEL_A = 2'd0, + SEL_S = 2'd1, + SEL_X = 2'd2, + SEL_Y = 2'd3; + +/* + * define some signals for watching in simulator output + */ + + +`ifdef SIM +wire [7:0] A = AXYS[SEL_A]; // Accumulator +wire [7:0] X = AXYS[SEL_X]; // X register +wire [7:0] Y = AXYS[SEL_Y]; // Y register +wire [7:0] S = AXYS[SEL_S]; // Stack pointer +`endif + +wire [7:0] P = { N, V, 2'b11, D, I, Z, C }; + +/* + * instruction decoder/sequencer + */ + +reg [5:0] state; + +/* + * control signals + */ + +reg PC_inc; // Increment PC +reg [15:0] PC_temp; // intermediate value of PC + +reg [1:0] src_reg; // source register index +reg [1:0] dst_reg; // destination register index + +reg index_y; // if set, then Y is index reg rather than X +reg load_reg; // loading a register (A, X, Y, S) in this instruction +reg inc; // increment +reg write_back; // set if memory is read/modified/written +reg load_only; // LDA/LDX/LDY instruction +reg store; // doing store (STA/STX/STY) +reg adc_sbc; // doing ADC/SBC +reg compare; // doing CMP/CPY/CPX +reg shift; // doing shift/rotate instruction +reg rotate; // doing rotate (no shift) +reg backwards; // backwards branch +reg cond_true; // branch condition is true +reg [2:0] cond_code; // condition code bits from instruction +reg shift_right; // Instruction ALU shift/rotate right +reg alu_shift_right; // Current cycle shift right enable +reg [3:0] op; // Main ALU operation for instruction +reg [3:0] alu_op; // Current cycle ALU operation +reg adc_bcd; // ALU should do BCD style carry +reg adj_bcd; // results should be BCD adjusted + +/* + * some flip flops to remember we're doing special instructions. These + * get loaded at the DECODE state, and used later + */ +reg bit_ins; // doing BIT instruction +reg plp; // doing PLP instruction +reg php; // doing PHP instruction +reg clc; // clear carry +reg sec; // set carry +reg cld; // clear decimal +reg sed; // set decimal +reg cli; // clear interrupt +reg sei; // set interrupt +reg clv; // clear overflow +reg brk; // doing BRK + +reg res; // in reset + +/* + * ALU operations + */ + +parameter + OP_OR = 4'b1100, + OP_AND = 4'b1101, + OP_EOR = 4'b1110, + OP_ADD = 4'b0011, + OP_SUB = 4'b0111, + OP_ROL = 4'b1011, + OP_A = 4'b1111; + +/* + * Microcode state machine. Basically, every addressing mode has its own + * path through the state machine. Additional information, such as the + * operation, source and destination registers are decoded in parallel, and + * kept in separate flops. + */ + +parameter + ABS0 = 6'd0, // ABS - fetch LSB + ABS1 = 6'd1, // ABS - fetch MSB + ABSX0 = 6'd2, // ABS, X - fetch LSB and send to ALU (+X) + ABSX1 = 6'd3, // ABS, X - fetch MSB and send to ALU (+Carry) + ABSX2 = 6'd4, // ABS, X - Wait for ALU (only if needed) + BRA0 = 6'd5, // Branch - fetch offset and send to ALU (+PC[7:0]) + BRA1 = 6'd6, // Branch - fetch opcode, and send PC[15:8] to ALU + BRA2 = 6'd7, // Branch - fetch opcode (if page boundary crossed) + BRK0 = 6'd8, // BRK/IRQ - push PCH, send S to ALU (-1) + BRK1 = 6'd9, // BRK/IRQ - push PCL, send S to ALU (-1) + BRK2 = 6'd10, // BRK/IRQ - push P, send S to ALU (-1) + BRK3 = 6'd11, // BRK/IRQ - write S, and fetch @ fffe + DECODE = 6'd12, // IR is valid, decode instruction, and write prev reg + FETCH = 6'd13, // fetch next opcode, and perform prev ALU op + INDX0 = 6'd14, // (ZP,X) - fetch ZP address, and send to ALU (+X) + INDX1 = 6'd15, // (ZP,X) - fetch LSB at ZP+X, calculate ZP+X+1 + INDX2 = 6'd16, // (ZP,X) - fetch MSB at ZP+X+1 + INDX3 = 6'd17, // (ZP,X) - fetch data + INDY0 = 6'd18, // (ZP),Y - fetch ZP address, and send ZP to ALU (+1) + INDY1 = 6'd19, // (ZP),Y - fetch at ZP+1, and send LSB to ALU (+Y) + INDY2 = 6'd20, // (ZP),Y - fetch data, and send MSB to ALU (+Carry) + INDY3 = 6'd21, // (ZP),Y) - fetch data (if page boundary crossed) + JMP0 = 6'd22, // JMP - fetch PCL and hold + JMP1 = 6'd23, // JMP - fetch PCH + JMPI0 = 6'd24, // JMP IND - fetch LSB and send to ALU for delay (+0) + JMPI1 = 6'd25, // JMP IND - fetch MSB, proceed with JMP0 state + JSR0 = 6'd26, // JSR - push PCH, save LSB, send S to ALU (-1) + JSR1 = 6'd27, // JSR - push PCL, send S to ALU (-1) + JSR2 = 6'd28, // JSR - write S + JSR3 = 6'd29, // JSR - fetch MSB + PULL0 = 6'd30, // PLP/PLA - save next op in IRHOLD, send S to ALU (+1) + PULL1 = 6'd31, // PLP/PLA - fetch data from stack, write S + PULL2 = 6'd32, // PLP/PLA - prefetch op, but don't increment PC + PUSH0 = 6'd33, // PHP/PHA - send A to ALU (+0) + PUSH1 = 6'd34, // PHP/PHA - write A/P, send S to ALU (-1) + READ = 6'd35, // Read memory for read/modify/write (INC, DEC, shift) + REG = 6'd36, // Read register for reg-reg transfers + RTI0 = 6'd37, // RTI - send S to ALU (+1) + RTI1 = 6'd38, // RTI - read P from stack + RTI2 = 6'd39, // RTI - read PCL from stack + RTI3 = 6'd40, // RTI - read PCH from stack + RTI4 = 6'd41, // RTI - read PCH from stack + RTS0 = 6'd42, // RTS - send S to ALU (+1) + RTS1 = 6'd43, // RTS - read PCL from stack + RTS2 = 6'd44, // RTS - write PCL to ALU, read PCH + RTS3 = 6'd45, // RTS - load PC and increment + WRITE = 6'd46, // Write memory for read/modify/write + ZP0 = 6'd47, // Z-page - fetch ZP address + ZPX0 = 6'd48, // ZP, X - fetch ZP, and send to ALU (+X) + ZPX1 = 6'd49; // ZP, X - load from memory + +`ifdef SIM + +/* + * easy to read names in simulator output + */ +reg [8*6-1:0] statename; + +always @* + case( state ) + DECODE: statename = "DECODE"; + REG: statename = "REG"; + ZP0: statename = "ZP0"; + ZPX0: statename = "ZPX0"; + ZPX1: statename = "ZPX1"; + ABS0: statename = "ABS0"; + ABS1: statename = "ABS1"; + ABSX0: statename = "ABSX0"; + ABSX1: statename = "ABSX1"; + ABSX2: statename = "ABSX2"; + INDX0: statename = "INDX0"; + INDX1: statename = "INDX1"; + INDX2: statename = "INDX2"; + INDX3: statename = "INDX3"; + INDY0: statename = "INDY0"; + INDY1: statename = "INDY1"; + INDY2: statename = "INDY2"; + INDY3: statename = "INDY3"; + READ: statename = "READ"; + WRITE: statename = "WRITE"; + FETCH: statename = "FETCH"; + PUSH0: statename = "PUSH0"; + PUSH1: statename = "PUSH1"; + PULL0: statename = "PULL0"; + PULL1: statename = "PULL1"; + PULL2: statename = "PULL2"; + JSR0: statename = "JSR0"; + JSR1: statename = "JSR1"; + JSR2: statename = "JSR2"; + JSR3: statename = "JSR3"; + RTI0: statename = "RTI0"; + RTI1: statename = "RTI1"; + RTI2: statename = "RTI2"; + RTI3: statename = "RTI3"; + RTI4: statename = "RTI4"; + RTS0: statename = "RTS0"; + RTS1: statename = "RTS1"; + RTS2: statename = "RTS2"; + RTS3: statename = "RTS3"; + BRK0: statename = "BRK0"; + BRK1: statename = "BRK1"; + BRK2: statename = "BRK2"; + BRK3: statename = "BRK3"; + BRA0: statename = "BRA0"; + BRA1: statename = "BRA1"; + BRA2: statename = "BRA2"; + JMP0: statename = "JMP0"; + JMP1: statename = "JMP1"; + JMPI0: statename = "JMPI0"; + JMPI1: statename = "JMPI1"; + endcase + +//always @( PC ) +// $display( "%t, PC:%04x IR:%02x A:%02x X:%02x Y:%02x S:%02x C:%d Z:%d V:%d N:%d P:%02x", $time, PC, IR, A, X, Y, S, C, Z, V, N, P ); + +`endif + + + +/* + * Program Counter Increment/Load. First calculate the base value in + * PC_temp. + */ +always @* + case( state ) + DECODE: if( (~I & IRQ) | NMI_edge ) + PC_temp = { ABH, ABL }; + else + PC_temp = PC; + + + JMP1, + JMPI1, + JSR3, + RTS3, + RTI4: PC_temp = { DIMUX, ADD }; + + BRA1: PC_temp = { ABH, ADD }; + + BRA2: PC_temp = { ADD, PCL }; + + BRK2: PC_temp = res ? 16'hfffc : + NMI_edge ? 16'hfffa : 16'hfffe; + + default: PC_temp = PC; + endcase + +/* + * Determine wether we need PC_temp, or PC_temp + 1 + */ +always @* + case( state ) + DECODE: if( (~I & IRQ) | NMI_edge ) + PC_inc = 0; + else + PC_inc = 1; + + ABS0, + ABSX0, + FETCH, + BRA0, + BRA2, + BRK3, + JMPI1, + JMP1, + RTI4, + RTS3: PC_inc = 1; + + BRA1: PC_inc = CO ^~ backwards; + + default: PC_inc = 0; + endcase + +/* + * Set new PC + */ +always @(posedge clk) + if( RDY ) + PC <= PC_temp + PC_inc; + +/* + * Address Generator + */ + +parameter + ZEROPAGE = 8'h00, + STACKPAGE = 8'h01; + +always @* + case( state ) + ABSX1, + INDX3, + INDY2, + JMP1, + JMPI1, + RTI4, + ABS1: AB = { DIMUX, ADD }; + + BRA2, + INDY3, + ABSX2: AB = { ADD, ABL }; + + BRA1: AB = { ABH, ADD }; + + JSR0, + PUSH1, + RTS0, + RTI0, + BRK0: AB = { STACKPAGE, regfile }; + + BRK1, + JSR1, + PULL1, + RTS1, + RTS2, + RTI1, + RTI2, + RTI3, + BRK2: AB = { STACKPAGE, ADD }; + + INDY1, + INDX1, + ZPX1, + INDX2: AB = { ZEROPAGE, ADD }; + + ZP0, + INDY0: AB = { ZEROPAGE, DIMUX }; + + REG, + READ, + WRITE: AB = { ABH, ABL }; + + default: AB = PC; + endcase + +/* + * ABH/ABL pair is used for registering previous address bus state. + * This can be used to keep the current address, freeing up the original + * source of the address, such as the ALU or DI. + */ +always @(posedge clk) + if( state != PUSH0 && state != PUSH1 && RDY && + state != PULL0 && state != PULL1 && state != PULL2 ) + begin + ABL <= AB[7:0]; + ABH <= AB[15:8]; + end + +/* + * Data Out MUX + */ +always @* + case( state ) + WRITE: DO = ADD; + + JSR0, + BRK0: DO = PCH; + + JSR1, + BRK1: DO = PCL; + + PUSH1: DO = php ? P : ADD; + + BRK2: DO = (IRQ | NMI_edge) ? (P & 8'b1110_1111) : P; + + default: DO = regfile; + endcase + +/* + * Write Enable Generator + */ + +always @* + case( state ) + BRK0, // writing to stack or memory + BRK1, + BRK2, + JSR0, + JSR1, + PUSH1, + WRITE: WE = 1; + + INDX3, // only if doing a STA, STX or STY + INDY3, + ABSX2, + ABS1, + ZPX1, + ZP0: WE = store; + + default: WE = 0; + endcase + +/* + * register file, contains A, X, Y and S (stack pointer) registers. At each + * cycle only 1 of those registers needs to be accessed, so they combined + * in a small memory, saving resources. + */ + +reg write_register; // set when register file is written + +always @* + case( state ) + DECODE: write_register = load_reg & ~plp; + + PULL1, + RTS2, + RTI3, + BRK3, + JSR0, + JSR2 : write_register = 1; + + default: write_register = 0; + endcase + +/* + * BCD adjust logic + */ + +always @(posedge clk) + adj_bcd <= adc_sbc & D; // '1' when doing a BCD instruction + +reg [3:0] ADJL; +reg [3:0] ADJH; + +// adjustment term to be added to ADD[3:0] based on the following +// adj_bcd: '1' if doing ADC/SBC with D=1 +// adc_bcd: '1' if doing ADC with D=1 +// HC : half carry bit from ALU +always @* begin + casex( {adj_bcd, adc_bcd, HC} ) + 3'b0xx: ADJL = 4'd0; // no BCD instruction + 3'b100: ADJL = 4'd10; // SBC, and digital borrow + 3'b101: ADJL = 4'd0; // SBC, but no borrow + 3'b110: ADJL = 4'd0; // ADC, but no carry + 3'b111: ADJL = 4'd6; // ADC, and decimal/digital carry + endcase +end + +// adjustment term to be added to ADD[7:4] based on the following +// adj_bcd: '1' if doing ADC/SBC with D=1 +// adc_bcd: '1' if doing ADC with D=1 +// CO : carry out bit from ALU +always @* begin + casex( {adj_bcd, adc_bcd, CO} ) + 3'b0xx: ADJH = 4'd0; // no BCD instruction + 3'b100: ADJH = 4'd10; // SBC, and digital borrow + 3'b101: ADJH = 4'd0; // SBC, but no borrow + 3'b110: ADJH = 4'd0; // ADC, but no carry + 3'b111: ADJH = 4'd6; // ADC, and decimal/digital carry + endcase +end + +/* + * write to a register. Usually this is the (BCD corrected) output of the + * ALU, but in case of the JSR0 we use the S register to temporarily store + * the PCL. This is possible, because the S register itself is stored in + * the ALU during those cycles. + */ +always @(posedge clk) + if( write_register & RDY ) + AXYS[regsel] <= (state == JSR0) ? DIMUX : { ADD[7:4] + ADJH, ADD[3:0] + ADJL }; + +/* + * register select logic. This determines which of the A, X, Y or + * S registers will be accessed. + */ + +always @* + case( state ) + INDY1, + INDX0, + ZPX0, + ABSX0 : regsel = index_y ? SEL_Y : SEL_X; + + + DECODE : regsel = dst_reg; + + BRK0, + BRK3, + JSR0, + JSR2, + PULL0, + PULL1, + PUSH1, + RTI0, + RTI3, + RTS0, + RTS2 : regsel = SEL_S; + + default: regsel = src_reg; + endcase + +/* + * ALU + */ + +ALU ALU( .clk(clk), + .op(alu_op), + .right(alu_shift_right), + .AI(AI), + .BI(BI), + .CI(CI), + .BCD(adc_bcd & (state == FETCH)), + .CO(CO), + .OUT(ADD), + .V(AV), + .Z(AZ), + .N(AN), + .HC(HC), + .RDY(RDY) ); + +/* + * Select current ALU operation + */ + +always @* + case( state ) + READ: alu_op = op; + + BRA1: alu_op = backwards ? OP_SUB : OP_ADD; + + FETCH, + REG : alu_op = op; + + DECODE, + ABS1: alu_op = 1'bx; + + PUSH1, + BRK0, + BRK1, + BRK2, + JSR0, + JSR1: alu_op = OP_SUB; + + default: alu_op = OP_ADD; + endcase + +/* + * Determine shift right signal to ALU + */ + +always @* + if( state == FETCH || state == REG || state == READ ) + alu_shift_right = shift_right; + else + alu_shift_right = 0; + +/* + * Sign extend branch offset. + */ + +always @(posedge clk) + if( RDY ) + backwards <= DIMUX[7]; + +/* + * ALU A Input MUX + */ + +always @* + case( state ) + JSR1, + RTS1, + RTI1, + RTI2, + BRK1, + BRK2, + INDX1: AI = ADD; + + REG, + ZPX0, + INDX0, + ABSX0, + RTI0, + RTS0, + JSR0, + JSR2, + BRK0, + PULL0, + INDY1, + PUSH0, + PUSH1: AI = regfile; + + BRA0, + READ: AI = DIMUX; + + BRA1: AI = ABH; // don't use PCH in case we're + + FETCH: AI = load_only ? 0 : regfile; + + DECODE, + ABS1: AI = 8'hxx; // don't care + + default: AI = 0; + endcase + + +/* + * ALU B Input mux + */ + +always @* + case( state ) + BRA1, + RTS1, + RTI0, + RTI1, + RTI2, + INDX1, + READ, + REG, + JSR0, + JSR1, + JSR2, + BRK0, + BRK1, + BRK2, + PUSH0, + PUSH1, + PULL0, + RTS0: BI = 8'h00; + + BRA0: BI = PCL; + + DECODE, + ABS1: BI = 8'hxx; + + default: BI = DIMUX; + endcase + +/* + * ALU CI (carry in) mux + */ + +always @* + case( state ) + INDY2, + BRA1, + ABSX1: CI = CO; + + DECODE, + ABS1: CI = 1'bx; + + READ, + REG: CI = rotate ? C : + shift ? 0 : inc; + + FETCH: CI = rotate ? C : + compare ? 1 : + (shift | load_only) ? 0 : C; + + PULL0, + RTI0, + RTI1, + RTI2, + RTS0, + RTS1, + INDY0, + INDX1: CI = 1; + + default: CI = 0; + endcase + +/* + * Processor Status Register update + * + */ + +/* + * Update C flag when doing ADC/SBC, shift/rotate, compare + */ +always @(posedge clk ) + if( shift && state == WRITE ) + C <= CO; + else if( state == RTI2 ) + C <= DIMUX[0]; + else if( ~write_back && state == DECODE ) begin + if( adc_sbc | shift | compare ) + C <= CO; + else if( plp ) + C <= ADD[0]; + else begin + if( sec ) C <= 1; + if( clc ) C <= 0; + end + end + +/* + * Update Z, N flags when writing A, X, Y, Memory, or when doing compare + */ + +always @(posedge clk) + if( state == WRITE ) + Z <= AZ; + else if( state == RTI2 ) + Z <= DIMUX[1]; + else if( state == DECODE ) begin + if( plp ) + Z <= ADD[1]; + else if( (load_reg & (regsel != SEL_S)) | compare | bit_ins ) + Z <= AZ; + end + +always @(posedge clk) + if( state == WRITE ) + N <= AN; + else if( state == RTI2 ) + N <= DIMUX[7]; + else if( state == DECODE ) begin + if( plp ) + N <= ADD[7]; + else if( (load_reg & (regsel != SEL_S)) | compare ) + N <= AN; + end else if( state == FETCH && bit_ins ) + N <= DIMUX[7]; + +/* + * Update I flag + */ + +always @(posedge clk) + if( state == BRK3 ) + I <= 1; + else if( state == RTI2 ) + I <= DIMUX[2]; + else if( state == REG ) begin + if( sei ) I <= 1; + if( cli ) I <= 0; + end else if( state == DECODE ) + if( plp ) I <= ADD[2]; + +/* + * Update D flag + */ +always @(posedge clk ) + if( state == RTI2 ) + D <= DIMUX[3]; + else if( state == DECODE ) begin + if( sed ) D <= 1; + if( cld ) D <= 0; + if( plp ) D <= ADD[3]; + end + +/* + * Update V flag + */ +always @(posedge clk ) + if( state == RTI2 ) + V <= DIMUX[6]; + else if( state == DECODE ) begin + if( adc_sbc ) V <= AV; + if( clv ) V <= 0; + if( plp ) V <= ADD[6]; + end else if( state == FETCH && bit_ins ) + V <= DIMUX[6]; + +/* + * Instruction decoder + */ + +/* + * IR register/mux. Hold previous DI value in IRHOLD in PULL0 and PUSH0 + * states. In these states, the IR has been prefetched, and there is no + * time to read the IR again before the next decode. + */ + +always @(posedge clk ) + if( reset ) + IRHOLD_valid <= 0; + else if( RDY ) begin + if( state == PULL0 || state == PUSH0 ) begin + IRHOLD <= DIMUX; + IRHOLD_valid <= 1; + end else if( state == DECODE ) + IRHOLD_valid <= 0; + end + +assign IR = (IRQ & ~I) | NMI_edge ? 8'h00 : + IRHOLD_valid ? IRHOLD : DIMUX; + +always @(posedge clk ) + if( RDY ) + DIHOLD <= DI; + +assign DIMUX = ~RDY ? DIHOLD : DI; + +/* + * Microcode state machine + */ +always @(posedge clk or posedge reset) + if( reset ) + state <= BRK0; + else if( RDY ) case( state ) + DECODE : + casex ( IR ) + 8'b0000_0000: state <= BRK0; + 8'b0010_0000: state <= JSR0; + 8'b0010_1100: state <= ABS0; // BIT abs + 8'b0100_0000: state <= RTI0; // + 8'b0100_1100: state <= JMP0; + 8'b0110_0000: state <= RTS0; + 8'b0110_1100: state <= JMPI0; + 8'b0x00_1000: state <= PUSH0; + 8'b0x10_1000: state <= PULL0; + 8'b0xx1_1000: state <= REG; // CLC, SEC, CLI, SEI + 8'b1xx0_00x0: state <= FETCH; // IMM + 8'b1xx0_1100: state <= ABS0; // X/Y abs + 8'b1xxx_1000: state <= REG; // DEY, TYA, ... + 8'bxxx0_0001: state <= INDX0; + 8'bxxx0_01xx: state <= ZP0; + 8'bxxx0_1001: state <= FETCH; // IMM + 8'bxxx0_1101: state <= ABS0; // even E column + 8'bxxx0_1110: state <= ABS0; // even E column + 8'bxxx1_0000: state <= BRA0; // odd 0 column + 8'bxxx1_0001: state <= INDY0; // odd 1 column + 8'bxxx1_01xx: state <= ZPX0; // odd 4,5,6,7 columns + 8'bxxx1_1001: state <= ABSX0; // odd 9 column + 8'bxxx1_11xx: state <= ABSX0; // odd C, D, E, F columns + 8'bxxxx_1010: state <= REG; // A, TXA, ... NOP + endcase + + ZP0 : state <= write_back ? READ : FETCH; + + ZPX0 : state <= ZPX1; + ZPX1 : state <= write_back ? READ : FETCH; + + ABS0 : state <= ABS1; + ABS1 : state <= write_back ? READ : FETCH; + + ABSX0 : state <= ABSX1; + ABSX1 : state <= (CO | store | write_back) ? ABSX2 : FETCH; + ABSX2 : state <= write_back ? READ : FETCH; + + INDX0 : state <= INDX1; + INDX1 : state <= INDX2; + INDX2 : state <= INDX3; + INDX3 : state <= FETCH; + + INDY0 : state <= INDY1; + INDY1 : state <= INDY2; + INDY2 : state <= (CO | store) ? INDY3 : FETCH; + INDY3 : state <= FETCH; + + READ : state <= WRITE; + WRITE : state <= FETCH; + FETCH : state <= DECODE; + + REG : state <= DECODE; + + PUSH0 : state <= PUSH1; + PUSH1 : state <= DECODE; + + PULL0 : state <= PULL1; + PULL1 : state <= PULL2; + PULL2 : state <= DECODE; + + JSR0 : state <= JSR1; + JSR1 : state <= JSR2; + JSR2 : state <= JSR3; + JSR3 : state <= FETCH; + + RTI0 : state <= RTI1; + RTI1 : state <= RTI2; + RTI2 : state <= RTI3; + RTI3 : state <= RTI4; + RTI4 : state <= DECODE; + + RTS0 : state <= RTS1; + RTS1 : state <= RTS2; + RTS2 : state <= RTS3; + RTS3 : state <= FETCH; + + BRA0 : state <= cond_true ? BRA1 : DECODE; + BRA1 : state <= (CO ^ backwards) ? BRA2 : DECODE; + BRA2 : state <= DECODE; + + JMP0 : state <= JMP1; + JMP1 : state <= DECODE; + + JMPI0 : state <= JMPI1; + JMPI1 : state <= JMP0; + + BRK0 : state <= BRK1; + BRK1 : state <= BRK2; + BRK2 : state <= BRK3; + BRK3 : state <= JMP0; + + endcase + +/* + * Additional control signals + */ + +always @(posedge clk) + if( reset ) + res <= 1; + else if( state == DECODE ) + res <= 0; + +always @(posedge clk) + if( state == DECODE && RDY ) + casex( IR ) + 8'b0xx01010, // ASLA, ROLA, LSRA, RORA + 8'b0xxxxx01, // ORA, AND, EOR, ADC + 8'b100x10x0, // DEY, TYA, TXA, TXS + 8'b1010xxx0, // LDA/LDX/LDY + 8'b10111010, // TSX + 8'b1011x1x0, // LDX/LDY + 8'b11001010, // DEX + 8'b1x1xxx01, // LDA, SBC + 8'bxxx01000: // DEY, TAY, INY, INX + load_reg <= 1; + + default: load_reg <= 0; + endcase + +always @(posedge clk) + if( state == DECODE && RDY ) + casex( IR ) + 8'b1110_1000, // INX + 8'b1100_1010, // DEX + 8'b101x_xx10: // LDX, TAX, TSX + dst_reg <= SEL_X; + + 8'b0x00_1000, // PHP, PHA + 8'b1001_1010: // TXS + dst_reg <= SEL_S; + + 8'b1x00_1000, // DEY, DEX + 8'b101x_x100, // LDY + 8'b1010_x000: // LDY #imm, TAY + dst_reg <= SEL_Y; + + default: dst_reg <= SEL_A; + endcase + +always @(posedge clk) + if( state == DECODE && RDY ) + casex( IR ) + 8'b1011_1010: // TSX + src_reg <= SEL_S; + + 8'b100x_x110, // STX + 8'b100x_1x10, // TXA, TXS + 8'b1110_xx00, // INX, CPX + 8'b1100_1010: // DEX + src_reg <= SEL_X; + + 8'b100x_x100, // STY + 8'b1001_1000, // TYA + 8'b1100_xx00, // CPY + 8'b1x00_1000: // DEY, INY + src_reg <= SEL_Y; + + default: src_reg <= SEL_A; + endcase + +always @(posedge clk) + if( state == DECODE && RDY ) + casex( IR ) + 8'bxxx1_0001, // INDY + 8'b10x1_x110, // LDX/STX zpg/abs, Y + 8'bxxxx_1001: // abs, Y + index_y <= 1; + + default: index_y <= 0; + endcase + + +always @(posedge clk) + if( state == DECODE && RDY ) + casex( IR ) + 8'b100x_x1x0, // STX, STY + 8'b100x_xx01: // STA + store <= 1; + + default: store <= 0; + + endcase + +always @(posedge clk ) + if( state == DECODE && RDY ) + casex( IR ) + 8'b0xxx_x110, // ASL, ROL, LSR, ROR + 8'b11xx_x110: // DEC/INC + write_back <= 1; + + default: write_back <= 0; + endcase + + +always @(posedge clk ) + if( state == DECODE && RDY ) + casex( IR ) + 8'b101x_xxxx: // LDA, LDX, LDY + load_only <= 1; + default: load_only <= 0; + endcase + +always @(posedge clk ) + if( state == DECODE && RDY ) + casex( IR ) + 8'b111x_x110, // INC + 8'b11x0_1000: // INX, INY + inc <= 1; + + default: inc <= 0; + endcase + +always @(posedge clk ) + if( (state == DECODE || state == BRK0) && RDY ) + casex( IR ) + 8'bx11x_xx01: // SBC, ADC + adc_sbc <= 1; + + default: adc_sbc <= 0; + endcase + +always @(posedge clk ) + if( (state == DECODE || state == BRK0) && RDY ) + casex( IR ) + 8'b011x_xx01: // ADC + adc_bcd <= D; + + default: adc_bcd <= 0; + endcase + +always @(posedge clk ) + if( state == DECODE && RDY ) + casex( IR ) + 8'b0xxx_x110, // ASL, ROL, LSR, ROR (abs, absx, zpg, zpgx) + 8'b0xxx_1010: // ASL, ROL, LSR, ROR (acc) + shift <= 1; + + default: shift <= 0; + endcase + +always @(posedge clk ) + if( state == DECODE && RDY ) + casex( IR ) + 8'b11x0_0x00, // CPX, CPY (imm/zp) + 8'b11x0_1100, // CPX, CPY (abs) + 8'b110x_xx01: // CMP + compare <= 1; + + default: compare <= 0; + endcase + +always @(posedge clk ) + if( state == DECODE && RDY ) + casex( IR ) + 8'b01xx_xx10: // ROR, LSR + shift_right <= 1; + + default: shift_right <= 0; + endcase + +always @(posedge clk ) + if( state == DECODE && RDY ) + casex( IR ) + 8'b0x1x_1010, // ROL A, ROR A + 8'b0x1x_x110: // ROR, ROL + rotate <= 1; + + default: rotate <= 0; + endcase + +always @(posedge clk ) + if( state == DECODE && RDY ) + casex( IR ) + 8'b00xx_xx10: // ROL, ASL + op <= OP_ROL; + + 8'b0010_x100: // BIT zp/abs + op <= OP_AND; + + 8'b01xx_xx10: // ROR, LSR + op <= OP_A; + + 8'b1000_1000, // DEY + 8'b1100_1010, // DEX + 8'b110x_x110, // DEC + 8'b11xx_xx01, // CMP, SBC + 8'b11x0_0x00, // CPX, CPY (imm, zpg) + 8'b11x0_1100: op <= OP_SUB; + + 8'b010x_xx01, // EOR + 8'b00xx_xx01: // ORA, AND + op <= { 2'b11, IR[6:5] }; + + default: op <= OP_ADD; + endcase + +always @(posedge clk ) + if( state == DECODE && RDY ) + casex( IR ) + 8'b0010_x100: // BIT zp/abs + bit_ins <= 1; + + default: bit_ins <= 0; + endcase + +/* + * special instructions + */ +always @(posedge clk ) + if( state == DECODE && RDY ) begin + php <= (IR == 8'h08); + clc <= (IR == 8'h18); + plp <= (IR == 8'h28); + sec <= (IR == 8'h38); + cli <= (IR == 8'h58); + sei <= (IR == 8'h78); + clv <= (IR == 8'hb8); + cld <= (IR == 8'hd8); + sed <= (IR == 8'hf8); + brk <= (IR == 8'h00); + end + +always @(posedge clk) + if( RDY ) + cond_code <= IR[7:5]; + +always @* + case( cond_code ) + 3'b000: cond_true = ~N; + 3'b001: cond_true = N; + 3'b010: cond_true = ~V; + 3'b011: cond_true = V; + 3'b100: cond_true = ~C; + 3'b101: cond_true = C; + 3'b110: cond_true = ~Z; + 3'b111: cond_true = Z; + endcase + + +reg NMI_1 = 0; // delayed NMI signal + +always @(posedge clk) + NMI_1 <= NMI; + +always @(posedge clk ) + if( NMI_edge && state == BRK3 ) + NMI_edge <= 0; + else if( NMI & ~NMI_1 ) + NMI_edge <= 1; + +endmodule diff --git a/hw/efinix_fpga/simulation/src/verilog-6502/cpu_65c02.v b/hw/efinix_fpga/simulation/src/verilog-6502/cpu_65c02.v new file mode 100644 index 0000000..967b7a3 --- /dev/null +++ b/hw/efinix_fpga/simulation/src/verilog-6502/cpu_65c02.v @@ -0,0 +1,1418 @@ +/* + * verilog model of 65C02 CPU. + * + * Based on original 6502 "Arlet 6502 Core" by Arlet Ottens + * + * (C) Arlet Ottens, + * + * Feel free to use this code in any project (commercial or not), as long as you + * keep this message, and the copyright notice. This code is provided "as is", + * without any warranties of any kind. + * + * Support for 65C02 instructions and addressing modes by David Banks and Ed Spittles + * + * (C) 2016 David Banks and Ed Spittles + * + * Feel free to use this code in any project (commercial or not), as long as you + * keep this message, and the copyright notice. This code is provided "as is", + * without any warranties of any kind. + * + */ + +/* + * Note that not all 6502 interface signals are supported (yet). The goal + * is to create an Acorn Atom model, and the Atom didn't use all signals on + * the main board. + * + * The data bus is implemented as separate read/write buses. Combine them + * on the output pads if external memory is required. + */ + +/* + * Two things were needed to correctly implement 65C02 NOPs + * 1. Ensure the microcode state machine uses an appropriate addressing mode for the opcode length + * 2. Ensure there are no side-effects (e.g. register updates, memory stores, etc) + * + * If IMPLEMENT_NOPS is defined, the state machine is modified accordingly. + */ + +`define IMPLEMENT_NOPS + +/* + * Two things were needed to correctly implement 65C02 BCD arithmentic + * 1. The Z flag needs calculating over the BCD adjusted ALU output + * 2. The N flag needs calculating over the BCD adjusted ALU output + * + * If IMPLEMENT_CORRECT_BCD_FLAGS is defined, this additional logic is added + */ + +// `define IMPLEMENT_CORRECT_BCD_FLAGS + +module cpu_65c02( clk, reset, AB, DI, DO, WE, IRQ, NMI, RDY, SYNC ); + +input clk; // CPU clock +input reset; // reset signal +output reg [15:0] AB; // address bus +input [7:0] DI; // data in, read bus +output [7:0] DO; // data out, write bus +output WE; // write enable +input IRQ; // interrupt request +input NMI; // non-maskable interrupt request +input RDY; // Ready signal. Pauses CPU when RDY=0 +output reg SYNC; // AB is first cycle of the intruction + +/* + * internal signals + */ + +reg [15:0] PC; // Program Counter +reg [7:0] ABL; // Address Bus Register LSB +reg [7:0] ABH; // Address Bus Register MSB +wire [7:0] ADD; // Adder Hold Register (registered in ALU) + +reg [7:0] DIHOLD; // Hold for Data In +reg DIHOLD_valid; // +wire [7:0] DIMUX; // + +reg [7:0] IRHOLD; // Hold for Instruction register +reg IRHOLD_valid; // Valid instruction in IRHOLD + +reg [7:0] AXYS[3:0]; // A, X, Y and S register file + +reg C = 0; // carry flag (init at zero to avoid X's in ALU sim) +reg Z = 0; // zero flag +reg I = 0; // interrupt flag +reg D = 0; // decimal flag +reg V = 0; // overflow flag +reg N = 0; // negative flag +wire AZ; // ALU Zero flag +wire AZ1; // ALU Zero flag (BCD adjusted) +reg AZ2; // ALU Second Zero flag, set using TSB/TRB semantics +wire AV; // ALU overflow flag +wire AN; // ALU negative flag +wire AN1; // ALU negative flag (BCD adjusted) +wire HC; // ALU half carry + +reg [7:0] AI; // ALU Input A +reg [7:0] BI; // ALU Input B +wire [7:0] DI; // Data In +wire [7:0] IR; // Instruction register +reg [7:0] DO; // Data Out +wire [7:0] AO; // ALU output after BCD adjustment +reg WE; // Write Enable +reg CI; // Carry In +wire CO; // Carry Out +wire [7:0] PCH = PC[15:8]; +wire [7:0] PCL = PC[7:0]; + +reg NMI_edge = 0; // captured NMI edge + +reg [1:0] regsel; // Select A, X, Y or S register +wire [7:0] regfile = AXYS[regsel]; // Selected register output + +parameter + SEL_A = 2'd0, + SEL_S = 2'd1, + SEL_X = 2'd2, + SEL_Y = 2'd3; + +/* + * define some signals for watching in simulator output + */ + + +`ifdef SIM +wire [7:0] A = AXYS[SEL_A]; // Accumulator +wire [7:0] X = AXYS[SEL_X]; // X register +wire [7:0] Y = AXYS[SEL_Y]; // Y register +wire [7:0] S = AXYS[SEL_S]; // Stack pointer +`endif + +wire [7:0] P = { N, V, 2'b11, D, I, Z, C }; + +/* + * instruction decoder/sequencer + */ + +reg [5:0] state; + +/* + * control signals + */ + +reg PC_inc; // Increment PC +reg [15:0] PC_temp; // intermediate value of PC + +reg [1:0] src_reg; // source register index +reg [1:0] dst_reg; // destination register index + +reg index_y; // if set, then Y is index reg rather than X +reg load_reg; // loading a register (A, X, Y, S) in this instruction +reg inc; // increment +reg write_back; // set if memory is read/modified/written +reg load_only; // LDA/LDX/LDY instruction +reg store; // doing store (STA/STX/STY) +reg adc_sbc; // doing ADC/SBC +reg compare; // doing CMP/CPY/CPX +reg shift; // doing shift/rotate instruction +reg rotate; // doing rotate (no shift) +reg backwards; // backwards branch +reg cond_true; // branch condition is true +reg [3:0] cond_code; // condition code bits from instruction +reg shift_right; // Instruction ALU shift/rotate right +reg alu_shift_right; // Current cycle shift right enable +reg [3:0] op; // Main ALU operation for instruction +reg [3:0] alu_op; // Current cycle ALU operation +reg adc_bcd; // ALU should do BCD style carry +reg adj_bcd; // results should be BCD adjusted + +/* + * some flip flops to remember we're doing special instructions. These + * get loaded at the DECODE state, and used later + */ +reg store_zero; // doing STZ instruction +reg trb_ins; // doing TRB instruction +reg txb_ins; // doing TSB/TRB instruction +reg bit_ins; // doing BIT instruction +reg bit_ins_nv; // doing BIT instruction that will update the n and v flags (i.e. not BIT imm) +reg plp; // doing PLP instruction +reg php; // doing PHP instruction +reg clc; // clear carry +reg sec; // set carry +reg cld; // clear decimal +reg sed; // set decimal +reg cli; // clear interrupt +reg sei; // set interrupt +reg clv; // clear overflow + +reg res; // in reset + +/* + * ALU operations + */ + +parameter + OP_OR = 4'b1100, + OP_AND = 4'b1101, + OP_EOR = 4'b1110, + OP_ADD = 4'b0011, + OP_SUB = 4'b0111, + OP_ROL = 4'b1011, + OP_A = 4'b1111; + +/* + * Microcode state machine. Basically, every addressing mode has its own + * path through the state machine. Additional information, such as the + * operation, source and destination registers are decoded in parallel, and + * kept in separate flops. + */ + +parameter + ABS0 = 6'd0, // ABS - fetch LSB + ABS1 = 6'd1, // ABS - fetch MSB + ABSX0 = 6'd2, // ABS, X - fetch LSB and send to ALU (+X) + ABSX1 = 6'd3, // ABS, X - fetch MSB and send to ALU (+Carry) + ABSX2 = 6'd4, // ABS, X - Wait for ALU (only if needed) + BRA0 = 6'd5, // Branch - fetch offset and send to ALU (+PC[7:0]) + BRA1 = 6'd6, // Branch - fetch opcode, and send PC[15:8] to ALU + BRA2 = 6'd7, // Branch - fetch opcode (if page boundary crossed) + BRK0 = 6'd8, // BRK/IRQ - push PCH, send S to ALU (-1) + BRK1 = 6'd9, // BRK/IRQ - push PCL, send S to ALU (-1) + BRK2 = 6'd10, // BRK/IRQ - push P, send S to ALU (-1) + BRK3 = 6'd11, // BRK/IRQ - write S, and fetch @ fffe + DECODE = 6'd12, // IR is valid, decode instruction, and write prev reg + FETCH = 6'd13, // fetch next opcode, and perform prev ALU op + INDX0 = 6'd14, // (ZP,X) - fetch ZP address, and send to ALU (+X) + INDX1 = 6'd15, // (ZP,X) - fetch LSB at ZP+X, calculate ZP+X+1 + INDX2 = 6'd16, // (ZP,X) - fetch MSB at ZP+X+1 + INDX3 = 6'd17, // (ZP,X) - fetch data + INDY0 = 6'd18, // (ZP),Y - fetch ZP address, and send ZP to ALU (+1) + INDY1 = 6'd19, // (ZP),Y - fetch at ZP+1, and send LSB to ALU (+Y) + INDY2 = 6'd20, // (ZP),Y - fetch data, and send MSB to ALU (+Carry) + INDY3 = 6'd21, // (ZP),Y) - fetch data (if page boundary crossed) + JMP0 = 6'd22, // JMP - fetch PCL and hold + JMP1 = 6'd23, // JMP - fetch PCH + JMPI0 = 6'd24, // JMP IND - fetch LSB and send to ALU for delay (+0) + JMPI1 = 6'd25, // JMP IND - fetch MSB, proceed with JMP0 state + JSR0 = 6'd26, // JSR - push PCH, save LSB, send S to ALU (-1) + JSR1 = 6'd27, // JSR - push PCL, send S to ALU (-1) + JSR2 = 6'd28, // JSR - write S + JSR3 = 6'd29, // JSR - fetch MSB + PULL0 = 6'd30, // PLP/PLA/PLX/PLY - save next op in IRHOLD, send S to ALU (+1) + PULL1 = 6'd31, // PLP/PLA/PLX/PLY - fetch data from stack, write S + PULL2 = 6'd32, // PLP/PLA/PLX/PLY - prefetch op, but don't increment PC + PUSH0 = 6'd33, // PHP/PHA/PHX/PHY - send A to ALU (+0) + PUSH1 = 6'd34, // PHP/PHA/PHX/PHY - write A/P, send S to ALU (-1) + READ = 6'd35, // Read memory for read/modify/write (INC, DEC, shift) + REG = 6'd36, // Read register for reg-reg transfers + RTI0 = 6'd37, // RTI - send S to ALU (+1) + RTI1 = 6'd38, // RTI - read P from stack + RTI2 = 6'd39, // RTI - read PCL from stack + RTI3 = 6'd40, // RTI - read PCH from stack + RTI4 = 6'd41, // RTI - read PCH from stack + RTS0 = 6'd42, // RTS - send S to ALU (+1) + RTS1 = 6'd43, // RTS - read PCL from stack + RTS2 = 6'd44, // RTS - write PCL to ALU, read PCH + RTS3 = 6'd45, // RTS - load PC and increment + WRITE = 6'd46, // Write memory for read/modify/write + ZP0 = 6'd47, // Z-page - fetch ZP address + ZPX0 = 6'd48, // ZP, X - fetch ZP, and send to ALU (+X) + ZPX1 = 6'd49, // ZP, X - load from memory + IND0 = 6'd50, // (ZP) - fetch ZP address, and send to ALU (+0) + JMPIX0 = 6'd51, // JMP (,X)- fetch LSB and send to ALU (+X) + JMPIX1 = 6'd52, // JMP (,X)- fetch MSB and send to ALU (+Carry) + JMPIX2 = 6'd53; // JMP (,X)- Wait for ALU (only if needed) + +`ifdef SIM + +/* + * easy to read names in simulator output + */ +reg [8*6-1:0] statename; + +always @* + case( state ) + DECODE: statename = "DECODE"; + REG: statename = "REG"; + ZP0: statename = "ZP0"; + ZPX0: statename = "ZPX0"; + ZPX1: statename = "ZPX1"; + ABS0: statename = "ABS0"; + ABS1: statename = "ABS1"; + ABSX0: statename = "ABSX0"; + ABSX1: statename = "ABSX1"; + ABSX2: statename = "ABSX2"; + IND0: statename = "IND0"; + INDX0: statename = "INDX0"; + INDX1: statename = "INDX1"; + INDX2: statename = "INDX2"; + INDX3: statename = "INDX3"; + INDY0: statename = "INDY0"; + INDY1: statename = "INDY1"; + INDY2: statename = "INDY2"; + INDY3: statename = "INDY3"; + READ: statename = "READ"; + WRITE: statename = "WRITE"; + FETCH: statename = "FETCH"; + PUSH0: statename = "PUSH0"; + PUSH1: statename = "PUSH1"; + PULL0: statename = "PULL0"; + PULL1: statename = "PULL1"; + PULL2: statename = "PULL2"; + JSR0: statename = "JSR0"; + JSR1: statename = "JSR1"; + JSR2: statename = "JSR2"; + JSR3: statename = "JSR3"; + RTI0: statename = "RTI0"; + RTI1: statename = "RTI1"; + RTI2: statename = "RTI2"; + RTI3: statename = "RTI3"; + RTI4: statename = "RTI4"; + RTS0: statename = "RTS0"; + RTS1: statename = "RTS1"; + RTS2: statename = "RTS2"; + RTS3: statename = "RTS3"; + BRK0: statename = "BRK0"; + BRK1: statename = "BRK1"; + BRK2: statename = "BRK2"; + BRK3: statename = "BRK3"; + BRA0: statename = "BRA0"; + BRA1: statename = "BRA1"; + BRA2: statename = "BRA2"; + JMP0: statename = "JMP0"; + JMP1: statename = "JMP1"; + JMPI0: statename = "JMPI0"; + JMPI1: statename = "JMPI1"; + JMPIX0: statename = "JMPIX0"; + JMPIX1: statename = "JMPIX1"; + JMPIX2: statename = "JMPIX2"; + + endcase + +//always @( PC ) +// $display( "%t, PC:%04x IR:%02x A:%02x X:%02x Y:%02x S:%02x C:%d Z:%d V:%d N:%d P:%02x", $time, PC, IR, A, X, Y, S, C, Z, V, N, P ); + +`endif + + + +/* + * Program Counter Increment/Load. First calculate the base value in + * PC_temp. + */ +always @* + case( state ) + DECODE: if( (~I & IRQ) | NMI_edge ) + PC_temp = { ABH, ABL }; + else + PC_temp = PC; + + + JMP1, + JMPI1, + JMPIX1, + JSR3, + RTS3, + RTI4: PC_temp = { DIMUX, ADD }; + + BRA1: PC_temp = { ABH, ADD }; + + JMPIX2, + BRA2: PC_temp = { ADD, PCL }; + + BRK2: PC_temp = res ? 16'hfffc : + NMI_edge ? 16'hfffa : 16'hfffe; + + default: PC_temp = PC; + endcase + +/* + * Determine wether we need PC_temp, or PC_temp + 1 + */ +always @* + case( state ) + DECODE: if( (~I & IRQ) | NMI_edge ) + PC_inc = 0; + else + PC_inc = 1; + + ABS0, + JMPIX0, + JMPIX2, + ABSX0, + FETCH, + BRA0, + BRA2, + BRK3, + JMPI1, + JMP1, + RTI4, + RTS3: PC_inc = 1; + + JMPIX1: PC_inc = ~CO; // Don't increment PC if we are going to go through JMPIX2 + + BRA1: PC_inc = CO ^~ backwards; + + default: PC_inc = 0; + endcase + +/* + * Set new PC + */ +always @(posedge clk) + if( RDY ) + PC <= PC_temp + PC_inc; + +/* + * Address Generator + */ + +parameter + ZEROPAGE = 8'h00, + STACKPAGE = 8'h01; + +always @* + case( state ) + JMPIX1, + ABSX1, + INDX3, + INDY2, + JMP1, + JMPI1, + RTI4, + ABS1: AB = { DIMUX, ADD }; + + BRA2, + INDY3, + JMPIX2, + ABSX2: AB = { ADD, ABL }; + + BRA1: AB = { ABH, ADD }; + + JSR0, + PUSH1, + RTS0, + RTI0, + BRK0: AB = { STACKPAGE, regfile }; + + BRK1, + JSR1, + PULL1, + RTS1, + RTS2, + RTI1, + RTI2, + RTI3, + BRK2: AB = { STACKPAGE, ADD }; + + INDY1, + INDX1, + ZPX1, + INDX2: AB = { ZEROPAGE, ADD }; + + ZP0, + INDY0: AB = { ZEROPAGE, DIMUX }; + + REG, + READ, + WRITE: AB = { ABH, ABL }; + + default: AB = PC; + endcase + +/* + * ABH/ABL pair is used for registering previous address bus state. + * This can be used to keep the current address, freeing up the original + * source of the address, such as the ALU or DI. + */ +always @(posedge clk) + if( state != PUSH0 && state != PUSH1 && RDY && + state != PULL0 && state != PULL1 && state != PULL2 ) + begin + ABL <= AB[7:0]; + ABH <= AB[15:8]; + end + +/* + * Data Out MUX + */ +always @* + case( state ) + WRITE: DO = ADD; + + JSR0, + BRK0: DO = PCH; + + JSR1, + BRK1: DO = PCL; + + PUSH1: DO = php ? P : ADD; + + BRK2: DO = (IRQ | NMI_edge) ? (P & 8'b1110_1111) : P; + + default: DO = store_zero ? 8'b0 : regfile; + endcase + +/* + * Write Enable Generator + */ + +always @* + case( state ) + BRK0, // writing to stack or memory + BRK1, + BRK2, + JSR0, + JSR1, + PUSH1, + WRITE: WE = 1; + + INDX3, // only if doing a STA, STX or STY + INDY3, + ABSX2, + ABS1, + ZPX1, + ZP0: WE = store; + + default: WE = 0; + endcase + +/* + * register file, contains A, X, Y and S (stack pointer) registers. At each + * cycle only 1 of those registers needs to be accessed, so they combined + * in a small memory, saving resources. + */ + +reg write_register; // set when register file is written + +always @* + case( state ) + DECODE: write_register = load_reg & ~plp; + + PULL1, + RTS2, + RTI3, + BRK3, + JSR0, + JSR2 : write_register = 1; + + default: write_register = 0; + endcase + +/* + * BCD adjust logic + */ + +always @(posedge clk) + adj_bcd <= adc_sbc & D; // '1' when doing a BCD instruction + +reg [3:0] ADJL; +reg [3:0] ADJH; + +// adjustment term to be added to ADD[3:0] based on the following +// adj_bcd: '1' if doing ADC/SBC with D=1 +// adc_bcd: '1' if doing ADC with D=1 +// HC : half carry bit from ALU +always @* begin + casex( {adj_bcd, adc_bcd, HC} ) + 3'b0xx: ADJL = 4'd0; // no BCD instruction + 3'b100: ADJL = 4'd10; // SBC, and digital borrow + 3'b101: ADJL = 4'd0; // SBC, but no borrow + 3'b110: ADJL = 4'd0; // ADC, but no carry + 3'b111: ADJL = 4'd6; // ADC, and decimal/digital carry + endcase +end + +// adjustment term to be added to ADD[7:4] based on the following +// adj_bcd: '1' if doing ADC/SBC with D=1 +// adc_bcd: '1' if doing ADC with D=1 +// CO : carry out bit from ALU +always @* begin + casex( {adj_bcd, adc_bcd, CO} ) + 3'b0xx: ADJH = 4'd0; // no BCD instruction + 3'b100: ADJH = 4'd10; // SBC, and digital borrow + 3'b101: ADJH = 4'd0; // SBC, but no borrow + 3'b110: ADJH = 4'd0; // ADC, but no carry + 3'b111: ADJH = 4'd6; // ADC, and decimal/digital carry + endcase +end + +assign AO = { ADD[7:4] + ADJH, ADD[3:0] + ADJL }; + +`ifdef IMPLEMENT_CORRECT_BCD_FLAGS + +assign AN1 = AO[7]; +assign AZ1 = ~|AO; + +`else + +assign AN1 = AN; +assign AZ1 = AZ; + +`endif + +/* + * write to a register. Usually this is the (BCD corrected) output of the + * ALU, but in case of the JSR0 we use the S register to temporarily store + * the PCL. This is possible, because the S register itself is stored in + * the ALU during those cycles. + */ +always @(posedge clk) + if( write_register & RDY ) + AXYS[regsel] <= (state == JSR0) ? DIMUX : AO; + +/* + * register select logic. This determines which of the A, X, Y or + * S registers will be accessed. + */ + +always @* + case( state ) + INDY1, + INDX0, + ZPX0, + JMPIX0, + ABSX0 : regsel = index_y ? SEL_Y : SEL_X; + + + DECODE : regsel = dst_reg; + + BRK0, + BRK3, + JSR0, + JSR2, + PULL0, + PULL1, + PUSH1, + RTI0, + RTI3, + RTS0, + RTS2 : regsel = SEL_S; + + default: regsel = src_reg; + endcase + +/* + * ALU + */ + +ALU ALU( .clk(clk), + .op(alu_op), + .right(alu_shift_right), + .AI(AI), + .BI(BI), + .CI(CI), + .BCD(adc_bcd & (state == FETCH)), + .CO(CO), + .OUT(ADD), + .V(AV), + .Z(AZ), + .N(AN), + .HC(HC), + .RDY(RDY) ); + +/* + * Select current ALU operation + */ + +always @* + case( state ) + READ: alu_op = op; + + BRA1: alu_op = backwards ? OP_SUB : OP_ADD; + + FETCH, + REG : alu_op = op; + + DECODE, + ABS1: alu_op = 1'bx; + + PUSH1, + BRK0, + BRK1, + BRK2, + JSR0, + JSR1: alu_op = OP_SUB; + + default: alu_op = OP_ADD; + endcase + +/* + * Determine shift right signal to ALU + */ + +always @* + if( state == FETCH || state == REG || state == READ ) + alu_shift_right = shift_right; + else + alu_shift_right = 0; + +/* + * Sign extend branch offset. + */ + +always @(posedge clk) + if( RDY ) + backwards <= DIMUX[7]; + +/* + * ALU A Input MUX + */ + +always @* + case( state ) + JSR1, + RTS1, + RTI1, + RTI2, + BRK1, + BRK2, + INDX1: AI = ADD; + + REG, + ZPX0, + INDX0, + JMPIX0, + ABSX0, + RTI0, + RTS0, + JSR0, + JSR2, + BRK0, + PULL0, + INDY1, + PUSH0, + PUSH1: AI = regfile; + + BRA0, + READ: AI = DIMUX; + + BRA1: AI = ABH; // don't use PCH in case we're + + FETCH: AI = load_only ? 8'b0 : regfile; + + DECODE, + ABS1: AI = 8'hxx; // don't care + + default: AI = 0; + endcase + + +/* + * ALU B Input mux + */ + +always @* + case( state ) + BRA1, + RTS1, + RTI0, + RTI1, + RTI2, + INDX1, + REG, + JSR0, + JSR1, + JSR2, + BRK0, + BRK1, + BRK2, + PUSH0, + PUSH1, + PULL0, + RTS0: BI = 8'h00; + + READ: BI = txb_ins ? (trb_ins ? ~regfile : regfile) : 8'h00; + + BRA0: BI = PCL; + + DECODE, + ABS1: BI = 8'hxx; + + default: BI = DIMUX; + endcase + +/* + * ALU CI (carry in) mux + */ + +always @* + case( state ) + INDY2, + BRA1, + JMPIX1, + ABSX1: CI = CO; + + DECODE, + ABS1: CI = 1'bx; + + READ, + REG: CI = rotate ? C : + shift ? 1'b0 : inc; + + FETCH: CI = rotate ? C : + compare ? 1'b1 : + (shift | load_only) ? 1'b0 : C; + + PULL0, + RTI0, + RTI1, + RTI2, + RTS0, + RTS1, + INDY0, + INDX1: CI = 1; + + default: CI = 0; + endcase + +/* + * Processor Status Register update + * + */ + +/* + * Update C flag when doing ADC/SBC, shift/rotate, compare + */ +always @(posedge clk ) + if( shift && state == WRITE ) + C <= CO; + else if( state == RTI2 ) + C <= DIMUX[0]; + else if( ~write_back && state == DECODE ) begin + if( adc_sbc | shift | compare ) + C <= CO; + else if( plp ) + C <= ADD[0]; + else begin + if( sec ) C <= 1; + if( clc ) C <= 0; + end + end + +/* + * Special Z flag got TRB/TSB + */ +always @(posedge clk) + if (RDY) + AZ2 <= ~|(AI & regfile); + +/* + * Update Z, N flags when writing A, X, Y, Memory, or when doing compare + */ + +always @(posedge clk) + if( state == WRITE) + Z <= txb_ins ? AZ2 : AZ1; + else if( state == RTI2 ) + Z <= DIMUX[1]; + else if( state == DECODE ) begin + if( plp ) + Z <= ADD[1]; + else if( (load_reg & (regsel != SEL_S)) | compare | bit_ins ) + Z <= AZ1; + end + +always @(posedge clk) + if( state == WRITE && ~txb_ins) + N <= AN1; + else if( state == RTI2 ) + N <= DIMUX[7]; + else if( state == DECODE ) begin + if( plp ) + N <= ADD[7]; + else if( (load_reg & (regsel != SEL_S)) | compare ) + N <= AN1; + end else if( state == FETCH && bit_ins_nv ) + N <= DIMUX[7]; + +/* + * Update I flag + */ + +always @(posedge clk) + if( state == BRK3 ) + I <= 1; + else if( state == RTI2 ) + I <= DIMUX[2]; + else if( state == REG ) begin + if( sei ) I <= 1; + if( cli ) I <= 0; + end else if( state == DECODE ) + if( plp ) I <= ADD[2]; + +/* + * Update D flag + */ +always @(posedge clk ) + if( state == RTI2 ) + D <= DIMUX[3]; + else if( state == DECODE ) begin + if( sed ) D <= 1; + if( cld ) D <= 0; + if( plp ) D <= ADD[3]; + end + +/* + * Update V flag + */ +always @(posedge clk ) + if( state == RTI2 ) + V <= DIMUX[6]; + else if( state == DECODE ) begin + if( adc_sbc ) V <= AV; + if( clv ) V <= 0; + if( plp ) V <= ADD[6]; + end else if( state == FETCH && bit_ins_nv ) + V <= DIMUX[6]; + +/* + * Instruction decoder + */ + +/* + * IR register/mux. Hold previous DI value in IRHOLD in PULL0 and PUSH0 + * states. In these states, the IR has been prefetched, and there is no + * time to read the IR again before the next decode. + */ + +//reg RDY1 = 1; + +//always @(posedge clk ) +// RDY1 <= RDY; + +//always @(posedge clk ) +// if( ~RDY && RDY1 ) +// DIHOLD <= DI; + +always @(posedge clk ) + if( reset ) + IRHOLD_valid <= 0; + else if( RDY ) begin + if( state == PULL0 || state == PUSH0 ) begin + IRHOLD <= DIMUX; + IRHOLD_valid <= 1; + end else if( state == DECODE ) + IRHOLD_valid <= 0; + end + +assign IR = (IRQ & ~I) | NMI_edge ? 8'h00 : + IRHOLD_valid ? IRHOLD : DIMUX; + +//assign DIMUX = ~RDY1 ? DIHOLD : DI; + +assign DIMUX = DI; + +/* + * Microcode state machine + */ +always @(posedge clk or posedge reset) + if( reset ) + state <= BRK0; + else if( RDY ) case( state ) + DECODE : + casex ( IR ) + // TODO Review for simplifications as in verilog the first matching case has priority + 8'b0000_0000: state <= BRK0; + 8'b0010_0000: state <= JSR0; + 8'b0010_1100: state <= ABS0; // BIT abs + 8'b1001_1100: state <= ABS0; // STZ abs + 8'b000x_1100: state <= ABS0; // TSB/TRB + 8'b0100_0000: state <= RTI0; // + 8'b0100_1100: state <= JMP0; + 8'b0110_0000: state <= RTS0; + 8'b0110_1100: state <= JMPI0; + 8'b0111_1100: state <= JMPIX0; +`ifdef IMPLEMENT_NOPS + 8'bxxxx_xx11: state <= REG; // (NOP1: 3/7/B/F column) + 8'bxxx0_0010: state <= FETCH; // (NOP2: 2 column, 4 column handled correctly below) + 8'bx1x1_1100: state <= ABS0; // (NOP3: C column) +`endif + 8'b0x00_1000: state <= PUSH0; + 8'b0x10_1000: state <= PULL0; + 8'b0xx1_1000: state <= REG; // CLC, SEC, CLI, SEI + 8'b11x0_00x0: state <= FETCH; // IMM + 8'b1x10_00x0: state <= FETCH; // IMM + 8'b1xx0_1100: state <= ABS0; // X/Y abs + 8'b1xxx_1000: state <= REG; // DEY, TYA, ... + 8'bxxx0_0001: state <= INDX0; + 8'bxxx1_0010: state <= IND0; // (ZP) odd 2 column + 8'b000x_0100: state <= ZP0; // TSB/TRB + 8'bxxx0_01xx: state <= ZP0; + 8'bxxx0_1001: state <= FETCH; // IMM + 8'bxxx0_1101: state <= ABS0; // even D column + 8'bxxx0_1110: state <= ABS0; // even E column + 8'bxxx1_0000: state <= BRA0; // odd 0 column (Branches) + 8'b1000_0000: state <= BRA0; // BRA + 8'bxxx1_0001: state <= INDY0; // odd 1 column + 8'bxxx1_01xx: state <= ZPX0; // odd 4,5,6,7 columns + 8'bxxx1_1001: state <= ABSX0; // odd 9 column + 8'bx011_1100: state <= ABSX0; // C column BIT (3C), LDY (BC) + 8'bxxx1_11x1: state <= ABSX0; // odd D, F columns + 8'bxxx1_111x: state <= ABSX0; // odd E, F columns + 8'bx101_1010: state <= PUSH0; // PHX/PHY + 8'bx111_1010: state <= PULL0; // PLX/PLY + 8'bx0xx_1010: state <= REG; // A, TXA, ... NOP + 8'bxxx0_1010: state <= REG; // A, TXA, ... NOP + endcase + + ZP0 : state <= write_back ? READ : FETCH; + + ZPX0 : state <= ZPX1; + ZPX1 : state <= write_back ? READ : FETCH; + + ABS0 : state <= ABS1; + ABS1 : state <= write_back ? READ : FETCH; + + ABSX0 : state <= ABSX1; + ABSX1 : state <= (CO | store | write_back) ? ABSX2 : FETCH; + ABSX2 : state <= write_back ? READ : FETCH; + + JMPIX0 : state <= JMPIX1; + JMPIX1 : state <= CO ? JMPIX2 : JMP0; + JMPIX2 : state <= JMP0; + + IND0 : state <= INDX1; + + INDX0 : state <= INDX1; + INDX1 : state <= INDX2; + INDX2 : state <= INDX3; + INDX3 : state <= FETCH; + + INDY0 : state <= INDY1; + INDY1 : state <= INDY2; + INDY2 : state <= (CO | store) ? INDY3 : FETCH; + INDY3 : state <= FETCH; + + READ : state <= WRITE; + WRITE : state <= FETCH; + FETCH : state <= DECODE; + + REG : state <= DECODE; + + PUSH0 : state <= PUSH1; + PUSH1 : state <= DECODE; + + PULL0 : state <= PULL1; + PULL1 : state <= PULL2; + PULL2 : state <= DECODE; + + JSR0 : state <= JSR1; + JSR1 : state <= JSR2; + JSR2 : state <= JSR3; + JSR3 : state <= FETCH; + + RTI0 : state <= RTI1; + RTI1 : state <= RTI2; + RTI2 : state <= RTI3; + RTI3 : state <= RTI4; + RTI4 : state <= DECODE; + + RTS0 : state <= RTS1; + RTS1 : state <= RTS2; + RTS2 : state <= RTS3; + RTS3 : state <= FETCH; + + BRA0 : state <= cond_true ? BRA1 : DECODE; + BRA1 : state <= (CO ^ backwards) ? BRA2 : DECODE; + BRA2 : state <= DECODE; + + JMP0 : state <= JMP1; + JMP1 : state <= DECODE; + + JMPI0 : state <= JMPI1; + JMPI1 : state <= JMP0; + + BRK0 : state <= BRK1; + BRK1 : state <= BRK2; + BRK2 : state <= BRK3; + BRK3 : state <= JMP0; + + endcase + + +/* + * Sync state machine + */ +always @(posedge clk or posedge reset) + if( reset ) + SYNC <= 1'b0; + else if( RDY ) case( state ) + BRA0 : SYNC <= !cond_true; + BRA1 : SYNC <= !(CO ^ backwards); + BRA2, + FETCH, + REG, + PUSH1, + PULL2, + RTI4, + JMP1, + BRA2 : SYNC <= 1'b1; + default: SYNC <= 1'b0; + endcase + +//assign SYNC = state == DECODE; + +/* + * Additional control signals + */ + +always @(posedge clk) + if( reset ) + res <= 1; + else if( state == DECODE ) + res <= 0; + +always @(posedge clk) + if( state == DECODE && RDY ) + casex( IR ) // DMB: Checked for 65C02 NOP collisions + 8'b0xx1_0010, // ORA, AND, EOR, ADC (zp) + 8'b1x11_0010, // LDA, SBC (zp) + 8'b0xxx_1010, // ASLA, INCA, ROLA, DECA, LSRA, PHY, RORA, PLY + 8'b0xxx_xx01, // ORA, AND, EOR, ADC + 8'b100x_10x0, // DEY, TYA, TXA, TXS + 8'b1010_xxx0, // LDA/LDX/LDY + 8'b1011_1010, // TSX + 8'b1011_x1x0, // LDX/LDY + 8'b1100_1010, // DEX + 8'b11x1_1010, // PHX, PLX + 8'b1x1x_xx01, // LDA, SBC + 8'bxxx0_1000: // PHP, PLP, PHA, PLA, DEY, TAY, INY, INX + load_reg <= 1; + + default: load_reg <= 0; + endcase + +always @(posedge clk) + if( state == DECODE && RDY ) + casex( IR ) + 8'b1110_1000, // INX + 8'b1100_1010, // DEX + 8'b1111_1010, // PLX + 8'b1010_0010, // LDX imm + 8'b101x_x110, // LDX + 8'b101x_1x10: // LDX, TAX, TSX + dst_reg <= SEL_X; + + 8'b0x00_1000, // PHP, PHA + 8'bx101_1010, // PHX, PHY + 8'b1001_1010: // TXS + dst_reg <= SEL_S; + + 8'b1x00_1000, // DEY, DEX + 8'b0111_1010, // PLY + 8'b101x_x100, // LDY + 8'b1010_x000: // LDY #imm, TAY + dst_reg <= SEL_Y; + + default: dst_reg <= SEL_A; + endcase + +always @(posedge clk) + if( state == DECODE && RDY ) + casex( IR ) + 8'b1011_1010: // TSX + src_reg <= SEL_S; + + 8'b100x_x110, // STX + 8'b100x_1x10, // TXA, TXS + 8'b1110_xx00, // INX, CPX + 8'b1101_1010, // PHX + 8'b1100_1010: // DEX + src_reg <= SEL_X; + + 8'b100x_x100, // STY + 8'b1001_1000, // TYA + 8'b1100_xx00, // CPY + 8'b0101_1010, // PHY + 8'b1x00_1000: // DEY, INY + src_reg <= SEL_Y; + + default: src_reg <= SEL_A; + endcase + +always @(posedge clk) + if( state == DECODE && RDY ) + casex( IR ) + 8'bxxx1_0001, // INDY + 8'b10x1_0110, // LDX zp,Y / STX zp,Y + 8'b1011_1110, // LDX abs,Y + 8'bxxxx_1001: // abs, Y + index_y <= 1; + + default: index_y <= 0; + endcase + + +always @(posedge clk) + if( state == DECODE && RDY ) + casex( IR ) // DMB: Checked for 65C02 NOP collisions + 8'b1001_0010, // STA (zp) + 8'b100x_x1x0, // STX, STY, STZ abs, STZ abs,x + 8'b011x_0100, // STZ zp, STZ zp,x + 8'b100x_xx01: // STA + store <= 1; + + default: store <= 0; + + endcase + +always @(posedge clk ) + if( state == DECODE && RDY ) + casex( IR ) // DMB: Checked for 65C02 NOP collisions + 8'b0xxx_x110, // ASL, ROL, LSR, ROR + 8'b000x_x100, // TSB/TRB + 8'b11xx_x110: // DEC/INC + write_back <= 1; + + default: write_back <= 0; + endcase + + +always @(posedge clk ) + if( state == DECODE && RDY ) + casex( IR ) + 8'b101x_xxxx: // LDA, LDX, LDY + load_only <= 1; + default: load_only <= 0; + endcase + +always @(posedge clk ) + if( state == DECODE && RDY ) + casex( IR ) + 8'b0001_1010, // INCA + 8'b111x_x110, // INC + 8'b11x0_1000: // INX, INY + inc <= 1; + + default: inc <= 0; + endcase + +always @(posedge clk ) + if( (state == DECODE || state == BRK0) && RDY ) + casex( IR ) + 8'bx111_0010, // SBC (zp), ADC (zp) + 8'bx11x_xx01: // SBC, ADC + adc_sbc <= 1; + + default: adc_sbc <= 0; + endcase + +always @(posedge clk ) + if( (state == DECODE || state == BRK0) && RDY ) + casex( IR ) + 8'b0111_0010, // ADC (zp) + 8'b011x_xx01: // ADC + adc_bcd <= D; + + default: adc_bcd <= 0; + endcase + +always @(posedge clk ) + if( state == DECODE && RDY ) + casex( IR ) + 8'b0xxx_x110, // ASL, ROL, LSR, ROR (abs, absx, zpg, zpgx) + 8'b0xx0_1010: // ASL, ROL, LSR, ROR (acc) + shift <= 1; + + default: shift <= 0; + endcase + +always @(posedge clk ) + if( state == DECODE && RDY ) + casex( IR ) + 8'b1101_0010, // CMP (zp) + 8'b11x0_0x00, // CPX, CPY (imm/zp) + 8'b11x0_1100, // CPX, CPY (abs) + 8'b110x_xx01: // CMP + compare <= 1; + + default: compare <= 0; + endcase + +always @(posedge clk ) + if( state == DECODE && RDY ) + casex( IR ) + 8'b01xx_x110, // ROR, LSR + 8'b01xx_1x10: // ROR, LSR + shift_right <= 1; + + default: shift_right <= 0; + endcase + +always @(posedge clk ) + if( state == DECODE && RDY ) + casex( IR ) + 8'b0x10_1010, // ROL A, ROR A + 8'b0x1x_x110: // ROR, ROL + rotate <= 1; + + default: rotate <= 0; + endcase + +always @(posedge clk ) + if( state == DECODE && RDY ) + casex( IR ) + 8'b0000_x100: // TSB + op <= OP_OR; + + 8'b0001_x100: // TRB + op <= OP_AND; + + 8'b00xx_x110, // ROL, ASL + 8'b00x0_1010: // ROL, ASL + op <= OP_ROL; + + 8'b1000_1001, // BIT imm + 8'b001x_x100: // BIT zp/abs/zpx/absx + op <= OP_AND; + + 8'b01xx_x110, // ROR, LSR + 8'b01xx_1x10: // ROR, LSR + op <= OP_A; + + 8'b11x1_0010, // CMP, SBC (zp) + 8'b0011_1010, // DEC A + 8'b1000_1000, // DEY + 8'b1100_1010, // DEX + 8'b110x_x110, // DEC + 8'b11xx_xx01, // CMP, SBC + 8'b11x0_0x00, // CPX, CPY (imm, zpg) + 8'b11x0_1100: op <= OP_SUB; + + 8'b00x1_0010, // ORA, AND (zp) + 8'b0x01_0010, // ORA, EOR (zp) + 8'b010x_xx01, // EOR + 8'b00xx_xx01: // ORA, AND + op <= { 2'b11, IR[6:5] }; + + default: op <= OP_ADD; + endcase + +always @(posedge clk ) + if( state == DECODE && RDY ) + casex( IR ) + 8'b001x_x100: // BIT zp/abs/zpx/absx (update N,V,Z) + {bit_ins, bit_ins_nv} <= 2'b11; + + 8'b1000_1001: // BIT imm (update Z) + {bit_ins, bit_ins_nv} <= 2'b10; + + default: // not a BIT instruction + {bit_ins, bit_ins_nv} <= 2'b00; + endcase + +always @(posedge clk ) + if( state == DECODE && RDY ) + casex( IR ) + 8'b000x_x100: // TRB/TSB + txb_ins <= 1; + + default: txb_ins <= 0; + endcase + +always @(posedge clk ) + if( state == DECODE && RDY ) + casex( IR ) + 8'b0001_x100: // TRB + trb_ins <= 1; + + default: trb_ins <= 0; + endcase + +always @(posedge clk ) + if( state == DECODE && RDY ) + casex( IR ) + 8'b1001_11x0, // STZ abs, STZ abs,x + 8'b011x_0100: // STZ zp, STZ zp,x + store_zero <= 1; + + default: store_zero <= 0; + endcase + +/* + * special instructions + */ +always @(posedge clk ) + if( state == DECODE && RDY ) begin + php <= (IR == 8'h08); + clc <= (IR == 8'h18); + plp <= (IR == 8'h28); + sec <= (IR == 8'h38); + cli <= (IR == 8'h58); + sei <= (IR == 8'h78); + clv <= (IR == 8'hb8); + cld <= (IR == 8'hd8); + sed <= (IR == 8'hf8); + end + +always @(posedge clk) + if( RDY ) + cond_code <= IR[7:4]; + +always @* + case( cond_code ) + 4'b0001: cond_true = ~N; + 4'b0011: cond_true = N; + 4'b0101: cond_true = ~V; + 4'b0111: cond_true = V; + 4'b1001: cond_true = ~C; + 4'b1011: cond_true = C; + 4'b1101: cond_true = ~Z; + 4'b1111: cond_true = Z; + default: cond_true = 1; // BRA is 80 + endcase + + +reg NMI_1 = 0; // delayed NMI signal + +always @(posedge clk) + NMI_1 <= NMI; + +always @(posedge clk ) + if( NMI_edge && state == BRK3 ) + NMI_edge <= 0; + else if( NMI & ~NMI_1 ) + NMI_edge <= 1; + +endmodule diff --git a/hw/efinix_fpga/simulation/timer_tb.sv b/hw/efinix_fpga/simulation/timer_tb.sv deleted file mode 100644 index 24d6cb2..0000000 --- a/hw/efinix_fpga/simulation/timer_tb.sv +++ /dev/null @@ -1,75 +0,0 @@ -module sim(); - -timeunit 10ns; -timeprecision 1ns; - -logic clk; -logic rwb; -logic clk_50; -logic reset; - -logic [2:0] addr; -logic [7:0] i_data; -logic [7:0] o_data; -logic cs; -logic irq; - -timer dut( - .*); - -always #1 clk_50 = clk_50 === 1'b0; -always #100 clk = clk === 1'b0; - -task write_reg(input logic [2:0] _addr, input logic [7:0] _data); - @(negedge clk); - cs <= '1; - addr <= _addr; - rwb <= '0; - i_data <= '1; - @(posedge clk); - i_data <= _data; - @(negedge clk); - cs <= '0; - rwb <= '1; -endtask - -task read_reg(input logic [2:0] _addr, output logic [7:0] _data); - @(negedge clk); - cs <= '1; - addr <= _addr; - rwb <= '1; - i_data <= '1; - @(posedge clk); - _data <= o_data; - @(negedge clk); - cs <= '0; - rwb <= '1; -endtask - -initial -begin - $dumpfile("timer.vcd"); - $dumpvars(0,sim); -end - -logic [7:0] read_data; - -initial begin - reset <= '1; - repeat(5) @(posedge clk); - reset <= '0; - - write_reg(5, 16); - - repeat(1024) @(posedge clk); - - repeat(10) begin - read_reg(0, read_data); - $display("Read: %d", read_data); - repeat(1024) @(posedge clk); - end - $finish(); - -end - -endmodule diff --git a/hw/efinix_fpga/simulation/verilog-6502 b/hw/efinix_fpga/simulation/verilog-6502 deleted file mode 160000 index a5f605d..0000000 --- a/hw/efinix_fpga/simulation/verilog-6502 +++ /dev/null @@ -1 +0,0 @@ -Subproject commit a5f605d00d22095532cc32aa7a481465b1bdca17 diff --git a/hw/efinix_fpga/src/sdram_adapter.sv b/hw/efinix_fpga/src/sdram_adapter.sv index c9c1603..f19f706 100644 --- a/hw/efinix_fpga/src/sdram_adapter.sv +++ b/hw/efinix_fpga/src/sdram_adapter.sv @@ -245,10 +245,11 @@ logic [3:0] o_dbg_BA; logic [25:0] o_dbg_ADDR; logic [31:0] o_dbg_DATA_out; logic [31:0] o_dbg_DATA_in; -logic o_sdr_init_done; +logic sdr_init_done; logic [3:0] o_sdr_state; assign o_ref_req = o_dbg_ref_req; +assign o_sdr_init_done = sdr_init_done; sdram_controller u_sdram_controller( @@ -265,7 +266,7 @@ sdram_controller u_sdram_controller( .i_din(r_write_data), //Data to write to SDRAM. Twice normal width when running at half speed (hence the even addresses) .i_dm(r_dm), //dm (r_dm) .o_dout(w_data_o), //Data read from SDRAM, doubled as above. - .o_sdr_init_done(o_sdr_init_done), //Indicates that the SDRAM initialization is done. + .o_sdr_init_done(sdr_init_done), //Indicates that the SDRAM initialization is done. .o_wr_ack(w_wr_ack), //Write acknowledge, handshake with we .o_rd_ack(w_rd_ack), //Read acknowledge, handshake with re .o_rd_valid(w_rd_valid),//Read valid. The data on o_dout is valid