diff --git a/sim/verilog6502_32bit_test.py b/sim/verilog6502_32bit_test.py index 86cb441..fd9ad00 100644 --- a/sim/verilog6502_32bit_test.py +++ b/sim/verilog6502_32bit_test.py @@ -476,6 +476,70 @@ async def test_indirect_indexed(dut): await check_instruction_sequence(dut, expected_cpu_outputs) @cocotb.test +async def test_jsr(dut): + cocotb.start_soon(Clock(dut.clk, CLK_PERIOD, unit="ns").start()) + cocotb.start_soon(handle_memory(dut)) + + write_dword(0xfffffff4, 0x200) + + # @0x200 + # ldx #$ff + # txs + # jsr $12345678 + # wai + # + # @0x12345678 + # rts + write_bytes(0x200, [0xa2, 0xff, 0x9a, 0x20, 0x78, 0x56, 0x34, 0x12, 0xcb]) + write_bytes(0x12345678, [0x60]) + + dut.RDY.value = Immediate(1) + + dut.reset.value = Immediate(1) + for _ in range(10): + await RisingEdge(dut.clk) + dut.reset.value = 0 + + expected_cpu_outputs = [ + None, # ignore reset sequence + None, + None, + None, + None, + None, + None, + None, + None, + (0x00000200, False, None), # ldx #$ff + (0x00000201, False, None), # Immediate + (0x00000202, False, None), # txs + (0x00000203, False, None), # second cycle of txs + (0x00000203, False, None), # jsr $12345678 + (0x00000204, False, None), # first byte of address + (0x000001ff, True, 0x00), # 31-24 + (0x000001fe, True, 0x00), # 23-16 + (0x000001fd, True, 0x02), # 15-8 + (0x000001fc, True, 0x05), # 7-0 + (0x00000205, False, None), # second byte of address + (0x00000206, False, None), # third byte of address + (0x00000207, False, None), # fourth byte of address + (0x00000208, False, None), # receive last byte of address + (0x12345678, False, None), # rts + (0x12345679, False, None), # rts + (0x000001fb, False, None), # current stack while we add 1 to it + (0x000001fc, False, None), # 7-0 + (0x000001fd, False, None), # 15-8 + (0x000001fe, False, None), # 23-16 + (0x000001ff, False, None), # 31-24 + (0x1234567c, False, None), # Updating PC before jump + (0x00000208, False, None), # WAI + (0x00000209, False, None), # second wai + (0x0000020a, False, 
None), # third wai + ] + + await check_instruction_sequence(dut, expected_cpu_outputs) + +# @cocotb.test  # NOTE(review): decorator is commented out, so test_adc below is presumably no longer registered as a cocotb test; confirm this is intentional. async def test_adc(dut): cocotb.start_soon(Clock(dut.clk, CLK_PERIOD, unit="ns").start()) cocotb.start_soon(handle_memory(dut)) diff --git a/src/cpu_65c02.v b/src/cpu_65c02.v index 91e64a5..cdb2eb0 100644 --- a/src/cpu_65c02.v +++ b/src/cpu_65c02.v @@ -141,12 +141,15 @@ reg [6:0] state; * control signals */ -reg PC_inc; // Increment PC +reg [1:0] PC_inc; // Increment PC reg [31:0] PC_temp; // intermediate value of PC reg [1:0] src_reg; // source register index reg [1:0] dst_reg; // destination register index +reg sr_sel; // choose to load shift register from dimux or from alu +reg alu_sr_enable; // choose to shift or not + reg index_y; // if set, then Y is index reg rather than X reg load_reg; // loading a register (A, X, Y, S) in this instruction reg inc; // increment @@ -201,6 +204,10 @@ parameter OP_ROL = 4'b1011, OP_A = 4'b1111; +parameter + SR_ALU = 1'b0, + SR_DI = 1'b1; + /* * Microcode state machine. Basically, every addressing mode has its own * path through the state machine. 
Additional information, such as the @@ -238,7 +245,7 @@ parameter JSR0 = 7'd26, // JSR - push PCH, save LSB, send S to ALU (-1) JSR1 = 7'd27, // JSR - push PCL, send S to ALU (-1) JSR2 = 7'd28, // JSR - write S - JSR3 = 7'd29, // JSR - fetch MSB + JSR4 = 7'd29, // JSR - write S, increment PC (TODO confirm which address byte is fetched here) PULL0 = 7'd30, // PLP/PLA/PLX/PLY - save next op in IRHOLD, send S to ALU (+1) PULL1 = 7'd31, // PLP/PLA/PLX/PLY - fetch data from stack, write S PULL2 = 7'd32, // PLP/PLA/PLX/PLY - prefetch op, but don't increment PC @@ -254,7 +261,7 @@ parameter RTS0 = 7'd42, // RTS - send S to ALU (+1) RTS1 = 7'd43, // RTS - read PCL from stack RTS2 = 7'd44, // RTS - write PCL to ALU, read PCH - RTS3 = 7'd45, // RTS - load PC and increment + RTS5 = 7'd45, // RTS - load PC and increment WRITE = 7'd46, // Write memory for read/modify/write ZP0 = 7'd47, // Z-page - fetch ZP address ZPX0 = 7'd48, // ZP, X - fetch ZP, and send to ALU (+X) @@ -279,7 +286,13 @@ parameter INDY3 = 7'd67, // (ZP),Y - fetch at ZP+3, and send byte 2 to ALU (+Carry) INDY4 = 7'd68, // (ZP),Y - fetch data, and send byte 3 to ALU (+Carry) INDX3 = 7'd69, // (ZP,X) - fetch addr 2 at ZP+X+2 - INDX4 = 7'd70; // (ZP,X) - fetch addr 3 at ZP+X+3 + INDX4 = 7'd70, // (ZP,X) - fetch addr 3 at ZP+X+3 + JSR3 = 7'd71, + JSR5 = 7'd73, + JSR6 = 7'd74, + JSR7 = 7'd75, + RTS3 = 7'd76, + RTS4 = 7'd77; `ifdef SIM @@ -330,6 +343,10 @@ always @* JSR1: statename = "JSR1"; JSR2: statename = "JSR2"; JSR3: statename = "JSR3"; + JSR4: statename = "JSR4"; + JSR5: statename = "JSR5"; + JSR6: statename = "JSR6"; + JSR7: statename = "JSR7"; RTI0: statename = "RTI0"; RTI1: statename = "RTI1"; RTI2: statename = "RTI2"; @@ -339,6 +356,8 @@ always @* RTS1: statename = "RTS1"; RTS2: statename = "RTS2"; RTS3: statename = "RTS3"; + RTS4: statename = "RTS4"; + RTS5: statename = "RTS5"; BRK0: statename = "BRK0"; BRK1: statename = "BRK1"; BRK2: statename = "BRK2"; @@ -387,9 +406,11 @@ always @* JMP3, JMPI3, JMPIX3, - JSR3, - RTS3, - RTI4: PC_temp = { DIMUX, ADD, alu_sr_0, 
alu_sr_1}; + JSR7: PC_temp = { DIMUX, ADD, alu_sr_0, alu_sr_1}; + + RTS5, + RTI4: PC_temp = { DIMUX, ADD, alu_sr_0, alu_sr_1} + 2; // NOTE(review): RTI4 shares the +2 introduced here for RTS5; confirm RTI should also offset the pulled PC. + BRA1: PC_temp = { ABR[15:8], ADD }; @@ -407,10 +428,11 @@ always @* */ always @* case( state ) - DECODE: if( (~I & IRQ) | NMI_edge ) + DECODE: if( (~I & IRQ) | NMI_edge ) begin PC_inc = 0; - else + end else begin PC_inc = 1; + end ABS0, ABS1, @@ -434,12 +456,17 @@ always @* JMP1, JMP2, JMP3, + JSR4, + JSR5, + JSR6, RTI4, - RTS3: PC_inc = 1; + RTS3, + RTS4, + RTS5: PC_inc = 1; - JMPIX3: PC_inc = ~CO; // Don't increment PC if we are going to go through JMPIX4 + JMPIX3: PC_inc = {1'b0, ~CO}; // Don't increment PC if we are going to go through JMPIX4 - BRA1: PC_inc = CO ^~ backwards; + BRA1: PC_inc = {1'b0, CO ^~ backwards}; default: PC_inc = 0; endcase @@ -483,9 +510,13 @@ always @* BRK1, JSR1, + JSR2, + JSR3, PULL1, RTS1, RTS2, + RTS3, + RTS4, RTI1, RTI2, RTI3, @@ -612,11 +643,11 @@ always @* DECODE: write_register = load_reg & ~plp; PULL1, - RTS2, + RTS4, RTI3, BRK5, JSR0, - JSR2 : write_register = 1; + JSR4 : write_register = 1; default: write_register = 0; endcase @@ -703,13 +734,15 @@ always @* BRK5, JSR0, JSR2, + JSR3, + JSR4, PULL0, PULL1, PUSH1, RTI0, RTI3, RTS0, - RTS2 : regsel = SEL_S; + RTS4 : regsel = SEL_S; default: regsel = src_reg; endcase @@ -734,8 +767,30 @@ ALU ALU( .clk(clk), .RDY(RDY) ); always @(posedge clk) begin - alu_sr_0 <= ADD; - alu_sr_1 <= alu_sr_0; + if (alu_sr_enable) begin + if (sr_sel == SR_ALU) begin + alu_sr_0 <= ADD; + end else begin + alu_sr_0 <= DIMUX; + end + alu_sr_1 <= alu_sr_0; + end +end + +always @* begin + case ( state ) + RTS2, + RTS3: sr_sel = SR_DI; + + default: sr_sel = SR_ALU; + endcase +end + +always @*begin + case ( state) + RTS4: alu_sr_enable = 0; + default: alu_sr_enable = 1; + endcase end /* @@ -761,7 +816,9 @@ always @* BRK3, BRK4, JSR0, - JSR1: alu_op = OP_SUB; + JSR1, + JSR2, + JSR3: alu_op = OP_SUB; default: alu_op = OP_ADD; endcase @@ -791,7 +848,11 @@ always @(posedge clk) 
always @* case( state ) JSR1, + JSR2, + JSR3, RTS1, + RTS2, + RTS3, RTI1, RTI2, BRK1, @@ -806,8 +867,10 @@ always @* ABSX0, RTI0, RTS0, + RTS1, + RTS2, JSR0, - JSR2, + JSR4, BRK0, PULL0, INDY1, @@ -836,6 +899,8 @@ always @* case( state ) BRA1, RTS1, + RTS2, + RTS3, RTI0, RTI1, RTI2, @@ -843,6 +908,8 @@ always @* JSR0, JSR1, JSR2, + JSR3, + JSR4, BRK0, BRK1, BRK2, @@ -891,6 +958,8 @@ always @* RTI2, RTS0, RTS1, + RTS2, + RTS3, INDY0: CI = 1; default: CI = 0; @@ -1140,7 +1209,11 @@ always @(posedge clk or posedge reset) JSR0 : state <= JSR1; JSR1 : state <= JSR2; JSR2 : state <= JSR3; - JSR3 : state <= FETCH; + JSR3 : state <= JSR4; + JSR4 : state <= JSR5; + JSR5 : state <= JSR6; + JSR6 : state <= JSR7; + JSR7 : state <= FETCH; RTI0 : state <= RTI1; RTI1 : state <= RTI2; @@ -1151,7 +1224,9 @@ always @(posedge clk or posedge reset) RTS0 : state <= RTS1; RTS1 : state <= RTS2; RTS2 : state <= RTS3; - RTS3 : state <= FETCH; + RTS3 : state <= RTS4; + RTS4 : state <= RTS5; + RTS5 : state <= FETCH; BRA0 : state <= cond_true ? BRA1 : DECODE; BRA1 : state <= (CO ^ backwards) ? BRA2 : DECODE;