diff --git a/src/cndm/rtl/cndm_micro_core.f b/src/cndm/rtl/cndm_micro_core.f
index 988d583..0f91899 100644
--- a/src/cndm/rtl/cndm_micro_core.f
+++ b/src/cndm/rtl/cndm_micro_core.f
@@ -4,6 +4,7 @@ cndm_micro_dp_mgr.sv
 cndm_micro_port.sv
 cndm_micro_rx.sv
 cndm_micro_tx.sv
+cndm_micro_queue_state.sv
 cndm_micro_desc_rd.sv
 cndm_micro_cpl_wr.sv
 ../lib/taxi/src/prim/rtl/taxi_ram_2rw_1c.sv
diff --git a/src/cndm/rtl/cndm_micro_core.sv b/src/cndm/rtl/cndm_micro_core.sv
index c654c64..6be7e75 100644
--- a/src/cndm/rtl/cndm_micro_core.sv
+++ b/src/cndm/rtl/cndm_micro_core.sv
@@ -36,6 +36,9 @@ module cndm_micro_core #(
     // Structural configuration
     parameter PORTS = 2,
+    // Queue configuration
+    parameter CQN_W = 5,
+
     // PTP configuration
     parameter logic PTP_TS_EN = 1'b1,
     parameter logic PTP_TS_FMT_TOD = 1'b0,
@@ -308,8 +311,12 @@ apb_dp_ctrl();
 cndm_micro_dp_mgr #(
     .PORTS(PORTS),
+
+    .CQN_W(CQN_W),
+
+
     .PTP_EN(PTP_TS_EN),
     .PTP_BASE_ADDR_DP(0),
+
     .PORT_BASE_ADDR_DP(PORT_BASE_ADDR_DP),
     .PORT_BASE_ADDR_HOST(PORT_BASE_ADDR_HOST)
 )
@@ -490,6 +497,10 @@ dma_mux_inst (
 for (genvar p = 0; p < PORTS; p = p + 1) begin : port
     cndm_micro_port #(
+        // Queue configuration
+        .CQN_W(CQN_W),
+
+        // PTP configuration
         .PTP_TS_EN(PTP_TS_EN),
         .PTP_TS_FMT_TOD(PTP_TS_FMT_TOD)
     )
diff --git a/src/cndm/rtl/cndm_micro_cpl_wr.sv b/src/cndm/rtl/cndm_micro_cpl_wr.sv
index 77f1feb..067a487 100644
--- a/src/cndm/rtl/cndm_micro_cpl_wr.sv
+++ b/src/cndm/rtl/cndm_micro_cpl_wr.sv
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: CERN-OHL-S-2.0
 /*
-Copyright (c) 2025 FPGA Ninja, LLC
+Copyright (c) 2026 FPGA Ninja, LLC
 Authors:
 - Alex Forencich
@@ -15,7 +15,9 @@ Authors:
 /*
  * Corundum-micro completion write module
  */
-module cndm_micro_cpl_wr
+module cndm_micro_cpl_wr #(
+    parameter CQN_W = 5
+)
 (
     input wire logic clk,
     input wire logic rst,
@@ -42,168 +44,56 @@ module cndm_micro_cpl_wr
     output wire logic irq
 );
-localparam AXIL_ADDR_W = s_axil_ctrl_wr.ADDR_W;
-localparam AXIL_DATA_W = s_axil_ctrl_wr.DATA_W;
+localparam DMA_ADDR_W = dma_wr_desc_req.DST_ADDR_W;
-localparam APB_ADDR_W = s_apb_dp_ctrl.ADDR_W;
-localparam APB_DATA_W = s_apb_dp_ctrl.DATA_W;
+logic [CQN_W-1:0] cq_req_cqn_reg = '0;
+logic cq_req_valid_reg = 1'b0;
+logic cq_req_ready;
+logic [DMA_ADDR_W-1:0] cq_rsp_addr;
+logic cq_rsp_phase;
+logic cq_rsp_error;
+logic cq_rsp_valid;
+logic cq_rsp_ready_reg = 1'b0;
-logic txcq_en_reg = '0;
-logic [3:0] txcq_size_reg = '0;
-logic [63:0] txcq_base_addr_reg = '0;
-logic rxcq_en_reg = '0;
-logic [3:0] rxcq_size_reg = '0;
-logic [63:0] rxcq_base_addr_reg = '0;
+cndm_micro_queue_state #(
+    .QN_W(CQN_W),
+    .DQN_W(CQN_W), // TODO
+    .CPL_SIZE(16), // address stride between consecutive completion slots
+    .DMA_ADDR_W(DMA_ADDR_W)
+)
+cq_mgr_inst (
+    .clk(clk),
+    .rst(rst),
-logic [15:0] txcq_prod_ptr_reg = '0;
-logic [15:0] rxcq_prod_ptr_reg = '0;
+    /*
+     * Control register interface
+     */
+    .s_axil_ctrl_wr(s_axil_ctrl_wr),
+    .s_axil_ctrl_rd(s_axil_ctrl_rd),
-logic s_axil_ctrl_awready_reg = 1'b0;
-logic s_axil_ctrl_wready_reg = 1'b0;
-logic s_axil_ctrl_bvalid_reg = 1'b0;
+    /*
+     * Datapath control register interface
+     */
+    .s_apb_dp_ctrl(s_apb_dp_ctrl),
-logic s_axil_ctrl_arready_reg = 1'b0;
-logic [AXIL_DATA_W-1:0] s_axil_ctrl_rdata_reg = '0;
-logic s_axil_ctrl_rvalid_reg = 1'b0;
-
-assign s_axil_ctrl_wr.awready = s_axil_ctrl_awready_reg;
-assign s_axil_ctrl_wr.wready = s_axil_ctrl_wready_reg;
-assign s_axil_ctrl_wr.bresp = '0;
-assign s_axil_ctrl_wr.buser = '0;
-assign s_axil_ctrl_wr.bvalid = s_axil_ctrl_bvalid_reg;
-
-assign s_axil_ctrl_rd.arready = s_axil_ctrl_arready_reg;
-assign s_axil_ctrl_rd.rdata = s_axil_ctrl_rdata_reg;
-assign s_axil_ctrl_rd.rresp = '0;
-assign s_axil_ctrl_rd.ruser = '0;
-assign s_axil_ctrl_rd.rvalid = s_axil_ctrl_rvalid_reg;
-
-logic s_apb_dp_ctrl_pready_reg = 1'b0;
-logic [AXIL_DATA_W-1:0] s_apb_dp_ctrl_prdata_reg = '0;
-
-assign s_apb_dp_ctrl.pready = s_apb_dp_ctrl_pready_reg;
-assign s_apb_dp_ctrl.prdata = s_apb_dp_ctrl_prdata_reg;
-assign s_apb_dp_ctrl.pslverr = 1'b0;
-assign s_apb_dp_ctrl.pruser = '0;
-assign s_apb_dp_ctrl.pbuser = '0;
-
-always_ff @(posedge clk) begin
-    s_axil_ctrl_awready_reg <= 1'b0;
-    s_axil_ctrl_wready_reg <= 1'b0;
-    s_axil_ctrl_bvalid_reg <= s_axil_ctrl_bvalid_reg && !s_axil_ctrl_wr.bready;
-
-    s_axil_ctrl_arready_reg <= 1'b0;
-    s_axil_ctrl_rvalid_reg <= s_axil_ctrl_rvalid_reg && !s_axil_ctrl_rd.rready;
-
-    s_apb_dp_ctrl_pready_reg <= 1'b0;
-
-    if (s_axil_ctrl_wr.awvalid && s_axil_ctrl_wr.wvalid && !s_axil_ctrl_bvalid_reg) begin
-        s_axil_ctrl_awready_reg <= 1'b1;
-        s_axil_ctrl_wready_reg <= 1'b1;
-        s_axil_ctrl_bvalid_reg <= 1'b1;
-
-        // case ({s_axil_ctrl_wr.awaddr[9:2], 2'b00})
-        //     10'h000: begin
-        //         txcq_en_reg <= s_axil_ctrl_wr.wdata[0];
-        //         txcq_size_reg <= s_axil_ctrl_wr.wdata[19:16];
-        //     end
-        //     10'h008: txcq_base_addr_reg[31:0] <= s_axil_ctrl_wr.wdata;
-        //     10'h00c: txcq_base_addr_reg[63:32] <= s_axil_ctrl_wr.wdata;
-
-        //     10'h100: begin
-        //         rxcq_en_reg <= s_axil_ctrl_wr.wdata[0];
-        //         rxcq_size_reg <= s_axil_ctrl_wr.wdata[19:16];
-        //     end
-        //     10'h108: rxcq_base_addr_reg[31:0] <= s_axil_ctrl_wr.wdata;
-        //     10'h10c: rxcq_base_addr_reg[63:32] <= s_axil_ctrl_wr.wdata;
-        //     default: begin end
-        // endcase
-    end
-
-    if (s_axil_ctrl_rd.arvalid && !s_axil_ctrl_rvalid_reg) begin
-        s_axil_ctrl_rdata_reg <= '0;
-
-        s_axil_ctrl_arready_reg <= 1'b1;
-        s_axil_ctrl_rvalid_reg <= 1'b1;
-
-        // case ({s_axil_ctrl_rd.araddr[9:2], 2'b00})
-        //     10'h000: begin
-        //         s_axil_ctrl_rdata_reg[0] <= txcq_en_reg;
-        //         s_axil_ctrl_rdata_reg[19:16] <= txcq_size_reg;
-        //     end
-        //     10'h004: s_axil_ctrl_rdata_reg[15:0] <= txcq_prod_ptr_reg;
-        //     10'h008: s_axil_ctrl_rdata_reg <= txcq_base_addr_reg[31:0];
-        //     10'h00c: s_axil_ctrl_rdata_reg <= txcq_base_addr_reg[63:32];
-
-        //     10'h100: begin
-        //         s_axil_ctrl_rdata_reg[0] <= rxcq_en_reg;
-        //         s_axil_ctrl_rdata_reg[19:16] <= rxcq_size_reg;
-        //     end
-        //     10'h104: s_axil_ctrl_rdata_reg[15:0] <= rxcq_prod_ptr_reg;
-        //     10'h108: s_axil_ctrl_rdata_reg <= rxcq_base_addr_reg[31:0];
-        //     10'h10c: s_axil_ctrl_rdata_reg <= rxcq_base_addr_reg[63:32];
-        //     default: begin end
-        // endcase
-    end
-
-    if (s_apb_dp_ctrl.penable && s_apb_dp_ctrl.psel && !s_apb_dp_ctrl_pready_reg) begin
-        s_apb_dp_ctrl_pready_reg <= 1'b1;
-        s_apb_dp_ctrl_prdata_reg <= '0;
-
-        if (s_apb_dp_ctrl.pwrite) begin
-            case ({s_apb_dp_ctrl.paddr[9:2], 2'b00})
-                10'h000: begin
-                    txcq_en_reg <= s_apb_dp_ctrl.pwdata[0];
-                    txcq_size_reg <= s_apb_dp_ctrl.pwdata[19:16];
-                end
-                10'h008: txcq_base_addr_reg[31:0] <= s_apb_dp_ctrl.pwdata;
-                10'h00c: txcq_base_addr_reg[63:32] <= s_apb_dp_ctrl.pwdata;
-
-                10'h100: begin
-                    rxcq_en_reg <= s_apb_dp_ctrl.pwdata[0];
-                    rxcq_size_reg <= s_apb_dp_ctrl.pwdata[19:16];
-                end
-                10'h108: rxcq_base_addr_reg[31:0] <= s_apb_dp_ctrl.pwdata;
-                10'h10c: rxcq_base_addr_reg[63:32] <= s_apb_dp_ctrl.pwdata;
-                default: begin end
-            endcase
-        end
-
-        case ({s_apb_dp_ctrl.paddr[9:2], 2'b00})
-            10'h000: begin
-                s_apb_dp_ctrl_prdata_reg[0] <= txcq_en_reg;
-                s_apb_dp_ctrl_prdata_reg[19:16] <= txcq_size_reg;
-            end
-            10'h004: s_apb_dp_ctrl_prdata_reg[15:0] <= txcq_prod_ptr_reg;
-            10'h008: s_apb_dp_ctrl_prdata_reg <= txcq_base_addr_reg[31:0];
-            10'h00c: s_apb_dp_ctrl_prdata_reg <= txcq_base_addr_reg[63:32];
-
-            10'h100: begin
-                s_apb_dp_ctrl_prdata_reg[0] <= rxcq_en_reg;
-                s_apb_dp_ctrl_prdata_reg[19:16] <= rxcq_size_reg;
-            end
-            10'h104: s_apb_dp_ctrl_prdata_reg[15:0] <= rxcq_prod_ptr_reg;
-            10'h108: s_apb_dp_ctrl_prdata_reg <= rxcq_base_addr_reg[31:0];
-            10'h10c: s_apb_dp_ctrl_prdata_reg <= rxcq_base_addr_reg[63:32];
-            default: begin end
-        endcase
-    end
-
-    if (rst) begin
-        s_axil_ctrl_awready_reg <= 1'b0;
-        s_axil_ctrl_wready_reg <= 1'b0;
-        s_axil_ctrl_bvalid_reg <= 1'b0;
-
-        s_axil_ctrl_arready_reg <= 1'b0;
-        s_axil_ctrl_rvalid_reg <= 1'b0;
-
-        s_apb_dp_ctrl_pready_reg <= 1'b0;
-    end
-end
+    /*
+     * CQ management interface
+     */
+    .req_qn(cq_req_cqn_reg),
+    .req_valid(cq_req_valid_reg),
+    .req_ready(cq_req_ready),
+    .rsp_qn(), // left unconnected in this module
+    .rsp_dqn(), // left unconnected in this module
+    .rsp_addr(cq_rsp_addr),
+    .rsp_phase(cq_rsp_phase),
+    .rsp_error(cq_rsp_error),
+    .rsp_valid(cq_rsp_valid),
+    .rsp_ready(cq_rsp_ready_reg)
+);
 typedef enum logic [1:0] {
     STATE_IDLE,
-    STATE_RX_CPL,
+    STATE_QUERY_CQ,
     STATE_WRITE_DATA
 } state_t;
@@ -231,13 +121,8 @@ always_ff @(posedge clk) begin
     dma_wr_desc_req.req_user <= '0;
     dma_wr_desc_req.req_valid <= dma_wr_desc_req.req_valid && !dma_wr_desc_req.req_ready;
-    if (!txcq_en_reg) begin
-        txcq_prod_ptr_reg <= '0;
-    end
-
-    if (!rxcq_en_reg) begin
-        rxcq_prod_ptr_reg <= '0;
-    end
+    cq_req_valid_reg <= cq_req_valid_reg && !cq_req_ready; // keep the request asserted until it is accepted
+    cq_rsp_ready_reg <= 1'b0;
     irq_reg <= 1'b0;
@@ -245,29 +130,32 @@ always_ff @(posedge clk) begin
         STATE_IDLE: begin
             dma_wr_desc_req.req_src_addr <= '0;
-            if (s_axis_cpl.tdest == 0) begin
-                dma_wr_desc_req.req_dst_addr <= txcq_base_addr_reg + 64'(16'(txcq_prod_ptr_reg & ({16{1'b1}} >> (16 - txcq_size_reg))) * 16);
-                phase_tag_reg <= !txcq_prod_ptr_reg[txcq_size_reg];
-                if (s_axis_cpl.tvalid && !s_axis_cpl.tready) begin
-                    txcq_prod_ptr_reg <= txcq_prod_ptr_reg + 1;
-                    if (txcq_en_reg) begin
-                        dma_wr_desc_req.req_valid <= 1'b1;
-                        state_reg <= STATE_WRITE_DATA;
-                    end else begin
-                        state_reg <= STATE_IDLE;
-                    end
-                end
+            cq_req_cqn_reg <= CQN_W'(s_axis_cpl.tdest); // tdest selects the completion queue
+
+            if (s_axis_cpl.tvalid && !s_axis_cpl.tready) begin
+                cq_req_valid_reg <= 1'b1;
+                state_reg <= STATE_QUERY_CQ;
             end else begin
-                dma_wr_desc_req.req_dst_addr <= rxcq_base_addr_reg + 64'(16'(rxcq_prod_ptr_reg & ({16{1'b1}} >> (16 - rxcq_size_reg))) * 16);
-                phase_tag_reg <= !rxcq_prod_ptr_reg[rxcq_size_reg];
-                if (s_axis_cpl.tvalid && !s_axis_cpl.tready) begin
-                    rxcq_prod_ptr_reg <= rxcq_prod_ptr_reg + 1;
-                    if (rxcq_en_reg) begin
-                        dma_wr_desc_req.req_valid <= 1'b1;
-                        state_reg <= STATE_WRITE_DATA;
-                    end else begin
-                        state_reg <= STATE_IDLE;
-                    end
+                state_reg <= STATE_IDLE;
+            end
+        end
+        STATE_QUERY_CQ: begin // wait for the queue state response
+            dma_wr_desc_req.req_src_addr <= '0;
+            cq_rsp_ready_reg <= 1'b1;
+
+            if (cq_rsp_valid) begin
+                cq_rsp_ready_reg <= 1'b0;
+
+                dma_wr_desc_req.req_dst_addr <= cq_rsp_addr;
+                phase_tag_reg <= cq_rsp_phase;
+
+                if (cq_rsp_error) begin
+                    // drop completion
+                    s_axis_cpl.tready <= 1'b1;
+                    state_reg <= STATE_IDLE;
+                end else begin
+                    dma_wr_desc_req.req_valid <= 1'b1;
+                    state_reg <= STATE_WRITE_DATA;
                 end
             end
         end
@@ -285,8 +173,8 @@ always_ff @(posedge clk) begin
     if (rst) begin
         state_reg <= STATE_IDLE;
-        txcq_prod_ptr_reg <= '0;
-        rxcq_prod_ptr_reg <= '0;
+        cq_req_valid_reg <= 1'b0;
+        cq_rsp_ready_reg <= 1'b0;
         irq_reg <= 1'b0;
     end
 end
diff --git a/src/cndm/rtl/cndm_micro_dp_mgr.sv b/src/cndm/rtl/cndm_micro_dp_mgr.sv
index 2748c9b..6945e66 100644
--- a/src/cndm/rtl/cndm_micro_dp_mgr.sv
+++ b/src/cndm/rtl/cndm_micro_dp_mgr.sv
@@ -18,8 +18,12 @@ Authors:
 module cndm_micro_dp_mgr #
 (
     parameter PORTS = 2,
+
+    parameter CQN_W = 5,
+
+
     parameter logic PTP_EN = 1'b1,
     parameter PTP_BASE_ADDR_DP = 0,
+
     parameter PORT_BASE_ADDR_DP = 0,
     parameter PORT_BASE_ADDR_HOST = 0
 )
@@ -247,9 +251,9 @@ always_comb begin
     // determine block base address
     case (opcode_reg)
+        // // EQ
         // CMD_OP_CREATE_EQ:
         // begin
-        //     // EQ
         //     qn_next = 0;
         //     dp_ptr_next = DP_APB_ADDR_W'({port_reg, 16'd0} | 'h8000) + DP_APB_ADDR_W'(PORT_BASE_ADDR_DP);
         //     host_ptr_next = 32'({port_reg, 16'd0} | 'h8000) + PORT_BASE_ADDR_HOST;
@@ -258,14 +262,13 @@ always_comb begin
         // CMD_OP_QUERY_EQ,
         // CMD_OP_DESTROY_EQ:
         // begin
-        //     // EQ
         //     dp_ptr_next = DP_APB_ADDR_W'({port_reg, 16'd0} | 'h8000) + DP_APB_ADDR_W'(PORT_BASE_ADDR_DP);
         //     host_ptr_next = 32'({port_reg, 16'd0} | 'h8000) + PORT_BASE_ADDR_HOST;
         // end
+        // CQ
         CMD_OP_CREATE_CQ:
         begin
-            // CQ
-            cnt_next = 1;
+            cnt_next = 2**CQN_W-1;
             dp_ptr_next = DP_APB_ADDR_W'({port_reg, 16'd0} | 'h8000) + DP_APB_ADDR_W'(PORT_BASE_ADDR_DP);
             host_ptr_next = 32'({port_reg, 16'd0} | 'h8000) + PORT_BASE_ADDR_HOST;
         end
@@ -273,13 +276,12 @@ always_comb begin
         CMD_OP_QUERY_CQ,
         CMD_OP_DESTROY_CQ:
         begin
-            // CQ
-            dp_ptr_next = DP_APB_ADDR_W'({port_reg, 16'd0} | 'h8000 | {qn_reg, 8'd00}) + DP_APB_ADDR_W'(PORT_BASE_ADDR_DP);
-            host_ptr_next = 32'({port_reg, 16'd0} | 'h8000 | {qn_reg, 8'd00}) + PORT_BASE_ADDR_HOST;
+            dp_ptr_next = DP_APB_ADDR_W'({port_reg, 16'd0} | 'h8000 | {qn_reg, 5'd00}) + DP_APB_ADDR_W'(PORT_BASE_ADDR_DP);
+            host_ptr_next = 32'({port_reg, 16'd0} | 'h8000 | {qn_reg, 5'd00}) + PORT_BASE_ADDR_HOST;
         end
+        // SQ
         CMD_OP_CREATE_SQ:
         begin
-            // SQ
             cnt_next = 0;
             dp_ptr_next = DP_APB_ADDR_W'({port_reg, 16'd0} | 'h0000) + DP_APB_ADDR_W'(PORT_BASE_ADDR_DP);
             host_ptr_next = 32'({port_reg, 16'd0} | 'h0000) + PORT_BASE_ADDR_HOST;
@@ -288,13 +290,12 @@ always_comb begin
         CMD_OP_QUERY_SQ,
         CMD_OP_DESTROY_SQ:
         begin
-            // SQ
             dp_ptr_next = DP_APB_ADDR_W'({port_reg, 16'd0} | 'h0000) + DP_APB_ADDR_W'(PORT_BASE_ADDR_DP);
             host_ptr_next = 32'({port_reg, 16'd0} | 'h0000) + PORT_BASE_ADDR_HOST;
         end
+        // RQ
         CMD_OP_CREATE_RQ:
         begin
-            // RQ
             cnt_next = 0;
             dp_ptr_next = DP_APB_ADDR_W'({port_reg, 16'd0} | 'h0100) + DP_APB_ADDR_W'(PORT_BASE_ADDR_DP);
             host_ptr_next = 32'({port_reg, 16'd0} | 'h0100) + PORT_BASE_ADDR_HOST;
@@ -303,7 +304,6 @@ always_comb begin
         CMD_OP_QUERY_RQ,
         CMD_OP_DESTROY_RQ:
         begin
-            // RQ
             dp_ptr_next = DP_APB_ADDR_W'({port_reg, 16'd0} | 'h0100) + DP_APB_ADDR_W'(PORT_BASE_ADDR_DP);
             host_ptr_next = 32'({port_reg, 16'd0} | 'h0100) + PORT_BASE_ADDR_HOST;
         end
@@ -470,7 +470,8 @@ always_comb begin
                 end else begin
                     // queue is active
                     qn_next = qn_reg + 1;
-                    dp_ptr_next = dp_ptr_reg + 'h100;
+                    dp_ptr_next = dp_ptr_reg + 'h20; // next queue's register block (queue index starts at address bit 5)
+                    host_ptr_next = host_ptr_reg + 'h20;
                     if (cnt_reg == 0) begin
                         // no more queues
                         m_axis_rsp_tdata_next = '0; // TODO
diff --git a/src/cndm/rtl/cndm_micro_pcie_us.sv b/src/cndm/rtl/cndm_micro_pcie_us.sv
index 07eeadc..3dabf0b 100644
--- a/src/cndm/rtl/cndm_micro_pcie_us.sv
+++ b/src/cndm/rtl/cndm_micro_pcie_us.sv
@@ -36,6 +36,9 @@ module cndm_micro_pcie_us #(
     // Structural configuration
     parameter PORTS = 2,
+    // Queue configuration
+    parameter CQN_W = 5,
+
     // PTP configuration
     parameter logic PTP_TS_EN = 1'b1,
     parameter logic PTP_TS_FMT_TOD = 1'b0,
@@ -510,6 +513,9 @@ cndm_micro_core #(
     // Structural configuration
     .PORTS(PORTS),
+    // Queue configuration
+    .CQN_W(CQN_W),
+
     // PTP configuration
     .PTP_TS_EN(PTP_TS_EN),
     .PTP_TS_FMT_TOD(PTP_TS_FMT_TOD),
diff --git a/src/cndm/rtl/cndm_micro_port.sv b/src/cndm/rtl/cndm_micro_port.sv
index 8a2485b..aa65e6a 100644
--- a/src/cndm/rtl/cndm_micro_port.sv
+++ b/src/cndm/rtl/cndm_micro_port.sv
@@ -16,6 +16,10 @@ Authors:
  * Corundum-micro port module
  */
 module cndm_micro_port #(
+    // Queue configuration
+    parameter CQN_W = 5,
+
+    // PTP configuration
     parameter logic PTP_TS_EN = 1'b1,
     parameter logic PTP_TS_FMT_TOD = 1'b0
 )
@@ -387,7 +391,9 @@ cpl_mux_inst (
     .m_axis(axis_cpl)
 );
-cndm_micro_cpl_wr
+cndm_micro_cpl_wr #(
+    .CQN_W(CQN_W)
+)
 cpl_wr_inst (
     .clk(clk),
     .rst(rst),
diff --git a/src/cndm/rtl/cndm_micro_queue_state.sv b/src/cndm/rtl/cndm_micro_queue_state.sv
new file mode 100644
index 0000000..9a7ffd3
--- /dev/null
+++ b/src/cndm/rtl/cndm_micro_queue_state.sv
@@ -0,0 +1,294 @@
+// SPDX-License-Identifier: CERN-OHL-S-2.0
+/*
+
+Copyright (c) 2026 FPGA Ninja, LLC
+
+Authors:
+- Alex Forencich
+
+*/
+
+`resetall
+`timescale 1ns / 1ps
+`default_nettype none
+
+/*
+ * Corundum-micro queue state manager module: holds enable, size, base address and producer pointer for 2**QN_W queues
+ */
+module cndm_micro_queue_state #(
+    parameter QN_W = 5, // queue number width; state is kept for 2**QN_W queues
+    parameter DQN_W = 5, // width of rsp_dqn (currently always driven to zero)
+    parameter CPL_SIZE = 16, // multiplier applied to the masked producer index to form the slot offset
+    parameter DMA_ADDR_W = 64 // width of the stored base address and of rsp_addr
+)
+(
+    input wire logic clk,
+    input wire logic rst,
+
+    /*
+     * Control register interface
+     */
+    taxi_axil_if.wr_slv s_axil_ctrl_wr,
+    taxi_axil_if.rd_slv s_axil_ctrl_rd,
+
+    /*
+     * Datapath control register interface
+     */
+    taxi_apb_if.slv s_apb_dp_ctrl,
+
+    /*
+     * CQ management interface
+     */
+    input wire logic [QN_W-1:0] req_qn,
+    input wire logic req_valid,
+    output wire logic req_ready,
+    output wire logic [QN_W-1:0] rsp_qn,
+    output wire logic [DQN_W-1:0] rsp_dqn,
+    output wire logic [DMA_ADDR_W-1:0] rsp_addr,
+    output wire logic rsp_phase,
+    output wire logic rsp_error,
+    output wire logic rsp_valid,
+    input wire logic rsp_ready
+);
+
+localparam PTR_W = 16;
+
+localparam ADDR_W = QN_W+5;
+
+localparam AXIL_ADDR_W = s_axil_ctrl_wr.ADDR_W;
+localparam AXIL_DATA_W = s_axil_ctrl_wr.DATA_W;
+
+localparam APB_ADDR_W = s_apb_dp_ctrl.ADDR_W;
+localparam APB_DATA_W = s_apb_dp_ctrl.DATA_W;
+
+// check configuration
+if (s_axil_ctrl_rd.DATA_W != 32 || s_axil_ctrl_wr.DATA_W != 32)
+    $fatal(0, "Error: AXI data width must be 32 (instance %m)");
+
+if (s_axil_ctrl_rd.ADDR_W < ADDR_W || s_axil_ctrl_wr.ADDR_W < ADDR_W)
+    $fatal(0, "Error: AXI address width is insufficient (instance %m)");
+
+if (s_apb_dp_ctrl.DATA_W != 32)
+    $fatal(0, "Error: APB data width must be 32 (instance %m)");
+
+if (s_apb_dp_ctrl.ADDR_W < ADDR_W)
+    $fatal(0, "Error: APB address width is insufficient (instance %m)");
+
+logic s_axil_ctrl_awready_reg = 1'b0, s_axil_ctrl_awready_next;
+logic s_axil_ctrl_wready_reg = 1'b0, s_axil_ctrl_wready_next;
+logic s_axil_ctrl_bvalid_reg = 1'b0, s_axil_ctrl_bvalid_next;
+
+logic s_axil_ctrl_arready_reg = 1'b0, s_axil_ctrl_arready_next;
+logic [AXIL_DATA_W-1:0] s_axil_ctrl_rdata_reg = '0, s_axil_ctrl_rdata_next;
+logic s_axil_ctrl_rvalid_reg = 1'b0, s_axil_ctrl_rvalid_next;
+
+assign s_axil_ctrl_wr.awready = s_axil_ctrl_awready_reg;
+assign s_axil_ctrl_wr.wready = s_axil_ctrl_wready_reg;
+assign s_axil_ctrl_wr.bresp = '0;
+assign s_axil_ctrl_wr.buser = '0;
+assign s_axil_ctrl_wr.bvalid = s_axil_ctrl_bvalid_reg;
+
+assign s_axil_ctrl_rd.arready = s_axil_ctrl_arready_reg;
+assign s_axil_ctrl_rd.rdata = s_axil_ctrl_rdata_reg;
+assign s_axil_ctrl_rd.rresp = '0;
+assign s_axil_ctrl_rd.ruser = '0;
+assign s_axil_ctrl_rd.rvalid = s_axil_ctrl_rvalid_reg;
+
+wire [QN_W-1:0] s_axil_ctrl_awaddr_queue_index = s_axil_ctrl_wr.awaddr[5 +: QN_W];
+wire [2:0] s_axil_ctrl_awaddr_reg_index = s_axil_ctrl_wr.awaddr[4:2];
+wire [QN_W-1:0] s_axil_ctrl_araddr_queue_index = s_axil_ctrl_rd.araddr[5 +: QN_W];
+wire [2:0] s_axil_ctrl_araddr_reg_index = s_axil_ctrl_rd.araddr[4:2];
+
+logic s_apb_dp_ctrl_pready_reg = 1'b0, s_apb_dp_ctrl_pready_next;
+logic [APB_DATA_W-1:0] s_apb_dp_ctrl_prdata_reg = '0, s_apb_dp_ctrl_prdata_next; // sized by the APB data width
+
+assign s_apb_dp_ctrl.pready = s_apb_dp_ctrl_pready_reg;
+assign s_apb_dp_ctrl.prdata = s_apb_dp_ctrl_prdata_reg;
+assign s_apb_dp_ctrl.pslverr = 1'b0;
+assign s_apb_dp_ctrl.pruser = '0;
+assign s_apb_dp_ctrl.pbuser = '0;
+
+wire [QN_W-1:0] s_apb_dp_ctrl_paddr_queue_index = s_apb_dp_ctrl.paddr[5 +: QN_W];
+wire [2:0] s_apb_dp_ctrl_paddr_reg_index = s_apb_dp_ctrl.paddr[4:2];
+
+logic req_ready_reg = 1'b0, req_ready_next;
+logic [QN_W-1:0] rsp_qn_reg = '0, rsp_qn_next;
+logic [DQN_W-1:0] rsp_dqn_reg = '0, rsp_dqn_next;
+logic [DMA_ADDR_W-1:0] rsp_addr_reg = '0, rsp_addr_next;
+logic rsp_phase_reg = 1'b0, rsp_phase_next;
+logic rsp_error_reg = 1'b0, rsp_error_next;
+logic rsp_valid_reg = 1'b0, rsp_valid_next;
+
+assign req_ready = req_ready_reg;
+assign rsp_qn = rsp_qn_reg;
+assign rsp_dqn = rsp_dqn_reg;
+assign rsp_addr = rsp_addr_reg;
+assign rsp_phase = rsp_phase_reg;
+assign rsp_error = rsp_error_reg;
+assign rsp_valid = rsp_valid_reg;
+
+logic [2**QN_W-1:0] queue_enable_reg = '0;
+(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *)
+logic [3:0] queue_mem_size[2**QN_W] = '{default: '0};
+(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *)
+logic [DMA_ADDR_W-1:0] queue_mem_base_addr[2**QN_W] = '{default: '0};
+(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *)
+logic [PTR_W-1:0] queue_mem_prod[2**QN_W] = '{default: '0};
+
+logic queue_mem_wr_en;
+logic [QN_W-1:0] queue_mem_addr;
+
+wire queue_mem_rd_enable = queue_enable_reg[queue_mem_addr];
+wire [3:0] queue_mem_rd_size = queue_mem_size[queue_mem_addr];
+wire [DMA_ADDR_W-1:0] queue_mem_rd_base_addr = queue_mem_base_addr[queue_mem_addr];
+wire [PTR_W-1:0] queue_mem_rd_prod = queue_mem_prod[queue_mem_addr];
+
+logic queue_mem_wr_enable;
+logic [3:0] queue_mem_wr_size;
+logic [DMA_ADDR_W-1:0] queue_mem_wr_base_addr;
+logic [PTR_W-1:0] queue_mem_wr_prod;
+
+always_comb begin
+    s_axil_ctrl_awready_next = 1'b0;
+    s_axil_ctrl_wready_next = 1'b0;
+    s_axil_ctrl_bvalid_next = s_axil_ctrl_bvalid_reg && !s_axil_ctrl_wr.bready; // hold BVALID until the master accepts it
+
+    s_axil_ctrl_arready_next = 1'b0;
+    s_axil_ctrl_rdata_next = s_axil_ctrl_rdata_reg;
+    s_axil_ctrl_rvalid_next = s_axil_ctrl_rvalid_reg && !s_axil_ctrl_rd.rready; // hold RVALID until the master accepts it
+
+    s_apb_dp_ctrl_pready_next = 1'b0;
+    s_apb_dp_ctrl_prdata_next = s_apb_dp_ctrl_prdata_reg;
+
+    req_ready_next = 1'b0;
+    rsp_qn_next = rsp_qn_reg;
+    rsp_dqn_next = rsp_dqn_reg;
+    rsp_addr_next = rsp_addr_reg;
+    rsp_phase_next = rsp_phase_reg;
+    rsp_error_next = rsp_error_reg;
+    rsp_valid_next = rsp_valid_reg && !rsp_ready;
+
+    queue_mem_wr_en = 1'b0;
+    queue_mem_addr = '0;
+
+    queue_mem_wr_enable = queue_mem_rd_enable; // default: write back what was read, so a write changes only one field
+    queue_mem_wr_size = queue_mem_rd_size;
+    queue_mem_wr_base_addr = queue_mem_rd_base_addr;
+    queue_mem_wr_prod = queue_mem_rd_prod;
+
+    // terminate AXI lite writes (acknowledged, write data is discarded)
+    if (s_axil_ctrl_wr.awvalid && s_axil_ctrl_wr.wvalid && !s_axil_ctrl_bvalid_reg) begin
+        s_axil_ctrl_awready_next = 1'b1;
+        s_axil_ctrl_wready_next = 1'b1;
+        s_axil_ctrl_bvalid_next = 1'b1;
+    end
+
+    // terminate AXI lite reads (read data is always zero)
+    if (s_axil_ctrl_rd.arvalid && !s_axil_ctrl_rvalid_reg) begin
+        s_axil_ctrl_rdata_next = '0;
+
+        s_axil_ctrl_arready_next = 1'b1;
+        s_axil_ctrl_rvalid_next = 1'b1;
+    end
+
+    if (s_apb_dp_ctrl.penable && s_apb_dp_ctrl.psel && !s_apb_dp_ctrl_pready_reg) begin
+        // APB read/write
+        s_apb_dp_ctrl_pready_next = 1'b1;
+        s_apb_dp_ctrl_prdata_next = '0;
+
+        queue_mem_addr = s_apb_dp_ctrl_paddr_queue_index;
+
+        if (s_apb_dp_ctrl.pwrite) begin
+            queue_mem_wr_en = 1'b1;
+
+            case (s_apb_dp_ctrl_paddr_reg_index)
+                3'd0: begin // control: bit 0 enable, bits 19:16 size
+                    queue_mem_wr_enable = s_apb_dp_ctrl.pwdata[0];
+                    queue_mem_wr_size = s_apb_dp_ctrl.pwdata[19:16];
+                end
+                3'd1: queue_mem_wr_prod = s_apb_dp_ctrl.pwdata[15:0]; // producer pointer
+                3'd2: queue_mem_wr_base_addr[31:0] = s_apb_dp_ctrl.pwdata; // base address, low word
+                3'd3: queue_mem_wr_base_addr[63:32] = s_apb_dp_ctrl.pwdata; // base address, high word
+                default: begin end
+            endcase
+        end
+
+        case (s_apb_dp_ctrl_paddr_reg_index)
+            3'd0: begin
+                s_apb_dp_ctrl_prdata_next[0] = queue_mem_rd_enable;
+                s_apb_dp_ctrl_prdata_next[19:16] = queue_mem_rd_size;
+            end
+            3'd1: s_apb_dp_ctrl_prdata_next[15:0] = queue_mem_rd_prod;
+            3'd2: s_apb_dp_ctrl_prdata_next = queue_mem_rd_base_addr[31:0];
+            3'd3: s_apb_dp_ctrl_prdata_next = queue_mem_rd_base_addr[63:32];
+            default: begin end
+        endcase
+
+    end else if (req_valid && !req_ready && (!rsp_valid || rsp_ready)) begin
+        // completion enqueue request (APB accesses take priority over it)
+        req_ready_next = 1'b1;
+
+        queue_mem_addr = req_qn;
+
+        rsp_qn_next = req_qn;
+        rsp_dqn_next = '0; // TODO
+        rsp_addr_next = queue_mem_rd_base_addr + DMA_ADDR_W'(16'(queue_mem_rd_prod & ({16{1'b1}} >> (16 - queue_mem_rd_size))) * CPL_SIZE); // base + (prod mod 2**size) * CPL_SIZE
+        rsp_phase_next = !queue_mem_rd_prod[queue_mem_rd_size]; // inverted wrap bit of the producer pointer
+        rsp_error_next = !queue_mem_rd_enable;
+        rsp_valid_next = 1'b1;
+
+        queue_mem_wr_prod = queue_mem_rd_prod + 1;
+
+        if (queue_mem_rd_enable) begin // the pointer only advances for an enabled queue
+            queue_mem_wr_en = 1'b1;
+        end
+    end
+end
+
+always @(posedge clk) begin
+    s_axil_ctrl_awready_reg <= s_axil_ctrl_awready_next;
+    s_axil_ctrl_wready_reg <= s_axil_ctrl_wready_next;
+    s_axil_ctrl_bvalid_reg <= s_axil_ctrl_bvalid_next;
+
+    s_axil_ctrl_arready_reg <= s_axil_ctrl_arready_next;
+    s_axil_ctrl_rdata_reg <= s_axil_ctrl_rdata_next;
+    s_axil_ctrl_rvalid_reg <= s_axil_ctrl_rvalid_next;
+
+    s_apb_dp_ctrl_pready_reg <= s_apb_dp_ctrl_pready_next;
+    s_apb_dp_ctrl_prdata_reg <= s_apb_dp_ctrl_prdata_next;
+
+    req_ready_reg <= req_ready_next;
+    rsp_qn_reg <= rsp_qn_next;
+    rsp_dqn_reg <= rsp_dqn_next;
+    rsp_addr_reg <= rsp_addr_next;
+    rsp_phase_reg <= rsp_phase_next;
+    rsp_error_reg <= rsp_error_next;
+    rsp_valid_reg <= rsp_valid_next;
+
+    if (queue_mem_wr_en) begin
+        queue_enable_reg[queue_mem_addr] <= queue_mem_wr_enable;
+        queue_mem_size[queue_mem_addr] <= queue_mem_wr_size;
+        queue_mem_base_addr[queue_mem_addr] <= queue_mem_wr_base_addr;
+        queue_mem_prod[queue_mem_addr] <= queue_mem_wr_prod;
+    end
+
+    if (rst) begin
+        s_axil_ctrl_awready_reg <= 1'b0;
+        s_axil_ctrl_wready_reg <= 1'b0;
+        s_axil_ctrl_bvalid_reg <= 1'b0;
+
+        s_axil_ctrl_arready_reg <= 1'b0;
+        s_axil_ctrl_rvalid_reg <= 1'b0;
+
+        s_apb_dp_ctrl_pready_reg <= 1'b0;
+
+        req_ready_reg <= 1'b0;
+        rsp_valid_reg <= 1'b0;
+
+        queue_enable_reg <= '0;
+    end
+end
+
+endmodule
+
+`resetall