From c7279a1ea2e6eae9515635d9be75fc028f3aa92c Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Tue, 10 Mar 2026 00:57:20 -0700 Subject: [PATCH] cndm: Add support for event queues Signed-off-by: Alex Forencich --- src/cndm/rtl/cndm_micro_cpl_wr.sv | 66 ++++++++++++---- src/cndm/rtl/cndm_micro_desc_rd.sv | 10 ++- src/cndm/rtl/cndm_micro_port.sv | 27 ++++++- src/cndm/rtl/cndm_micro_queue_state.sv | 104 +++++++++++++++++++++---- 4 files changed, 174 insertions(+), 33 deletions(-) diff --git a/src/cndm/rtl/cndm_micro_cpl_wr.sv b/src/cndm/rtl/cndm_micro_cpl_wr.sv index ed5ac85..bae04c9 100644 --- a/src/cndm/rtl/cndm_micro_cpl_wr.sv +++ b/src/cndm/rtl/cndm_micro_cpl_wr.sv @@ -16,7 +16,10 @@ Authors: * Corundum-micro completion write module */ module cndm_micro_cpl_wr #( - parameter CQN_W = 5 + parameter CQN_W = 5, + parameter logic IS_CQ = 1'b1, + parameter logic IS_EQ = 1'b0, + parameter logic CQ_IRQ = IS_CQ ) ( input wire logic clk, @@ -48,17 +51,34 @@ module cndm_micro_cpl_wr #( /* * Completion input */ - taxi_axis_if.snk s_axis_cpl + taxi_axis_if.snk s_axis_cpl, + + /* + * Event input + */ + taxi_axis_if.snk s_axis_event, + + /* + * Event output + */ + taxi_axis_if.src m_axis_event ); localparam DMA_ADDR_W = dma_wr_desc_req.DST_ADDR_W; localparam IRQN_W = m_axis_irq.DATA_W; +typedef enum logic [2:0] { + QTYPE_EQ, + QTYPE_CQ, + QTYPE_SQ, + QTYPE_RQ +} qtype_t; + logic [CQN_W-1:0] cq_req_cqn_reg = '0; +logic [2:0] cq_req_qtype_reg = '0; logic cq_req_valid_reg = 1'b0; logic cq_req_ready; -logic [IRQN_W-1:0] cq_rsp_irqn; logic [DMA_ADDR_W-1:0] cq_rsp_addr; logic cq_rsp_phase_tag; logic cq_rsp_error; @@ -72,8 +92,10 @@ logic notify_req_ready; cndm_micro_queue_state #( .QN_W(CQN_W), .DQN_W(IRQN_W), - .IS_CQ(1), - .QTYPE_EN(0), + .IS_CQ(IS_CQ || !IS_EQ), + .IS_EQ(IS_EQ), + .CQ_IRQ(CQ_IRQ), + .QTYPE_EN(IS_CQ && IS_EQ), .QE_SIZE(16), .DMA_ADDR_W(DMA_ADDR_W) ) @@ -96,11 +118,11 @@ cq_mgr_inst ( * Queue management interface */ .req_qn(cq_req_cqn_reg), - .req_qtype('0), + .req_qtype(cq_req_qtype_reg), .req_valid(cq_req_valid_reg), .req_ready(cq_req_ready), .rsp_qn(), - .rsp_dqn(cq_rsp_irqn), + .rsp_dqn(), .rsp_addr(cq_rsp_addr), .rsp_phase_tag(cq_rsp_phase_tag), .rsp_error(cq_rsp_error), @@ -117,7 +139,12 @@ cq_mgr_inst ( /* * Interrupts */ - .m_axis_irq(m_axis_irq) + .m_axis_irq(m_axis_irq), + + /* + * Event output + */ + .m_axis_event(m_axis_event) ); typedef enum logic [1:0] { @@ -130,8 +157,11 @@ state_t state_reg = STATE_IDLE; logic phase_tag_reg = 1'b0; +logic [127:0] data_reg = '0; + always_ff @(posedge clk) begin s_axis_cpl.tready <= 1'b0; + s_axis_event.tready <= 1'b0; dma_wr_desc_req.req_src_sel <= '0; dma_wr_desc_req.req_src_asid <= '0; @@ -155,11 +185,21 @@ always_ff @(posedge clk) begin STATE_IDLE: begin dma_wr_desc_req.req_src_addr <= '0; - cq_req_cqn_reg <= s_axis_cpl.tdest; - - if (s_axis_cpl.tvalid && !s_axis_cpl.tready && (!notify_req_valid_reg || notify_req_ready)) begin + if (IS_EQ && s_axis_event.tvalid && !s_axis_event.tready && (!notify_req_valid_reg || notify_req_ready)) begin + data_reg <= s_axis_event.tdata; + cq_req_cqn_reg <= s_axis_event.tdest; + cq_req_qtype_reg <= QTYPE_EQ; + cq_req_valid_reg <= 1'b1; + notify_req_qn_reg <= s_axis_event.tdest; + s_axis_event.tready <= 1'b1; + state_reg <= STATE_QUERY_CQ; + end else if ((IS_CQ || !IS_EQ) && s_axis_cpl.tvalid && !s_axis_cpl.tready && (!notify_req_valid_reg || notify_req_ready)) begin + data_reg <= s_axis_cpl.tdata; + cq_req_cqn_reg <= s_axis_cpl.tdest; + cq_req_qtype_reg <= QTYPE_CQ; cq_req_valid_reg <= 1'b1; notify_req_qn_reg <= s_axis_cpl.tdest; + s_axis_cpl.tready <= 1'b1; state_reg <= STATE_QUERY_CQ; end else begin state_reg <= STATE_IDLE; @@ -177,7 +217,6 @@ always_ff @(posedge clk) begin if (cq_rsp_error) begin // drop completion - s_axis_cpl.tready <= 1'b1; state_reg <= STATE_IDLE; end else begin dma_wr_desc_req.req_valid <= 1'b1; @@ -187,7 +226,6 @@ always_ff @(posedge clk) begin end STATE_WRITE_DATA: begin if (dma_wr_desc_sts.sts_valid) begin - s_axis_cpl.tready <= 1'b1; notify_req_valid_reg <= 1'b1; state_reg <= STATE_IDLE; end @@ -213,7 +251,7 @@ localparam SEG_BE_W = dma_ram_rd.SEG_BE_W; if (SEGS*SEG_DATA_W < 128) $fatal(0, "Total segmented interface width must be at least 128 (instance %m)"); -wire [SEGS-1:0][SEG_DATA_W-1:0] ram_data = (SEG_DATA_W*SEGS)'({phase_tag_reg, s_axis_cpl.tdata[126:0]}); +wire [SEGS-1:0][SEG_DATA_W-1:0] ram_data = (SEG_DATA_W*SEGS)'({phase_tag_reg, data_reg[126:0]}); for (genvar n = 0; n < SEGS; n = n + 1) begin diff --git a/src/cndm/rtl/cndm_micro_desc_rd.sv b/src/cndm/rtl/cndm_micro_desc_rd.sv index 0d59fdd..9b24565 100644 --- a/src/cndm/rtl/cndm_micro_desc_rd.sv +++ b/src/cndm/rtl/cndm_micro_desc_rd.sv @@ -67,11 +67,14 @@ logic wq_rsp_valid; logic wq_rsp_ready_reg = 1'b0; taxi_axis_if axis_irq_stub(); +taxi_axis_if axis_event_stub(); cndm_micro_queue_state #( .QN_W(WQN_W), .DQN_W(CQN_W), .IS_CQ(0), + .IS_EQ(0), + .CQ_IRQ(0), .QTYPE_EN(1), .QE_SIZE(16), .DMA_ADDR_W(DMA_ADDR_W) @@ -116,7 +119,12 @@ wq_mgr_inst ( /* * Interrupts */ - .m_axis_irq(axis_irq_stub) + .m_axis_irq(axis_irq_stub), + + /* + * Event output + */ + .m_axis_event(axis_event_stub) ); taxi_dma_desc_if #( diff --git a/src/cndm/rtl/cndm_micro_port.sv b/src/cndm/rtl/cndm_micro_port.sv index 4f7e827..988546c 100644 --- a/src/cndm/rtl/cndm_micro_port.sv +++ b/src/cndm/rtl/cndm_micro_port.sv @@ -483,8 +483,21 @@ cpl_mux_inst ( .m_axis(axis_cpl) ); +taxi_axis_if #( + .DATA_W(16*8), + .KEEP_EN(1), + .LAST_EN(1), + .ID_EN(0), + .DEST_EN(1), + .DEST_W(CQN_W), + .USER_EN(0) +) axis_event(); + cndm_micro_cpl_wr #( - .CQN_W(CQN_W) + .CQN_W(CQN_W), + .IS_CQ(1), + .IS_EQ(1), + .CQ_IRQ(1) ) cpl_wr_inst ( .clk(clk), @@ -516,7 +529,17 @@ cpl_wr_inst ( /* * Completion input */ - .s_axis_cpl(axis_cpl) + .s_axis_cpl(axis_cpl), + + /* + * Event input + */ + .s_axis_event(axis_event), + + /* + * Event output + */ + .m_axis_event(axis_event) ); // TX path diff --git a/src/cndm/rtl/cndm_micro_queue_state.sv b/src/cndm/rtl/cndm_micro_queue_state.sv index c1f3d4b..43c192d 100644 --- a/src/cndm/rtl/cndm_micro_queue_state.sv +++ b/src/cndm/rtl/cndm_micro_queue_state.sv @@ -19,7 +19,9 @@ module cndm_micro_queue_state #( parameter QN_W = 5, parameter DQN_W = 5, parameter logic IS_CQ = 1'b0, - parameter logic QTYPE_EN = !IS_CQ, + parameter logic IS_EQ = 1'b0, + parameter logic CQ_IRQ = IS_CQ, + parameter logic QTYPE_EN = 1'b1, parameter QE_SIZE = 16, parameter DMA_ADDR_W = 64 ) @@ -63,7 +65,12 @@ module cndm_micro_queue_state #( /* * Interrupts */ - taxi_axis_if.src m_axis_irq + taxi_axis_if.src m_axis_irq, + + /* + * Event output + */ + taxi_axis_if.src m_axis_event ); localparam PTR_W = 16; @@ -78,6 +85,10 @@ localparam APB_DATA_W = s_apb_dp_ctrl.DATA_W; localparam IRQN_W = m_axis_irq.DATA_W; +localparam EQN_W = m_axis_event.DEST_W; +localparam EVENT_DATA_W = 64; +localparam EVENT_DEST_W = m_axis_event.DEST_W; + // check configuration if (s_axil_ctrl_rd.DATA_W != 32 || s_axil_ctrl_wr.DATA_W != 32) $fatal(0, "Error: AXI data width must be 32 (instance %m)"); @@ -91,6 +102,13 @@ if (s_apb_dp_ctrl.DATA_W != 32) if (s_apb_dp_ctrl.ADDR_W < ADDR_W) $fatal(0, "Error: APB address width is insufficient (instance %m)"); +typedef enum logic [2:0] { + QTYPE_EQ, + QTYPE_CQ, + QTYPE_SQ, + QTYPE_RQ +} qtype_t; + logic s_axil_ctrl_awready_reg = 1'b0, s_axil_ctrl_awready_next; logic s_axil_ctrl_wready_reg = 1'b0, s_axil_ctrl_wready_next; logic s_axil_ctrl_bvalid_reg = 1'b0, s_axil_ctrl_bvalid_next; @@ -160,12 +178,27 @@ assign m_axis_irq.tid = '0; assign m_axis_irq.tdest = '0; assign m_axis_irq.tuser = '0; +logic [EVENT_DATA_W-1:0] m_axis_event_tdata_reg = '0, m_axis_event_tdata_next; +logic [EQN_W-1:0] m_axis_event_tdest_reg = '0, m_axis_event_tdest_next; +logic m_axis_event_tvalid_reg = 1'b0, m_axis_event_tvalid_next; + +assign m_axis_event.tdata = m_axis_event.DATA_W'(m_axis_event_tdata_reg); +assign m_axis_event.tkeep = '1; +assign m_axis_event.tstrb = m_axis_event.tkeep; +assign m_axis_event.tvalid = m_axis_event_tvalid_reg; +assign m_axis_event.tlast = 1'b1; +assign m_axis_event.tid = '0; +assign m_axis_event.tdest = m_axis_event_tdest_reg; +assign m_axis_event.tuser = '0; + logic [2**QN_W-1:0] queue_enable_reg = '0; (* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) logic queue_mem_arm[2**QN_W] = '{default: '0}; (* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) logic queue_mem_fire[2**QN_W] = '{default: '0}; (* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +logic queue_mem_cq_irq[2**QN_W] = '{default: '0}; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) logic [2:0] queue_mem_qtype[2**QN_W] = '{default: '0}; (* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) logic [DQN_W-1:0] queue_mem_dqn[2**QN_W] = '{default: '0}; @@ -184,6 +217,7 @@ logic [QN_W-1:0] queue_mem_addr; wire queue_mem_rd_enable = queue_enable_reg[queue_mem_addr]; wire queue_mem_rd_arm = queue_mem_arm[queue_mem_addr]; wire queue_mem_rd_fire = queue_mem_fire[queue_mem_addr]; +wire queue_mem_rd_cq_irq = queue_mem_cq_irq[queue_mem_addr]; wire [2:0] queue_mem_rd_qtype = queue_mem_qtype[queue_mem_addr]; wire [DQN_W-1:0] queue_mem_rd_dqn = queue_mem_dqn[queue_mem_addr]; wire [3:0] queue_mem_rd_log_size = queue_mem_log_size[queue_mem_addr]; @@ -197,6 +231,7 @@ wire queue_mem_rd_status_full = ($unsigned(queue_mem_rd_prod_ptr - queue_mem_rd_ logic queue_mem_wr_enable; logic queue_mem_wr_arm; logic queue_mem_wr_fire; +logic queue_mem_wr_cq_irq; logic [2:0] queue_mem_wr_qtype; logic [DQN_W-1:0] queue_mem_wr_dqn; logic [3:0] queue_mem_wr_log_size; @@ -231,12 +266,17 @@ always_comb begin m_axis_irq_irqn_next = m_axis_irq_irqn_reg; m_axis_irq_tvalid_next = m_axis_irq_tvalid_reg && !m_axis_irq.tready; + m_axis_event_tdata_next = m_axis_event_tdata_reg; + m_axis_event_tdest_next = m_axis_event_tdest_reg; + m_axis_event_tvalid_next = m_axis_event_tvalid_reg && !m_axis_event.tready; + queue_mem_wr_en = 1'b0; queue_mem_addr = '0; queue_mem_wr_enable = queue_mem_rd_enable; queue_mem_wr_arm = queue_mem_rd_arm; queue_mem_wr_fire = queue_mem_rd_fire; + queue_mem_wr_cq_irq = queue_mem_rd_cq_irq; queue_mem_wr_qtype = queue_mem_rd_qtype; queue_mem_wr_dqn = queue_mem_rd_dqn; queue_mem_wr_log_size = queue_mem_rd_log_size; @@ -265,12 +305,12 @@ always_comb begin case (s_axil_ctrl_awaddr_reg_index) 3'd2: begin - if (!IS_CQ) begin + if (!IS_CQ && !IS_EQ) begin queue_mem_wr_prod_ptr = s_axil_ctrl_wr.wdata[15:0]; end end 3'd3: begin - if (IS_CQ) begin + if (IS_CQ || IS_EQ) begin queue_mem_wr_cons_ptr = s_axil_ctrl_wr.wdata[15:0]; if (s_axil_ctrl_wr.wdata[31]) begin queue_mem_wr_arm = 1'b1; @@ -299,7 +339,10 @@ always_comb begin queue_mem_wr_fire = 1'b0; end - 3'd1: queue_mem_wr_dqn = s_apb_dp_ctrl.pwdata[DQN_W-1:0]; + 3'd1: begin + queue_mem_wr_dqn = s_apb_dp_ctrl.pwdata[DQN_W-1:0]; + queue_mem_wr_cq_irq = s_apb_dp_ctrl.pwdata[31]; + end 3'd2: queue_mem_wr_prod_ptr = s_apb_dp_ctrl.pwdata[15:0]; 3'd3: begin queue_mem_wr_cons_ptr = s_apb_dp_ctrl.pwdata[15:0]; @@ -317,11 +360,14 @@ always_comb begin case (s_apb_dp_ctrl_paddr_reg_index) 3'd0: begin s_apb_dp_ctrl_prdata_next[0] = queue_mem_rd_enable; - s_apb_dp_ctrl_prdata_next[1] = IS_CQ ? queue_mem_rd_arm : 1'b0; + s_apb_dp_ctrl_prdata_next[1] = (IS_CQ || IS_EQ) ? queue_mem_rd_arm : 1'b0; s_apb_dp_ctrl_prdata_next[19:16] = queue_mem_rd_log_size; s_apb_dp_ctrl_prdata_next[23:20] = QTYPE_EN ? 4'(queue_mem_rd_qtype) : '0; end - 3'd1: s_apb_dp_ctrl_prdata_next = 32'(queue_mem_rd_dqn); + 3'd1: begin + s_apb_dp_ctrl_prdata_next[30:0] = 31'(queue_mem_rd_dqn); + s_apb_dp_ctrl_prdata_next[31] = (IS_CQ && CQ_IRQ) ? queue_mem_rd_cq_irq : 1'b0; + end 3'd2: s_apb_dp_ctrl_prdata_next = 32'(queue_mem_rd_prod_ptr); 3'd3: s_apb_dp_ctrl_prdata_next = 32'(queue_mem_rd_cons_ptr); 3'd6: s_apb_dp_ctrl_prdata_next = queue_mem_rd_base_addr[31:0]; @@ -346,7 +392,7 @@ always_comb begin rsp_qn_next = req_qn; rsp_dqn_next = queue_mem_rd_dqn; rsp_error_next = !queue_mem_rd_enable || (QTYPE_EN && req_qtype != queue_mem_rd_qtype); - if (IS_CQ) begin + if (IS_CQ || IS_EQ) begin rsp_addr_next = queue_mem_rd_base_addr + DMA_ADDR_W'(16'(queue_mem_rd_prod_ptr & ({16{1'b1}} >> (16 - queue_mem_rd_log_size))) * QE_SIZE); rsp_phase_tag_next = !queue_mem_rd_prod_ptr[queue_mem_rd_log_size]; if (queue_mem_rd_status_full) @@ -368,16 +414,35 @@ always_comb begin queue_mem_addr = scrub_ptr_reg; - if (IS_CQ && queue_mem_rd_enable && queue_mem_rd_arm && queue_mem_rd_fire) begin - if (!m_axis_irq_tvalid_reg || m_axis_irq.tready) begin - // fire in the hole + if ((IS_CQ || IS_EQ) && queue_mem_rd_enable && queue_mem_rd_arm && queue_mem_rd_fire) begin + if ((IS_CQ && !IS_EQ && (!CQ_IRQ || !queue_mem_rd_cq_irq)) || (QTYPE_EN && queue_mem_rd_qtype == QTYPE_CQ && (!CQ_IRQ || !queue_mem_rd_cq_irq))) begin + // event - only for CQ + if (!m_axis_event_tvalid_reg || m_axis_event.tready) begin + // fire in the hole - m_axis_irq_irqn_next = IRQN_W'(queue_mem_rd_dqn); - m_axis_irq_tvalid_next = 1'b1; + m_axis_event_tdata_next = '0; + m_axis_event_tdata_next[15:0] = '0; // rsvd + m_axis_event_tdata_next[31:16] = '0; // CPL + m_axis_event_tdata_next[63:32] = 32'(scrub_ptr_reg); // CQN + m_axis_event_tdest_next = EQN_W'(queue_mem_rd_dqn); + m_axis_event_tvalid_next = 1'b1; - queue_mem_wr_arm = 1'b0; - queue_mem_wr_fire = 1'b0; - queue_mem_wr_en = 1'b1; + queue_mem_wr_arm = 1'b0; + queue_mem_wr_fire = 1'b0; + queue_mem_wr_en = 1'b1; + end + end else if ((!IS_CQ && IS_EQ) || (IS_CQ && !IS_EQ && (CQ_IRQ && queue_mem_rd_cq_irq)) || (QTYPE_EN && (queue_mem_rd_qtype == QTYPE_EQ || (queue_mem_rd_qtype == QTYPE_CQ && (CQ_IRQ && queue_mem_rd_cq_irq))))) begin + // interrupt - EQ or CQ, but CQ requires config bit set to select interrupts + if (!m_axis_irq_tvalid_reg || m_axis_irq.tready) begin + // fire in the hole + + m_axis_irq_irqn_next = IRQN_W'(queue_mem_rd_dqn); + m_axis_irq_tvalid_next = 1'b1; + + queue_mem_wr_arm = 1'b0; + queue_mem_wr_fire = 1'b0; + queue_mem_wr_en = 1'b1; + end end end @@ -410,12 +475,17 @@ always @(posedge clk) begin m_axis_irq_irqn_reg <= m_axis_irq_irqn_next; m_axis_irq_tvalid_reg <= m_axis_irq_tvalid_next; + m_axis_event_tdata_reg <= m_axis_event_tdata_next; + m_axis_event_tdest_reg <= m_axis_event_tdest_next; + m_axis_event_tvalid_reg <= m_axis_event_tvalid_next; + scrub_ptr_reg <= scrub_ptr_next; if (queue_mem_wr_en) begin queue_enable_reg[queue_mem_addr] <= queue_mem_wr_enable; queue_mem_arm[queue_mem_addr] <= queue_mem_wr_arm; queue_mem_fire[queue_mem_addr] <= queue_mem_wr_fire; + queue_mem_cq_irq[queue_mem_addr] <= queue_mem_wr_cq_irq; queue_mem_qtype[queue_mem_addr] <= queue_mem_wr_qtype; queue_mem_dqn[queue_mem_addr] <= queue_mem_wr_dqn; queue_mem_log_size[queue_mem_addr] <= queue_mem_wr_log_size; @@ -441,6 +511,8 @@ always @(posedge clk) begin m_axis_irq_tvalid_reg <= 1'b0; + m_axis_event_tvalid_reg <= 1'b0; + scrub_ptr_reg <= '0; queue_enable_reg <= '0;