cndm: Rework queue notification mechanism to eliminate rearm race

Signed-off-by: Alex Forencich <alex@alexforencich.com>
This commit is contained in:
Alex Forencich
2026-03-09 21:41:36 -07:00
parent c37d967de9
commit 2ea3c204de
3 changed files with 140 additions and 47 deletions

View File

@@ -61,11 +61,14 @@ logic cq_req_ready;
logic [IRQN_W-1:0] cq_rsp_irqn; logic [IRQN_W-1:0] cq_rsp_irqn;
logic [DMA_ADDR_W-1:0] cq_rsp_addr; logic [DMA_ADDR_W-1:0] cq_rsp_addr;
logic cq_rsp_phase_tag; logic cq_rsp_phase_tag;
logic cq_rsp_arm;
logic cq_rsp_error; logic cq_rsp_error;
logic cq_rsp_valid; logic cq_rsp_valid;
logic cq_rsp_ready_reg = 1'b0; logic cq_rsp_ready_reg = 1'b0;
logic [CQN_W-1:0] notify_req_qn_reg = '0;
logic notify_req_valid_reg = 1'b0;
logic notify_req_ready;
cndm_micro_queue_state #( cndm_micro_queue_state #(
.QN_W(CQN_W), .QN_W(CQN_W),
.DQN_W(IRQN_W), .DQN_W(IRQN_W),
@@ -100,10 +103,21 @@ cq_mgr_inst (
.rsp_dqn(cq_rsp_irqn), .rsp_dqn(cq_rsp_irqn),
.rsp_addr(cq_rsp_addr), .rsp_addr(cq_rsp_addr),
.rsp_phase_tag(cq_rsp_phase_tag), .rsp_phase_tag(cq_rsp_phase_tag),
.rsp_arm(cq_rsp_arm),
.rsp_error(cq_rsp_error), .rsp_error(cq_rsp_error),
.rsp_valid(cq_rsp_valid), .rsp_valid(cq_rsp_valid),
.rsp_ready(cq_rsp_ready_reg) .rsp_ready(cq_rsp_ready_reg),
/*
* Notification interface
*/
.notify_req_qn(notify_req_qn_reg),
.notify_req_valid(notify_req_valid_reg),
.notify_req_ready(notify_req_ready),
/*
* Interrupts
*/
.m_axis_irq(m_axis_irq)
); );
typedef enum logic [1:0] { typedef enum logic [1:0] {
@@ -115,19 +129,6 @@ typedef enum logic [1:0] {
state_t state_reg = STATE_IDLE; state_t state_reg = STATE_IDLE;
logic phase_tag_reg = 1'b0; logic phase_tag_reg = 1'b0;
logic arm_reg = 1'b0;
logic [IRQN_W-1:0] m_axis_irq_irqn_reg = '0;
logic m_axis_irq_tvalid_reg = 1'b0;
assign m_axis_irq.tdata = m_axis_irq_irqn_reg;
assign m_axis_irq.tkeep = '1;
assign m_axis_irq.tstrb = m_axis_irq.tkeep;
assign m_axis_irq.tvalid = m_axis_irq_tvalid_reg;
assign m_axis_irq.tlast = 1'b1;
assign m_axis_irq.tid = '0;
assign m_axis_irq.tdest = '0;
assign m_axis_irq.tuser = '0;
always_ff @(posedge clk) begin always_ff @(posedge clk) begin
s_axis_cpl.tready <= 1'b0; s_axis_cpl.tready <= 1'b0;
@@ -148,7 +149,7 @@ always_ff @(posedge clk) begin
cq_req_valid_reg <= cq_req_valid_reg && !cq_req_ready; cq_req_valid_reg <= cq_req_valid_reg && !cq_req_ready;
cq_rsp_ready_reg <= 1'b0; cq_rsp_ready_reg <= 1'b0;
m_axis_irq_tvalid_reg <= m_axis_irq_tvalid_reg && !m_axis_irq.tready; notify_req_valid_reg <= notify_req_valid_reg && !notify_req_ready;
case (state_reg) case (state_reg)
STATE_IDLE: begin STATE_IDLE: begin
@@ -156,8 +157,9 @@ always_ff @(posedge clk) begin
cq_req_cqn_reg <= s_axis_cpl.tdest; cq_req_cqn_reg <= s_axis_cpl.tdest;
if (s_axis_cpl.tvalid && !s_axis_cpl.tready) begin if (s_axis_cpl.tvalid && !s_axis_cpl.tready && (!notify_req_valid_reg || notify_req_ready)) begin
cq_req_valid_reg <= 1'b1; cq_req_valid_reg <= 1'b1;
notify_req_qn_reg <= s_axis_cpl.tdest;
state_reg <= STATE_QUERY_CQ; state_reg <= STATE_QUERY_CQ;
end else begin end else begin
state_reg <= STATE_IDLE; state_reg <= STATE_IDLE;
@@ -170,10 +172,8 @@ always_ff @(posedge clk) begin
if (cq_rsp_valid && cq_rsp_ready_reg) begin if (cq_rsp_valid && cq_rsp_ready_reg) begin
cq_rsp_ready_reg <= 1'b0; cq_rsp_ready_reg <= 1'b0;
m_axis_irq_irqn_reg <= cq_rsp_irqn;
dma_wr_desc_req.req_dst_addr <= cq_rsp_addr; dma_wr_desc_req.req_dst_addr <= cq_rsp_addr;
phase_tag_reg <= cq_rsp_phase_tag; phase_tag_reg <= cq_rsp_phase_tag;
arm_reg <= cq_rsp_arm;
if (cq_rsp_error) begin if (cq_rsp_error) begin
// drop completion // drop completion
@@ -188,7 +188,7 @@ always_ff @(posedge clk) begin
STATE_WRITE_DATA: begin STATE_WRITE_DATA: begin
if (dma_wr_desc_sts.sts_valid) begin if (dma_wr_desc_sts.sts_valid) begin
s_axis_cpl.tready <= 1'b1; s_axis_cpl.tready <= 1'b1;
m_axis_irq_tvalid_reg <= arm_reg; // only generate interrupt when armed notify_req_valid_reg <= 1'b1;
state_reg <= STATE_IDLE; state_reg <= STATE_IDLE;
end end
end end
@@ -201,7 +201,6 @@ always_ff @(posedge clk) begin
state_reg <= STATE_IDLE; state_reg <= STATE_IDLE;
cq_req_valid_reg <= 1'b0; cq_req_valid_reg <= 1'b0;
cq_rsp_ready_reg <= 1'b0; cq_rsp_ready_reg <= 1'b0;
m_axis_irq_tvalid_reg <= 1'b0;
end end
end end

View File

@@ -66,6 +66,8 @@ logic wq_rsp_error;
logic wq_rsp_valid; logic wq_rsp_valid;
logic wq_rsp_ready_reg = 1'b0; logic wq_rsp_ready_reg = 1'b0;
taxi_axis_if axis_irq_stub();
cndm_micro_queue_state #( cndm_micro_queue_state #(
.QN_W(WQN_W), .QN_W(WQN_W),
.DQN_W(CQN_W), .DQN_W(CQN_W),
@@ -100,10 +102,21 @@ wq_mgr_inst (
.rsp_dqn(wq_rsp_cqn), .rsp_dqn(wq_rsp_cqn),
.rsp_addr(wq_rsp_addr), .rsp_addr(wq_rsp_addr),
.rsp_phase_tag(), .rsp_phase_tag(),
.rsp_arm(),
.rsp_error(wq_rsp_error), .rsp_error(wq_rsp_error),
.rsp_valid(wq_rsp_valid), .rsp_valid(wq_rsp_valid),
.rsp_ready(wq_rsp_ready_reg) .rsp_ready(wq_rsp_ready_reg),
/*
* Notification interface
*/
.notify_req_qn('0),
.notify_req_valid(1'b0),
.notify_req_ready(),
/*
* Interrupts
*/
.m_axis_irq(axis_irq_stub)
); );
taxi_dma_desc_if #( taxi_dma_desc_if #(

View File

@@ -49,10 +49,21 @@ module cndm_micro_queue_state #(
output wire logic [DQN_W-1:0] rsp_dqn, output wire logic [DQN_W-1:0] rsp_dqn,
output wire logic [DMA_ADDR_W-1:0] rsp_addr, output wire logic [DMA_ADDR_W-1:0] rsp_addr,
output wire logic rsp_phase_tag, output wire logic rsp_phase_tag,
output wire logic rsp_arm,
output wire logic rsp_error, output wire logic rsp_error,
output wire logic rsp_valid, output wire logic rsp_valid,
input wire logic rsp_ready input wire logic rsp_ready,
/*
* Notification interface
*/
input wire logic [QN_W-1:0] notify_req_qn,
input wire logic notify_req_valid,
output wire logic notify_req_ready,
/*
* Interrupts
*/
taxi_axis_if.src m_axis_irq
); );
localparam PTR_W = 16; localparam PTR_W = 16;
@@ -65,6 +76,8 @@ localparam AXIL_DATA_W = s_axil_ctrl_wr.DATA_W;
localparam APB_ADDR_W = s_apb_dp_ctrl.ADDR_W; localparam APB_ADDR_W = s_apb_dp_ctrl.ADDR_W;
localparam APB_DATA_W = s_apb_dp_ctrl.DATA_W; localparam APB_DATA_W = s_apb_dp_ctrl.DATA_W;
localparam IRQN_W = m_axis_irq.DATA_W;
// check configuration // check configuration
if (s_axil_ctrl_rd.DATA_W != 32 || s_axil_ctrl_wr.DATA_W != 32) if (s_axil_ctrl_rd.DATA_W != 32 || s_axil_ctrl_wr.DATA_W != 32)
$fatal(0, "Error: AXI data width must be 32 (instance %m)"); $fatal(0, "Error: AXI data width must be 32 (instance %m)");
@@ -120,7 +133,6 @@ logic [QN_W-1:0] rsp_qn_reg = '0, rsp_qn_next;
logic [DQN_W-1:0] rsp_dqn_reg = '0, rsp_dqn_next; logic [DQN_W-1:0] rsp_dqn_reg = '0, rsp_dqn_next;
logic [DMA_ADDR_W-1:0] rsp_addr_reg = '0, rsp_addr_next; logic [DMA_ADDR_W-1:0] rsp_addr_reg = '0, rsp_addr_next;
logic rsp_phase_tag_reg = 1'b0, rsp_phase_tag_next; logic rsp_phase_tag_reg = 1'b0, rsp_phase_tag_next;
logic rsp_arm_reg = 1'b0, rsp_arm_next;
logic rsp_error_reg = 1'b0, rsp_error_next; logic rsp_error_reg = 1'b0, rsp_error_next;
logic rsp_valid_reg = 1'b0, rsp_valid_next; logic rsp_valid_reg = 1'b0, rsp_valid_next;
@@ -129,14 +141,31 @@ assign rsp_qn = rsp_qn_reg;
assign rsp_dqn = rsp_dqn_reg; assign rsp_dqn = rsp_dqn_reg;
assign rsp_addr = rsp_addr_reg; assign rsp_addr = rsp_addr_reg;
assign rsp_phase_tag = rsp_phase_tag_reg; assign rsp_phase_tag = rsp_phase_tag_reg;
assign rsp_arm = IS_CQ ? rsp_arm_reg : 1'b0;
assign rsp_error = rsp_error_reg; assign rsp_error = rsp_error_reg;
assign rsp_valid = rsp_valid_reg; assign rsp_valid = rsp_valid_reg;
logic notify_req_ready_reg = 1'b0, notify_req_ready_next;
assign notify_req_ready = notify_req_ready_reg;
logic [IRQN_W-1:0] m_axis_irq_irqn_reg = '0, m_axis_irq_irqn_next;
logic m_axis_irq_tvalid_reg = 1'b0, m_axis_irq_tvalid_next;
assign m_axis_irq.tdata = m_axis_irq_irqn_reg;
assign m_axis_irq.tkeep = '1;
assign m_axis_irq.tstrb = m_axis_irq.tkeep;
assign m_axis_irq.tvalid = m_axis_irq_tvalid_reg;
assign m_axis_irq.tlast = 1'b1;
assign m_axis_irq.tid = '0;
assign m_axis_irq.tdest = '0;
assign m_axis_irq.tuser = '0;
logic [2**QN_W-1:0] queue_enable_reg = '0; logic [2**QN_W-1:0] queue_enable_reg = '0;
(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) (* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *)
logic queue_mem_arm[2**QN_W] = '{default: '0}; logic queue_mem_arm[2**QN_W] = '{default: '0};
(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) (* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *)
logic queue_mem_fire[2**QN_W] = '{default: '0};
(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *)
logic [2:0] queue_mem_qtype[2**QN_W] = '{default: '0}; logic [2:0] queue_mem_qtype[2**QN_W] = '{default: '0};
(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) (* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *)
logic [DQN_W-1:0] queue_mem_dqn[2**QN_W] = '{default: '0}; logic [DQN_W-1:0] queue_mem_dqn[2**QN_W] = '{default: '0};
@@ -154,6 +183,7 @@ logic [QN_W-1:0] queue_mem_addr;
wire queue_mem_rd_enable = queue_enable_reg[queue_mem_addr]; wire queue_mem_rd_enable = queue_enable_reg[queue_mem_addr];
wire queue_mem_rd_arm = queue_mem_arm[queue_mem_addr]; wire queue_mem_rd_arm = queue_mem_arm[queue_mem_addr];
wire queue_mem_rd_fire = queue_mem_fire[queue_mem_addr];
wire [2:0] queue_mem_rd_qtype = queue_mem_qtype[queue_mem_addr]; wire [2:0] queue_mem_rd_qtype = queue_mem_qtype[queue_mem_addr];
wire [DQN_W-1:0] queue_mem_rd_dqn = queue_mem_dqn[queue_mem_addr]; wire [DQN_W-1:0] queue_mem_rd_dqn = queue_mem_dqn[queue_mem_addr];
wire [3:0] queue_mem_rd_log_size = queue_mem_log_size[queue_mem_addr]; wire [3:0] queue_mem_rd_log_size = queue_mem_log_size[queue_mem_addr];
@@ -166,6 +196,7 @@ wire queue_mem_rd_status_full = ($unsigned(queue_mem_rd_prod_ptr - queue_mem_rd_
logic queue_mem_wr_enable; logic queue_mem_wr_enable;
logic queue_mem_wr_arm; logic queue_mem_wr_arm;
logic queue_mem_wr_fire;
logic [2:0] queue_mem_wr_qtype; logic [2:0] queue_mem_wr_qtype;
logic [DQN_W-1:0] queue_mem_wr_dqn; logic [DQN_W-1:0] queue_mem_wr_dqn;
logic [3:0] queue_mem_wr_log_size; logic [3:0] queue_mem_wr_log_size;
@@ -173,6 +204,8 @@ logic [DMA_ADDR_W-1:0] queue_mem_wr_base_addr;
logic [PTR_W-1:0] queue_mem_wr_prod_ptr; logic [PTR_W-1:0] queue_mem_wr_prod_ptr;
logic [PTR_W-1:0] queue_mem_wr_cons_ptr; logic [PTR_W-1:0] queue_mem_wr_cons_ptr;
logic [QN_W-1:0] scrub_ptr_reg = '0, scrub_ptr_next;
always_comb begin always_comb begin
s_axil_ctrl_awready_next = 1'b0; s_axil_ctrl_awready_next = 1'b0;
s_axil_ctrl_wready_next = 1'b0; s_axil_ctrl_wready_next = 1'b0;
@@ -190,15 +223,20 @@ always_comb begin
rsp_dqn_next = rsp_dqn_reg; rsp_dqn_next = rsp_dqn_reg;
rsp_addr_next = rsp_addr_reg; rsp_addr_next = rsp_addr_reg;
rsp_phase_tag_next = rsp_phase_tag_reg; rsp_phase_tag_next = rsp_phase_tag_reg;
rsp_arm_next = rsp_arm_reg;
rsp_error_next = rsp_error_reg; rsp_error_next = rsp_error_reg;
rsp_valid_next = rsp_valid_reg && !rsp_ready; rsp_valid_next = rsp_valid_reg && !rsp_ready;
notify_req_ready_next = 1'b0;
m_axis_irq_irqn_next = m_axis_irq_irqn_reg;
m_axis_irq_tvalid_next = m_axis_irq_tvalid_reg && !m_axis_irq.tready;
queue_mem_wr_en = 1'b0; queue_mem_wr_en = 1'b0;
queue_mem_addr = '0; queue_mem_addr = '0;
queue_mem_wr_enable = queue_mem_rd_enable; queue_mem_wr_enable = queue_mem_rd_enable;
queue_mem_wr_arm = queue_mem_rd_arm; queue_mem_wr_arm = queue_mem_rd_arm;
queue_mem_wr_fire = queue_mem_rd_fire;
queue_mem_wr_qtype = queue_mem_rd_qtype; queue_mem_wr_qtype = queue_mem_rd_qtype;
queue_mem_wr_dqn = queue_mem_rd_dqn; queue_mem_wr_dqn = queue_mem_rd_dqn;
queue_mem_wr_log_size = queue_mem_rd_log_size; queue_mem_wr_log_size = queue_mem_rd_log_size;
@@ -206,6 +244,8 @@ always_comb begin
queue_mem_wr_prod_ptr = queue_mem_rd_prod_ptr; queue_mem_wr_prod_ptr = queue_mem_rd_prod_ptr;
queue_mem_wr_cons_ptr = queue_mem_rd_cons_ptr; queue_mem_wr_cons_ptr = queue_mem_rd_cons_ptr;
scrub_ptr_next = scrub_ptr_reg;
// terminate AXI lite reads // terminate AXI lite reads
if (s_axil_ctrl_rd.arvalid && !s_axil_ctrl_rvalid_reg) begin if (s_axil_ctrl_rd.arvalid && !s_axil_ctrl_rvalid_reg) begin
s_axil_ctrl_rdata_next = '0; s_axil_ctrl_rdata_next = '0;
@@ -256,12 +296,15 @@ always_comb begin
queue_mem_wr_arm = s_apb_dp_ctrl.pwdata[1]; queue_mem_wr_arm = s_apb_dp_ctrl.pwdata[1];
queue_mem_wr_log_size = s_apb_dp_ctrl.pwdata[19:16]; queue_mem_wr_log_size = s_apb_dp_ctrl.pwdata[19:16];
queue_mem_wr_qtype = 3'(s_apb_dp_ctrl.pwdata[23:20]); queue_mem_wr_qtype = 3'(s_apb_dp_ctrl.pwdata[23:20]);
queue_mem_wr_fire = 1'b0;
end end
3'd1: queue_mem_wr_dqn = s_apb_dp_ctrl.pwdata[DQN_W-1:0]; 3'd1: queue_mem_wr_dqn = s_apb_dp_ctrl.pwdata[DQN_W-1:0];
3'd2: queue_mem_wr_prod_ptr = s_apb_dp_ctrl.pwdata[15:0]; 3'd2: queue_mem_wr_prod_ptr = s_apb_dp_ctrl.pwdata[15:0];
3'd3: begin 3'd3: begin
queue_mem_wr_cons_ptr = s_apb_dp_ctrl.pwdata[15:0]; queue_mem_wr_cons_ptr = s_apb_dp_ctrl.pwdata[15:0];
if (s_apb_dp_ctrl.pwdata[31]) begin if (s_apb_dp_ctrl.pwdata[31]) begin
// rearm
queue_mem_wr_arm = 1'b1; queue_mem_wr_arm = 1'b1;
end end
end end
@@ -286,14 +329,20 @@ always_comb begin
default: begin end default: begin end
endcase endcase
end else if (notify_req_valid && !notify_req_ready) begin
// notify request
notify_req_ready_next = 1'b1;
queue_mem_addr = notify_req_qn;
queue_mem_wr_fire = 1'b1;
queue_mem_wr_en = 1'b1;
end else if (req_valid && !req_ready && (!rsp_valid || rsp_ready)) begin end else if (req_valid && !req_ready && (!rsp_valid || rsp_ready)) begin
// completion enqueue request // completion enqueue request
req_ready_next = 1'b1; req_ready_next = 1'b1;
queue_mem_addr = req_qn; queue_mem_addr = req_qn;
queue_mem_wr_arm = 1'b0;
rsp_arm_next = queue_mem_rd_arm;
rsp_qn_next = req_qn; rsp_qn_next = req_qn;
rsp_dqn_next = queue_mem_rd_dqn; rsp_dqn_next = queue_mem_rd_dqn;
rsp_error_next = !queue_mem_rd_enable || (QTYPE_EN && req_qtype != queue_mem_rd_qtype); rsp_error_next = !queue_mem_rd_enable || (QTYPE_EN && req_qtype != queue_mem_rd_qtype);
@@ -314,6 +363,25 @@ always_comb begin
if (!rsp_error_next) begin if (!rsp_error_next) begin
queue_mem_wr_en = 1'b1; queue_mem_wr_en = 1'b1;
end end
end else begin
// scrub
queue_mem_addr = scrub_ptr_reg;
if (IS_CQ && queue_mem_rd_enable && queue_mem_rd_arm && queue_mem_rd_fire) begin
if (!m_axis_irq_tvalid_reg || m_axis_irq.tready) begin
// fire in the hole
m_axis_irq_irqn_next = IRQN_W'(queue_mem_rd_dqn);
m_axis_irq_tvalid_next = 1'b1;
queue_mem_wr_arm = 1'b0;
queue_mem_wr_fire = 1'b0;
queue_mem_wr_en = 1'b1;
end
end
scrub_ptr_next = scrub_ptr_reg + 1;
end end
end end
@@ -334,13 +402,20 @@ always @(posedge clk) begin
rsp_dqn_reg <= rsp_dqn_next; rsp_dqn_reg <= rsp_dqn_next;
rsp_addr_reg <= rsp_addr_next; rsp_addr_reg <= rsp_addr_next;
rsp_phase_tag_reg <= rsp_phase_tag_next; rsp_phase_tag_reg <= rsp_phase_tag_next;
rsp_arm_reg <= rsp_arm_next;
rsp_error_reg <= rsp_error_next; rsp_error_reg <= rsp_error_next;
rsp_valid_reg <= rsp_valid_next; rsp_valid_reg <= rsp_valid_next;
notify_req_ready_reg <= notify_req_ready_next;
m_axis_irq_irqn_reg <= m_axis_irq_irqn_next;
m_axis_irq_tvalid_reg <= m_axis_irq_tvalid_next;
scrub_ptr_reg <= scrub_ptr_next;
if (queue_mem_wr_en) begin if (queue_mem_wr_en) begin
queue_enable_reg[queue_mem_addr] <= queue_mem_wr_enable; queue_enable_reg[queue_mem_addr] <= queue_mem_wr_enable;
queue_mem_arm[queue_mem_addr] <= queue_mem_wr_arm; queue_mem_arm[queue_mem_addr] <= queue_mem_wr_arm;
queue_mem_fire[queue_mem_addr] <= queue_mem_wr_fire;
queue_mem_qtype[queue_mem_addr] <= queue_mem_wr_qtype; queue_mem_qtype[queue_mem_addr] <= queue_mem_wr_qtype;
queue_mem_dqn[queue_mem_addr] <= queue_mem_wr_dqn; queue_mem_dqn[queue_mem_addr] <= queue_mem_wr_dqn;
queue_mem_log_size[queue_mem_addr] <= queue_mem_wr_log_size; queue_mem_log_size[queue_mem_addr] <= queue_mem_wr_log_size;
@@ -362,6 +437,12 @@ always @(posedge clk) begin
req_ready_reg <= 1'b0; req_ready_reg <= 1'b0;
rsp_valid_reg <= 1'b0; rsp_valid_reg <= 1'b0;
notify_req_ready_reg <= 1'b0;
m_axis_irq_tvalid_reg <= 1'b0;
scrub_ptr_reg <= '0;
queue_enable_reg <= '0; queue_enable_reg <= '0;
end end
end end