cndm: Peel off queue management logic, store queue state in distributed RAM

Signed-off-by: Alex Forencich <alex@alexforencich.com>
This commit is contained in:
Alex Forencich
2026-03-05 18:04:43 -08:00
parent 8f1c082174
commit 7dbe6df56a
7 changed files with 407 additions and 200 deletions

View File

@@ -1,7 +1,7 @@
// SPDX-License-Identifier: CERN-OHL-S-2.0
/*
Copyright (c) 2025 FPGA Ninja, LLC
Copyright (c) 2026 FPGA Ninja, LLC
Authors:
- Alex Forencich
@@ -15,7 +15,9 @@ Authors:
/*
* Corundum-micro completion write module
*/
module cndm_micro_cpl_wr
module cndm_micro_cpl_wr #(
parameter CQN_W = 5
)
(
input wire logic clk,
input wire logic rst,
@@ -42,168 +44,56 @@ module cndm_micro_cpl_wr
output wire logic irq
);
localparam AXIL_ADDR_W = s_axil_ctrl_wr.ADDR_W;
localparam AXIL_DATA_W = s_axil_ctrl_wr.DATA_W;
localparam DMA_ADDR_W = dma_wr_desc_req.DST_ADDR_W;
localparam APB_ADDR_W = s_apb_dp_ctrl.ADDR_W;
localparam APB_DATA_W = s_apb_dp_ctrl.DATA_W;
// CQ lookup request towards cq_mgr_inst: queue number plus valid flag,
// with cq_req_ready as the handshake return from the queue manager.
logic [CQN_W-1:0] cq_req_cqn_reg = '0;
logic cq_req_valid_reg = 1'b0;
logic cq_req_ready;
// CQ lookup response from cq_mgr_inst: the state machine uses the address
// as the DMA write destination and the phase as the phase tag; an error
// response causes the completion to be dropped.
logic [DMA_ADDR_W-1:0] cq_rsp_addr;
logic cq_rsp_phase;
logic cq_rsp_error;
logic cq_rsp_valid;
logic cq_rsp_ready_reg = 1'b0;
// TX completion queue configuration: enable flag, 4-bit size field
// (written from control register bits 19:16) and 64-bit base address.
logic txcq_en_reg = '0;
logic [3:0] txcq_size_reg = '0;
logic [63:0] txcq_base_addr_reg = '0;
// RX completion queue configuration, same layout as the TX queue.
logic rxcq_en_reg = '0;
logic [3:0] rxcq_size_reg = '0;
logic [63:0] rxcq_base_addr_reg = '0;
cndm_micro_queue_state #(
.QN_W(CQN_W),
.DQN_W(CQN_W), // TODO
.CPL_SIZE(16),
.DMA_ADDR_W(DMA_ADDR_W)
)
cq_mgr_inst (
.clk(clk),
.rst(rst),
logic [15:0] txcq_prod_ptr_reg = '0;
logic [15:0] rxcq_prod_ptr_reg = '0;
/*
* Control register interface
*/
.s_axil_ctrl_wr(s_axil_ctrl_wr),
.s_axil_ctrl_rd(s_axil_ctrl_rd),
logic s_axil_ctrl_awready_reg = 1'b0;
logic s_axil_ctrl_wready_reg = 1'b0;
logic s_axil_ctrl_bvalid_reg = 1'b0;
/*
* Datapath control register interface
*/
.s_apb_dp_ctrl(s_apb_dp_ctrl),
logic s_axil_ctrl_arready_reg = 1'b0;
logic [AXIL_DATA_W-1:0] s_axil_ctrl_rdata_reg = '0;
logic s_axil_ctrl_rvalid_reg = 1'b0;
assign s_axil_ctrl_wr.awready = s_axil_ctrl_awready_reg;
assign s_axil_ctrl_wr.wready = s_axil_ctrl_wready_reg;
assign s_axil_ctrl_wr.bresp = '0;
assign s_axil_ctrl_wr.buser = '0;
assign s_axil_ctrl_wr.bvalid = s_axil_ctrl_bvalid_reg;
assign s_axil_ctrl_rd.arready = s_axil_ctrl_arready_reg;
assign s_axil_ctrl_rd.rdata = s_axil_ctrl_rdata_reg;
assign s_axil_ctrl_rd.rresp = '0;
assign s_axil_ctrl_rd.ruser = '0;
assign s_axil_ctrl_rd.rvalid = s_axil_ctrl_rvalid_reg;
// APB datapath control response registers.
// pready is pulsed by the control register logic; prdata holds the read value.
logic s_apb_dp_ctrl_pready_reg = 1'b0;
// Sized from the APB interface data width (APB_DATA_W) rather than the
// AXI-lite data width, so the register matches the prdata port it drives.
logic [APB_DATA_W-1:0] s_apb_dp_ctrl_prdata_reg = '0;
assign s_apb_dp_ctrl.pready = s_apb_dp_ctrl_pready_reg;
assign s_apb_dp_ctrl.prdata = s_apb_dp_ctrl_prdata_reg;
// No slave error is ever reported and the user sideband signals are unused.
assign s_apb_dp_ctrl.pslverr = 1'b0;
assign s_apb_dp_ctrl.pruser = '0;
assign s_apb_dp_ctrl.pbuser = '0;
// Control register logic.
// Acknowledges AXI-lite accesses on s_axil_ctrl_wr / s_axil_ctrl_rd and
// services APB accesses on s_apb_dp_ctrl to the TX/RX completion queue
// configuration registers. Statement order matters: defaults are assigned
// first, overridden by the handlers below, and the reset block at the end
// takes final priority.
always_ff @(posedge clk) begin
// Defaults: ready strobes last a single cycle; bvalid/rvalid are held
// until the master accepts them with bready/rready.
s_axil_ctrl_awready_reg <= 1'b0;
s_axil_ctrl_wready_reg <= 1'b0;
s_axil_ctrl_bvalid_reg <= s_axil_ctrl_bvalid_reg && !s_axil_ctrl_wr.bready;
s_axil_ctrl_arready_reg <= 1'b0;
s_axil_ctrl_rvalid_reg <= s_axil_ctrl_rvalid_reg && !s_axil_ctrl_rd.rready;
s_apb_dp_ctrl_pready_reg <= 1'b0;
// AXI-lite write: accepted only when address and data are both valid and
// no write response is pending. The register decode is commented out, so
// the write is acknowledged but its data is not stored anywhere.
if (s_axil_ctrl_wr.awvalid && s_axil_ctrl_wr.wvalid && !s_axil_ctrl_bvalid_reg) begin
s_axil_ctrl_awready_reg <= 1'b1;
s_axil_ctrl_wready_reg <= 1'b1;
s_axil_ctrl_bvalid_reg <= 1'b1;
// case ({s_axil_ctrl_wr.awaddr[9:2], 2'b00})
// 10'h000: begin
// txcq_en_reg <= s_axil_ctrl_wr.wdata[0];
// txcq_size_reg <= s_axil_ctrl_wr.wdata[19:16];
// end
// 10'h008: txcq_base_addr_reg[31:0] <= s_axil_ctrl_wr.wdata;
// 10'h00c: txcq_base_addr_reg[63:32] <= s_axil_ctrl_wr.wdata;
// 10'h100: begin
// rxcq_en_reg <= s_axil_ctrl_wr.wdata[0];
// rxcq_size_reg <= s_axil_ctrl_wr.wdata[19:16];
// end
// 10'h108: rxcq_base_addr_reg[31:0] <= s_axil_ctrl_wr.wdata;
// 10'h10c: rxcq_base_addr_reg[63:32] <= s_axil_ctrl_wr.wdata;
// default: begin end
// endcase
end
// AXI-lite read: accepted when no read response is pending. With the
// decode commented out, every read returns all zeros.
if (s_axil_ctrl_rd.arvalid && !s_axil_ctrl_rvalid_reg) begin
s_axil_ctrl_rdata_reg <= '0;
s_axil_ctrl_arready_reg <= 1'b1;
s_axil_ctrl_rvalid_reg <= 1'b1;
// case ({s_axil_ctrl_rd.araddr[9:2], 2'b00})
// 10'h000: begin
// s_axil_ctrl_rdata_reg[0] <= txcq_en_reg;
// s_axil_ctrl_rdata_reg[19:16] <= txcq_size_reg;
// end
// 10'h004: s_axil_ctrl_rdata_reg[15:0] <= txcq_prod_ptr_reg;
// 10'h008: s_axil_ctrl_rdata_reg <= txcq_base_addr_reg[31:0];
// 10'h00c: s_axil_ctrl_rdata_reg <= txcq_base_addr_reg[63:32];
// 10'h100: begin
// s_axil_ctrl_rdata_reg[0] <= rxcq_en_reg;
// s_axil_ctrl_rdata_reg[19:16] <= rxcq_size_reg;
// end
// 10'h104: s_axil_ctrl_rdata_reg[15:0] <= rxcq_prod_ptr_reg;
// 10'h108: s_axil_ctrl_rdata_reg <= rxcq_base_addr_reg[31:0];
// 10'h10c: s_axil_ctrl_rdata_reg <= rxcq_base_addr_reg[63:32];
// default: begin end
// endcase
end
// APB access: respond with a one-cycle pready. The !pready_reg term stops
// the same transfer from being handled again in the cycle pready is high.
// Addresses are decoded on paddr[9:2]; the two low address bits are ignored.
if (s_apb_dp_ctrl.penable && s_apb_dp_ctrl.psel && !s_apb_dp_ctrl_pready_reg) begin
s_apb_dp_ctrl_pready_reg <= 1'b1;
// Clear prdata first; the read decode below overwrites selected bits.
s_apb_dp_ctrl_prdata_reg <= '0;
// Write decode:
//   0x000 / 0x100  TX / RX control: bit 0 = enable, bits 19:16 = size
//   0x008 / 0x108  TX / RX base address, low 32 bits
//   0x00c / 0x10c  TX / RX base address, high 32 bits
// The producer pointers (0x004 / 0x104) have no write case.
if (s_apb_dp_ctrl.pwrite) begin
case ({s_apb_dp_ctrl.paddr[9:2], 2'b00})
10'h000: begin
txcq_en_reg <= s_apb_dp_ctrl.pwdata[0];
txcq_size_reg <= s_apb_dp_ctrl.pwdata[19:16];
end
10'h008: txcq_base_addr_reg[31:0] <= s_apb_dp_ctrl.pwdata;
10'h00c: txcq_base_addr_reg[63:32] <= s_apb_dp_ctrl.pwdata;
10'h100: begin
rxcq_en_reg <= s_apb_dp_ctrl.pwdata[0];
rxcq_size_reg <= s_apb_dp_ctrl.pwdata[19:16];
end
10'h108: rxcq_base_addr_reg[31:0] <= s_apb_dp_ctrl.pwdata;
10'h10c: rxcq_base_addr_reg[63:32] <= s_apb_dp_ctrl.pwdata;
default: begin end
endcase
end
// Read decode. It is not gated on !pwrite, so it also runs on writes and
// then loads prdata with the register values from before the write.
case ({s_apb_dp_ctrl.paddr[9:2], 2'b00})
10'h000: begin
s_apb_dp_ctrl_prdata_reg[0] <= txcq_en_reg;
s_apb_dp_ctrl_prdata_reg[19:16] <= txcq_size_reg;
end
10'h004: s_apb_dp_ctrl_prdata_reg[15:0] <= txcq_prod_ptr_reg;
10'h008: s_apb_dp_ctrl_prdata_reg <= txcq_base_addr_reg[31:0];
10'h00c: s_apb_dp_ctrl_prdata_reg <= txcq_base_addr_reg[63:32];
10'h100: begin
s_apb_dp_ctrl_prdata_reg[0] <= rxcq_en_reg;
s_apb_dp_ctrl_prdata_reg[19:16] <= rxcq_size_reg;
end
10'h104: s_apb_dp_ctrl_prdata_reg[15:0] <= rxcq_prod_ptr_reg;
10'h108: s_apb_dp_ctrl_prdata_reg <= rxcq_base_addr_reg[31:0];
10'h10c: s_apb_dp_ctrl_prdata_reg <= rxcq_base_addr_reg[63:32];
default: begin end
endcase
end
// Synchronous reset, placed last so it overrides everything above. Only
// the handshake flags are cleared; the queue configuration registers and
// the read data registers are not touched by rst in this block.
if (rst) begin
s_axil_ctrl_awready_reg <= 1'b0;
s_axil_ctrl_wready_reg <= 1'b0;
s_axil_ctrl_bvalid_reg <= 1'b0;
s_axil_ctrl_arready_reg <= 1'b0;
s_axil_ctrl_rvalid_reg <= 1'b0;
s_apb_dp_ctrl_pready_reg <= 1'b0;
end
end
/*
* CQ management interface
*/
.req_qn(cq_req_cqn_reg),
.req_valid(cq_req_valid_reg),
.req_ready(cq_req_ready),
.rsp_qn(),
.rsp_dqn(),
.rsp_addr(cq_rsp_addr),
.rsp_phase(cq_rsp_phase),
.rsp_error(cq_rsp_error),
.rsp_valid(cq_rsp_valid),
.rsp_ready(cq_rsp_ready_reg)
);
// States of the completion write state machine (2-bit encoding, four states).
typedef enum logic [1:0] {
// Waits for a completion to be presented on s_axis_cpl.
STATE_IDLE,
// NOTE(review): no use of this state is visible in this part of the file - confirm it is still needed.
STATE_RX_CPL,
// Waits for the CQ manager response (cq_rsp_valid) carrying address, phase and error.
STATE_QUERY_CQ,
// Entered once the DMA write descriptor request has been raised.
STATE_WRITE_DATA
} state_t;
@@ -231,13 +121,8 @@ always_ff @(posedge clk) begin
dma_wr_desc_req.req_user <= '0;
dma_wr_desc_req.req_valid <= dma_wr_desc_req.req_valid && !dma_wr_desc_req.req_ready;
if (!txcq_en_reg) begin
txcq_prod_ptr_reg <= '0;
end
if (!rxcq_en_reg) begin
rxcq_prod_ptr_reg <= '0;
end
cq_req_valid_reg <= cq_req_valid_reg && !cq_req_ready;
cq_rsp_ready_reg <= 1'b0;
irq_reg <= 1'b0;
@@ -245,29 +130,32 @@ always_ff @(posedge clk) begin
STATE_IDLE: begin
dma_wr_desc_req.req_src_addr <= '0;
if (s_axis_cpl.tdest == 0) begin
dma_wr_desc_req.req_dst_addr <= txcq_base_addr_reg + 64'(16'(txcq_prod_ptr_reg & ({16{1'b1}} >> (16 - txcq_size_reg))) * 16);
phase_tag_reg <= !txcq_prod_ptr_reg[txcq_size_reg];
if (s_axis_cpl.tvalid && !s_axis_cpl.tready) begin
txcq_prod_ptr_reg <= txcq_prod_ptr_reg + 1;
if (txcq_en_reg) begin
dma_wr_desc_req.req_valid <= 1'b1;
state_reg <= STATE_WRITE_DATA;
end else begin
state_reg <= STATE_IDLE;
end
end
cq_req_cqn_reg <= CQN_W'(s_axis_cpl.tdest);
if (s_axis_cpl.tvalid && !s_axis_cpl.tready) begin
cq_req_valid_reg <= 1'b1;
state_reg <= STATE_QUERY_CQ;
end else begin
dma_wr_desc_req.req_dst_addr <= rxcq_base_addr_reg + 64'(16'(rxcq_prod_ptr_reg & ({16{1'b1}} >> (16 - rxcq_size_reg))) * 16);
phase_tag_reg <= !rxcq_prod_ptr_reg[rxcq_size_reg];
if (s_axis_cpl.tvalid && !s_axis_cpl.tready) begin
rxcq_prod_ptr_reg <= rxcq_prod_ptr_reg + 1;
if (rxcq_en_reg) begin
dma_wr_desc_req.req_valid <= 1'b1;
state_reg <= STATE_WRITE_DATA;
end else begin
state_reg <= STATE_IDLE;
end
state_reg <= STATE_IDLE;
end
end
STATE_QUERY_CQ: begin
dma_wr_desc_req.req_src_addr <= '0;
cq_rsp_ready_reg <= 1'b1;
if (cq_rsp_valid) begin
cq_rsp_ready_reg <= 1'b0;
dma_wr_desc_req.req_dst_addr <= cq_rsp_addr;
phase_tag_reg <= cq_rsp_phase;
if (cq_rsp_error) begin
// drop completion
s_axis_cpl.tready <= 1'b1;
state_reg <= STATE_IDLE;
end else begin
dma_wr_desc_req.req_valid <= 1'b1;
state_reg <= STATE_WRITE_DATA;
end
end
end
@@ -285,8 +173,8 @@ always_ff @(posedge clk) begin
if (rst) begin
state_reg <= STATE_IDLE;
txcq_prod_ptr_reg <= '0;
rxcq_prod_ptr_reg <= '0;
cq_req_valid_reg <= 1'b0;
cq_rsp_ready_reg <= 1'b0;
irq_reg <= 1'b0;
end
end