diff --git a/README.md b/README.md index 27699f1..f43b790 100644 --- a/README.md +++ b/README.md @@ -66,6 +66,7 @@ To facilitate the dual-license model, contributions to the project can only be a * AXI central DMA * AXI streaming DMA * DMA client for AXI stream + * DMA interface for AXI * Segmented SDP RAM * Segmented dual-clock SDP RAM * Ethernet diff --git a/src/dma/rtl/taxi_dma_if_axi.f b/src/dma/rtl/taxi_dma_if_axi.f new file mode 100644 index 0000000..b5639d5 --- /dev/null +++ b/src/dma/rtl/taxi_dma_if_axi.f @@ -0,0 +1,6 @@ +taxi_dma_if_axi.sv +taxi_dma_if_axi_rd.sv +taxi_dma_if_axi_wr.sv +taxi_dma_desc_if.sv +taxi_dma_ram_if.sv +../lib/taxi/src/axi/rtl/taxi_axi_if.sv diff --git a/src/dma/rtl/taxi_dma_if_axi.sv b/src/dma/rtl/taxi_dma_if_axi.sv new file mode 100644 index 0000000..e94384b --- /dev/null +++ b/src/dma/rtl/taxi_dma_if_axi.sv @@ -0,0 +1,210 @@ +// SPDX-License-Identifier: CERN-OHL-S-2.0 +/* + +Copyright (c) 2021-2025 FPGA Ninja, LLC + +Authors: +- Alex Forencich + +*/ + +`resetall +`timescale 1ns / 1ps +`default_nettype none + +/* + * AXI DMA interface: wrapper that instantiates taxi_dma_if_axi_rd (dma_rd_inst) and taxi_dma_if_axi_wr (dma_wr_inst) and connects every port straight through; it contains no logic of its own + */ +module taxi_dma_if_axi # +( + // Maximum AXI burst length to generate (forwarded unchanged to both the read and the write module) + parameter AXI_MAX_BURST_LEN = 256, + // Operation table size (read); forwarded as OP_TBL_SIZE of dma_rd_inst and sizes the stat_rd_*_tag ports + parameter RD_OP_TBL_SIZE = 32, + // Operation table size (write); forwarded as OP_TBL_SIZE of dma_wr_inst and sizes the stat_wr_*_tag ports + parameter WR_OP_TBL_SIZE = 32, + // Use AXI ID signals (read); forwarded as USE_AXI_ID of dma_rd_inst + parameter RD_USE_AXI_ID = 0, + // Use AXI ID signals (write); forwarded as USE_AXI_ID of dma_wr_inst + parameter WR_USE_AXI_ID = 1 +) +( + input wire logic clk, + input wire logic rst, + + /* + * AXI master interface (m_axi_wr is handed to dma_wr_inst, m_axi_rd to dma_rd_inst) + */ + taxi_axi_if.wr_mst m_axi_wr, + taxi_axi_if.rd_mst m_axi_rd, + + /* + * Read descriptor (request sink and status source modports, both connected to dma_rd_inst) + */ + taxi_dma_desc_if.req_snk rd_desc_req, + taxi_dma_desc_if.sts_src rd_desc_sts, + + /* + * Write descriptor (request sink and status source modports, both connected to dma_wr_inst) + */ + taxi_dma_desc_if.req_snk wr_desc_req, + taxi_dma_desc_if.sts_src wr_desc_sts, + + /* + * RAM interface (note the crossing: dma_ram_wr is connected to dma_rd_inst, dma_ram_rd to dma_wr_inst) + */ + taxi_dma_ram_if.wr_mst dma_ram_wr, + taxi_dma_ram_if.rd_mst dma_ram_rd, + + /* + * Configuration (read_enable / write_enable drive the enable input of dma_rd_inst / dma_wr_inst) + */ + input wire logic read_enable, 
+ input wire logic write_enable, + + /* + * Status (status_busy output of dma_rd_inst / dma_wr_inst respectively) + */ + output wire logic status_rd_busy, + output wire logic status_wr_busy, + + /* + * Statistics (passed through from the sub-modules; tag ports are $clog2(RD_OP_TBL_SIZE) / $clog2(WR_OP_TBL_SIZE) bits wide) + */ + output wire logic [$clog2(RD_OP_TBL_SIZE)-1:0] stat_rd_op_start_tag, + output wire logic stat_rd_op_start_valid, + output wire logic [$clog2(RD_OP_TBL_SIZE)-1:0] stat_rd_op_finish_tag, + output wire logic [3:0] stat_rd_op_finish_status, + output wire logic stat_rd_op_finish_valid, + output wire logic [$clog2(RD_OP_TBL_SIZE)-1:0] stat_rd_req_start_tag, + output wire logic [12:0] stat_rd_req_start_len, + output wire logic stat_rd_req_start_valid, + output wire logic [$clog2(RD_OP_TBL_SIZE)-1:0] stat_rd_req_finish_tag, + output wire logic [3:0] stat_rd_req_finish_status, + output wire logic stat_rd_req_finish_valid, + output wire logic stat_rd_op_tbl_full, + output wire logic stat_rd_tx_stall, + output wire logic [$clog2(WR_OP_TBL_SIZE)-1:0] stat_wr_op_start_tag, + output wire logic stat_wr_op_start_valid, + output wire logic [$clog2(WR_OP_TBL_SIZE)-1:0] stat_wr_op_finish_tag, + output wire logic [3:0] stat_wr_op_finish_status, + output wire logic stat_wr_op_finish_valid, + output wire logic [$clog2(WR_OP_TBL_SIZE)-1:0] stat_wr_req_start_tag, + output wire logic [12:0] stat_wr_req_start_len, + output wire logic stat_wr_req_start_valid, + output wire logic [$clog2(WR_OP_TBL_SIZE)-1:0] stat_wr_req_finish_tag, + output wire logic [3:0] stat_wr_req_finish_status, + output wire logic stat_wr_req_finish_valid, + output wire logic stat_wr_op_tbl_full, + output wire logic stat_wr_tx_stall +); + +taxi_dma_if_axi_rd #( + .AXI_MAX_BURST_LEN(AXI_MAX_BURST_LEN), + .OP_TBL_SIZE(RD_OP_TBL_SIZE), + .USE_AXI_ID(RD_USE_AXI_ID) +) +dma_rd_inst ( + .clk(clk), + .rst(rst), + + /* + * AXI master interface (read side only) + */ + .m_axi_rd(m_axi_rd), + + /* + * Read descriptor + */ + .rd_desc_req(rd_desc_req), + .rd_desc_sts(rd_desc_sts), + + /* + * RAM interface (the read module takes the RAM write-master port) + */ + .dma_ram_wr(dma_ram_wr), + + /* + * Configuration + */ + .enable(read_enable), + + 
/* + * Status + */ + .status_busy(status_rd_busy), + + /* + * Statistics (one-to-one with the stat_rd_* outputs of this wrapper) + */ + .stat_rd_op_start_tag(stat_rd_op_start_tag), + .stat_rd_op_start_valid(stat_rd_op_start_valid), + .stat_rd_op_finish_tag(stat_rd_op_finish_tag), + .stat_rd_op_finish_status(stat_rd_op_finish_status), + .stat_rd_op_finish_valid(stat_rd_op_finish_valid), + .stat_rd_req_start_tag(stat_rd_req_start_tag), + .stat_rd_req_start_len(stat_rd_req_start_len), + .stat_rd_req_start_valid(stat_rd_req_start_valid), + .stat_rd_req_finish_tag(stat_rd_req_finish_tag), + .stat_rd_req_finish_status(stat_rd_req_finish_status), + .stat_rd_req_finish_valid(stat_rd_req_finish_valid), + .stat_rd_op_tbl_full(stat_rd_op_tbl_full), + .stat_rd_tx_stall(stat_rd_tx_stall) +); + +taxi_dma_if_axi_wr #( + .AXI_MAX_BURST_LEN(AXI_MAX_BURST_LEN), + .OP_TBL_SIZE(WR_OP_TBL_SIZE), + .USE_AXI_ID(WR_USE_AXI_ID) +) +dma_wr_inst ( + .clk(clk), + .rst(rst), + + /* + * AXI master interface (write side only) + */ + .m_axi_wr(m_axi_wr), + + /* + * Write descriptor + */ + .wr_desc_req(wr_desc_req), + .wr_desc_sts(wr_desc_sts), + + /* + * RAM interface (the RAM read-master port is handed to the write module) + */ + .dma_ram_rd(dma_ram_rd), + + /* + * Configuration + */ + .enable(write_enable), + + /* + * Status + */ + .status_busy(status_wr_busy), + + /* + * Statistics (one-to-one with the stat_wr_* outputs of this wrapper) + */ + .stat_wr_op_start_tag(stat_wr_op_start_tag), + .stat_wr_op_start_valid(stat_wr_op_start_valid), + .stat_wr_op_finish_tag(stat_wr_op_finish_tag), + .stat_wr_op_finish_status(stat_wr_op_finish_status), + .stat_wr_op_finish_valid(stat_wr_op_finish_valid), + .stat_wr_req_start_tag(stat_wr_req_start_tag), + .stat_wr_req_start_len(stat_wr_req_start_len), + .stat_wr_req_start_valid(stat_wr_req_start_valid), + .stat_wr_req_finish_tag(stat_wr_req_finish_tag), + .stat_wr_req_finish_status(stat_wr_req_finish_status), + .stat_wr_req_finish_valid(stat_wr_req_finish_valid), + .stat_wr_op_tbl_full(stat_wr_op_tbl_full), + .stat_wr_tx_stall(stat_wr_tx_stall) +); + +endmodule + +`resetall diff --git a/src/dma/rtl/taxi_dma_if_axi_rd.sv 
b/src/dma/rtl/taxi_dma_if_axi_rd.sv new file mode 100644 index 0000000..4f2d15f --- /dev/null +++ b/src/dma/rtl/taxi_dma_if_axi_rd.sv @@ -0,0 +1,1043 @@ +// SPDX-License-Identifier: CERN-OHL-S-2.0 +/* + +Copyright (c) 2021-2025 FPGA Ninja, LLC + +Authors: +- Alex Forencich + +*/ + +`resetall +`timescale 1ns / 1ps +`default_nettype none + +/* + * AXI DMA read interface + */ +module taxi_dma_if_axi_rd # +( + // Maximum AXI burst length to generate + parameter AXI_MAX_BURST_LEN = 256, + // Operation table size + parameter OP_TBL_SIZE = 32, + // Use AXI ID signals + parameter logic USE_AXI_ID = 1'b0 +) +( + input wire logic clk, + input wire logic rst, + + /* + * AXI master interface + */ + taxi_axi_if.rd_mst m_axi_rd, + + /* + * Read descriptor + */ + taxi_dma_desc_if.req_snk rd_desc_req, + taxi_dma_desc_if.sts_src rd_desc_sts, + + /* + * RAM interface + */ + taxi_dma_ram_if.wr_mst dma_ram_wr, + + /* + * Configuration + */ + input wire logic enable, + + /* + * Status + */ + output wire logic status_busy, + + /* + * Statistics + */ + output wire logic [$clog2(OP_TBL_SIZE)-1:0] stat_rd_op_start_tag, + output wire logic stat_rd_op_start_valid, + output wire logic [$clog2(OP_TBL_SIZE)-1:0] stat_rd_op_finish_tag, + output wire logic [3:0] stat_rd_op_finish_status, + output wire logic stat_rd_op_finish_valid, + output wire logic [$clog2(OP_TBL_SIZE)-1:0] stat_rd_req_start_tag, + output wire logic [12:0] stat_rd_req_start_len, + output wire logic stat_rd_req_start_valid, + output wire logic [$clog2(OP_TBL_SIZE)-1:0] stat_rd_req_finish_tag, + output wire logic [3:0] stat_rd_req_finish_status, + output wire logic stat_rd_req_finish_valid, + output wire logic stat_rd_op_tbl_full, + output wire logic stat_rd_tx_stall +); + +// TODO cleanup +// verilator lint_off WIDTHEXPAND + +// extract parameters +localparam AXI_DATA_W = m_axi_rd.DATA_W; +localparam AXI_ADDR_W = m_axi_rd.ADDR_W; +localparam AXI_STRB_W = m_axi_rd.STRB_W; +localparam AXI_ID_W = m_axi_rd.ID_W; +localparam 
AXI_MAX_BURST_LEN_INT = AXI_MAX_BURST_LEN < m_axi_rd.MAX_BURST_LEN ? AXI_MAX_BURST_LEN : m_axi_rd.MAX_BURST_LEN; + +localparam LEN_W = rd_desc_req.LEN_W; +localparam TAG_W = rd_desc_req.TAG_W; + +localparam RAM_SEGS = dma_ram_wr.SEGS; +localparam RAM_SEG_ADDR_W = dma_ram_wr.SEG_ADDR_W; +localparam RAM_SEG_DATA_W = dma_ram_wr.SEG_DATA_W; +localparam RAM_SEG_BE_W = dma_ram_wr.SEG_BE_W; +localparam RAM_SEL_W = dma_ram_wr.SEL_W; + +localparam RAM_ADDR_W = RAM_SEG_ADDR_W+$clog2(RAM_SEGS*RAM_SEG_BE_W); +localparam RAM_DATA_W = RAM_SEGS*RAM_SEG_DATA_W; +localparam RAM_BYTE_LANES = RAM_SEG_BE_W; +localparam RAM_BYTE_SIZE = RAM_SEG_DATA_W/RAM_BYTE_LANES; + +localparam AXI_BYTE_LANES = AXI_STRB_W; +localparam AXI_BYTE_SIZE = AXI_DATA_W/AXI_BYTE_LANES; +localparam AXI_BURST_SIZE = $clog2(AXI_STRB_W); +localparam AXI_MAX_BURST_SIZE = AXI_MAX_BURST_LEN << AXI_BURST_SIZE; + +localparam OFFSET_W = AXI_STRB_W > 1 ? $clog2(AXI_STRB_W) : 1; +localparam OFFSET_MASK = AXI_STRB_W > 1 ? {OFFSET_W{1'b1}} : 0; +localparam RAM_OFFSET_W = $clog2(RAM_SEGS*RAM_SEG_BE_W); +localparam ADDR_MASK = {AXI_ADDR_W{1'b1}} << $clog2(AXI_STRB_W); +localparam CYCLE_COUNT_W = LEN_W - AXI_BURST_SIZE + 1; + +localparam OP_TAG_W = $clog2(OP_TBL_SIZE); +localparam OP_TBL_READ_COUNT_W = AXI_ID_W+1; +localparam OP_TBL_WRITE_COUNT_W = LEN_W; + +localparam STATUS_FIFO_AW = 5; +localparam OUTPUT_FIFO_AW = 5; + +// check configuration +if (AXI_BYTE_SIZE * AXI_STRB_W != AXI_DATA_W) + $fatal(0, "Error: AXI data width not evenly divisible (instance %m)"); + +if (AXI_BYTE_SIZE != RAM_BYTE_SIZE) + $fatal(0, "Error: byte size mismatch (instance %m)"); + +if (2**$clog2(AXI_BYTE_LANES) != AXI_BYTE_LANES) + $fatal(0, "Error: AXI byte lane count must be even power of two (instance %m)"); + +if (AXI_MAX_BURST_LEN < 1 || AXI_MAX_BURST_LEN > 256) + $fatal(0, "Error: AXI_MAX_BURST_LEN must be between 1 and 256 (instance %m)"); + +if (RAM_SEGS < 2) + $fatal(0, "Error: RAM interface requires at least 2 segments (instance %m)"); + 
+if (RAM_DATA_W != AXI_DATA_W*2) + $fatal(0, "Error: RAM interface width must be double the AXI interface width (instance %m)"); + +if (2**$clog2(RAM_BYTE_LANES) != RAM_BYTE_LANES) + $fatal(0, "Error: RAM byte lane count must be even power of two (instance %m)"); + +if (OP_TBL_SIZE > 2**AXI_ID_W) + $fatal(0, "Error: AXI_ID_W insufficient for requested OP_TBL_SIZE (instance %m)"); + +if (rd_desc_req.SRC_ADDR_W < AXI_ADDR_W || rd_desc_req.DST_ADDR_W < RAM_ADDR_W) + $fatal(0, "Error: Descriptor address width is not sufficient (instance %m)"); + +localparam logic [1:0] + AXI_RESP_OKAY = 2'b00, + AXI_RESP_EXOKAY = 2'b01, + AXI_RESP_SLVERR = 2'b10, + AXI_RESP_DECERR = 2'b11; + +localparam logic [3:0] + DMA_ERROR_NONE = 4'd0, + DMA_ERROR_TIMEOUT = 4'd1, + DMA_ERROR_PARITY = 4'd2, + DMA_ERROR_AXI_RD_SLVERR = 4'd4, + DMA_ERROR_AXI_RD_DECERR = 4'd5, + DMA_ERROR_AXI_WR_SLVERR = 4'd6, + DMA_ERROR_AXI_WR_DECERR = 4'd7, + DMA_ERROR_PCIE_FLR = 4'd8, + DMA_ERROR_PCIE_CPL_POISONED = 4'd9, + DMA_ERROR_PCIE_CPL_STATUS_UR = 4'd10, + DMA_ERROR_PCIE_CPL_STATUS_CA = 4'd11; + +localparam logic [0:0] + REQ_STATE_IDLE = 1'd0, + REQ_STATE_START = 1'd1; + +logic [0:0] req_state_reg = REQ_STATE_IDLE, req_state_next; + +localparam logic [0:0] + AXI_STATE_IDLE = 1'd0, + AXI_STATE_WRITE = 1'd1; + +logic [0:0] axi_state_reg = AXI_STATE_IDLE, axi_state_next; + +logic [AXI_ADDR_W-1:0] req_axi_addr_reg = '0, req_axi_addr_next; +logic [RAM_SEL_W-1:0] req_ram_sel_reg = '0, req_ram_sel_next; +logic [RAM_ADDR_W-1:0] req_ram_addr_reg = '0, req_ram_addr_next; +logic [LEN_W-1:0] req_op_count_reg = '0, req_op_count_next; +logic [12:0] req_tr_count_reg = '0, req_tr_count_next; +logic req_zero_len_reg = 1'b0, req_zero_len_next; +logic [TAG_W-1:0] req_tag_reg = '0, req_tag_next; + +logic [RAM_SEL_W-1:0] ram_sel_reg = '0, ram_sel_next; +logic [RAM_ADDR_W-1:0] addr_reg = '0, addr_next; +logic [RAM_ADDR_W-1:0] addr_delay_reg = '0, addr_delay_next; +logic [12:0] op_count_reg = '0, op_count_next; +logic zero_len_reg 
= 1'b0, zero_len_next; +logic [RAM_SEGS-1:0] ram_mask_reg = '0, ram_mask_next; +logic [RAM_SEGS-1:0] ram_mask_0_reg = '0, ram_mask_0_next; +logic [RAM_SEGS-1:0] ram_mask_1_reg = '0, ram_mask_1_next; +logic ram_wrap_reg = 1'b0, ram_wrap_next; +logic [OFFSET_W+1-1:0] cycle_byte_count_reg = '0, cycle_byte_count_next; +logic [RAM_OFFSET_W-1:0] start_offset_reg = '0, start_offset_next; +logic [RAM_OFFSET_W-1:0] end_offset_reg = '0, end_offset_next; +logic [OFFSET_W-1:0] offset_reg = '0, offset_next; +logic [OP_TAG_W-1:0] op_tag_reg = '0, op_tag_next; + +logic [STATUS_FIFO_AW+1-1:0] status_fifo_wr_ptr_reg = '0; +logic [STATUS_FIFO_AW+1-1:0] status_fifo_rd_ptr_reg = '0, status_fifo_rd_ptr_next; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +logic [OP_TAG_W-1:0] status_fifo_op_tag[2**STATUS_FIFO_AW]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +logic [RAM_SEGS-1:0] status_fifo_mask[2**STATUS_FIFO_AW]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +logic status_fifo_finish[2**STATUS_FIFO_AW]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +logic [3:0] status_fifo_error[2**STATUS_FIFO_AW]; +logic [OP_TAG_W-1:0] status_fifo_wr_op_tag; +logic [RAM_SEGS-1:0] status_fifo_wr_mask; +logic status_fifo_wr_finish; +logic [3:0] status_fifo_wr_error; +logic status_fifo_we; +logic status_fifo_mask_reg = 1'b0, status_fifo_mask_next; +logic status_fifo_finish_reg = 1'b0, status_fifo_finish_next; +logic [3:0] status_fifo_error_reg = 4'd0, status_fifo_error_next; +logic status_fifo_we_reg = 1'b0, status_fifo_we_next; +logic status_fifo_half_full_reg = 1'b0; +logic [OP_TAG_W-1:0] status_fifo_rd_op_tag_reg = '0, status_fifo_rd_op_tag_next; +logic [RAM_SEGS-1:0] status_fifo_rd_mask_reg = '0, status_fifo_rd_mask_next; +logic status_fifo_rd_finish_reg = 1'b0, status_fifo_rd_finish_next; +logic [3:0] status_fifo_rd_error_reg = 4'd0, status_fifo_rd_error_next; +logic status_fifo_rd_valid_reg = 1'b0, 
status_fifo_rd_valid_next; + +logic [OP_TAG_W+1-1:0] active_op_count_reg = '0; +logic inc_active_op; +logic dec_active_op; + +logic [AXI_DATA_W-1:0] m_axi_rdata_int_reg = '0, m_axi_rdata_int_next; +logic m_axi_rvalid_int_reg = 1'b0, m_axi_rvalid_int_next; + +logic [AXI_ID_W-1:0] m_axi_arid_reg = '0, m_axi_arid_next; +logic [AXI_ADDR_W-1:0] m_axi_araddr_reg = '0, m_axi_araddr_next; +logic [7:0] m_axi_arlen_reg = 8'd0, m_axi_arlen_next; +logic m_axi_arvalid_reg = 1'b0, m_axi_arvalid_next; +logic m_axi_rready_reg = 1'b0, m_axi_rready_next; + +logic rd_desc_req_ready_reg = 1'b0, rd_desc_req_ready_next; + +logic [TAG_W-1:0] rd_desc_sts_tag_reg = '0, rd_desc_sts_tag_next; +logic [3:0] rd_desc_sts_error_reg = 4'd0, rd_desc_sts_error_next; +logic rd_desc_sts_valid_reg = 1'b0, rd_desc_sts_valid_next; + +logic status_busy_reg = 1'b0; + +logic [OP_TAG_W-1:0] stat_rd_op_start_tag_reg = '0, stat_rd_op_start_tag_next; +logic stat_rd_op_start_valid_reg = 1'b0, stat_rd_op_start_valid_next; +logic [OP_TAG_W-1:0] stat_rd_op_finish_tag_reg = '0, stat_rd_op_finish_tag_next; +logic [3:0] stat_rd_op_finish_status_reg = 4'd0, stat_rd_op_finish_status_next; +logic stat_rd_op_finish_valid_reg = 1'b0, stat_rd_op_finish_valid_next; +logic [OP_TAG_W-1:0] stat_rd_req_start_tag_reg = '0, stat_rd_req_start_tag_next; +logic [12:0] stat_rd_req_start_len_reg = 13'd0, stat_rd_req_start_len_next; +logic stat_rd_req_start_valid_reg = 1'b0, stat_rd_req_start_valid_next; +logic [OP_TAG_W-1:0] stat_rd_req_finish_tag_reg = '0, stat_rd_req_finish_tag_next; +logic [3:0] stat_rd_req_finish_status_reg = 4'd0, stat_rd_req_finish_status_next; +logic stat_rd_req_finish_valid_reg = 1'b0, stat_rd_req_finish_valid_next; +logic stat_rd_op_tbl_full_reg = 1'b0, stat_rd_op_tbl_full_next; +logic stat_rd_tx_stall_reg = 1'b0, stat_rd_tx_stall_next; + +// internal datapath +logic [RAM_SEGS-1:0][RAM_SEL_W-1:0] ram_wr_cmd_sel_int; +logic [RAM_SEGS-1:0][RAM_SEG_BE_W-1:0] ram_wr_cmd_be_int; +logic 
[RAM_SEGS-1:0][RAM_SEG_ADDR_W-1:0] ram_wr_cmd_addr_int; +logic [RAM_SEGS-1:0][RAM_SEG_DATA_W-1:0] ram_wr_cmd_data_int; +logic [RAM_SEGS-1:0] ram_wr_cmd_valid_int; +wire [RAM_SEGS-1:0] ram_wr_cmd_ready_int; + +wire [RAM_SEGS-1:0] out_done; +logic [RAM_SEGS-1:0] out_done_ack; + +assign m_axi_rd.arid = USE_AXI_ID ? m_axi_arid_reg : '0; +assign m_axi_rd.araddr = m_axi_araddr_reg; +assign m_axi_rd.arlen = m_axi_arlen_reg; +assign m_axi_rd.arsize = 3'(AXI_BURST_SIZE); +assign m_axi_rd.arburst = 2'b01; +assign m_axi_rd.arlock = 1'b0; +assign m_axi_rd.arcache = 4'b0011; +assign m_axi_rd.arprot = 3'b010; +assign m_axi_rd.arvalid = m_axi_arvalid_reg; +assign m_axi_rd.rready = m_axi_rready_reg; + +assign rd_desc_req.req_ready = rd_desc_req_ready_reg; + +assign rd_desc_sts.sts_tag = rd_desc_sts_tag_reg; +assign rd_desc_sts.sts_error = rd_desc_sts_error_reg; +assign rd_desc_sts.sts_valid = rd_desc_sts_valid_reg; + +assign status_busy = status_busy_reg; + +assign stat_rd_op_start_tag = stat_rd_op_start_tag_reg; +assign stat_rd_op_start_valid = stat_rd_op_start_valid_reg; +assign stat_rd_op_finish_tag = stat_rd_op_finish_tag_reg; +assign stat_rd_op_finish_status = stat_rd_op_finish_status_reg; +assign stat_rd_op_finish_valid = stat_rd_op_finish_valid_reg; +assign stat_rd_req_start_tag = stat_rd_req_start_tag_reg; +assign stat_rd_req_start_len = stat_rd_req_start_len_reg; +assign stat_rd_req_start_valid = stat_rd_req_start_valid_reg; +assign stat_rd_req_finish_tag = stat_rd_req_finish_tag_reg; +assign stat_rd_req_finish_status = stat_rd_req_finish_status_reg; +assign stat_rd_req_finish_valid = stat_rd_req_finish_valid_reg; +assign stat_rd_op_tbl_full = stat_rd_op_tbl_full_reg; +assign stat_rd_tx_stall = stat_rd_tx_stall_reg; + +// operation tag management +logic [OP_TAG_W+1-1:0] op_tbl_start_ptr_reg = '0; +logic [AXI_ADDR_W-1:0] op_tbl_start_axi_addr; +logic [RAM_SEL_W-1:0] op_tbl_start_ram_sel; +logic [RAM_ADDR_W-1:0] op_tbl_start_ram_addr; +logic [12:0] op_tbl_start_len; +logic 
op_tbl_start_zero_len; +logic [CYCLE_COUNT_W-1:0] op_tbl_start_cycle_count; +logic [TAG_W-1:0] op_tbl_start_tag; +logic op_tbl_start_last; +logic op_tbl_start_en; +logic [OP_TAG_W+1-1:0] op_tbl_read_complete_ptr_reg = '0; +logic op_tbl_read_complete_en; +logic [OP_TAG_W-1:0] op_tbl_update_status_ptr; +logic [3:0] op_tbl_update_status_error; +logic op_tbl_update_status_en; +logic [OP_TAG_W-1:0] op_tbl_write_complete_ptr; +logic op_tbl_write_complete_en; +logic [OP_TAG_W+1-1:0] op_tbl_finish_ptr_reg = '0; +logic op_tbl_finish_en; + +logic [2**OP_TAG_W-1:0] op_tbl_active = '0; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +logic [AXI_ADDR_W-1:0] op_tbl_axi_addr[2**OP_TAG_W]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +logic [RAM_SEL_W-1:0] op_tbl_ram_sel[2**OP_TAG_W]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +logic [RAM_ADDR_W-1:0] op_tbl_ram_addr[2**OP_TAG_W]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +logic [12:0] op_tbl_len[2**OP_TAG_W]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +logic op_tbl_zero_len[2**OP_TAG_W]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +logic [CYCLE_COUNT_W-1:0] op_tbl_cycle_count[2**OP_TAG_W]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +logic [TAG_W-1:0] op_tbl_tag[2**OP_TAG_W]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +logic op_tbl_last[2**OP_TAG_W]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +logic op_tbl_write_complete[2**OP_TAG_W]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +logic op_tbl_error_a[2**OP_TAG_W]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +logic op_tbl_error_b[2**OP_TAG_W]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +logic [3:0] op_tbl_error_code[2**OP_TAG_W]; + +initial begin + for (integer i = 0; i < 2**OP_TAG_W; i = i + 1) begin + op_tbl_axi_addr[i] = '0; + 
op_tbl_ram_sel[i] = '0; + op_tbl_ram_addr[i] = '0; + op_tbl_len[i] = '0; + op_tbl_zero_len[i] = 1'b0; + op_tbl_cycle_count[i] = '0; + op_tbl_tag[i] = '0; + op_tbl_last[i] = '0; + op_tbl_write_complete[i] = '0; + op_tbl_error_a[i] = '0; + op_tbl_error_b[i] = '0; + op_tbl_error_code[i] = '0; + end +end + +always_comb begin + req_state_next = REQ_STATE_IDLE; + + rd_desc_req_ready_next = 1'b0; + + stat_rd_op_start_tag_next = stat_rd_op_start_tag_reg; + stat_rd_op_start_valid_next = 1'b0; + stat_rd_req_start_tag_next = stat_rd_req_start_tag_reg; + stat_rd_req_start_len_next = stat_rd_req_start_len_reg; + stat_rd_req_start_valid_next = 1'b0; + stat_rd_op_tbl_full_next = !(!op_tbl_active[op_tbl_start_ptr_reg[OP_TAG_W-1:0]] && ($unsigned(op_tbl_start_ptr_reg - op_tbl_finish_ptr_reg) < 2**OP_TAG_W)); + stat_rd_tx_stall_next = m_axi_arvalid_reg && !m_axi_rd.arready; + + req_axi_addr_next = req_axi_addr_reg; + req_ram_sel_next = req_ram_sel_reg; + req_ram_addr_next = req_ram_addr_reg; + req_op_count_next = req_op_count_reg; + req_tr_count_next = req_tr_count_reg; + req_zero_len_next = req_zero_len_reg; + req_tag_next = req_tag_reg; + + m_axi_arid_next = m_axi_arid_reg; + m_axi_araddr_next = m_axi_araddr_reg; + m_axi_arlen_next = m_axi_arlen_reg; + m_axi_arvalid_next = m_axi_arvalid_reg && !m_axi_rd.arready; + + op_tbl_start_axi_addr = req_axi_addr_reg; + op_tbl_start_ram_sel = req_ram_sel_reg; + op_tbl_start_ram_addr = req_ram_addr_reg; + op_tbl_start_len = '0; + op_tbl_start_zero_len = req_zero_len_reg; + op_tbl_start_tag = req_tag_reg; + op_tbl_start_cycle_count = '0; + op_tbl_start_last = '0; + op_tbl_start_en = 1'b0; + + inc_active_op = 1'b0; + + // segmentation and request generation + case (req_state_reg) + REQ_STATE_IDLE: begin + rd_desc_req_ready_next = !op_tbl_active[op_tbl_start_ptr_reg[OP_TAG_W-1:0]] && ($unsigned(op_tbl_start_ptr_reg - op_tbl_finish_ptr_reg) < 2**OP_TAG_W) && enable; + + req_axi_addr_next = rd_desc_req.req_src_addr; + req_ram_sel_next = 
rd_desc_req.req_dst_sel; + req_ram_addr_next = rd_desc_req.req_dst_addr; + if (rd_desc_req.req_len == 0) begin + // zero-length operation + req_op_count_next = 1; + req_zero_len_next = 1'b1; + end else begin + req_op_count_next = rd_desc_req.req_len; + req_zero_len_next = 1'b0; + end + req_tag_next = rd_desc_req.req_tag; + + if (req_op_count_next <= LEN_W'(AXI_MAX_BURST_SIZE) - LEN_W'(req_axi_addr_next & OFFSET_MASK) || AXI_MAX_BURST_SIZE >= 4096) begin + // packet smaller than max burst size + if ((12'(req_axi_addr_next & 12'hfff) + 12'(req_op_count_next & 12'hfff)) >> 12 != 0 || req_op_count_next >> 12 != 0) begin + // crosses 4k boundary + req_tr_count_next = 13'h1000 - req_axi_addr_next[11:0]; + end else begin + // does not cross 4k boundary + req_tr_count_next = 13'(req_op_count_next); + end + end else begin + // packet larger than max burst size + if ((12'(req_axi_addr_next & 12'hfff) + 12'(AXI_MAX_BURST_SIZE)) >> 12 != 0) begin + // crosses 4k boundary + req_tr_count_next = 13'h1000 - req_axi_addr_next[11:0]; + end else begin + // does not cross 4k boundary + req_tr_count_next = 13'(AXI_MAX_BURST_SIZE) - 13'(req_axi_addr_next & OFFSET_MASK); + end + end + + if (rd_desc_req.req_ready && rd_desc_req.req_valid) begin + rd_desc_req_ready_next = 1'b0; + + stat_rd_op_start_tag_next = stat_rd_op_start_tag_reg+1; + stat_rd_op_start_valid_next = 1'b1; + + req_state_next = REQ_STATE_START; + end else begin + req_state_next = REQ_STATE_IDLE; + end + end + REQ_STATE_START: begin + if (!op_tbl_active[op_tbl_start_ptr_reg[OP_TAG_W-1:0]] && ($unsigned(op_tbl_start_ptr_reg - op_tbl_finish_ptr_reg) < 2**OP_TAG_W) && (!m_axi_rd.arvalid || m_axi_rd.arready)) begin + req_axi_addr_next = req_axi_addr_reg + AXI_ADDR_W'(req_tr_count_reg); + req_ram_addr_next = req_ram_addr_reg + RAM_ADDR_W'(req_tr_count_reg); + req_op_count_next = req_op_count_reg - LEN_W'(req_tr_count_reg); + + op_tbl_start_axi_addr = req_axi_addr_reg; + op_tbl_start_ram_sel = req_ram_sel_reg; + 
op_tbl_start_ram_addr = req_ram_addr_reg; + op_tbl_start_len = req_tr_count_next; + op_tbl_start_zero_len = req_zero_len_reg; + op_tbl_start_tag = req_tag_reg; + op_tbl_start_cycle_count = CYCLE_COUNT_W'((req_tr_count_next + 13'(req_axi_addr_reg & OFFSET_MASK) - 13'd1) >> AXI_BURST_SIZE); + op_tbl_start_last = req_op_count_reg == LEN_W'(req_tr_count_next); + op_tbl_start_en = 1'b1; + inc_active_op = 1'b1; + + stat_rd_req_start_tag_next = op_tbl_start_ptr_reg[OP_TAG_W-1:0]; + stat_rd_req_start_len_next = req_zero_len_reg ? '0 : req_tr_count_reg; + stat_rd_req_start_valid_next = 1'b1; + + m_axi_arid_next = op_tbl_start_ptr_reg[OP_TAG_W-1:0]; + m_axi_araddr_next = req_axi_addr_reg; + m_axi_arlen_next = 8'(op_tbl_start_cycle_count); + m_axi_arvalid_next = 1'b1; + + if (req_op_count_next <= LEN_W'(AXI_MAX_BURST_SIZE) - LEN_W'(req_axi_addr_next & OFFSET_MASK) || AXI_MAX_BURST_SIZE >= 4096) begin + // packet smaller than max burst size + if ((12'(req_axi_addr_next & 12'hfff) + 12'(req_op_count_next & 12'hfff)) >> 12 != 0 || req_op_count_next >> 12 != 0) begin + // crosses 4k boundary + req_tr_count_next = 13'h1000 - req_axi_addr_next[11:0]; + end else begin + // does not cross 4k boundary + req_tr_count_next = 13'(req_op_count_next); + end + end else begin + // packet larger than max burst size + if ((12'(req_axi_addr_next & 12'hfff) + 12'(AXI_MAX_BURST_SIZE)) >> 12 != 0) begin + // crosses 4k boundary + req_tr_count_next = 13'h1000 - req_axi_addr_next[11:0]; + end else begin + // does not cross 4k boundary + req_tr_count_next = 13'(AXI_MAX_BURST_SIZE) - 13'(req_axi_addr_next & OFFSET_MASK); + end + end + + if (!op_tbl_start_last) begin + req_state_next = REQ_STATE_START; + end else begin + rd_desc_req_ready_next = !op_tbl_active[op_tbl_start_ptr_reg[OP_TAG_W-1:0]] && ($unsigned(op_tbl_start_ptr_reg - op_tbl_finish_ptr_reg) < 2**OP_TAG_W) && enable; + req_state_next = REQ_STATE_IDLE; + end + end else begin + req_state_next = REQ_STATE_START; + end + end + endcase +end + 
+always_comb begin + axi_state_next = AXI_STATE_IDLE; + + m_axi_rready_next = 1'b0; + + stat_rd_op_finish_tag_next = stat_rd_op_finish_tag_reg; + stat_rd_op_finish_status_next = stat_rd_op_finish_status_reg; + stat_rd_op_finish_valid_next = 1'b0; + stat_rd_req_finish_tag_next = stat_rd_req_finish_tag_reg; + stat_rd_req_finish_status_next = stat_rd_req_finish_status_reg; + stat_rd_req_finish_valid_next = 1'b0; + + ram_sel_next = ram_sel_reg; + addr_next = addr_reg; + addr_delay_next = addr_delay_reg; + op_count_next = op_count_reg; + zero_len_next = zero_len_reg; + ram_mask_next = ram_mask_reg; + ram_mask_0_next = ram_mask_0_reg; + ram_mask_1_next = ram_mask_1_reg; + ram_wrap_next = ram_wrap_reg; + cycle_byte_count_next = cycle_byte_count_reg; + start_offset_next = start_offset_reg; + end_offset_next = end_offset_reg; + offset_next = offset_reg; + op_tag_next = op_tag_reg; + + op_tbl_read_complete_en = 1'b0; + + m_axi_rdata_int_next = m_axi_rdata_int_reg; + m_axi_rvalid_int_next = 1'b0; + + status_fifo_mask_next = 1'b1; + status_fifo_finish_next = 1'b0; + status_fifo_error_next = DMA_ERROR_NONE; + status_fifo_we_next = 1'b0; + + out_done_ack = '0; + + // Write generation + ram_wr_cmd_sel_int = '{RAM_SEGS{ram_sel_reg}}; + if (!ram_wrap_reg) begin + ram_wr_cmd_be_int = ({RAM_SEGS*RAM_SEG_BE_W{1'b1}} << start_offset_reg) & ({RAM_SEGS*RAM_SEG_BE_W{1'b1}} >> (RAM_SEGS*RAM_SEG_BE_W-1-end_offset_reg)); + end else begin + ram_wr_cmd_be_int = ({RAM_SEGS*RAM_SEG_BE_W{1'b1}} << start_offset_reg) | ({RAM_SEGS*RAM_SEG_BE_W{1'b1}} >> (RAM_SEGS*RAM_SEG_BE_W-1-end_offset_reg)); + end + for (integer i = 0; i < RAM_SEGS; i = i + 1) begin + ram_wr_cmd_addr_int[i] = addr_delay_reg[RAM_ADDR_W-1:RAM_ADDR_W-RAM_SEG_ADDR_W]; + if (ram_mask_1_reg[i]) begin + ram_wr_cmd_addr_int[i] = addr_delay_reg[RAM_ADDR_W-1:RAM_ADDR_W-RAM_SEG_ADDR_W]+1; + end + end + ram_wr_cmd_data_int = RAM_DATA_W'({3{m_axi_rdata_int_reg}} >> (AXI_DATA_W - offset_reg*AXI_BYTE_SIZE)); + ram_wr_cmd_valid_int = '0; + + if 
(m_axi_rvalid_int_reg) begin + ram_wr_cmd_valid_int = ram_mask_reg; + end + + // AXI read response handling + case (axi_state_reg) + AXI_STATE_IDLE: begin + // idle state, wait for read data + m_axi_rready_next = &ram_wr_cmd_ready_int && !status_fifo_half_full_reg; + + if (USE_AXI_ID) begin + op_tag_next = OP_TAG_W'(m_axi_rd.rid); + end else begin + op_tag_next = OP_TAG_W'(op_tbl_read_complete_ptr_reg); + end + ram_sel_next = op_tbl_ram_sel[op_tag_next]; + addr_next = op_tbl_ram_addr[op_tag_next]; + op_count_next = op_tbl_len[op_tag_next]; + zero_len_next = op_tbl_zero_len[op_tag_next]; + offset_next = OFFSET_W'(op_tbl_ram_addr[op_tag_next][RAM_OFFSET_W-1:0]-RAM_OFFSET_W'(op_tbl_axi_addr[op_tag_next] & OFFSET_MASK)); + + if (m_axi_rd.rready && m_axi_rd.rvalid) begin + if (op_count_next > 13'(AXI_BYTE_LANES)-13'(op_tbl_axi_addr[op_tag_next] & OFFSET_MASK)) begin + cycle_byte_count_next = (OFFSET_W+1)'(AXI_BYTE_LANES)-(OFFSET_W+1)'(op_tbl_axi_addr[op_tag_next] & OFFSET_MASK); + end else begin + cycle_byte_count_next = (OFFSET_W+1)'(op_count_next); + end + start_offset_next = RAM_OFFSET_W'(addr_next); + {ram_wrap_next, end_offset_next} = start_offset_next+cycle_byte_count_next-1; + + ram_mask_0_next = {RAM_SEGS{1'b1}} << (start_offset_next >> $clog2(RAM_SEG_BE_W)); + ram_mask_1_next = {RAM_SEGS{1'b1}} >> (RAM_SEGS-1-(end_offset_next >> $clog2(RAM_SEG_BE_W))); + + if (!ram_wrap_next) begin + ram_mask_next = ram_mask_0_next & ram_mask_1_next; + ram_mask_0_next = ram_mask_0_next & ram_mask_1_next; + ram_mask_1_next = '0; + end else begin + ram_mask_next = ram_mask_0_next | ram_mask_1_next; + end + + addr_delay_next = addr_next; + addr_next = addr_next + AXI_ADDR_W'(cycle_byte_count_next); + op_count_next = op_count_next - 13'(cycle_byte_count_next); + + m_axi_rdata_int_next = m_axi_rd.rdata; + m_axi_rvalid_int_next = 1'b1; + + status_fifo_mask_next = 1'b1; + status_fifo_finish_next = 1'b0; + status_fifo_error_next = DMA_ERROR_NONE; + status_fifo_we_next = 1'b1; + + if 
(zero_len_next) begin + m_axi_rvalid_int_next = 1'b0; + status_fifo_mask_next = 1'b0; + end + + if (m_axi_rd.rresp == AXI_RESP_SLVERR) begin + m_axi_rvalid_int_next = 1'b0; + status_fifo_mask_next = 1'b0; + status_fifo_error_next = DMA_ERROR_AXI_RD_SLVERR; + end else if (m_axi_rd.rresp == AXI_RESP_DECERR) begin + m_axi_rvalid_int_next = 1'b0; + status_fifo_mask_next = 1'b0; + status_fifo_error_next = DMA_ERROR_AXI_RD_DECERR; + end + + stat_rd_req_finish_tag_next = op_tag_next; + stat_rd_req_finish_status_next = status_fifo_error_next; + stat_rd_req_finish_valid_next = 1'b0; + + if (!USE_AXI_ID) begin + op_tbl_read_complete_en = 1'b1; + end + + if (m_axi_rd.rlast) begin + status_fifo_finish_next = 1'b1; + stat_rd_req_finish_valid_next = 1'b1; + axi_state_next = AXI_STATE_IDLE; + end else begin + axi_state_next = AXI_STATE_WRITE; + end + end else begin + axi_state_next = AXI_STATE_IDLE; + end + end + AXI_STATE_WRITE: begin + // write state - generate write operations + m_axi_rready_next = &ram_wr_cmd_ready_int && !status_fifo_half_full_reg; + + if (m_axi_rd.rready && m_axi_rd.rvalid) begin + + if (op_count_next > 13'(AXI_BYTE_LANES)) begin + cycle_byte_count_next = (OFFSET_W+1)'(AXI_BYTE_LANES); + end else begin + cycle_byte_count_next = (OFFSET_W+1)'(op_count_next); + end + start_offset_next = RAM_OFFSET_W'(addr_next); + {ram_wrap_next, end_offset_next} = start_offset_next+cycle_byte_count_next-1; + + ram_mask_0_next = {RAM_SEGS{1'b1}} << (start_offset_next >> $clog2(RAM_SEG_BE_W)); + ram_mask_1_next = {RAM_SEGS{1'b1}} >> (RAM_SEGS-1-(end_offset_next >> $clog2(RAM_SEG_BE_W))); + + if (!ram_wrap_next) begin + ram_mask_next = ram_mask_0_next & ram_mask_1_next; + ram_mask_0_next = ram_mask_0_next & ram_mask_1_next; + ram_mask_1_next = '0; + end else begin + ram_mask_next = ram_mask_0_next | ram_mask_1_next; + end + + addr_delay_next = addr_next; + addr_next = addr_next + AXI_ADDR_W'(cycle_byte_count_next); + op_count_next = op_count_next - 13'(cycle_byte_count_next); + 
+ m_axi_rdata_int_next = m_axi_rd.rdata; + m_axi_rvalid_int_next = 1'b1; + + status_fifo_mask_next = 1'b1; + status_fifo_finish_next = 1'b0; + status_fifo_error_next = DMA_ERROR_NONE; + status_fifo_we_next = 1'b1; + + if (m_axi_rd.rresp == AXI_RESP_SLVERR) begin + m_axi_rvalid_int_next = 1'b0; + status_fifo_mask_next = 1'b0; + status_fifo_error_next = DMA_ERROR_AXI_RD_SLVERR; + end else if (m_axi_rd.rresp == AXI_RESP_DECERR) begin + m_axi_rvalid_int_next = 1'b0; + status_fifo_mask_next = 1'b0; + status_fifo_error_next = DMA_ERROR_AXI_RD_DECERR; + end + + stat_rd_req_finish_tag_next = op_tag_next; + stat_rd_req_finish_status_next = status_fifo_error_next; + stat_rd_req_finish_valid_next = 1'b0; + + if (m_axi_rd.rlast) begin + status_fifo_finish_next = 1'b1; + stat_rd_req_finish_valid_next = 1'b1; + axi_state_next = AXI_STATE_IDLE; + end else begin + axi_state_next = AXI_STATE_WRITE; + end + end else begin + axi_state_next = AXI_STATE_WRITE; + end + end + endcase + + status_fifo_rd_ptr_next = status_fifo_rd_ptr_reg; + + status_fifo_wr_op_tag = op_tag_reg; + status_fifo_wr_mask = status_fifo_mask_reg ? ram_mask_reg : 0; + status_fifo_wr_finish = status_fifo_finish_reg; + status_fifo_wr_error = status_fifo_error_reg; + status_fifo_we = 1'b0; + + if (status_fifo_we_reg) begin + status_fifo_wr_op_tag = op_tag_reg; + status_fifo_wr_mask = status_fifo_mask_reg ? 
ram_mask_reg : 0; + status_fifo_wr_finish = status_fifo_finish_reg; + status_fifo_wr_error = status_fifo_error_reg; + status_fifo_we = 1'b1; + end + + status_fifo_rd_op_tag_next = status_fifo_rd_op_tag_reg; + status_fifo_rd_mask_next = status_fifo_rd_mask_reg; + status_fifo_rd_finish_next = status_fifo_rd_finish_reg; + status_fifo_rd_valid_next = status_fifo_rd_valid_reg; + status_fifo_rd_error_next = status_fifo_rd_error_reg; + + op_tbl_update_status_ptr = status_fifo_rd_op_tag_reg; + op_tbl_update_status_error = status_fifo_rd_error_reg; + op_tbl_update_status_en = 1'b0; + + op_tbl_write_complete_ptr = status_fifo_rd_op_tag_reg; + op_tbl_write_complete_en = 1'b0; + + if (status_fifo_rd_valid_reg && (status_fifo_rd_mask_reg & ~out_done) == 0) begin + // got write completion, pop and return status + status_fifo_rd_valid_next = 1'b0; + op_tbl_update_status_en = 1'b1; + + out_done_ack = status_fifo_rd_mask_reg; + + if (status_fifo_rd_finish_reg) begin + // mark done + op_tbl_write_complete_ptr = status_fifo_rd_op_tag_reg; + op_tbl_write_complete_en = 1'b1; + end + end + + if (!status_fifo_rd_valid_next && status_fifo_rd_ptr_reg != status_fifo_wr_ptr_reg) begin + // status FIFO not empty + status_fifo_rd_op_tag_next = status_fifo_op_tag[status_fifo_rd_ptr_reg[STATUS_FIFO_AW-1:0]]; + status_fifo_rd_mask_next = status_fifo_mask[status_fifo_rd_ptr_reg[STATUS_FIFO_AW-1:0]]; + status_fifo_rd_finish_next = status_fifo_finish[status_fifo_rd_ptr_reg[STATUS_FIFO_AW-1:0]]; + status_fifo_rd_error_next = status_fifo_error[status_fifo_rd_ptr_reg[STATUS_FIFO_AW-1:0]]; + status_fifo_rd_valid_next = 1'b1; + status_fifo_rd_ptr_next = status_fifo_rd_ptr_reg + 1; + end + + // commit operations in-order + op_tbl_finish_en = 1'b0; + dec_active_op = 1'b0; + + if (rd_desc_sts_valid_reg) begin + rd_desc_sts_error_next = DMA_ERROR_NONE; + end else begin + rd_desc_sts_error_next = rd_desc_sts_error_reg; + end + + rd_desc_sts_tag_next = op_tbl_tag[op_tbl_finish_ptr_reg[OP_TAG_W-1:0]]; + 
rd_desc_sts_valid_next = 1'b0;
+
+    stat_rd_op_finish_tag_next = stat_rd_op_finish_tag_reg;
+    stat_rd_op_finish_status_next = rd_desc_sts_error_next;
+    stat_rd_op_finish_valid_next = 1'b0;
+
+    if (op_tbl_active[op_tbl_finish_ptr_reg[OP_TAG_W-1:0]] && op_tbl_write_complete[op_tbl_finish_ptr_reg[OP_TAG_W-1:0]] && op_tbl_finish_ptr_reg != op_tbl_start_ptr_reg) begin // entry at the finish pointer is active and marked write-complete
+        op_tbl_finish_en = 1'b1;
+        dec_active_op = 1'b1;
+
+        if (op_tbl_error_a[op_tbl_finish_ptr_reg[OP_TAG_W-1:0]] != op_tbl_error_b[op_tbl_finish_ptr_reg[OP_TAG_W-1:0]]) begin // a != b flags an error recorded for this entry
+            rd_desc_sts_error_next = op_tbl_error_code[op_tbl_finish_ptr_reg[OP_TAG_W-1:0]];
+        end
+
+        stat_rd_op_finish_status_next = rd_desc_sts_error_next;
+
+        if (op_tbl_last[op_tbl_finish_ptr_reg[OP_TAG_W-1:0]]) begin // entry flagged as last: emit descriptor status
+            rd_desc_sts_tag_next = op_tbl_tag[op_tbl_finish_ptr_reg[OP_TAG_W-1:0]];
+            rd_desc_sts_valid_next = 1'b1;
+            stat_rd_op_finish_tag_next = stat_rd_op_finish_tag_reg + 1;
+            stat_rd_op_finish_valid_next = 1'b1;
+        end
+    end
+end
+// Sequential logic: registers the *_next values and updates the status FIFO, the active operation count and the operation table
+always_ff @(posedge clk) begin
+    req_state_reg <= req_state_next;
+    axi_state_reg <= axi_state_next;
+
+    req_axi_addr_reg <= req_axi_addr_next;
+    req_ram_sel_reg <= req_ram_sel_next;
+    req_ram_addr_reg <= req_ram_addr_next;
+    req_op_count_reg <= req_op_count_next;
+    req_tr_count_reg <= req_tr_count_next;
+    req_zero_len_reg <= req_zero_len_next;
+    req_tag_reg <= req_tag_next;
+
+    ram_sel_reg <= ram_sel_next;
+    addr_reg <= addr_next;
+    addr_delay_reg <= addr_delay_next;
+    op_count_reg <= op_count_next;
+    zero_len_reg <= zero_len_next;
+    ram_mask_reg <= ram_mask_next;
+    ram_mask_0_reg <= ram_mask_0_next;
+    ram_mask_1_reg <= ram_mask_1_next;
+    ram_wrap_reg <= ram_wrap_next;
+    cycle_byte_count_reg <= cycle_byte_count_next;
+    start_offset_reg <= start_offset_next;
+    end_offset_reg <= end_offset_next;
+    offset_reg <= offset_next;
+    op_tag_reg <= op_tag_next;
+
+    m_axi_rdata_int_reg <= m_axi_rdata_int_next;
+    m_axi_rvalid_int_reg <= m_axi_rvalid_int_next;
+
+    m_axi_arid_reg <= m_axi_arid_next;
+    m_axi_araddr_reg <= m_axi_araddr_next;
+    m_axi_arlen_reg <= m_axi_arlen_next;
+    m_axi_arvalid_reg <= m_axi_arvalid_next;
+    m_axi_rready_reg <= m_axi_rready_next;
+
+    rd_desc_req_ready_reg <= rd_desc_req_ready_next;
+
+    rd_desc_sts_tag_reg <= rd_desc_sts_tag_next;
+    rd_desc_sts_error_reg <= rd_desc_sts_error_next;
+    rd_desc_sts_valid_reg <= rd_desc_sts_valid_next;
+
+    status_busy_reg <= active_op_count_reg != 0; // busy while the active operation count is nonzero
+
+    stat_rd_op_start_tag_reg <= stat_rd_op_start_tag_next;
+    stat_rd_op_start_valid_reg <= stat_rd_op_start_valid_next;
+    stat_rd_op_finish_tag_reg <= stat_rd_op_finish_tag_next;
+    stat_rd_op_finish_status_reg <= stat_rd_op_finish_status_next;
+    stat_rd_op_finish_valid_reg <= stat_rd_op_finish_valid_next;
+    stat_rd_req_start_tag_reg <= stat_rd_req_start_tag_next;
+    stat_rd_req_start_len_reg <= stat_rd_req_start_len_next;
+    stat_rd_req_start_valid_reg <= stat_rd_req_start_valid_next;
+    stat_rd_req_finish_tag_reg <= stat_rd_req_finish_tag_next;
+    stat_rd_req_finish_status_reg <= stat_rd_req_finish_status_next;
+    stat_rd_req_finish_valid_reg <= stat_rd_req_finish_valid_next;
+    stat_rd_op_tbl_full_reg <= stat_rd_op_tbl_full_next;
+    stat_rd_tx_stall_reg <= stat_rd_tx_stall_next;
+
+    if (status_fifo_we) begin // push one entry into the status FIFO at the write pointer
+        status_fifo_op_tag[status_fifo_wr_ptr_reg[STATUS_FIFO_AW-1:0]] <= status_fifo_wr_op_tag;
+        status_fifo_mask[status_fifo_wr_ptr_reg[STATUS_FIFO_AW-1:0]] <= status_fifo_wr_mask;
+        status_fifo_finish[status_fifo_wr_ptr_reg[STATUS_FIFO_AW-1:0]] <= status_fifo_wr_finish;
+        status_fifo_error[status_fifo_wr_ptr_reg[STATUS_FIFO_AW-1:0]] <= status_fifo_wr_error;
+        status_fifo_wr_ptr_reg <= status_fifo_wr_ptr_reg + 1;
+    end
+    status_fifo_rd_ptr_reg <= status_fifo_rd_ptr_next;
+
+    status_fifo_mask_reg <= status_fifo_mask_next;
+    status_fifo_finish_reg <= status_fifo_finish_next;
+    status_fifo_error_reg <= status_fifo_error_next;
+    status_fifo_we_reg <= status_fifo_we_next;
+
+    status_fifo_rd_op_tag_reg <= status_fifo_rd_op_tag_next;
+    status_fifo_rd_mask_reg <= status_fifo_rd_mask_next;
+    status_fifo_rd_finish_reg <= status_fifo_rd_finish_next;
+    status_fifo_rd_error_reg <= status_fifo_rd_error_next;
+    status_fifo_rd_valid_reg <= status_fifo_rd_valid_next;
+
+    status_fifo_half_full_reg <= $unsigned(status_fifo_wr_ptr_reg - status_fifo_rd_ptr_reg) >= 2**(STATUS_FIFO_AW-1); // set when the pointer difference reaches 2**(STATUS_FIFO_AW-1)
+
+    active_op_count_reg <= active_op_count_reg + OP_TAG_W'(inc_active_op) - OP_TAG_W'(dec_active_op);
+
+    if (op_tbl_start_en) begin // allocate the operation table entry at the start pointer
+        op_tbl_start_ptr_reg <= op_tbl_start_ptr_reg + 1;
+        op_tbl_active[op_tbl_start_ptr_reg[OP_TAG_W-1:0]] <= 1'b1;
+        op_tbl_axi_addr[op_tbl_start_ptr_reg[OP_TAG_W-1:0]] <= op_tbl_start_axi_addr;
+        op_tbl_ram_sel[op_tbl_start_ptr_reg[OP_TAG_W-1:0]] <= op_tbl_start_ram_sel;
+        op_tbl_ram_addr[op_tbl_start_ptr_reg[OP_TAG_W-1:0]] <= op_tbl_start_ram_addr;
+        op_tbl_len[op_tbl_start_ptr_reg[OP_TAG_W-1:0]] <= op_tbl_start_len;
+        op_tbl_zero_len[op_tbl_start_ptr_reg[OP_TAG_W-1:0]] <= op_tbl_start_zero_len;
+        op_tbl_cycle_count[op_tbl_start_ptr_reg[OP_TAG_W-1:0]] <= op_tbl_start_cycle_count;
+        op_tbl_tag[op_tbl_start_ptr_reg[OP_TAG_W-1:0]] <= op_tbl_start_tag;
+        op_tbl_last[op_tbl_start_ptr_reg[OP_TAG_W-1:0]] <= op_tbl_start_last;
+        op_tbl_write_complete[op_tbl_start_ptr_reg[OP_TAG_W-1:0]] <= 1'b0;
+        op_tbl_error_a[op_tbl_start_ptr_reg[OP_TAG_W-1:0]] <= op_tbl_error_b[op_tbl_start_ptr_reg[OP_TAG_W-1:0]]; // a == b: new entry starts with no error flagged
+    end
+
+    if (!USE_AXI_ID && op_tbl_read_complete_en) begin
+        op_tbl_read_complete_ptr_reg <= op_tbl_read_complete_ptr_reg + 1;
+    end
+
+    if (op_tbl_update_status_en) begin
+        if (op_tbl_update_status_error != 0) begin
+            op_tbl_error_code[op_tbl_update_status_ptr] <= op_tbl_update_status_error;
+            op_tbl_error_b[op_tbl_update_status_ptr] <= !op_tbl_error_a[op_tbl_update_status_ptr]; // make a != b so the commit logic reports the stored error code
+        end
+    end
+
+    if (op_tbl_write_complete_en) begin
+        op_tbl_write_complete[op_tbl_write_complete_ptr] <= 1'b1;
+    end
+
+    if (op_tbl_finish_en) begin // retire the entry at the finish pointer
+        op_tbl_finish_ptr_reg <= op_tbl_finish_ptr_reg + 1;
+        op_tbl_active[op_tbl_finish_ptr_reg[OP_TAG_W-1:0]] <= 1'b0;
+    end
+
+    if (rst) begin // synchronous reset; placed last so it overrides the assignments above
+        req_state_reg <= REQ_STATE_IDLE;
+        axi_state_reg <= AXI_STATE_IDLE;
+
+        m_axi_arvalid_reg <= 1'b0;
+        m_axi_rready_reg <= 1'b0;
+
+        rd_desc_req_ready_reg <= 1'b0;
+        rd_desc_sts_error_reg <= 4'd0;
+        rd_desc_sts_valid_reg <= 1'b0;
+
+        status_busy_reg <= 1'b0;
+
+        stat_rd_op_start_tag_reg <= '0;
+        stat_rd_op_start_valid_reg <= 1'b0;
+        stat_rd_op_finish_tag_reg <= '0;
+        stat_rd_op_finish_valid_reg <= 1'b0;
+        stat_rd_req_start_valid_reg <= 1'b0;
+        stat_rd_req_finish_valid_reg <= 1'b0;
+        stat_rd_op_tbl_full_reg <= 1'b0;
+        stat_rd_tx_stall_reg <= 1'b0;
+
+        status_fifo_wr_ptr_reg <= '0;
+        status_fifo_rd_ptr_reg <= '0;
+        status_fifo_we_reg <= 1'b0;
+        status_fifo_rd_valid_reg <= 1'b0;
+
+        active_op_count_reg <= '0;
+
+        op_tbl_start_ptr_reg <= '0;
+        op_tbl_read_complete_ptr_reg <= '0;
+        op_tbl_finish_ptr_reg <= '0;
+        op_tbl_active <= '0;
+    end
+end
+
+// output datapath logic (write data): one command register, output FIFO and done counter per RAM segment
+for (genvar n = 0; n < RAM_SEGS; n = n + 1) begin
+
+    logic [RAM_SEL_W-1:0] ram_wr_cmd_sel_reg = '0;
+    logic [RAM_SEG_BE_W-1:0] ram_wr_cmd_be_reg = '0;
+    logic [RAM_SEG_ADDR_W-1:0] ram_wr_cmd_addr_reg = '0;
+    logic [RAM_SEG_DATA_W-1:0] ram_wr_cmd_data_reg = '0;
+    logic ram_wr_cmd_valid_reg = 1'b0;
+
+    logic [OUTPUT_FIFO_AW+1-1:0] out_fifo_wr_ptr_reg = '0;
+    logic [OUTPUT_FIFO_AW+1-1:0] out_fifo_rd_ptr_reg = '0;
+    logic out_fifo_half_full_reg = 1'b0;
+
+    wire out_fifo_full = out_fifo_wr_ptr_reg == (out_fifo_rd_ptr_reg ^ {1'b1, {OUTPUT_FIFO_AW{1'b0}}}); // pointers equal except for the extra MSB
+    wire out_fifo_empty = out_fifo_wr_ptr_reg == out_fifo_rd_ptr_reg;
+
+    (* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *)
+    logic [RAM_SEL_W-1:0] out_fifo_wr_cmd_sel[2**OUTPUT_FIFO_AW];
+    (* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *)
+    logic [RAM_SEG_BE_W-1:0] out_fifo_wr_cmd_be[2**OUTPUT_FIFO_AW];
+    (* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *)
+    logic [RAM_SEG_ADDR_W-1:0] out_fifo_wr_cmd_addr[2**OUTPUT_FIFO_AW];
+    (* ram_style = "distributed", ramstyle =
"no_rw_check, mlab" *) + logic [RAM_SEG_DATA_W-1:0] out_fifo_wr_cmd_data[2**OUTPUT_FIFO_AW]; + + logic [OUTPUT_FIFO_AW+1-1:0] done_count_reg = '0; + logic done_reg = 1'b0; + + assign ram_wr_cmd_ready_int[n] = !out_fifo_half_full_reg; + + assign dma_ram_wr.wr_cmd_sel[n] = ram_wr_cmd_sel_reg; + assign dma_ram_wr.wr_cmd_be[n] = ram_wr_cmd_be_reg; + assign dma_ram_wr.wr_cmd_addr[n] = ram_wr_cmd_addr_reg; + assign dma_ram_wr.wr_cmd_data[n] = ram_wr_cmd_data_reg; + assign dma_ram_wr.wr_cmd_valid[n] = ram_wr_cmd_valid_reg; + + assign out_done[n] = done_reg; + + always_ff @(posedge clk) begin + ram_wr_cmd_valid_reg <= ram_wr_cmd_valid_reg && !dma_ram_wr.wr_cmd_ready[n]; + + out_fifo_half_full_reg <= $unsigned(out_fifo_wr_ptr_reg - out_fifo_rd_ptr_reg) >= 2**(OUTPUT_FIFO_AW-1); + + if (!out_fifo_full && ram_wr_cmd_valid_int[n]) begin + out_fifo_wr_cmd_sel[out_fifo_wr_ptr_reg[OUTPUT_FIFO_AW-1:0]] <= ram_wr_cmd_sel_int[n]; + out_fifo_wr_cmd_be[out_fifo_wr_ptr_reg[OUTPUT_FIFO_AW-1:0]] <= ram_wr_cmd_be_int[n]; + out_fifo_wr_cmd_addr[out_fifo_wr_ptr_reg[OUTPUT_FIFO_AW-1:0]] <= ram_wr_cmd_addr_int[n]; + out_fifo_wr_cmd_data[out_fifo_wr_ptr_reg[OUTPUT_FIFO_AW-1:0]] <= ram_wr_cmd_data_int[n]; + out_fifo_wr_ptr_reg <= out_fifo_wr_ptr_reg + 1; + end + + if (!out_fifo_empty && (!ram_wr_cmd_valid_reg || dma_ram_wr.wr_cmd_ready[n])) begin + ram_wr_cmd_sel_reg <= out_fifo_wr_cmd_sel[out_fifo_rd_ptr_reg[OUTPUT_FIFO_AW-1:0]]; + ram_wr_cmd_be_reg <= out_fifo_wr_cmd_be[out_fifo_rd_ptr_reg[OUTPUT_FIFO_AW-1:0]]; + ram_wr_cmd_addr_reg <= out_fifo_wr_cmd_addr[out_fifo_rd_ptr_reg[OUTPUT_FIFO_AW-1:0]]; + ram_wr_cmd_data_reg <= out_fifo_wr_cmd_data[out_fifo_rd_ptr_reg[OUTPUT_FIFO_AW-1:0]]; + ram_wr_cmd_valid_reg <= 1'b1; + out_fifo_rd_ptr_reg <= out_fifo_rd_ptr_reg + 1; + end + + if (done_count_reg < 2**OUTPUT_FIFO_AW && dma_ram_wr.wr_done[n] && !out_done_ack[n]) begin + done_count_reg <= done_count_reg + 1; + done_reg <= 1; + end else if (done_count_reg > 0 && !dma_ram_wr.wr_done[n] && 
out_done_ack[n]) begin + done_count_reg <= done_count_reg - 1; + done_reg <= done_count_reg > 1; + end + + if (rst) begin + out_fifo_wr_ptr_reg <= '0; + out_fifo_rd_ptr_reg <= '0; + ram_wr_cmd_valid_reg <= 1'b0; + done_count_reg <= '0; + done_reg <= 1'b0; + end + end + +end + +endmodule + +`resetall diff --git a/src/dma/rtl/taxi_dma_if_axi_wr.sv b/src/dma/rtl/taxi_dma_if_axi_wr.sv new file mode 100644 index 0000000..e25c602 --- /dev/null +++ b/src/dma/rtl/taxi_dma_if_axi_wr.sv @@ -0,0 +1,1159 @@ +// SPDX-License-Identifier: CERN-OHL-S-2.0 +/* + +Copyright (c) 2021-2025 FPGA Ninja, LLC + +Authors: +- Alex Forencich + +*/ + +`resetall +`timescale 1ns / 1ps +`default_nettype none + +/* + * AXI DMA write interface + */ +module taxi_dma_if_axi_wr # +( + // Maximum AXI burst length to generate + parameter AXI_MAX_BURST_LEN = 256, + // Operation table size + parameter OP_TBL_SIZE = 32, + // Use AXI ID signals + parameter logic USE_AXI_ID = 1'b1 +) +( + input wire logic clk, + input wire logic rst, + + /* + * AXI master interface + */ + taxi_axi_if.wr_mst m_axi_wr, + + /* + * Write descriptor + */ + taxi_dma_desc_if.req_snk wr_desc_req, + taxi_dma_desc_if.sts_src wr_desc_sts, + + /* + * RAM interface + */ + taxi_dma_ram_if.rd_mst dma_ram_rd, + + /* + * Configuration + */ + input wire logic enable, + + /* + * Status + */ + output wire logic status_busy, + + /* + * Statistics + */ + output wire logic [$clog2(OP_TBL_SIZE)-1:0] stat_wr_op_start_tag, + output wire logic stat_wr_op_start_valid, + output wire logic [$clog2(OP_TBL_SIZE)-1:0] stat_wr_op_finish_tag, + output wire logic [3:0] stat_wr_op_finish_status, + output wire logic stat_wr_op_finish_valid, + output wire logic [$clog2(OP_TBL_SIZE)-1:0] stat_wr_req_start_tag, + output wire logic [12:0] stat_wr_req_start_len, + output wire logic stat_wr_req_start_valid, + output wire logic [$clog2(OP_TBL_SIZE)-1:0] stat_wr_req_finish_tag, + output wire logic [3:0] stat_wr_req_finish_status, + output wire logic 
stat_wr_req_finish_valid,
+    output wire logic stat_wr_op_tbl_full,
+    output wire logic stat_wr_tx_stall
+);
+
+// TODO cleanup
+// verilator lint_off WIDTHEXPAND
+
+// extract parameters
+localparam AXI_DATA_W = m_axi_wr.DATA_W;
+localparam AXI_ADDR_W = m_axi_wr.ADDR_W;
+localparam AXI_STRB_W = m_axi_wr.STRB_W;
+localparam AXI_ID_W = m_axi_wr.ID_W;
+localparam AXI_MAX_BURST_LEN_INT = AXI_MAX_BURST_LEN < m_axi_wr.MAX_BURST_LEN ? AXI_MAX_BURST_LEN : m_axi_wr.MAX_BURST_LEN; // requested burst length clamped to the interface limit
+// descriptor interface parameters
+localparam IMM_EN = wr_desc_req.IMM_EN;
+localparam IMM_W = wr_desc_req.IMM_W;
+localparam LEN_W = wr_desc_req.LEN_W;
+localparam TAG_W = wr_desc_req.TAG_W;
+// segmented RAM interface parameters
+localparam RAM_SEGS = dma_ram_rd.SEGS;
+localparam RAM_SEG_ADDR_W = dma_ram_rd.SEG_ADDR_W;
+localparam RAM_SEG_DATA_W = dma_ram_rd.SEG_DATA_W;
+localparam RAM_SEG_BE_W = dma_ram_rd.SEG_BE_W;
+localparam RAM_SEL_W = dma_ram_rd.SEL_W;
+
+localparam RAM_ADDR_W = RAM_SEG_ADDR_W+$clog2(RAM_SEGS*RAM_SEG_BE_W);
+localparam RAM_DATA_W = RAM_SEGS*RAM_SEG_DATA_W;
+localparam RAM_WORD_W = RAM_SEG_BE_W;
+localparam RAM_WORD_SIZE = RAM_SEG_DATA_W/RAM_WORD_W;
+
+localparam AXI_WORD_W = AXI_STRB_W;
+localparam AXI_WORD_SIZE = AXI_DATA_W/AXI_WORD_W;
+localparam AXI_BURST_SIZE = $clog2(AXI_STRB_W); // log2 of the number of byte lanes; drives awsize
+localparam AXI_MAX_BURST_SIZE = AXI_MAX_BURST_LEN_INT << AXI_BURST_SIZE; // maximum burst size in bytes, derived from the clamped length so bursts never exceed the interface's MAX_BURST_LEN
+
+localparam OFFSET_W = AXI_STRB_W > 1 ? $clog2(AXI_STRB_W) : 1;
+localparam OFFSET_MASK = AXI_STRB_W > 1 ?
{OFFSET_W{1'b1}} : 0; +localparam RAM_OFFSET_W = $clog2(RAM_SEGS*RAM_SEG_BE_W); +localparam ADDR_MASK = {AXI_ADDR_W{1'b1}} << $clog2(AXI_STRB_W); +localparam CYCLE_COUNT_W = LEN_W - AXI_BURST_SIZE + 1; + +localparam MASK_FIFO_AW = $clog2(OP_TBL_SIZE)+1; + +localparam OP_TAG_W = $clog2(OP_TBL_SIZE); + +localparam OUTPUT_FIFO_AW = 5; + +// check configuration +if (AXI_WORD_SIZE * AXI_STRB_W != AXI_DATA_W) + $fatal(0, "Error: AXI data width not evenly divisible (instance %m)"); + +if (AXI_WORD_SIZE != RAM_WORD_SIZE) + $fatal(0, "Error: word size mismatch (instance %m)"); + +if (2**$clog2(AXI_WORD_W) != AXI_WORD_W) + $fatal(0, "Error: AXI word width must be even power of two (instance %m)"); + +if (AXI_MAX_BURST_LEN < 1 || AXI_MAX_BURST_LEN > 256) + $fatal(0, "Error: AXI_MAX_BURST_LEN must be between 1 and 256 (instance %m)"); + +if (RAM_SEGS < 2) + $fatal(0, "Error: RAM interface requires at least 2 segments (instance %m)"); + +if (RAM_DATA_W != AXI_DATA_W*2) + $fatal(0, "Error: RAM interface width must be double the AXI interface width (instance %m)"); + +if (2**$clog2(RAM_WORD_W) != RAM_WORD_W) + $fatal(0, "Error: RAM word width must be even power of two (instance %m)"); + +if (OP_TBL_SIZE > 2**AXI_ID_W) + $fatal(0, "Error: AXI_ID_W insufficient for requested OP_TBL_SIZE (instance %m)"); + +if (IMM_EN && IMM_W > AXI_DATA_W) + $fatal(0, "Error: IMM_W must not be larger than the AXI interface width (instance %m)"); + +if (wr_desc_req.SRC_ADDR_W < RAM_ADDR_W || wr_desc_req.DST_ADDR_W < AXI_ADDR_W) + $fatal(0, "Error: Descriptor address width is not sufficient (instance %m)"); + +localparam logic [1:0] + AXI_RESP_OKAY = 2'b00, + AXI_RESP_EXOKAY = 2'b01, + AXI_RESP_SLVERR = 2'b10, + AXI_RESP_DECERR = 2'b11; + +localparam logic [3:0] + DMA_ERROR_NONE = 4'd0, + DMA_ERROR_TIMEOUT = 4'd1, + DMA_ERROR_PARITY = 4'd2, + DMA_ERROR_AXI_RD_SLVERR = 4'd4, + DMA_ERROR_AXI_RD_DECERR = 4'd5, + DMA_ERROR_AXI_WR_SLVERR = 4'd6, + DMA_ERROR_AXI_WR_DECERR = 4'd7, + DMA_ERROR_PCIE_FLR = 
4'd8, + DMA_ERROR_PCIE_CPL_POISONED = 4'd9, + DMA_ERROR_PCIE_CPL_STATUS_UR = 4'd10, + DMA_ERROR_PCIE_CPL_STATUS_CA = 4'd11; + +localparam logic [0:0] + REQ_STATE_IDLE = 1'd0, + REQ_STATE_START = 1'd1; + +logic [0:0] req_state_reg = REQ_STATE_IDLE, req_state_next; + +localparam logic [0:0] + READ_STATE_IDLE = 1'd0, + READ_STATE_READ = 1'd1; + +logic [0:0] read_state_reg = READ_STATE_IDLE, read_state_next; + +localparam logic [0:0] + AXI_STATE_IDLE = 1'd0, + AXI_STATE_TRANSFER = 1'd1; + +logic [0:0] axi_state_reg = AXI_STATE_IDLE, axi_state_next; + +// datapath control signals +logic mask_fifo_we; + +logic read_cmd_ready; + +logic [AXI_ADDR_W-1:0] req_axi_addr_reg = '0, req_axi_addr_next; +logic [RAM_SEL_W-1:0] ram_sel_reg = '0, ram_sel_next; +logic [RAM_ADDR_W-1:0] ram_addr_reg = '0, ram_addr_next; +logic [IMM_W-1:0] imm_reg = '0, imm_next; +logic imm_en_reg = 1'b0, imm_en_next; +logic [LEN_W-1:0] op_count_reg = '0, op_count_next; +logic zero_len_reg = 1'b0, zero_len_next; +logic [LEN_W-1:0] tr_count_reg = '0, tr_count_next; +logic [12:0] tr_word_count_reg = '0, tr_word_count_next; +logic [TAG_W-1:0] tag_reg = '0, tag_next; + +logic [AXI_ADDR_W-1:0] read_axi_addr_reg = '0, read_axi_addr_next; +logic [RAM_SEL_W-1:0] read_ram_sel_reg = '0, read_ram_sel_next; +logic [RAM_ADDR_W-1:0] read_ram_addr_reg = '0, read_ram_addr_next; +logic read_imm_en_reg = 1'b0, read_imm_en_next; +logic [12:0] read_len_reg = '0, read_len_next; +logic [RAM_SEGS-1:0] read_ram_mask_reg = '0, read_ram_mask_next; +logic [RAM_SEGS-1:0] read_ram_mask_0_reg = '0, read_ram_mask_0_next; +logic [RAM_SEGS-1:0] read_ram_mask_1_reg = '0, read_ram_mask_1_next; +logic ram_wrap_reg = 1'b0, ram_wrap_next; +logic [CYCLE_COUNT_W-1:0] read_cycle_count_reg = '0, read_cycle_count_next; +logic read_last_cycle_reg = 1'b0, read_last_cycle_next; +logic [OFFSET_W+1-1:0] cycle_byte_count_reg = '0, cycle_byte_count_next; +logic [RAM_OFFSET_W-1:0] start_offset_reg = '0, start_offset_next; +logic [RAM_OFFSET_W-1:0] 
end_offset_reg = '0, end_offset_next; + +logic [AXI_ADDR_W-1:0] axi_addr_reg = '0, axi_addr_next; +logic [IMM_W-1:0] axi_imm_reg = '0, axi_imm_next; +logic axi_imm_en_reg = 1'b0, axi_imm_en_next; +logic [12:0] axi_len_reg = '0, axi_len_next; +logic axi_zero_len_reg = 1'b0, axi_zero_len_next; +logic [RAM_OFFSET_W-1:0] offset_reg = '0, offset_next; +logic [AXI_STRB_W-1:0] strb_offset_mask_reg = '1, strb_offset_mask_next; +logic [OFFSET_W-1:0] last_cycle_offset_reg = '0, last_cycle_offset_next; +logic [RAM_SEGS-1:0] ram_mask_reg = '0, ram_mask_next; +logic ram_mask_valid_reg = 1'b0, ram_mask_valid_next; +logic [CYCLE_COUNT_W-1:0] cycle_count_reg = '0, cycle_count_next; +logic last_cycle_reg = 1'b0, last_cycle_next; + +logic [AXI_ADDR_W-1:0] read_cmd_axi_addr_reg = '0, read_cmd_axi_addr_next; +logic [RAM_SEL_W-1:0] read_cmd_ram_sel_reg = '0, read_cmd_ram_sel_next; +logic [RAM_ADDR_W-1:0] read_cmd_ram_addr_reg = '0, read_cmd_ram_addr_next; +logic read_cmd_imm_en_reg = 1'b0, read_cmd_imm_en_next; +logic [12:0] read_cmd_len_reg = '0, read_cmd_len_next; +logic [CYCLE_COUNT_W-1:0] read_cmd_cycle_count_reg = '0, read_cmd_cycle_count_next; +logic read_cmd_last_cycle_reg = 1'b0, read_cmd_last_cycle_next; +logic read_cmd_valid_reg = 1'b0, read_cmd_valid_next; + +logic [MASK_FIFO_AW+1-1:0] mask_fifo_wr_ptr_reg = '0; +logic [MASK_FIFO_AW+1-1:0] mask_fifo_rd_ptr_reg = '0, mask_fifo_rd_ptr_next; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +logic [RAM_SEGS-1:0] mask_fifo_mask[2**MASK_FIFO_AW]; +logic [RAM_SEGS-1:0] mask_fifo_wr_mask; + +wire mask_fifo_empty = mask_fifo_wr_ptr_reg == mask_fifo_rd_ptr_reg; +wire mask_fifo_full = mask_fifo_wr_ptr_reg == (mask_fifo_rd_ptr_reg ^ (1 << MASK_FIFO_AW)); + +logic [OP_TAG_W+1-1:0] active_op_count_reg = '0; +logic inc_active_op; +logic dec_active_op; + +logic [AXI_ID_W-1:0] m_axi_awid_reg = '0, m_axi_awid_next; +logic [AXI_ADDR_W-1:0] m_axi_awaddr_reg = '0, m_axi_awaddr_next; +logic [7:0] m_axi_awlen_reg = '0, 
m_axi_awlen_next; +logic m_axi_awvalid_reg = 1'b0, m_axi_awvalid_next; +logic m_axi_bready_reg = 1'b0, m_axi_bready_next; + +logic wr_desc_req_ready_reg = 1'b0, wr_desc_req_ready_next; + +logic [TAG_W-1:0] wr_desc_sts_tag_reg = '0, wr_desc_sts_tag_next; +logic [3:0] wr_desc_sts_error_reg = 4'd0, wr_desc_sts_error_next; +logic wr_desc_sts_valid_reg = 1'b0, wr_desc_sts_valid_next; + +logic [RAM_SEGS-1:0][RAM_SEL_W-1:0] ram_rd_cmd_sel_reg = '0, ram_rd_cmd_sel_next; +logic [RAM_SEGS-1:0][RAM_SEG_ADDR_W-1:0] ram_rd_cmd_addr_reg = '0, ram_rd_cmd_addr_next; +logic [RAM_SEGS-1:0] ram_rd_cmd_valid_reg = '0, ram_rd_cmd_valid_next; +logic [RAM_SEGS-1:0] ram_rd_resp_ready_cmb; + +logic status_busy_reg = 1'b0; + +logic [OP_TAG_W-1:0] stat_wr_op_start_tag_reg = '0, stat_wr_op_start_tag_next; +logic stat_wr_op_start_valid_reg = 1'b0, stat_wr_op_start_valid_next; +logic [OP_TAG_W-1:0] stat_wr_op_finish_tag_reg = '0, stat_wr_op_finish_tag_next; +logic [3:0] stat_wr_op_finish_status_reg = '0, stat_wr_op_finish_status_next; +logic stat_wr_op_finish_valid_reg = 1'b0, stat_wr_op_finish_valid_next; +logic [OP_TAG_W-1:0] stat_wr_req_start_tag_reg = '0, stat_wr_req_start_tag_next; +logic [12:0] stat_wr_req_start_len_reg = '0, stat_wr_req_start_len_next; +logic stat_wr_req_start_valid_reg = 1'b0, stat_wr_req_start_valid_next; +logic [OP_TAG_W-1:0] stat_wr_req_finish_tag_reg = '0, stat_wr_req_finish_tag_next; +logic [3:0] stat_wr_req_finish_status_reg = '0, stat_wr_req_finish_status_next; +logic stat_wr_req_finish_valid_reg = 1'b0, stat_wr_req_finish_valid_next; +logic stat_wr_op_tbl_full_reg = 1'b0, stat_wr_op_tbl_full_next; +logic stat_wr_tx_stall_reg = 1'b0, stat_wr_tx_stall_next; + +// internal datapath +logic [AXI_DATA_W-1:0] m_axi_wdata_int; +logic [AXI_STRB_W-1:0] m_axi_wstrb_int; +logic m_axi_wlast_int; +logic m_axi_wvalid_int; +wire m_axi_wready_int; + +assign m_axi_wr.awid = USE_AXI_ID ? 
m_axi_awid_reg : '0; +assign m_axi_wr.awaddr = m_axi_awaddr_reg; +assign m_axi_wr.awlen = m_axi_awlen_reg; +assign m_axi_wr.awsize = 3'(AXI_BURST_SIZE); +assign m_axi_wr.awburst = 2'b01; +assign m_axi_wr.awlock = 1'b0; +assign m_axi_wr.awcache = 4'b0011; +assign m_axi_wr.awprot = 3'b010; +assign m_axi_wr.awvalid = m_axi_awvalid_reg; +assign m_axi_wr.bready = m_axi_bready_reg; + +assign wr_desc_req.req_ready = wr_desc_req_ready_reg; + +assign wr_desc_sts.sts_tag = wr_desc_sts_tag_reg; +assign wr_desc_sts.sts_error = wr_desc_sts_error_reg; +assign wr_desc_sts.sts_valid = wr_desc_sts_valid_reg; + +assign dma_ram_rd.rd_cmd_sel = ram_rd_cmd_sel_reg; +assign dma_ram_rd.rd_cmd_addr = ram_rd_cmd_addr_reg; +assign dma_ram_rd.rd_cmd_valid = ram_rd_cmd_valid_reg; +assign dma_ram_rd.rd_resp_ready = ram_rd_resp_ready_cmb; + +assign status_busy = status_busy_reg; + +assign stat_wr_op_start_tag = stat_wr_op_start_tag_reg; +assign stat_wr_op_start_valid = stat_wr_op_start_valid_reg; +assign stat_wr_op_finish_tag = stat_wr_op_finish_tag_reg; +assign stat_wr_op_finish_status = stat_wr_op_finish_status_reg; +assign stat_wr_op_finish_valid = stat_wr_op_finish_valid_reg; +assign stat_wr_req_start_tag = stat_wr_req_start_tag_reg; +assign stat_wr_req_start_len = stat_wr_req_start_len_reg; +assign stat_wr_req_start_valid = stat_wr_req_start_valid_reg; +assign stat_wr_req_finish_tag = stat_wr_req_finish_tag_reg; +assign stat_wr_req_finish_status = stat_wr_req_finish_status_reg; +assign stat_wr_req_finish_valid = stat_wr_req_finish_valid_reg; +assign stat_wr_op_tbl_full = stat_wr_op_tbl_full_reg; +assign stat_wr_tx_stall = stat_wr_tx_stall_reg; + +// operation tag management +logic [OP_TAG_W+1-1:0] op_tbl_start_ptr_reg = '0; +logic [AXI_ADDR_W-1:0] op_tbl_start_axi_addr; +logic [IMM_W-1:0] op_tbl_start_imm; +logic op_tbl_start_imm_en; +logic [12:0] op_tbl_start_len; +logic op_tbl_start_zero_len; +logic [CYCLE_COUNT_W-1:0] op_tbl_start_cycle_count; +logic [RAM_OFFSET_W-1:0] 
op_tbl_start_offset; +logic [TAG_W-1:0] op_tbl_start_tag; +logic op_tbl_start_last; +logic op_tbl_start_en; +logic [OP_TAG_W+1-1:0] op_tbl_tx_start_ptr_reg = '0; +logic op_tbl_tx_start_en; +logic [OP_TAG_W+1-1:0] op_tbl_tx_finish_ptr_reg = '0; +logic op_tbl_tx_finish_en; +logic [OP_TAG_W-1:0] op_tbl_write_complete_ptr; +logic [3:0] op_tbl_write_complete_error; +logic op_tbl_write_complete_en; +logic [OP_TAG_W+1-1:0] op_tbl_finish_ptr_reg = '0; +logic op_tbl_finish_en; + +logic [2**OP_TAG_W-1:0] op_tbl_active = '0; +logic [2**OP_TAG_W-1:0] op_tbl_write_complete = '0; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +logic [AXI_ADDR_W-1:0] op_tbl_axi_addr[2**OP_TAG_W]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +logic [IMM_W-1:0] op_tbl_imm[2**OP_TAG_W]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +logic op_tbl_imm_en[2**OP_TAG_W]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +logic [12:0] op_tbl_len[2**OP_TAG_W]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +logic op_tbl_zero_len[2**OP_TAG_W]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +logic [CYCLE_COUNT_W-1:0] op_tbl_cycle_count[2**OP_TAG_W]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +logic [RAM_OFFSET_W-1:0] op_tbl_offset[2**OP_TAG_W]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +logic [TAG_W-1:0] op_tbl_tag[2**OP_TAG_W]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +logic op_tbl_last[2**OP_TAG_W]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +logic [3:0] op_tbl_error_code[2**OP_TAG_W]; + +initial begin + for (integer i = 0; i < 2**OP_TAG_W; i = i + 1) begin + op_tbl_axi_addr[i] = '0; + op_tbl_imm[i] = '0; + op_tbl_imm_en[i] = '0; + op_tbl_len[i] = '0; + op_tbl_zero_len[i] = 1'b0; + op_tbl_cycle_count[i] = '0; + op_tbl_offset[i] = '0; + op_tbl_tag[i] = '0; + op_tbl_last[i] = '0; + op_tbl_error_code[i] = '0; + 
end
+end
+// Request state machine: accepts write descriptors and splits each one into AXI bursts limited by the maximum burst size and by 4 KB address boundaries
+always_comb begin
+    req_state_next = REQ_STATE_IDLE;
+
+    wr_desc_req_ready_next = 1'b0;
+
+    stat_wr_op_start_tag_next = stat_wr_op_start_tag_reg;
+    stat_wr_op_start_valid_next = 1'b0;
+    stat_wr_req_start_tag_next = stat_wr_req_start_tag_reg;
+    stat_wr_req_start_len_next = stat_wr_req_start_len_reg;
+    stat_wr_req_start_valid_next = 1'b0;
+    stat_wr_op_tbl_full_next = !(!op_tbl_active[op_tbl_start_ptr_reg[OP_TAG_W-1:0]] && ($unsigned(op_tbl_start_ptr_reg - op_tbl_finish_ptr_reg) < 2**OP_TAG_W));
+    stat_wr_tx_stall_next = (m_axi_wr.awvalid && !m_axi_wr.awready) || (m_axi_wr.wvalid && !m_axi_wr.wready); // AW or W channel valid but not accepted; the port is m_axi_wr (no m_axi exists in this module)
+
+    tag_next = tag_reg;
+    req_axi_addr_next = req_axi_addr_reg;
+    ram_sel_next = ram_sel_reg;
+    ram_addr_next = ram_addr_reg;
+    imm_next = imm_reg;
+    imm_en_next = imm_en_reg;
+    op_count_next = op_count_reg;
+    zero_len_next = zero_len_reg;
+    tr_count_next = tr_count_reg;
+    tr_word_count_next = tr_word_count_reg;
+
+    read_cmd_axi_addr_next = read_cmd_axi_addr_reg;
+    read_cmd_ram_sel_next = read_cmd_ram_sel_reg;
+    read_cmd_ram_addr_next = read_cmd_ram_addr_reg;
+    read_cmd_imm_en_next = read_cmd_imm_en_reg;
+    read_cmd_len_next = read_cmd_len_reg;
+    read_cmd_cycle_count_next = read_cmd_cycle_count_reg;
+    read_cmd_last_cycle_next = read_cmd_last_cycle_reg;
+    read_cmd_valid_next = read_cmd_valid_reg && !read_cmd_ready;
+
+    op_tbl_start_axi_addr = req_axi_addr_reg;
+    op_tbl_start_imm = imm_reg;
+    op_tbl_start_imm_en = imm_en_reg;
+    op_tbl_start_len = '0;
+    op_tbl_start_zero_len = zero_len_reg;
+    op_tbl_start_cycle_count = '0;
+    op_tbl_start_offset = RAM_OFFSET_W'(req_axi_addr_reg & OFFSET_MASK) - RAM_OFFSET_W'(ram_addr_reg);
+    op_tbl_start_tag = tag_reg;
+    op_tbl_start_last = '0;
+    op_tbl_start_en = 1'b0;
+
+    inc_active_op = 1'b0;
+
+    // AXI burst segmentation
+    case (req_state_reg)
+        REQ_STATE_IDLE: begin
+            // idle state, wait for incoming descriptor
+            wr_desc_req_ready_next = !op_tbl_active[op_tbl_start_ptr_reg[OP_TAG_W-1:0]] && ($unsigned(op_tbl_start_ptr_reg - op_tbl_finish_ptr_reg) < 2**OP_TAG_W) && enable;
+
+            req_axi_addr_next = wr_desc_req.req_dst_addr;
+            if (IMM_EN && wr_desc_req.req_imm_en) begin
+                ram_sel_next = '0;
+                ram_addr_next = '0;
+            end else begin
+                ram_sel_next = wr_desc_req.req_src_sel;
+                ram_addr_next = wr_desc_req.req_src_addr;
+            end
+            imm_next = wr_desc_req.req_imm;
+            imm_en_next = IMM_EN && wr_desc_req.req_imm_en;
+            if (wr_desc_req.req_len == 0) begin
+                // zero-length operation
+                op_count_next = 1; // carried as a one-byte operation with zero_len flagged
+                zero_len_next = 1'b1;
+            end else begin
+                op_count_next = wr_desc_req.req_len;
+                zero_len_next = 1'b0;
+            end
+            tag_next = wr_desc_req.req_tag;
+            // precompute the byte count of the first burst
+            if (op_count_next <= LEN_W'(AXI_MAX_BURST_SIZE) - LEN_W'(req_axi_addr_next & OFFSET_MASK) || AXI_MAX_BURST_SIZE >= 4096) begin
+                // packet smaller than max burst size
+                if ((12'(req_axi_addr_next & 12'hfff) + 12'(op_count_next & 12'hfff)) >> 12 != 0 || op_count_next >> 12 != 0) begin
+                    // crosses 4k boundary
+                    tr_word_count_next = 13'h1000 - req_axi_addr_next[11:0];
+                end else begin
+                    // does not cross 4k boundary
+                    tr_word_count_next = 13'(op_count_next);
+                end
+            end else begin
+                // packet larger than max burst size
+                if ((12'(req_axi_addr_next & 12'hfff) + 12'(AXI_MAX_BURST_SIZE)) >> 12 != 0) begin
+                    // crosses 4k boundary
+                    tr_word_count_next = 13'h1000 - req_axi_addr_next[11:0];
+                end else begin
+                    // does not cross 4k boundary
+                    tr_word_count_next = 13'(AXI_MAX_BURST_SIZE) - 13'(req_axi_addr_next & OFFSET_MASK);
+                end
+            end
+
+            if (wr_desc_req.req_ready & wr_desc_req.req_valid) begin
+                wr_desc_req_ready_next = 1'b0;
+
+                stat_wr_op_start_tag_next = stat_wr_op_start_tag_reg+1;
+                stat_wr_op_start_valid_next = 1'b1;
+
+                req_state_next = REQ_STATE_START;
+            end else begin
+                req_state_next = REQ_STATE_IDLE;
+            end
+        end
+        REQ_STATE_START: begin
+            // start state, compute length
+            if (!op_tbl_active[op_tbl_start_ptr_reg[OP_TAG_W-1:0]] && ($unsigned(op_tbl_start_ptr_reg - op_tbl_finish_ptr_reg) < 2**OP_TAG_W) && (ram_rd_cmd_valid_reg == 0 || dma_ram_rd.rd_cmd_ready != 0) && (!read_cmd_valid_reg || read_cmd_ready)) begin // op table slot free, no RAM read command blocked, read command register free
+                read_cmd_axi_addr_next = req_axi_addr_reg;
+                read_cmd_ram_sel_next = ram_sel_reg;
+                read_cmd_ram_addr_next = ram_addr_reg;
+                read_cmd_imm_en_next = imm_en_reg;
+                read_cmd_len_next = tr_word_count_next;
+                read_cmd_cycle_count_next = CYCLE_COUNT_W'(tr_word_count_next + 13'(req_axi_addr_reg & OFFSET_MASK) - 13'd1) >> AXI_BURST_SIZE;
+                op_tbl_start_cycle_count = read_cmd_cycle_count_next;
+                read_cmd_last_cycle_next = read_cmd_cycle_count_next == 0;
+                read_cmd_valid_next = 1'b1;
+                // advance addresses and remaining byte count past this burst
+                req_axi_addr_next = req_axi_addr_reg + AXI_ADDR_W'(tr_word_count_next);
+                ram_addr_next = ram_addr_reg + RAM_ADDR_W'(tr_word_count_next);
+                op_count_next = op_count_reg - LEN_W'(tr_word_count_next);
+                // record the burst in the operation table
+                op_tbl_start_axi_addr = req_axi_addr_reg;
+                op_tbl_start_imm = imm_reg;
+                op_tbl_start_imm_en = imm_en_reg;
+                op_tbl_start_len = tr_word_count_next;
+                op_tbl_start_zero_len = zero_len_reg;
+                op_tbl_start_offset = RAM_OFFSET_W'(req_axi_addr_reg & OFFSET_MASK)-ram_addr_reg[RAM_OFFSET_W-1:0];
+                op_tbl_start_tag = tag_reg;
+                op_tbl_start_last = op_count_reg == LEN_W'(tr_word_count_next); // last burst when it consumes all remaining bytes
+                op_tbl_start_en = 1'b1;
+                inc_active_op = 1'b1;
+
+                stat_wr_req_start_tag_next = op_tbl_start_ptr_reg[OP_TAG_W-1:0];
+                stat_wr_req_start_len_next = zero_len_reg ? '0 : tr_word_count_next;
+                stat_wr_req_start_valid_next = 1'b1;
+                // precompute the byte count of the following burst from the advanced address and count
+                if (op_count_next <= LEN_W'(AXI_MAX_BURST_SIZE) - LEN_W'(req_axi_addr_next & OFFSET_MASK) || AXI_MAX_BURST_SIZE >= 4096) begin
+                    // packet smaller than max burst size
+                    if ((12'(req_axi_addr_next & 12'hfff) + 12'(op_count_next & 12'hfff)) >> 12 != 0 || op_count_next >> 12 != 0) begin
+                        // crosses 4k boundary
+                        tr_word_count_next = 13'h1000 - req_axi_addr_next[11:0];
+                    end else begin
+                        // does not cross 4k boundary
+                        tr_word_count_next = 13'(op_count_next);
+                    end
+                end else begin
+                    // packet larger than max burst size
+                    if ((12'(req_axi_addr_next & 12'hfff) + 12'(AXI_MAX_BURST_SIZE)) >> 12 != 0) begin
+                        // crosses 4k boundary
+                        tr_word_count_next = 13'h1000 - req_axi_addr_next[11:0];
+                    end else begin
+                        // does not cross 4k boundary
+                        tr_word_count_next = 13'(AXI_MAX_BURST_SIZE) - 13'(req_axi_addr_next & OFFSET_MASK);
+                    end
+                end
+
+                if (!op_tbl_start_last) begin
+                    req_state_next = REQ_STATE_START;
+                end else begin
+                    wr_desc_req_ready_next = !op_tbl_active[op_tbl_start_ptr_reg[OP_TAG_W-1:0]] && ($unsigned(op_tbl_start_ptr_reg - op_tbl_finish_ptr_reg) < 2**OP_TAG_W) && enable;
+                    req_state_next = REQ_STATE_IDLE;
+                end
+            end else begin
+                req_state_next = REQ_STATE_START;
+            end
+        end
+    endcase
+end
+
+always_comb begin
+    read_state_next = READ_STATE_IDLE;
+
+    read_cmd_ready = 1'b0;
+
+    ram_rd_cmd_sel_next = ram_rd_cmd_sel_reg;
+    ram_rd_cmd_addr_next = ram_rd_cmd_addr_reg;
+    ram_rd_cmd_valid_next = ram_rd_cmd_valid_reg & ~dma_ram_rd.rd_cmd_ready;
+
+    read_axi_addr_next = read_axi_addr_reg;
+    read_ram_sel_next = read_ram_sel_reg;
+    read_ram_addr_next = read_ram_addr_reg;
+    read_imm_en_next = read_imm_en_reg;
+    read_len_next = read_len_reg;
+    read_ram_mask_next = read_ram_mask_reg;
+    read_ram_mask_0_next = read_ram_mask_0_reg;
+    read_ram_mask_1_next = read_ram_mask_1_reg;
+    ram_wrap_next = ram_wrap_reg;
+    read_cycle_count_next = read_cycle_count_reg;
+    read_last_cycle_next = read_last_cycle_reg;
+    
cycle_byte_count_next = cycle_byte_count_reg; + start_offset_next = start_offset_reg; + end_offset_next = end_offset_reg; + + mask_fifo_wr_mask = read_ram_mask_reg; + mask_fifo_we = 1'b0; + + // Read request generation + case (read_state_reg) + READ_STATE_IDLE: begin + // idle state, wait for read command + + read_axi_addr_next = read_cmd_axi_addr_reg; + read_ram_sel_next = read_cmd_ram_sel_reg; + read_ram_addr_next = read_cmd_ram_addr_reg; + read_imm_en_next = read_cmd_imm_en_reg; + read_len_next = read_cmd_len_reg; + read_cycle_count_next = read_cmd_cycle_count_reg; + read_last_cycle_next = read_cmd_last_cycle_reg; + + if (read_len_next > 13'(AXI_STRB_W)-13'(read_axi_addr_next & OFFSET_MASK)) begin + cycle_byte_count_next = (OFFSET_W+1)'(AXI_STRB_W)-(OFFSET_W+1)'(read_axi_addr_next & OFFSET_MASK); + end else begin + cycle_byte_count_next = (OFFSET_W+1)'(read_len_next); + end + start_offset_next = RAM_OFFSET_W'(read_ram_addr_next); + {ram_wrap_next, end_offset_next} = start_offset_next+cycle_byte_count_next-1; + + read_ram_mask_0_next = {RAM_SEGS{1'b1}} << (start_offset_next >> $clog2(RAM_SEG_BE_W)); + read_ram_mask_1_next = {RAM_SEGS{1'b1}} >> (RAM_SEGS-1-(end_offset_next >> $clog2(RAM_SEG_BE_W))); + + if (!ram_wrap_next) begin + read_ram_mask_next = read_ram_mask_0_next & read_ram_mask_1_next; + read_ram_mask_0_next = read_ram_mask_0_next & read_ram_mask_1_next; + read_ram_mask_1_next = '0; + end else begin + read_ram_mask_next = read_ram_mask_0_next | read_ram_mask_1_next; + end + + if (read_cmd_valid_reg) begin + read_cmd_ready = 1'b1; + read_state_next = READ_STATE_READ; + end else begin + read_state_next = READ_STATE_IDLE; + end + end + READ_STATE_READ: begin + // read state - start new read operations + + if ((dma_ram_rd.rd_cmd_valid & ~dma_ram_rd.rd_cmd_ready & read_ram_mask_reg) == 0 && !mask_fifo_full) begin + + // update counters + read_ram_addr_next = read_ram_addr_reg + RAM_ADDR_W'(cycle_byte_count_reg); + read_len_next = read_len_reg - 
13'(cycle_byte_count_reg); + read_cycle_count_next = read_cycle_count_reg - 1; + read_last_cycle_next = read_cycle_count_next == 0; + + for (integer i = 0; i < RAM_SEGS; i = i + 1) begin + if (read_ram_mask_reg[i]) begin + ram_rd_cmd_sel_next[i] = read_ram_sel_reg; + ram_rd_cmd_addr_next[i] = read_ram_addr_reg[RAM_ADDR_W-1:RAM_ADDR_W-RAM_SEG_ADDR_W]; + ram_rd_cmd_valid_next[i] = !(IMM_EN && read_imm_en_reg); + end + if (read_ram_mask_1_reg[i]) begin + ram_rd_cmd_addr_next[i] = read_ram_addr_reg[RAM_ADDR_W-1:RAM_ADDR_W-RAM_SEG_ADDR_W]+1; + end + end + + mask_fifo_wr_mask = (IMM_EN && read_imm_en_reg) ? 0 : read_ram_mask_reg; + mask_fifo_we = 1'b1; + + if (read_len_next > 13'(AXI_STRB_W)) begin + cycle_byte_count_next = (OFFSET_W+1)'(AXI_STRB_W); + end else begin + cycle_byte_count_next = (OFFSET_W+1)'(read_len_next); + end + start_offset_next = RAM_OFFSET_W'(read_ram_addr_next); + {ram_wrap_next, end_offset_next} = start_offset_next+cycle_byte_count_next-1; + + read_ram_mask_0_next = {RAM_SEGS{1'b1}} << (start_offset_next >> $clog2(RAM_SEG_BE_W)); + read_ram_mask_1_next = {RAM_SEGS{1'b1}} >> (RAM_SEGS-1-(end_offset_next >> $clog2(RAM_SEG_BE_W))); + + if (!ram_wrap_next) begin + read_ram_mask_next = read_ram_mask_0_next & read_ram_mask_1_next; + read_ram_mask_0_next = read_ram_mask_0_next & read_ram_mask_1_next; + read_ram_mask_1_next = '0; + end else begin + read_ram_mask_next = read_ram_mask_0_next | read_ram_mask_1_next; + end + + if (!read_last_cycle_reg) begin + read_state_next = READ_STATE_READ; + end else if (read_cmd_valid_reg) begin + + read_axi_addr_next = read_cmd_axi_addr_reg; + read_ram_sel_next = read_cmd_ram_sel_reg; + read_ram_addr_next = read_cmd_ram_addr_reg; + read_imm_en_next = read_cmd_imm_en_reg; + read_len_next = read_cmd_len_reg; + read_cycle_count_next = read_cmd_cycle_count_reg; + read_last_cycle_next = read_cmd_last_cycle_reg; + + if (read_len_next > 13'(AXI_STRB_W)-13'(read_axi_addr_next & OFFSET_MASK)) begin + cycle_byte_count_next = 
(OFFSET_W+1)'(AXI_STRB_W)-(OFFSET_W+1)'(read_axi_addr_next & OFFSET_MASK); + end else begin + cycle_byte_count_next = (OFFSET_W+1)'(read_len_next); + end + start_offset_next = RAM_OFFSET_W'(read_ram_addr_next); + {ram_wrap_next, end_offset_next} = start_offset_next+cycle_byte_count_next-1; + + read_ram_mask_0_next = {RAM_SEGS{1'b1}} << (start_offset_next >> $clog2(RAM_SEG_BE_W)); + read_ram_mask_1_next = {RAM_SEGS{1'b1}} >> (RAM_SEGS-1-(end_offset_next >> $clog2(RAM_SEG_BE_W))); + + if (!ram_wrap_next) begin + read_ram_mask_next = read_ram_mask_0_next & read_ram_mask_1_next; + read_ram_mask_0_next = read_ram_mask_0_next & read_ram_mask_1_next; + read_ram_mask_1_next = '0; + end else begin + read_ram_mask_next = read_ram_mask_0_next | read_ram_mask_1_next; + end + + read_cmd_ready = 1'b1; + + read_state_next = READ_STATE_READ; + end else begin + read_state_next = READ_STATE_IDLE; + end + end else begin + read_state_next = READ_STATE_READ; + end + end + endcase +end + +always_comb begin + axi_state_next = AXI_STATE_IDLE; + + ram_rd_resp_ready_cmb = '0; + + stat_wr_op_finish_tag_next = stat_wr_op_finish_tag_reg; + stat_wr_op_finish_status_next = stat_wr_op_finish_status_reg; + stat_wr_op_finish_valid_next = 1'b0; + stat_wr_req_finish_tag_next = stat_wr_req_finish_tag_reg; + stat_wr_req_finish_status_next = stat_wr_req_finish_status_reg; + stat_wr_req_finish_valid_next = 1'b0; + + axi_addr_next = axi_addr_reg; + axi_imm_next = axi_imm_reg; + axi_imm_en_next = axi_imm_en_reg; + axi_len_next = axi_len_reg; + axi_zero_len_next = axi_zero_len_reg; + offset_next = offset_reg; + strb_offset_mask_next = strb_offset_mask_reg; + last_cycle_offset_next = last_cycle_offset_reg; + ram_mask_next = ram_mask_reg; + ram_mask_valid_next = ram_mask_valid_reg; + cycle_count_next = cycle_count_reg; + last_cycle_next = last_cycle_reg; + + mask_fifo_rd_ptr_next = mask_fifo_rd_ptr_reg; + + op_tbl_tx_start_en = 1'b0; + op_tbl_tx_finish_en = 1'b0; + + m_axi_awid_next = m_axi_awid_reg; + 
m_axi_awaddr_next = m_axi_awaddr_reg; + m_axi_awlen_next = m_axi_awlen_reg; + m_axi_awvalid_next = m_axi_awvalid_reg && !m_axi_wr.awready; + m_axi_bready_next = 1'b0; + + m_axi_wdata_int = AXI_DATA_W'(((IMM_EN && axi_imm_en_reg) ? {2{RAM_DATA_W'(axi_imm_reg)}} : {2{dma_ram_rd.rd_resp_data}}) >> (RAM_DATA_W-offset_reg*AXI_WORD_SIZE)); + m_axi_wstrb_int = strb_offset_mask_reg; + m_axi_wlast_int = 1'b0; + m_axi_wvalid_int = 1'b0; + + // read response processing and AXI write generation + case (axi_state_reg) + AXI_STATE_IDLE: begin + // idle state, wait for command + ram_rd_resp_ready_cmb = '0; + + axi_addr_next = op_tbl_axi_addr[op_tbl_tx_start_ptr_reg[OP_TAG_W-1:0]]; + axi_imm_next = op_tbl_imm[op_tbl_tx_start_ptr_reg[OP_TAG_W-1:0]]; + axi_imm_en_next = op_tbl_imm_en[op_tbl_tx_start_ptr_reg[OP_TAG_W-1:0]]; + axi_len_next = op_tbl_len[op_tbl_tx_start_ptr_reg[OP_TAG_W-1:0]]; + axi_zero_len_next = op_tbl_zero_len[op_tbl_tx_start_ptr_reg[OP_TAG_W-1:0]]; + offset_next = op_tbl_offset[op_tbl_tx_start_ptr_reg[OP_TAG_W-1:0]]; + strb_offset_mask_next = axi_zero_len_next ? 
'0 : ({AXI_STRB_W{1'b1}} << (axi_addr_next & OFFSET_MASK)); + last_cycle_offset_next = OFFSET_W'(axi_addr_next) + OFFSET_W'(axi_len_next & OFFSET_MASK); + cycle_count_next = op_tbl_cycle_count[op_tbl_tx_start_ptr_reg[OP_TAG_W-1:0]]; + last_cycle_next = op_tbl_cycle_count[op_tbl_tx_start_ptr_reg[OP_TAG_W-1:0]] == 0; + + if (op_tbl_active[op_tbl_tx_start_ptr_reg[OP_TAG_W-1:0]] && op_tbl_tx_start_ptr_reg != op_tbl_start_ptr_reg && (!m_axi_awvalid_reg || m_axi_wr.awready)) begin + m_axi_awid_next = op_tbl_tx_start_ptr_reg[OP_TAG_W-1:0]; + m_axi_awaddr_next = axi_addr_next; + m_axi_awlen_next = 8'(cycle_count_next); + m_axi_awvalid_next = 1'b1; + op_tbl_tx_start_en = 1'b1; + axi_state_next = AXI_STATE_TRANSFER; + end else begin + axi_state_next = AXI_STATE_IDLE; + end + end + AXI_STATE_TRANSFER: begin + // transfer state, transfer data + ram_rd_resp_ready_cmb = '0; + + if ((ram_mask_reg & ~dma_ram_rd.rd_resp_valid) == 0 && ram_mask_valid_reg && m_axi_wready_int) begin + // transfer in read data + ram_rd_resp_ready_cmb = ram_mask_reg; + ram_mask_valid_next = 1'b0; + + // update counters + cycle_count_next = cycle_count_reg - 1; + last_cycle_next = cycle_count_next == 0; + offset_next = offset_reg + RAM_OFFSET_W'(AXI_STRB_W); + strb_offset_mask_next = '1; + + m_axi_wdata_int = AXI_DATA_W'(((IMM_EN && axi_imm_en_reg) ? 
{2{RAM_DATA_W'(axi_imm_reg)}} : {2{dma_ram_rd.rd_resp_data}}) >> (RAM_DATA_W-offset_reg*AXI_WORD_SIZE)); + m_axi_wstrb_int = strb_offset_mask_reg; + m_axi_wlast_int = 1'b0; + m_axi_wvalid_int = 1'b1; + + if (last_cycle_reg) begin + // no more data to transfer, finish operation + m_axi_wlast_int = 1'b1; + op_tbl_tx_finish_en = 1'b1; + + if (last_cycle_offset_reg != 0) begin + m_axi_wstrb_int = strb_offset_mask_reg & {AXI_STRB_W{1'b1}} >> ((OFFSET_W+1)'(AXI_STRB_W) - last_cycle_offset_reg); + end + + // skip idle state if possible + axi_addr_next = op_tbl_axi_addr[op_tbl_tx_start_ptr_reg[OP_TAG_W-1:0]]; + axi_imm_next = op_tbl_imm[op_tbl_tx_start_ptr_reg[OP_TAG_W-1:0]]; + axi_imm_en_next = op_tbl_imm_en[op_tbl_tx_start_ptr_reg[OP_TAG_W-1:0]]; + axi_len_next = op_tbl_len[op_tbl_tx_start_ptr_reg[OP_TAG_W-1:0]]; + axi_zero_len_next = op_tbl_zero_len[op_tbl_tx_start_ptr_reg[OP_TAG_W-1:0]]; + offset_next = op_tbl_offset[op_tbl_tx_start_ptr_reg[OP_TAG_W-1:0]]; + strb_offset_mask_next = axi_zero_len_next ? 
'0 : ({AXI_STRB_W{1'b1}} << (axi_addr_next & OFFSET_MASK)); + last_cycle_offset_next = OFFSET_W'(axi_addr_next) + OFFSET_W'(axi_len_next & OFFSET_MASK); + cycle_count_next = op_tbl_cycle_count[op_tbl_tx_start_ptr_reg[OP_TAG_W-1:0]]; + last_cycle_next = op_tbl_cycle_count[op_tbl_tx_start_ptr_reg[OP_TAG_W-1:0]] == 0; + + if (op_tbl_active[op_tbl_tx_start_ptr_reg[OP_TAG_W-1:0]] && op_tbl_tx_start_ptr_reg != op_tbl_start_ptr_reg && (!m_axi_awvalid_reg || m_axi_wr.awready)) begin + m_axi_awid_next = op_tbl_tx_start_ptr_reg[OP_TAG_W-1:0]; + m_axi_awaddr_next = axi_addr_next; + m_axi_awlen_next = 8'(cycle_count_next); + m_axi_awvalid_next = 1'b1; + op_tbl_tx_start_en = 1'b1; + axi_state_next = AXI_STATE_TRANSFER; + end else begin + axi_state_next = AXI_STATE_IDLE; + end + end else begin + axi_state_next = AXI_STATE_TRANSFER; + end + end else begin + axi_state_next = AXI_STATE_TRANSFER; + end + end + endcase + + if (!ram_mask_valid_next && !mask_fifo_empty) begin + ram_mask_next = mask_fifo_mask[mask_fifo_rd_ptr_reg[MASK_FIFO_AW-1:0]]; + ram_mask_valid_next = 1'b1; + mask_fifo_rd_ptr_next = mask_fifo_rd_ptr_reg+1; + end + + op_tbl_write_complete_ptr = m_axi_wr.bid; + if (m_axi_wr.bresp == AXI_RESP_SLVERR) begin + op_tbl_write_complete_error = DMA_ERROR_AXI_WR_SLVERR; + end else if (m_axi_wr.bresp == AXI_RESP_DECERR) begin + op_tbl_write_complete_error = DMA_ERROR_AXI_WR_DECERR; + end else begin + op_tbl_write_complete_error = DMA_ERROR_NONE; + end + op_tbl_write_complete_en = 1'b0; + + wr_desc_sts_tag_next = op_tbl_tag[op_tbl_finish_ptr_reg[OP_TAG_W-1:0]]; + if (wr_desc_sts_valid_reg) begin + wr_desc_sts_error_next = DMA_ERROR_NONE; + end else begin + wr_desc_sts_error_next = wr_desc_sts_error_reg; + end + wr_desc_sts_valid_next = 1'b0; + + stat_wr_req_finish_status_next = op_tbl_write_complete_error; + stat_wr_req_finish_valid_next = 1'b0; + + stat_wr_op_finish_tag_next = stat_wr_op_finish_tag_reg; + stat_wr_op_finish_status_next = wr_desc_sts_error_next; + 
stat_wr_op_finish_valid_next = 1'b0; + + if (USE_AXI_ID) begin + // accept write completions + stat_wr_req_finish_tag_next = m_axi_wr.bid; + + m_axi_bready_next = 1'b1; + if (m_axi_wr.bready && m_axi_wr.bvalid) begin + op_tbl_write_complete_ptr = m_axi_wr.bid; + op_tbl_write_complete_en = 1'b1; + stat_wr_req_finish_valid_next = 1'b1; + end + + // commit operations in-order + op_tbl_finish_en = 1'b0; + dec_active_op = 1'b0; + + if (op_tbl_active[op_tbl_finish_ptr_reg[OP_TAG_W-1:0]] && op_tbl_write_complete[op_tbl_finish_ptr_reg[OP_TAG_W-1:0]] && op_tbl_finish_ptr_reg != op_tbl_tx_finish_ptr_reg) begin + op_tbl_finish_en = 1'b1; + dec_active_op = 1'b1; + + if (op_tbl_error_code[op_tbl_finish_ptr_reg[OP_TAG_W-1:0]] != DMA_ERROR_NONE) begin + wr_desc_sts_error_next = op_tbl_error_code[op_tbl_finish_ptr_reg[OP_TAG_W-1:0]]; + end + + stat_wr_op_finish_status_next = wr_desc_sts_error_next; + + if (op_tbl_last[op_tbl_finish_ptr_reg[OP_TAG_W-1:0]]) begin + wr_desc_sts_tag_next = op_tbl_tag[op_tbl_finish_ptr_reg[OP_TAG_W-1:0]]; + wr_desc_sts_valid_next = 1'b1; + stat_wr_op_finish_tag_next = stat_wr_op_finish_tag_reg + 1; + stat_wr_op_finish_valid_next = 1'b1; + end + end + end else begin + // accept write completions + op_tbl_finish_en = 1'b0; + dec_active_op = 1'b0; + + stat_wr_req_finish_tag_next = op_tbl_finish_ptr_reg[OP_TAG_W-1:0]; + + m_axi_bready_next = 1'b1; + if (m_axi_wr.bready && m_axi_wr.bvalid) begin + op_tbl_finish_en = 1'b1; + dec_active_op = 1'b1; + stat_wr_req_finish_valid_next = 1'b1; + + if (m_axi_wr.bresp == AXI_RESP_SLVERR) begin + wr_desc_sts_error_next = DMA_ERROR_AXI_WR_SLVERR; + end else if (m_axi_wr.bresp == AXI_RESP_DECERR) begin + wr_desc_sts_error_next = DMA_ERROR_AXI_WR_DECERR; + end + + stat_wr_op_finish_status_next = wr_desc_sts_error_next; + + if (op_tbl_last[op_tbl_finish_ptr_reg[OP_TAG_W-1:0]]) begin + wr_desc_sts_tag_next = op_tbl_tag[op_tbl_finish_ptr_reg[OP_TAG_W-1:0]]; + wr_desc_sts_valid_next = 1'b1; + stat_wr_op_finish_tag_next = 
stat_wr_op_finish_tag_reg + 1; + stat_wr_op_finish_valid_next = 1'b1; + end + end + end +end + +always_ff @(posedge clk) begin + req_state_reg <= req_state_next; + read_state_reg <= read_state_next; + axi_state_reg <= axi_state_next; + + req_axi_addr_reg <= req_axi_addr_next; + ram_sel_reg <= ram_sel_next; + ram_addr_reg <= ram_addr_next; + imm_reg <= imm_next; + imm_en_reg <= imm_en_next; + op_count_reg <= op_count_next; + zero_len_reg <= zero_len_next; + tr_count_reg <= tr_count_next; + tr_word_count_reg <= tr_word_count_next; + tag_reg <= tag_next; + + read_axi_addr_reg <= read_axi_addr_next; + read_ram_sel_reg <= read_ram_sel_next; + read_ram_addr_reg <= read_ram_addr_next; + read_imm_en_reg <= read_imm_en_next; + read_len_reg <= read_len_next; + read_ram_mask_reg <= read_ram_mask_next; + read_ram_mask_0_reg <= read_ram_mask_0_next; + read_ram_mask_1_reg <= read_ram_mask_1_next; + ram_wrap_reg <= ram_wrap_next; + read_cycle_count_reg <= read_cycle_count_next; + read_last_cycle_reg <= read_last_cycle_next; + cycle_byte_count_reg <= cycle_byte_count_next; + start_offset_reg <= start_offset_next; + end_offset_reg <= end_offset_next; + + axi_addr_reg <= axi_addr_next; + axi_imm_reg <= axi_imm_next; + axi_imm_en_reg <= axi_imm_en_next; + axi_len_reg <= axi_len_next; + axi_zero_len_reg <= axi_zero_len_next; + offset_reg <= offset_next; + strb_offset_mask_reg <= strb_offset_mask_next; + last_cycle_offset_reg <= last_cycle_offset_next; + ram_mask_reg <= ram_mask_next; + ram_mask_valid_reg <= ram_mask_valid_next; + cycle_count_reg <= cycle_count_next; + last_cycle_reg <= last_cycle_next; + + read_cmd_axi_addr_reg <= read_cmd_axi_addr_next; + read_cmd_ram_sel_reg <= read_cmd_ram_sel_next; + read_cmd_ram_addr_reg <= read_cmd_ram_addr_next; + read_cmd_imm_en_reg <= read_cmd_imm_en_next; + read_cmd_len_reg <= read_cmd_len_next; + read_cmd_cycle_count_reg <= read_cmd_cycle_count_next; + read_cmd_last_cycle_reg <= read_cmd_last_cycle_next; + read_cmd_valid_reg <= 
read_cmd_valid_next; + + m_axi_awid_reg <= m_axi_awid_next; + m_axi_awaddr_reg <= m_axi_awaddr_next; + m_axi_awlen_reg <= m_axi_awlen_next; + m_axi_awvalid_reg <= m_axi_awvalid_next; + m_axi_bready_reg <= m_axi_bready_next; + + wr_desc_req_ready_reg <= wr_desc_req_ready_next; + + wr_desc_sts_tag_reg <= wr_desc_sts_tag_next; + wr_desc_sts_error_reg <= wr_desc_sts_error_next; + wr_desc_sts_valid_reg <= wr_desc_sts_valid_next; + + status_busy_reg <= active_op_count_reg != 0; + + stat_wr_op_start_tag_reg <= stat_wr_op_start_tag_next; + stat_wr_op_start_valid_reg <= stat_wr_op_start_valid_next; + stat_wr_op_finish_tag_reg <= stat_wr_op_finish_tag_next; + stat_wr_op_finish_status_reg <= stat_wr_op_finish_status_next; + stat_wr_op_finish_valid_reg <= stat_wr_op_finish_valid_next; + stat_wr_req_start_tag_reg <= stat_wr_req_start_tag_next; + stat_wr_req_start_len_reg <= stat_wr_req_start_len_next; + stat_wr_req_start_valid_reg <= stat_wr_req_start_valid_next; + stat_wr_req_finish_tag_reg <= stat_wr_req_finish_tag_next; + stat_wr_req_finish_status_reg <= stat_wr_req_finish_status_next; + stat_wr_req_finish_valid_reg <= stat_wr_req_finish_valid_next; + stat_wr_op_tbl_full_reg <= stat_wr_op_tbl_full_next; + stat_wr_tx_stall_reg <= stat_wr_tx_stall_next; + + ram_rd_cmd_sel_reg <= ram_rd_cmd_sel_next; + ram_rd_cmd_addr_reg <= ram_rd_cmd_addr_next; + ram_rd_cmd_valid_reg <= ram_rd_cmd_valid_next; + + active_op_count_reg <= active_op_count_reg + OP_TAG_W'(inc_active_op) - OP_TAG_W'(dec_active_op); + + if (mask_fifo_we) begin + mask_fifo_mask[mask_fifo_wr_ptr_reg[MASK_FIFO_AW-1:0]] <= mask_fifo_wr_mask; + mask_fifo_wr_ptr_reg <= mask_fifo_wr_ptr_reg + 1; + end + mask_fifo_rd_ptr_reg <= mask_fifo_rd_ptr_next; + + if (op_tbl_start_en) begin + op_tbl_start_ptr_reg <= op_tbl_start_ptr_reg + 1; + op_tbl_active[op_tbl_start_ptr_reg[OP_TAG_W-1:0]] <= 1'b1; + op_tbl_write_complete[op_tbl_start_ptr_reg[OP_TAG_W-1:0]] <= 1'b0; + op_tbl_axi_addr[op_tbl_start_ptr_reg[OP_TAG_W-1:0]] <= 
op_tbl_start_axi_addr; + op_tbl_imm[op_tbl_start_ptr_reg[OP_TAG_W-1:0]] <= op_tbl_start_imm; + op_tbl_imm_en[op_tbl_start_ptr_reg[OP_TAG_W-1:0]] <= op_tbl_start_imm_en; + op_tbl_len[op_tbl_start_ptr_reg[OP_TAG_W-1:0]] <= op_tbl_start_len; + op_tbl_zero_len[op_tbl_start_ptr_reg[OP_TAG_W-1:0]] <= op_tbl_start_zero_len; + op_tbl_cycle_count[op_tbl_start_ptr_reg[OP_TAG_W-1:0]] <= op_tbl_start_cycle_count; + op_tbl_offset[op_tbl_start_ptr_reg[OP_TAG_W-1:0]] <= op_tbl_start_offset; + op_tbl_tag[op_tbl_start_ptr_reg[OP_TAG_W-1:0]] <= op_tbl_start_tag; + op_tbl_last[op_tbl_start_ptr_reg[OP_TAG_W-1:0]] <= op_tbl_start_last; + end + + if (op_tbl_tx_start_en) begin + op_tbl_tx_start_ptr_reg <= op_tbl_tx_start_ptr_reg + 1; + end + + if (op_tbl_tx_finish_en) begin + op_tbl_tx_finish_ptr_reg <= op_tbl_tx_finish_ptr_reg + 1; + end + + if (USE_AXI_ID && op_tbl_write_complete_en) begin + op_tbl_write_complete[op_tbl_write_complete_ptr] <= 1'b1; + op_tbl_error_code[op_tbl_write_complete_ptr] <= op_tbl_write_complete_error; + end + + if (op_tbl_finish_en) begin + op_tbl_finish_ptr_reg <= op_tbl_finish_ptr_reg + 1; + op_tbl_active[op_tbl_finish_ptr_reg[OP_TAG_W-1:0]] <= 1'b0; + end + + if (rst) begin + req_state_reg <= REQ_STATE_IDLE; + read_state_reg <= READ_STATE_IDLE; + axi_state_reg <= AXI_STATE_IDLE; + + read_cmd_valid_reg <= 1'b0; + + ram_mask_valid_reg <= 1'b0; + + m_axi_awvalid_reg <= 1'b0; + m_axi_bready_reg <= 1'b0; + + wr_desc_req_ready_reg <= 1'b0; + wr_desc_sts_error_reg <= 4'd0; + wr_desc_sts_valid_reg <= 1'b0; + + status_busy_reg <= 1'b0; + + stat_wr_op_start_tag_reg <= '0; + stat_wr_op_start_valid_reg <= 1'b0; + stat_wr_op_finish_tag_reg <= '0; + stat_wr_op_finish_valid_reg <= 1'b0; + stat_wr_req_start_valid_reg <= 1'b0; + stat_wr_req_finish_valid_reg <= 1'b0; + stat_wr_op_tbl_full_reg <= 1'b0; + stat_wr_tx_stall_reg <= 1'b0; + + ram_rd_cmd_valid_reg <= '0; + + active_op_count_reg <= '0; + + mask_fifo_wr_ptr_reg <= '0; + mask_fifo_rd_ptr_reg <= '0; + + 
op_tbl_start_ptr_reg <= '0; + op_tbl_tx_start_ptr_reg <= '0; + op_tbl_tx_finish_ptr_reg <= '0; + op_tbl_finish_ptr_reg <= '0; + op_tbl_active <= '0; + end +end + +// output datapath logic +logic [AXI_DATA_W-1:0] m_axi_wdata_reg = '0; +logic [AXI_STRB_W-1:0] m_axi_wstrb_reg = '0; +logic m_axi_wlast_reg = 1'b0; +logic m_axi_wvalid_reg = 1'b0; + +logic [OUTPUT_FIFO_AW+1-1:0] out_fifo_wr_ptr_reg = '0; +logic [OUTPUT_FIFO_AW+1-1:0] out_fifo_rd_ptr_reg = '0; +logic out_fifo_half_full_reg = 1'b0; + +wire out_fifo_full = out_fifo_wr_ptr_reg == (out_fifo_rd_ptr_reg ^ {1'b1, {OUTPUT_FIFO_AW{1'b0}}}); +wire out_fifo_empty = out_fifo_wr_ptr_reg == out_fifo_rd_ptr_reg; + +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +logic [AXI_DATA_W-1:0] out_fifo_wdata[2**OUTPUT_FIFO_AW]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +logic [AXI_STRB_W-1:0] out_fifo_wstrb[2**OUTPUT_FIFO_AW]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +logic out_fifo_wlast[2**OUTPUT_FIFO_AW]; + +assign m_axi_wready_int = !out_fifo_half_full_reg; + +assign m_axi_wr.wdata = m_axi_wdata_reg; +assign m_axi_wr.wstrb = m_axi_wstrb_reg; +assign m_axi_wr.wvalid = m_axi_wvalid_reg; +assign m_axi_wr.wlast = m_axi_wlast_reg; + +always_ff @(posedge clk) begin + m_axi_wvalid_reg <= m_axi_wvalid_reg && !m_axi_wr.wready; + + out_fifo_half_full_reg <= $unsigned(out_fifo_wr_ptr_reg - out_fifo_rd_ptr_reg) >= 2**(OUTPUT_FIFO_AW-1); + + if (!out_fifo_full && m_axi_wvalid_int) begin + out_fifo_wdata[out_fifo_wr_ptr_reg[OUTPUT_FIFO_AW-1:0]] <= m_axi_wdata_int; + out_fifo_wstrb[out_fifo_wr_ptr_reg[OUTPUT_FIFO_AW-1:0]] <= m_axi_wstrb_int; + out_fifo_wlast[out_fifo_wr_ptr_reg[OUTPUT_FIFO_AW-1:0]] <= m_axi_wlast_int; + out_fifo_wr_ptr_reg <= out_fifo_wr_ptr_reg + 1; + end + + if (!out_fifo_empty && (!m_axi_wvalid_reg || m_axi_wr.wready)) begin + m_axi_wdata_reg <= out_fifo_wdata[out_fifo_rd_ptr_reg[OUTPUT_FIFO_AW-1:0]]; + m_axi_wstrb_reg <= 
out_fifo_wstrb[out_fifo_rd_ptr_reg[OUTPUT_FIFO_AW-1:0]]; + m_axi_wlast_reg <= out_fifo_wlast[out_fifo_rd_ptr_reg[OUTPUT_FIFO_AW-1:0]]; + m_axi_wvalid_reg <= 1'b1; + out_fifo_rd_ptr_reg <= out_fifo_rd_ptr_reg + 1; + end + + if (rst) begin + out_fifo_wr_ptr_reg <= '0; + out_fifo_rd_ptr_reg <= '0; + m_axi_wvalid_reg <= 1'b0; + end +end + +endmodule + +`resetall diff --git a/src/dma/tb/taxi_dma_if_axi/Makefile b/src/dma/tb/taxi_dma_if_axi/Makefile new file mode 100644 index 0000000..0b85a36 --- /dev/null +++ b/src/dma/tb/taxi_dma_if_axi/Makefile @@ -0,0 +1,65 @@ +# SPDX-License-Identifier: CERN-OHL-S-2.0 +# +# Copyright (c) 2020-2025 FPGA Ninja, LLC +# +# Authors: +# - Alex Forencich + +TOPLEVEL_LANG = verilog + +SIM ?= verilator +WAVES ?= 0 + +COCOTB_HDL_TIMEUNIT = 1ns +COCOTB_HDL_TIMEPRECISION = 1ps + +RTL_DIR = ../../rtl +LIB_DIR = ../../lib +TAXI_SRC_DIR = $(LIB_DIR)/taxi/src + +DUT = taxi_dma_if_axi +COCOTB_TEST_MODULES = test_$(DUT) +COCOTB_TOPLEVEL = test_$(DUT) +MODULE = $(COCOTB_TEST_MODULES) +TOPLEVEL = $(COCOTB_TOPLEVEL) +VERILOG_SOURCES += $(COCOTB_TOPLEVEL).sv +VERILOG_SOURCES += $(RTL_DIR)/$(DUT).f + +# handle file list files +process_f_file = $(call process_f_files,$(addprefix $(dir $1),$(shell cat $1))) +process_f_files = $(foreach f,$1,$(if $(filter %.f,$f),$(call process_f_file,$f),$f)) +uniq_base = $(if $1,$(call uniq_base,$(foreach f,$1,$(if $(filter-out $(notdir $(lastword $1)),$(notdir $f)),$f,))) $(lastword $1)) +VERILOG_SOURCES := $(call uniq_base,$(call process_f_files,$(VERILOG_SOURCES))) + +# module parameters +export PARAM_AXI_DATA_W := 64 +export PARAM_AXI_ADDR_W := 16 +export PARAM_AXI_STRB_W := $(shell expr $(PARAM_AXI_DATA_W) / 8 ) +export PARAM_AXI_ID_W := 8 +export PARAM_AXI_MAX_BURST_LEN := 256 +export PARAM_RAM_SEL_W := 2 +export PARAM_RAM_ADDR_W := 16 +export PARAM_RAM_SEGS := 2 +export PARAM_IMM_EN := 1 +export PARAM_IMM_W := $(PARAM_AXI_DATA_W) +export PARAM_LEN_W := 16 +export PARAM_TAG_W := 8 +export PARAM_RD_OP_TBL_SIZE := 
$(shell python -c "print(2**$(PARAM_AXI_ID_W))") +export PARAM_WR_OP_TBL_SIZE := $(shell python -c "print(2**$(PARAM_AXI_ID_W))") +export PARAM_RD_USE_AXI_ID := 0 +export PARAM_WR_USE_AXI_ID := 1 + +ifeq ($(SIM), icarus) + PLUSARGS += -fst + + COMPILE_ARGS += $(foreach v,$(filter PARAM_%,$(.VARIABLES)),-P $(COCOTB_TOPLEVEL).$(subst PARAM_,,$(v))=$($(v))) +else ifeq ($(SIM), verilator) + COMPILE_ARGS += $(foreach v,$(filter PARAM_%,$(.VARIABLES)),-G$(subst PARAM_,,$(v))=$($(v))) + + ifeq ($(WAVES), 1) + COMPILE_ARGS += --trace-fst + VERILATOR_TRACE = 1 + endif +endif + +include $(shell cocotb-config --makefiles)/Makefile.sim diff --git a/src/dma/tb/taxi_dma_if_axi/dma_psdp_ram.py b/src/dma/tb/taxi_dma_if_axi/dma_psdp_ram.py new file mode 120000 index 0000000..6613351 --- /dev/null +++ b/src/dma/tb/taxi_dma_if_axi/dma_psdp_ram.py @@ -0,0 +1 @@ +../dma_psdp_ram.py \ No newline at end of file diff --git a/src/dma/tb/taxi_dma_if_axi/test_taxi_dma_if_axi.py b/src/dma/tb/taxi_dma_if_axi/test_taxi_dma_if_axi.py new file mode 100644 index 0000000..bc80a83 --- /dev/null +++ b/src/dma/tb/taxi_dma_if_axi/test_taxi_dma_if_axi.py @@ -0,0 +1,347 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: CERN-OHL-S-2.0 +""" + +Copyright (c) 2021-2025 FPGA Ninja, LLC + +Authors: +- Alex Forencich + +""" + +import itertools +import logging +import os +import sys + +import cocotb_test.simulator +import pytest + +import cocotb +from cocotb.clock import Clock +from cocotb.triggers import RisingEdge +from cocotb.regression import TestFactory + +from cocotbext.axi import AxiBus, AxiRam +from cocotbext.axi.stream import define_stream + +try: + from dma_psdp_ram import PsdpRam, PsdpRamBus +except ImportError: + # attempt import from current directory + sys.path.insert(0, os.path.join(os.path.dirname(__file__))) + try: + from dma_psdp_ram import PsdpRam, PsdpRamBus + finally: + del sys.path[0] + +DescBus, DescTransaction, DescSource, DescSink, DescMonitor = define_stream("Desc", + 
signals=["req_src_addr", "req_src_sel", "req_src_asid", "req_dst_addr", "req_dst_sel", "req_dst_asid", "req_len", "req_tag", "req_valid", "req_ready"], + optional_signals=["req_imm", "req_imm_en", "req_id", "req_dest", "req_user"] +) + +DescStatusBus, DescStatusTransaction, DescStatusSource, DescStatusSink, DescStatusMonitor = define_stream("DescStatus", + signals=["sts_tag", "sts_error", "sts_valid"], + optional_signals=["sts_len", "sts_id", "sts_dest", "sts_user"] +) + + +class TB(object): + def __init__(self, dut): + self.dut = dut + + self.log = logging.getLogger("cocotb.tb") + self.log.setLevel(logging.DEBUG) + + cocotb.start_soon(Clock(dut.clk, 10, units="ns").start()) + + # AXI RAM + self.axi_ram = AxiRam(AxiBus.from_entity(dut.m_axi), dut.clk, dut.rst, size=2**16) + + # DMA RAM + self.dma_ram = PsdpRam(PsdpRamBus.from_entity(dut.dma_ram), dut.clk, dut.rst, size=2**16) + + # Control + self.read_desc_source = DescSource(DescBus.from_entity(dut.rd_desc), dut.clk, dut.rst) + self.read_desc_status_sink = DescStatusSink(DescStatusBus.from_entity(dut.rd_desc), dut.clk, dut.rst) + + self.write_desc_source = DescSource(DescBus.from_entity(dut.wr_desc), dut.clk, dut.rst) + self.write_desc_status_sink = DescStatusSink(DescStatusBus.from_entity(dut.wr_desc), dut.clk, dut.rst) + + dut.read_enable.setimmediatevalue(0) + dut.write_enable.setimmediatevalue(0) + + def set_idle_generator(self, generator=None): + if generator: + self.axi_ram.write_if.b_channel.set_pause_generator(generator()) + self.axi_ram.read_if.r_channel.set_pause_generator(generator()) + + def set_backpressure_generator(self, generator=None): + if generator: + self.axi_ram.write_if.aw_channel.set_pause_generator(generator()) + self.axi_ram.write_if.w_channel.set_pause_generator(generator()) + self.axi_ram.read_if.ar_channel.set_pause_generator(generator()) + self.dma_ram.write_if.set_pause_generator(generator()) + self.dma_ram.read_if.set_pause_generator(generator()) + + async def cycle_reset(self): + 
self.dut.rst.setimmediatevalue(0) + await RisingEdge(self.dut.clk) + await RisingEdge(self.dut.clk) + self.dut.rst.value = 1 + await RisingEdge(self.dut.clk) + await RisingEdge(self.dut.clk) + self.dut.rst.value = 0 + await RisingEdge(self.dut.clk) + await RisingEdge(self.dut.clk) + + +async def run_test_write(dut, idle_inserter=None, backpressure_inserter=None): + + tb = TB(dut) + + axi_byte_lanes = tb.axi_ram.write_if.byte_lanes + ram_byte_lanes = tb.dma_ram.write_if.byte_lanes + tag_count = 2**len(tb.write_desc_source.bus.req_tag) + + axi_offsets = list(range(axi_byte_lanes+1))+list(range(4096-axi_byte_lanes, 4096)) + if os.getenv("OFFSET_GROUP") is not None: + group = int(os.getenv("OFFSET_GROUP")) + axi_offsets = axi_offsets[group::8] + + cur_tag = 1 + + tb.set_idle_generator(idle_inserter) + tb.set_backpressure_generator(backpressure_inserter) + + await tb.cycle_reset() + + tb.dut.write_enable.value = 1 + + for length in list(range(1, ram_byte_lanes+3))+list(range(128-4, 128+4))+[1024]: + for axi_offset in axi_offsets: + for ram_offset in range(1): + tb.log.info("length %d, axi_offset %d, ram_offset %d", length, axi_offset, ram_offset) + axi_addr = axi_offset+0x1000 + ram_addr = ram_offset+0x1000 + test_data = bytearray([x % 256 for x in range(length)]) + + tb.dma_ram.write(ram_addr & 0xffff80, b'\x55'*(len(test_data)+256)) + tb.axi_ram.write(axi_addr-128, b'\xaa'*(len(test_data)+256)) + tb.dma_ram.write(ram_addr, test_data) + + tb.log.debug("%s", tb.dma_ram.hexdump_str((ram_addr & ~0xf)-16, (((ram_addr & 0xf)+length-1) & ~0xf)+48, prefix="RAM ")) + + desc = DescTransaction(req_dst_addr=axi_addr, req_src_addr=ram_addr, req_src_sel=0, req_len=len(test_data), req_tag=cur_tag) + await tb.write_desc_source.send(desc) + + status = await tb.write_desc_status_sink.recv() + + tb.log.info("status: %s", status) + + assert int(status.sts_tag) == cur_tag + assert int(status.sts_error) == 0 + + tb.log.debug("%s", tb.axi_ram.hexdump_str((axi_addr & ~0xf)-16, (((axi_addr & 
0xf)+length-1) & ~0xf)+48, prefix="AXI ")) + + assert tb.axi_ram.read(axi_addr-1, len(test_data)+2) == b'\xaa'+test_data+b'\xaa' + + cur_tag = (cur_tag + 1) % tag_count + + await RisingEdge(dut.clk) + await RisingEdge(dut.clk) + + +async def run_test_read(dut, idle_inserter=None, backpressure_inserter=None): + + tb = TB(dut) + + axi_byte_lanes = tb.axi_ram.write_if.byte_lanes + ram_byte_lanes = tb.dma_ram.write_if.byte_lanes + tag_count = 2**len(tb.read_desc_source.bus.req_tag) + + axi_offsets = list(range(axi_byte_lanes+1))+list(range(4096-axi_byte_lanes, 4096)) + if os.getenv("OFFSET_GROUP") is not None: + group = int(os.getenv("OFFSET_GROUP")) + axi_offsets = axi_offsets[group::8] + + cur_tag = 1 + + tb.set_idle_generator(idle_inserter) + tb.set_backpressure_generator(backpressure_inserter) + + await tb.cycle_reset() + + tb.dut.read_enable.value = 1 + + for length in list(range(1, ram_byte_lanes+3))+list(range(128-4, 128+4))+[1024]: + for axi_offset in axi_offsets: + for ram_offset in range(1): + tb.log.info("length %d, axi_offset %d, ram_offset %d", length, axi_offset, ram_offset) + axi_addr = axi_offset+0x1000 + ram_addr = ram_offset+0x1000 + test_data = bytearray([x % 256 for x in range(length)]) + + tb.axi_ram.write(axi_addr, test_data) + + tb.log.debug("%s", tb.axi_ram.hexdump_str((axi_addr & ~0xf)-16, (((axi_addr & 0xf)+length-1) & ~0xf)+48, prefix="AXI ")) + + tb.dma_ram.write(ram_addr-256, b'\xaa'*(len(test_data)+512)) + + desc = DescTransaction(req_src_addr=axi_addr, req_dst_addr=ram_addr, req_dst_sel=0, req_len=len(test_data), req_tag=cur_tag) + await tb.read_desc_source.send(desc) + + status = await tb.read_desc_status_sink.recv() + + tb.log.info("status: %s", status) + + assert int(status.sts_tag) == cur_tag + assert int(status.sts_error) == 0 + + tb.log.debug("%s", tb.dma_ram.hexdump_str((ram_addr & ~0xf)-16, (((ram_addr & 0xf)+length-1) & ~0xf)+48, prefix="RAM ")) + + assert tb.dma_ram.read(ram_addr-8, len(test_data)+16) == 
b'\xaa'*8+test_data+b'\xaa'*8 + + cur_tag = (cur_tag + 1) % tag_count + + await RisingEdge(dut.clk) + await RisingEdge(dut.clk) + + +async def run_test_write_imm(dut, idle_inserter=None, backpressure_inserter=None): + + tb = TB(dut) + + axi_byte_lanes = tb.axi_ram.write_if.byte_lanes + tag_count = 2**len(tb.write_desc_source.bus.req_tag) + + axi_offsets = list(range(axi_byte_lanes+1))+list(range(4096-axi_byte_lanes, 4096)) + if os.getenv("OFFSET_GROUP") is not None: + group = int(os.getenv("OFFSET_GROUP")) + axi_offsets = axi_offsets[group::8] + + cur_tag = 1 + + tb.set_idle_generator(idle_inserter) + tb.set_backpressure_generator(backpressure_inserter) + + await tb.cycle_reset() + + tb.dut.write_enable.value = 1 + + for length in list(range(1, len(dut.wr_desc.req_imm) // 8)): + for axi_offset in axi_offsets: + tb.log.info("length %d, axi_offset %d", length, axi_offset) + axi_addr = axi_offset+0x1000 + test_data = bytearray([x % 256 for x in range(length)]) + imm = int.from_bytes(test_data, 'little') + + tb.axi_ram.write(axi_addr-128, b'\xaa'*(len(test_data)+256)) + + tb.log.debug("Immediate: 0x%x", imm) + + desc = DescTransaction(req_dst_addr=axi_addr, req_src_addr=0, req_src_sel=0, req_imm=imm, req_imm_en=1, req_len=len(test_data), req_tag=cur_tag) + await tb.write_desc_source.send(desc) + + status = await tb.write_desc_status_sink.recv() + + tb.log.info("status: %s", status) + + assert int(status.sts_tag) == cur_tag + assert int(status.sts_error) == 0 + + tb.log.debug("%s", tb.axi_ram.hexdump_str((axi_addr & ~0xf)-16, (((axi_addr & 0xf)+length-1) & ~0xf)+48, prefix="AXI ")) + + assert tb.axi_ram.read(axi_addr-1, len(test_data)+2) == b'\xaa'+test_data+b'\xaa' + + cur_tag = (cur_tag + 1) % tag_count + + await RisingEdge(dut.clk) + await RisingEdge(dut.clk) + + +def cycle_pause(): + return itertools.cycle([1, 1, 1, 0]) + + +if getattr(cocotb, 'top', None) is not None: + + for test in [run_test_write, run_test_read, run_test_write_imm]: + + factory = 
TestFactory(test) + factory.add_option("idle_inserter", [None, cycle_pause]) + factory.add_option("backpressure_inserter", [None, cycle_pause]) + factory.generate_tests() + + +# cocotb-test + +tests_dir = os.path.dirname(__file__) +rtl_dir = os.path.abspath(os.path.join(tests_dir, '..', '..', 'rtl')) +lib_dir = os.path.abspath(os.path.join(tests_dir, '..', '..', 'lib')) +taxi_src_dir = os.path.abspath(os.path.join(lib_dir, 'taxi', 'src')) + + +def process_f_files(files): + lst = {} + for f in files: + if f[-2:].lower() == '.f': + with open(f, 'r') as fp: + l = fp.read().split() + for f in process_f_files([os.path.join(os.path.dirname(f), x) for x in l]): + lst[os.path.basename(f)] = f + else: + lst[os.path.basename(f)] = f + return list(lst.values()) + + +@pytest.mark.parametrize("offset_group", list(range(8))) +@pytest.mark.parametrize("axi_data_w", [64, 128]) +def test_taxi_dma_if_axi(request, axi_data_w, offset_group): + dut = "taxi_dma_if_axi" + module = os.path.splitext(os.path.basename(__file__))[0] + toplevel = module + + verilog_sources = [ + os.path.join(tests_dir, f"{toplevel}.sv"), + os.path.join(rtl_dir, f"{dut}.f"), + ] + + verilog_sources = process_f_files(verilog_sources) + + parameters = {} + + parameters['AXI_DATA_W'] = axi_data_w + parameters['AXI_ADDR_W'] = 16 + parameters['AXI_STRB_W'] = parameters['AXI_DATA_W'] // 8 + parameters['AXI_ID_W'] = 8 + parameters['AXI_MAX_BURST_LEN'] = 256 + parameters['RAM_SEL_W'] = 2 + parameters['RAM_ADDR_W'] = 16 + parameters['RAM_SEGS'] = 2 + parameters['IMM_EN'] = 1 + parameters['IMM_W'] = parameters['AXI_DATA_W'] + parameters['LEN_W'] = 16 + parameters['TAG_W'] = 8 + parameters['RD_OP_TBL_SIZE'] = 2**parameters['AXI_ID_W'] + parameters['WR_OP_TBL_SIZE'] = 2**parameters['AXI_ID_W'] + parameters['RD_USE_AXI_ID'] = 0 + parameters['WR_USE_AXI_ID'] = 1 + + extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()} + + extra_env['OFFSET_GROUP'] = str(offset_group) + + sim_build = os.path.join(tests_dir, 
"sim_build", + request.node.name.replace('[', '-').replace(']', '')) + + cocotb_test.simulator.run( + simulator="verilator", + python_search=[tests_dir], + verilog_sources=verilog_sources, + toplevel=toplevel, + module=module, + parameters=parameters, + sim_build=sim_build, + extra_env=extra_env, + ) diff --git a/src/dma/tb/taxi_dma_if_axi/test_taxi_dma_if_axi.sv b/src/dma/tb/taxi_dma_if_axi/test_taxi_dma_if_axi.sv new file mode 100644 index 0000000..68ffcb2 --- /dev/null +++ b/src/dma/tb/taxi_dma_if_axi/test_taxi_dma_if_axi.sv @@ -0,0 +1,215 @@ +// SPDX-License-Identifier: CERN-OHL-S-2.0 +/* + +Copyright (c) 2025 FPGA Ninja, LLC + +Authors: +- Alex Forencich + +*/ + +`resetall +`timescale 1ns / 1ps +`default_nettype none + +/* + * AXI DMA interface testbench + */ +module test_taxi_dma_if_axi # +( + /* verilator lint_off WIDTHTRUNC */ + parameter AXI_DATA_W = 64, + parameter AXI_ADDR_W = 16, + parameter AXI_STRB_W = AXI_DATA_W / 8, + parameter AXI_ID_W = 8, + parameter AXI_MAX_BURST_LEN = 256, + parameter RAM_SEL_W = 2, + parameter RAM_ADDR_W = 16, + parameter RAM_SEGS = 2, + parameter logic IMM_EN = 1, + parameter IMM_W = AXI_DATA_W, + parameter LEN_W = 16, + parameter TAG_W = 8, + parameter RD_OP_TBL_SIZE = 2**AXI_ID_W, + parameter WR_OP_TBL_SIZE = 2**AXI_ID_W, + parameter logic RD_USE_AXI_ID = 1'b0, + parameter logic WR_USE_AXI_ID = 1'b1 + /* verilator lint_on WIDTHTRUNC */ +) +(); + +localparam RAM_DATA_W = AXI_DATA_W*2; +localparam RAM_SEG_DATA_W = RAM_DATA_W / RAM_SEGS; +localparam RAM_SEG_BE_W = RAM_SEG_DATA_W / 8; +localparam RAM_SEG_ADDR_W = RAM_ADDR_W - $clog2(RAM_SEGS*RAM_SEG_BE_W); + +logic clk; +logic rst; + +taxi_axi_if #( + .DATA_W(AXI_DATA_W), + .ADDR_W(AXI_ADDR_W), + .STRB_W(AXI_STRB_W), + .ID_W(AXI_ID_W), + .AWUSER_EN(1'b0), + .WUSER_EN(1'b0), + .BUSER_EN(1'b0), + .ARUSER_EN(1'b0), + .RUSER_EN(1'b0), + .MAX_BURST_LEN(AXI_MAX_BURST_LEN) +) m_axi(); + +taxi_dma_desc_if #( + .SRC_ADDR_W(AXI_ADDR_W), + .SRC_SEL_EN(1'b0), + .SRC_ASID_EN(1'b0), + 
.DST_ADDR_W(RAM_ADDR_W), + .DST_SEL_EN(1'b1), + .DST_SEL_W(RAM_SEL_W), + .DST_ASID_EN(1'b0), + .IMM_EN(1'b0), + .LEN_W(LEN_W), + .TAG_W(TAG_W), + .ID_EN(1'b0), + .DEST_EN(1'b0), + .USER_EN(1'b0) +) rd_desc(); + +taxi_dma_desc_if #( + .SRC_ADDR_W(RAM_ADDR_W), + .SRC_SEL_EN(1'b1), + .SRC_SEL_W(RAM_SEL_W), + .SRC_ASID_EN(1'b0), + .DST_ADDR_W(AXI_ADDR_W), + .DST_SEL_EN(1'b0), + .DST_ASID_EN(1'b0), + .IMM_EN(IMM_EN), + .IMM_W(IMM_W), + .LEN_W(LEN_W), + .TAG_W(TAG_W), + .ID_EN(1'b0), + .DEST_EN(1'b0), + .USER_EN(1'b0) +) wr_desc(); + +taxi_dma_ram_if #( + .SEGS(RAM_SEGS), + .SEG_ADDR_W(RAM_SEG_ADDR_W), + .SEG_DATA_W(RAM_SEG_DATA_W), + .SEG_BE_W(RAM_SEG_BE_W) +) dma_ram(); + +logic read_enable; +logic write_enable; + +logic status_rd_busy; +logic status_wr_busy; + +logic [$clog2(RD_OP_TBL_SIZE)-1:0] stat_rd_op_start_tag; +logic stat_rd_op_start_valid; +logic [$clog2(RD_OP_TBL_SIZE)-1:0] stat_rd_op_finish_tag; +logic [3:0] stat_rd_op_finish_status; +logic stat_rd_op_finish_valid; +logic [$clog2(RD_OP_TBL_SIZE)-1:0] stat_rd_req_start_tag; +logic [12:0] stat_rd_req_start_len; +logic stat_rd_req_start_valid; +logic [$clog2(RD_OP_TBL_SIZE)-1:0] stat_rd_req_finish_tag; +logic [3:0] stat_rd_req_finish_status; +logic stat_rd_req_finish_valid; +logic stat_rd_op_tbl_full; +logic stat_rd_tx_stall; +logic [$clog2(WR_OP_TBL_SIZE)-1:0] stat_wr_op_start_tag; +logic stat_wr_op_start_valid; +logic [$clog2(WR_OP_TBL_SIZE)-1:0] stat_wr_op_finish_tag; +logic [3:0] stat_wr_op_finish_status; +logic stat_wr_op_finish_valid; +logic [$clog2(WR_OP_TBL_SIZE)-1:0] stat_wr_req_start_tag; +logic [12:0] stat_wr_req_start_len; +logic stat_wr_req_start_valid; +logic [$clog2(WR_OP_TBL_SIZE)-1:0] stat_wr_req_finish_tag; +logic [3:0] stat_wr_req_finish_status; +logic stat_wr_req_finish_valid; +logic stat_wr_op_tbl_full; +logic stat_wr_tx_stall; + +taxi_dma_if_axi #( + .AXI_MAX_BURST_LEN(AXI_MAX_BURST_LEN), + .RD_OP_TBL_SIZE(RD_OP_TBL_SIZE), + .WR_OP_TBL_SIZE(WR_OP_TBL_SIZE), + .RD_USE_AXI_ID(RD_USE_AXI_ID), 
+ .WR_USE_AXI_ID(WR_USE_AXI_ID) +) +uut ( + .clk(clk), + .rst(rst), + + /* + * AXI master interface + */ + .m_axi_wr(m_axi), + .m_axi_rd(m_axi), + + /* + * Read descriptor + */ + .rd_desc_req(rd_desc), + .rd_desc_sts(rd_desc), + + /* + * Write descriptor + */ + .wr_desc_req(wr_desc), + .wr_desc_sts(wr_desc), + + /* + * RAM interface + */ + .dma_ram_wr(dma_ram), + .dma_ram_rd(dma_ram), + + /* + * Configuration + */ + .read_enable(read_enable), + .write_enable(write_enable), + + /* + * Status + */ + .status_rd_busy(status_rd_busy), + .status_wr_busy(status_wr_busy), + + /* + * Statistics + */ + .stat_rd_op_start_tag(stat_rd_op_start_tag), + .stat_rd_op_start_valid(stat_rd_op_start_valid), + .stat_rd_op_finish_tag(stat_rd_op_finish_tag), + .stat_rd_op_finish_status(stat_rd_op_finish_status), + .stat_rd_op_finish_valid(stat_rd_op_finish_valid), + .stat_rd_req_start_tag(stat_rd_req_start_tag), + .stat_rd_req_start_len(stat_rd_req_start_len), + .stat_rd_req_start_valid(stat_rd_req_start_valid), + .stat_rd_req_finish_tag(stat_rd_req_finish_tag), + .stat_rd_req_finish_status(stat_rd_req_finish_status), + .stat_rd_req_finish_valid(stat_rd_req_finish_valid), + .stat_rd_op_tbl_full(stat_rd_op_tbl_full), + .stat_rd_tx_stall(stat_rd_tx_stall), + .stat_wr_op_start_tag(stat_wr_op_start_tag), + .stat_wr_op_start_valid(stat_wr_op_start_valid), + .stat_wr_op_finish_tag(stat_wr_op_finish_tag), + .stat_wr_op_finish_status(stat_wr_op_finish_status), + .stat_wr_op_finish_valid(stat_wr_op_finish_valid), + .stat_wr_req_start_tag(stat_wr_req_start_tag), + .stat_wr_req_start_len(stat_wr_req_start_len), + .stat_wr_req_start_valid(stat_wr_req_start_valid), + .stat_wr_req_finish_tag(stat_wr_req_finish_tag), + .stat_wr_req_finish_status(stat_wr_req_finish_status), + .stat_wr_req_finish_valid(stat_wr_req_finish_valid), + .stat_wr_op_tbl_full(stat_wr_op_tbl_full), + .stat_wr_tx_stall(stat_wr_tx_stall) +); + +endmodule + +`resetall diff --git a/src/dma/tb/taxi_dma_if_axi_rd/Makefile 
b/src/dma/tb/taxi_dma_if_axi_rd/Makefile new file mode 100644 index 0000000..e382df7 --- /dev/null +++ b/src/dma/tb/taxi_dma_if_axi_rd/Makefile @@ -0,0 +1,64 @@ +# SPDX-License-Identifier: CERN-OHL-S-2.0 +# +# Copyright (c) 2020-2025 FPGA Ninja, LLC +# +# Authors: +# - Alex Forencich + +TOPLEVEL_LANG = verilog + +SIM ?= verilator +WAVES ?= 0 + +COCOTB_HDL_TIMEUNIT = 1ns +COCOTB_HDL_TIMEPRECISION = 1ps + +RTL_DIR = ../../rtl +LIB_DIR = ../../lib +TAXI_SRC_DIR = $(LIB_DIR)/taxi/src + +DUT = taxi_dma_if_axi_rd +COCOTB_TEST_MODULES = test_$(DUT) +COCOTB_TOPLEVEL = test_$(DUT) +MODULE = $(COCOTB_TEST_MODULES) +TOPLEVEL = $(COCOTB_TOPLEVEL) +VERILOG_SOURCES += $(COCOTB_TOPLEVEL).sv +VERILOG_SOURCES += $(RTL_DIR)/$(DUT).sv +VERILOG_SOURCES += $(RTL_DIR)/taxi_dma_desc_if.sv +VERILOG_SOURCES += $(RTL_DIR)/taxi_dma_ram_if.sv +VERILOG_SOURCES += $(TAXI_SRC_DIR)/axi/rtl/taxi_axi_if.sv + +# handle file list files +process_f_file = $(call process_f_files,$(addprefix $(dir $1),$(shell cat $1))) +process_f_files = $(foreach f,$1,$(if $(filter %.f,$f),$(call process_f_file,$f),$f)) +uniq_base = $(if $1,$(call uniq_base,$(foreach f,$1,$(if $(filter-out $(notdir $(lastword $1)),$(notdir $f)),$f,))) $(lastword $1)) +VERILOG_SOURCES := $(call uniq_base,$(call process_f_files,$(VERILOG_SOURCES))) + +# module parameters +export PARAM_AXI_DATA_W := 64 +export PARAM_AXI_ADDR_W := 16 +export PARAM_AXI_STRB_W := $(shell expr $(PARAM_AXI_DATA_W) / 8 ) +export PARAM_AXI_ID_W := 8 +export PARAM_AXI_MAX_BURST_LEN := 256 +export PARAM_RAM_SEL_W := 2 +export PARAM_RAM_ADDR_W := 16 +export PARAM_RAM_SEGS := 2 +export PARAM_LEN_W := 16 +export PARAM_TAG_W := 8 +export PARAM_OP_TBL_SIZE := $(shell python -c "print(2**$(PARAM_AXI_ID_W))") +export PARAM_USE_AXI_ID := 1 + +ifeq ($(SIM), icarus) + PLUSARGS += -fst + + COMPILE_ARGS += $(foreach v,$(filter PARAM_%,$(.VARIABLES)),-P $(COCOTB_TOPLEVEL).$(subst PARAM_,,$(v))=$($(v))) +else ifeq ($(SIM), verilator) + COMPILE_ARGS += $(foreach v,$(filter 
PARAM_%,$(.VARIABLES)),-G$(subst PARAM_,,$(v))=$($(v))) + + ifeq ($(WAVES), 1) + COMPILE_ARGS += --trace-fst + VERILATOR_TRACE = 1 + endif +endif + +include $(shell cocotb-config --makefiles)/Makefile.sim diff --git a/src/dma/tb/taxi_dma_if_axi_rd/dma_psdp_ram.py b/src/dma/tb/taxi_dma_if_axi_rd/dma_psdp_ram.py new file mode 120000 index 0000000..6613351 --- /dev/null +++ b/src/dma/tb/taxi_dma_if_axi_rd/dma_psdp_ram.py @@ -0,0 +1 @@ +../dma_psdp_ram.py \ No newline at end of file diff --git a/src/dma/tb/taxi_dma_if_axi_rd/test_taxi_dma_if_axi_rd.py b/src/dma/tb/taxi_dma_if_axi_rd/test_taxi_dma_if_axi_rd.py new file mode 100644 index 0000000..1844306 --- /dev/null +++ b/src/dma/tb/taxi_dma_if_axi_rd/test_taxi_dma_if_axi_rd.py @@ -0,0 +1,228 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: CERN-OHL-S-2.0 +""" + +Copyright (c) 2021-2025 FPGA Ninja, LLC + +Authors: +- Alex Forencich + +""" + +import itertools +import logging +import os +import sys + +import cocotb_test.simulator +import pytest + +import cocotb +from cocotb.clock import Clock +from cocotb.triggers import RisingEdge +from cocotb.regression import TestFactory + +from cocotbext.axi import AxiReadBus, AxiRamRead +from cocotbext.axi.stream import define_stream + +try: + from dma_psdp_ram import PsdpRamWrite, PsdpRamWriteBus +except ImportError: + # attempt import from current directory + sys.path.insert(0, os.path.join(os.path.dirname(__file__))) + try: + from dma_psdp_ram import PsdpRamWrite, PsdpRamWriteBus + finally: + del sys.path[0] + +DescBus, DescTransaction, DescSource, DescSink, DescMonitor = define_stream("Desc", + signals=["req_src_addr", "req_src_sel", "req_src_asid", "req_dst_addr", "req_dst_sel", "req_dst_asid", "req_len", "req_tag", "req_valid", "req_ready"], + optional_signals=["req_imm", "req_imm_en", "req_id", "req_dest", "req_user"] +) + +DescStatusBus, DescStatusTransaction, DescStatusSource, DescStatusSink, DescStatusMonitor = define_stream("DescStatus", + signals=["sts_tag", 
"sts_error", "sts_valid"], + optional_signals=["sts_len", "sts_id", "sts_dest", "sts_user"] +) + + +class TB(object): + def __init__(self, dut): + self.dut = dut + + self.log = logging.getLogger("cocotb.tb") + self.log.setLevel(logging.DEBUG) + + cocotb.start_soon(Clock(dut.clk, 10, units="ns").start()) + + # AXI RAM + self.axi_ram = AxiRamRead(AxiReadBus.from_entity(dut.m_axi), dut.clk, dut.rst, size=2**16) + + # DMA RAM + self.dma_ram = PsdpRamWrite(PsdpRamWriteBus.from_entity(dut.dma_ram), dut.clk, dut.rst, size=2**16) + + # Control + self.read_desc_source = DescSource(DescBus.from_entity(dut.rd_desc), dut.clk, dut.rst) + self.read_desc_status_sink = DescStatusSink(DescStatusBus.from_entity(dut.rd_desc), dut.clk, dut.rst) + + dut.enable.setimmediatevalue(0) + + def set_idle_generator(self, generator=None): + if generator: + self.axi_ram.r_channel.set_pause_generator(generator()) + + def set_backpressure_generator(self, generator=None): + if generator: + self.axi_ram.ar_channel.set_pause_generator(generator()) + self.dma_ram.set_pause_generator(generator()) + + async def cycle_reset(self): + self.dut.rst.setimmediatevalue(0) + await RisingEdge(self.dut.clk) + await RisingEdge(self.dut.clk) + self.dut.rst.value = 1 + await RisingEdge(self.dut.clk) + await RisingEdge(self.dut.clk) + self.dut.rst.value = 0 + await RisingEdge(self.dut.clk) + await RisingEdge(self.dut.clk) + + +async def run_test_read(dut, idle_inserter=None, backpressure_inserter=None): + + tb = TB(dut) + + axi_byte_lanes = tb.axi_ram.byte_lanes + ram_byte_lanes = tb.dma_ram.byte_lanes + tag_count = 2**len(tb.read_desc_source.bus.req_tag) + + axi_offsets = list(range(axi_byte_lanes+1))+list(range(4096-axi_byte_lanes, 4096)) + if os.getenv("OFFSET_GROUP") is not None: + group = int(os.getenv("OFFSET_GROUP")) + axi_offsets = axi_offsets[group::8] + + cur_tag = 1 + + tb.set_idle_generator(idle_inserter) + tb.set_backpressure_generator(backpressure_inserter) + + await tb.cycle_reset() + + 
tb.dut.enable.value = 1 + + for length in list(range(0, ram_byte_lanes+3))+list(range(128-4, 128+4))+[1024]: + for axi_offset in axi_offsets: + for ram_offset in range(ram_byte_lanes+1): + tb.log.info("length %d, axi_offset %d, ram_offset %d", length, axi_offset, ram_offset) + axi_addr = axi_offset+0x1000 + ram_addr = ram_offset+0x1000 + test_data = bytearray([x % 256 for x in range(length)]) + + tb.axi_ram.write(axi_addr, test_data) + + tb.log.debug("%s", tb.axi_ram.hexdump_str((axi_addr & ~0xf)-16, (((axi_addr & 0xf)+length-1) & ~0xf)+48, prefix="AXI ")) + + tb.dma_ram.write(ram_addr-256, b'\xaa'*(len(test_data)+512)) + + desc = DescTransaction(req_src_addr=axi_addr, req_dst_addr=ram_addr, req_dst_sel=0, req_len=len(test_data), req_tag=cur_tag) + await tb.read_desc_source.send(desc) + + status = await tb.read_desc_status_sink.recv() + + tb.log.info("status: %s", status) + + assert int(status.sts_tag) == cur_tag + assert int(status.sts_error) == 0 + + tb.log.debug("%s", tb.dma_ram.hexdump_str((ram_addr & ~0xf)-16, (((ram_addr & 0xf)+length-1) & ~0xf)+48, prefix="RAM ")) + + assert tb.dma_ram.read(ram_addr-8, len(test_data)+16) == b'\xaa'*8+test_data+b'\xaa'*8 + + cur_tag = (cur_tag + 1) % tag_count + + await RisingEdge(dut.clk) + await RisingEdge(dut.clk) + + +def cycle_pause(): + return itertools.cycle([1, 1, 1, 0]) + + +if getattr(cocotb, 'top', None) is not None: + + factory = TestFactory(run_test_read) + factory.add_option("idle_inserter", [None, cycle_pause]) + factory.add_option("backpressure_inserter", [None, cycle_pause]) + factory.generate_tests() + + +# cocotb-test + +tests_dir = os.path.dirname(__file__) +rtl_dir = os.path.abspath(os.path.join(tests_dir, '..', '..', 'rtl')) +lib_dir = os.path.abspath(os.path.join(tests_dir, '..', '..', 'lib')) +taxi_src_dir = os.path.abspath(os.path.join(lib_dir, 'taxi', 'src')) + + +def process_f_files(files): + lst = {} + for f in files: + if f[-2:].lower() == '.f': + with open(f, 'r') as fp: + l = fp.read().split() + 
for f in process_f_files([os.path.join(os.path.dirname(f), x) for x in l]): + lst[os.path.basename(f)] = f + else: + lst[os.path.basename(f)] = f + return list(lst.values()) + + +@pytest.mark.parametrize("offset_group", list(range(8))) +@pytest.mark.parametrize("axi_data_w", [64, 128]) +def test_taxi_dma_if_axi_rd(request, axi_data_w, offset_group): + dut = "taxi_dma_if_axi_rd" + module = os.path.splitext(os.path.basename(__file__))[0] + toplevel = module + + verilog_sources = [ + os.path.join(tests_dir, f"{toplevel}.sv"), + os.path.join(rtl_dir, f"{dut}.sv"), + os.path.join(rtl_dir, "taxi_dma_desc_if.sv"), + os.path.join(rtl_dir, "taxi_dma_ram_if.sv"), + os.path.join(taxi_src_dir, "axi", "rtl", "taxi_axi_if.sv"), + ] + + verilog_sources = process_f_files(verilog_sources) + + parameters = {} + + parameters['AXI_DATA_W'] = axi_data_w + parameters['AXI_ADDR_W'] = 16 + parameters['AXI_STRB_W'] = parameters['AXI_DATA_W'] // 8 + parameters['AXI_ID_W'] = 8 + parameters['AXI_MAX_BURST_LEN'] = 256 + parameters['RAM_SEL_W'] = 2 + parameters['RAM_ADDR_W'] = 16 + parameters['RAM_SEGS'] = 2 + parameters['LEN_W'] = 16 + parameters['TAG_W'] = 8 + parameters['OP_TBL_SIZE'] = 2**parameters['AXI_ID_W'] + parameters['USE_AXI_ID'] = 0 + + extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()} + + extra_env['OFFSET_GROUP'] = str(offset_group) + + sim_build = os.path.join(tests_dir, "sim_build", + request.node.name.replace('[', '-').replace(']', '')) + + cocotb_test.simulator.run( + simulator="verilator", + python_search=[tests_dir], + verilog_sources=verilog_sources, + toplevel=toplevel, + module=module, + parameters=parameters, + sim_build=sim_build, + extra_env=extra_env, + ) diff --git a/src/dma/tb/taxi_dma_if_axi_rd/test_taxi_dma_if_axi_rd.sv b/src/dma/tb/taxi_dma_if_axi_rd/test_taxi_dma_if_axi_rd.sv new file mode 100644 index 0000000..e0e1d99 --- /dev/null +++ b/src/dma/tb/taxi_dma_if_axi_rd/test_taxi_dma_if_axi_rd.sv @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: 
CERN-OHL-S-2.0 +/* + +Copyright (c) 2025 FPGA Ninja, LLC + +Authors: +- Alex Forencich + +*/ + +`resetall +`timescale 1ns / 1ps +`default_nettype none + +/* + * AXI DMA interface testbench + */ +module test_taxi_dma_if_axi_rd # +( + /* verilator lint_off WIDTHTRUNC */ + parameter AXI_DATA_W = 64, + parameter AXI_ADDR_W = 16, + parameter AXI_STRB_W = AXI_DATA_W / 8, + parameter AXI_ID_W = 8, + parameter AXI_MAX_BURST_LEN = 256, + parameter RAM_SEL_W = 2, + parameter RAM_ADDR_W = 16, + parameter RAM_SEGS = 2, + parameter logic IMM_EN = 1, + parameter IMM_W = AXI_DATA_W, + parameter LEN_W = 16, + parameter TAG_W = 8, + parameter OP_TBL_SIZE = 2**AXI_ID_W, + parameter logic USE_AXI_ID = 1'b0 + /* verilator lint_on WIDTHTRUNC */ +) +(); + +localparam RAM_DATA_W = AXI_DATA_W*2; +localparam RAM_SEG_DATA_W = RAM_DATA_W / RAM_SEGS; +localparam RAM_SEG_BE_W = RAM_SEG_DATA_W / 8; +localparam RAM_SEG_ADDR_W = RAM_ADDR_W - $clog2(RAM_SEGS*RAM_SEG_BE_W); + +logic clk; +logic rst; + +taxi_axi_if #( + .DATA_W(AXI_DATA_W), + .ADDR_W(AXI_ADDR_W), + .STRB_W(AXI_STRB_W), + .ID_W(AXI_ID_W), + .AWUSER_EN(1'b0), + .WUSER_EN(1'b0), + .BUSER_EN(1'b0), + .ARUSER_EN(1'b0), + .RUSER_EN(1'b0), + .MAX_BURST_LEN(AXI_MAX_BURST_LEN) +) m_axi(); + +taxi_dma_desc_if #( + .SRC_ADDR_W(AXI_ADDR_W), + .SRC_SEL_EN(1'b0), + .SRC_ASID_EN(1'b0), + .DST_ADDR_W(RAM_ADDR_W), + .DST_SEL_EN(1'b1), + .DST_SEL_W(RAM_SEL_W), + .DST_ASID_EN(1'b0), + .IMM_EN(1'b0), + .LEN_W(LEN_W), + .TAG_W(TAG_W), + .ID_EN(1'b0), + .DEST_EN(1'b0), + .USER_EN(1'b0) +) rd_desc(); + +taxi_dma_ram_if #( + .SEGS(RAM_SEGS), + .SEG_ADDR_W(RAM_SEG_ADDR_W), + .SEG_DATA_W(RAM_SEG_DATA_W), + .SEG_BE_W(RAM_SEG_BE_W) +) dma_ram(); + +logic enable; + +logic status_busy; + +logic [$clog2(OP_TBL_SIZE)-1:0] stat_rd_op_start_tag; +logic stat_rd_op_start_valid; +logic [$clog2(OP_TBL_SIZE)-1:0] stat_rd_op_finish_tag; +logic [3:0] stat_rd_op_finish_status; +logic stat_rd_op_finish_valid; +logic [$clog2(OP_TBL_SIZE)-1:0] stat_rd_req_start_tag; +logic 
[12:0] stat_rd_req_start_len; +logic stat_rd_req_start_valid; +logic [$clog2(OP_TBL_SIZE)-1:0] stat_rd_req_finish_tag; +logic [3:0] stat_rd_req_finish_status; +logic stat_rd_req_finish_valid; +logic stat_rd_op_tbl_full; +logic stat_rd_tx_stall; + +taxi_dma_if_axi_rd #( + .AXI_MAX_BURST_LEN(AXI_MAX_BURST_LEN), + .OP_TBL_SIZE(OP_TBL_SIZE), + .USE_AXI_ID(USE_AXI_ID) +) +uut ( + .clk(clk), + .rst(rst), + + /* + * AXI master interface + */ + .m_axi_rd(m_axi), + + /* + * Read descriptor + */ + .rd_desc_req(rd_desc), + .rd_desc_sts(rd_desc), + + /* + * RAM interface + */ + .dma_ram_wr(dma_ram), + + /* + * Configuration + */ + .enable(enable), + + /* + * Status + */ + .status_busy(status_busy), + + /* + * Statistics + */ + .stat_rd_op_start_tag(stat_rd_op_start_tag), + .stat_rd_op_start_valid(stat_rd_op_start_valid), + .stat_rd_op_finish_tag(stat_rd_op_finish_tag), + .stat_rd_op_finish_status(stat_rd_op_finish_status), + .stat_rd_op_finish_valid(stat_rd_op_finish_valid), + .stat_rd_req_start_tag(stat_rd_req_start_tag), + .stat_rd_req_start_len(stat_rd_req_start_len), + .stat_rd_req_start_valid(stat_rd_req_start_valid), + .stat_rd_req_finish_tag(stat_rd_req_finish_tag), + .stat_rd_req_finish_status(stat_rd_req_finish_status), + .stat_rd_req_finish_valid(stat_rd_req_finish_valid), + .stat_rd_op_tbl_full(stat_rd_op_tbl_full), + .stat_rd_tx_stall(stat_rd_tx_stall) +); + +endmodule + +`resetall diff --git a/src/dma/tb/taxi_dma_if_axi_wr/Makefile b/src/dma/tb/taxi_dma_if_axi_wr/Makefile new file mode 100644 index 0000000..d395e7a --- /dev/null +++ b/src/dma/tb/taxi_dma_if_axi_wr/Makefile @@ -0,0 +1,66 @@ +# SPDX-License-Identifier: CERN-OHL-S-2.0 +# +# Copyright (c) 2020-2025 FPGA Ninja, LLC +# +# Authors: +# - Alex Forencich + +TOPLEVEL_LANG = verilog + +SIM ?= verilator +WAVES ?= 0 + +COCOTB_HDL_TIMEUNIT = 1ns +COCOTB_HDL_TIMEPRECISION = 1ps + +RTL_DIR = ../../rtl +LIB_DIR = ../../lib +TAXI_SRC_DIR = $(LIB_DIR)/taxi/src + +DUT = taxi_dma_if_axi_wr +COCOTB_TEST_MODULES = 
test_$(DUT) +COCOTB_TOPLEVEL = test_$(DUT) +MODULE = $(COCOTB_TEST_MODULES) +TOPLEVEL = $(COCOTB_TOPLEVEL) +VERILOG_SOURCES += $(COCOTB_TOPLEVEL).sv +VERILOG_SOURCES += $(RTL_DIR)/$(DUT).sv +VERILOG_SOURCES += $(RTL_DIR)/taxi_dma_desc_if.sv +VERILOG_SOURCES += $(RTL_DIR)/taxi_dma_ram_if.sv +VERILOG_SOURCES += $(TAXI_SRC_DIR)/axi/rtl/taxi_axi_if.sv + +# handle file list files +process_f_file = $(call process_f_files,$(addprefix $(dir $1),$(shell cat $1))) +process_f_files = $(foreach f,$1,$(if $(filter %.f,$f),$(call process_f_file,$f),$f)) +uniq_base = $(if $1,$(call uniq_base,$(foreach f,$1,$(if $(filter-out $(notdir $(lastword $1)),$(notdir $f)),$f,))) $(lastword $1)) +VERILOG_SOURCES := $(call uniq_base,$(call process_f_files,$(VERILOG_SOURCES))) + +# module parameters +export PARAM_AXI_DATA_W := 64 +export PARAM_AXI_ADDR_W := 16 +export PARAM_AXI_STRB_W := $(shell expr $(PARAM_AXI_DATA_W) / 8 ) +export PARAM_AXI_ID_W := 8 +export PARAM_AXI_MAX_BURST_LEN := 256 +export PARAM_RAM_SEL_W := 2 +export PARAM_RAM_ADDR_W := 16 +export PARAM_RAM_SEGS := 2 +export PARAM_IMM_EN := 1 +export PARAM_IMM_W := $(PARAM_AXI_DATA_W) +export PARAM_LEN_W := 16 +export PARAM_TAG_W := 8 +export PARAM_OP_TBL_SIZE := $(shell python -c "print(2**$(PARAM_AXI_ID_W))") +export PARAM_USE_AXI_ID := 1 + +ifeq ($(SIM), icarus) + PLUSARGS += -fst + + COMPILE_ARGS += $(foreach v,$(filter PARAM_%,$(.VARIABLES)),-P $(COCOTB_TOPLEVEL).$(subst PARAM_,,$(v))=$($(v))) +else ifeq ($(SIM), verilator) + COMPILE_ARGS += $(foreach v,$(filter PARAM_%,$(.VARIABLES)),-G$(subst PARAM_,,$(v))=$($(v))) + + ifeq ($(WAVES), 1) + COMPILE_ARGS += --trace-fst + VERILATOR_TRACE = 1 + endif +endif + +include $(shell cocotb-config --makefiles)/Makefile.sim diff --git a/src/dma/tb/taxi_dma_if_axi_wr/dma_psdp_ram.py b/src/dma/tb/taxi_dma_if_axi_wr/dma_psdp_ram.py new file mode 120000 index 0000000..6613351 --- /dev/null +++ b/src/dma/tb/taxi_dma_if_axi_wr/dma_psdp_ram.py @@ -0,0 +1 @@ +../dma_psdp_ram.py \ No newline at 
end of file diff --git a/src/dma/tb/taxi_dma_if_axi_wr/test_taxi_dma_if_axi_wr.py b/src/dma/tb/taxi_dma_if_axi_wr/test_taxi_dma_if_axi_wr.py new file mode 100644 index 0000000..01b075a --- /dev/null +++ b/src/dma/tb/taxi_dma_if_axi_wr/test_taxi_dma_if_axi_wr.py @@ -0,0 +1,285 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: CERN-OHL-S-2.0 +""" + +Copyright (c) 2021-2025 FPGA Ninja, LLC + +Authors: +- Alex Forencich + +""" + +import itertools +import logging +import os +import sys + +import cocotb_test.simulator +import pytest + +import cocotb +from cocotb.clock import Clock +from cocotb.triggers import RisingEdge +from cocotb.regression import TestFactory + +from cocotbext.axi import AxiWriteBus, AxiRamWrite +from cocotbext.axi.stream import define_stream + +try: + from dma_psdp_ram import PsdpRamRead, PsdpRamReadBus +except ImportError: + # attempt import from current directory + sys.path.insert(0, os.path.join(os.path.dirname(__file__))) + try: + from dma_psdp_ram import PsdpRamRead, PsdpRamReadBus + finally: + del sys.path[0] + +DescBus, DescTransaction, DescSource, DescSink, DescMonitor = define_stream("Desc", + signals=["req_src_addr", "req_src_sel", "req_src_asid", "req_dst_addr", "req_dst_sel", "req_dst_asid", "req_len", "req_tag", "req_valid", "req_ready"], + optional_signals=["req_imm", "req_imm_en", "req_id", "req_dest", "req_user"] +) + +DescStatusBus, DescStatusTransaction, DescStatusSource, DescStatusSink, DescStatusMonitor = define_stream("DescStatus", + signals=["sts_tag", "sts_error", "sts_valid"], + optional_signals=["sts_len", "sts_id", "sts_dest", "sts_user"] +) + + +class TB(object): + def __init__(self, dut): + self.dut = dut + + self.log = logging.getLogger("cocotb.tb") + self.log.setLevel(logging.DEBUG) + + cocotb.start_soon(Clock(dut.clk, 10, units="ns").start()) + + # AXI RAM + self.axi_ram = AxiRamWrite(AxiWriteBus.from_entity(dut.m_axi), dut.clk, dut.rst, size=2**16) + + # DMA RAM + self.dma_ram = 
PsdpRamRead(PsdpRamReadBus.from_entity(dut.dma_ram), dut.clk, dut.rst, size=2**16) + + # Control + self.write_desc_source = DescSource(DescBus.from_entity(dut.wr_desc), dut.clk, dut.rst) + self.write_desc_status_sink = DescStatusSink(DescStatusBus.from_entity(dut.wr_desc), dut.clk, dut.rst) + + dut.enable.setimmediatevalue(0) + + def set_idle_generator(self, generator=None): + if generator: + self.axi_ram.b_channel.set_pause_generator(generator()) + + def set_backpressure_generator(self, generator=None): + if generator: + self.axi_ram.aw_channel.set_pause_generator(generator()) + self.axi_ram.w_channel.set_pause_generator(generator()) + self.dma_ram.set_pause_generator(generator()) + + async def cycle_reset(self): + self.dut.rst.setimmediatevalue(0) + await RisingEdge(self.dut.clk) + await RisingEdge(self.dut.clk) + self.dut.rst.value = 1 + await RisingEdge(self.dut.clk) + await RisingEdge(self.dut.clk) + self.dut.rst.value = 0 + await RisingEdge(self.dut.clk) + await RisingEdge(self.dut.clk) + + +async def run_test_write(dut, idle_inserter=None, backpressure_inserter=None): + + tb = TB(dut) + + axi_byte_lanes = tb.axi_ram.byte_lanes + ram_byte_lanes = tb.dma_ram.byte_lanes + tag_count = 2**len(tb.write_desc_source.bus.req_tag) + + axi_offsets = list(range(axi_byte_lanes+1))+list(range(4096-axi_byte_lanes, 4096)) + if os.getenv("OFFSET_GROUP") is not None: + group = int(os.getenv("OFFSET_GROUP")) + axi_offsets = axi_offsets[group::8] + + cur_tag = 1 + + tb.set_idle_generator(idle_inserter) + tb.set_backpressure_generator(backpressure_inserter) + + await tb.cycle_reset() + + tb.dut.enable.value = 1 + + for length in list(range(0, ram_byte_lanes+3))+list(range(128-4, 128+4))+[1024]: + for axi_offset in axi_offsets: + for ram_offset in range(ram_byte_lanes+1): + tb.log.info("length %d, axi_offset %d, ram_offset %d", length, axi_offset, ram_offset) + axi_addr = axi_offset+0x1000 + ram_addr = ram_offset+0x1000 + test_data = bytearray([x % 256 for x in range(length)]) + 
+ tb.dma_ram.write(ram_addr & 0xffff80, b'\x55'*(len(test_data)+256)) + tb.axi_ram.write(axi_addr-128, b'\xaa'*(len(test_data)+256)) + tb.dma_ram.write(ram_addr, test_data) + + tb.log.debug("%s", tb.dma_ram.hexdump_str((ram_addr & ~0xf)-16, (((ram_addr & 0xf)+length-1) & ~0xf)+48, prefix="RAM ")) + + desc = DescTransaction(req_dst_addr=axi_addr, req_src_addr=ram_addr, req_src_sel=0, req_len=len(test_data), req_tag=cur_tag) + await tb.write_desc_source.send(desc) + + status = await tb.write_desc_status_sink.recv() + + tb.log.info("status: %s", status) + + assert int(status.sts_tag) == cur_tag + assert int(status.sts_error) == 0 + + tb.log.debug("%s", tb.axi_ram.hexdump_str((axi_addr & ~0xf)-16, (((axi_addr & 0xf)+length-1) & ~0xf)+48, prefix="AXI ")) + + assert tb.axi_ram.read(axi_addr-1, len(test_data)+2) == b'\xaa'+test_data+b'\xaa' + + cur_tag = (cur_tag + 1) % tag_count + + await RisingEdge(dut.clk) + await RisingEdge(dut.clk)
+
+
+async def run_test_write_imm(dut, idle_inserter=None, backpressure_inserter=None):
+    """Exercise writes whose payload is carried inside the descriptor (req_imm).
+
+    For every combination of length and AXI offset, the destination region of
+    tb.axi_ram is pre-filled with 0xaa, a descriptor with req_imm_en=1 is sent,
+    the returned status is checked (matching tag, error code 0), and the
+    destination is read back and compared against the expected bytes.
+
+    Args:
+        dut: handle to the simulated toplevel; used to build the TB helper and
+            to read the length of wr_desc.req_imm.
+        idle_inserter: forwarded to tb.set_idle_generator(); None by default.
+        backpressure_inserter: forwarded to tb.set_backpressure_generator();
+            None by default.
+    """
+
+    tb = TB(dut)
+
+    axi_byte_lanes = tb.axi_ram.byte_lanes
+    # 2**len(req_tag): modulus at which cur_tag wraps below
+    # (len() of the signal is presumably its width in bits -- TODO confirm)
+    tag_count = 2**len(tb.write_desc_source.bus.req_tag)
+
+    # offsets 0..axi_byte_lanes, plus the last axi_byte_lanes offsets below 4096
+    axi_offsets = list(range(axi_byte_lanes+1))+list(range(4096-axi_byte_lanes, 4096))
+    if os.getenv("OFFSET_GROUP") is not None:
+        # keep only every 8th offset, starting at index OFFSET_GROUP
+        group = int(os.getenv("OFFSET_GROUP"))
+        axi_offsets = axi_offsets[group::8]
+
+    cur_tag = 1
+
+    tb.set_idle_generator(idle_inserter)
+    tb.set_backpressure_generator(backpressure_inserter)
+
+    await tb.cycle_reset()
+
+    tb.dut.enable.value = 1
+
+    # NOTE(review): the range end is exclusive, so the largest length tested is
+    # (len(dut.wr_desc.req_imm) // 8) - 1 -- confirm that leaving out the
+    # full-width immediate is intended
+    for length in list(range(1, len(dut.wr_desc.req_imm) // 8)):
+        for axi_offset in axi_offsets:
+            tb.log.info("length %d, axi_offset %d", length, axi_offset)
+            axi_addr = axi_offset+0x1000
+            # payload is the byte sequence 0, 1, 2, ... (mod 256)
+            test_data = bytearray([x % 256 for x in range(length)])
+            # pack the payload into an integer, first byte in the least significant position
+            imm = int.from_bytes(test_data, 'little')
+
+            # background fill: starts 128 bytes before the target and is 256 bytes longer than the payload
+            tb.axi_ram.write(axi_addr-128, b'\xaa'*(len(test_data)+256))
+
+            tb.log.debug("Immediate: 0x%x", imm)
+
+            desc = DescTransaction(req_dst_addr=axi_addr, req_src_addr=0, req_src_sel=0, req_imm=imm, req_imm_en=1, req_len=len(test_data), req_tag=cur_tag)
+            await tb.write_desc_source.send(desc)
+
+            status = await tb.write_desc_status_sink.recv()
+
+            tb.log.info("status: %s", status)
+
+            assert int(status.sts_tag) == cur_tag
+            assert int(status.sts_error) == 0
+
+            tb.log.debug("%s", tb.axi_ram.hexdump_str((axi_addr & ~0xf)-16, (((axi_addr & 0xf)+length-1) & ~0xf)+48, prefix="AXI "))
+
+            # read one extra byte on each side: both must still hold the 0xaa fill
+            assert tb.axi_ram.read(axi_addr-1, len(test_data)+2) == b'\xaa'+test_data+b'\xaa'
+
+            cur_tag = (cur_tag + 1) % tag_count
+
+    await RisingEdge(dut.clk)
+    await RisingEdge(dut.clk)
+
+
+def cycle_pause():
+    """Return an endless iterator that repeats the pattern 1, 1, 1, 0."""
+    return itertools.cycle([1, 1, 1, 0])
+
+
+# register the tests only when cocotb exposes a non-None `top` attribute
+# (presumably only the case when running inside a simulator -- TODO confirm)
+if getattr(cocotb, 'top', None) is not None:
+
+    for test in [run_test_write, run_test_write_imm]:
+
+        # two options with two values each: with and without cycle_pause
+        factory = TestFactory(test)
+        factory.add_option("idle_inserter", [None, cycle_pause])
+        factory.add_option("backpressure_inserter", [None, cycle_pause])
+        factory.generate_tests()
+
+
+# cocotb-test
+
+# directories resolved relative to the location of this file
+tests_dir = os.path.dirname(__file__)
+rtl_dir = os.path.abspath(os.path.join(tests_dir, '..', '..', 'rtl'))
+lib_dir = os.path.abspath(os.path.join(tests_dir, '..', '..', 'lib'))
+taxi_src_dir = os.path.abspath(os.path.join(lib_dir, 'taxi', 'src'))
+
+
+def process_f_files(files):
+    """Expand ".f" file lists into a flat list of paths.
+
+    An entry whose last two characters are ".f" (compared case-insensitively)
+    is opened and split on whitespace; each token is joined onto the directory
+    of that ".f" file and expanded recursively. Every other entry is taken
+    as-is. Results are collected in a dict keyed by basename, so a basename
+    that occurs more than once yields a single entry holding the path seen
+    last.
+
+    Args:
+        files: iterable of path strings.
+
+    Returns:
+        list of path strings, one per distinct basename.
+    """
+    lst = {}
+    for f in files:
+        if f[-2:].lower() == '.f':
+            with open(f, 'r') as fp:
+                l = fp.read().split()
+            # the argument list is built from the outer f before the inner loop rebinds the name
+            for f in process_f_files([os.path.join(os.path.dirname(f), x) for x in l]):
+                lst[os.path.basename(f)] = f
+        else:
+            lst[os.path.basename(f)] = f
+    return list(lst.values())
+
+
+@pytest.mark.parametrize("offset_group", list(range(8)))
+@pytest.mark.parametrize("axi_data_w", [64, 128])
+def test_taxi_dma_if_axi_wr(request, axi_data_w, offset_group):
+    """Run the cocotb tests of this module through cocotb_test with Verilator.
+
+    Parametrized over two AXI data widths and eight offset groups. The HDL
+    parameters are passed to the simulator and are also exported as PARAM_*
+    environment variables; the offset group is exported as OFFSET_GROUP, which
+    the test coroutines read to pick their subset of AXI offsets.
+
+    Args:
+        request: pytest request object; its node name is used to build a
+            separate sim_build directory per parameter combination.
+        axi_data_w: value for the AXI_DATA_W parameter.
+        offset_group: value exported as OFFSET_GROUP.
+    """
+    dut = "taxi_dma_if_axi_wr"
+    # module and toplevel are both this file's name without its extension
+    module = os.path.splitext(os.path.basename(__file__))[0]
+    toplevel = module
+
+    verilog_sources = [
+        os.path.join(tests_dir, f"{toplevel}.sv"),
+        os.path.join(rtl_dir, f"{dut}.sv"),
+        os.path.join(rtl_dir, "taxi_dma_desc_if.sv"),
+        os.path.join(rtl_dir, "taxi_dma_ram_if.sv"),
+        os.path.join(taxi_src_dir, "axi", "rtl", "taxi_axi_if.sv"),
+    ]
+
+    # none of the entries above ends in ".f", so this only de-duplicates by basename
+    verilog_sources = process_f_files(verilog_sources)
+
+    parameters = {}
+
+    parameters['AXI_DATA_W'] = axi_data_w
+    parameters['AXI_ADDR_W'] = 16
+    parameters['AXI_STRB_W'] = parameters['AXI_DATA_W'] // 8
+    parameters['AXI_ID_W'] = 8
+    parameters['AXI_MAX_BURST_LEN'] = 256
+    parameters['RAM_SEL_W'] = 2
+    parameters['RAM_ADDR_W'] = 16
+    parameters['RAM_SEGS'] = 2
+    parameters['IMM_EN'] = 1
+    parameters['IMM_W'] = parameters['AXI_DATA_W']
+    parameters['LEN_W'] = 16
+    parameters['TAG_W'] = 8
+    parameters['OP_TBL_SIZE'] = 2**parameters['AXI_ID_W']
+    # overrides the 1'b1 default declared in the SystemVerilog testbench
+    parameters['USE_AXI_ID'] = 0
+
+    extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}
+
+    extra_env['OFFSET_GROUP'] = str(offset_group)
+
+    # one build directory per test id; '[' becomes '-' and ']' is dropped
+    sim_build = os.path.join(tests_dir, "sim_build",
+        request.node.name.replace('[', '-').replace(']', ''))
+
+    cocotb_test.simulator.run(
+        simulator="verilator",
+        python_search=[tests_dir],
+        verilog_sources=verilog_sources,
+        toplevel=toplevel,
+        module=module,
+        parameters=parameters,
+        sim_build=sim_build,
+        extra_env=extra_env,
+    )
diff --git a/src/dma/tb/taxi_dma_if_axi_wr/test_taxi_dma_if_axi_wr.sv b/src/dma/tb/taxi_dma_if_axi_wr/test_taxi_dma_if_axi_wr.sv
new file mode 100644
index 0000000..2033907
--- /dev/null
+++ b/src/dma/tb/taxi_dma_if_axi_wr/test_taxi_dma_if_axi_wr.sv
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: CERN-OHL-S-2.0
+/*
+
+Copyright (c) 2025 FPGA Ninja, LLC
+
+Authors:
+- Alex Forencich
+
+*/
+
+`resetall
+`timescale 1ns / 1ps
+`default_nettype none
+
+/*
+ * AXI DMA interface testbench
+ */
+module test_taxi_dma_if_axi_wr #
+(
+    /* verilator lint_off WIDTHTRUNC */
+    parameter AXI_DATA_W = 64,
+    parameter AXI_ADDR_W = 16,
+    parameter AXI_STRB_W = AXI_DATA_W / 8, // AXI data width in bytes
+    parameter AXI_ID_W = 8,
+    parameter AXI_MAX_BURST_LEN = 256,
+    parameter RAM_SEL_W = 2,
+    parameter RAM_ADDR_W = 16,
+    parameter RAM_SEGS = 2,
+    parameter logic IMM_EN = 1,
+    parameter IMM_W = AXI_DATA_W, // immediate field as wide as the AXI data bus
+    parameter LEN_W = 16,
+    parameter TAG_W = 8,
+    parameter OP_TBL_SIZE = 2**AXI_ID_W, // default: one entry per possible AXI ID value
+    parameter logic USE_AXI_ID = 1'b1 // default only; the pytest runner in this patch passes 0
+    /* verilator lint_on WIDTHTRUNC */
+)
+();
+
+localparam RAM_DATA_W = AXI_DATA_W*2; // RAM interface is twice as wide as the AXI data bus
+localparam RAM_SEG_DATA_W = RAM_DATA_W / RAM_SEGS; // data bits per RAM segment
+localparam RAM_SEG_BE_W = RAM_SEG_DATA_W / 8; // bytes per RAM segment
+localparam RAM_SEG_ADDR_W = RAM_ADDR_W - $clog2(RAM_SEGS*RAM_SEG_BE_W); // RAM address bits left after removing the bits that index a byte within one full-width word
+
+logic clk; // no driver inside this module -- presumably driven from the cocotb test (TODO confirm)
+logic rst; // no driver inside this module -- presumably driven from the cocotb test (TODO confirm)
+
+taxi_axi_if #(
+    .DATA_W(AXI_DATA_W),
+    .ADDR_W(AXI_ADDR_W),
+    .STRB_W(AXI_STRB_W),
+    .ID_W(AXI_ID_W),
+    .AWUSER_EN(1'b0),
+    .WUSER_EN(1'b0),
+    .BUSER_EN(1'b0),
+    .ARUSER_EN(1'b0),
+    .RUSER_EN(1'b0),
+    .MAX_BURST_LEN(AXI_MAX_BURST_LEN)
+) m_axi(); // connected to uut.m_axi_wr
+
+taxi_dma_desc_if #(
+    .SRC_ADDR_W(RAM_ADDR_W), // source addresses use the RAM address width
+    .SRC_SEL_EN(1'b1),
+    .SRC_SEL_W(RAM_SEL_W),
+    .SRC_ASID_EN(1'b0),
+    .DST_ADDR_W(AXI_ADDR_W), // destination addresses use the AXI address width
+    .DST_SEL_EN(1'b0),
+    .DST_ASID_EN(1'b0),
+    .IMM_EN(IMM_EN),
+    .IMM_W(IMM_W),
+    .LEN_W(LEN_W),
+    .TAG_W(TAG_W),
+    .ID_EN(1'b0),
+    .DEST_EN(1'b0),
+    .USER_EN(1'b0)
+) wr_desc(); // one instance serves both uut.wr_desc_req and uut.wr_desc_sts
+
+taxi_dma_ram_if #(
+    .SEGS(RAM_SEGS),
+    .SEG_ADDR_W(RAM_SEG_ADDR_W),
+    .SEG_DATA_W(RAM_SEG_DATA_W),
+    .SEG_BE_W(RAM_SEG_BE_W)
+) dma_ram(); // connected to uut.dma_ram_rd
+
+logic enable; // connected to uut.enable; the Python tests in this patch set it to 1 after reset
+
+logic status_busy;
+
+logic [$clog2(OP_TBL_SIZE)-1:0] stat_wr_op_start_tag; // the stat_* nets only connect to uut; nothing else in this module reads them
+logic stat_wr_op_start_valid;
+logic [$clog2(OP_TBL_SIZE)-1:0] stat_wr_op_finish_tag;
+logic [3:0] stat_wr_op_finish_status;
+logic stat_wr_op_finish_valid;
+logic [$clog2(OP_TBL_SIZE)-1:0] stat_wr_req_start_tag;
+logic [12:0] stat_wr_req_start_len;
+logic stat_wr_req_start_valid;
+logic [$clog2(OP_TBL_SIZE)-1:0] stat_wr_req_finish_tag;
+logic [3:0] stat_wr_req_finish_status;
+logic stat_wr_req_finish_valid;
+logic stat_wr_op_tbl_full;
+logic stat_wr_tx_stall;
+
+taxi_dma_if_axi_wr #(
+    .AXI_MAX_BURST_LEN(AXI_MAX_BURST_LEN),
+    .OP_TBL_SIZE(OP_TBL_SIZE),
+    .USE_AXI_ID(USE_AXI_ID)
+)
+uut (
+    .clk(clk),
+    .rst(rst),
+
+    /*
+     * AXI master interface
+     */
+    .m_axi_wr(m_axi),
+
+    /*
+     * Write descriptor
+     */
+    .wr_desc_req(wr_desc),
+    .wr_desc_sts(wr_desc),
+
+    /*
+     * RAM interface
+     */
+    .dma_ram_rd(dma_ram),
+
+    /*
+     * Configuration
+     */
+    .enable(enable),
+
+    /*
+     * Status
+     */
+    .status_busy(status_busy),
+
+    /*
+     * Statistics
+     */
+    .stat_wr_op_start_tag(stat_wr_op_start_tag),
+    .stat_wr_op_start_valid(stat_wr_op_start_valid),
+    .stat_wr_op_finish_tag(stat_wr_op_finish_tag),
+    .stat_wr_op_finish_status(stat_wr_op_finish_status),
+    .stat_wr_op_finish_valid(stat_wr_op_finish_valid),
+    .stat_wr_req_start_tag(stat_wr_req_start_tag),
+    .stat_wr_req_start_len(stat_wr_req_start_len),
+    .stat_wr_req_start_valid(stat_wr_req_start_valid),
+    .stat_wr_req_finish_tag(stat_wr_req_finish_tag),
+    .stat_wr_req_finish_status(stat_wr_req_finish_status),
+    .stat_wr_req_finish_valid(stat_wr_req_finish_valid),
+    .stat_wr_op_tbl_full(stat_wr_op_tbl_full),
+    .stat_wr_tx_stall(stat_wr_tx_stall)
+);
+
+endmodule
+
+`resetall